source: CIVL/examples/experimental/sum.cvl@ fbc5eb8

1.23 2.0 main test-branch
Last change on this file since fbc5eb8 was 0b9a80a, checked in by Manchun Zheng <zmanchun@…>, 12 years ago

implemented $scopeof expression; removed unused examples; cleaned up library executors and enablers.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@703 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 1.8 KB
Line 
1/**
2* This is an example from the paper "Formal Semantics of Heterogeneous CUDA-C:
3* A Modular Approach with Applications" by Chris Hathhorn et al.
4*/
5
6#include <stdio.h>
7#include <civlc.h>
8
9#define N 18
10#define NBLOCKS 2
11#define NTHREADS (N/NBLOCKS)
12
13void gpu(int nb, int nt, int *in, int *out){
14 void gpuBlock(int bid){
15 int shared[];
16 int num_in_barrier =0;
17 int barrier_size = 0;
18 int in_barrier[nt];
19
20 #include "cuda.cvh"
21
22 void gpuThread(int tid){
23 int i;
24 int bdim = NTHREADS;
25
26 shared[tid] = in[bid * bdim + tid];
27
28 __syncthreads(in_barrier, tid);
29 if(tid < bdim/2) {
30 shared[tid] += shared[bdim/2 + tid];
31 }
32 __syncthreads(in_barrier, tid);
33 if(tid == 0) {
34 for (i = 1; i != (bdim/2) + (bdim%2); ++i) {
35 shared[0] += shared[i];
36 }
37 out[bid] = shared[0];
38 }
39 }
40
41 $proc threads[nt];
42 __sync_init(in_barrier, nt);
43 for(int i = 0; i < nt; i++) {
44 threads[i] = $spawn gpuThread(i);
45 }
46 for(int i = 0; i < nt; i++) {
47 $wait threads[i];
48 }
49 }
50
51 $proc blocks[nb];
52 for(int i = 0; i < nb; i++) {
53 blocks[i] = $spawn gpuBlock(i);
54 }
55 for(int i = 0; i < nb; i++) {
56 $wait blocks[i];
57 }
58}
59
60int main(void) {
61 int i, *dev_out, host[N];
62 $heap h;
63
64 printf("INPUT: ");
65 for(i = 0; i != N; ++i) {
66 host[i] = (21*i + 29) % 100;
67 printf(" %d ", host[i]);
68 }
69 printf("\n");
70
71 //dev_in = (int *) $malloc(&h, N * sizeof(int));
72 dev_out = (int *) $malloc(&h, NBLOCKS * sizeof(int));
73 //memcpy(dev_in, &host[0], N * sizeof(int));
74
75 gpu(NBLOCKS, NTHREADS, host, dev_out);
76 gpu(1, NBLOCKS, dev_out, dev_out);
77 //memcpy(&host[0], dev_out, sizeof(int));
78 //cudaDeviceSynchronize();
79 printf("OUTPUT: %u\n", *dev_out);
80 //$free(&h, dev_in);
81 $free(&h, dev_out);
82 return 0;
83}
Note: See TracBrowser for help on using the repository browser.