source: CIVL/examples/translation/cuda/dot.cvl@ 4540352

1.23 2.0 acw/focus-triggers main test-branch
Last change on this file since 4540352 was bba38b8, checked in by Manchun Zheng <zmanchun@…>, 12 years ago

minor correction

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@1111 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 2.6 KB
Line 
/* Dot product of two arrays.
 * Command line execution:
 *   civl verify dot.cvl -inputN_BOUND=8 -inputTHREADS_PER_BLOCK=4
 */
5#include <civlc.h>
6#include <stdio.h>
7
8#define imin(a,b) (a<b?a:b)
9
10$input int THREADS_PER_BLOCK; // thread number per block: must be a power of 2, due to the while loop at the end of gpuThread();
11$input int N_BOUND;
12$input int N;
13$assume 0 < N && N <= N_BOUND;
14int const threadsPerBlock = THREADS_PER_BLOCK;
15int const blocksPerGrid =
16 imin(32, (N+threadsPerBlock-1) / threadsPerBlock );
17double *a, *b, c, *partial_c;
18
19void gpu(){
20 $proc blocks[blocksPerGrid];
21
22 void gpuBlock(int blockID){
23 int num_in_barrier =0;
24 int barrier_size = 0;
25 double cache[threadsPerBlock];
26 $gbarrier gbarrier = $gbarrier_create($here, threadsPerBlock);
27 $proc threads[threadsPerBlock];
28
29 void gpuThread(int threadID){
30 int tid = threadID + blockID * threadsPerBlock;
31 int cacheIndex = threadID;
32 double temp = 0;
33 $barrier barrier = $barrier_create($here, gbarrier, threadID);
34 int i;
35
36 $atomic {
37 while (tid < N) {
38 temp += a[tid] * b[tid];
39 tid += threadsPerBlock * blocksPerGrid;
40 }
41 // set cache values
42 cache[cacheIndex] = temp;
43 }
44 // synchronize threads
45 $barrier_call(barrier);
46 i = threadsPerBlock/2;
47 while (i != 0) {
48 if (cacheIndex < i)
49 cache[cacheIndex] += cache[cacheIndex + i];
50 // synchronize threads
51 $barrier_call(barrier);
52 i /= 2;
53 }
54 if (cacheIndex == 0)
55 partial_c[blockID] = cache[0];
56 $barrier_destroy(barrier);
57 }
58
59 for(int i = 0; i < threadsPerBlock; i++) {
60 threads[i] = $spawn gpuThread(i);
61 }
62 for(int i = 0; i < threadsPerBlock; i++) {
63 $wait(threads[i]);
64 }
65 $gbarrier_destroy(gbarrier);
66 }
67 // spawns gpublock's
68 for(int i = 0; i < blocksPerGrid; i++) {
69 blocks[i] = $spawn gpuBlock(i);
70 }
71 // waits for gpublock's
72 for(int i = 0; i < blocksPerGrid; i++) {
73 $wait(blocks[i]);
74 }
75}
76
77int main( void ) {
78 $scope host = $here;
79
80 // allocate memory on the cpu side
81 a = (double *) $malloc(host, N*sizeof(double));
82 b = (double *) $malloc(host, N*sizeof(double));
83 partial_c = (double *) $malloc(host, blocksPerGrid*sizeof(double));
84 // fill in the host memory with data
85 for (int i=0; i<N; i++) {
86 a[i] = i;
87 b[i] = i*2;
88 }
89 gpu();
90 // finish up on the CPU side
91 c = 0;
92 for (int i=0; i<blocksPerGrid; i++) {
93 c += partial_c[i];
94 }
95 #define sum_squares(x) (x*(x+1)*(2*x+1)/6)
96 // check result
97 $assert(c == 2 * sum_squares( (double)(N - 1) ));
98 $free(a);
99 $free(b);
100 $free(partial_c);
101}
Note: See TracBrowser for help on using the repository browser.