source: CIVL/examples/cuda/dot.cvl@ 631a953

1.23 2.0 main test-branch
Last change on this file since 631a953 was 3ff27cf, checked in by Manchun Zheng <zmanchun@…>, 11 years ago

updated examples since $assert/$assume has been changed to functions; fixed the model builder for the new side-effect remover.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@2085 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 2.7 KB
Line 
1/* dot product of two arrays.
2 * Command line execution:
3 * civl verify dot.cvl -inputN_BOUND=8 -inputTHREADS_PER_BLOCK=4
4 */
5#include <civlc.cvh>
6#include <concurrency.cvh>
7#include <stdio.h>
8
9#define imin(a,b) (a<b?a:b)
10
11$input int THREADS_PER_BLOCK = 4; // thread number per block: must be a power of 2, due to the while loop at the end of gpuThread();
12$input int N_BOUND = 8;
13$input int N;
14$assume(0 < N && N <= N_BOUND);
15int const threadsPerBlock = THREADS_PER_BLOCK;
16int const blocksPerGrid =
17 imin(32, (N+threadsPerBlock-1) / threadsPerBlock );
18double *a, *b, c, *partial_c;
19
20void gpu(){
21 $proc blocks[blocksPerGrid];
22
23 void gpuBlock(int blockID){
24 int num_in_barrier =0;
25 int barrier_size = 0;
26 double cache[threadsPerBlock];
27 $gbarrier gbarrier = $gbarrier_create($here, threadsPerBlock);
28 $proc threads[threadsPerBlock];
29
30 void gpuThread(int threadID){
31 int tid = threadID + blockID * threadsPerBlock;
32 int cacheIndex = threadID;
33 double temp = 0;
34 $barrier barrier = $barrier_create($here, gbarrier, threadID);
35 int i;
36
37 $atomic {
38 while (tid < N) {
39 temp += a[tid] * b[tid];
40 tid += threadsPerBlock * blocksPerGrid;
41 }
42 // set cache values
43 cache[cacheIndex] = temp;
44 }
45 // synchronize threads
46 $barrier_call(barrier);
47 i = threadsPerBlock/2;
48 while (i != 0) {
49 if (cacheIndex < i)
50 cache[cacheIndex] += cache[cacheIndex + i];
51 // synchronize threads
52 $barrier_call(barrier);
53 i /= 2;
54 }
55 if (cacheIndex == 0)
56 partial_c[blockID] = cache[0];
57 $barrier_destroy(barrier);
58 }
59
60 for(int i = 0; i < threadsPerBlock; i++) {
61 threads[i] = $spawn gpuThread(i);
62 }
63 for(int i = 0; i < threadsPerBlock; i++) {
64 $wait(threads[i]);
65 }
66 $gbarrier_destroy(gbarrier);
67 }
68 // spawns gpublock's
69 for(int i = 0; i < blocksPerGrid; i++) {
70 blocks[i] = $spawn gpuBlock(i);
71 }
72 // waits for gpublock's
73 for(int i = 0; i < blocksPerGrid; i++) {
74 $wait(blocks[i]);
75 }
76}
77
78int main( void ) {
79 $scope host = $here;
80
81 // allocate memory on the cpu side
82 a = (double *) $malloc(host, N*sizeof(double));
83 b = (double *) $malloc(host, N*sizeof(double));
84 partial_c = (double *) $malloc(host, blocksPerGrid*sizeof(double));
85 // fill in the host memory with data
86 for (int i=0; i<N; i++) {
87 a[i] = i;
88 b[i] = i*2;
89 }
90 gpu();
91 // finish up on the CPU side
92 c = 0;
93 for (int i=0; i<blocksPerGrid; i++) {
94 c += partial_c[i];
95 }
96 #define sum_squares(x) (x*(x+1)*(2*x+1)/6)
97 // check result
98 $assert((c == 2 * sum_squares( (double)(N - 1) )));
99 $free(a);
100 $free(b);
101 $free(partial_c);
102}
Note: See TracBrowser for help on using the repository browser.