| [54d543c] | 1 | /* dot product of two arrays.
|
|---|
| [69bf2e6] | 2 | * Command line execution:
|
|---|
| [bba38b8] | 3 | * civl verify dot.cvl -inputN_BOUND=8 -inputTHREADS_PER_BLOCK=4
|
|---|
| [54d543c] | 4 | */
|
|---|
| [e6b02c8] | 5 | #include <civlc.cvh>
|
|---|
| 6 | #include <concurrency.cvh>
|
|---|
| [54d543c] | 7 | #include <stdio.h>
|
|---|
| 8 |
|
|---|
| 9 | #define imin(a,b) (a<b?a:b)
|
|---|
| 10 |
|
|---|
| [0baeebd] | 11 | $input int THREADS_PER_BLOCK = 4; // thread number per block: must be a power of 2, due to the while loop at the end of gpuThread();
|
|---|
| 12 | $input int N_BOUND = 8;
|
|---|
| [69bf2e6] | 13 | $input int N;
|
|---|
| 14 | $assume 0 < N && N <= N_BOUND;
|
|---|
| 15 | int const threadsPerBlock = THREADS_PER_BLOCK;
|
|---|
| 16 | int const blocksPerGrid =
|
|---|
| 17 | imin(32, (N+threadsPerBlock-1) / threadsPerBlock );
|
|---|
| [54d543c] | 18 | double *a, *b, c, *partial_c;
|
|---|
| 19 |
|
|---|
| 20 | void gpu(){
|
|---|
| [69bf2e6] | 21 | $proc blocks[blocksPerGrid];
|
|---|
| 22 |
|
|---|
| [54d543c] | 23 | void gpuBlock(int blockID){
|
|---|
| [69bf2e6] | 24 | int num_in_barrier =0;
|
|---|
| 25 | int barrier_size = 0;
|
|---|
| [54d543c] | 26 | double cache[threadsPerBlock];
|
|---|
| [a489900c] | 27 | $gbarrier gbarrier = $gbarrier_create($here, threadsPerBlock);
|
|---|
| [69bf2e6] | 28 | $proc threads[threadsPerBlock];
|
|---|
| 29 |
|
|---|
| [54d543c] | 30 | void gpuThread(int threadID){
|
|---|
| 31 | int tid = threadID + blockID * threadsPerBlock;
|
|---|
| 32 | int cacheIndex = threadID;
|
|---|
| 33 | double temp = 0;
|
|---|
| [a489900c] | 34 | $barrier barrier = $barrier_create($here, gbarrier, threadID);
|
|---|
| [bba38b8] | 35 | int i;
|
|---|
| [54d543c] | 36 |
|
|---|
| 37 | $atomic {
|
|---|
| 38 | while (tid < N) {
|
|---|
| 39 | temp += a[tid] * b[tid];
|
|---|
| 40 | tid += threadsPerBlock * blocksPerGrid;
|
|---|
| 41 | }
|
|---|
| 42 | // set cache values
|
|---|
| 43 | cache[cacheIndex] = temp;
|
|---|
| 44 | }
|
|---|
| [9c73065] | 45 | // synchronize threads
|
|---|
| [a489900c] | 46 | $barrier_call(barrier);
|
|---|
| [bba38b8] | 47 | i = threadsPerBlock/2;
|
|---|
| [54d543c] | 48 | while (i != 0) {
|
|---|
| 49 | if (cacheIndex < i)
|
|---|
| 50 | cache[cacheIndex] += cache[cacheIndex + i];
|
|---|
| [9c73065] | 51 | // synchronize threads
|
|---|
| [a489900c] | 52 | $barrier_call(barrier);
|
|---|
| [54d543c] | 53 | i /= 2;
|
|---|
| 54 | }
|
|---|
| 55 | if (cacheIndex == 0)
|
|---|
| 56 | partial_c[blockID] = cache[0];
|
|---|
| [a489900c] | 57 | $barrier_destroy(barrier);
|
|---|
| [54d543c] | 58 | }
|
|---|
| [a489900c] | 59 |
|
|---|
| [9c73065] | 60 | for(int i = 0; i < threadsPerBlock; i++) {
|
|---|
| 61 | threads[i] = $spawn gpuThread(i);
|
|---|
| 62 | }
|
|---|
| 63 | for(int i = 0; i < threadsPerBlock; i++) {
|
|---|
| [a82987f] | 64 | $wait(threads[i]);
|
|---|
| [54d543c] | 65 | }
|
|---|
| [a489900c] | 66 | $gbarrier_destroy(gbarrier);
|
|---|
| [54d543c] | 67 | }
|
|---|
| [bba38b8] | 68 | // spawns gpublock's
|
|---|
| [9c73065] | 69 | for(int i = 0; i < blocksPerGrid; i++) {
|
|---|
| 70 | blocks[i] = $spawn gpuBlock(i);
|
|---|
| 71 | }
|
|---|
| [bba38b8] | 72 | // waits for gpublock's
|
|---|
| [9c73065] | 73 | for(int i = 0; i < blocksPerGrid; i++) {
|
|---|
| [a82987f] | 74 | $wait(blocks[i]);
|
|---|
| [9c73065] | 75 | }
|
|---|
| [54d543c] | 76 | }
|
|---|
| 77 |
|
|---|
| 78 | int main( void ) {
|
|---|
| [78fdaf0] | 79 | $scope host = $here;
|
|---|
| [54d543c] | 80 |
|
|---|
| 81 | // allocate memory on the cpu side
|
|---|
| [78fdaf0] | 82 | a = (double *) $malloc(host, N*sizeof(double));
|
|---|
| 83 | b = (double *) $malloc(host, N*sizeof(double));
|
|---|
| 84 | partial_c = (double *) $malloc(host, blocksPerGrid*sizeof(double));
|
|---|
| [77fc58c] | 85 | // fill in the host memory with data
|
|---|
| 86 | for (int i=0; i<N; i++) {
|
|---|
| 87 | a[i] = i;
|
|---|
| 88 | b[i] = i*2;
|
|---|
| [54d543c] | 89 | }
|
|---|
| 90 | gpu();
|
|---|
| [77fc58c] | 91 | // finish up on the CPU side
|
|---|
| 92 | c = 0;
|
|---|
| 93 | for (int i=0; i<blocksPerGrid; i++) {
|
|---|
| 94 | c += partial_c[i];
|
|---|
| [54d543c] | 95 | }
|
|---|
| 96 | #define sum_squares(x) (x*(x+1)*(2*x+1)/6)
|
|---|
| [de02f09] | 97 | // check result
|
|---|
| 98 | $assert(c == 2 * sum_squares( (double)(N - 1) ));
|
|---|
| [48bfab9] | 99 | $free(a);
|
|---|
| 100 | $free(b);
|
|---|
| 101 | $free(partial_c);
|
|---|
| [54d543c] | 102 | }
|
|---|