source: CIVL/examples/cuda/sum.cu@ 83af34d

1.23 2.0 main test-branch
Last change on this file since 83af34d was ef14ce6, checked in by Andre Marianiello <andre.marianiello@…>, 11 years ago

Cuda header implementations corresponding to Cuda headers in ABC. Changed ModelTranslator to find correct source files to link when Cuda headers are used. Cuda2CIVLTransformTest updated to verify sum.cu (which it currently does).

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@1843 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 1.4 KB
Line 
1/**
2* This is an example from the paper "Formal Semantics of Heterogeneous CUDA-C:
3* A Modular Approach with Applications" by Chris Hathhorn et al.
4*/
5
6#include <stdio.h>
7#include <cuda.h>
8
9#define N 8 /* number of input elements: length of host[] and of the dev_in buffer */
10#define NBLOCKS 4 /* grid size of the first sum launch; also the block size of the second launch */
11#define NTHREADS (N/NBLOCKS) /* threads per block in the first sum launch */
12
/**
 * Per-block sum reduction.
 *
 * Block b adds the blockDim.x consecutive ints starting at
 * in[b * blockDim.x] and stores the total in out[b].
 *
 * Launch requirements:
 *  - only the .x components of threadIdx, blockIdx and blockDim are read;
 *  - dynamic shared memory of at least blockDim.x * sizeof(int) bytes;
 *  - `in` must hold gridDim.x * blockDim.x ints and `out` gridDim.x ints;
 *    the kernel performs no bounds checking.
 *
 * Within one block every read of `in` happens before the first barrier and
 * the only write to `out` happens after the second one, so a single-block
 * launch may pass the same buffer for both arguments.
 *
 * @param in   input values
 * @param out  one partial sum per block
 */
__global__ void sum(int* in, int* out) {
  extern __shared__ int shared[];
  const int tid = threadIdx.x;
  const int bid = blockIdx.x;
  const int bdim = blockDim.x;
  /* Size of the lower part, rounded up so that an odd-sized block keeps its
   * middle element there. The upper part starts at `half`, which makes the
   * parallel fold below and the serial loop cover every element exactly
   * once (the previous offset of bdim/2 double-counted the middle element
   * and dropped the last one when bdim was odd). */
  const int half = bdim / 2 + bdim % 2;

  /* Stage this block's slice of the input in shared memory. */
  shared[tid] = in[bid * bdim + tid];
  __syncthreads();

  /* Fold the upper part onto the lower part: half + tid < bdim always holds
   * because tid < bdim / 2. */
  if (tid < bdim / 2) {
    shared[tid] += shared[half + tid];
  }
  __syncthreads();

  /* Thread 0 serially adds up the remaining `half` partial values. */
  if (tid == 0) {
    int total = shared[0];
    for (int i = 1; i < half; ++i) {
      total += shared[i];
    }
    out[bid] = total;
  }
}
33
/* Reports a failed CUDA runtime call on stderr.
 * Returns 1 when `status` is cudaSuccess and 0 otherwise. */
static int checkCuda(cudaError_t status, const char* what) {
  if (status == cudaSuccess) {
    return 1;
  }
  fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
  return 0;
}

/**
 * Fills an N-element array with a fixed sequence, sums it on the device in
 * two passes of the `sum` kernel and prints the input and the total.
 *
 * Pass 1: NBLOCKS blocks of NTHREADS threads write one partial sum each to
 *         dev_out.
 * Pass 2: a single block of NBLOCKS threads reduces those partial sums in
 *         place, leaving the total in dev_out[0].
 *
 * Returns 0 on success and 1 if any CUDA call or kernel launch fails.
 */
int main(void) {
  int i, host[N];
  int *dev_in = NULL, *dev_out = NULL;
  int exit_code = 1;

  printf("INPUT: ");
  for (i = 0; i != N; ++i) {
    host[i] = (21*i + 29) % 100;
    printf(" %d ", host[i]);
  }
  printf("\n");

  if (!checkCuda(cudaMalloc(&dev_in, N * sizeof(int)),
                 "cudaMalloc(dev_in)") ||
      !checkCuda(cudaMalloc(&dev_out, NBLOCKS * sizeof(int)),
                 "cudaMalloc(dev_out)") ||
      !checkCuda(cudaMemcpy(dev_in, host, N * sizeof(int),
                            cudaMemcpyHostToDevice),
                 "cudaMemcpy(host -> dev_in)")) {
    goto cleanup;
  }

  /* A kernel launch returns nothing; a bad launch configuration is only
   * visible through cudaGetLastError(). */
  sum<<<NBLOCKS, NTHREADS, NTHREADS * sizeof(int)>>>(dev_in, dev_out);
  if (!checkCuda(cudaGetLastError(), "sum launch (per-block pass)")) {
    goto cleanup;
  }

  /* Both launches use the default stream, so this pass runs after the
   * first one has finished writing dev_out. */
  sum<<<1, NBLOCKS, NBLOCKS * sizeof(int)>>>(dev_out, dev_out);
  if (!checkCuda(cudaGetLastError(), "sum launch (final pass)")) {
    goto cleanup;
  }

  /* Synchronize before the copy so that a fault raised while a kernel was
   * executing is reported here rather than blamed on the memcpy. */
  if (!checkCuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize") ||
      !checkCuda(cudaMemcpy(host, dev_out, sizeof(int),
                            cudaMemcpyDeviceToHost),
                 "cudaMemcpy(dev_out -> host)")) {
    goto cleanup;
  }

  /* host[0] is an int, so print it with %d (the input above sums to 420). */
  printf("OUTPUT: %d\n", host[0]);
  exit_code = 0;

cleanup:
  /* cudaFree accepts a null pointer, so this is safe on every path. */
  cudaFree(dev_in);
  cudaFree(dev_out);
  return exit_code;
}
62
Note: See TracBrowser for help on using the repository browser.