source: CIVL/mods/dev.civl.abc/examples/cuda/sum.cu

main
Last change on this file was aad342c, checked in by Stephen Siegel <siegel@…>, 3 years ago

Performing huge refactor to incorporate ABC, GMC, and SARL into CIVL repo and use Java modules.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5664 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 1.4 KB
Line 
1/**
2* This is an example from the paper "Formal Semantics of Heterogeneous CUDA-C:
3* A Modular Approach with Applications" by Chris Hathhorn et al.
4*/
5
6#include <stdio.h>
7#include "cuda.h"
8
9#define N 8
10#define NBLOCKS 4
11#define NTHREADS (N/NBLOCKS)
12
/**
 * Per-block integer sum.
 *
 * Each block adds up the blockDim.x consecutive elements of `in` that start
 * at index blockIdx.x * blockDim.x; thread 0 of the block then stores the
 * block's total in out[blockIdx.x].
 *
 * Launch requirements:
 *  - only the .x components of threadIdx/blockIdx/blockDim are used, so the
 *    kernel expects a 1-D grid of 1-D blocks;
 *  - the dynamic shared-memory size (third launch parameter) must be at
 *    least blockDim.x * sizeof(int) bytes;
 *  - `in` must hold at least gridDim.x * blockDim.x ints and `out` at least
 *    gridDim.x ints.
 *
 * With a single block, `in` and `out` may be the same buffer: every read of
 * `in` happens before the first barrier and the only write to `out` happens
 * after the second one.
 *
 * Any block size >= 1 is handled, including odd sizes.
 */
__global__ void sum(int* in, int* out) {
  extern __shared__ int shared[];
  int i, tid = threadIdx.x,
      bid = blockIdx.x,
      bdim = blockDim.x;
  /* Size of the lower half, rounded up. Using the rounded-up value as the
   * pairing offset keeps the unpaired middle element (odd bdim) inside
   * shared[0 .. half-1], which is exactly the range the tail loop covers. */
  int half = bdim/2 + bdim%2;

  /* Stage this block's slice of the input in shared memory. */
  shared[tid] = in[bid * bdim + tid];

  /* All elements must be staged before any thread reads a neighbour's slot. */
  __syncthreads();
  if (tid < bdim/2) {
    /* Fold the upper half onto the lower half: slot tid absorbs slot
     * half + tid. The written range [0, bdim/2) and the read range
     * [half, bdim) never overlap, so no further barrier is needed here. */
    shared[tid] += shared[half + tid];
  }
  /* Partial sums must be complete before thread 0 accumulates them. */
  __syncthreads();
  if (tid == 0) {
    /* Serially add the remaining partial sums into slot 0. */
    for (i = 1; i != half; ++i) {
      shared[0] += shared[i];
    }
    out[bid] = shared[0];
  }
}
33
/* Reports a failed CUDA runtime call on stderr.
 * Returns 0 when `status` is cudaSuccess and 1 otherwise. */
static int cudaFailed(cudaError_t status, const char* what) {
  if (status == cudaSuccess) {
    return 0;
  }
  fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
  return 1;
}

/**
 * Host driver: fills an N-element array, sums it on the device in two
 * passes and prints the total.
 *
 * Pass 1 launches NBLOCKS blocks of NTHREADS threads; each block writes its
 * partial sum into dev_out. Pass 2 launches a single block of NBLOCKS
 * threads that reduces those partial sums in place, leaving the grand total
 * in dev_out[0].
 *
 * Returns 0 on success and 1 if any CUDA runtime call or kernel launch
 * reports an error. Device buffers are released on every path.
 */
int main(void) {
  int i, failed, *dev_in = 0, *dev_out = 0, host[N];

  /* Deterministic test data: (21*i + 29) mod 100. For N == 8 the values
   * add up to 420. */
  printf("INPUT: ");
  for(i = 0; i != N; ++i) {
    host[i] = (21*i + 29) % 100;
    printf(" %d ", host[i]);
  }
  printf("\n");

  failed = cudaFailed(cudaMalloc(&dev_in, N * sizeof(int)),
                      "cudaMalloc(dev_in)");
  if (!failed) {
    failed = cudaFailed(cudaMalloc(&dev_out, NBLOCKS * sizeof(int)),
                        "cudaMalloc(dev_out)");
  }
  if (!failed) {
    failed = cudaFailed(cudaMemcpy(dev_in, host, N * sizeof(int),
                                   cudaMemcpyHostToDevice),
                        "cudaMemcpy(host -> dev_in)");
  }
  if (!failed) {
    /* Pass 1: one partial sum per block. The third launch parameter sizes
     * the kernel's dynamic shared array: one int per thread. */
    sum<<<NBLOCKS, NTHREADS, NTHREADS * sizeof(int)>>>(
        dev_in, dev_out);
    /* A launch returns nothing; configuration errors are only visible
     * through cudaGetLastError(). */
    failed = cudaFailed(cudaGetLastError(), "sum launch (pass 1)");
  }
  if (!failed) {
    /* Pass 2: a single block folds the NBLOCKS partial sums in place. */
    sum<<<1, NBLOCKS, NBLOCKS * sizeof(int)>>>(
        dev_out, dev_out);
    failed = cudaFailed(cudaGetLastError(), "sum launch (pass 2)");
  }
  if (!failed) {
    /* Only the first element (the grand total) is copied back; it
     * overwrites host[0]. */
    failed = cudaFailed(cudaMemcpy(host, dev_out, sizeof(int),
                                   cudaMemcpyDeviceToHost),
                        "cudaMemcpy(dev_out -> host)");
  }
  if (!failed) {
    failed = cudaFailed(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
  }

  if (!failed) {
    /* host[0] is an int, so it is printed with %d. */
    printf("OUTPUT: %d\n", host[0]);
  }

  /* Pointers start out null, so freeing is safe even after an early
   * failure. */
  cudaFree(dev_in);
  cudaFree(dev_out);
  return failed;
}
62
Note: See TracBrowser for help on using the repository browser.