source: CIVL/examples/cuda/sum.cu

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 2.0 KB
Line 
1/**
2* This is an example from the paper "Formal Semantics of Heterogeneous CUDA-C:
3* A Modular Approach with Applications" by Chris Hathhorn et al.
4*/
5
6#include <stdio.h>
7#include <cuda.h>
8
#ifdef _CIVL
/* Verification build: the problem size and the number of blocks are CIVL
 * input variables. N_B and NBLOCKS_B bound N and NBLOCKS from above (see the
 * $assume clauses below). */
$input int N;
$input int N_B;
$assume(1 <= N && N <= N_B);
$input int NBLOCKS;
$input int NBLOCKS_B;
$assume(1 <= NBLOCKS && NBLOCKS <= NBLOCKS_B);
/* Every block must receive the same, non-zero number of elements. */
$assume(NBLOCKS <= N);
$assume(N % NBLOCKS == 0);
$assume(N % 2 == 0);
$assume(NBLOCKS % 2 == 0);
#else
/* Native build: fixed problem size. */
#define N 8
#define NBLOCKS 4

/* Enforce at compile time the same preconditions the CIVL branch assumes, so
 * that editing the constants above cannot silently produce a launch
 * configuration that drops elements (N/NBLOCKS truncates). */
#if NBLOCKS < 1 || NBLOCKS > N
#error "sum.cu: require 1 <= NBLOCKS <= N"
#endif
#if N % NBLOCKS != 0
#error "sum.cu: N must be a multiple of NBLOCKS"
#endif
#if N % 2 != 0 || NBLOCKS % 2 != 0
#error "sum.cu: N and NBLOCKS must both be even"
#endif
#endif
/* Number of threads per block used by the first reduction pass. */
#define NTHREADS (N/NBLOCKS)
25
/**
 * Per-block integer sum.
 *
 * Each block adds up the blockDim.x consecutive elements of `in` starting at
 * index blockIdx.x * blockDim.x and stores the total in out[blockIdx.x].
 *
 * Launch requirements:
 *  - One-dimensional grid and blocks (only the .x components are used).
 *  - `in` holds at least gridDim.x * blockDim.x ints, `out` at least
 *    gridDim.x ints.
 *  - Native CUDA builds must pass blockDim.x * sizeof(int) bytes of dynamic
 *    shared memory as the third launch parameter.
 *  - `in` and `out` may be the same buffer when the grid has a single block:
 *    every input element is copied to shared memory before the first barrier
 *    and out[0] is written only after the second one.
 *
 * Works for any blockDim.x >= 1, odd sizes included.
 */
__global__ void sum(int* in, int* out) {
#ifdef _CIVL
  // The original author disabled the `extern __shared__ int shared[];` form
  // for the CIVL build (noted as lacking a definition there), so the model
  // sizes the array from the block dimension instead.
  __shared__ int shared[blockDim.x];
#else
  // Dynamically sized shared memory; its byte size comes from the third
  // kernel launch parameter.
  extern __shared__ int shared[];
#endif
  int i, tid = threadIdx.x,
      bid = blockIdx.x,
      bdim = blockDim.x;
  // Size of the lower part of the array, rounded up so that for an odd
  // block size the middle element stays in the lower part.
  int half = bdim/2 + bdim%2;

  // Stage this block's slice of the input in shared memory.
  shared[tid] = in[bid * bdim + tid];

  __syncthreads();
  // Parallel step: fold the upper part onto the lower part. The partner
  // index starts at `half` (not bdim/2) so that, for odd bdim, the last
  // element is included and the middle element is not counted twice.
  if (tid < bdim/2) {
    shared[tid] += shared[half + tid];
  }
  __syncthreads();
  // Serial step: thread 0 accumulates the remaining `half` partial sums and
  // publishes the block total.
  if (tid == 0) {
    for (i = 1; i < half; ++i) {
      shared[0] += shared[i];
    }
    out[bid] = shared[0];
  }
}
49
/**
 * Host driver for the two-pass reduction.
 *
 * Fills an N-element array with the values (21*i + 29) % 100, copies it to
 * the device, and sums it with two launches of the `sum` kernel:
 *   1. NBLOCKS blocks of NTHREADS threads write one partial sum per block
 *      into dev_out;
 *   2. a single block of NBLOCKS threads reduces dev_out in place, leaving
 *      the grand total in dev_out[0].
 * The total is copied back into host[0] and printed. Under CIVL the result
 * is additionally asserted to equal a sequentially computed sum.
 *
 * Returns 0.
 */
int main() {

  int i, *dev_in, *dev_out, host[N];
#ifdef _CIVL
  // Sequential reference sum, only needed for the CIVL assertion.
  int seqSum = 0;
#endif

  printf("INPUT: ");
  for (i = 0; i != N; ++i) {
    host[i] = (21*i + 29) % 100;
#ifdef _CIVL
    seqSum += host[i];
#endif
    printf(" %d ", host[i]);
  }
  printf("\n");

  // Device buffers: the full input, and one partial sum per block.
  cudaMalloc(&dev_in, N * sizeof(int));
  cudaMalloc(&dev_out, NBLOCKS * sizeof(int));

  cudaMemcpy(dev_in, host, N * sizeof(int),
             cudaMemcpyHostToDevice);

  // Pass 1: each block reduces its NTHREADS-element slice of dev_in.
  // The third launch parameter is the shared-memory size in bytes.
  sum<<<NBLOCKS, NTHREADS, NTHREADS * sizeof(int)>>>(
      dev_in, dev_out);
  // Pass 2: one block reduces the NBLOCKS partial sums in place.
  sum<<<1, NBLOCKS, NBLOCKS * sizeof(int)>>>(
      dev_out, dev_out);

  // Only the first element of dev_out holds the final result.
  cudaMemcpy(host, dev_out, sizeof(int),
             cudaMemcpyDeviceToHost);
  // Wait for all outstanding device work before inspecting the result.
  cudaDeviceSynchronize();

  // *host is an int, so it is printed with %d (the original used %u).
  printf("OUTPUT: %d\n", *host);
#ifdef _CIVL
  $assert(*host == seqSum);
#endif

  cudaFree(dev_in);
  cudaFree(dev_out);
  return 0;
}
89
Note: See TracBrowser for help on using the repository browser.