source: CIVL/examples/cuda/sum.cu@ 5bc08d6

1.23 2.0 main test-branch
Last change on this file since 5bc08d6 was 47665c1, checked in by Andre Marianiello <andre.marianiello@…>, 11 years ago

Updated cuda.cvl and civl-cuda.cvl implementations to hide some struct definitions

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@2211 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 1.8 KB
Line 
1/**
2* This is an example from the paper "Formal Semantics of Heterogeneous CUDA-C:
3* A Modular Approach with Applications" by Chris Hathhorn et al.
4*/
5
6#include <stdio.h>
7#include <cuda.h>
8
9#ifdef _CIVL
10$input int N;
11$input int N_B;
12$assume(1 <= N && N <= N_B);
13$input int NBLOCKS;
14$input int NBLOCKS_B;
15$assume(1 <= NBLOCKS && NBLOCKS <= NBLOCKS_B);
16$assume(NBLOCKS <= N);
17$assume(N % NBLOCKS == 0);
18$assume(N % 2 == 0);
19$assume(NBLOCKS % 2 == 0);
20#else
21#define N 8
22#define NBLOCKS 4
23#endif
24#define NTHREADS (N/NBLOCKS)
25
26__global__ void sum(int* in, int* out) {
27 extern __shared__ int shared[];
28 int i, tid = threadIdx.x,
29 bid = blockIdx.x,
30 bdim = blockDim.x;
31
32 shared[tid] = in[bid * bdim + tid];
33
34 __syncthreads();
35 if(tid < bdim/2) {
36 shared[tid] += shared[bdim/2 + tid];
37 }
38 __syncthreads();
39 if(tid == 0) {
40 for (i = 1; i != (bdim/2) + (bdim%2); ++i) {
41 shared[0] += shared[i];
42 }
43 out[bid] = shared[0];
44 }
45}
46
47int main(void) {
48
49 int i, *dev_in, *dev_out, host[N];
50#ifdef _CIVL
51 int seqSum = 0;
52#endif
53
54 printf("INPUT: ");
55 for(i = 0; i != N; ++i) {
56 host[i] = (21*i + 29) % 100;
57#ifdef _CIVL
58 seqSum += host[i];
59#endif
60 printf(" %d ", host[i]);
61 }
62 printf("\n");
63
64 cudaMalloc(&dev_in, N * sizeof(int));
65 cudaMalloc(&dev_out, NBLOCKS * sizeof(int));
66
67 cudaMemcpy(dev_in, host, N * sizeof(int),
68 cudaMemcpyHostToDevice);
69 sum<<<NBLOCKS, NTHREADS, NTHREADS * sizeof(int)>>>(
70 dev_in, dev_out);
71 sum<<<1, NBLOCKS, NBLOCKS * sizeof(int)>>>(
72 dev_out, dev_out);
73 cudaMemcpy(host, dev_out, sizeof(int),
74 cudaMemcpyDeviceToHost);
75 cudaDeviceSynchronize();
76
77 printf("OUTPUT: %u\n", *host);
78#ifdef _CIVL
79 $assert(*host == seqSum);
80#endif
81
82 cudaFree(dev_in);
83 cudaFree(dev_out);
84 return 0;
85}
86
Note: See TracBrowser for help on using the repository browser.