Context Navigation

sum.cu

main

Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 2.0 KB

Line
1	/**
2	* This is an example from the paper "Formal Semantics of Heterogeneous CUDA-C:
3	* A Modular Approach with Applications" by Chris Hathhorn et al.
4	*/
5
6	#include <stdio.h>
7	#include <cuda.h>
8
9	#ifdef _CIVL // verification build: problem sizes are symbolic CIVL inputs
10	$input int N; // element count: length of host[] and of the device input buffer in main
11	$input int N_B; // upper bound on N
12	$assume(1 <= N && N <= N_B);
13	$input int NBLOCKS; // grid size of the first sum launch, block size of the second
14	$input int NBLOCKS_B; // upper bound on NBLOCKS
15	$assume(1 <= NBLOCKS && NBLOCKS <= NBLOCKS_B);
16	$assume(NBLOCKS <= N); // keeps NTHREADS = N/NBLOCKS at least 1
17	$assume(N % NBLOCKS == 0); // N/NBLOCKS is exact, so the blocks tile the input with no remainder
18	$assume(N % 2 == 0); // only even element counts are considered
19	$assume(NBLOCKS % 2 == 0); // only even block counts are considered
	// NOTE(review): the two parity assumptions do not make N/NBLOCKS even
	// (N = 6, NBLOCKS = 2 gives 3 threads per block).
20	#else // regular build: fixed problem size
21	#define N 8
22	#define NBLOCKS 4
23	#endif
24	#define NTHREADS (N/NBLOCKS) // threads per block in the first sum launch
25
/**
 * Per-block sum reduction.
 *
 * Each block adds up its own blockDim.x-element slice of `in`
 * (in[blockIdx.x * blockDim.x + threadIdx.x]) and thread 0 stores the block
 * total in out[blockIdx.x].
 *
 * Launch requirements:
 *  - 1-D grid of 1-D blocks; `in` holds at least gridDim.x * blockDim.x ints
 *    and `out` at least gridDim.x ints (the kernel does no bounds checking).
 *  - blockDim.x * sizeof(int) bytes of dynamic shared memory (third launch
 *    argument).
 *  - For a single-block launch `in` and `out` may be the same buffer: every
 *    read of `in` happens before the first barrier and the only write to
 *    `out` happens after the second.
 *
 * Works for any blockDim.x >= 1, including odd sizes.
 */
__global__ void sum(int* in, int* out) {
#ifdef _CIVL
    // The verification build keeps the original workaround of a block-sized
    // array; the unsized extern declaration was reported to have no
    // definition there. NOTE(review): revisit if the tool gains support for
    // extern __shared__.
    __shared__ int shared[blockDim.x];
#else
    // Dynamically sized shared array; its size comes from the launch
    // configuration.
    extern __shared__ int shared[];
#endif
    int i, tid = threadIdx.x,
        bid = blockIdx.x,
        bdim = blockDim.x;
    // Size of the lower half, rounded up. For odd bdim the middle element
    // shared[half - 1] has no partner and is picked up by the serial tail.
    int half = (bdim + 1) / 2;

    // Stage this block's slice of the input in shared memory.
    shared[tid] = in[bid * bdim + tid];

    __syncthreads();
    // Fold the upper part onto the lower part: bdim/2 pairs, the last of
    // which reads shared[bdim - 1].
    if (tid < bdim / 2) {
        shared[tid] += shared[half + tid];
    }
    __syncthreads();

    // Thread 0 finishes serially over the remaining partial sums
    // shared[1 .. half - 1] and publishes the block total.
    if (tid == 0) {
        for (i = 1; i < half; ++i) {
            shared[0] += shared[i];
        }
        out[bid] = shared[0];
    }
}
49
/**
 * Host driver for the two-stage sum.
 *
 * Fills host[i] = (21*i + 29) % 100 for i in [0, N), prints the input,
 * copies it to the device, reduces it with two launches of `sum`
 * (NBLOCKS blocks of NTHREADS threads, then one block of NBLOCKS threads
 * reducing the per-block totals in place), copies the single result back
 * into host[0] and prints it. Under _CIVL the result is additionally
 * asserted to equal a sequentially computed sum.
 *
 * Returns 0.
 */
int main() {

    // dev_in / dev_out are device buffers and therefore must be pointers:
    // they are handed to cudaMalloc by address and passed to the kernel's
    // int* parameters.
    int i, *dev_in, *dev_out, host[N];
#ifdef _CIVL
    int seqSum = 0; // sequential reference result for the final assertion
#endif

    printf("INPUT: ");
    for (i = 0; i != N; ++i) {
        host[i] = (21*i + 29) % 100;
#ifdef _CIVL
        seqSum += host[i];
#endif
        printf(" %d ", host[i]);
    }
    printf("\n");

    cudaMalloc(&dev_in, N * sizeof(int));
    cudaMalloc(&dev_out, NBLOCKS * sizeof(int));

    cudaMemcpy(dev_in, host, N * sizeof(int),
               cudaMemcpyHostToDevice);
    // Stage 1: one partial sum per block, written to dev_out[0 .. NBLOCKS-1].
    sum<<<NBLOCKS, NTHREADS, NTHREADS * sizeof(int)>>>(
        dev_in, dev_out);
    // Stage 2: a single block of NBLOCKS threads reduces the partial sums in
    // place; the total ends up in dev_out[0].
    sum<<<1, NBLOCKS, NBLOCKS * sizeof(int)>>>(
        dev_out, dev_out);
    // Only the first element (the grand total) is copied back, into host[0].
    cudaMemcpy(host, dev_out, sizeof(int),
               cudaMemcpyDeviceToHost);
    cudaDeviceSynchronize();

    // host[0] is an int, so it is printed with %d.
    printf("OUTPUT: %d\n", *host);
#ifdef _CIVL
    $assert(*host == seqSum);
#endif

    cudaFree(dev_in);
    cudaFree(dev_out);
    return 0;
}
89

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: CIVL/examples/cuda/sum.cu

Download in other formats: