source: CIVL/examples/cuda/cuda-helper.cvh@ 5c27aa5

1.23 2.0 main test-branch
Last change on this file since 5c27aa5 was e3151da, checked in by Ziqing Luo <ziqing@…>, 11 years ago

re-organized example directory

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@1763 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 7.8 KB
Line 
/* This header file contains helper functions for manipulating the
 * CIVL versions of various CUDA objects (streams, events, and kernel
 * instances).
 */
4#ifdef __CUDA_HELPER__
5#else
6#define __CUDA_HELPER__
7
8#include "civlc.cvh"
9#include "cuda-types.cvh"
10#include <string.h>
11#include <stdio.h>
12
13/* Computes the one dimensional index of a grid cell at a given location
14 * in a three dimensional grid of a given size
15 */
16int _index (dim3 size, uint3 location) {
17 return location.x + size.x * (location.y + size.y * location.z);
18}
19
20/* Lifts a single integer x into a three dimensional vector representing
21 * a one dimensional grid of length x
22 */
23dim3 _toDim3(int x) {
24 dim3 d = { x, 1, 1 };
25
26 return d;
27}
28
29/* Given a three dimensional vector representing a grid of size dim,
30 * create and destroy a process, in parallel, for each cell in the grid.
31 * The location of the cell is passed to the spawning function.
32 */
33void _runProcs(dim3 dim, void spawningFunction(uint3)) {
34/*/
35 $range rx = 0 .. dim.x;
36 $range ry = 0 .. dim.y;
37 $range rz = 0 .. dim.z;
38 $domain(3) dom = ($domain){rx, ry, rz};
39 $parfor(int x,y,z : dom){
40 uint3 id = { x, y, z };
41 spawningFunction(id);
42 }
43/*/
44 $proc procs[dim.x][dim.y][dim.z];
45 for (int x = 0; x < dim.x; x++) {
46 for (int y = 0; y < dim.y; y++) {
47 for (int z = 0; z < dim.z; z++) {
48 uint3 id = { x, y, z };
49 procs[x][y][z] = $spawn spawningFunction(id);
50 }
51
52 }
53 }
54 for (int x = 0; x < dim.x; x++) {
55 for (int y = 0; y < dim.y; y++) {
56 for (int z = 0; z < dim.z; z++) {
57 $wait(procs[x][y][z]);
58 }
59 }
60 }
61//*/
62}
63
64// ------------------------------------------------
65
66/* $wait on a given process is it is non-null
67 */
68void _tryWait($proc p) {
69 if (p != $proc_null)
70 $wait(p);
71}
72
73/* The current state of the GPU
74 */
75_cudaContext _context = {
76 .headNode = NULL,
77 .nullStream = NULL,
78 .numStreams = 0
79};
80
81/* malloc and initialize a new _kernelInstance
82 */
83_kernelInstance *_kernelInstanceCreate() {
84 //printf("mallocing kernel instance\n");
85 _kernelInstance *i = (_kernelInstance*)$malloc($root, sizeof(_kernelInstance));
86
87 i->process = $proc_null;
88 i->status = _kernelStatusWaiting;
89 return i;
90}
91
92/* cleanup and free a given _kernelInstance
93 */
94void _kernelInstanceDestroy(_kernelInstance *i) {
95 _tryWait(i->process);
96 //printf("freeing kernel instance\n");
97 $free(i);
98}
99
100/* malloc and initialize a new _kernelInstanceNode
101 */
102_kernelInstanceNode *_kernelInstanceNodeCreate() {
103 //printf("mallocing kernel instance node\n");
104 _kernelInstanceNode *node = (_kernelInstanceNode*)$malloc($root, sizeof(_kernelInstanceNode));
105
106 node->instance = NULL;
107 node->next = NULL;
108 return node;
109}
110
111/* cleanup and free a given _kernelInstanceNode
112 */
113void _kernelInstanceNodeDestroy(_kernelInstanceNode *node) {
114 _kernelInstanceDestroy(node->instance);
115 //printf("freeing kernel instance node\n");
116 $free(node);
117}
118
119/* malloc and initialize a new stream
120 */
121cudaStream_t _streamCreate() {
122 cudaStream_t s;
123
124 //printf("mallocing cuda stream\n");
125 s = (cudaStream_t)$malloc($root, sizeof(_CUstream));
126 s->mostRecent = _kernelInstanceNodeCreate();
127 s->mostRecent->instance = _kernelInstanceCreate();
128 s->mostRecent->instance->status = _kernelStatusFinished;
129 s->usable = $true;
130 return s;
131}
132
133/* block until the most recently enqueued process on the given stream
134 * has terminated (meaning all kernels in that stream have completed)
135 */
136void _streamWait(cudaStream_t s) {
137 _kernelInstance *mostRecentInstance = s->mostRecent->instance;
138
139 $when (mostRecentInstance->status == _kernelStatusFinished) ;
140}
141
142/* block until no more streams have kernels executing
143 */
144void _streamWaitAll() {
145 _cudaStreamNode *curNode = _context.headNode;
146
147 while (curNode != NULL) {
148 _streamWait(curNode->stream);
149 curNode = curNode->next;
150 }
151}
152
153/* cleanup and free a given stream
154 */
155void _streamDestroy(cudaStream_t s) {
156 _kernelInstanceNode *curNode = s->mostRecent;
157 _kernelInstanceNode *nextNode;
158
159 while (curNode != NULL) {
160 nextNode = curNode->next;
161 _kernelInstanceNodeDestroy(curNode);
162 curNode = nextNode;
163 }
164 //printf("freeing cuda stream\n");
165 $free(s);
166}
167
168/* malloc and initialize a new _cudaStreamNode
169 */
170_cudaStreamNode *_streamNodeCreate() {
171 //printf("mallocing cuda stream node\n");
172 _cudaStreamNode *node = (_cudaStreamNode*)$malloc($root, sizeof(_cudaStreamNode));
173
174 node->stream = NULL;
175 node->next = NULL;
176 return node;
177}
178
179/* cleanup and free a given _cudaStreamNode
180 */
181void _streamNodeDestroy(_cudaStreamNode *node) {
182 $assert(!node->stream->usable);
183 _streamDestroy(node->stream);
184 //printf("freeing cuda stream node\n");
185 $free(node);
186}
187
188/* destroy all stream nodes contained in the context
189 */
190void _streamNodeDestroyAll() {
191 _cudaStreamNode *curNode = _context.headNode;
192 _cudaStreamNode *nextNode;
193
194 while (curNode != NULL) {
195 nextNode = curNode->next;
196 _streamNodeDestroy(curNode);
197 curNode = nextNode;
198 }
199 _context.headNode = NULL;
200}
201
202/* malloc and initialize a new event
203 */
204cudaEvent_t _eventCreate() {
205 //printf("mallocing event\n");
206 cudaEvent_t e = (cudaEvent_t)$malloc($root, sizeof(_CUevent));
207
208 e->numInstances = 0;
209 e->instances = NULL;
210 return e;
211}
212
213/* block until all _kernelInstances contained in this event have
214 * completed
215 */
216void _eventWait(cudaEvent_t e) {
217 for (int i = 0; i < e->numInstances; i++) {
218 $when (e->instances[i]->status == _kernelStatusFinished) ;
219 }
220}
221
222/* cleanup and free a given event
223 */
224void _eventDestroy(cudaEvent_t e) {
225 if (e->instances != NULL) {
226 //printf("freeing instance list a\n");
227 $free(e->instances);
228 }
229 //printf("freeing event\n");
230 $free(e);
231}
232
233/* initialize the cuda context. must be called before any cuda functions.
234 */
235void _cudaInit() {
236 _context.nullStream = _streamCreate();
237}
238
239/* cleanup the cuda context. must be called after all cuda functions.
240 */
241void _cudaFinalize() {
242 _streamWaitAll();
243 _streamWait(_context.nullStream);
244 _streamNodeDestroyAll();
245 _streamDestroy(_context.nullStream);
246}
247
248/* returns an array of pointers to the most recently enqueued kernel
249 * of each stream.
250 */
251_kernelInstance **_allMostRecentKernels() {
252 int n = _context.numStreams + 1;
253 _cudaStreamNode *curNode = _context.headNode;
254 //printf("mallocing instance list a\n");
255 _kernelInstance **insts = (_kernelInstance**)$malloc($root, n * sizeof(_kernelInstance*)) ;
256
257 insts[0] = _context.nullStream->mostRecent->instance;
258 for (int i = 1; i < n; i++, curNode = curNode->next) {
259 insts[i] = curNode->stream->mostRecent->instance;
260 }
261 return insts;
262}
263
264/* create a kernel instance for the given function k, and enqueue it
265 * onto the given stream.
266 */
267void _enqueueKernel(cudaStream_t stream, void (*k)(_kernelInstance*, cudaEvent_t)) {
268 cudaStream_t s;
269 cudaEvent_t e = _eventCreate();
270 _kernelInstanceNode *newNode = _kernelInstanceNodeCreate();
271
272 if (stream == NULL) {
273 e->numInstances = _context.numStreams + 1;
274 e->instances = _allMostRecentKernels();
275 s = _context.nullStream;
276 } else {
277 e->numInstances = 2;
278 //printf("mallocing instance list b\n");
279 e->instances = (_kernelInstance**)$malloc($root, 2 * sizeof(_kernelInstance*)) ;
280 e->instances[0] = stream->mostRecent->instance;
281 e->instances[1] = _context.nullStream->mostRecent->instance;
282 s = stream;
283 }
284 $assert(s->usable);
285 newNode->instance = _kernelInstanceCreate();
286 newNode->next = s->mostRecent;
287 s->mostRecent = newNode;
288 s->mostRecent->instance->process = $spawn k(s->mostRecent->instance, e);
289}
290
291/* called by kernel processes. wait on the given event, then update
292 * the status of the calling kernel to indicate it has finished waiting
293 */
294void _waitInQueue (_kernelInstance *this, cudaEvent_t e) {
295 _eventWait(e);
296 _eventDestroy(e);
297 this->status = _kernelStatusRunning;
298}
299
300/* called by kernel processes. update the status of the calling kernel
301 * to indicate that it has completed execution
302 */
303void _kernelFinish(_kernelInstance *k) {
304 k->status = _kernelStatusFinished;
305}
306
307#endif
Note: See TracBrowser for help on using the repository browser.