source: CIVL/include/headers/civl-cuda.cvh@ 1aaefd4

main test-branch
Last change on this file since 1aaefd4 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 10.5 KB
Line 
1/* This header file contains useful helper functions for manipulating
2 * the CIVL versions of various Cuda objects.
3 */
4
5#ifndef _CUDA_HELPER_
6#define _CUDA_HELPER_
7
8#include <seq.cvh>
9#include <mem.cvh>
10#include <concurrency.cvh>
11#include <comm.cvh>
12
13/* dim3: used to represent the size of three dimensional grids.
 * Holds one unsigned extent per axis. It has the same members as uint3
 * but is a distinct type: dim3 is used for sizes, uint3 for locations.
14 */
15typedef struct {
16 unsigned int x, y, z; /* extent of the grid along the x, y and z axes */
17} dim3;
18
19/* uint3: used to represent a location in a three dimensional grid.
 * Holds one unsigned coordinate per axis. It has the same members as dim3
 * but is a distinct type: uint3 is used for locations, dim3 for sizes.
20 */
21typedef struct {
22 unsigned int x, y, z; /* coordinate of the cell along the x, y and z axes */
23} uint3;
24
25/* a flag type used to indicate the type of memory transfer to occur
26 * in a call to cudaMemcpy.
 * No enumerator has an explicit initializer, so the values are 0..4 in
 * declaration order. This header declares no typedef for the enum, so it
 * is referred to as `enum cudaMemcpyKind`.
27 */
28enum cudaMemcpyKind {
29 cudaMemcpyHostToHost, /* direction as named: source-to-destination */
30 cudaMemcpyHostToDevice,
31 cudaMemcpyDeviceToHost,
32 cudaMemcpyDeviceToDevice,
33 cudaMemcpyDefault /* NOTE(review): handling of this kind is not shown in this header - confirm */
34};
35
36/* the type returned by all Cuda functions.
 * No enumerator below has an explicit initializer, so values are assigned
 * consecutively in declaration order, starting with cudaSuccess == 0.
 * NOTE(review): the numeric values therefore follow this ordering only;
 * confirm against the CUDA runtime headers before relying on a specific
 * number (e.g. when printing or comparing raw error codes).
37 */
38enum cudaError {
39 cudaSuccess,
40 cudaErrorMissingConfiguration,
41 cudaErrorMemoryAllocation,
42 cudaErrorInitializationError,
43 cudaErrorLaunchFailure,
44 cudaErrorPriorLaunchFailure,
45 cudaErrorLaunchTimeout,
46 cudaErrorLaunchOutOfResources,
47 cudaErrorInvalidDeviceFunction,
48 cudaErrorInvalidConfiguration,
49 cudaErrorInvalidDevice,
50 cudaErrorInvalidValue,
51 cudaErrorInvalidPitchValue,
52 cudaErrorInvalidSymbol,
53 cudaErrorMapBufferObjectFailed,
54 cudaErrorUnmapBufferObjectFailed,
55 cudaErrorInvalidHostPointer,
56 cudaErrorInvalidDevicePointer,
57 cudaErrorInvalidTexture,
58 cudaErrorInvalidTextureBinding,
59 cudaErrorInvalidChannelDescriptor,
60 cudaErrorInvalidMemcpyDirection,
61 cudaErrorAddressOfConstant,
62 cudaErrorTextureFetchFailed,
63 cudaErrorTextureNotBound,
64 cudaErrorSynchronizationError,
65 cudaErrorInvalidFilterSetting,
66 cudaErrorInvalidNormSetting,
67 cudaErrorMixedDeviceExecution,
68 cudaErrorCudartUnloading,
69 cudaErrorUnknown,
70 cudaErrorNotYetImplemented,
71 cudaErrorMemoryValueTooLarge,
72 cudaErrorInvalidResourceHandle,
73 cudaErrorNotReady,
74 cudaErrorInsufficientDriver,
75 cudaErrorSetOnActiveProcess,
76 cudaErrorInvalidSurface,
77 cudaErrorNoDevice,
78 cudaErrorECCUncorrectable,
79 cudaErrorSharedObjectSymbolNotFound,
80 cudaErrorSharedObjectInitFailed,
81 cudaErrorUnsupportedLimit,
82 cudaErrorDuplicateVariableName,
83 cudaErrorDuplicateTextureName,
84 cudaErrorDuplicateSurfaceName,
85 cudaErrorDevicesUnavailable,
86 cudaErrorInvalidKernelImage,
87 cudaErrorNoKernelImageForDevice,
88 cudaErrorIncompatibleDriverContext,
89 cudaErrorPeerAccessAlreadyEnabled,
90 cudaErrorPeerAccessNotEnabled,
91 cudaErrorDeviceAlreadyInUse,
92 cudaErrorProfilerDisabled,
93 cudaErrorProfilerNotInitialized,
94 cudaErrorProfilerAlreadyStarted,
95 cudaErrorProfilerAlreadyStopped,
96 cudaErrorAssert,
97 cudaErrorTooManyPeers,
98 cudaErrorHostMemoryAlreadyRegistered,
99 cudaErrorHostMemoryNotRegistered,
100 cudaErrorOperatingSystem,
101 cudaErrorStartupFailure,
102 cudaErrorApiFailureBase
103};
104typedef enum cudaError cudaError_t; /* shorthand alias for enum cudaError */
105
106/* struct representing the properties of a Cuda device.
 * The struct tag and the typedef share the name cudaDeviceProp, so both
 * `struct cudaDeviceProp` and `cudaDeviceProp` name this type.
 * Array-valued members hold one entry per dimension/component, as given
 * by their declared lengths (2 or 3).
 * NOTE(review): the member names appear to mirror the CUDA runtime's
 * cudaDeviceProp; this header does not show which members the CIVL model
 * actually populates - confirm before reading them.
107 */
108typedef struct cudaDeviceProp {
109 char name[256];
110 size_t totalGlobalMem;
111 size_t sharedMemPerBlock;
112 int regsPerBlock;
113 int warpSize;
114 size_t memPitch;
115 int maxThreadsPerBlock;
116 int maxThreadsDim[3];
117 int maxGridSize[3];
118 int clockRate;
119 size_t totalConstMem;
120 int major;
121 int minor;
122 size_t textureAlignment;
123 size_t texturePitchAlignment;
124 int deviceOverlap;
125 int multiProcessorCount;
126 int kernelExecTimeoutEnabled;
127 int integrated;
128 int canMapHostMemory;
129 int computeMode;
130 int maxTexture1D;
131 int maxTexture1DLinear;
132 int maxTexture2D[2];
133 int maxTexture2DLinear[3];
134 int maxTexture2DGather[2];
135 int maxTexture3D[3];
136 int maxTextureCubemap;
137 int maxTexture1DLayered[2];
138 int maxTexture2DLayered[3];
139 int maxTextureCubemapLayered[2];
140 int maxSurface1D;
141 int maxSurface2D[2];
142 int maxSurface3D[3];
143 int maxSurface1DLayered[2];
144 int maxSurface2DLayered[3];
145 int maxSurfaceCubemap;
146 int maxSurfaceCubemapLayered[2];
147 size_t surfaceAlignment;
148 int concurrentKernels;
149 int ECCEnabled;
150 int pciBusID;
151 int pciDeviceID;
152 int pciDomainID;
153 int tccDriver;
154 int asyncEngineCount;
155 int unifiedAddressing;
156 int memoryClockRate;
157 int memoryBusWidth;
158 int l2CacheSize;
159 int maxThreadsPerMultiProcessor;
160} cudaDeviceProp;
161
162/* flag type used to represent the status of a kernel instance.
 * Values are implicit (0, 1, 2 in declaration order). The same type is
 * the return type of $cuda_get_status declared in this header.
163 */
164typedef enum $cuda_kernel_status {
165 $cuda_kernel_status_waiting,
166 $cuda_kernel_status_running,
167 $cuda_kernel_status_finished
168} $cuda_kernel_status;
169
170/* type used to represent an instance of a Cuda kernel.
 * Only forward-declared here: struct $cuda_kernel_instance is opaque to
 * users of this header and is handled through pointers.
171 */
172typedef struct $cuda_kernel_instance $cuda_kernel_instance_t;
173
/* $cuda_kernel_instance_t interface.
 * Presumably returns the current status of the given kernel instance;
 * the definition is not part of this header - confirm.
 */
174$cuda_kernel_status $cuda_get_status($cuda_kernel_instance_t*);
175
176/* a type that wraps a kernel instance for insertion into a list.
 * Only forward-declared here; the struct layout is not visible to users
 * of this header.
177 */
178typedef struct $cuda_kernel_instance_node $cuda_kernel_instance_node_t;
179
180/* $cuda_kernel_instance_node_t interface.
 * $cuda_get_instance takes a node and yields a kernel instance pointer,
 * presumably the instance wrapped by that node - confirm.
181 */
182$cuda_kernel_instance_t *$cuda_get_instance($cuda_kernel_instance_node_t*);
183
184/* a type used to represent a Cuda stream.
 * struct _CUstream is only forward-declared here; cudaStream_t is a
 * pointer to it, so stream handles are passed by pointer value.
185 */
186typedef struct _CUstream _CUstream;
187typedef _CUstream* cudaStream_t;
188
189/* _CUstream interface.
 * $cuda_get_most_recent: yields a kernel instance node for the stream,
 *   presumably the most recently enqueued one - confirm.
 * $cuda_is_usable / $cuda_set_usable: read and write a boolean "usable"
 *   flag of the stream; what "usable" means is not stated in this
 *   header - confirm against the implementation.
190 */
191$cuda_kernel_instance_node_t *$cuda_get_most_recent(cudaStream_t);
192
193_Bool $cuda_is_usable(cudaStream_t);
194
195void $cuda_set_usable(cudaStream_t, _Bool);
196
197/* a type that wraps a stream for insertion into a list.
 * Only forward-declared here; the struct layout is not visible to users
 * of this header.
198 */
199typedef struct $cuda_stream_node $cuda_stream_node_t;
200
201/* $cuda_stream_node_t interface.
 * $cuda_set_stream: presumably stores the given stream in the node.
 * $cuda_set_next: presumably links the first node to the second as its
 *   successor in the list.
 * Both are inferred from the signatures; confirm against the definitions.
202 */
203void $cuda_set_stream($cuda_stream_node_t*, cudaStream_t);
204
205void $cuda_set_next($cuda_stream_node_t*, $cuda_stream_node_t*);
206
207/* a type used to represent a Cuda event.
 * struct _CUevent is only forward-declared here; cudaEvent_t is a pointer
 * to it, so event handles are passed by pointer value.
208 */
209typedef struct _CUevent _CUevent;
210typedef _CUevent* cudaEvent_t;
211
212/* _CUevent interface.
 * An event carries an array of kernel instance pointers (see
 * $cuda_event_wait, which blocks on the instances contained in an event).
 * $cuda_get_instances / $cuda_get_num_instances: presumably return that
 *   array and its length.
 * $cuda_set_instances: presumably installs a new array; the int argument
 *   is presumably the number of entries - confirm. Ownership of the array
 *   is not stated in this header.
213 */
214$cuda_kernel_instance_t **$cuda_get_instances(cudaEvent_t);
215
216void $cuda_set_instances(cudaEvent_t, $cuda_kernel_instance_t**, int);
217
218int $cuda_get_num_instances(cudaEvent_t);
219
220
221/* a type representing the state of a Cuda device.
 * Only forward-declared here; the struct layout is not visible to users
 * of this header.
222 */
223typedef struct $cuda_context $cuda_context_t;
224
225/* $cuda_context_t interface.
 * The signatures indicate that a context tracks a count of streams, a
 * list of stream nodes reachable from a head node, and a null stream.
 * $cuda_add_new_stream: presumably adds the given stream node to the
 *   context's list; insertion position and any change to the stream count
 *   are not stated in this header - confirm.
226 */
227int $cuda_get_num_streams($cuda_context_t*);
228
229$cuda_stream_node_t *$cuda_get_head_node($cuda_context_t*);
230
231cudaStream_t $cuda_get_null_stream($cuda_context_t*);
232
233void $cuda_add_new_stream($cuda_context_t*, $cuda_stream_node_t*);
234
235
236/* Computes the one dimensional index of a grid cell at a given location
237 * in a three dimensional grid of a given size.
 * NOTE(review): the linearization order (which axis varies fastest) is
 * not stated in this header - confirm against the definition.
238 */
239int $cuda_index (dim3 size, uint3 location);
240
241/* Computes the one dimensional index of a specific thread in the grid given the
242 * grid dimension, block dimension, block index, and thread index
 * (gDim, bDim, bIdx and tIdx respectively).
243 */
244int $cuda_kernel_index (dim3 gDim, dim3 bDim, uint3 bIdx, uint3 tIdx);
245
246/* Lifts a single integer x into a three dimensional vector representing
247 * a one dimensional grid of length x.
 * NOTE(review): x is a signed int while dim3 members are unsigned;
 * behavior for negative x is not stated here.
248 */
249dim3 $cuda_to_dim3(int x);
250
251/* Given a three dimensional vector representing a grid of size dim,
252 * create and destroy a process, in parallel, for each cell in the grid.
253 * The location of the cell is passed to the spawning function.
 * spawningFunction takes the cell location as its only argument and
 * returns nothing.
254 */
255void $cuda_run_procs(dim3 dim, void spawningFunction(uint3));
256
257// ------------------------------------------------
258
259/* $wait on a given process if it is non-null; the name suggests a null
 * process is skipped rather than waited on.
260 */
261void $cuda_try_wait($proc p);
262
263/* The current state of the GPU.
 * Declared here as a file-scope object without `extern`. Its type,
 * $cuda_context_t, is only forward-declared in this header.
 * The no-argument helpers in this header (for example
 * $cuda_stream_wait_all) presumably operate on this object - confirm.
264 */
265$cuda_context_t $cuda_current_context;
266
267/* malloc and initialize a new $cuda_kernel_instance_t.
 * gDim and bDim are presumably the grid and block dimensions of the
 * kernel launch (same names as in $cuda_kernel_index). The result is
 * released with $cuda_kernel_instance_destroy.
268 */
269$cuda_kernel_instance_t *$cuda_kernel_instance_create(dim3 gDim, dim3 bDim);
270
271/* cleanup and free a given $cuda_kernel_instance_t
272 */
273void $cuda_kernel_instance_destroy($cuda_kernel_instance_t *i);
274
275/* malloc and initialize a new $cuda_kernel_instance_node_t.
 * NOTE(review): the name ends in `_tCreate`, unlike the `_create` suffix
 * used by every other constructor in this header (and unlike the matching
 * $cuda_kernel_instance_node_destroy). Renaming would change the public
 * interface, so it is only flagged here.
276 */
277$cuda_kernel_instance_node_t *$cuda_kernel_instance_node_tCreate(void);
278
279/* cleanup and free a given $cuda_kernel_instance_node_t.
 * NOTE(review): whether the wrapped kernel instance is also destroyed is
 * not stated in this header - confirm.
280 */
281void $cuda_kernel_instance_node_destroy($cuda_kernel_instance_node_t *node);
282
283/* malloc and initialize a new stream.
 * The result is released with $cuda_stream_destroy.
284 */
285cudaStream_t $cuda_stream_create(void);
286
287/* block until the most recently enqueued process on the given stream
288 * has terminated (meaning all kernels in that stream have completed)
289 */
290void $cuda_stream_wait(cudaStream_t s);
291
292/* block until no more streams have kernels executing.
 * Takes no argument; presumably covers the streams registered in
 * $cuda_current_context - confirm.
293 */
294void $cuda_stream_wait_all(void);
295
296/* cleanup and free a given stream
297 */
298void $cuda_stream_destroy(cudaStream_t s);
299
300/* malloc and initialize a new $cuda_stream_node_t.
 * The result is released with $cuda_stream_node_destroy.
301 */
302$cuda_stream_node_t *$cuda_stream_node_create(void);
303
304/* cleanup and free a given $cuda_stream_node_t.
 * NOTE(review): whether the wrapped stream is also destroyed is not
 * stated in this header - confirm.
305 */
306void $cuda_stream_node_destroy($cuda_stream_node_t *node);
307
308/* destroy all stream nodes contained in the context
309 */
310void $cuda_stream_node_destroy_all(void);
311
312/* malloc and initialize a new event.
 * The result is released with $cuda_event_destroy.
313 */
314cudaEvent_t $cuda_event_create(void);
315
316/* block until all $cuda_kernel_instance_t objects contained in this event have
317 * completed
318 */
319void $cuda_event_wait(cudaEvent_t e);
320
321/* cleanup and free a given event.
 * NOTE(review): whether the event's instance array is freed as well is
 * not stated in this header - confirm.
322 */
323void $cuda_event_destroy(cudaEvent_t e);
324
325/* initialize the cuda context. must be called before any cuda functions.
 * Pairs with $cuda_finalize.
326 */
327void $cuda_init(void);
328
329/* cleanup the cuda context. must be called after all cuda functions.
 * Pairs with $cuda_init.
330 */
331void $cuda_finalize(void);
332
333/* returns an array of pointers to the most recently enqueued kernel
334 * of each stream.
 * NOTE(review): the array length is not returned; it presumably equals
 * the context's stream count. Ownership of the array is not stated in
 * this header - confirm both.
335 */
336$cuda_kernel_instance_t **$cuda_all_most_recent_kernels(void);
337
338/* create a kernel instance for the given function k, and enqueue it
339 * onto the given stream.
 * k receives the kernel instance created for it and an event; gDim and
 * bDim are presumably the grid and block dimensions of the launch.
 * NOTE(review): the event handed to k is presumably the one it passes to
 * $cuda_wait_in_queue - confirm.
340 */
341void $cuda_enqueue_kernel(cudaStream_t stream, void (*k)($cuda_kernel_instance_t*, cudaEvent_t), dim3 gDim,
342dim3 bDim);
343
344/* called by kernel processes. wait on the given event, then update
345 * the status of the calling kernel to indicate it has finished waiting.
 * `this` is the calling kernel's own instance.
346 */
347void $cuda_wait_in_queue ($cuda_kernel_instance_t *this, cudaEvent_t e);
348
349/* called by kernel processes. update the status of the calling kernel
350 * to indicate that it has completed execution
351 */
352void $cuda_kernel_finish($cuda_kernel_instance_t *k);
353
354/* A wrapper around a barrier call that also checks for data races.
 * NOTE(review): kernel_id is presumably the calling thread's id within
 * kernel k (compare cur_tid in the functions below) - confirm.
355 */
356void $cuda_barrier($cuda_kernel_instance_t *k, int kernel_id, $barrier g);
357
358/* Checks for data races involving thread cur_tid of kernel k.
 * Declared $atomic_f.
 * NOTE(review): how a detected race is reported is not stated in this
 * header - confirm.
359 */
360$atomic_f void $check_data_race($cuda_kernel_instance_t *k, int cur_tid);
361
362/* Clears read and write memory sets of the given thread
363*/
364void $clear_mem_sets($cuda_kernel_instance_t *k, int cur_tid);
365
/* Presumably clears the read and write memory sets of every thread of
 * kernel k (the all-threads counterpart of $clear_mem_sets) - confirm.
 */
366void $clear_all_mem_sets($cuda_kernel_instance_t *k);
367
368/* Publishes the current read and write sets to global arrays. Local sets are not cleared
369*/
370void $publish($cuda_kernel_instance_t *k, int cur_tid);
371
/* Declarations of the four shuffle operations (plain, up, down, xor).
 * Only `int` values are supported: var and the return value are int.
 * The leading parameters (mask, var, srcLane/delta/laneMask, width) are
 * followed by four extra parameters shared by all variants: numThreads,
 * tid, comm and warpBarriers.
 * NOTE(review): presumably these model CUDA's __shfl_sync family, with the
 * extra parameters supplying the thread count, the caller's thread id, a
 * communicator and per-warp barriers; none of this is documented in this
 * header - confirm against the definitions.
 */
372int _cuda__shfl_sync(unsigned mask, int var, int srcLane, int width, int numThreads, int tid, $comm comm, $gbarrier* warpBarriers);
373int _cuda__shfl_up_sync(unsigned mask, int var, unsigned int delta, int width, int numThreads, int tid, $comm comm, $gbarrier* warpBarriers);
374int _cuda__shfl_down_sync(unsigned mask, int var, unsigned int delta, int width, int numThreads, int tid, $comm comm, $gbarrier* warpBarriers);
375int _cuda__shfl_xor_sync(unsigned mask, int var, int laneMask, int width, int numThreads, int tid, $comm comm, $gbarrier* warpBarriers);
376
377#endif
378
Note: See TracBrowser for help on using the repository browser.