Context Navigation

civl-cuda.cvh@ 1aaefd4

main test-branch

Last change on this file since 1aaefd4 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 10.5 KB

Line
1	/* This header file contains useful helper functions for manipulating
2	* the CIVL versions of various Cuda objects.
3	*/
4
5	#ifndef _CUDA_HELPER_
6	#define _CUDA_HELPER_
7
8	#include <seq.cvh>
9	#include <mem.cvh>
10	#include <concurrency.cvh>
11	#include <comm.cvh>
12
13	/* used to represent the size of a three dimensional grid, measured in cells
14	*/
15	typedef struct {
16	unsigned int x, y, z; /* extent of the grid along each of its three dimensions */
17	} dim3;
18
19	/* used to represent the location of a single cell in a three dimensional grid
20	*/
21	typedef struct {
22	unsigned int x, y, z; /* coordinate of the cell along each of the three dimensions */
23	} uint3;
24
25	/* a flag type used to indicate the type of memory transfer to occur
26	* in a call to cudaMemcpy; enumerators are implicitly numbered from 0
27	*/
28	enum cudaMemcpyKind {
29	cudaMemcpyHostToHost, /* 0 */
30	cudaMemcpyHostToDevice, /* 1 */
31	cudaMemcpyDeviceToHost, /* 2 */
32	cudaMemcpyDeviceToDevice, /* 3 */
33	cudaMemcpyDefault /* 4 */
34	};
35
36	/* the type returned by all Cuda functions (aliased as cudaError_t below)
37	*/
38	enum cudaError {
39	cudaSuccess, /* 0; each later enumerator is implicitly the previous one plus 1 */
40	cudaErrorMissingConfiguration,
41	cudaErrorMemoryAllocation,
42	cudaErrorInitializationError,
43	cudaErrorLaunchFailure,
44	cudaErrorPriorLaunchFailure,
45	cudaErrorLaunchTimeout,
46	cudaErrorLaunchOutOfResources,
47	cudaErrorInvalidDeviceFunction,
48	cudaErrorInvalidConfiguration,
49	cudaErrorInvalidDevice,
50	cudaErrorInvalidValue,
51	cudaErrorInvalidPitchValue,
52	cudaErrorInvalidSymbol,
53	cudaErrorMapBufferObjectFailed,
54	cudaErrorUnmapBufferObjectFailed,
55	cudaErrorInvalidHostPointer,
56	cudaErrorInvalidDevicePointer,
57	cudaErrorInvalidTexture,
58	cudaErrorInvalidTextureBinding,
59	cudaErrorInvalidChannelDescriptor,
60	cudaErrorInvalidMemcpyDirection,
61	cudaErrorAddressOfConstant,
62	cudaErrorTextureFetchFailed,
63	cudaErrorTextureNotBound,
64	cudaErrorSynchronizationError,
65	cudaErrorInvalidFilterSetting,
66	cudaErrorInvalidNormSetting,
67	cudaErrorMixedDeviceExecution,
68	cudaErrorCudartUnloading,
69	cudaErrorUnknown,
70	cudaErrorNotYetImplemented,
71	cudaErrorMemoryValueTooLarge,
72	cudaErrorInvalidResourceHandle,
73	cudaErrorNotReady,
74	cudaErrorInsufficientDriver,
75	cudaErrorSetOnActiveProcess,
76	cudaErrorInvalidSurface,
77	cudaErrorNoDevice,
78	cudaErrorECCUncorrectable,
79	cudaErrorSharedObjectSymbolNotFound,
80	cudaErrorSharedObjectInitFailed,
81	cudaErrorUnsupportedLimit,
82	cudaErrorDuplicateVariableName,
83	cudaErrorDuplicateTextureName,
84	cudaErrorDuplicateSurfaceName,
85	cudaErrorDevicesUnavailable,
86	cudaErrorInvalidKernelImage,
87	cudaErrorNoKernelImageForDevice,
88	cudaErrorIncompatibleDriverContext,
89	cudaErrorPeerAccessAlreadyEnabled,
90	cudaErrorPeerAccessNotEnabled,
91	cudaErrorDeviceAlreadyInUse,
92	cudaErrorProfilerDisabled,
93	cudaErrorProfilerNotInitialized,
94	cudaErrorProfilerAlreadyStarted,
95	cudaErrorProfilerAlreadyStopped,
96	cudaErrorAssert,
97	cudaErrorTooManyPeers,
98	cudaErrorHostMemoryAlreadyRegistered,
99	cudaErrorHostMemoryNotRegistered,
100	cudaErrorOperatingSystem,
101	cudaErrorStartupFailure,
102	cudaErrorApiFailureBase /* NOTE(review): value is implicit here; confirm nothing relies on the CUDA runtime's numeric error codes */
103	};
104	typedef enum cudaError cudaError_t; /* alias for enum cudaError */
105
106	/* struct representing the properties of a Cuda device; the struct tag and the typedef share the name cudaDeviceProp
107	*/
108	typedef struct cudaDeviceProp {
109	char name[256]; /* fixed-size buffer of 256 chars */
110	size_t totalGlobalMem; /* NOTE(review): field set presumably mirrors the CUDA runtime's cudaDeviceProp - confirm */
111	size_t sharedMemPerBlock;
112	int regsPerBlock;
113	int warpSize;
114	size_t memPitch;
115	int maxThreadsPerBlock;
116	int maxThreadsDim[3];
117	int maxGridSize[3];
118	int clockRate;
119	size_t totalConstMem;
120	int major;
121	int minor;
122	size_t textureAlignment;
123	size_t texturePitchAlignment;
124	int deviceOverlap;
125	int multiProcessorCount;
126	int kernelExecTimeoutEnabled;
127	int integrated;
128	int canMapHostMemory;
129	int computeMode;
130	int maxTexture1D;
131	int maxTexture1DLinear;
132	int maxTexture2D[2];
133	int maxTexture2DLinear[3];
134	int maxTexture2DGather[2];
135	int maxTexture3D[3];
136	int maxTextureCubemap;
137	int maxTexture1DLayered[2];
138	int maxTexture2DLayered[3];
139	int maxTextureCubemapLayered[2];
140	int maxSurface1D;
141	int maxSurface2D[2];
142	int maxSurface3D[3];
143	int maxSurface1DLayered[2];
144	int maxSurface2DLayered[3];
145	int maxSurfaceCubemap;
146	int maxSurfaceCubemapLayered[2];
147	size_t surfaceAlignment;
148	int concurrentKernels;
149	int ECCEnabled;
150	int pciBusID;
151	int pciDeviceID;
152	int pciDomainID;
153	int tccDriver;
154	int asyncEngineCount;
155	int unifiedAddressing;
156	int memoryClockRate;
157	int memoryBusWidth;
158	int l2CacheSize;
159	int maxThreadsPerMultiProcessor;
160	} cudaDeviceProp;
161
162	/* flag type used to represent the status of a kernel instance; enumerators are implicitly numbered from 0
163	*/
164	typedef enum $cuda_kernel_status {
165	$cuda_kernel_status_waiting, /* 0; presumably the kernel is still queued (see $cuda_wait_in_queue) - confirm */
166	$cuda_kernel_status_running, /* 1; presumably set once the kernel has finished waiting - confirm */
167	$cuda_kernel_status_finished /* 2; presumably set by $cuda_kernel_finish - confirm */
168	} $cuda_kernel_status;
169
170	/* type used to represent an instance of a Cuda kernel
171	*/
172	typedef struct $cuda_kernel_instance $cuda_kernel_instance_t;
173
174	$cuda_kernel_status $cuda_get_status($cuda_kernel_instance_t*);
175
176	/* a type that wraps a kernel instance for insertion into a list
177	*/
178	typedef struct $cuda_kernel_instance_node $cuda_kernel_instance_node_t;
179
180	/* $cuda_kernel_instance_node_t interface
181	*/
182	$cuda_kernel_instance_t $cuda_get_instance($cuda_kernel_instance_node_t);
183
184	/* a type used to represent a Cuda stream
185	*/
186	typedef struct _CUstream _CUstream;
187	typedef _CUstream* cudaStream_t;
188
189	/* _CUstream interface
190	*/
191	$cuda_kernel_instance_node_t *$cuda_get_most_recent(cudaStream_t);
192
193	_Bool $cuda_is_usable(cudaStream_t);
194
195	void $cuda_set_usable(cudaStream_t, _Bool);
196
197	/* a type that wraps a stream for insertion into a list
198	*/
199	typedef struct $cuda_stream_node $cuda_stream_node_t;
200
201	/* $cuda_stream_node_t interface
202	*/
203	void $cuda_set_stream($cuda_stream_node_t*, cudaStream_t);
204
205	void $cuda_set_next($cuda_stream_node_t, $cuda_stream_node_t);
206
207	/* a type used to represent a Cuda event
208	*/
209	typedef struct _CUevent _CUevent;
210	typedef _CUevent* cudaEvent_t;
211
212	/* _CUevent interface
213	*/
214	$cuda_kernel_instance_t **$cuda_get_instances(cudaEvent_t);
215
216	void $cuda_set_instances(cudaEvent_t, $cuda_kernel_instance_t**, int);
217
218	int $cuda_get_num_instances(cudaEvent_t);
219
220
221	/* a type representing the state of a Cuda device
222	*/
223	typedef struct $cuda_context $cuda_context_t;
224
225	/* $cuda_context_t interface
226	*/
227	int $cuda_get_num_streams($cuda_context_t*);
228
229	$cuda_stream_node_t $cuda_get_head_node($cuda_context_t);
230
231	cudaStream_t $cuda_get_null_stream($cuda_context_t*);
232
233	void $cuda_add_new_stream($cuda_context_t, $cuda_stream_node_t);
234
235
236	/* Computes the one dimensional index of a grid cell at a given location
237	* in a three dimensional grid of a given size
238	*/
239	int $cuda_index (dim3 size, uint3 location);
240
241	/* Computes the one dimensional index of a specific thread in the grid given the
242	* grid dimension, block dimension, block index, and thread index
243	*/
244	int $cuda_kernel_index (dim3 gDim, dim3 bDim, uint3 bIdx, uint3 tIdx);
245
246	/* Lifts a single integer x into a three dimensional vector representing
247	* a one dimensional grid of length x
248	*/
249	dim3 $cuda_to_dim3(int x);
250
251	/* Given a three dimensional vector representing a grid of size dim,
252	* create and destroy a process, in parallel, for each cell in the grid.
253	* The location of the cell is passed to the spawning function.
254	*/
255	void $cuda_run_procs(dim3 dim, void spawningFunction(uint3));
256
257	// ------------------------------------------------
258
259	/* $wait on a given process if it is non-null
260	*/
261	void $cuda_try_wait($proc p);
262
263	/* The current state of the GPU
264	*/
265	$cuda_context_t $cuda_current_context;
266
267	/* malloc and initialize a new $cuda_kernel_instance_t
268	*/
269	$cuda_kernel_instance_t *$cuda_kernel_instance_create(dim3 gDim, dim3 bDim);
270
271	/* cleanup and free a given $cuda_kernel_instance_t
272	*/
273	void $cuda_kernel_instance_destroy($cuda_kernel_instance_t *i);
274
275	/* malloc and initialize a new $cuda_kernel_instance_node_t
276	*/
277	$cuda_kernel_instance_node_t *$cuda_kernel_instance_node_tCreate(void);
278
279	/* cleanup and free a given $cuda_kernel_instance_node_t
280	*/
281	void $cuda_kernel_instance_node_destroy($cuda_kernel_instance_node_t *node);
282
283	/* malloc and initialize a new stream
284	*/
285	cudaStream_t $cuda_stream_create(void);
286
287	/* block until the most recently enqueued process on the given stream
288	* has terminated (meaning all kernels in that stream have completed)
289	*/
290	void $cuda_stream_wait(cudaStream_t s);
291
292	/* block until no more streams have kernels executing
293	*/
294	void $cuda_stream_wait_all(void);
295
296	/* cleanup and free a given stream
297	*/
298	void $cuda_stream_destroy(cudaStream_t s);
299
300	/* malloc and initialize a new $cuda_stream_node_t
301	*/
302	$cuda_stream_node_t *$cuda_stream_node_create(void);
303
304	/* cleanup and free a given $cuda_stream_node_t
305	*/
306	void $cuda_stream_node_destroy($cuda_stream_node_t *node);
307
308	/* destroy all stream nodes contained in the context
309	*/
310	void $cuda_stream_node_destroy_all(void);
311
312	/* malloc and initialize a new event
313	*/
314	cudaEvent_t $cuda_event_create(void);
315
316	/* block until all $cuda_kernel_instance_ts contained in this event have
317	* completed
318	*/
319	void $cuda_event_wait(cudaEvent_t e);
320
321	/* cleanup and free a given event
322	*/
323	void $cuda_event_destroy(cudaEvent_t e);
324
325	/* initialize the cuda context. must be called before any cuda functions.
326	*/
327	void $cuda_init(void);
328
329	/* cleanup the cuda context. must be called after all cuda functions.
330	*/
331	void $cuda_finalize(void);
332
333	/* returns an array of pointers to the most recently enqueued kernel
334	* of each stream.
335	*/
336	$cuda_kernel_instance_t **$cuda_all_most_recent_kernels(void);
337
338	/* create a kernel instance for the given function k, and enqueue it
339	* onto the given stream.
340	*/
341	void $cuda_enqueue_kernel(cudaStream_t stream, void (k)($cuda_kernel_instance_t, cudaEvent_t), dim3 gDim,
342	dim3 bDim);
343
344	/* called by kernel processes. wait on the given event, then update
345	* the status of the calling kernel to indicate it has finished waiting
346	*/
347	void $cuda_wait_in_queue ($cuda_kernel_instance_t *this, cudaEvent_t e);
348
349	/* called by kernel processes. update the status of the calling kernel
350	* to indicate that it has completed execution
351	*/
352	void $cuda_kernel_finish($cuda_kernel_instance_t *k);
353
354	/* A wrapper around a barrier call that also checks for data races
355	*/
356	void $cuda_barrier($cuda_kernel_instance_t *k, int kernel_id, $barrier g);
357
358	/* Checks data races
359	*/
360	$atomic_f void $check_data_race($cuda_kernel_instance_t *k, int cur_tid);
361
362	/* Clears read and write memory sets of the given thread
363	*/
364	void $clear_mem_sets($cuda_kernel_instance_t *k, int cur_tid);
365
366	void $clear_all_mem_sets($cuda_kernel_instance_t *k);
367
368	/* Publishes the current read and write sets to global arrays. Local sets are not cleared
369	*/
370	void $publish($cuda_kernel_instance_t *k, int cur_tid);
371
372	int _cuda__shfl_sync(unsigned mask, int var, int srcLane, int width, int numThreads, int tid, $comm comm, $gbarrier* warpBarriers);
373	int _cuda__shfl_up_sync(unsigned mask, int var, unsigned int delta, int width, int numThreads, int tid, $comm comm, $gbarrier* warpBarriers);
374	int _cuda__shfl_down_sync(unsigned mask, int var, unsigned int delta, int width, int numThreads, int tid, $comm comm, $gbarrier* warpBarriers);
375	int _cuda__shfl_xor_sync(unsigned mask, int var, int laneMask, int width, int numThreads, int tid, $comm comm, $gbarrier* warpBarriers);
376
377	#endif
378

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format