Context Navigation

cuda.cvl@ 1aaefd4

main test-branch

Last change on this file since 1aaefd4 was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 12.9 KB

Line
1	#include <civlc.cvh>
2
3	/* Functions in this file are meant to serve as drop-in CIVL replacements
4	* for the Cuda function of the same name. Because of this, much of the
5	* documentation of these functions is identical to the documentation
6	* for its Cuda counterpart.
7	*/
8
9	#include <string.h>
10	#include <stdlib.h>
11	#include <civl-cuda.cvh>
12	#include <concurrency.cvh>
13	#include <cuda.h>
14
15	/* Returns in *count the number of devices with compute capability
16	* greater or equal to 1.0 that are available for execution.
17	*/
18	cudaError_t cudaGetDeviceCount(int *count) {
19	// possibly this should return an value specified as $input?
20	*count = 1;
21	return cudaSuccess;
22	}
23
24	/* Returns in *device the current device for the calling host thread
25	*/
26	cudaError_t cudaGetDevice(int * device) {
27	*device = 0;
28	return cudaSuccess;
29	}
30
31	/* Returns in *prop the properties of device dev
32	*/
33	cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp * prop, int dev) {
34	if (dev > 1)
35	return cudaErrorInvalidDevice;
36
37	strcpy(prop->name, "CIVL_CudaDevice0");
38
39	return cudaSuccess;
40	}
41
42	/* Creates and event object
43	*/
44	cudaError_t cudaEventCreate(cudaEvent_t *event) {
45	*event = $cuda_event_create();
46	return cudaSuccess;
47	}
48
49	/* Records an event. If stream is non-zero, the event is recorded
50	* after all preceding operations in stream have been completed;
51	* otherwise, it is recorded after all preceding operations in the
52	* CUDA context have been completed. Since operation is asynchronous,
53	* cudaEventQuery() and/or cudaEventSynchronize() must be used to
54	* determine when the event has actually been recorded.
55	*/
56	cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t s) {
57	if ($cuda_get_instances(event) != NULL) {
58	//printf("freeing instance list b\n");
59	$free($cuda_get_instances(event));
60	}
61	if (s == NULL) {
62	$cuda_set_instances(event, $cuda_all_most_recent_kernels(), $cuda_get_num_streams(&$cuda_current_context) + 1);
63	} else {
64	//printf("mallocing instance list c\n");
65	$cuda_kernel_instance_t instanceList = ($cuda_kernel_instance_t)malloc(sizeof($cuda_kernel_instance_t*));
66	instanceList[0] = $cuda_get_instance($cuda_get_most_recent(s));
67	$cuda_set_instances(event, instanceList, 1);
68	}
69	}
70
71	/* Query the status of all device work preceding the most recent call
72	* to cudaEventRecord() (in the appropriate compute streams, as
73	* specified by the arguments to cudaEventRecord()).
74	*
75	* If this work has successfully been completed by the device, or if
76	* cudaEventRecord() has not been called on event, then cudaSuccess
77	* is returned. If this work has not yet been completed by the device
78	* then cudaErrorNotReady is returned.
79	*/
80	cudaError_t cudaEventQuery(cudaEvent_t event) {
81	_Bool allKernelsFinished = $true;
82
83	for (int i = 0; i < $cuda_get_num_instances(event); i++) {
84	if ($cuda_get_status($cuda_get_instances(event)[i]) != $cuda_kernel_status_finished) {
85	allKernelsFinished = $false;
86	break;
87	}
88	}
89	return allKernelsFinished ? cudaSuccess : cudaErrorNotReady;
90	}
91
92	/* Wait until the completion of all device work preceding the most
93	* recent call to cudaEventRecord() (in the appropriate compute streams,
94	* as specified by the arguments to cudaEventRecord()).
95	*
96	* If cudaEventRecord() has not been called on event, cudaSuccess
97	* is returned immediately.
98	*/
99	cudaError_t cudaEventSynchronize(cudaEvent_t event) {
100	$cuda_event_wait(event);
101	return cudaSuccess;
102	}
103
104	/* since "timing" doesn't really make sense in the verification process
105	* I'm not sure what this should do. maybe it shouldn't exist.
106	*/
107	cudaError_t cudaEventElapsedTime(float *t, cudaEvent_t from, cudaEvent_t to) {
108	*t = 1.0;
109	return cudaSuccess;
110	}
111
112	/* Destroys the event specified by event.
113	*/
114	cudaError_t cudaEventDestroy(cudaEvent_t event) {
115	$cuda_event_destroy(event);
116	return cudaSuccess;
117	}
118
119	/* Creates a new asynchronous stream.
120	*/
121	cudaError_t cudaStreamCreate(cudaStream_t *pStream) {
122	$cuda_stream_node_t *newNode = $cuda_stream_node_create();
123
124	*pStream = $cuda_stream_create();
125	$cuda_set_stream(newNode, *pStream);
126	$cuda_set_next(newNode, $cuda_get_head_node(&$cuda_current_context));
127	$cuda_add_new_stream(&$cuda_current_context, newNode);
128
129	return cudaSuccess;
130	}
131
132	/* Blocks until stream has completed all operations.
133	*/
134	cudaError_t cudaStreamSynchronize(cudaStream_t stream) {
135	cudaStream_t s;
136
137	if (stream == NULL)
138	s = $cuda_get_null_stream(&$cuda_current_context);
139	else
140	s = stream;
141	$cuda_stream_wait(s);
142	return cudaSuccess;
143	}
144
145	/* Destroys and cleans up the asynchronous stream specified by stream.
146	*/
147	cudaError_t cudaStreamDestroy(cudaStream_t pStream) {
148	$assert($cuda_is_usable(pStream));
149	$cuda_set_usable(pStream, $false);
150	return cudaSuccess;
151	}
152
153	/* Explicitly destroys and cleans up all resources associated with the
154	* current device in the current process. Any subsequent API call to
155	* this device will reinitialize the device.
156	*/
157	cudaError_t cudaDeviceReset( void ) {
158	// TODO: Figure out if _cudaContext must be destroyed here
159	return cudaSuccess;
160	}
161
162	/* locks until stream has completed all operations.
163	*/
164	cudaError_t cudaDeviceSynchronize() {
165	$cuda_stream_wait_all();
166	$cuda_stream_wait($cuda_get_null_stream(&$cuda_current_context));
167	return cudaSuccess;
168	}
169
170	/* Copies count bytes from the memory area pointed to by src to the
171	* memory area pointed to by dst, where kind is one of
172	* cudaMemcpyHostToHost, cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost,
173	* or cudaMemcpyDeviceToDevice, and specifies the direction of the
174	* copy. The memory areas may not overlap.
175	*/
176	cudaError_t cudaMemcpy ( void dst, const void src, size_t count, enum cudaMemcpyKind kind ) {
177	cudaDeviceSynchronize();
178	memcpy(dst, src, count);
179	cudaDeviceSynchronize();
180	return cudaSuccess;
181	}
182
183	/* Fills the first count bytes of the memory area pointed to by devPtr
184	* with the constant byte value value
185	*/
186	cudaError_t cudaMemset(void * devPtr, int value, size_t count) {
187	memset(devPtr, value, count);
188	return cudaSuccess;
189	}
190
191	/* Frees the memory space pointed to by devPtr. Similar semantics to free/$free.
192	*/
193	cudaError_t cudaFree(void *devPtr) {
194	$free(devPtr);
195	return cudaSuccess;
196	}
197
198	/* Sets device as the current device for the calling host thread. Currently,
199	* only a single device is supported, so this call always succeeds with a noop.
200	*/
201	cudaError_t cudaSetDevice(int device_id) {
202	return cudaSuccess;
203	}
204
205	/* Returns the message string from an error code
206	*/
207	char _cudaErrorString[10];
208
209	const char* cudaGetErrorString(cudaError_t error) {
210	strcpy(_cudaErrorString, "test");
211	return _cudaErrorString;
212	}
213
214	/* Returns the last error that has been produced by any of the runtime calls
215	* in the same host thread and resets it to cudaSuccess
216	*/
217	cudaError_t cudaGetLastError() {
218	return cudaSuccess;
219	}
220
221	/* DEPRECATED. DO NOT USE
222	*/
223	cudaError_t cudaThreadExit() {
224	return cudaSuccess;
225	}
226
227	/* C++ Language Extensions */
228
229	/* Reads the 16-bit, 32-bit or 64-bit word old located at the address address in
230	* global or shared memory, computes (old + val), and stores the result back to
231	* memory at the same address. These three operations are performed in one atomic
232	* transaction. The function returns old.
233	*/
234	int cudaAtomicAdd_int(int* address, int val) {
235	int old = *address;
236	*address += val;
237	// Add call to $check_data_race??
238	return old;
239	}
240	unsigned int cudaAtomicAdd_uint(unsigned int* address, unsigned int val) {
241	unsigned int old = *address;
242	*address += val;
243	return old;
244	}
245	unsigned long long int cudaAtomicAdd_ullint(unsigned long long int* address,
246	unsigned long long int val) {
247	unsigned long long int old = *address;
248	*address += val;
249	return old;
250	}
251	float cudaAtomicAdd_float(float* address, float val) {
252	float old = *address;
253	*address += val;
254	return old;
255	}
256	double cudaAtomicAdd_double(double* address, double val) {
257	double old = *address;
258	*address += val;
259	return old;
260	}
261
262	/* reads the 32-bit word old located at the address address in global or shared
263	* memory, computes (old - val), and stores the result back to memory at the same
264	* address. These three operations are performed in one atomic transaction. The
265	* function returns old.
266	*/
267	int cudaAtomicSub_int(int* address, int val) {
268	int old = *address;
269	*address -= val;
270	return old;
271	}
272	unsigned int cudaAtomicSub_uint(unsigned int* address, unsigned int val) {
273	unsigned int old = *address;
274	*address -= val;
275	return old;
276	}
277
278	/* reads the 32-bit or 64-bit word old located at the address address in global
279	* or shared memory and stores val back to memory at the same address. These two
280	* operations are performed in one atomic transaction. The function returns old.
281	*/
282	int cudaAtomicExch_int(int* address, int val) {
283	int old = *address;
284	*address = val;
285	return old;
286	}
287	unsigned int cudaAtomicExch_uint(unsigned int* address, unsigned int val) {
288	unsigned int old = *address;
289	*address = val;
290	return old;
291	}
292	unsigned long long int cudaAtomicExch_ullint(unsigned long long int* address,
293	unsigned long long int val) {
294	unsigned long long int old = *address;
295	*address = val;
296	return old;
297	}
298	float cudaAtomicExch_float(float* address, float val) {
299	float old = *address;
300	*address = val;
301	return old;
302	}
303
304	/* reads the 32-bit or 64-bit word old located at the address address in global
305	* or shared memory, computes the minimum of old and val, and stores the result
306	* back to memory at the same address. These three operations are performed in one
307	* atomic transaction. The function returns old.
308	*/
309	int cudaAtomicMin_int(int* address, int val) {
310	int old = *address;
311	*address = (old <= val) ? old : val;
312	return old;
313	}
314	unsigned int cudaAtomicMin_uint(unsigned int* address, unsigned int val) {
315	unsigned int old = *address;
316	*address = (old <= val) ? old : val;
317	return old;
318	}
319	unsigned long long int cudaAtomicMin_ullint(unsigned long long int* address,
320	unsigned long long int val) {
321	unsigned long long int old = *address;
322	*address = (old <= val) ? old : val;
323	return old;
324	}
325
326	/* reads the 32-bit or 64-bit word old located at the address address in global
327	* or shared memory, computes the maximum of old and val, and stores the result
328	* back to memory at the same address. These three operations are performed in one
329	* atomic transaction. The function returns old.
330	*/
331	int cudaAtomicMax_int(int* address, int val) {
332	int old = *address;
333	*address = (old >= val) ? old : val;
334	return old;
335	}
336	unsigned int cudaAtomicMax_uint(unsigned int* address, unsigned int val) {
337	unsigned int old = *address;
338	*address = (old >= val) ? old : val;
339	return old;
340	}
341	unsigned long long int cudaAtomicMax_ullint(unsigned long long int* address,
342	unsigned long long int val) {
343	unsigned long long int old = *address;
344	*address = (old >= val) ? old : val;
345	return old;
346	}
347
348	/* reads the 32-bit word old located at the address address in global or shared
349	* memory, computes ((old >= val) ? 0 : (old+1)), and stores the result back to
350	* memory at the same address. These three operations are performed in one atomic
351	* transaction. The function returns old.
352	*/
353	unsigned int atomicInc(unsigned int* address, unsigned int val) {
354	unsigned int old = *address;
355	*address = (old >= val) ? 0 : old + 1;
356	return old;
357	}
358
359	/* reads the 32-bit word old located at the address address in global or shared
360	* memory, computes (((old == 0) \|\| (old > val)) ? val : (old-1) ), and stores
361	* the result back to memory at the same address. These three operations are
362	* performed in one atomic transaction. The function returns old.
363	*/
364	unsigned int atomicDec(unsigned int* address, unsigned int val) {
365	unsigned int old = *address;
366	*address = ((old == 0) \|\| (old > val)) ? val : old-1;
367	return old;
368	}
369
370	/* reads the 16-bit, 32-bit or 64-bit word old located at the address address in
371	* global or shared memory, computes (old == compare ? val : old) , and stores the
372	* result back to memory at the same address. These three operations are performed
373	* in one atomic transaction. The function returns old (Compare And Swap).
374	*/
375	int cudaAtomicCAS_int(int* address, int compare, int val) {
376	int old = *address;
377	*address = old == compare ? val : old;
378	return old;
379	}
380	unsigned int cudaAtomicCAS_uint(unsigned int* address,
381	unsigned int compare,
382	unsigned int val) {
383	unsigned int old = *address;
384	*address = old == compare ? val : old;
385	return old;
386	}
387	unsigned long long int cudaAtomicCAS_ullint(unsigned long long int* address,
388	unsigned long long int compare,
389	unsigned long long int val) {
390	unsigned long long int old = *address;
391	*address = old == compare ? val : old;
392	return old;
393	}
394	unsigned short int cudaAtomicCAS_usint(unsigned short int* address,
395	unsigned short int compare,
396	unsigned short int val) {
397	unsigned short int old = *address;
398	*address = old == compare ? val : old;
399	return old;
400	}
401

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format