Context Navigation

cuda.cvh@ 5c27aa5

1.23 2.0 main test-branch

Last change on this file since 5c27aa5 was e3151da, checked in by Ziqing Luo <ziqing@…>, 11 years ago

re-organized example directory

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@1763 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 5.1 KB

Line
1
2	/* Functions in this file are meant to serve as drop-in CIVL replacements
3	* for the Cuda function of the same name. Because of this, much of the
4	* documentation of these functions is identical to the documentation
5	* for its Cuda counterpart.
6	*/
7
8	#ifdef __CUDA__
9	#else
10	#define __CUDA__
11
12	#include "civlc.h"
13	#include "cuda-helper.cvh"
14	#include <string.h>
15	#include <stdio.h>
16
17	/* Returns in *count the number of devices with compute capability
18	* greater or equal to 1.0 that are available for execution.
19	*/
20	cudaError_t cudaGetDeviceCount(int *count) {
21	// possibly this should return an value specified as $input?
22	*count = 1;
23	return cudaSuccess;
24	}
25
26	/* Creates and event object
27	*/
28	cudaError_t cudaEventCreate(cudaEvent_t *event) {
29	*event = _eventCreate();
30	return cudaSuccess;
31	}
32
33	/* Records an event. If stream is non-zero, the event is recorded
34	* after all preceding operations in stream have been completed;
35	* otherwise, it is recorded after all preceding operations in the
36	* CUDA context have been completed. Since operation is asynchronous,
37	* cudaEventQuery() and/or cudaEventSynchronize() must be used to
38	* determine when the event has actually been recorded.
39	*/
40	cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t s) {
41	if (event->instances != NULL) {
42	//printf("freeing instance list b\n");
43	$free(event->instances);
44	}
45	if (s == NULL) {
46	event->instances = _allMostRecentKernels();
47	event->numInstances = _context.numStreams + 1;
48	} else {
49	//printf("mallocing instance list c\n");
50	event->instances = (_kernelInstance*)$malloc($root, sizeof(_kernelInstance));
51	event->instances[0] = s->mostRecent->instance;
52	event->numInstances = 1;
53	}
54	}
55
56	/* Query the status of all device work preceding the most recent call
57	* to cudaEventRecord() (in the appropriate compute streams, as
58	* specified by the arguments to cudaEventRecord()).
59	*
60	* If this work has successfully been completed by the device, or if
61	* cudaEventRecord() has not been called on event, then cudaSuccess
62	* is returned. If this work has not yet been completed by the device
63	* then cudaErrorNotReady is returned.
64	*/
65	cudaError_t cudaEventQuery(cudaEvent_t event) {
66	_Bool allKernelsFinished = $true;
67
68	for (int i = 0; i < event->numInstances; i++) {
69	if (event->instances[i]->status != _kernelStatusFinished) {
70	allKernelsFinished = $false;
71	break;
72	}
73	}
74	return allKernelsFinished ? cudaSuccess : cudaErrorNotReady;
75	}
76
77	/* Wait until the completion of all device work preceding the most
78	* recent call to cudaEventRecord() (in the appropriate compute streams,
79	* as specified by the arguments to cudaEventRecord()).
80	*
81	* If cudaEventRecord() has not been called on event, cudaSuccess
82	* is returned immediately.
83	*/
84	cudaError_t cudaEventSynchronize(cudaEvent_t event) {
85	_eventWait(event);
86	return cudaSuccess;
87	}
88
89	/* since "timing" doesn't really make sense in the verification process
90	* I'm not sure what this should do. maybe it shouldn't exist.
91	*/
92	cudaError_t cudaEventElapsedTime(float *t, cudaEvent_t from, cudaEvent_t to) {
93	*t = 1.0;
94	return cudaSuccess;
95	}
96
97	/* Destroys the event specified by event.
98	*/
99	cudaError_t cudaEventDestroy(cudaEvent_t event) {
100	_eventDestroy(event);
101	return cudaSuccess;
102	}
103
104	/* Creates a new asynchronous stream.
105	*/
106	cudaError_t cudaStreamCreate(cudaStream_t *pStream) {
107	_cudaStreamNode *newNode = _streamNodeCreate();
108
109	*pStream = _streamCreate();
110	newNode->stream = *pStream;
111	newNode->next = _context.headNode;
112	_context.headNode = newNode;
113	_context.numStreams += 1;
114
115	return cudaSuccess;
116	}
117
118	/* Blocks until stream has completed all operations.
119	*/
120	cudaError_t cudaStreamSynchronize(cudaStream_t stream) {
121	cudaStream_t s;
122
123	if (stream == NULL)
124	s = _context.nullStream;
125	else
126	s = stream;
127	_streamWait(s);
128	return cudaSuccess;
129	}
130
131	/* Destroys and cleans up the asynchronous stream specified by stream.
132	*/
133	cudaError_t cudaStreamDestroy(cudaStream_t pStream) {
134	$assert(pStream->usable);
135	pStream->usable = $false;
136	return cudaSuccess;
137	}
138
139	/* locks until stream has completed all operations.
140	*/
141	cudaError_t cudaDeviceSynchronize() {
142	_streamWaitAll();
143	_streamWait(_context.nullStream);
144	return cudaSuccess;
145	}
146
147	/* Copies count bytes from the memory area pointed to by src to the
148	* memory area pointed to by dst, where kind is one of
149	* cudaMemcpyHostToHost, cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost,
150	* or cudaMemcpyDeviceToDevice, and specifies the direction of the
151	* copy. The memory areas may not overlap.
152	*/
153	cudaError_t cudaMemcpy ( void dst, const void src, size_t count, enum cudaMemcpyKind kind ) {
154	cudaDeviceSynchronize();
155	memcpy(dst, src, count);
156	cudaDeviceSynchronize();
157	return cudaSuccess;
158	}
159
160	/* Frees the memory space pointed to by devPtr. Similar semantics to free/$free.
161	*/
162	cudaError_t cudaFree(void *devPtr) {
163	$free(devPtr);
164	return cudaSuccess;
165	}
166
167	/* Sets device as the current device for the calling host thread. Currently,
168	* only a single device is supported, so this call always succeeds with a noop.
169	*/
170	cudaError_t cudaSetDevice(int device_id) {
171	return cudaSuccess;
172	}
173
174	#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format