source: CIVL/mods/dev.civl.com/examples/cuda/newCudaMockup.cvl@ cb4d4f4

main test-branch
Last change on this file since cb4d4f4 was aad342c, checked in by Stephen Siegel <siegel@…>, 3 years ago

Performing huge refactor to incorporate ABC, GMC, and SARL into CIVL repo and use Java modules.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5664 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 7.0 KB
RevLine 
[29eb398]1/**
2 * TODO:
3 * - implement cudaMemset and cudaMemsetAsync
4 * - flesh out basic structure of cuda kernel:
5 * - spawn gridDim blocks
6 * - spawn blockDim threads
7 * Alternatively, spawn blockDim warps and then spawn warp's threads
8 * - wait for blocks to finish
9 * - Add in block-level barriers and a __syncthreads() nested function that uses the barrier.
10 * - Add in data race checking support and implement atomicAdd for integers
11 */
12
[109d05e]13enum cudaError {
14 cudaSuccess
15};
16typedef enum cudaError cudaError_t;
17
18typedef enum cudaMemcpyKind {
19 cudaMemcpyHostToHost,
20 cudaMemcpyHostToDevice,
21 cudaMemcpyDeviceToHost,
22 cudaMemcpyDeviceToDevice,
23 cudaMemcpyDefault
24} cudaMemcpyKind;
25
[e76acca]26typedef struct $cuda_op_state {
[109d05e]27 _Bool start;
[e76acca]28 $proc op;
29}* $cuda_op_state_t;
[109d05e]30
[e76acca]31typdef struct $cuda_op_state_node {
32 $cuda_op_state_t opState;
33 $cuda_op_state_node_t next;
34}* $cuda_op_state_node_t;
[109d05e]35
36typedef struct cudaStream {
[e76acca]37 $cuda_op_state_node_t head;
38 $cuda_op_state_node_t tail;
[109d05e]39 int numOps;
40
41 $cuda_stream_node_t containingNode;
42 _Bool alive;
43}* cudaStream_t;
44cudaStream_t $cuda_default_stream;
45
46typedef struct $cuda_stream_node {
47 $cuda_stream_t stream;
48 $cuda_stream_node_t prev;
49 $cuda_stream_node_t next;
50}* $cuda_stream_node_t;
51
52typedef struct $cuda_context {
53 $cuda_stream_node_t head;
54 int numStreams;
55} $cuda_context;
56$cuda_context $cuda_global_context;
57
[e76acca]58void $cuda_op_wait_start($cuda_op_state_t cudaOpState) {
59 $when(cudaOpState->start) cudaOpState->op = $this;
[109d05e]60}
61
62// Helper function to get the default stream if passed NULL, and just returns stream otherwise
63cudaStream_t $default_stream_if_null(cudaStream_t stream) {
64 return stream == NULL ? $cuda_default_stream : stream;
65}
66
67$cuda_stream_node_t $create_new_stream_node() {
68 cudaStream_t newStream = (cudaStream_t) malloc(sizeof(struct cudaStream));
69 newStream->head = NULL;
70 newStream->tail = NULL;
71 newStream->numOps = 0;
72 newStream->alive = true;
73
74 $cuda_stream_node_t newHead = ($cuda_stream_node_t) malloc(sizeof(struct $cuda_stream_node));
75 newHead->stream = newStream;
76 newStream->containingNode = newHead;
77 newHead->prev = NULL;
78 newHead->next = NULL;
79}
80
81// TODO: atomic
82cudaError_t cudaStreamCreate(cudaStream_t * pStream) {
83 // Create new stream node in linked list
84 $cuda_stream_node_t newHead = $create_new_stream_node();
85 newHead->next = $cuda_global_context.head;
86 $cuda_global_context.head->prev = newHead;
87
88 // Update cuda context's head to be the new node we created
89 $cuda_global_context.head = newHead;
90 $cuda_global_context.numStreams++;
91
92 return cudaSuccess
93}
94
95cudaError_t cudaStreamSynchronize(cudaStream_t stream) {
96 stream = $default_stream_if_null(stream);
97 $assert(stream->alive, "Attempt to synchronize with a destroyed stream");
98 $when(stream->head == NULL) return cudaSuccess;
99}
100
101$proc $destroy_stream_node($cuda_stream_node_t node) {
102 if (node->prev != NULL) {
103 node->prev->next = node->next;
104 }
105 if (node->next != NULL) {
106 node->next->prev = node->prev;
107 }
108 free(node);
109 node->stream->alive = false;
110
111 void $destroy_stream_when_complete(cudaStream_t stream) {
112 $when(stream->head==NULL) free(stream);
113 }
114 return $spawn $destroy_when_complete(node->stream);
115}
116
117// TODO: atomic
118cudaError_t cudaStreamDestroy(cudaStream_t stream) {
119 $assert(stream != NULL && stream != $cuda_default_stream, "Attempt to destroy default stream");
120 $assert(stream->alive, "Attempt to destroy an already destroyed stream);
121 $destroy_stream_node(stream->containingNode);
122 return cudaSuccess;
123}
124
125// TODO: atomic
[e76acca]126$cuda_op_state_t $stream_enqueue(cudaStream_t stream) {
[109d05e]127 stream = $default_stream_if_null(stream);
128 $assert(stream->alive, "Attempt to enqueue a CUDA operation onto a destroyed stream");
129
[e76acca]130 $cuda_op_state_t newOpState = ($cuda_op_state_t) malloc(sizeof(struct $cuda_op_state));
131 newOpState->start = false;
132 newOpState->op = NULL;
[109d05e]133
[e76acca]134 $cuda_op_state_node_t newOpStateNode = ($cuda_op_state_node_t) malloc(sizeof($cuda_op_state_node));
135 newOpStateNode->opState = newOpState;
136 newOpStateNode->next = NULL;
[109d05e]137
138 if (stream->tail == NULL) {
[e76acca]139 stream->head = newOpStateNode;
140 stream->tail = newOpStateNode;
141 newOpState->start = true;
[109d05e]142 } else {
[e76acca]143 stream->tail->next = newOpStateNode;
144 stream->tail = newOpStateNode;
[109d05e]145 }
146 stream->numOps++;
[e76acca]147 return newOpState;
[109d05e]148}
149
150// TODO: atomic
151void $stream_dequeue(cudaStream_t stream) {
152 stream = $default_stream_if_null(stream);
153 $assert(stream->head != NULL, "Attempt to dequeue an empty stream");
154
155 if (stream->head == stream->tail) {
156 stream->tail = NULL;
157 }
158
[e76acca]159 $cuda_op_state_node_t oldHead = stream->head;
[109d05e]160 stream->head = oldHead->next;
161 if (stream->head != NULL) {
162 stream->head->op->start = true;
163 }
164
[e76acca]165 free(oldHead->opState);
[109d05e]166 stream->numOps--;
167}
168
169void $cuda_memcpy_proc(void* dst, const void* src, size_t count,
[e76acca]170 $cuda_op_state_t opState, cudaStream_t stream) {
171 $cuda_op_wait_start(opState);
[109d05e]172 memcpy(dst, src, count);
173 $stream_dequeue(stream);
174}
175
176cudaError_t cudaMemcpy(void* dst, const void* src, size_t count, cudaMemcpyKind kind) {
177 if (kind == cudaHostToHost) {
[29eb398]178 memcpy(dst, src, count);
[109d05e]179 } else {
[e76acca]180 $cuda_op_state_t opState = $stream_enqueue($cuda_default_stream);
181 $proc memcpyProc = $spawn $cuda_memcpy_proc(dst, src, count, opState, $cuda_default_stream);
182 if (kind != cudaMemcpyDeviceToDevice) {
[109d05e]183 $wait(memcpyProc);
184 }
185 }
186
187 return cudaSuccess;
188}
189
190cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count,
191 cudaMemcpyKind kind, cudaStream_t stream) {
[e76acca]192 if (kind == cudaMemcpyHostToHost) {
[29eb398]193 memcpy(dst, src, count);
[109d05e]194 } else {
[e76acca]195 $cuda_op_state_t opState = $stream_enqueue(stream);
196 $spawn $cuda_memcpy_proc(dst, src, count, opState, stream);
[109d05e]197 }
198
199 return cudaSuccess;
200}
201
202cudaError_t cudaDeviceSynchronize() {
[e76acca]203 $proc opsToWaitOn[] = ($proc*) malloc(sizeof($proc) *
[109d05e]204 $cuda_global_context.numStreams);
205 int numOps = 0;
206
207 $atomic {
208 for ($cuda_stream_node_t node = $cuda_global_context.head;
209 node != NULL;
210 node = node->next) {
211 if (node->stream->tail != NULL) {
[e76acca]212 opsToWaitOn[numOps] = node->stream->tail->opState->op;
[109d05e]213 numOps++;
214 }
215 }
216 }
[e76acca]217 $waitall(opsToWaitOn, numOps);
[109d05e]218
219 return cudaSuccess;
220}
221
222void $cuda_setup() {
223 $cuda_stream_node_t defaultStreamNode = $create_new_stream_node();
224 $cuda_default_stream = defaultStreamNode->stream;
225
226 $cuda_global_context.head = defaultStreamNode;
227 $cuda_global_context.count = 1;
228}
229
230void $cuda_teardown() {
231 $proc destructor = $destroy_stream_node($cuda_default_stream->containingNode);
232 $wait(destructor);
233}
234
235void _cuda_kernel_1(dim3 gridDim, dim3 blockDim, size_t _cuda_mem_size,
236 int x) {
237
238}
[e76acca]239void $proc_kernel_1(dim3 gridDim, dim3 blockDim, size_t _cudaMemSize,
240 cudaStream_t _cudaStream, $cuda_op_state_t _cudaOpState,
[109d05e]241 int x) {
[e76acca]242 $cuda_op_wait_start(_cudaOpState);
243 _cuda_kernel_1(gridDim, blockDim, _cudaMemSize, x);
244 $stream_dequeue(_cudaStream);
[109d05e]245}
246
247void _civl_main() {
248 {
[e76acca]249 $cuda_op_state_t _newOpState = $stream_enqueue(stream);
250 $spawn $proc_kernel_1(gridDim, blockDim, 0, stream, _newOpState, x);
[109d05e]251 }
252}
253
254int main() {
255 $cuda_setup();
256 _civl_main();
257 $cuda_teardown();
[e76acca]258}
Note: See TracBrowser for help on using the repository browser.