Context Navigation

newCudaMockup.cvl@ cb4d4f4

main test-branch

Last change on this file since cb4d4f4 was aad342c, checked in by Stephen Siegel <siegel@…>, 3 years ago

Performing huge refactor to incorporate ABC, GMC, and SARL into CIVL repo and use Java modules.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5664 fb995dde-84ed-4084-dfe6-e5aef3e2452c

Property mode set to 100644

File size: 7.0 KB

Rev	Line
[29eb398]	1	/**
	2	* TODO:
	3	* - implement cudaMemset and cudaMemsetAsync
	4	* - flesh out basic structure of cuda kernel:
	5	* - spawn gridDim blocks
	6	* - spawn blockDim threads
	7	* Alternatively, spawn blockDim warps and then spawn warp's threads
	8	* - wait for blocks to finish
	9	* - Add in block-level barriers and a __syncthreads() nested function that uses the barrier.
	10	* - Add in data race checking support and implement atomicAdd for integers
	11	*/
	12
[109d05e]	13	enum cudaError {
	14	cudaSuccess
	15	};
	16	typedef enum cudaError cudaError_t;
	17
	18	typedef enum cudaMemcpyKind {
	19	cudaMemcpyHostToHost,
	20	cudaMemcpyHostToDevice,
	21	cudaMemcpyDeviceToHost,
	22	cudaMemcpyDeviceToDevice,
	23	cudaMemcpyDefault
	24	} cudaMemcpyKind;
	25
[e76acca]	26	typedef struct $cuda_op_state {
[109d05e]	27	_Bool start;
[e76acca]	28	$proc op;
	29	}* $cuda_op_state_t;
[109d05e]	30
[e76acca]	31	typdef struct $cuda_op_state_node {
	32	$cuda_op_state_t opState;
	33	$cuda_op_state_node_t next;
	34	}* $cuda_op_state_node_t;
[109d05e]	35
	36	typedef struct cudaStream {
[e76acca]	37	$cuda_op_state_node_t head;
	38	$cuda_op_state_node_t tail;
[109d05e]	39	int numOps;
	40
	41	$cuda_stream_node_t containingNode;
	42	_Bool alive;
	43	}* cudaStream_t;
	44	cudaStream_t $cuda_default_stream;
	45
	46	typedef struct $cuda_stream_node {
	47	$cuda_stream_t stream;
	48	$cuda_stream_node_t prev;
	49	$cuda_stream_node_t next;
	50	}* $cuda_stream_node_t;
	51
	52	typedef struct $cuda_context {
	53	$cuda_stream_node_t head;
	54	int numStreams;
	55	} $cuda_context;
	56	$cuda_context $cuda_global_context;
	57
[e76acca]	58	void $cuda_op_wait_start($cuda_op_state_t cudaOpState) {
	59	$when(cudaOpState->start) cudaOpState->op = $this;
[109d05e]	60	}
	61
	62	// Helper function to get the default stream if passed NULL, and just returns stream otherwise
	63	cudaStream_t $default_stream_if_null(cudaStream_t stream) {
	64	return stream == NULL ? $cuda_default_stream : stream;
	65	}
	66
	67	$cuda_stream_node_t $create_new_stream_node() {
	68	cudaStream_t newStream = (cudaStream_t) malloc(sizeof(struct cudaStream));
	69	newStream->head = NULL;
	70	newStream->tail = NULL;
	71	newStream->numOps = 0;
	72	newStream->alive = true;
	73
	74	$cuda_stream_node_t newHead = ($cuda_stream_node_t) malloc(sizeof(struct $cuda_stream_node));
	75	newHead->stream = newStream;
	76	newStream->containingNode = newHead;
	77	newHead->prev = NULL;
	78	newHead->next = NULL;
	79	}
	80
	81	// TODO: atomic
	82	cudaError_t cudaStreamCreate(cudaStream_t * pStream) {
	83	// Create new stream node in linked list
	84	$cuda_stream_node_t newHead = $create_new_stream_node();
	85	newHead->next = $cuda_global_context.head;
	86	$cuda_global_context.head->prev = newHead;
	87
	88	// Update cuda context's head to be the new node we created
	89	$cuda_global_context.head = newHead;
	90	$cuda_global_context.numStreams++;
	91
	92	return cudaSuccess
	93	}
	94
	95	cudaError_t cudaStreamSynchronize(cudaStream_t stream) {
	96	stream = $default_stream_if_null(stream);
	97	$assert(stream->alive, "Attempt to synchronize with a destroyed stream");
	98	$when(stream->head == NULL) return cudaSuccess;
	99	}
	100
	101	$proc $destroy_stream_node($cuda_stream_node_t node) {
	102	if (node->prev != NULL) {
	103	node->prev->next = node->next;
	104	}
	105	if (node->next != NULL) {
	106	node->next->prev = node->prev;
	107	}
	108	free(node);
	109	node->stream->alive = false;
	110
	111	void $destroy_stream_when_complete(cudaStream_t stream) {
	112	$when(stream->head==NULL) free(stream);
	113	}
	114	return $spawn $destroy_when_complete(node->stream);
	115	}
	116
	117	// TODO: atomic
	118	cudaError_t cudaStreamDestroy(cudaStream_t stream) {
	119	$assert(stream != NULL && stream != $cuda_default_stream, "Attempt to destroy default stream");
	120	$assert(stream->alive, "Attempt to destroy an already destroyed stream);
	121	$destroy_stream_node(stream->containingNode);
	122	return cudaSuccess;
	123	}
	124
	125	// TODO: atomic
[e76acca]	126	$cuda_op_state_t $stream_enqueue(cudaStream_t stream) {
[109d05e]	127	stream = $default_stream_if_null(stream);
	128	$assert(stream->alive, "Attempt to enqueue a CUDA operation onto a destroyed stream");
	129
[e76acca]	130	$cuda_op_state_t newOpState = ($cuda_op_state_t) malloc(sizeof(struct $cuda_op_state));
	131	newOpState->start = false;
	132	newOpState->op = NULL;
[109d05e]	133
[e76acca]	134	$cuda_op_state_node_t newOpStateNode = ($cuda_op_state_node_t) malloc(sizeof($cuda_op_state_node));
	135	newOpStateNode->opState = newOpState;
	136	newOpStateNode->next = NULL;
[109d05e]	137
	138	if (stream->tail == NULL) {
[e76acca]	139	stream->head = newOpStateNode;
	140	stream->tail = newOpStateNode;
	141	newOpState->start = true;
[109d05e]	142	} else {
[e76acca]	143	stream->tail->next = newOpStateNode;
	144	stream->tail = newOpStateNode;
[109d05e]	145	}
	146	stream->numOps++;
[e76acca]	147	return newOpState;
[109d05e]	148	}
	149
	150	// TODO: atomic
	151	void $stream_dequeue(cudaStream_t stream) {
	152	stream = $default_stream_if_null(stream);
	153	$assert(stream->head != NULL, "Attempt to dequeue an empty stream");
	154
	155	if (stream->head == stream->tail) {
	156	stream->tail = NULL;
	157	}
	158
[e76acca]	159	$cuda_op_state_node_t oldHead = stream->head;
[109d05e]	160	stream->head = oldHead->next;
	161	if (stream->head != NULL) {
	162	stream->head->op->start = true;
	163	}
	164
[e76acca]	165	free(oldHead->opState);
[109d05e]	166	stream->numOps--;
	167	}
	168
	169	void $cuda_memcpy_proc(void* dst, const void* src, size_t count,
[e76acca]	170	$cuda_op_state_t opState, cudaStream_t stream) {
	171	$cuda_op_wait_start(opState);
[109d05e]	172	memcpy(dst, src, count);
	173	$stream_dequeue(stream);
	174	}
	175
	176	cudaError_t cudaMemcpy(void* dst, const void* src, size_t count, cudaMemcpyKind kind) {
	177	if (kind == cudaHostToHost) {
[29eb398]	178	memcpy(dst, src, count);
[109d05e]	179	} else {
[e76acca]	180	$cuda_op_state_t opState = $stream_enqueue($cuda_default_stream);
	181	$proc memcpyProc = $spawn $cuda_memcpy_proc(dst, src, count, opState, $cuda_default_stream);
	182	if (kind != cudaMemcpyDeviceToDevice) {
[109d05e]	183	$wait(memcpyProc);
	184	}
	185	}
	186
	187	return cudaSuccess;
	188	}
	189
	190	cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count,
	191	cudaMemcpyKind kind, cudaStream_t stream) {
[e76acca]	192	if (kind == cudaMemcpyHostToHost) {
[29eb398]	193	memcpy(dst, src, count);
[109d05e]	194	} else {
[e76acca]	195	$cuda_op_state_t opState = $stream_enqueue(stream);
	196	$spawn $cuda_memcpy_proc(dst, src, count, opState, stream);
[109d05e]	197	}
	198
	199	return cudaSuccess;
	200	}
	201
	202	cudaError_t cudaDeviceSynchronize() {
[e76acca]	203	$proc opsToWaitOn[] = ($proc) malloc(sizeof($proc)
[109d05e]	204	$cuda_global_context.numStreams);
	205	int numOps = 0;
	206
	207	$atomic {
	208	for ($cuda_stream_node_t node = $cuda_global_context.head;
	209	node != NULL;
	210	node = node->next) {
	211	if (node->stream->tail != NULL) {
[e76acca]	212	opsToWaitOn[numOps] = node->stream->tail->opState->op;
[109d05e]	213	numOps++;
	214	}
	215	}
	216	}
[e76acca]	217	$waitall(opsToWaitOn, numOps);
[109d05e]	218
	219	return cudaSuccess;
	220	}
	221
	222	void $cuda_setup() {
	223	$cuda_stream_node_t defaultStreamNode = $create_new_stream_node();
	224	$cuda_default_stream = defaultStreamNode->stream;
	225
	226	$cuda_global_context.head = defaultStreamNode;
	227	$cuda_global_context.count = 1;
	228	}
	229
	230	void $cuda_teardown() {
	231	$proc destructor = $destroy_stream_node($cuda_default_stream->containingNode);
	232	$wait(destructor);
	233	}
	234
	235	void _cuda_kernel_1(dim3 gridDim, dim3 blockDim, size_t _cuda_mem_size,
	236	int x) {
	237
	238	}
[e76acca]	239	void $proc_kernel_1(dim3 gridDim, dim3 blockDim, size_t _cudaMemSize,
	240	cudaStream_t _cudaStream, $cuda_op_state_t _cudaOpState,
[109d05e]	241	int x) {
[e76acca]	242	$cuda_op_wait_start(_cudaOpState);
	243	_cuda_kernel_1(gridDim, blockDim, _cudaMemSize, x);
	244	$stream_dequeue(_cudaStream);
[109d05e]	245	}
	246
	247	void _civl_main() {
	248	{
[e76acca]	249	$cuda_op_state_t _newOpState = $stream_enqueue(stream);
	250	$spawn $proc_kernel_1(gridDim, blockDim, 0, stream, _newOpState, x);
[109d05e]	251	}
	252	}
	253
	254	int main() {
	255	$cuda_setup();
	256	_civl_main();
	257	$cuda_teardown();
[e76acca]	258	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format