/**
 * This file is modified from the original pathfinder_cuda.cvl
 * program in the following way:
 * First, GPU/GPU_BLOCK/GPU_THREAD no longer access global variables of
 * int type, but use their values (by parameter passing) instead.
 * Second, the process that spawns GPU_BLOCK/GPU_THREAD
 * doesn't reuse itself to run GPU_BLOCK/GPU_THREAD.
 */
#include<civlc.cvh>

// Pyramid height of 0 does nothing, 1 is a normal stencil; 2 is where
// the (pyramidal) algorithm even kicks in.
#define pyramid_height 2

// This is how many iterations of the loop (minus 1).  If less than
// pyramid_height, then pyramid_height does nothing.
#define rows 3

// Number of cells per row.  Believed to be the minimum value for which
// the model is still useful.
#define cols 2

// BLOCK_SIZE must satisfy (BLOCK_SIZE > pyramid_height*2); checked by
// the $assert in main().
#define BLOCK_SIZE 5
#define HALO 1

// Derived launch geometry, mirroring the host-side setup of the
// original Rodinia pathfinder benchmark.
int borderCols = (pyramid_height)*HALO;
int smallBlockCol = BLOCK_SIZE - (pyramid_height) * HALO * 2;
// Number of blocks: ceiling of cols / smallBlockCol.
int blockCols = cols/smallBlockCol+((cols%smallBlockCol==0)?0:1);

int result[cols];

// GPU MEMORY (couldn't do it scoped because we need references to
// memory on the host side).
// gpuResult holds two row buffers that are ping-ponged via the
// src/dst indices passed down to GPU_THREAD.
int gpuResult[2][cols];
int gpuWall[rows*cols - cols];

//$input int wall[rows][cols];

#define IN_RANGE(x, min, max) ((x)>=(min) && (x)<=(max))
#define CLAMP_RANGE(x, min, max) x = (x<(min)) ? min : ((x>(max)) ? max : x )
#define MIN(a, b) ((a)<=(b) ? (a) : (b))

// Not implemented yet: CUDA's __syncthreads() is instead modeled by
// the barrier()/barrier_init() routines from barrier.cvh.
//void __syncthreads() {
//}
// Models one CUDA thread block of the pathfinder kernel.  Spawns
// BLOCK_SIZE GPU_THREAD processes (one per CUDA thread), waits for all
// of them, then returns.
//
// bx          - this block's index (CUDA blockIdx.x)
// bIteration  - number of stencil iterations this launch performs
// bBorderCols - halo columns on each side of the block
// src, dst    - indices (0/1) selecting the input/output row buffer
//               in the global gpuResult ping-pong array
// startStep   - first row of gpuWall processed by this launch
void GPU_BLOCK(int bx, int bIteration, int bBorderCols, int src, int dst, int startStep){
  // Textually includes the barrier implementation so that barrier()
  // and barrier_init() operate on this block's own in_barrier array
  // (block-local barrier, like CUDA's __syncthreads()).
#include "barrier.cvh"

  // "Shared memory": block-local arrays visible to all GPU_THREADs of
  // this block through the pointers passed at spawn time.
  int prev[BLOCK_SIZE];
  int result[BLOCK_SIZE];
  $proc thread_procs[BLOCK_SIZE];
  int in_barrier[BLOCK_SIZE];

  // Models one CUDA thread (tx = threadIdx.x) of block tbx.  prev and
  // result point at the enclosing block's shared arrays.
  void GPU_THREAD(int tx, int tbx, int tIteration, int tBorderCols, int src, int dst, int* prev, int* result, int startStep){
    int small_block_cols = BLOCK_SIZE-tIteration*HALO*2;
    int blkX = small_block_cols*tbx-tBorderCols;
    int blkXmax = blkX+BLOCK_SIZE-1;
    // calculate the global thread coordinate
    int xidx = blkX+tx;

    // Effective range within this block that falls within the valid
    // range of the input data; used to rule out computation outside
    // the boundary.
    int validXmin = (blkX < 0) ? -blkX : 0;
    int validXmax = (blkXmax > cols-1) ? BLOCK_SIZE-1-(blkXmax-cols+1) : BLOCK_SIZE-1;
    int isValid;
    // NOTE(review): computed is only assigned inside the loop below,
    // so it is read uninitialized at the end if tIteration == 0.
    // calc_path always passes an iteration count >= 1 — confirm no
    // other caller can pass 0.
    int computed;
    // West/East neighbor indices, clamped to the valid range below.
    int W = tx-1;
    int E = tx+1;

    // Grouped into one atomic step to cut down the interleaving space
    // explored by the verifier (purely thread-local computation).
    $atomic{
      W = (W < validXmin) ? validXmin : W;
      E = (E > validXmax) ? validXmax : E;
      isValid = IN_RANGE(tx, validXmin, validXmax);
    }
    // Load this thread's cell from the src row buffer.
    // NOTE(review): there is no barrier between this shared write and
    // the prev[W]/prev[E] reads in the first loop iteration — this
    // mirrors (and lets CIVL detect) the race of the original CUDA
    // kernel; confirm whether that is the intended property.
    if(IN_RANGE(xidx, 0, cols-1)){
      prev[tx] = gpuResult[src][xidx];
    }
    for (int i=0; i<tIteration; i++){
      computed = 0;
      // Each iteration shrinks the active region by one cell on each
      // side (the pyramid), hence the [i+1, BLOCK_SIZE-i-2] range.
      if( IN_RANGE(tx, i+1, BLOCK_SIZE-i-2) && isValid) {
        computed = 1;
        int left = prev[W];
        int up = prev[tx];
        int right = prev[E];
        int shortest;
        int index;
        // Thread-local arithmetic, batched atomically to reduce the
        // verifier's state space.
        $atomic{
          shortest = MIN(left, up);
          shortest = MIN(shortest, right);
          index = cols*(startStep+i)+xidx;
        }
        result[tx] = shortest + gpuWall[index];
      }

      //Break not implemented yet...
      /* if(i==iteration-1) */
      /* break; */
      if(computed != 0) //Assign the computation range
        prev[tx]= result[tx];
      //__syncthreads(); // [Ronny] Added sync to avoid race on prev Aug. 14 201
      // Block-wide barrier from barrier.cvh, standing in for
      // __syncthreads().
      barrier(in_barrier, tx);
    }

    // Update the global memory: after the last iteration, only threads
    // coordinated within the small block performed the calculation and
    // switched on ``computed''.
    if (computed != 0) {
      gpuResult[dst][xidx]=result[tx];
    }
  }

  // Initialize the barrier state before any thread can touch it.
  $atomic {
    for(int tp = 0; tp < BLOCK_SIZE; tp++) {
      in_barrier[tp] = 0;
    }
    barrier_init(in_barrier, BLOCK_SIZE);
  }

  // Launch the threads of this block.  The $atomic wrapper makes the
  // spawn loop a single step, so no thread observes a half-built
  // thread_procs array.
  $atomic {
    for (int tp = 0; tp < BLOCK_SIZE; tp++) {
      thread_procs[tp] = $spawn GPU_THREAD(tp, bx, bIteration, bBorderCols, src, dst, prev, result, startStep);
    }
  }
  // Join all threads before the block returns.
  $atomic {
    for (int tp = 0; tp < BLOCK_SIZE; tp++) {
      $wait thread_procs[tp];
    }
  }
}
| 140 | void GPU(int iteration, int src, int dst, int startStep, int blocks, int threads, int gBorderCols) {
|
|---|
| 141 | $proc block_procs[blocks];
|
|---|
| 142 |
|
|---|
| 143 | //Launch the blocks
|
|---|
| 144 | $atom{
|
|---|
| 145 | for (int b = 0; b < blocks; b++) {
|
|---|
| 146 | block_procs[b] = $spawn GPU_BLOCK(b, iteration, gBorderCols, src, dst, startStep);
|
|---|
| 147 | }
|
|---|
| 148 | }
|
|---|
| 149 | $atomic{
|
|---|
| 150 | for (int b = 0; b < blocks; b++) {
|
|---|
| 151 | $wait block_procs[b];
|
|---|
| 152 | }
|
|---|
| 153 | }
|
|---|
| 154 | }
|
|---|
| 155 |
|
|---|
| 156 | void calc_path() {
|
|---|
| 157 | int src = 1, dst = 0;
|
|---|
| 158 | for (int t = 0; t < rows-1; t+=pyramid_height) {
|
|---|
| 159 | int temp;
|
|---|
| 160 | $atom{
|
|---|
| 161 | temp = src;
|
|---|
| 162 | src = dst;
|
|---|
| 163 | dst = temp;
|
|---|
| 164 | }
|
|---|
| 165 | GPU(MIN(pyramid_height, rows-t-1), src, dst,t, blockCols, BLOCK_SIZE, borderCols);
|
|---|
| 166 | }
|
|---|
| 167 | }
|
|---|
| 168 |
|
|---|
| 169 | void main() {
|
|---|
| 170 | $assert(BLOCK_SIZE > pyramid_height*2);
|
|---|
| 171 | calc_path();
|
|---|
| 172 | $assert(1 == 1);
|
|---|
| 173 | }
|
|---|