source: CIVL/examples/experimental/pathfinder2.cvl

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 4.6 KB
Line 
1/**
2 * This file is modified from the original pathfinder_cuda.cvl
3 * program by the following way:
4 * First, GPU/GPU_BLOCK/GPU_THREAD no longer access global variables of
5 * int type, but use their value (by parameter passing) instead.
6 * Second, the process that spawns GPU_BLOCK/GPU_THREAD
7 * doen't reuse itself to run GPU_BLOCK/GPU_THREAD.
8 */
9
10#include<civlc.cvh>
11
12//pyramid height of 0 does nothing, of 1 is normal stencil, 2 is where
13//the algorithm even kicks in
14#define pyramid_height 2
15
16//This is how many iterations of the loop (minus 1). If less than
17//the pyramid_height, then the pyramid_height does nothing.
18#define rows 3
19
20//I think this is the minimum this can be to be useful
21//(the number of cells)
22#define cols 2
23
24
25//BLOCK_SIZE must satisfy (BLOCK_SIZE > pyramid_height*2)
26#define BLOCK_SIZE 5
27#define HALO 1
28
29int borderCols = (pyramid_height)*HALO;
30int smallBlockCol = BLOCK_SIZE - (pyramid_height) * HALO * 2;
31int blockCols = cols/smallBlockCol+((cols%smallBlockCol==0)?0:1);
32
33int result[cols];
34
35//GPU MEMORY (couldn't do it scoped because need references to memory
36//on host side)
37int gpuResult[2][cols];
38int gpuWall[rows*cols - cols];
39
40//$input int wall[rows][cols];
41
42#define IN_RANGE(x, min, max) ((x)>=(min) && (x)<=(max))
43#define CLAMP_RANGE(x, min, max) x = (x<(min)) ? min : ((x>(max)) ? max : x )
44#define MIN(a, b) ((a)<=(b) ? (a) : (b))
45
46//Not implemented yet.
47//void __syncthreads() {
48//}
49
50void GPU(int iteration, int src, int dst, int startStep, int blocks, int threads, int gBorderCols) {
51
52 void GPU_BLOCK(int bx, int bIteration, int bBorderCols, int src, int dst, int startStep){
53 //shared memory
54 int prev[BLOCK_SIZE];
55 int result[BLOCK_SIZE];
56
57 void GPU_THREAD(int tx, int tbx, int tIteration, int tBorderCols, int src, int dst, int startStep, int* prev, int* result){
58 int small_block_cols = BLOCK_SIZE-tIteration*HALO*2;
59 int blkX = small_block_cols*tbx-tBorderCols;
60 int blkXmax = blkX+BLOCK_SIZE-1;
61
62 // calculate the global thread coordination
63 int xidx = blkX+tx;
64
65 // effective range within this block that falls within
66 // the valid range of the input data
67 // used to rule out computation outside the boundary.
68 int validXmin = (blkX < 0) ? -blkX : 0;
69 int validXmax = (blkXmax > cols-1) ? BLOCK_SIZE-1-(blkXmax-cols+1) : BLOCK_SIZE-1;
70
71 int W = tx-1;
72 int E = tx+1;
73
74 W = (W < validXmin) ? validXmin : W;
75 E = (E > validXmax) ? validXmax : E;
76
77 int isValid = IN_RANGE(tx, validXmin, validXmax);
78 if(IN_RANGE(xidx, 0, cols-1)){
79 prev[tx] = gpuResult[src][xidx];
80 }
81
82 int computed;
83
84 for (int i=0; i<tIteration; i++){
85 computed = 0;
86 if( IN_RANGE(tx, i+1, BLOCK_SIZE-i-2) && isValid) {
87 computed = 1;
88 int left = prev[W];
89 int up = prev[tx];
90 int right = prev[E];
91 int shortest = MIN(left, up);
92 shortest = MIN(shortest, right);
93 int index = cols*(startStep+i)+xidx;
94 result[tx] = shortest + gpuWall[index];
95 }
96
97 //Break not implemented yet...
98 /* if(i==iteration-1) */
99 /* break; */
100 if(computed != 0) //Assign the computation range
101 prev[tx]= result[tx];
102 //__syncthreads(); // [Ronny] Added sync to avoid race on prev Aug. 14 201
103 }
104
105 // update the global memory
106 // after the last iteration, only threads coordinated within the
107 // small block perform the calculation and switch on ``computed''
108 if (computed != 0) {
109 gpuResult[dst][xidx]=result[tx];
110 }
111 }
112
113 $proc thread_procs[BLOCK_SIZE];
114
115 //Launch the threads per block
116 for (int tp = 0; tp < BLOCK_SIZE; tp++) {
117 thread_procs[tp] = $spawn GPU_THREAD(tp, bx, bIteration, bBorderCols, src, dst, startStep, prev, result);
118 }
119
120 for (int tp = 0; tp < BLOCK_SIZE; tp++) {
121 $wait thread_procs[tp];
122 }
123 }
124
125 $proc block_procs[blocks];
126
127 //Launch the blocks
128 for (int b = 0; b < blocks; b++) {
129 block_procs[b] = $spawn GPU_BLOCK(b, iteration, gBorderCols, src, dst, startStep);
130 }
131
132 for (int b = 0; b < blocks; b++) {
133 $wait block_procs[b];
134 }
135}
136
137void calc_path() {
138 int src = 1, dst = 0;
139 for (int t = 0; t < rows-1; t+=pyramid_height) {
140 int temp = src;
141 src = dst;
142 dst = temp;
143 GPU(MIN(pyramid_height, rows-t-1), src, dst,t, blockCols, BLOCK_SIZE, borderCols);
144 }
145}
146
147void main() {
148 $assert(BLOCK_SIZE > pyramid_height*2);
149 calc_path();
150}
Note: See TracBrowser for help on using the repository browser.