source: CIVL/examples/experimental/pathfinder_cuda.cvl

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 4.0 KB
Line 
1#include<civlc.cvh>
2
// Number of wall rows one kernel launch advances.
// 0 does nothing, 1 is the plain stencil, and 2 is the smallest value
// at which the pyramid optimisation actually takes effect.
#define pyramid_height 2

// Number of rows in the wall; the outer loop performs (rows - 1) steps.
// If (rows - 1) is less than pyramid_height, pyramid_height has no effect.
#define rows 3

// Number of cells per row. Believed to be the smallest value that is
// still useful.
#define cols 2


// Threads per block. Must satisfy BLOCK_SIZE > pyramid_height*2.
#define BLOCK_SIZE 5
// Width of the halo on each side of a block, per pyramid level.
#define HALO 1
19
20int borderCols = (pyramid_height)*HALO;
21int smallBlockCol = BLOCK_SIZE - (pyramid_height) * HALO * 2;
22int blockCols = cols/smallBlockCol+((cols%smallBlockCol==0)?0:1);
23$proc block_procs[blockCols];
24$proc thread_procs[BLOCK_SIZE*blockCols];
25
26int result[cols];
27
28//GPU MEMORY (couldn't do it scoped because need references to memory
29//on host side)
30int gpuResult[2][cols];
31int gpuWall[rows*cols - cols];
32
33$input int wall[rows][cols];
34
// IN_RANGE(x, min, max): non-zero iff min <= x <= max (both ends inclusive).
// Evaluates x twice.
#define IN_RANGE(x, min, max) ((x)>=(min) && (x)<=(max))
// CLAMP_RANGE(x, min, max): assigns to the lvalue x its value clamped to
// [min, max] and yields the assigned value. Every argument and the whole
// expansion are parenthesised so the macro can be used inside a larger
// expression; arguments are evaluated more than once, so they must be free
// of side effects.
#define CLAMP_RANGE(x, min, max) ((x) = (((x)<(min)) ? (min) : (((x)>(max)) ? (max) : (x))))
// MIN(a, b): the smaller of a and b (a when they are equal).
// Evaluates one of its arguments twice.
#define MIN(a, b) ((a)<=(b) ? (a) : (b))
38
/*
 * Stand-in for a barrier among the threads of a block.
 * Not implemented yet: the body is empty, so a call returns immediately
 * and does not synchronise anything.
 */
void __syncthreads() {
  /* intentionally empty */
}
42
43void GPU(int iteration, int *gpuSrc, int *gpuResults, int startStep, int blocks, int threads) {
44
45 void GPU_BLOCK(int bx){
46 //shared memory
47 int prev[BLOCK_SIZE];
48 int result[BLOCK_SIZE];
49
50 void GPU_THREAD(int tx){
51
52 int small_block_cols = BLOCK_SIZE-iteration*HALO*2;
53 int blkX = small_block_cols*bx-borderCols;
54 int blkXmax = blkX+BLOCK_SIZE-1;
55
56 // calculate the global thread coordination
57 int xidx = blkX+tx;
58
59 // effective range within this block that falls within
60 // the valid range of the input data
61 // used to rule out computation outside the boundary.
62 int validXmin = (blkX < 0) ? -blkX : 0;
63 int validXmax = (blkXmax > cols-1) ? BLOCK_SIZE-1-(blkXmax-cols+1) : BLOCK_SIZE-1;
64
65 int W = tx-1;
66 int E = tx+1;
67
68 W = (W < validXmin) ? validXmin : W;
69 E = (E > validXmax) ? validXmax : E;
70
71 int isValid = IN_RANGE(tx, validXmin, validXmax);
72 if(IN_RANGE(xidx, 0, cols-1)){
73 prev[tx] = gpuSrc[xidx];
74 }
75 __syncthreads();
76 int computed;
77
78 for (int i=0; i<iteration; i++){
79 computed = 0;
80 if( IN_RANGE(tx, i+1, BLOCK_SIZE-i-2) && isValid) {
81 computed = 1;
82 int left = prev[W];
83 int up = prev[tx];
84 int right = prev[E];
85 int shortest = MIN(left, up);
86 shortest = MIN(shortest, right);
87 int index = cols*(startStep+i)+xidx;
88 result[tx] = shortest + gpuWall[index];
89
90 }
91 __syncthreads();
92
93 //Break not implemented yet...
94 /* if(i==iteration-1) */
95 /* break; */
96 if(computed != 0) //Assign the computation range
97 prev[tx]= result[tx];
98 __syncthreads(); // [Ronny] Added sync to avoid race on prev Aug. 14 201
99
100 }
101
102 // update the global memory
103 // after the last iteration, only threads coordinated within the
104 // small block perform the calculation and switch on ``computed''
105 if (computed != 0) {
106 gpuResults[xidx]=result[tx];
107 }
108
109 }
110
111 //Launch the threads per block
112 for (int tp = 0; tp < BLOCK_SIZE-1; tp++) {
113 thread_procs[bx*BLOCK_SIZE + tp] = $spawn GPU_THREAD(tp);
114 }
115 GPU_THREAD(BLOCK_SIZE-1);
116
117 for (int tp = 0; tp < BLOCK_SIZE-1; tp++) {
118 $wait thread_procs[bx*BLOCK_SIZE + tp];
119 }
120
121 }
122
123 //Launch the blocks
124 for (int b = 0; b < blocks-1; b++) {
125 block_procs[b] = $spawn GPU_BLOCK(b);
126 }
127 GPU_BLOCK(blocks-1);
128
129 for (int b = 0; b < blocks-1; b++) {
130 $wait block_procs[b];
131 }
132}
133
134
135void calc_path() {
136 int src = 1, dst = 0;
137 for (int t = 0; t < rows-1; t+=pyramid_height) {
138 int temp = src;
139 src = dst;
140 dst = temp;
141 GPU(MIN(pyramid_height, rows-t-1),gpuResult[src], gpuResult[dst],t, blockCols, BLOCK_SIZE);
142 }
143}
144
145void main() {
146 $assert(BLOCK_SIZE > pyramid_height*2);
147 calc_path();
148 $assert(1 == 1);
149}
Note: See TracBrowser for help on using the repository browser.