/** * This file is modified from the original pathfinder_cuda.cvl * program by the following way: * First, GPU/GPU_BLOCK/GPU_THREAD no longer access global variables of * int type, but use their value (by parameter passing) instead. * Second, the process that spawns GPU_BLOCK/GPU_THREAD * doen't reuse itself to run GPU_BLOCK/GPU_THREAD. */ #include #define pyramid_height 2 #define rows 3 #define cols 2 //BLOCK_SIZE must satisfy (BLOCK_SIZE > pyramid_height*2) #define BLOCK_SIZE 5 #define HALO 1 int borderCols = (pyramid_height)*HALO; int smallBlockCol = BLOCK_SIZE - (pyramid_height) * HALO * 2; int blockCols = cols/smallBlockCol+((cols%smallBlockCol==0)?0:1); int result[cols]; int gpuResult[2][cols]; int gpuWall[rows*cols - cols]; //$input int wall[rows][cols]; #define IN_RANGE(x, min, max) ((x)>=(min) && (x)<=(max)) #define CLAMP_RANGE(x, min, max) \ if (x < (min)) x = (min); else if (x > (max)) x = (max); #define MIN(result, a, b) if ((a)<=(b)) result = (a); else result = (b); //Not implemented yet. 
// NOTE(review): the line below is damaged and cannot be compiled as stored.
//  * It is a single physical line that starts with "//", so the line comment
//    swallows everything after it, including the GPU definition.
//  * Text is missing between "for (int i=0; i" and "pyramid_height*2);".
//    The rest of GPU_THREAD, the remainder of GPU_BLOCK and GPU, and the start
//    of the function that the trailing "calc_path(); }" belongs to are absent.
//    Restore them from the original source; do not reconstruct by guesswork.
//
// What the surviving text shows:
//  * A commented-out empty stub for __syncthreads().
//  * GPU(iteration, src, dst, startStep, blocks, threads, gBorderCols)
//    contains a nested function GPU_BLOCK. GPU_BLOCK declares two local arrays,
//    prev[BLOCK_SIZE] and result[BLOCK_SIZE], and contains a nested function
//    GPU_THREAD that receives pointers named prev and result.
//  * GPU_THREAD(tx, tbx, ...) computes
//      small_block_cols = BLOCK_SIZE - tIteration*HALO*2
//      blkX    = small_block_cols*tbx - tBorderCols  (column of block index 0)
//      blkXmax = blkX + BLOCK_SIZE - 1               (column of the last index)
//      xidx    = blkX + tx                           (column of this thread)
//  * validXmin..validXmax is the range of block-local indices whose column
//    falls inside [0, cols-1].
//  * W = tx-1 and E = tx+1 are the neighbour indices, clamped into
//    [validXmin, validXmax]; isValid records whether tx lies in that range.
//  * prev[tx] is loaded from gpuResult[src][xidx] only when xidx is inside
//    [0, cols-1].
//  * The loop declared after "int computed;" is where the text breaks off.
//void __syncthreads() { //} void GPU(int iteration, int src, int dst, int startStep, int blocks, int threads, int gBorderCols) { void GPU_BLOCK(int bx, int bIteration, int bBorderCols, int src, int dst, int startStep){ int prev[BLOCK_SIZE]; int result[BLOCK_SIZE]; void GPU_THREAD(int tx, int tbx, int tIteration, int tBorderCols, int src, int dst, int startStep, int* prev, int* result) { int small_block_cols = BLOCK_SIZE-tIteration*HALO*2; int blkX = small_block_cols*tbx-tBorderCols; int blkXmax = blkX+BLOCK_SIZE-1; int xidx = blkX+tx; int validXmin, validXmax; int W = tx-1; int E = tx+1; if (blkX < 0) validXmin = -blkX; else validXmin = 0; if (blkXmax > cols-1) validXmax = BLOCK_SIZE-1-(blkXmax-cols+1); else validXmax = BLOCK_SIZE-1; if (W < validXmin) W = validXmin; if (E > validXmax) E = validXmax; int isValid = IN_RANGE(tx, validXmin, validXmax); if (IN_RANGE(xidx, 0, cols-1)) { prev[tx] = gpuResult[src][xidx]; } int computed; for (int i=0; i pyramid_height*2); calc_path(); }