#include #include #include //RANDOM IS UNINTERPRETED //$scope opencl_scope = $here; //$malloc(opencl_scope, ....); $input int DATA_SIZE; $input int NUM_DEVICES; $input int MAX_DATA_SIZE; $input int MAX_NUM_DEVICES; $input int CL_DEVICE_MAX_WORK_GROUP_SIZE; $input int LOCAL; $assume 0 < DATA_SIZE && DATA_SIZE < MAX_DATA_SIZE; $assume 0 < NUM_DEVICES && NUM_DEVICES < MAX_NUM_DEVICES; $gbarrier gbarrier = $gbarrier_create($here, NUM_DEVICES); //struct goes here typedef struct { int device_id; int workgroup; int global_id; int local_id; //kernel variables int * input; int * output; int count; }kernel; //kernel goes here void square(int workgroup, int global_id, int local_id, int* input, int* output, const unsigned int count) { //int i = get_global_id(0); int i = global_id; if (i < count) { output[i] = input[i] * input[i]; //printf("output[%d] is %d\n", i, output[i]); } } void workfunc(size_t local, size_t global, kernel param) { for(int i = local * param.workgroup; i < local * param.workgroup + local; i++) { param.local_id = i % local; param.global_id = i; //printf("My workgroup id is %d, my global id is %d, my local id is %d\n", param.workgroup, param.global_id, param.local_id); square(param.workgroup, param.global_id, param.local_id, param.input, param.output, param.count); } } /* Note that the original lines were "__kernel void square( \n" \ " __global int* input, \n" \ " __global int* output, \n" \ " const unsigned int count) \n" \ Any parser must take note of and don't input \n, "", or \ as is __global int * input, __global int * output, int count; */ int main(int argc, char** argv) { //get the number from clGetDeviceIDs 3rd parameter //int num_devices = 1; $assert (LOCAL < CL_DEVICE_MAX_WORK_GROUP_SIZE); //variables from __kernel come here int * input; int * output; int count; size_t global; // global domain size for our calculation size_t local; // local domain size for our calculation //from the code before int data[DATA_SIZE]; // original data set given to device int results[DATA_SIZE]; // results returned from device int correct; // number of correct results returned //handle the definitions being put in different places int i = 0; count = DATA_SIZE; //count defined here for(i = 0; i < count; i++) { data[i] = i; } /* To do random, define constant global counter, randcount int rand_count = 0; $abstract float rand_abs(int i); rand_abs(0), rand_abs(1), .. float rand() { return rand_abs(rand_count++); } */ //comes from clCreateBuffer input = (int *) malloc(sizeof(int) * count); output = (int *) malloc(sizeof(int) * count); //possible heaps //Possibly keep a list of variables, with a flag for whether they are init or not //Not init, malloc one from what is found in //output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * count, NULL, NULL); memcpy(input, data, sizeof(int) * count); //came from clEnqueueWriteBuffer rather than the start of code //Phase after this is the definitions int device_id[NUM_DEVICES]; //put device_ids for(int i = 0; i < NUM_DEVICES; i++) { device_id[i] = i; } //from clCreateContext, uses the device input //but there may be a loop, take it into account in the next example // //"Get" local size from clEnqueueNDRangeKernel, but is really an input local = LOCAL; global = count; //Creates an array of the struct according to clEnqueueNDRangeKernel //Have to split array into parts using local and global, and those are a workgroup //For now, assume local is 1, or else inputting the arrays will be odd, for now $assert(global%local == 0); kernel param[global/local]; for(int i = 0; i < global/local; i++) { //Also picks the device to be used param[i].device_id = device_id[0]; //other parts of the struct param[i].input = input; param[i].output = output; param[i].count = count; } //spawns processes according to parameters in clEnqueueNDRangeKernel $proc procs[global/local]; for(int i = 0; i < global/local; i++) { param[i].workgroup = i; //procs[i] = $spawn square(param[i].global_id, param[i].input, param[i].output, param[i].count); procs[i] = $spawn workfunc(local, global, param[i]); } for(int i = 0; i < global/local; i++) { $wait(procs[i]); } //$barrier barrier = $barrier_create($here, gbarrier, now[i].device_id); //$barrier_call(barrier); //$barrier_destroy(barrier); $gbarrier_destroy(gbarrier); //use the information from clEnqueueReadBuffer //may have to alter later memcpy(results, output, sizeof(int) * count); correct = 0; for(i = 0; i < count; i++) { //printf("results at %i is %d, data^2 is %d \n", i, results[i], data[i] * data[i]); if(results[i] == data[i] * data[i]) { correct++; } } // Print a brief summary detailing the results // printf("Computed '%d/%d' correct values!\n", correct, count); free(input); free(output); return 0; }