// CIVL-C model of the OpenCL "square" example.
// This version "forgets the program": there is no cl_program object; the
// kernel object carries a pointer to its argument struct directly.
#include "openCLshared.cvl"
// NOTE(review): the system header names were missing from the include
// directives in the text this file was restored from. The three below are
// reconstructed from the library calls made here (printf, malloc/free,
// memcpy). The original had four system includes -- confirm these and
// restore the fourth.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

$input int NUM_DEVICES;
$input int MAX_NUM_DEVICES;
$assume 0 < NUM_DEVICES && NUM_DEVICES < MAX_NUM_DEVICES;
$input int DATA_SIZE;
$input int MAX_DATA_SIZE;
// Bound DATA_SIZE by MAX_DATA_SIZE. The previous bound was MAX_NUM_DEVICES,
// a copy-paste slip that left MAX_DATA_SIZE unused; runs must now supply
// MAX_DATA_SIZE to bound the data size.
$assume 0 < DATA_SIZE && DATA_SIZE < MAX_DATA_SIZE;
$input int LOCAL;
$input int MAX_LOCAL;
$assume 0 < LOCAL && LOCAL < MAX_LOCAL;

// Arguments of the kernel. The fields are not initialized here; main()
// fills them in before the kernel is enqueued.
typedef struct
{
  float * input;   // values to be squared
  float * output;  // receives input[i] * input[i]
  int count;       // number of elements to process
} args;

// Builds a kernel object that refers to `argument`.
// Only the `arguments` field is set; the remaining cl_kernel fields are
// assigned later by clEnqueueNDRangeKernel() and workfunc().
cl_kernel clCreateKernel(args * argument)
{
  cl_kernel kernel;

  kernel.arguments = argument;
  return kernel;
}

// The kernel: squares one element.
//   workgroup, local_id - ids of the calling work-item (unused by this kernel)
//   global_id           - index of the element to process; stands in for
//                         get_global_id(0)
//   input, output       - source and destination arrays
//   count               - number of valid elements; larger ids do nothing
void square(int workgroup, int global_id, int local_id, float* input, float* output, int count)
{
  int i = global_id;

  if (i < count)
  {
    output[i] = input[i] * input[i];
  }
}

// Runs every work-item of one work-group, sequentially.
//   local  - work-group size
//   global - total number of work-items (not used here)
//   param  - private copy of the kernel whose `workgroup` field selects the
//            group; its id fields are overwritten for each work-item
//
// Note: naming a type that does not exist in this signature (e.g. 'ckernel'
// instead of 'cl_kernel') made the CIVL parser report an unhelpful
// ParseException ("required (...)+ loop did not match anything at input
// 'ckernel'").
void workfunc(size_t local, size_t global, cl_kernel param)
{
  // The argument struct is the same for every work-item of the group.
  args * kargs = (args*)param.arguments;

  for (int i = local * param.workgroup; i < local * param.workgroup + local; i++)
  {
    param.local_id = i % local;
    param.global_id = i;
    printf("My workgroup id is %d, my global id is %d, my local id is %d\n", param.workgroup, param.global_id, param.local_id);
    square(param.workgroup, param.global_id, param.local_id, kargs->input, kargs->output, kargs->count);
  }
}

// Launches the kernel over `global` work-items in groups of `local`.
// One process is spawned per work-group, each with its own copy of the
// kernel, and all of them are awaited before returning.
//   commands - command queue (not used by this model)
//   kernel   - kernel to run; copied once per work-group
//   global   - total number of work-items; must be a multiple of `local`
//   local    - work-group size
// Returns CL_SUCCESS.
int clEnqueueNDRangeKernel(cl_command_queue commands, cl_kernel kernel, int global, int local)
{
  $assert(global % local == 0);

  cl_kernel param[global/local];
  $proc procs[global/local];

  for (int i = 0; i < global/local; i++)
  {
    param[i] = kernel;
    param[i].workgroup = i;
    procs[i] = $spawn workfunc(local, global, param[i]);
  }

  // This wait loop takes the place of clFinish(commands).
  for (int i = 0; i < global/local; i++)
  {
    $wait(procs[i]);
  }
  return CL_SUCCESS;
}

// Host program: fills an array with 0..DATA_SIZE-1, squares it through the
// modelled kernel and reports how many results match data[i] * data[i].
int main(int argc, char** argv)
{
  args * arguments;
  arguments = (args*)malloc(sizeof(args));

  float data[DATA_SIZE];      // original data set given to device
  float results[DATA_SIZE];   // results returned from device
  unsigned int correct;       // number of correct results returned

  size_t global;              // global domain size for our calculation
  size_t local;               // local domain size for our calculation

  cl_device_id device_id;     // compute device id
  cl_context context;         // compute context (unused until clCreateContext is modelled)
  cl_command_queue commands;  // compute command queue
  cl_kernel kernel;           // compute kernel

  float * input;              // device memory used for the input array
  float * output;             // device memory used for the output array

  unsigned int count = DATA_SIZE;

  for (int i = 0; i < count; i++)
  {
    data[i] = i;
  }

  int err = clGetDeviceIDs(1, &device_id);

  // clCreateContext is ignored for now, until there is an example that uses
  // several contexts; clCreateCommandQueue could take the context later.
  commands = clCreateCommandQueue(device_id);

  // No program object in this version: the kernel is created straight from
  // the argument struct.
  kernel = clCreateKernel(arguments);

  // Stands in for clCreateBuffer.
  input = (float *) malloc(sizeof(float) * count);
  output = (float *) malloc(sizeof(float) * count);

  // Stands in for clEnqueueWriteBuffer.
  memcpy(input, data, sizeof(float) * count);

  // Stands in for the three clSetKernelArg calls. The kernel shares the
  // buffers by pointer instead of receiving malloc + memcpy copies, so
  // `input` / `output` and the struct fields name the same allocations.
  ((args*)kernel.arguments)->input = input;
  ((args*)kernel.arguments)->output = output;
  ((args*)kernel.arguments)->count = count;  // no malloc needed for non-pointers

  // Stands in for clGetKernelWorkGroupInfo.
  local = LOCAL;
  global = count;

  // `commands` holds the "order" of devices, `kernel` holds the arguments;
  // a global work offset is not implemented.
  err = clEnqueueNDRangeKernel(commands, kernel, global, local);

  memcpy(results, output, sizeof(float) * count);

  correct = 0;
  for (int i = 0; i < count; i++)
  {
    if (results[i] == data[i] * data[i])
    {
      correct++;
    }
  }
  printf("Computed '%d/%d' correct values!\n", correct, count);

  // Each buffer is released exactly once. The kernel's argument fields alias
  // `input` and `output`, so freeing through both names (as was done before)
  // freed the same memory twice.
  free(input);
  free(output);
  free(arguments);
  return 0;
}