| 1 | //Forget the program
|
|---|
| 2 |
|
|---|
| 3 | #include "openCLshared.cvl"
|
|---|
| 4 | #include <stdio.h>
|
|---|
| 5 | #include <stdlib.h>
|
|---|
| 6 | #include <string.h>
|
|---|
| 7 | #include <civlc.h>
|
|---|
| 8 |
|
|---|
| 9 | $input int NUM_DEVICES;
|
|---|
| 10 | $input int MAX_NUM_DEVICES;
|
|---|
| 11 | $assume 0 < NUM_DEVICES && NUM_DEVICES < MAX_NUM_DEVICES;
|
|---|
| 12 |
|
|---|
| 13 | $input int DATA_SIZE;
|
|---|
| 14 | $input int MAX_DATA_SIZE;
|
|---|
| 15 | $assume 0 < DATA_SIZE && DATA_SIZE < MAX_NUM_DEVICES;
|
|---|
| 16 |
|
|---|
| 17 | $input int LOCAL;
|
|---|
| 18 | $input int MAX_LOCAL;
|
|---|
| 19 | $assume 0 < LOCAL && LOCAL < MAX_LOCAL;
|
|---|
| 20 | //Didn't initialize variables here
|
|---|
/*
 * Argument bundle handed to the square kernel. A pointer to one of these is
 * stored in cl_kernel.arguments by clCreateKernel and read back by workfunc.
 */
typedef struct
{
    float *input;   /* values to be squared */
    float *output;  /* destination for the squared values */
    int count;      /* number of elements; indices >= count are skipped by square */
} args;
|
|---|
| 30 |
|
|---|
| 31 |
|
|---|
| 32 | cl_kernel clCreateKernel(args * argument)
|
|---|
| 33 | {
|
|---|
| 34 | cl_kernel kernel;
|
|---|
| 35 | kernel.arguments = argument;
|
|---|
| 36 |
|
|---|
| 37 | return kernel;
|
|---|
| 38 | }
|
|---|
| 39 |
|
|---|
| 40 | //kernel
|
|---|
/*
 * Kernel body: squares one element.
 *
 * Writes input[global_id] * input[global_id] to output[global_id] when
 * global_id < count; otherwise does nothing. global_id stands in for what
 * get_global_id(0) would return in the OpenCL kernel.
 *
 * workgroup and local_id are accepted but not used by this kernel.
 */
void square(int workgroup, int global_id, int local_id, float* input, float* output, int count)
{
    /* Work-items past the end of the data have nothing to do. */
    if (global_id >= count)
    {
        return;
    }

    const float value = input[global_id];
    output[global_id] = value * value;
}
|
|---|
| 51 |
|
|---|
| 52 | //Note: using an undeclared type name (here 'ckernel') produces a confusing parser error rather than an "unknown type" message:
|
|---|
| 53 | //edu.udel.cis.vsl.abc.parse.IF.ParseException: /Users/fuufusuu/Documents/workspace/CIVL/examples/translation/openclversion2.1/square.cvl line 71:43 required (...)+ loop did not match anything at input 'ckernel'
|
|---|
| 54 | //At "ckernel" in square.cvl 71.43
|
|---|
| 55 | void workfunc(size_t local, size_t global, cl_kernel param)
|
|---|
| 56 | {
|
|---|
| 57 | for(int i = local * param.workgroup; i < local * param.workgroup + local; i++)
|
|---|
| 58 | {
|
|---|
| 59 | param.local_id = i % local;
|
|---|
| 60 | param.global_id = i;
|
|---|
| 61 | printf("My workgroup id is %d, my global id is %d, my local id is %d\n", param.workgroup, param.global_id, param.local_id);
|
|---|
| 62 | square(param.workgroup, param.global_id, param.local_id, ((args*)param.arguments)->input, ((args*)param.arguments)->output, ((args*)param.arguments)->count);
|
|---|
| 63 | }
|
|---|
| 64 | }
|
|---|
| 65 |
|
|---|
| 66 | int clEnqueueNDRangeKernel(cl_command_queue commands, cl_kernel kernel, int global, int local)
|
|---|
| 67 | {
|
|---|
| 68 | $assert(global % local == 0);
|
|---|
| 69 | cl_kernel param[global/local];
|
|---|
| 70 | $proc procs[global/local];
|
|---|
| 71 | for(int i = 0; i < global/local; i++)
|
|---|
| 72 | {
|
|---|
| 73 | param[i] = kernel;
|
|---|
| 74 | param[i].workgroup = i;
|
|---|
| 75 | procs[i] = $spawn workfunc(local, global, param[i]);
|
|---|
| 76 | }
|
|---|
| 77 |
|
|---|
| 78 | //this part here is the new clFinish(commands);
|
|---|
| 79 | for(int i = 0; i < global/local; i++)
|
|---|
| 80 | {
|
|---|
| 81 | $wait(procs[i]);
|
|---|
| 82 | }
|
|---|
| 83 |
|
|---|
| 84 | return CL_SUCCESS;
|
|---|
| 85 | }
|
|---|
| 86 |
|
|---|
| 87 |
|
|---|
| 88 | int main(int argc, char** argv)
|
|---|
| 89 | {
|
|---|
| 90 | args * arguments;
|
|---|
| 91 | arguments = (args*)malloc(sizeof(args));
|
|---|
| 92 |
|
|---|
| 93 | float data[DATA_SIZE]; // original data set given to device
|
|---|
| 94 | float results[DATA_SIZE]; // results returned from device
|
|---|
| 95 | unsigned int correct; // number of correct results returned
|
|---|
| 96 |
|
|---|
| 97 | size_t global; // global domain size for our calculation
|
|---|
| 98 | size_t local; // local domain size for our calculation
|
|---|
| 99 |
|
|---|
| 100 | cl_device_id device_id; // compute device id
|
|---|
| 101 | cl_context context; // compute context
|
|---|
| 102 | cl_command_queue commands; // compute command queue
|
|---|
| 103 | //cl_program program; // compute program
|
|---|
| 104 | cl_kernel kernel; // compute kernel
|
|---|
| 105 |
|
|---|
| 106 |
|
|---|
| 107 | float * input; // device memory used for the input array
|
|---|
| 108 | float * output; // device memory used for the output array
|
|---|
| 109 |
|
|---|
| 110 | unsigned int count = DATA_SIZE;
|
|---|
| 111 | for(int i = 0; i < count; i++)
|
|---|
| 112 | {
|
|---|
| 113 | data[i] = i;
|
|---|
| 114 | }
|
|---|
| 115 |
|
|---|
| 116 | int err = clGetDeviceIDs(1, &device_id);
|
|---|
| 117 |
|
|---|
| 118 | //ignore clCreateContext for now, until we get an example that uses multiple ones
|
|---|
| 119 |
|
|---|
| 120 | //clCreateCommandQueue, could use context later
|
|---|
| 121 | commands = clCreateCommandQueue(device_id);
|
|---|
| 122 |
|
|---|
| 123 | //clCreateProgram is far different from the real version, this just stores parameters for the kernel
|
|---|
| 124 | //In order to make this clear, it is clCreateProgram and not something like clCreateProgramFromSource, which actually exists in openCL code
|
|---|
| 125 | //program = clCreateProgram(arguments);
|
|---|
| 126 |
|
|---|
| 127 | kernel = clCreateKernel(arguments);
|
|---|
| 128 | //printf("%s", kernel);
|
|---|
| 129 |
|
|---|
| 130 | //comes from clCreateBuffer
|
|---|
| 131 | input = (float *) malloc(sizeof(float) * count);
|
|---|
| 132 | output = (float *) malloc(sizeof(float) * count);
|
|---|
| 133 |
|
|---|
| 134 |
|
|---|
| 135 |
|
|---|
| 136 | memcpy(input, data, sizeof(float) * count);
|
|---|
| 137 | //clEnqueueWriteBuffer
|
|---|
| 138 |
|
|---|
| 139 | /*
|
|---|
| 140 | err = 0;
|
|---|
| 141 | err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
|
|---|
| 142 | err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
|
|---|
| 143 | err = clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
|
|---|
| 144 | */
|
|---|
| 145 | //kernel.arguments.input = input;
|
|---|
| 146 |
|
|---|
| 147 | //kernel.arguments->input;
|
|---|
| 148 |
|
|---|
| 149 | //use pointer instead of malloc + memcpy for global variables
|
|---|
| 150 | //((args*)kernel.arguments)->input = (float*)malloc(sizeof(float) * count);
|
|---|
| 151 | //memcpy(((args *)kernel.arguments)->input, input, sizeof(float) * count);
|
|---|
| 152 | ((args*)kernel.arguments)->input = input;
|
|---|
| 153 |
|
|---|
| 154 | //((args*)kernel.arguments)->output = (float*)malloc(sizeof(float) * count);
|
|---|
| 155 | //memcpy(((args*)kernel.arguments)->output, output, sizeof(float));
|
|---|
| 156 | ((args*)kernel.arguments)->output = output;
|
|---|
| 157 |
|
|---|
| 158 | ((args*)kernel.arguments)->count = count;
|
|---|
| 159 | //no malloc needed for non pointers
|
|---|
| 160 |
|
|---|
| 161 | //clGetKernelWorkGroupInfo
|
|---|
| 162 | local = LOCAL;
|
|---|
| 163 |
|
|---|
| 164 | global = count;
|
|---|
| 165 | /*
|
|---|
| 166 | commands holds the "order" of devices
|
|---|
| 167 | kernel holds program, which holds variables
|
|---|
| 168 | offset not implemented
|
|---|
| 169 | */
|
|---|
| 170 | err = clEnqueueNDRangeKernel(commands, kernel, global, local);
|
|---|
| 171 |
|
|---|
| 172 | memcpy(results, output, sizeof(float) * count);
|
|---|
| 173 |
|
|---|
| 174 | correct = 0;
|
|---|
| 175 | for(int i = 0; i < count; i++)
|
|---|
| 176 | {
|
|---|
| 177 | if(results[i] == data[i] * data[i])
|
|---|
| 178 | {
|
|---|
| 179 | correct++;
|
|---|
| 180 | }
|
|---|
| 181 | }
|
|---|
| 182 | printf("Computed '%d/%d' correct values!\n", correct, count);
|
|---|
| 183 |
|
|---|
| 184 | free(((args*)kernel.arguments)->input);
|
|---|
| 185 | free(((args*)kernel.arguments)->output);
|
|---|
| 186 |
|
|---|
| 187 | free(input);
|
|---|
| 188 | free(output);
|
|---|
| 189 | free(arguments);
|
|---|
| 190 |
|
|---|
| 191 | return 0;
|
|---|
| 192 | }
|
|---|
| 193 |
|
|---|