| [3ff27cf] | 1 | #include <civlc.cvh>
|
|---|
| [cc87898] | 2 | //Forget the program
|
|---|
| 3 |
|
|---|
| [63d4ef2] | 4 | #include "cl.cvl"
|
|---|
| [cc87898] | 5 | #include <stdio.h>
|
|---|
| 6 | #include <stdlib.h>
|
|---|
| 7 | #include <string.h>
|
|---|
| 8 | #include <civlc.h>
|
|---|
| 9 |
|
|---|
| 10 | $input int NUM_DEVICES;
|
|---|
| 11 | $input int MAX_NUM_DEVICES;
|
|---|
| [3ff27cf] | 12 | $assume(0 < NUM_DEVICES && NUM_DEVICES < MAX_NUM_DEVICES);
|
|---|
| [cc87898] | 13 |
|
|---|
| 14 | $input int DATA_SIZE;
|
|---|
| 15 | $input int MAX_DATA_SIZE;
|
|---|
| [3ff27cf] | 16 | $assume(0 < DATA_SIZE && DATA_SIZE < MAX_NUM_DEVICES);
|
|---|
| [cc87898] | 17 |
|
|---|
| 18 | $input int LOCAL;
|
|---|
| 19 | $input int MAX_LOCAL;
|
|---|
| [3ff27cf] | 20 | $assume(0 < LOCAL && LOCAL < MAX_LOCAL);
|
|---|
| [cc87898] | 21 | //this args struct will hold all the parameters of for the kernel function
|
|---|
| 22 | typedef struct
|
|---|
| 23 | {
|
|---|
| 24 | //Variables for kernels
|
|---|
| [5693398] | 25 | /*
|
|---|
| [cc87898] | 26 | float * input;
|
|---|
| 27 | float * output;
|
|---|
| [4da0a7b] | 28 | int count;
|
|---|
| [5693398] | 29 | */
|
|---|
| 30 | void * input;
|
|---|
| 31 | void * output;
|
|---|
| [4da0a7b] | 32 | void * count;
|
|---|
| [b23f0a2] | 33 |
|
|---|
| [cc87898] | 34 |
|
|---|
| 35 | }args;
|
|---|
| 36 |
|
|---|
| 37 | /*
|
|---|
| 38 | args * argument - Takes in the struct, which is changed for every program using a different kernel
|
|---|
| 39 | */
|
|---|
| [4da0a7b] | 40 | cl_kernel clCreateKernel(args * argument, char * function)
|
|---|
| [cc87898] | 41 | {
|
|---|
| 42 | cl_kernel kernel;
|
|---|
| 43 | kernel.arguments = argument;
|
|---|
| [5693398] | 44 | kernel.method = function;
|
|---|
| [cc87898] | 45 |
|
|---|
| 46 | return kernel;
|
|---|
| 47 | }
|
|---|
| 48 |
|
|---|
| 49 | /*
|
|---|
| 50 | This is the kernel that processes compute with
|
|---|
| 51 | int workgroup - Gives the workgroup that a particular process came from, made by clEnqueueNDRangeKernel
|
|---|
| 52 | int global_id - Gives the global_id that a particular process has, given by workfunc
|
|---|
| 53 | int local_id - Gives the local_id that a particular process has, given by workfunc
|
|---|
| 54 | float* input - Kernel argument
|
|---|
| 55 | float* output - Kernel argument
|
|---|
| 56 | int count - Kernel argument
|
|---|
| 57 | */
|
|---|
| 58 | void square(int workgroup, int global_id, int local_id, float* input, float* output, int count)
|
|---|
| 59 | {
|
|---|
| 60 | //int i = get_global_id(0);
|
|---|
| 61 | int i = global_id;
|
|---|
| 62 | if (i < count)
|
|---|
| 63 | {
|
|---|
| 64 | output[i] = input[i] * input[i];
|
|---|
| 65 | //printf("output[%d] is %d\n", i, output[i]);
|
|---|
| 66 | }
|
|---|
| 67 | }
|
|---|
| 68 | /*
|
|---|
| 69 | workfunc assigns local and global ids, before calling the kernel.
|
|---|
| 70 | Note: The function should be identical in all transformations except the calling of the kernel, which means that it cannot be in openCLshared.cvl
|
|---|
| 71 | size_t local - The size of the workgroups, used to calculate blocks
|
|---|
| 72 | size_t global - The total amount of work to be done
|
|---|
| 73 | cl_kernel param - Holds the data for local_id, global_id, and the workgroup
|
|---|
| 74 | Use the print statement to get a better idea of what it means to split workgroups, local_ids, and global_ids
|
|---|
| 75 | */
|
|---|
| [4da0a7b] | 76 | void empty()
|
|---|
| 77 | {
|
|---|
| 78 | }
|
|---|
| 79 |
|
|---|
| [cc87898] | 80 | void workfunc(size_t local, size_t global, cl_kernel param)
|
|---|
| 81 | {
|
|---|
| 82 | $proc procs[local];
|
|---|
| 83 | for(int i = local * param.workgroup; i < local * param.workgroup + local; i++)
|
|---|
| 84 | {
|
|---|
| [b23f0a2] | 85 | int count = *(int*)(((args*)param.arguments)->count);
|
|---|
| [cc87898] | 86 | param.local_id = i % local;
|
|---|
| 87 | param.global_id = i;
|
|---|
| [4da0a7b] | 88 | printf("My workgroup id is %d, my global id is %d, my local id is %d, and my method is %s\n", param.workgroup, param.global_id, param.local_id, param.method);
|
|---|
| 89 |
|
|---|
| [b23f0a2] | 90 | procs[param.local_id] = $spawn square(param.workgroup, param.global_id, param.local_id, ((args*)param.arguments)->input, ((args*)param.arguments)->output, count);
|
|---|
| [4da0a7b] | 91 |
|
|---|
| [cc87898] | 92 | }
|
|---|
| 93 | for(int j = 0; j < local; j++)
|
|---|
| 94 | {
|
|---|
| 95 | $wait(procs[j]);
|
|---|
| 96 | }
|
|---|
| 97 | }
|
|---|
| 98 |
|
|---|
| 99 | /*
|
|---|
| 100 | Splits up and spawns processes based on global and local, using block
|
|---|
| 101 | TODO: remove cl_command_queue completely and put into a "just in case" file, currently not needed
|
|---|
| 102 | cl_command_queue commands - Holds a queue of the order that devices are to be executed
|
|---|
| 103 | cl_kernel kernel - Holds all the arguments for the kernel, as well as local_id, global_id, and the workgroup
|
|---|
| 104 | size_t global - The total amount of work to be done
|
|---|
| 105 | size_t local - Number to split into workgroups by
|
|---|
| 106 | */
|
|---|
| 107 | int clEnqueueNDRangeKernel(cl_kernel kernel, size_t global, size_t local)
|
|---|
| 108 | {
|
|---|
| [700ad59] | 109 |
|
|---|
| [3ff27cf] | 110 | $assert((global % local == 0));
|
|---|
| [cc87898] | 111 | int numworkgroups = global/local;
|
|---|
| 112 | cl_kernel param[numworkgroups];
|
|---|
| 113 | $proc procs[numworkgroups];
|
|---|
| 114 | //consider $parfor
|
|---|
| 115 |
|
|---|
| 116 | /*
|
|---|
| 117 | $domain(1) dom = {0 .. numworkgroups - 1};
|
|---|
| 118 |
|
|---|
| 119 | $for(int i: dom)
|
|---|
| 120 | {
|
|---|
| 121 | param[i] = kernel;
|
|---|
| 122 | param[i].workgroup = i;
|
|---|
| 123 | }
|
|---|
| 124 | $parfor(int i: dom)
|
|---|
| 125 | {
|
|---|
| 126 | workfunc(local, global, param[i]);
|
|---|
| 127 | }
|
|---|
| 128 | */
|
|---|
| 129 |
|
|---|
| 130 | for(int i = 0; i < global/local; i++)
|
|---|
| 131 | {
|
|---|
| 132 | param[i] = kernel;
|
|---|
| 133 | param[i].workgroup = i;
|
|---|
| 134 | procs[i] = $spawn workfunc(local, global, param[i]);
|
|---|
| 135 | }
|
|---|
| 136 |
|
|---|
| 137 | //this part here is the new clFinish(commands);
|
|---|
| 138 | for(int i = 0; i < global/local; i++)
|
|---|
| 139 | {
|
|---|
| 140 | $wait(procs[i]);
|
|---|
| 141 | }
|
|---|
| 142 |
|
|---|
| 143 | return CL_SUCCESS;
|
|---|
| [700ad59] | 144 |
|
|---|
| [cc87898] | 145 | }
|
|---|
| 146 |
|
|---|
| 147 |
|
|---|
| 148 | int main(int argc, char** argv)
|
|---|
| 149 | {
|
|---|
| [5693398] | 150 | //Make function pointers for every method
|
|---|
| 151 | void (*squarePtr) (int, int, int, float *, float *, int);
|
|---|
| 152 | squarePtr = □
|
|---|
| 153 |
|
|---|
| [cc87898] | 154 | args * arguments;
|
|---|
| 155 | arguments = (args*)malloc(sizeof(args));
|
|---|
| 156 |
|
|---|
| 157 | float data[DATA_SIZE]; // original data set given to device
|
|---|
| 158 | float results[DATA_SIZE]; // results returned from device
|
|---|
| 159 | unsigned int correct; // number of correct results returned
|
|---|
| 160 |
|
|---|
| 161 | size_t global; // global domain size for our calculation
|
|---|
| 162 | size_t local; // local domain size for our calculation
|
|---|
| 163 |
|
|---|
| 164 | cl_device_id device_id; // compute device id
|
|---|
| 165 | cl_context context; // compute context
|
|---|
| 166 | cl_command_queue commands; // compute command queue
|
|---|
| 167 | //cl_program program; // compute program
|
|---|
| 168 | cl_kernel kernel; // compute kernel
|
|---|
| 169 | int err;
|
|---|
| 170 |
|
|---|
| 171 | float * input; // device memory used for the input array
|
|---|
| 172 | float * output; // device memory used for the output array
|
|---|
| 173 |
|
|---|
| 174 | //Puts in data for input
|
|---|
| 175 | unsigned int count = DATA_SIZE;
|
|---|
| 176 | for(int i = 0; i < count; i++)
|
|---|
| 177 | {
|
|---|
| 178 | data[i] = i;
|
|---|
| 179 | }
|
|---|
| 180 |
|
|---|
| 181 |
|
|---|
| 182 | //clCreateProgram is far different from the real version, this just stores parameters for the kernel
|
|---|
| 183 | //In order to make this clear, it is clCreateProgram and not something like clCreateProgramFromSource, which actually exists in openCL code
|
|---|
| 184 | //program = clCreateProgram(arguments);
|
|---|
| 185 |
|
|---|
| [5693398] | 186 | kernel = clCreateKernel(arguments, "square");
|
|---|
| [cc87898] | 187 |
|
|---|
| 188 | //replaces clCreateBuffer
|
|---|
| 189 | input = (float *) malloc(sizeof(float) * count);
|
|---|
| 190 | output = (float *) malloc(sizeof(float) * count);
|
|---|
| 191 |
|
|---|
| 192 |
|
|---|
| 193 | //replaces clEnqueueWriteBuffer, puts data into the input to be put into the kernel arguments
|
|---|
| 194 | memcpy(input, data, sizeof(float) * count);
|
|---|
| 195 |
|
|---|
| 196 | /*
|
|---|
| 197 | err = 0;
|
|---|
| 198 | err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
|
|---|
| 199 | err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
|
|---|
| 200 | err = clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
|
|---|
| 201 | */
|
|---|
| 202 |
|
|---|
| 203 | //use pointer instead of malloc + memcpy for global variables
|
|---|
| 204 | //((args*)kernel.arguments)->input = (float*)malloc(sizeof(float) * count);
|
|---|
| 205 | //memcpy(((args *)kernel.arguments)->input, input, sizeof(float) * count);
|
|---|
| 206 | ((args*)kernel.arguments)->input = input;
|
|---|
| 207 |
|
|---|
| 208 | //((args*)kernel.arguments)->output = (float*)malloc(sizeof(float) * count);
|
|---|
| 209 | //memcpy(((args*)kernel.arguments)->output, output, sizeof(float));
|
|---|
| 210 | ((args*)kernel.arguments)->output = output;
|
|---|
| 211 |
|
|---|
| [4da0a7b] | 212 | ((args*)kernel.arguments)->count = (int *)malloc(sizeof(int));
|
|---|
| 213 | memcpy(((args*)kernel.arguments)->count, &count, sizeof(int));
|
|---|
| [cc87898] | 214 | //no malloc needed for non pointers
|
|---|
| 215 |
|
|---|
| 216 | //clGetKernelWorkGroupInfo would get a local size optimal for a device, but is not needed here
|
|---|
| 217 | local = LOCAL;
|
|---|
| 218 |
|
|---|
| 219 | global = count;
|
|---|
| 220 | /*
|
|---|
| 221 | commands holds the "order" of devices
|
|---|
| 222 | kernel holds program, which holds variables
|
|---|
| 223 | offset not implemented
|
|---|
| 224 | */
|
|---|
| 225 | err = clEnqueueNDRangeKernel(kernel, global, local);
|
|---|
| 226 |
|
|---|
| 227 | //Replaces clEnqueueReadBuffer, which takes one of the saved variables and puts it out to another one
|
|---|
| 228 | memcpy(results, output, sizeof(float) * count);
|
|---|
| 229 |
|
|---|
| 230 | //verifies that all values in results are actually squared
|
|---|
| 231 | correct = 0;
|
|---|
| 232 | for(int i = 0; i < count; i++)
|
|---|
| 233 | {
|
|---|
| 234 | if(results[i] == data[i] * data[i])
|
|---|
| 235 | {
|
|---|
| 236 | correct++;
|
|---|
| 237 | }
|
|---|
| 238 | }
|
|---|
| 239 | printf("Computed '%d/%d' correct values!\n", correct, count);
|
|---|
| 240 |
|
|---|
| 241 | //TODO: Think of using void * array instead of regular arguments to make freeing easier
|
|---|
| 242 |
|
|---|
| 243 | free(((args*)kernel.arguments)->input);
|
|---|
| 244 | free(((args*)kernel.arguments)->output);
|
|---|
| [b23f0a2] | 245 | free(((args*)kernel.arguments)->count);
|
|---|
| [cc87898] | 246 | /*
|
|---|
| 247 | free(input);
|
|---|
| 248 | free(output);
|
|---|
| 249 | */
|
|---|
| 250 | free(arguments);
|
|---|
| 251 |
|
|---|
| 252 | return 0;
|
|---|
| 253 | }
|
|---|
| [4da0a7b] | 254 | /*Note, pointers to global memory cannot be stored in local memory
|
|---|
| 255 | support for global, local, constant and group, local being the default
|
|---|
| 256 | private (local) - variable assignment, or malloc + memcpy
|
|---|
| 257 |
|
|---|
| 258 | global - pointer with memory location. For regular non pointer variables, make a pointer and assign it to it, then use the pointer
|
|---|
| 259 | Global variables can be declared in program source but they must use the "constant" address space qualifier and need to be initialized.
|
|---|
| 260 | You cannot have global variables that can be modified by kernels and where the modified values are persistent across work-groups and kernel executions.
|
|---|
| 261 | For this, you should use memory objects instead.
|
|---|
| 262 |
|
|---|
| 263 | Constant is just like global, but read only
|
|---|
| 264 |
|
|---|
| 265 | group - ???
|
|---|
| 266 | */
|
|---|
| [cc87898] | 267 |
|
|---|
| 268 |
|
|---|