| 1 | #include <stdio.h>
|
|---|
| 2 | #include <stdlib.h>
|
|---|
| 3 | #include <string.h>
|
|---|
| 4 |
|
|---|
| 5 | //RANDOM IS UNINTERPRETED
|
|---|
| 6 | //$scope opencl_scope = $here;
|
|---|
| 7 | //$malloc(opencl_scope, ....);
|
|---|
| 8 |
|
|---|
| 9 | $input int DATA_SIZE;
|
|---|
| 10 | $input int NUM_DEVICES;
|
|---|
| 11 | $input int MAX_DATA_SIZE;
|
|---|
| 12 | $input int MAX_NUM_DEVICES;
|
|---|
| 13 | $input int CL_DEVICE_MAX_WORK_GROUP_SIZE;
|
|---|
| 14 | $input int LOCAL;
|
|---|
| 15 | $assume 0 < DATA_SIZE && DATA_SIZE < MAX_DATA_SIZE;
|
|---|
| 16 | $assume 0 < NUM_DEVICES && NUM_DEVICES < MAX_NUM_DEVICES;
|
|---|
| 17 | $gbarrier gbarrier = $gbarrier_create($here, NUM_DEVICES);
|
|---|
| 18 | //struct goes here
|
|---|
| 19 |
|
|---|
| 20 |
|
|---|
/*
 * One launch record per work-group: the identifiers of the work-item being
 * executed plus the arguments that are forwarded to the square() kernel.
 */
typedef struct
{
    /* Identification */
    int device_id;   /* device chosen for this work-group */
    int workgroup;   /* work-group index */
    int global_id;   /* global id of the current work-item */
    int local_id;    /* id of the current work-item inside its work-group */

    /* Kernel arguments */
    int *input;      /* buffer read by the kernel */
    int *output;     /* buffer written by the kernel */
    int count;       /* number of elements to process */
} kernel;
| 33 |
|
|---|
| 34 | //kernel goes here
|
|---|
/*
 * Kernel body for a single work-item: stores input[global_id] squared into
 * output[global_id] when global_id is below count, and does nothing otherwise.
 *
 * workgroup, local_id - accepted for a uniform kernel signature; not used here.
 * global_id           - index of the element handled by this work-item
 *                       (stands in for OpenCL's get_global_id(0)).
 * input               - buffer that is read.
 * output              - buffer that is written.
 * count               - number of valid elements in both buffers.
 */
void square(int workgroup, int global_id, int local_id, int* input, int* output, const unsigned int count)
{
    const int idx = global_id;

    /* Work-items past the end of the data have nothing to do. */
    if (!(idx < count))
    {
        return;
    }

    const int value = input[idx];
    output[idx] = value * value;
}
| 45 |
|
|---|
| 46 | void workfunc(size_t local, size_t global, kernel param)
|
|---|
| 47 | {
|
|---|
| 48 | for(int i = local * param.workgroup; i < local * param.workgroup + local; i++)
|
|---|
| 49 | {
|
|---|
| 50 | param.local_id = i % local;
|
|---|
| 51 | param.global_id = i;
|
|---|
| 52 | //printf("My workgroup id is %d, my global id is %d, my local id is %d\n", param.workgroup, param.global_id, param.local_id);
|
|---|
| 53 | square(param.workgroup, param.global_id, param.local_id, param.input, param.output, param.count);
|
|---|
| 54 | }
|
|---|
| 55 | }
|
|---|
| 56 | /*
|
|---|
| 57 | Note that the original lines were "__kernel void square( \n" \
|
|---|
| 58 | " __global int* input, \n" \
|
|---|
| 59 | " __global int* output, \n" \
|
|---|
| 60 | " const unsigned int count) \n" \
|
|---|
| 61 |
|
|---|
| 62 | Any parser must recognise the \n escapes, the quote characters, and the trailing \ continuations, and must not copy them into the output as is
|
|---|
| 63 | __global int * input, __global int * output, int count;
|
|---|
| 64 | */
|
|---|
| 65 |
|
|---|
| 66 |
|
|---|
| 67 | int main(int argc, char** argv)
|
|---|
| 68 | {
|
|---|
| 69 | //get the number from clGetDeviceIDs 3rd parameter
|
|---|
| 70 | //int num_devices = 1;
|
|---|
| 71 | $assert (LOCAL < CL_DEVICE_MAX_WORK_GROUP_SIZE);
|
|---|
| 72 | //variables from __kernel come here
|
|---|
| 73 | int * input;
|
|---|
| 74 | int * output;
|
|---|
| 75 | int count;
|
|---|
| 76 |
|
|---|
| 77 | size_t global; // global domain size for our calculation
|
|---|
| 78 | size_t local; // local domain size for our calculation
|
|---|
| 79 |
|
|---|
| 80 | //from the code before
|
|---|
| 81 | int data[DATA_SIZE]; // original data set given to device
|
|---|
| 82 | int results[DATA_SIZE]; // results returned from device
|
|---|
| 83 | int correct; // number of correct results returned
|
|---|
| 84 | //handle the definitions being put in different places
|
|---|
| 85 |
|
|---|
| 86 | int i = 0;
|
|---|
| 87 | count = DATA_SIZE; //count defined here
|
|---|
| 88 | for(i = 0; i < count; i++)
|
|---|
| 89 | {
|
|---|
| 90 | data[i] = i;
|
|---|
| 91 | }
|
|---|
| 92 | /*
|
|---|
| 93 | To do random, define constant global counter, randcount
|
|---|
| 94 | int rand_count = 0;
|
|---|
| 95 | $abstract float rand_abs(int i);
|
|---|
| 96 | rand_abs(0), rand_abs(1), ..
|
|---|
| 97 | float rand() {
|
|---|
| 98 | return rand_abs(rand_count++);
|
|---|
| 99 | }
|
|---|
| 100 | */
|
|---|
| 101 | //comes from clCreateBuffer
|
|---|
| 102 | input = (int *) malloc(sizeof(int) * count);
|
|---|
| 103 | output = (int *) malloc(sizeof(int) * count);
|
|---|
| 104 | //possible heaps
|
|---|
| 105 |
|
|---|
| 106 | //Possibly keep a list of variables, with a flag for whether they are init or not
|
|---|
| 107 | //Not init, malloc one from what is found in
|
|---|
| 108 | //output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * count, NULL, NULL);
|
|---|
| 109 |
|
|---|
| 110 |
|
|---|
| 111 | memcpy(input, data, sizeof(int) * count);
|
|---|
| 112 |
|
|---|
| 113 | //came from clEnqueueWriteBuffer rather than the start of code
|
|---|
| 114 |
|
|---|
| 115 |
|
|---|
| 116 | //Phase after this is the definitions
|
|---|
| 117 | int device_id[NUM_DEVICES];
|
|---|
| 118 | //put device_ids
|
|---|
| 119 | for(int i = 0; i < NUM_DEVICES; i++)
|
|---|
| 120 | {
|
|---|
| 121 | device_id[i] = i;
|
|---|
| 122 | }
|
|---|
| 123 |
|
|---|
| 124 | //from clCreateContext, uses the device input
|
|---|
| 125 | //but there may be a loop, take it into account in the next example
|
|---|
| 126 |
|
|---|
| 127 | //
|
|---|
| 128 | //"Get" local size from clEnqueueNDRangeKernel, but is really an input
|
|---|
| 129 | local = LOCAL;
|
|---|
| 130 |
|
|---|
| 131 |
|
|---|
| 132 | global = count;
|
|---|
| 133 | //Creates an array of the struct according to clEnqueueNDRangeKernel
|
|---|
| 134 | //Have to split array into parts using local and global, and those are a workgroup
|
|---|
| 135 | //For now, assume local is 1, or else inputting the arrays will be odd, for now
|
|---|
| 136 | $assert(global%local == 0);
|
|---|
| 137 | kernel param[global/local];
|
|---|
| 138 | for(int i = 0; i < global/local; i++)
|
|---|
| 139 | {
|
|---|
| 140 | //Also picks the device to be used
|
|---|
| 141 | param[i].device_id = device_id[0];
|
|---|
| 142 | //other parts of the struct
|
|---|
| 143 | param[i].input = input;
|
|---|
| 144 | param[i].output = output;
|
|---|
| 145 | param[i].count = count;
|
|---|
| 146 | }
|
|---|
| 147 |
|
|---|
| 148 |
|
|---|
| 149 | //spawns processes according to parameters in clEnqueueNDRangeKernel
|
|---|
| 150 | $proc procs[global/local];
|
|---|
| 151 | for(int i = 0; i < global/local; i++)
|
|---|
| 152 | {
|
|---|
| 153 | param[i].workgroup = i;
|
|---|
| 154 | //procs[i] = $spawn square(param[i].global_id, param[i].input, param[i].output, param[i].count);
|
|---|
| 155 | procs[i] = $spawn workfunc(local, global, param[i]);
|
|---|
| 156 | }
|
|---|
| 157 |
|
|---|
| 158 | for(int i = 0; i < global/local; i++)
|
|---|
| 159 | {
|
|---|
| 160 | $wait(procs[i]);
|
|---|
| 161 | }
|
|---|
| 162 |
|
|---|
| 163 | //$barrier barrier = $barrier_create($here, gbarrier, now[i].device_id);
|
|---|
| 164 | //$barrier_call(barrier);
|
|---|
| 165 | //$barrier_destroy(barrier);
|
|---|
| 166 |
|
|---|
| 167 | $gbarrier_destroy(gbarrier);
|
|---|
| 168 |
|
|---|
| 169 | //use the information from clEnqueueReadBuffer
|
|---|
| 170 | //may have to alter later
|
|---|
| 171 |
|
|---|
| 172 | memcpy(results, output, sizeof(int) * count);
|
|---|
| 173 |
|
|---|
| 174 | correct = 0;
|
|---|
| 175 | for(i = 0; i < count; i++)
|
|---|
| 176 | {
|
|---|
| 177 | //printf("results at %i is %d, data^2 is %d \n", i, results[i], data[i] * data[i]);
|
|---|
| 178 | if(results[i] == data[i] * data[i])
|
|---|
| 179 | {
|
|---|
| 180 | correct++;
|
|---|
| 181 | }
|
|---|
| 182 | }
|
|---|
| 183 |
|
|---|
| 184 | // Print a brief summary detailing the results
|
|---|
| 185 | //
|
|---|
| 186 | printf("Computed '%d/%d' correct values!\n", correct, count);
|
|---|
| 187 |
|
|---|
| 188 | free(input);
|
|---|
| 189 | free(output);
|
|---|
| 190 |
|
|---|
| 191 | return 0;
|
|---|
| 192 | }
|
|---|