source: CIVL/examples/opencl/2.15/square.cvl

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 8.9 KB
Line 
1#include <civlc.cvh>
2//Forget the program
3
4#include "cl.cvl"
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8#include <civlc.h>
9
10$input int NUM_DEVICES;
11$input int MAX_NUM_DEVICES;
12$assume(0 < NUM_DEVICES && NUM_DEVICES < MAX_NUM_DEVICES);
13
14$input int DATA_SIZE;
15$input int MAX_DATA_SIZE;
16$assume(0 < DATA_SIZE && DATA_SIZE < MAX_NUM_DEVICES);
17
18$input int LOCAL;
19$input int MAX_LOCAL;
20$assume(0 < LOCAL && LOCAL < MAX_LOCAL);
/*
 * Holds every parameter of the kernel function as an untyped pointer.
 * In this program main fills the slots as follows:
 *   param[0] - the input float buffer
 *   param[1] - the output float buffer
 *   param[2] - a heap-allocated copy of the element count
 */
typedef struct
{
    void *param[3];
} args;
37
38/*
39 args * argument - Takes in the struct, which is changed for every program using a different kernel
40*/
41cl_kernel clCreateKernel(args * argument, char * function)
42{
43 cl_kernel kernel;
44 kernel.arguments = argument;
45 kernel.method = function;
46
47 return kernel;
48}
49
50void clReleaseKernel(cl_kernel kernel)
51{
52 for (int i = 0; i < 3; i++)
53 {
54 free(((args*)kernel.arguments)->param[i]);
55 }
56}
57
/*
 * The kernel that every work-item process runs: squares one element.
 *   int workgroup - the work-group this process belongs to, set by clEnqueueNDRangeKernel (unused here)
 *   int global_id - the global id of this process, set by workfunc
 *   int local_id  - the local id of this process, set by workfunc (unused here)
 *   float* input  - kernel argument: values to square
 *   float* output - kernel argument: receives input[i] * input[i]
 *   int count     - kernel argument: number of valid elements
 * Ids outside [0, count) leave output untouched.
 */
void square(int workgroup, int global_id, int local_id, float* input, float* output, int count)
{
    /* Stands in for get_global_id(0) of the OpenCL kernel. */
    int i = global_id;

    /* global_id is a signed int here, so the lower bound must be checked too. */
    if (i < 0 || i >= count)
    {
        return;
    }

    output[i] = input[i] * input[i];
}
77/*
78 workfunc assigns local and global ids, before calling the kernel.
79 Note: The function should be identical in all transformations except the calling of the kernel, which means that it cannot be in openCLshared.cvl
80 size_t local - The size of the workgroups, used to calculate blocks
81 size_t global - The total amount of work to be done
82 cl_kernel param - Holds the data for local_id, global_id, and the workgroup
83 Use the print statement to get a better idea of what it means to split workgroups, local_ids, and global_ids
84*/
85void empty()
86{
87}
88
89void workfunc(size_t local, size_t global, cl_kernel param)
90{
91 $proc procs[local];
92 char * squareChar = "square";
93 for(int i = local * param.workgroup; i < local * param.workgroup + local; i++)
94 {
95 int count = *(int*)(((args*)param.arguments)->param[2]);
96 param.local_id = i % local;
97 param.global_id = i;
98 printf("My workgroup id is %d, my global id is %d, my local id is %d, and my method is %s\n", param.workgroup, param.global_id, param.local_id, param.method);
99
100
101 if(strcmp(param.method, squareChar) == 0)
102 {
103 procs[param.local_id] = $spawn square(param.workgroup, param.global_id, param.local_id, ((args*)param.arguments)->param[0], ((args*)param.arguments)->param[1], count);
104 }
105
106 }
107 for(int j = 0; j < local; j++)
108 {
109 $wait(procs[j]);
110 }
111}
112
113/*
114 Splits up and spawns processes based on global and local, using block
115 TODO: remove cl_command_queue completely and put into a "just in case" file, currently not needed
116 cl_command_queue commands - Holds a queue of the order that devices are to be executed
117 cl_kernel kernel - Holds all the arguments for the kernel, as well as local_id, global_id, and the workgroup
118 size_t global - The total amount of work to be done
119 size_t local - Number to split into workgroups by
120*/
121int clEnqueueNDRangeKernel(cl_kernel kernel, size_t global, size_t local)
122{
123
124 $assert((global % local == 0));
125 int numworkgroups = global/local;
126 cl_kernel param[numworkgroups];
127 $proc procs[numworkgroups];
128 //consider $parfor
129
130 /*
131 $domain(1) dom = {0 .. numworkgroups - 1};
132
133 $for(int i: dom)
134 {
135 param[i] = kernel;
136 param[i].workgroup = i;
137 }
138 $parfor(int i: dom)
139 {
140 workfunc(local, global, param[i]);
141 }
142 */
143
144 for(int i = 0; i < global/local; i++)
145 {
146 param[i] = kernel;
147 param[i].workgroup = i;
148 procs[i] = $spawn workfunc(local, global, param[i]);
149 }
150
151 //this part here is the new clFinish(commands);
152 for(int i = 0; i < global/local; i++)
153 {
154 $wait(procs[i]);
155 }
156
157 return CL_SUCCESS;
158
159}
160
161
162int main(int argc, char** argv)
163{
164 //Make function pointers for every method
165 void (*squarePtr) (int, int, int, float *, float *, int);
166 squarePtr = &square;
167
168 args * arguments;
169 arguments = (args*)malloc(sizeof(args));
170
171 float data[DATA_SIZE]; // original data set given to device
172 float results[DATA_SIZE]; // results returned from device
173 unsigned int correct; // number of correct results returned
174
175 size_t global; // global domain size for our calculation
176 size_t local; // local domain size for our calculation
177
178 cl_device_id device_id; // compute device id
179 cl_context context; // compute context
180 cl_command_queue commands; // compute command queue
181 //cl_program program; // compute program
182 cl_kernel kernel; // compute kernel
183 int err;
184
185 float * input; // device memory used for the input array
186 float * output; // device memory used for the output array
187
188 //Puts in data for input
189 unsigned int count = DATA_SIZE;
190 for(int i = 0; i < count; i++)
191 {
192 data[i] = i;
193 }
194
195
196 //clCreateProgram is far different from the real version, this just stores parameters for the kernel
197 //In order to make this clear, it is clCreateProgram and not something like clCreateProgramFromSource, which actually exists in openCL code
198 //program = clCreateProgram(arguments);
199
200 kernel = clCreateKernel(arguments, "square");
201
202 //replaces clCreateBuffer
203 input = (float *) malloc(sizeof(float) * count);
204 output = (float *) malloc(sizeof(float) * count);
205
206
207 //replaces clEnqueueWriteBuffer, puts data into the input to be put into the kernel arguments
208 memcpy(input, data, sizeof(float) * count);
209
210 /*
211 err = 0;
212 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
213 err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
214 err = clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
215 */
216 /*
217 Possible current implementation:
218 clSetKernelArg(cl_kernel, index, sizeof(...), data to put in, int (picks global 0, local 1 , group 2), bool (for if it was already a pointer));
219 Example:
220 clSetKernelArg(kernel, 0, sizeof(float) * count, 0, true);
221
222 This raises a question, how does OpenCL know how to do this without using the last two parameters?
223 */
224
225 //use pointer instead of malloc + memcpy for global variables
226 //((args*)kernel.arguments)->input = (float*)malloc(sizeof(float) * count);
227 //memcpy(((args *)kernel.arguments)->input, input, sizeof(float) * count);
228
229 ((args*)kernel.arguments)->param[0] = input;
230
231 //((args*)kernel.arguments)->output = (float*)malloc(sizeof(float) * count);
232 //memcpy(((args*)kernel.arguments)->output, output, sizeof(float));
233
234 ((args*)kernel.arguments)->param[1] = output;
235
236 ((args*)kernel.arguments)->param[2] = (int *)malloc(sizeof(int));
237 memcpy(((args*)kernel.arguments)->param[2], &count, sizeof(int));
238 //no malloc needed for non pointers
239 //if this were global then there would be a pointer of count instead of a malloc
240
241 //clGetKernelWorkGroupInfo would get a local size optimal for a device, but is not needed here
242 local = LOCAL;
243
244 global = count;
245 /*
246 commands holds the "order" of devices
247 kernel holds program, which holds variables
248 offset not implemented
249 */
250 err = clEnqueueNDRangeKernel(kernel, global, local);
251
252 //Replaces clEnqueueReadBuffer, which takes one of the saved variables and puts it out to another one
253 memcpy(results, output, sizeof(float) * count);
254
255 //verifies that all values in results are actually squared
256 correct = 0;
257 for(int i = 0; i < count; i++)
258 {
259 if(results[i] == data[i] * data[i])
260 {
261 correct++;
262 }
263 }
264 printf("Computed '%d/%d' correct values!\n", correct, count);
265
266 //TODO: Think of using void * array instead of regular arguments to make freeing easier
267 clReleaseKernel(kernel);
268 free(arguments);
269
270
271 return 0;
272}
/*
Note: pointers to global memory cannot be stored in local memory.
Support is intended for global, local, constant and group, with local being the default.

private (local) - variable assignment, or malloc + memcpy

global - a pointer to a memory location. For regular non-pointer variables, make a pointer, assign the variable's address to it, then use the pointer.
Global variables can be declared in program source, but they must use the "constant" address space qualifier and need to be initialized.
You cannot have global variables that can be modified by kernels and whose modified values persist across work-groups and kernel executions.
For this, you should use memory objects instead.

constant - just like global, but read-only

group - ???
*/
286
287
Note: See TracBrowser for help on using the repository browser.