source: CIVL/examples/opencl/2.15/square.cvl@ 41340c1

1.23 2.0 main test-branch
Last change on this file since 41340c1 was 6317abc, checked in by Ziqing Luo <ziqing@…>, 11 years ago

renaming opencl version files
move div0 to arithmetic

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@1768 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 8.9 KB
Line 
1//Forget the program
2
3#include "cl.cvl"
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include <civlc.h>
8
9$input int NUM_DEVICES;
10$input int MAX_NUM_DEVICES;
11$assume 0 < NUM_DEVICES && NUM_DEVICES < MAX_NUM_DEVICES;
12
13$input int DATA_SIZE;
14$input int MAX_DATA_SIZE;
15$assume 0 < DATA_SIZE && DATA_SIZE < MAX_NUM_DEVICES;
16
17$input int LOCAL;
18$input int MAX_LOCAL;
19$assume 0 < LOCAL && LOCAL < MAX_LOCAL;
/*
 * args holds the parameters that are handed to the kernel function.
 * Every parameter is stored as an untyped pointer in a fixed array of
 * three slots; the code that launches a kernel casts each slot back to
 * the type that kernel expects.
 */
typedef struct
{
  /* Kernel parameters, one pointer per slot */
  void * param[3];
} args;
36
37/*
38 args * argument - Takes in the struct, which is changed for every program using a different kernel
39*/
40cl_kernel clCreateKernel(args * argument, char * function)
41{
42 cl_kernel kernel;
43 kernel.arguments = argument;
44 kernel.method = function;
45
46 return kernel;
47}
48
49void clReleaseKernel(cl_kernel kernel)
50{
51 for (int i = 0; i < 3; i++)
52 {
53 free(((args*)kernel.arguments)->param[i]);
54 }
55}
56
/*
 The kernel: one call squares one element.
 int workgroup - workgroup the calling process belongs to (not used in the body)
 int global_id - global id of the calling process; selects the element to square
 int local_id - local id of the calling process (not used in the body)
 float* input - Kernel argument, array that is read
 float* output - Kernel argument, array that is written
 int count - Kernel argument, ids at or above this value do nothing
*/
void square(int workgroup, int global_id, int local_id, float* input, float* output, int count)
{
  float value;

  /* Ids outside the data range have no element to process. */
  if (global_id >= count)
  {
    return;
  }

  value = input[global_id];
  output[global_id] = value * value;
}
76/*
77 workfunc assigns local and global ids, before calling the kernel.
78 Note: The function should be identical in all transformations except the calling of the kernel, which means that it cannot be in openCLshared.cvl
79 size_t local - The size of the workgroups, used to calculate blocks
80 size_t global - The total amount of work to be done
81 cl_kernel param - Holds the data for local_id, global_id, and the workgroup
82 Use the print statement to get a better idea of what it means to split workgroups, local_ids, and global_ids
83*/
/* Does nothing and returns immediately. */
void empty()
{
  return;
}
87
88void workfunc(size_t local, size_t global, cl_kernel param)
89{
90 $proc procs[local];
91 char * squareChar = "square";
92 for(int i = local * param.workgroup; i < local * param.workgroup + local; i++)
93 {
94 int count = *(int*)(((args*)param.arguments)->param[2]);
95 param.local_id = i % local;
96 param.global_id = i;
97 printf("My workgroup id is %d, my global id is %d, my local id is %d, and my method is %s\n", param.workgroup, param.global_id, param.local_id, param.method);
98
99
100 if(strcmp(param.method, squareChar) == 0)
101 {
102 procs[param.local_id] = $spawn square(param.workgroup, param.global_id, param.local_id, ((args*)param.arguments)->param[0], ((args*)param.arguments)->param[1], count);
103 }
104
105 }
106 for(int j = 0; j < local; j++)
107 {
108 $wait(procs[j]);
109 }
110}
111
112/*
113 Splits up and spawns processes based on global and local, using block
114 TODO: remove cl_command_queue completely and put into a "just in case" file, currently not needed
115 cl_command_queue commands - Holds a queue of the order that devices are to be executed
116 cl_kernel kernel - Holds all the arguments for the kernel, as well as local_id, global_id, and the workgroup
117 size_t global - The total amount of work to be done
118 size_t local - Number to split into workgroups by
119*/
120int clEnqueueNDRangeKernel(cl_kernel kernel, size_t global, size_t local)
121{
122
123 $assert(global % local == 0);
124 int numworkgroups = global/local;
125 cl_kernel param[numworkgroups];
126 $proc procs[numworkgroups];
127 //consider $parfor
128
129 /*
130 $domain(1) dom = {0 .. numworkgroups - 1};
131
132 $for(int i: dom)
133 {
134 param[i] = kernel;
135 param[i].workgroup = i;
136 }
137 $parfor(int i: dom)
138 {
139 workfunc(local, global, param[i]);
140 }
141 */
142
143 for(int i = 0; i < global/local; i++)
144 {
145 param[i] = kernel;
146 param[i].workgroup = i;
147 procs[i] = $spawn workfunc(local, global, param[i]);
148 }
149
150 //this part here is the new clFinish(commands);
151 for(int i = 0; i < global/local; i++)
152 {
153 $wait(procs[i]);
154 }
155
156 return CL_SUCCESS;
157
158}
159
160
161int main(int argc, char** argv)
162{
163 //Make function pointers for every method
164 void (*squarePtr) (int, int, int, float *, float *, int);
165 squarePtr = &square;
166
167 args * arguments;
168 arguments = (args*)malloc(sizeof(args));
169
170 float data[DATA_SIZE]; // original data set given to device
171 float results[DATA_SIZE]; // results returned from device
172 unsigned int correct; // number of correct results returned
173
174 size_t global; // global domain size for our calculation
175 size_t local; // local domain size for our calculation
176
177 cl_device_id device_id; // compute device id
178 cl_context context; // compute context
179 cl_command_queue commands; // compute command queue
180 //cl_program program; // compute program
181 cl_kernel kernel; // compute kernel
182 int err;
183
184 float * input; // device memory used for the input array
185 float * output; // device memory used for the output array
186
187 //Puts in data for input
188 unsigned int count = DATA_SIZE;
189 for(int i = 0; i < count; i++)
190 {
191 data[i] = i;
192 }
193
194
195 //clCreateProgram is far different from the real version, this just stores parameters for the kernel
196 //In order to make this clear, it is clCreateProgram and not something like clCreateProgramFromSource, which actually exists in openCL code
197 //program = clCreateProgram(arguments);
198
199 kernel = clCreateKernel(arguments, "square");
200
201 //replaces clCreateBuffer
202 input = (float *) malloc(sizeof(float) * count);
203 output = (float *) malloc(sizeof(float) * count);
204
205
206 //replaces clEnqueueWriteBuffer, puts data into the input to be put into the kernel arguments
207 memcpy(input, data, sizeof(float) * count);
208
209 /*
210 err = 0;
211 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
212 err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
213 err = clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
214 */
215 /*
216 Possible current implementation:
217 clSetKernelArg(cl_kernel, index, sizeof(...), data to put in, int (picks global 0, local 1 , group 2), bool (for if it was already a pointer));
218 Example:
219 clSetKernelArg(kernel, 0, sizeof(float) * count, 0, true);
220
221 This raises a question, how does OpenCL know how to do this without using the last two parameters?
222 */
223
224 //use pointer instead of malloc + memcpy for global variables
225 //((args*)kernel.arguments)->input = (float*)malloc(sizeof(float) * count);
226 //memcpy(((args *)kernel.arguments)->input, input, sizeof(float) * count);
227
228 ((args*)kernel.arguments)->param[0] = input;
229
230 //((args*)kernel.arguments)->output = (float*)malloc(sizeof(float) * count);
231 //memcpy(((args*)kernel.arguments)->output, output, sizeof(float));
232
233 ((args*)kernel.arguments)->param[1] = output;
234
235 ((args*)kernel.arguments)->param[2] = (int *)malloc(sizeof(int));
236 memcpy(((args*)kernel.arguments)->param[2], &count, sizeof(int));
237 //no malloc needed for non pointers
238 //if this were global then there would be a pointer of count instead of a malloc
239
240 //clGetKernelWorkGroupInfo would get a local size optimal for a device, but is not needed here
241 local = LOCAL;
242
243 global = count;
244 /*
245 commands holds the "order" of devices
246 kernel holds program, which holds variables
247 offset not implemented
248 */
249 err = clEnqueueNDRangeKernel(kernel, global, local);
250
251 //Replaces clEnqueueReadBuffer, which takes one of the saved variables and puts it out to another one
252 memcpy(results, output, sizeof(float) * count);
253
254 //verifies that all values in results are actually squared
255 correct = 0;
256 for(int i = 0; i < count; i++)
257 {
258 if(results[i] == data[i] * data[i])
259 {
260 correct++;
261 }
262 }
263 printf("Computed '%d/%d' correct values!\n", correct, count);
264
265 //TODO: Think of using void * array instead of regular arguments to make freeing easier
266 clReleaseKernel(kernel);
267 free(arguments);
268
269
270 return 0;
271}
272/*Note, pointers to global memory cannot be stored in local memory
273support for global, local, constant and group, local being the default
274private (local) - variable assignment, or malloc + memcpy
275
276global - pointer with memory location. For regular non pointer variables, make a pointer and assign it to it, then use the pointer
277Global variables can be declared in program source but they must use the "constant" address space qualifier and need to be initialized.
278You cannot have global variables that can be modified by kernels and where the modified values are persistent across work-groups and kernel executions.
279For this, you should use memory objects instead.
280
281Constant is just like global, but read only
282
283group - ???
284*/
285
286
Note: See TracBrowser for help on using the repository browser.