source: CIVL/examples/opencl/2.14/square.cvl@ 41340c1

1.23 2.0 main test-branch
Last change on this file since 41340c1 was 6317abc, checked in by Ziqing Luo <ziqing@…>, 11 years ago

renaming opencl version files
move div0 to arithmetic

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@1768 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 8.3 KB
Line 
1//Forget the program
2
3#include "cl.cvl"
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include <civlc.h>
8
9$input int NUM_DEVICES;
10$input int MAX_NUM_DEVICES;
11$assume 0 < NUM_DEVICES && NUM_DEVICES < MAX_NUM_DEVICES;
12
13$input int DATA_SIZE;
14$input int MAX_DATA_SIZE;
15$assume 0 < DATA_SIZE && DATA_SIZE < MAX_NUM_DEVICES;
16
17$input int LOCAL;
18$input int MAX_LOCAL;
19$assume 0 < LOCAL && LOCAL < MAX_LOCAL;
/*
 * args bundles every parameter of the kernel function.
 *
 * All fields are untyped pointers. For the square kernel in this file they
 * stand in for:
 *   float * input;
 *   float * output;
 *   int     count;   (stored behind a pointer to an int)
 */
typedef struct
{
    void * input;   /* source buffer handed to the kernel */
    void * output;  /* destination buffer handed to the kernel */
    void * count;   /* dereferenced as int by workfunc */
} args;
35
36/*
37 args * argument - Takes in the struct, which is changed for every program using a different kernel
38*/
39cl_kernel clCreateKernel(args * argument, char * function)
40{
41 cl_kernel kernel;
42 kernel.arguments = argument;
43 kernel.method = function;
44
45 return kernel;
46}
47
/*
 * The kernel: writes the square of one input element to the output buffer.
 *
 *  int workgroup  - work-group the calling process belongs to (not used in the body)
 *  int global_id  - global id of the calling process; selects the element to square
 *  int local_id   - local id of the calling process (not used in the body)
 *  float* input   - kernel argument: values to square
 *  float* output  - kernel argument: receives input[global_id] * input[global_id]
 *  int count      - kernel argument: ids at or beyond count do nothing
 */
void square(int workgroup, int global_id, int local_id, float* input, float* output, int count)
{
    /* Stands in for get_global_id(0): processes outside the data set return early. */
    if (global_id >= count)
    {
        return;
    }

    float value = input[global_id];
    output[global_id] = value * value;
}
67/*
68 workfunc assigns local and global ids, before calling the kernel.
69 Note: The function should be identical in all transformations except the calling of the kernel, which means that it cannot be in openCLshared.cvl
70 size_t local - The size of the workgroups, used to calculate blocks
71 size_t global - The total amount of work to be done
72 cl_kernel param - Holds the data for local_id, global_id, and the workgroup
73 Use the print statement to get a better idea of what it means to split workgroups, local_ids, and global_ids
74*/
75void empty()
76{
77}
78
79void workfunc(size_t local, size_t global, cl_kernel param)
80{
81 $proc procs[local];
82 for(int i = local * param.workgroup; i < local * param.workgroup + local; i++)
83 {
84 int count = *(int*)(((args*)param.arguments)->count);
85 param.local_id = i % local;
86 param.global_id = i;
87 printf("My workgroup id is %d, my global id is %d, my local id is %d, and my method is %s\n", param.workgroup, param.global_id, param.local_id, param.method);
88
89 procs[param.local_id] = $spawn square(param.workgroup, param.global_id, param.local_id, ((args*)param.arguments)->input, ((args*)param.arguments)->output, count);
90
91 }
92 for(int j = 0; j < local; j++)
93 {
94 $wait(procs[j]);
95 }
96}
97
98/*
99 Splits up and spawns processes based on global and local, using block
100 TODO: remove cl_command_queue completely and put into a "just in case" file, currently not needed
101 cl_command_queue commands - Holds a queue of the order that devices are to be executed
102 cl_kernel kernel - Holds all the arguments for the kernel, as well as local_id, global_id, and the workgroup
103 size_t global - The total amount of work to be done
104 size_t local - Number to split into workgroups by
105*/
106int clEnqueueNDRangeKernel(cl_kernel kernel, size_t global, size_t local)
107{
108
109 $assert(global % local == 0);
110 int numworkgroups = global/local;
111 cl_kernel param[numworkgroups];
112 $proc procs[numworkgroups];
113 //consider $parfor
114
115 /*
116 $domain(1) dom = {0 .. numworkgroups - 1};
117
118 $for(int i: dom)
119 {
120 param[i] = kernel;
121 param[i].workgroup = i;
122 }
123 $parfor(int i: dom)
124 {
125 workfunc(local, global, param[i]);
126 }
127 */
128
129 for(int i = 0; i < global/local; i++)
130 {
131 param[i] = kernel;
132 param[i].workgroup = i;
133 procs[i] = $spawn workfunc(local, global, param[i]);
134 }
135
136 //this part here is the new clFinish(commands);
137 for(int i = 0; i < global/local; i++)
138 {
139 $wait(procs[i]);
140 }
141
142 return CL_SUCCESS;
143
144}
145
146
147int main(int argc, char** argv)
148{
149 //Make function pointers for every method
150 void (*squarePtr) (int, int, int, float *, float *, int);
151 squarePtr = &square;
152
153 args * arguments;
154 arguments = (args*)malloc(sizeof(args));
155
156 float data[DATA_SIZE]; // original data set given to device
157 float results[DATA_SIZE]; // results returned from device
158 unsigned int correct; // number of correct results returned
159
160 size_t global; // global domain size for our calculation
161 size_t local; // local domain size for our calculation
162
163 cl_device_id device_id; // compute device id
164 cl_context context; // compute context
165 cl_command_queue commands; // compute command queue
166 //cl_program program; // compute program
167 cl_kernel kernel; // compute kernel
168 int err;
169
170 float * input; // device memory used for the input array
171 float * output; // device memory used for the output array
172
173 //Puts in data for input
174 unsigned int count = DATA_SIZE;
175 for(int i = 0; i < count; i++)
176 {
177 data[i] = i;
178 }
179
180
181 //clCreateProgram is far different from the real version, this just stores parameters for the kernel
182 //In order to make this clear, it is clCreateProgram and not something like clCreateProgramFromSource, which actually exists in openCL code
183 //program = clCreateProgram(arguments);
184
185 kernel = clCreateKernel(arguments, "square");
186
187 //replaces clCreateBuffer
188 input = (float *) malloc(sizeof(float) * count);
189 output = (float *) malloc(sizeof(float) * count);
190
191
192 //replaces clEnqueueWriteBuffer, puts data into the input to be put into the kernel arguments
193 memcpy(input, data, sizeof(float) * count);
194
195 /*
196 err = 0;
197 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
198 err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
199 err = clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
200 */
201
202 //use pointer instead of malloc + memcpy for global variables
203 //((args*)kernel.arguments)->input = (float*)malloc(sizeof(float) * count);
204 //memcpy(((args *)kernel.arguments)->input, input, sizeof(float) * count);
205 ((args*)kernel.arguments)->input = input;
206
207 //((args*)kernel.arguments)->output = (float*)malloc(sizeof(float) * count);
208 //memcpy(((args*)kernel.arguments)->output, output, sizeof(float));
209 ((args*)kernel.arguments)->output = output;
210
211 ((args*)kernel.arguments)->count = (int *)malloc(sizeof(int));
212 memcpy(((args*)kernel.arguments)->count, &count, sizeof(int));
213 //no malloc needed for non pointers
214
215 //clGetKernelWorkGroupInfo would get a local size optimal for a device, but is not needed here
216 local = LOCAL;
217
218 global = count;
219 /*
220 commands holds the "order" of devices
221 kernel holds program, which holds variables
222 offset not implemented
223 */
224 err = clEnqueueNDRangeKernel(kernel, global, local);
225
226 //Replaces clEnqueueReadBuffer, which takes one of the saved variables and puts it out to another one
227 memcpy(results, output, sizeof(float) * count);
228
229 //verifies that all values in results are actually squared
230 correct = 0;
231 for(int i = 0; i < count; i++)
232 {
233 if(results[i] == data[i] * data[i])
234 {
235 correct++;
236 }
237 }
238 printf("Computed '%d/%d' correct values!\n", correct, count);
239
240 //TODO: Think of using void * array instead of regular arguments to make freeing easier
241
242 free(((args*)kernel.arguments)->input);
243 free(((args*)kernel.arguments)->output);
244 free(((args*)kernel.arguments)->count);
245 /*
246 free(input);
247 free(output);
248 */
249 free(arguments);
250
251 return 0;
252}
/*
Note: pointers to global memory cannot be stored in local memory.
There is support for global, local, constant and group address spaces, with local being the default.

private (local) - variable assignment, or malloc + memcpy

global - a pointer to a memory location. For regular non-pointer variables, make a pointer, assign the variable's address to it, then use the pointer.
Global variables can be declared in program source, but they must use the "constant" address space qualifier and need to be initialized.
You cannot have global variables that can be modified by kernels and whose modified values persist across work-groups and kernel executions.
For this, you should use memory objects instead.

constant - just like global, but read-only

group - ???
*/
266
267
Note: See TracBrowser for help on using the repository browser.