source: CIVL/examples/opencl/2.14/square.cvl

main
Last change on this file was ea777aa, checked in by Alex Wilton <awilton@…>, 3 years ago

Moved examples, include, build_default.properties, common.xml, and README out from dev.civl.com into the root of the repo.

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@5704 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 8.4 KB
RevLine 
[3ff27cf]1#include <civlc.cvh>
[cc87898]2//Forget the program
3
[63d4ef2]4#include "cl.cvl"
[cc87898]5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8#include <civlc.h>
9
10$input int NUM_DEVICES;
11$input int MAX_NUM_DEVICES;
[3ff27cf]12$assume(0 < NUM_DEVICES && NUM_DEVICES < MAX_NUM_DEVICES);
[cc87898]13
14$input int DATA_SIZE;
15$input int MAX_DATA_SIZE;
[3ff27cf]16$assume(0 < DATA_SIZE && DATA_SIZE < MAX_NUM_DEVICES);
[cc87898]17
18$input int LOCAL;
19$input int MAX_LOCAL;
[3ff27cf]20$assume(0 < LOCAL && LOCAL < MAX_LOCAL);
[cc87898]21//this args struct will hold all the parameters of for the kernel function
22typedef struct
23{
24 //Variables for kernels
[5693398]25 /*
[cc87898]26 float * input;
27 float * output;
[4da0a7b]28 int count;
[5693398]29 */
30 void * input;
31 void * output;
[4da0a7b]32 void * count;
[b23f0a2]33
[cc87898]34
35}args;
36
37/*
38 args * argument - Takes in the struct, which is changed for every program using a different kernel
39*/
[4da0a7b]40cl_kernel clCreateKernel(args * argument, char * function)
[cc87898]41{
42 cl_kernel kernel;
43 kernel.arguments = argument;
[5693398]44 kernel.method = function;
[cc87898]45
46 return kernel;
47}
48
49/*
50 This is the kernel that processes compute with
51 int workgroup - Gives the workgroup that a particular process came from, made by clEnqueueNDRangeKernel
52 int global_id - Gives the global_id that a particular process has, given by workfunc
53 int local_id - Gives the local_id that a particular process has, given by workfunc
54 float* input - Kernel argument
55 float* output - Kernel argument
56 int count - Kernel argument
57*/
58void square(int workgroup, int global_id, int local_id, float* input, float* output, int count)
59{
60 //int i = get_global_id(0);
61 int i = global_id;
62 if (i < count)
63 {
64 output[i] = input[i] * input[i];
65 //printf("output[%d] is %d\n", i, output[i]);
66 }
67}
68/*
69 workfunc assigns local and global ids, before calling the kernel.
70 Note: The function should be identical in all transformations except the calling of the kernel, which means that it cannot be in openCLshared.cvl
71 size_t local - The size of the workgroups, used to calculate blocks
72 size_t global - The total amount of work to be done
73 cl_kernel param - Holds the data for local_id, global_id, and the workgroup
74 Use the print statement to get a better idea of what it means to split workgroups, local_ids, and global_ids
75*/
[4da0a7b]76void empty()
77{
78}
79
[cc87898]80void workfunc(size_t local, size_t global, cl_kernel param)
81{
82 $proc procs[local];
83 for(int i = local * param.workgroup; i < local * param.workgroup + local; i++)
84 {
[b23f0a2]85 int count = *(int*)(((args*)param.arguments)->count);
[cc87898]86 param.local_id = i % local;
87 param.global_id = i;
[4da0a7b]88 printf("My workgroup id is %d, my global id is %d, my local id is %d, and my method is %s\n", param.workgroup, param.global_id, param.local_id, param.method);
89
[b23f0a2]90 procs[param.local_id] = $spawn square(param.workgroup, param.global_id, param.local_id, ((args*)param.arguments)->input, ((args*)param.arguments)->output, count);
[4da0a7b]91
[cc87898]92 }
93 for(int j = 0; j < local; j++)
94 {
95 $wait(procs[j]);
96 }
97}
98
99/*
100 Splits up and spawns processes based on global and local, using block
101 TODO: remove cl_command_queue completely and put into a "just in case" file, currently not needed
102 cl_command_queue commands - Holds a queue of the order that devices are to be executed
103 cl_kernel kernel - Holds all the arguments for the kernel, as well as local_id, global_id, and the workgroup
104 size_t global - The total amount of work to be done
105 size_t local - Number to split into workgroups by
106*/
107int clEnqueueNDRangeKernel(cl_kernel kernel, size_t global, size_t local)
108{
[700ad59]109
[3ff27cf]110 $assert((global % local == 0));
[cc87898]111 int numworkgroups = global/local;
112 cl_kernel param[numworkgroups];
113 $proc procs[numworkgroups];
114 //consider $parfor
115
116 /*
117 $domain(1) dom = {0 .. numworkgroups - 1};
118
119 $for(int i: dom)
120 {
121 param[i] = kernel;
122 param[i].workgroup = i;
123 }
124 $parfor(int i: dom)
125 {
126 workfunc(local, global, param[i]);
127 }
128 */
129
130 for(int i = 0; i < global/local; i++)
131 {
132 param[i] = kernel;
133 param[i].workgroup = i;
134 procs[i] = $spawn workfunc(local, global, param[i]);
135 }
136
137 //this part here is the new clFinish(commands);
138 for(int i = 0; i < global/local; i++)
139 {
140 $wait(procs[i]);
141 }
142
143 return CL_SUCCESS;
[700ad59]144
[cc87898]145}
146
147
148int main(int argc, char** argv)
149{
[5693398]150 //Make function pointers for every method
151 void (*squarePtr) (int, int, int, float *, float *, int);
152 squarePtr = &square;
153
[cc87898]154 args * arguments;
155 arguments = (args*)malloc(sizeof(args));
156
157 float data[DATA_SIZE]; // original data set given to device
158 float results[DATA_SIZE]; // results returned from device
159 unsigned int correct; // number of correct results returned
160
161 size_t global; // global domain size for our calculation
162 size_t local; // local domain size for our calculation
163
164 cl_device_id device_id; // compute device id
165 cl_context context; // compute context
166 cl_command_queue commands; // compute command queue
167 //cl_program program; // compute program
168 cl_kernel kernel; // compute kernel
169 int err;
170
171 float * input; // device memory used for the input array
172 float * output; // device memory used for the output array
173
174 //Puts in data for input
175 unsigned int count = DATA_SIZE;
176 for(int i = 0; i < count; i++)
177 {
178 data[i] = i;
179 }
180
181
182 //clCreateProgram is far different from the real version, this just stores parameters for the kernel
183 //In order to make this clear, it is clCreateProgram and not something like clCreateProgramFromSource, which actually exists in openCL code
184 //program = clCreateProgram(arguments);
185
[5693398]186 kernel = clCreateKernel(arguments, "square");
[cc87898]187
188 //replaces clCreateBuffer
189 input = (float *) malloc(sizeof(float) * count);
190 output = (float *) malloc(sizeof(float) * count);
191
192
193 //replaces clEnqueueWriteBuffer, puts data into the input to be put into the kernel arguments
194 memcpy(input, data, sizeof(float) * count);
195
196 /*
197 err = 0;
198 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
199 err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
200 err = clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
201 */
202
203 //use pointer instead of malloc + memcpy for global variables
204 //((args*)kernel.arguments)->input = (float*)malloc(sizeof(float) * count);
205 //memcpy(((args *)kernel.arguments)->input, input, sizeof(float) * count);
206 ((args*)kernel.arguments)->input = input;
207
208 //((args*)kernel.arguments)->output = (float*)malloc(sizeof(float) * count);
209 //memcpy(((args*)kernel.arguments)->output, output, sizeof(float));
210 ((args*)kernel.arguments)->output = output;
211
[4da0a7b]212 ((args*)kernel.arguments)->count = (int *)malloc(sizeof(int));
213 memcpy(((args*)kernel.arguments)->count, &count, sizeof(int));
[cc87898]214 //no malloc needed for non pointers
215
216 //clGetKernelWorkGroupInfo would get a local size optimal for a device, but is not needed here
217 local = LOCAL;
218
219 global = count;
220 /*
221 commands holds the "order" of devices
222 kernel holds program, which holds variables
223 offset not implemented
224 */
225 err = clEnqueueNDRangeKernel(kernel, global, local);
226
227 //Replaces clEnqueueReadBuffer, which takes one of the saved variables and puts it out to another one
228 memcpy(results, output, sizeof(float) * count);
229
230 //verifies that all values in results are actually squared
231 correct = 0;
232 for(int i = 0; i < count; i++)
233 {
234 if(results[i] == data[i] * data[i])
235 {
236 correct++;
237 }
238 }
239 printf("Computed '%d/%d' correct values!\n", correct, count);
240
241 //TODO: Think of using void * array instead of regular arguments to make freeing easier
242
243 free(((args*)kernel.arguments)->input);
244 free(((args*)kernel.arguments)->output);
[b23f0a2]245 free(((args*)kernel.arguments)->count);
[cc87898]246 /*
247 free(input);
248 free(output);
249 */
250 free(arguments);
251
252 return 0;
253}
[4da0a7b]254/*Note, pointers to global memory cannot be stored in local memory
255support for global, local, constant and group, local being the default
256private (local) - variable assignment, or malloc + memcpy
257
258global - pointer with memory location. For regular non pointer variables, make a pointer and assign it to it, then use the pointer
259Global variables can be declared in program source but they must use the "constant" address space qualifier and need to be initialized.
260You cannot have global variables that can be modified by kernels and where the modified values are persistent across work-groups and kernel executions.
261For this, you should use memory objects instead.
262
263Constant is just like global, but read only
264
265group - ???
266*/
[cc87898]267
268
Note: See TracBrowser for help on using the repository browser.