source: CIVL/examples/translation/openclversion2.14/square.cvl@ d87ec9c

1.23 2.0 acw/focus-triggers main test-branch
Last change on this file since d87ec9c was 63d4ef2, checked in by Jacob Trieu <fuufusuu@…>, 12 years ago

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@1278 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 7.2 KB
Line 
1//Forget the program
2
3#include "cl.cvl"
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include <civlc.h>
8
9$input int NUM_DEVICES;
10$input int MAX_NUM_DEVICES;
11$assume 0 < NUM_DEVICES && NUM_DEVICES < MAX_NUM_DEVICES;
12
13$input int DATA_SIZE;
14$input int MAX_DATA_SIZE;
15$assume 0 < DATA_SIZE && DATA_SIZE < MAX_NUM_DEVICES;
16
17$input int LOCAL;
18$input int MAX_LOCAL;
19$assume 0 < LOCAL && LOCAL < MAX_LOCAL;
/*
 * Argument bundle for the kernel function: every parameter the kernel needs
 * travels in one of these. A pointer to it is stored in a cl_kernel's
 * arguments field.
 */
typedef struct
{
  float *input;   // values the kernel reads
  float *output;  // where the kernel writes its results
  int count;      // the kernel only touches indices below this
} args;
30
31/*
32 args * argument - Takes in the struct, which is changed for every program using a different kernel
33*/
34cl_kernel clCreateKernel(args * argument)
35{
36 cl_kernel kernel;
37 kernel.arguments = argument;
38
39 return kernel;
40}
41
/*
 * The kernel: writes the square of one input element to the same position
 * of output.
 *   int workgroup  - work-group the calling process belongs to (not read here)
 *   int global_id  - index of the element this process handles; stands in for
 *                    OpenCL's get_global_id(0)
 *   int local_id   - index of the process inside its work-group (not read here)
 *   float* input   - kernel argument, source values
 *   float* output  - kernel argument, destination values
 *   int count      - kernel argument; ids that are not below count do nothing
 */
void square(int workgroup, int global_id, int local_id, float* input, float* output, int count)
{
  if (global_id >= count)
  {
    return;
  }

  float value = input[global_id];
  output[global_id] = value * value;
}
61/*
62 workfunc assigns local and global ids, before calling the kernel.
63 Note: The function should be identical in all transformations except the calling of the kernel, which means that it cannot be in openCLshared.cvl
64 size_t local - The size of the workgroups, used to calculate blocks
65 size_t global - The total amount of work to be done
66 cl_kernel param - Holds the data for local_id, global_id, and the workgroup
67 Use the print statement to get a better idea of what it means to split workgroups, local_ids, and global_ids
68*/
69void workfunc(size_t local, size_t global, cl_kernel param)
70{
71 $proc procs[local];
72 for(int i = local * param.workgroup; i < local * param.workgroup + local; i++)
73 {
74 param.local_id = i % local;
75 param.global_id = i;
76 printf("My workgroup id is %d, my global id is %d, my local id is %d\n", param.workgroup, param.global_id, param.local_id);
77 procs[param.local_id] = $spawn square(param.workgroup, param.global_id, param.local_id, ((args*)param.arguments)->input, ((args*)param.arguments)->output, ((args*)param.arguments)->count);
78 }
79 for(int j = 0; j < local; j++)
80 {
81 $wait(procs[j]);
82 }
83}
84
85/*
86 Splits up and spawns processes based on global and local, using block
87 TODO: remove cl_command_queue completely and put into a "just in case" file, currently not needed
88 cl_command_queue commands - Holds a queue of the order that devices are to be executed
89 cl_kernel kernel - Holds all the arguments for the kernel, as well as local_id, global_id, and the workgroup
90 size_t global - The total amount of work to be done
91 size_t local - Number to split into workgroups by
92*/
93int clEnqueueNDRangeKernel(cl_kernel kernel, size_t global, size_t local)
94{
95 $assert(global % local == 0);
96 int numworkgroups = global/local;
97 cl_kernel param[numworkgroups];
98 $proc procs[numworkgroups];
99 //consider $parfor
100
101 /*
102 $domain(1) dom = {0 .. numworkgroups - 1};
103
104 $for(int i: dom)
105 {
106 param[i] = kernel;
107 param[i].workgroup = i;
108 }
109 $parfor(int i: dom)
110 {
111 workfunc(local, global, param[i]);
112 }
113 */
114
115 for(int i = 0; i < global/local; i++)
116 {
117 param[i] = kernel;
118 param[i].workgroup = i;
119 procs[i] = $spawn workfunc(local, global, param[i]);
120 }
121
122 //this part here is the new clFinish(commands);
123 for(int i = 0; i < global/local; i++)
124 {
125 $wait(procs[i]);
126 }
127
128 return CL_SUCCESS;
129}
130
131
132int main(int argc, char** argv)
133{
134 args * arguments;
135 arguments = (args*)malloc(sizeof(args));
136
137 float data[DATA_SIZE]; // original data set given to device
138 float results[DATA_SIZE]; // results returned from device
139 unsigned int correct; // number of correct results returned
140
141 size_t global; // global domain size for our calculation
142 size_t local; // local domain size for our calculation
143
144 cl_device_id device_id; // compute device id
145 cl_context context; // compute context
146 cl_command_queue commands; // compute command queue
147 //cl_program program; // compute program
148 cl_kernel kernel; // compute kernel
149 int err;
150
151 float * input; // device memory used for the input array
152 float * output; // device memory used for the output array
153
154 //Puts in data for input
155 unsigned int count = DATA_SIZE;
156 for(int i = 0; i < count; i++)
157 {
158 data[i] = i;
159 }
160
161
162 //clCreateProgram is far different from the real version, this just stores parameters for the kernel
163 //In order to make this clear, it is clCreateProgram and not something like clCreateProgramFromSource, which actually exists in openCL code
164 //program = clCreateProgram(arguments);
165
166 kernel = clCreateKernel(arguments);
167
168 //replaces clCreateBuffer
169 input = (float *) malloc(sizeof(float) * count);
170 output = (float *) malloc(sizeof(float) * count);
171
172
173 //replaces clEnqueueWriteBuffer, puts data into the input to be put into the kernel arguments
174 memcpy(input, data, sizeof(float) * count);
175
176 /*
177 err = 0;
178 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
179 err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
180 err = clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
181 */
182
183 //use pointer instead of malloc + memcpy for global variables
184 //((args*)kernel.arguments)->input = (float*)malloc(sizeof(float) * count);
185 //memcpy(((args *)kernel.arguments)->input, input, sizeof(float) * count);
186 ((args*)kernel.arguments)->input = input;
187
188 //((args*)kernel.arguments)->output = (float*)malloc(sizeof(float) * count);
189 //memcpy(((args*)kernel.arguments)->output, output, sizeof(float));
190 ((args*)kernel.arguments)->output = output;
191
192 ((args*)kernel.arguments)->count = count;
193 //no malloc needed for non pointers
194
195 //clGetKernelWorkGroupInfo would get a local size optimal for a device, but is not needed here
196 local = LOCAL;
197
198 global = count;
199 /*
200 commands holds the "order" of devices
201 kernel holds program, which holds variables
202 offset not implemented
203 */
204 err = clEnqueueNDRangeKernel(kernel, global, local);
205
206 //Replaces clEnqueueReadBuffer, which takes one of the saved variables and puts it out to another one
207 memcpy(results, output, sizeof(float) * count);
208
209 //verifies that all values in results are actually squared
210 correct = 0;
211 for(int i = 0; i < count; i++)
212 {
213 if(results[i] == data[i] * data[i])
214 {
215 correct++;
216 }
217 }
218 printf("Computed '%d/%d' correct values!\n", correct, count);
219
220 //TODO: Think of using void * array instead of regular arguments to make freeing easier
221
222 free(((args*)kernel.arguments)->input);
223 free(((args*)kernel.arguments)->output);
224 /*
225 free(input);
226 free(output);
227 */
228 free(arguments);
229
230 return 0;
231}
232
233
234
Note: See TracBrowser for help on using the repository browser.