source: CIVL/examples/translation/openclversion2.15/reduceCL.cvl@ c2a3f74

1.23 2.0 acw/focus-triggers main test-branch
Last change on this file since c2a3f74 was fa007d5, checked in by Jacob Trieu <fuufusuu@…>, 12 years ago

new example in progress

git-svn-id: svn://vsl.cis.udel.edu/civl/trunk@1368 fb995dde-84ed-4084-dfe6-e5aef3e2452c

  • Property mode set to 100644
File size: 6.5 KB
Line 
1
2
3#include "cl.cvl"
4#include <stdio.h>
5#include <stdlib.h>
6#include <math.h>
7#include <string.h>
8#include <civlc.h>
9
10#include "reduce_int_kernel.cvl"
11
12$input int MAX_WORKGROUP_SIZE;
13
/*
 * Untyped argument pack attached to a kernel.
 *
 * The reduce kernel takes four parameters:
 *     __global int       *output
 *     __global const int *input
 *     __local  int       *shared
 *     const unsigned int  n
 * NOTE(review): presumably param[0..3] hold these in the order listed;
 * nothing in this file fills the slots yet, so confirm against the kernel.
 */
typedef struct
{
    void *param[4];
} args;
24
25cl_kernel clCreateKernel(args * argument, char * function)
26{
27 cl_kernel kernel;
28 kernel.arguments = argument;
29 kernel.method = function;
30
31 return kernel;
32}
33
34void clReleaseKernel(cl_kernel kernel)
35{
36 for (int i = 0; i < 4; i++)
37 {
38 free(((args*)kernel.arguments)->param[i]);
39 }
40}
41
/* Tuning constants for the reduction driver. */
#define MIN_ERROR       (1e-7)
#define MAX_GROUPS      (64)
#define MAX_WORK_ITEMS  (64)
#define SEPARATOR       ("----------------------------------------------------------------------\n")

/* Run configuration. `count` is the number of input elements; it was
   1024 * 1024 but that was too large, so it is kept small here. */
static int  iterations = 1000;
static int  count      = 4 * 4;
static int  channels   = 1;
static bool integer    = true;
52
53
/*
 * Reference (host-side) sum of an int array.
 *
 * data:   array to sum; data[0] is always read, so it must hold at least one
 *         element.
 * size:   number of elements to include.
 * result: the sum is written to result[0].
 *
 * The loop has the shape of a compensated summation: `carry` is the
 * difference between what was added and what should have been added, and it
 * is subtracted from the next term.
 */
void reduce_validate_int(int *data, int size, int * result)
{
    int total = data[0];
    int carry = (int)0.0f;
    int idx = 1;

    while (idx < size)
    {
        int adjusted = data[idx] - carry;
        int next = total + adjusted;

        carry = (next - total) - adjusted;
        total = next;
        idx++;
    }

    *result = total;
}
68
/*
 * Work-items per group for a pass over `entries` elements: half the entries
 * when fewer than 2 * max_work_items remain, otherwise max_work_items.
 * The result is clamped to at least 1 so callers can safely divide by
 * (work_items * 2), including when entries is 1.
 */
static int reduction_work_items(int entries, int max_work_items)
{
    int work_items = (entries < max_work_items * 2) ? entries / 2 : max_work_items;

    return (work_items < 1) ? 1 : work_items;
}

/*
 * Plans the passes of a multi-pass reduction over `count` elements.
 *
 * count:            number of input elements.
 * max_group_size:   largest work-group size allowed; when a pass wants more
 *                   work-items than this, the group is capped at
 *                   max_group_size and the per-item operation count is set
 *                   to the wanted work-item count instead of 1.
 * max_groups:       upper bound on groups per pass.
 * max_work_items:   upper bound on work-items per group before capping.
 * pass_count:       out: number of passes (length of the four arrays).
 * group_counts:     out: malloc'd array, groups per pass.
 * work_item_counts: out: malloc'd array, work-items per group per pass.
 * operation_counts: out: malloc'd array, operations per work-item per pass.
 * entry_counts:     out: malloc'd array, entries consumed per pass.
 *
 * The caller owns the four arrays and must free() them. If any allocation
 * fails, all four out-pointers are set to NULL and *pass_count to 0.
 */
void create_reduction_pass_counts(
    int count,
    int max_group_size,
    int max_groups,
    int max_work_items,
    int *pass_count,
    size_t **group_counts,
    size_t **work_item_counts,
    int **operation_counts,
    int **entry_counts)
{
    int work_items = reduction_work_items(count, max_work_items);

    int groups = count / (work_items * 2);
    groups = max_groups < groups ? max_groups : groups;

    /* First walk: count how many passes are needed. */
    int max_levels = 1;
    int s = groups;

    while(s > 1)
    {
        s = s / (reduction_work_items(s, max_work_items) * 2);
        max_levels++;
    }

    *group_counts = (size_t*)malloc(max_levels * sizeof(size_t));
    *work_item_counts = (size_t*)malloc(max_levels * sizeof(size_t));
    *operation_counts = (int*)malloc(max_levels * sizeof(int));
    *entry_counts = (int*)malloc(max_levels * sizeof(int));

    if(*group_counts == NULL || *work_item_counts == NULL ||
       *operation_counts == NULL || *entry_counts == NULL)
    {
        /* Leave the outputs in a consistent "no passes" state. */
        free(*group_counts);
        free(*work_item_counts);
        free(*operation_counts);
        free(*entry_counts);
        *group_counts = NULL;
        *work_item_counts = NULL;
        *operation_counts = NULL;
        *entry_counts = NULL;
        (*pass_count) = 0;
        return;
    }

    /* Pass 0 works on the full input. */
    (*pass_count) = max_levels;
    (*group_counts)[0] = groups;
    (*work_item_counts)[0] = work_items;
    (*operation_counts)[0] = 1;
    (*entry_counts)[0] = count;
    if(max_group_size < work_items)
    {
        (*operation_counts)[0] = work_items;
        (*work_item_counts)[0] = max_group_size;
    }

    /* Second walk: same recurrence as the counting loop, now recording the
       geometry of each later pass. */
    s = groups;
    int level = 1;

    while(s > 1)
    {
        int level_work_items = reduction_work_items(s, max_work_items);
        int level_groups = s / (level_work_items * 2);
        level_groups = (max_groups < level_groups) ? max_groups : level_groups;

        (*group_counts)[level] = level_groups;
        (*work_item_counts)[level] = level_work_items;
        (*operation_counts)[level] = 1;
        (*entry_counts)[level] = s;
        if(max_group_size < level_work_items)
        {
            (*operation_counts)[level] = level_work_items;
            (*work_item_counts)[level] = max_group_size;
        }

        s = s / (level_work_items * 2);
        level++;
    }
}
136
137
138int main(int argc, char** argv)
139{
140 args * arguments;
141 arguments = (args*)malloc(sizeof(args));
142
143 int t1 = 0;
144 int t2 = 0;
145 int err;
146 cl_device_id device_id;
147 cl_command_queue commands;
148 cl_context context;
149 /*
150 cl_mem output_buffer;
151 cl_mem input_buffer;
152 cl_mem partials_buffer;
153 */
154 void * output_buffer;
155 void * input_buffer;
156 void * partials_buffer;
157 size_t typesize;
158 int pass_count = 0;
159 size_t* group_counts = 0;
160 size_t* work_item_counts = 0;
161 int* operation_counts = 0;
162 int* entry_counts = 0;
163 int use_gpu = 1;
164
165 int i;
166 int c;
167
168 float *float_data = (float*)malloc(count * channels * sizeof(float));
169 int *integer_data = (int*)malloc(count * channels * sizeof(int));
170
171 //Cannot use strstr
172 use_gpu = 0;
173 integer = true;
174 channels = 1;
175
176 //can use random, but cannot check numbers
177 for (i = 0; i < count * channels; i++)
178 {
179 float_data[i] = ((float) rand() / (float) RAND_MAX);
180 integer_data[i] = (int) (255.0f * float_data[i]);
181 }
182
183 size_t returned_size = 0;
184 size_t max_workgroup_size = 0;
185 //clGetDeviceInfo happens here
186 max_workgroup_size = MAX_WORKGROUP_SIZE;
187
188 printf(SEPARATOR);
189 typesize = integer ? (sizeof(int)) : (sizeof(float));
190
191
192 size_t buffer_size = typesize * count * channels;
193 input_buffer = (int*)malloc(buffer_size);
194
195 void *input_data = (integer) ? (void*)integer_data : (void*)float_data;
196
197 //clEnqueueWriteBuffer
198 memcpy(input_buffer, input_data, buffer_size);
199
200 //clCreateBuffer
201 partials_buffer = (int*)malloc(buffer_size);
202 output_buffer = (int*)malloc(buffer_size);
203
204 create_reduction_pass_counts(
205 count, max_workgroup_size,
206 MAX_GROUPS, MAX_WORK_ITEMS,
207 &pass_count, &group_counts,
208 &work_item_counts, &operation_counts,
209 &entry_counts);
210
211 cl_kernel *kernels = (cl_kernel*)malloc(pass_count * sizeof(cl_kernel));
212 //memset(kernels, 0, pass_count * sizeof(cl_kernel));
213 //no memset
214
215 for(i = 0; i < pass_count; i++)
216 {
217 kernels[i] = clCreateKernel(arguments, "buffer");
218 }
219
220 void * pass_swap;
221 void * pass_input = output_buffer;
222 void * pass_output = input_buffer;
223
224 for(i = 0; i < pass_count; i++)
225 {
226 size_t global = group_counts[i] * work_item_counts[i];
227 size_t local = work_item_counts[i];
228 unsigned int operations = operation_counts[i];
229 unsigned int entries = entry_counts[i];
230 size_t shared_size = typesize * channels * local * operations;
231
232 printf("Pass[%4d] Global[%4d] Local[%4d] Groups[%4d] WorkItems[%4d] Operations[%d] Entries[%d]\n", i,
233 (int)global, (int)local, (int)group_counts[i], (int)work_item_counts[i], operations, entries);
234
235 pass_swap = pass_input;
236 pass_input = pass_output;
237 pass_output = pass_swap;
238/*
239 __global int *output,
240 __global const int *input,
241 __local int *shared,
242 const unsigned int n
243*/
244
245
246 }
247
248 for(i = 0; i < pass_count; i++)
249 {
250 clReleaseKernel(kernels[i]);
251 }
252
253 free(arguments);
254
255 free(output_buffer);
256 free(partials_buffer);
257 free(input_buffer);
258
259 free(group_counts);
260 free(work_item_counts);
261 free(operation_counts);
262 free(entry_counts);
263 //free(computed_result);
264 free(kernels);
265 free(float_data);
266 free(integer_data);
267
268 return 0;
269}
270
Note: See TracBrowser for help on using the repository browser.