| [b23f0a2] | 1 |
|
|---|
/*
 * Fallback value for GROUP_SIZE, used only when the build has not already
 * defined it. reduce() copies this value into its local `group_size`.
 */
#if !defined(GROUP_SIZE)
#define GROUP_SIZE (64)
#endif
|
|---|
| 5 |
|
|---|
/*
 * Fallback value for OPERATIONS, used only when the build has not already
 * defined it. reduce() runs its outer loop OPERATIONS times.
 */
#if !defined(OPERATIONS)
#define OPERATIONS (1)
#endif
|
|---|
| 9 |
|
|---|
| 10 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
|---|
| 11 |
|
|---|
/*
 * Read-only access to element `idx` of `src`, with `src` viewed as an array
 * of const int and `idx` converted to size_t.
 */
#define LOAD_GLOBAL_I1(src, idx) \
    (*((const int *)(src) + (size_t)(idx)))
|
|---|
| [b23f0a2] | 14 |
|
|---|
/*
 * Store `v` into element `i` of `s`, with `s` viewed as an array of int and
 * `i` converted to size_t. Evaluates to the stored value.
 *
 * The whole expansion is parenthesized so the assignment stays a single unit
 * when the macro appears inside a larger expression; without the outer
 * parentheses, `STORE_GLOBAL_I1(a, i, v) + 1` would store `v + 1`.
 */
#define STORE_GLOBAL_I1(s, i, v) \
    (((int *)(s))[(size_t)(i)] = (v))
|
|---|
| [b23f0a2] | 17 |
|
|---|
| 18 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
|---|
| 19 |
|
|---|
/*
 * Access element `idx` of `buf`, with `buf` viewed as an array of int and
 * `idx` converted to size_t. The result is a modifiable int lvalue.
 */
#define LOAD_LOCAL_I1(buf, idx) \
    (*((int *)(buf) + (size_t)(idx)))
|
|---|
| [b23f0a2] | 22 |
|
|---|
/*
 * Store `v` into element `i` of `s`, with `s` viewed as an array of int and
 * `i` converted to size_t. Evaluates to the stored value.
 *
 * The whole expansion is parenthesized so the assignment stays a single unit
 * when the macro appears inside a larger expression; without the outer
 * parentheses, `STORE_LOCAL_I1(a, i, v) + 1` would store `v + 1`.
 */
#define STORE_LOCAL_I1(s, i, v) \
    (((int *)(s))[(size_t)(i)] = (v))
|
|---|
| [b23f0a2] | 25 |
|
|---|
/*
 * Add element `j` of `s` into element `i` of `s`, with `s` viewed as an
 * array of int and both indices converted to size_t.
 *
 * No temporaries are declared inside the expansion, so arguments named `x`
 * or `y` (or anything else) cannot be captured by a macro-local variable.
 * The expansion stays a brace-enclosed compound statement so that existing
 * call sites, with or without a trailing semicolon, keep compiling.
 */
#define ACCUM_LOCAL_I1(s, i, j) \
{ \
    ((int *)(s))[(size_t)(i)] += ((const int *)(s))[(size_t)(j)]; \
}
|
|---|
| 32 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
|---|
/*
 * Parameter list with OpenCL address-space qualifiers, presumably the
 * signature of the kernel that reduce() below was adapted from
 * (TODO confirm):
 *
 *     __global int *output,
 *     __global const int *input,
 *     __local int *shared,
 *     const unsigned int n
 */
|
|---|
| 39 | void reduce(
|
|---|
| [fa007d5] | 40 | int workgroup,
|
|---|
| 41 | int cl_global_id,
|
|---|
| 42 | int cl_local_id,
|
|---|
| [b23f0a2] | 43 | int *output,
|
|---|
| 44 | const int *input,
|
|---|
| 45 | int *shared,
|
|---|
| 46 | const unsigned int n)
|
|---|
| 47 | {
|
|---|
| 48 | const int zero = 0.0f;
|
|---|
| [fa007d5] | 49 | const unsigned int group_id = workgroup;
|
|---|
| [b23f0a2] | 50 | const unsigned int group_size = GROUP_SIZE;
|
|---|
| 51 | const unsigned int group_stride = 2 * group_size;
|
|---|
| 52 | const size_t local_stride = group_stride * group_size;
|
|---|
| 53 |
|
|---|
| 54 | unsigned int op = 0;
|
|---|
| 55 | unsigned int last = OPERATIONS - 1;
|
|---|
| 56 | for(op = 0; op < OPERATIONS; op++)
|
|---|
| 57 | {
|
|---|
| 58 | const unsigned int offset = (last - op);
|
|---|
| [3192d8e] | 59 | const size_t local_id = cl_local_id + offset;
|
|---|
| [b23f0a2] | 60 |
|
|---|
| 61 | STORE_LOCAL_I1(shared, local_id, zero);
|
|---|
| 62 |
|
|---|
| 63 | size_t i = group_id * group_stride + local_id;
|
|---|
| 64 | while (i < n)
|
|---|
| 65 | {
|
|---|
| 66 | int a = LOAD_GLOBAL_I1(input, i);
|
|---|
| 67 | int b = LOAD_GLOBAL_I1(input, i + group_size);
|
|---|
| 68 | int s = LOAD_LOCAL_I1(shared, local_id);
|
|---|
| 69 | STORE_LOCAL_I1(shared, local_id, (a + b + s));
|
|---|
| 70 | i += local_stride;
|
|---|
| 71 | }
|
|---|
| 72 |
|
|---|
| 73 | #if (GROUP_SIZE >= 2)
|
|---|
| 74 | if (local_id < 1) { ACCUM_LOCAL_I1(shared, local_id, local_id + 1); }
|
|---|
| 75 | #endif
|
|---|
| 76 | }
|
|---|
| 77 |
|
|---|
| 78 |
|
|---|
| 79 |
|
|---|
| [fa007d5] | 80 | if (cl_local_id == 0)
|
|---|
| [b23f0a2] | 81 | {
|
|---|
| 82 | int v = LOAD_LOCAL_I1(shared, 0);
|
|---|
| 83 | STORE_GLOBAL_I1(output, group_id, v);
|
|---|
| 84 | }
|
|---|
| 85 | }
|
|---|
| 86 |
|
|---|