| [cc87898] | 1 | /*
|
|---|
| 2 | * Example of squaring each element in an array of floating point values from:
|
|---|
| 3 | * https://developer.apple.com/library/mac/samplecode/OpenCL_Hello_World_Example/Introduction/Intro.html
|
|---|
| 4 | */
|
|---|
| 5 | //
|
|---|
| 6 | // File: hello.c
|
|---|
| 7 | //
|
|---|
| 8 | // Abstract: A simple "Hello World" compute example showing basic usage of OpenCL which
|
|---|
| 9 | // calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of
|
|---|
| 10 | // floating point values.
|
|---|
| 11 | //
|
|---|
| 12 | //
|
|---|
| 13 | // Version: <1.0>
|
|---|
| 14 | //
|
|---|
| 15 | // Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
|
|---|
| 16 | // in consideration of your agreement to the following terms, and your use,
|
|---|
| 17 | // installation, modification or redistribution of this Apple software
|
|---|
| 18 | // constitutes acceptance of these terms. If you do not agree with these
|
|---|
| 19 | // terms, please do not use, install, modify or redistribute this Apple
|
|---|
| 20 | // software.
|
|---|
| 21 | //
|
|---|
| 22 | // In consideration of your agreement to abide by the following terms, and
|
|---|
| 23 | // subject to these terms, Apple grants you a personal, non - exclusive
|
|---|
| 24 | // license, under Apple's copyrights in this original Apple software ( the
|
|---|
| 25 | // "Apple Software" ), to use, reproduce, modify and redistribute the Apple
|
|---|
| 26 | // Software, with or without modifications, in source and / or binary forms;
|
|---|
| 27 | // provided that if you redistribute the Apple Software in its entirety and
|
|---|
| 28 | // without modifications, you must retain this notice and the following text
|
|---|
| 29 | // and disclaimers in all such redistributions of the Apple Software. Neither
|
|---|
| 30 | // the name, trademarks, service marks or logos of Apple Inc. may be used to
|
|---|
| 31 | // endorse or promote products derived from the Apple Software without specific
|
|---|
| 32 | // prior written permission from Apple. Except as expressly stated in this
|
|---|
| 33 | // notice, no other rights or licenses, express or implied, are granted by
|
|---|
| 34 | // Apple herein, including but not limited to any patent rights that may be
|
|---|
| 35 | // infringed by your derivative works or by other works in which the Apple
|
|---|
| 36 | // Software may be incorporated.
|
|---|
| 37 | //
|
|---|
| 38 | // The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
|
|---|
| 39 | // WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
|
|---|
| 40 | // WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
|
|---|
| 41 | // PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
|
|---|
| 42 | // ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
|
|---|
| 43 | //
|
|---|
| 44 | // IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
|
|---|
| 45 | // CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|---|
| 46 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|---|
| 47 | // INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
|
|---|
| 48 | // AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
|
|---|
| 49 | // UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
|
|---|
| 50 | // OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|---|
| 51 | //
|
|---|
| 52 | // Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
|
|---|
| 53 | //
|
|---|
| 54 |
|
|---|
| 55 | ////////////////////////////////////////////////////////////////////////////////
|
|---|
| 56 |
|
|---|
| 57 | #include <fcntl.h>
|
|---|
| 58 | #include <stdio.h>
|
|---|
| 59 | #include <stdlib.h>
|
|---|
| 60 | #include <string.h>
|
|---|
| 61 | #include <math.h>
|
|---|
| 62 | #include <unistd.h>
|
|---|
| 63 | #include <sys/types.h>
|
|---|
| 64 | #include <sys/stat.h>
|
|---|
| 65 | #include <OpenCL/opencl.h>
|
|---|
| 66 |
|
|---|
| 67 | ////////////////////////////////////////////////////////////////////////////////
|
|---|
| 68 |
|
|---|
// Number of elements in the example's data set. Kept as a compile-time
// constant so the host arrays can be sized statically.
#define DATA_SIZE 1024
|
|---|
| 72 |
|
|---|
| 73 | ////////////////////////////////////////////////////////////////////////////////
|
|---|
| 74 |
|
|---|
/*
 * OpenCL C source text of the `square` kernel.
 *
 * Each work-item fetches its global id in dimension 0 and, when that id is
 * below `count`, stores input[id] * input[id] into output[id].
 *
 * Adjacent string literals are concatenated by the compiler, so no line
 * continuations are required.
 */
const char *KernelSource =
    "\n"
    "__kernel void square( \n"
    " __global float* input, \n"
    " __global float* output, \n"
    " const unsigned int count) \n"
    "{ \n"
    " int i = get_global_id(0); \n"
    " if(i < count) \n"
    " output[i] = input[i] * input[i]; \n"
    "} \n"
    "\n";
|
|---|
| 88 |
|
|---|
| 89 | ////////////////////////////////////////////////////////////////////////////////
|
|---|
| 90 |
|
|---|
| 91 | int main(int argc, char** argv)
|
|---|
| 92 | {
|
|---|
| 93 | int err; // error code returned from api calls
|
|---|
| 94 |
|
|---|
| 95 | float data[DATA_SIZE]; // original data set given to device
|
|---|
| 96 | float results[DATA_SIZE]; // results returned from device
|
|---|
| 97 | unsigned int correct; // number of correct results returned
|
|---|
| 98 |
|
|---|
| 99 | size_t global; // global domain size for our calculation
|
|---|
| 100 | size_t local; // local domain size for our calculation
|
|---|
| 101 |
|
|---|
| 102 | cl_device_id device_id; // compute device id
|
|---|
| 103 | cl_context context; // compute context
|
|---|
| 104 | cl_command_queue commands; // compute command queue
|
|---|
| 105 | cl_program program; // compute program
|
|---|
| 106 | cl_kernel kernel; // compute kernel
|
|---|
| 107 |
|
|---|
| 108 | cl_mem input; // device memory used for the input array
|
|---|
| 109 | cl_mem output; // device memory used for the output array
|
|---|
| 110 |
|
|---|
| 111 | // Fill our data set with random float values
|
|---|
| 112 | //
|
|---|
| 113 | int i = 0;
|
|---|
| 114 | unsigned int count = DATA_SIZE;
|
|---|
| 115 | for(i = 0; i < count; i++)
|
|---|
| 116 | data[i] = rand() / (float)RAND_MAX;
|
|---|
| 117 |
|
|---|
| 118 | // Connect to a compute device
|
|---|
| 119 | //This computer has an incompatible gpu
|
|---|
| 120 | int gpu = 0;
|
|---|
| 121 | err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
|
|---|
| 122 | if (err != CL_SUCCESS)
|
|---|
| 123 | {
|
|---|
| 124 | printf("Error: Failed to create a device group!\n");
|
|---|
| 125 | return EXIT_FAILURE;
|
|---|
| 126 | }
|
|---|
| 127 |
|
|---|
| 128 | // Create a compute context
|
|---|
| 129 | //
|
|---|
| 130 | context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
|
|---|
| 131 | if (!context)
|
|---|
| 132 | {
|
|---|
| 133 | printf("Error: Failed to create a compute context!\n");
|
|---|
| 134 | return EXIT_FAILURE;
|
|---|
| 135 | }
|
|---|
| 136 |
|
|---|
| 137 | // Create a command commands
|
|---|
| 138 | //
|
|---|
| 139 | commands = clCreateCommandQueue(context, device_id, 0, &err);
|
|---|
| 140 | if (!commands)
|
|---|
| 141 | {
|
|---|
| 142 | printf("Error: Failed to create a command commands!\n");
|
|---|
| 143 | return EXIT_FAILURE;
|
|---|
| 144 | }
|
|---|
| 145 |
|
|---|
| 146 | // Create the compute program from the source buffer
|
|---|
| 147 | //
|
|---|
| 148 | program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
|
|---|
| 149 | if (!program)
|
|---|
| 150 | {
|
|---|
| 151 | printf("Error: Failed to create compute program!\n");
|
|---|
| 152 | return EXIT_FAILURE;
|
|---|
| 153 | }
|
|---|
| 154 |
|
|---|
| 155 | // Build the program executable
|
|---|
| 156 | //No devices?
|
|---|
| 157 | err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
|---|
| 158 | if (err != CL_SUCCESS)
|
|---|
| 159 | {
|
|---|
| 160 | size_t len;
|
|---|
| 161 | char buffer[2048];
|
|---|
| 162 |
|
|---|
| 163 | printf("Error: Failed to build program executable!\n");
|
|---|
| 164 | clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
|
|---|
| 165 | printf("%s\n", buffer);
|
|---|
| 166 | exit(1);
|
|---|
| 167 | }
|
|---|
| 168 |
|
|---|
| 169 | // Create the compute kernel in the program we wish to run
|
|---|
| 170 | //Look up to find the kernel, or look at program
|
|---|
| 171 | kernel = clCreateKernel(program, "square", &err);
|
|---|
| 172 | if (!kernel || err != CL_SUCCESS)
|
|---|
| 173 | {
|
|---|
| 174 | printf("Error: Failed to create compute kernel!\n");
|
|---|
| 175 | exit(1);
|
|---|
| 176 | }
|
|---|
| 177 |
|
|---|
| 178 | // Create the input and output arrays in device memory for our calculation
|
|---|
| 179 | //
|
|---|
| 180 | input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL);
|
|---|
| 181 | output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
|
|---|
| 182 | if (!input || !output)
|
|---|
| 183 | {
|
|---|
| 184 | printf("Error: Failed to allocate device memory!\n");
|
|---|
| 185 | exit(1);
|
|---|
| 186 | }
|
|---|
| 187 |
|
|---|
| 188 | // Write our data set into the input array in device memory
|
|---|
| 189 | //
|
|---|
| 190 | err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
|
|---|
| 191 | if (err != CL_SUCCESS)
|
|---|
| 192 | {
|
|---|
| 193 | printf("Error: Failed to write to source array!\n");
|
|---|
| 194 | exit(1);
|
|---|
| 195 | }
|
|---|
| 196 |
|
|---|
| 197 | // Set the arguments to our compute kernel
|
|---|
| 198 | //
|
|---|
| 199 | err = 0;
|
|---|
| 200 | err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
|
|---|
| 201 | err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
|
|---|
| 202 | err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
|
|---|
| 203 | if (err != CL_SUCCESS)
|
|---|
| 204 | {
|
|---|
| 205 | printf("Error: Failed to set kernel arguments! %d\n", err);
|
|---|
| 206 | exit(1);
|
|---|
| 207 | }
|
|---|
| 208 |
|
|---|
| 209 | // Get the maximum work group size for executing the kernel on the device
|
|---|
| 210 | //
|
|---|
| 211 | err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
|
|---|
| 212 | if (err != CL_SUCCESS)
|
|---|
| 213 | {
|
|---|
| 214 | printf("Error: Failed to retrieve kernel work group info! %d\n", err);
|
|---|
| 215 | exit(1);
|
|---|
| 216 | }
|
|---|
| 217 |
|
|---|
| 218 | // Execute the kernel over the entire range of our 1d input data set
|
|---|
| 219 | // using the maximum number of work group items for this device
|
|---|
| 220 | //
|
|---|
| 221 | global = count;
|
|---|
| 222 | err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
|
|---|
| 223 | if (err)
|
|---|
| 224 | {
|
|---|
| 225 | printf("Error: Failed to execute kernel!\n");
|
|---|
| 226 | return EXIT_FAILURE;
|
|---|
| 227 | }
|
|---|
| 228 |
|
|---|
| 229 | // Wait for the command commands to get serviced before reading back results
|
|---|
| 230 | //
|
|---|
| 231 | clFinish(commands);
|
|---|
| 232 |
|
|---|
| 233 | // Read back the results from the device to verify the output
|
|---|
| 234 | //
|
|---|
| 235 | err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );
|
|---|
| 236 | if (err != CL_SUCCESS)
|
|---|
| 237 | {
|
|---|
| 238 | printf("Error: Failed to read output array! %d\n", err);
|
|---|
| 239 | exit(1);
|
|---|
| 240 | }
|
|---|
| 241 |
|
|---|
| 242 | // Validate our results
|
|---|
| 243 | //
|
|---|
| 244 | correct = 0;
|
|---|
| 245 | for(i = 0; i < count; i++)
|
|---|
| 246 | {
|
|---|
| 247 | if(results[i] == data[i] * data[i])
|
|---|
| 248 | correct++;
|
|---|
| 249 | }
|
|---|
| 250 |
|
|---|
| 251 | // Print a brief summary detailing the results
|
|---|
| 252 | //
|
|---|
| 253 | printf("Computed '%d/%d' correct values!\n", correct, count);
|
|---|
| 254 |
|
|---|
| 255 | // Shutdown and cleanup
|
|---|
| 256 | //
|
|---|
| 257 | clReleaseMemObject(input);
|
|---|
| 258 | clReleaseMemObject(output);
|
|---|
| 259 | clReleaseProgram(program);
|
|---|
| 260 | clReleaseKernel(kernel);
|
|---|
| 261 | clReleaseCommandQueue(commands);
|
|---|
| 262 | clReleaseContext(context);
|
|---|
| 263 |
|
|---|
| 264 | return 0;
|
|---|
| 265 | }
|
|---|
| 266 |
|
|---|