wiki:OpenCLTransformation

Version 10 (modified by fuufusuu, 12 years ago) ( diff )

--

  • 'args' : a struct containing all of the variables that will be passed into a kernel. Uses an array of void pointers.
    /* Argument bundle handed to a kernel: three untyped pointer slots. */
    typedef struct {
      void *param[3];
    } args;
    

input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL);

clCreateBuffer creates a buffer object of the requested size, with the given flags attached to it. In the .cvl, the transformation keeps only the right-hand side's third parameter (the size in bytes) and mallocs that much space:

input = (int *) malloc(sizeof(int) * count);

err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);

clEnqueueWriteBuffer copies data from host memory into a buffer object. In the transformation it is currently a plain memcpy:

memcpy(input, data, sizeof(int) * count);

/* Bind kernel arguments 0, 1 and 2 (input, output, count); error codes are OR-ed into err. */
err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);

clSetKernelArg sets the value of one kernel argument, identified by its index. In the transformation, each call becomes an assignment into the kernel's argument struct:

  /* Transformed form of the clSetKernelArg calls: store each value in the kernel's argument struct.
     NOTE(review): the `args` typedef shown earlier on this page only declares `void * param[3]`,
     while these assignments use named members (input/output/count) - confirm which layout is current. */
  ((args*)kernel.arguments)->input = input;
  
  ((args*)kernel.arguments)->output = output;

  ((args*)kernel.arguments)->count = count;

err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);

In regular OpenCL this asks the device at runtime which work-group size to use. The transformation does not use this call; instead, the local work-group size becomes an input variable:

$input int LOCAL;

err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);

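Takes all the information gathered so far and enqueues the kernel for execution, dividing the work into work-groups according to the specified command queue, kernel, global work size, and local work size. In the transformation, one process is spawned per work-group and then all of them are waited on: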

  /*
   * CIVL-C model of clEnqueueNDRangeKernel for a one-dimensional range.
   *
   * Splits `global` work-items into global/local work-groups, spawns one
   * process per work-group, and then waits for every spawned process, so the
   * call does not return until all work-groups have finished.
   *
   * commands - command queue; not used by this model.
   * kernel   - kernel descriptor; each work-group receives its own copy with
   *            `workgroup` set to the group index.
   * global   - total number of work-items; must be a multiple of `local`.
   * local    - number of work-items per work-group; must be positive.
   *
   * Returns CL_SUCCESS.
   */
  int clEnqueueNDRangeKernel(cl_command_queue commands, cl_kernel kernel, int global, int local)
{
  // Guard the modulo and division below against a zero work-group size.
  $assert(local > 0);
  $assert(global % local == 0);

  int ngroups = global / local;
  cl_kernel param[ngroups];
  $proc procs[ngroups];

  for(int g = 0; g < ngroups; g++)
  {
    // Each work-group works on a private copy of the kernel descriptor.
    param[g] = kernel;
    param[g].workgroup = g;
    procs[g] = $spawn worksquare(local, global, param[g]);
  }

  //this part here is the new clFinish(commands);
  for(int g = 0; g < ngroups; g++)
  {
    $wait(procs[g]);
  }

  return CL_SUCCESS;
}

This function simulates one work-group: it runs the kernel body once for each work-item in that group.

/*
 * Runs the `square` kernel body once for every work-item of one work-group.
 *
 * local  - number of work-items per work-group.
 * global - total number of work-items (not read by this function).
 * param  - kernel descriptor for this work-group; param.workgroup selects
 *          the group, and local_id/global_id are filled in per work-item.
 */
void worksquare(size_t local, size_t global, kernel param)
{
  // Global ids of group w cover [local * w, local * w + local).
  int gid = local * param.workgroup;

  while(gid < local * param.workgroup + local)
  {
    param.local_id = gid % local;
    param.global_id = gid;
    //printf("My workgroup id is %d, my global id is %d, my local id is %d\n", param.workgroup, param.global_id, param.local_id);
    square(param.workgroup, param.global_id, param.local_id, param.input, param.output, param.count);
    gid++;
  }
}

err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );

Copies the data from one of the buffers that was passed to the kernel back into a host variable. In the transformation it is a memcpy:

  memcpy(results, output, sizeof(int) * count);
Note: See TracWiki for help on using the wiki.