Jump to content

OpenCL: Difference between revisions

3,074 bytes added ,  31 October 2019
Line 141: Line 141:
While you can use the C bindings in your C++ application, Khronos also provides a set of C++ bindings in <code>CL/cl2.hpp</code> which are much easier to use alongside std containers such as <code>std::vector</code>. When using C++ bindings, you also do not need to worry about releasing buffers since these are reference-counted.
While you can use the C bindings in your C++ application, Khronos also provides a set of C++ bindings in <code>CL/cl2.hpp</code> which are much easier to use alongside std containers such as <code>std::vector</code>. When using C++ bindings, you also do not need to worry about releasing buffers since these are reference-counted.


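The following example adds two vectors on an OpenCL device using the C++ bindings; it expects the kernel source in a file named <code>vector_add_kernel.cl</code> in the working directory.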
<syntaxhighlight lang="cpp">
 
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#include <CL/cl.hpp>
 
int main(void) {
  int ret = 0;
  // Create the two input vectors
  const int LIST_SIZE = 1024;
  std::vector<int> A(LIST_SIZE);
  std::vector<int> B(LIST_SIZE);
  for (int i = 0; i < LIST_SIZE; i++) {
    A[i] = i;
    B[i] = LIST_SIZE - i;
  }
 
  // Load the kernel source code into the string source_str
  std::string source_str;
  {
    std::ifstream file("vector_add_kernel.cl");
    file.seekg(0, std::ios::end);
    source_str.resize(file.tellg());
    file.seekg(0, std::ios::beg);
    file.read(&source_str[0], source_str.size());
  }
 
  // Get platform and device information
  std::vector<cl::Platform> platforms;
  ret = cl::Platform::get(&platforms);
 
  std::vector<cl::Device> devices;
  ret = platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
 
  // Create an OpenCL context
  cl::Context context(devices[0], NULL, NULL, NULL, &ret);
 
  // Create a command queue
  cl::CommandQueue command_queue(context, devices[0], 0UL, &ret);
 
  // Create memory buffers on the device for each vector
  cl::Buffer a_mem_obj(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int));
  cl::Buffer b_mem_obj(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int));
  cl::Buffer c_mem_obj(context, CL_MEM_READ_WRITE, LIST_SIZE * sizeof(int));
 
  // Copy the lists A and B to their respective memory buffers
  ret = cl::copy(command_queue, A.begin(), A.end(), a_mem_obj);
  ret = cl::copy(command_queue, B.begin(), B.end(), b_mem_obj);
 
  // Create a program from the kernel source
  cl::Program program(context, source_str);
 
  // Build the program
  ret = program.build(std::vector<cl::Device>{devices[0]});
  if (ret != CL_SUCCESS) {
    std::cerr << "Error building program" << std::endl;
    exit(EXIT_FAILURE);
  }
 
  // Create the OpenCL kernel
  cl::Kernel kernel(program, "vector_add", &ret);
  if (ret != CL_SUCCESS) {
    std::cerr << "Error creating kernel" << std::endl;
    exit(EXIT_FAILURE);
  }
 
  // Set the arguments of the kernel
  ret = kernel.setArg(0, sizeof(cl_mem), &a_mem_obj());
  ret = kernel.setArg(1, sizeof(cl_mem), &b_mem_obj());
  ret = kernel.setArg(2, sizeof(cl_mem), &c_mem_obj());
 
  // Execute the OpenCL kernel on the list
  cl::NDRange global_item_size(LIST_SIZE); // Process the entire lists
  cl::NDRange local_item_size(64); // Divide work items into groups of 64
  ret = command_queue.enqueueNDRangeKernel(kernel, 0, global_item_size,
                                          local_item_size, NULL, NULL);
  if (ret != CL_SUCCESS) {
    std::cerr << "Error starting kernel" << std::endl;
    exit(EXIT_FAILURE);
  }
 
  // Read the memory buffer C on the device to the local variable C
  std::vector<int> C(LIST_SIZE);
  ret = cl::copy(command_queue, c_mem_obj, C.begin(), C.end());
  if (ret != CL_SUCCESS) {
    std::cerr << "Error copying C from gpu to memory " << ret << std::endl;
    exit(EXIT_FAILURE);
  }
 
  // Display the result to the screen
  for (int i = 0; i < LIST_SIZE; i++)
    printf("%d + %d = %d\n", A[i], B[i], C[i]);
 
  return 0;
}
</syntaxhighlight>


===Julia===
===Julia===