OpenCL: Difference between revisions

OpenCL (view source)

Revision as of 16:42, 31 October 2019

3,074 bytes added , 31 October 2019

→‎C++

David

Bureaucrats, Interface administrators, Administrators

5,321

edits

@@ Line 141: / Line 141: @@
 While you can use the C bindings in your C++ application, Khronos also provides a set of C++ bindings in <code>CL/cl2.hpp</code> which are much easier to use alongside standard library containers such as <code>std::vector</code>. When using the C++ bindings, you also do not need to worry about releasing buffers, since the C++ wrapper objects are reference-counted.
-TODO: Provide an example.
+<syntaxhighlight lang="cpp">
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <CL/cl.hpp>
+int main(void) {
+  int ret = 0;
+  // Create the two input vectors
+  const int LIST_SIZE = 1024;
+  std::vector<int> A(LIST_SIZE);
+  std::vector<int> B(LIST_SIZE);
+  for (int i = 0; i < LIST_SIZE; i++) {
+    A[i] = i;
+    B[i] = LIST_SIZE - i;
+  }
+  // Load the kernel source code into the string source_str
+  std::string source_str;
+  {
+    std::ifstream file("vector_add_kernel.cl");
+    file.seekg(0, std::ios::end);
+    source_str.resize(file.tellg());
+    file.seekg(0, std::ios::beg);
+    file.read(&source_str[0], source_str.size());
+  }
+  // Get platform and device information
+  std::vector<cl::Platform> platforms;
+  ret = cl::Platform::get(&platforms);
+  std::vector<cl::Device> devices;
+  ret = platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
+  // Create an OpenCL context
+  cl::Context context(devices[0], NULL, NULL, NULL, &ret);
+  // Create a command queue
+  cl::CommandQueue command_queue(context, devices[0], 0UL, &ret);
+  // Create memory buffers on the device for each vector
+  cl::Buffer a_mem_obj(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int));
+  cl::Buffer b_mem_obj(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int));
+  cl::Buffer c_mem_obj(context, CL_MEM_READ_WRITE, LIST_SIZE * sizeof(int));
+  // Copy the lists A and B to their respective memory buffers
+  ret = cl::copy(command_queue, A.begin(), A.end(), a_mem_obj);
+  ret = cl::copy(command_queue, B.begin(), B.end(), b_mem_obj);
+  // Create a program from the kernel source
+  cl::Program program(context, source_str);
+  // Build the program
+  ret = program.build(std::vector<cl::Device>{devices[0]});
+  if (ret != CL_SUCCESS) {
+    std::cerr << "Error building program" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  // Create the OpenCL kernel
+  cl::Kernel kernel(program, "vector_add", &ret);
+  if (ret != CL_SUCCESS) {
+    std::cerr << "Error creating kernel" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  // Set the arguments of the kernel
+  ret = kernel.setArg(0, sizeof(cl_mem), &a_mem_obj());
+  ret = kernel.setArg(1, sizeof(cl_mem), &b_mem_obj());
+  ret = kernel.setArg(2, sizeof(cl_mem), &c_mem_obj());
+  // Execute the OpenCL kernel on the list
+  cl::NDRange global_item_size(LIST_SIZE); // Process the entire lists
+  cl::NDRange local_item_size(64); // Divide work items into groups of 64
+  ret = command_queue.enqueueNDRangeKernel(kernel, 0, global_item_size,
+                                           local_item_size, NULL, NULL);
+  if (ret != CL_SUCCESS) {
+    std::cerr << "Error starting kernel" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  // Read the memory buffer C on the device to the local variable C
+  std::vector<int> C(LIST_SIZE);
+  ret = cl::copy(command_queue, c_mem_obj, C.begin(), C.end());
+  if (ret != CL_SUCCESS) {
+    std::cerr << "Error copying C from gpu to memory " << ret << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  // Display the result to the screen
+  for (int i = 0; i < LIST_SIZE; i++)
+    printf("%d + %d = %d\n", A[i], B[i], C[i]);
+  return 0;
+}
+</syntaxhighlight>
 ===Julia===