▶ 包括带有 CL_MEM_READ_ONLY、CL_MEM_WRITE_ONLY、CL_MEM_READ_WRITE 标识的显式拷贝(使用函数 clEnqueueWriteBuffer 和 clEnqueueReadBuffer);带有 CL_MEM_COPY_HOST_PTR 标识的隐式拷贝(不用拷贝函数,在设备上直接使用);以及使用函数 clEnqueueMapBuffer 直接在设备和主机之间映射(转换)一段内存的指针。
● 代码
1 #include <cl.h> 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <time.h> 5 #include <iostream> 6 7 using namespace std; 8 const int nElement = 2048; 9 10 const char *programSource = " \ 11 __kernel void vectorAdd(__global int *A, __global int *B, __global int *C) \ 12 { \ 13 int idx = get_global_id(0); \ 14 C[idx] = A[idx] + B[idx]; \ 15 return; \ 16 } \ 17 "; 18 19 int main(int argc, char* argv[]) 20 { 21 const size_t dataSize = sizeof(int) * nElement; 22 int i, *A, *B, *C, *returnC; 23 24 A = (int *)malloc(dataSize * sizeof(float)); 25 B = (int *)malloc(dataSize * sizeof(float)); 26 C = (int *)malloc(dataSize * sizeof(float)); 27 for (srand((unsigned)time(NULL)), i = 0; i < dataSize; A[i] = rand() % 65535, B[i] = rand() % 65535, C[i] = A[i] + B[i], i++); 28 29 cl_int status; 30 cl_platform_id platform; 31 clGetPlatformIDs(1, &platform, NULL); 32 cl_device_id device; 33 clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); 34 cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status); 35 cl_command_queue queue = clCreateCommandQueue(context, device, 0, &status); 36 cl_program program = clCreateProgramWithSource(context, 1, &programSource, NULL, &status); 37 clBuildProgram(program, 1, &device, NULL, NULL, NULL); 38 cl_kernel kernel = clCreateKernel(program, "vectorAdd", NULL); 39 cl_event writeEvent, runEvent, mapEvent; 40 41 //创建三个内存对象,把 A 隐式拷贝到 clA,把 B 显示拷贝到 clB,clC 接收计算结果映射给 returnC 42 cl_mem clA = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, dataSize * sizeof(cl_int), A, NULL); 43 cl_mem clB = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize * sizeof(cl_int), NULL, NULL); 44 cl_mem clC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize * sizeof(cl_int), NULL, NULL); 45 46 clEnqueueWriteBuffer(queue, clB, 1,0, dataSize * sizeof(cl_int), B, 0, 0, &writeEvent); 47 48 clFlush(queue);// 出现需要等待的事件之前,把队列中的任务全部提交掉 49 clWaitForEvents(1, &writeEvent);// 等待指定事件完成 50 51 // 执行内核 52 size_t global_work_size = 
dataSize; 53 clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&clA); 54 clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&clB); 55 clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&clC); 56 clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, &runEvent); 57 clFlush(queue); 58 clWaitForEvents(1, &runEvent); 59 60 // 结果拷回 61 returnC = (cl_int *)clEnqueueMapBuffer(queue, clC, CL_TRUE, CL_MAP_READ, 0, dataSize * sizeof(cl_int), 0, NULL, &mapEvent, NULL); 62 clFlush(queue); 63 clWaitForEvents(1, &mapEvent); 64 65 //结果验证 66 printf("Verify %s.\n", !memcmp(C, returnC, dataSize)?"passed":"failed");// 定义在 iostream 67 68 free(C); 69 free(A); 70 free(B); 71 clReleaseMemObject(clA); 72 clReleaseMemObject(clB); 73 clReleaseMemObject(clC); 74 clReleaseContext(context); 75 clReleaseCommandQueue(queue); 76 clReleaseProgram(program); 77 clReleaseEvent(writeEvent); 78 clReleaseEvent(runEvent); 79 clReleaseEvent(mapEvent); 80 getchar(); 81 return 0; 82 }
● 输出结果
Verify passed.
● 注意
■ cl_event 变量在声明之后,必须先作为某个入队函数(例如 clEnqueueWriteBuffer、clEnqueueNDRangeKernel、clEnqueueMapBuffer)的 cl_event * /* event */ 输出参数被赋值,才能传给 clWaitForEvents 或 clReleaseEvent;对未经赋值的事件调用这两个函数会报错。