OpenCL 三种内存对象的使用

▶ 包括三种方式:带有 CL_MEM_READ_ONLY、CL_MEM_WRITE_ONLY、CL_MEM_READ_WRITE 标识的显式拷贝(使用函数 clEnqueueWriteBuffer 和 clEnqueueReadBuffer);带有 CL_MEM_COPY_HOST_PTR 标识的隐式拷贝(不用拷贝函数,创建缓冲区时即从主机指针拷贝数据,在设备上直接使用);以及使用函数 clEnqueueMapBuffer 把缓冲区的一段内存映射为主机端可以直接访问的指针

● 代码

 1 #include <cl.h>
 2 #include <stdio.h>
 3 #include <stdlib.h>
 4 #include <time.h>
 5 #include <iostream>
 6 
 7 using namespace std;
 8 const int nElement = 2048;
 9 
10 const char *programSource = "                                              \
11 __kernel void vectorAdd(__global int *A, __global int *B, __global int *C) \
12 {                                                                          \
13     int idx = get_global_id(0);                                            \
14     C[idx] = A[idx] + B[idx];                                              \
15     return;                                                                \
16 }                                                                          \
17 ";
18 
19 int main(int argc, char* argv[])
20 {   
21     const size_t dataSize = sizeof(int) * nElement;
22     int i, *A, *B, *C, *returnC;
23             
24     A = (int *)malloc(dataSize * sizeof(float));
25     B = (int *)malloc(dataSize * sizeof(float));
26     C = (int *)malloc(dataSize * sizeof(float));        
27     for (srand((unsigned)time(NULL)), i = 0; i < dataSize; A[i] = rand() % 65535, B[i] = rand() % 65535, C[i] = A[i] + B[i], i++);
28 
29     cl_int status;
30     cl_platform_id platform;
31     clGetPlatformIDs(1, &platform, NULL);
32     cl_device_id device;
33     clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
34     cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
35     cl_command_queue queue = clCreateCommandQueue(context, device, 0, &status);
36     cl_program program = clCreateProgramWithSource(context, 1, &programSource, NULL, &status);
37     clBuildProgram(program, 1, &device, NULL, NULL, NULL);
38     cl_kernel kernel = clCreateKernel(program, "vectorAdd", NULL);
39     cl_event writeEvent, runEvent, mapEvent;
40 
41     //创建三个内存对象,把 A 隐式拷贝到 clA,把 B 显示拷贝到 clB,clC 接收计算结果映射给 returnC
42     cl_mem clA = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, dataSize * sizeof(cl_int), A, NULL);
43     cl_mem clB = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize * sizeof(cl_int), NULL, NULL);
44     cl_mem clC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize * sizeof(cl_int), NULL, NULL);
45 
46     clEnqueueWriteBuffer(queue, clB, 1,0, dataSize * sizeof(cl_int), B, 0, 0, &writeEvent);
47     
48     clFlush(queue);// 出现需要等待的事件之前,把队列中的任务全部提交掉    
49     clWaitForEvents(1, &writeEvent);// 等待指定事件完成
50         
51     // 执行内核
52     size_t global_work_size = dataSize; 
53     clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&clA);
54     clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&clB);
55     clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&clC);    
56     clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, &runEvent);    
57     clFlush(queue);
58     clWaitForEvents(1, &runEvent);
59     
60     // 结果拷回
61     returnC = (cl_int *)clEnqueueMapBuffer(queue, clC, CL_TRUE, CL_MAP_READ, 0, dataSize * sizeof(cl_int), 0, NULL, &mapEvent, NULL);    
62     clFlush(queue);
63     clWaitForEvents(1, &mapEvent);    
64 
65     //结果验证
66     printf("Verify %s.\n", !memcmp(C, returnC, dataSize)?"passed":"failed");// 定义在 iostream
67 
68     free(C);    
69     free(A);    
70     free(B);    
71     clReleaseMemObject(clA);
72     clReleaseMemObject(clB);
73     clReleaseMemObject(clC);    
74     clReleaseContext(context);
75     clReleaseCommandQueue(queue);
76     clReleaseProgram(program);
77     clReleaseEvent(writeEvent);
78     clReleaseEvent(runEvent);
79     clReleaseEvent(mapEvent);
80     getchar();
81     return 0;
82 }

● 输出结果

Verify passed.

● 注意

■ 事件变量(cl_event)在声明以后,必须先作为某个入队函数(如 clEnqueueWriteBuffer、clEnqueueNDRangeKernel)的  cl_event * /* event */  参数被赋值(初始化),才能传给 clWaitForEvents 或 clReleaseEvent;对未初始化的事件调用这两个函数会出错

猜你喜欢

转载自www.cnblogs.com/cuancuancuanhao/p/8964534.html