以前我用 CUDA 写过简单的图像模糊的例子,时间长了就忘了,现在再看的时候不得不重新花时间学习,
所以就想把它记录在博客园上,方便自己复习和记忆。以后用到这方面的程序时,就可以随时查阅和下载。
简单的均值模糊就是将像素点及其周围邻域内的像素值加起来求均值。具体的原理我就不在这里赘述了,直接上
CUDA 代码:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>

#define CHANNELS 3
#define BLUR_SIZE 3

// Abort with a readable message when a CUDA runtime call fails. Kernel
// launches do not return an error themselves, so wrap cudaGetLastError()
// and the following synchronisation with this macro as well.
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
                    cudaGetErrorString(err_));                              \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

/**
 * Box (mean) blur over interleaved 8-bit pixel data.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per output pixel;
 * the x dimension maps to image columns and the y dimension to image rows.
 * Threads that fall outside the image do nothing.
 *
 * For every channel of its pixel, a thread averages the values inside a
 * (2*BLUR_SIZE+1) x (2*BLUR_SIZE+1) window centred on the pixel. Window
 * positions outside the image are skipped, so border pixels are averaged
 * over fewer samples.
 *
 * @param in        input image, indexed as (row*imgwidth + col)*channels + k
 * @param out       output image, same indexing and size as `in`
 * @param imgwidth  number of columns
 * @param imgheight number of rows
 * @param channels  number of interleaved channels per pixel
 */
__global__ void Img2Blurincuda(unsigned char *in, unsigned char*out, int imgwidth, int imgheight, int channels)
{
    int ty = blockIdx.y*blockDim.y + threadIdx.y;
    int tx = blockIdx.x*blockDim.x + threadIdx.x;

    if (ty < imgheight && tx < imgwidth) {
        for (int k = 0; k < channels; ++k) {
            // The accumulators must restart for every channel; otherwise the
            // sum and count of earlier channels leak into later ones.
            int PixVal = 0;
            int Pixels = 0;

            for (int blurRow = -BLUR_SIZE; blurRow < BLUR_SIZE + 1; ++blurRow) {
                for (int blurCol = -BLUR_SIZE; blurCol < BLUR_SIZE + 1; ++blurCol) {
                    int Cur_ty = ty + blurRow;
                    int Cur_tx = tx + blurCol;

                    // Only accumulate neighbours that lie inside the image.
                    if (Cur_ty > -1 && Cur_ty < imgheight && Cur_tx > -1 && Cur_tx < imgwidth) {
                        // size_t arithmetic avoids int overflow on large images.
                        size_t src = ((size_t)Cur_ty * imgwidth + Cur_tx) * channels + k;
                        PixVal += in[src];
                        Pixels++;
                    }
                }
            }

            // Pixels >= 1 here: the centre pixel itself is always in bounds.
            size_t dst = ((size_t)ty * imgwidth + tx) * channels + k;
            out[dst] = (unsigned char)(PixVal / Pixels);
        }
    }
}

using namespace cv;

/**
 * Loads an image, blurs it on the GPU with Img2Blurincuda, reports the
 * kernel wall-clock time and displays the original and blurred images.
 *
 * Returns 0 on success and 1 when the input image cannot be read.
 */
int main(void)
{
    // Read the input picture (lena).
    Mat img = imread("E:\\opencv\\lena512.bmp"); //lena512color.tiff;lena512.bmp
    if (img.empty()) {
        // Without this check an unreadable file would hand a null pointer
        // and zero sizes to the CUDA calls below.
        fprintf(stderr, "failed to read input image\n");
        return 1;
    }
    // The kernel indexes the buffer without any row padding.
    if (!img.isContinuous()) {
        img = img.clone();
    }

    // Create a window named "lena".
    namedWindow("lena");
    // Show the original image in that window.
    imshow("lena", img);
    // Wait up to 3000 ms for a key press before continuing.
    waitKey(3000);

    const int imgheight = img.rows;
    const int imgwidth = img.cols;
    const int imgchannel = img.channels();

    printf("imgchannel=%d\n", img.channels());

    // Same element type as the input so the buffer size always matches
    // the number of bytes copied back from the device.
    Mat blurImage(imgheight, imgwidth, img.type());

    const size_t numBytes = (size_t)imgheight * imgwidth * imgchannel * sizeof(unsigned char);

    unsigned char *d_in = NULL;
    unsigned char *d_out = NULL;

    CUDA_CHECK(cudaMalloc((void**)&d_in, numBytes));
    CUDA_CHECK(cudaMalloc((void**)&d_out, numBytes));

    CUDA_CHECK(cudaMemcpy(d_in, img.data, numBytes, cudaMemcpyHostToDevice));

    // One thread per pixel; ceil-divide so partial tiles are still covered.
    dim3 BlockDim(16, 16);
    dim3 GridDim((imgwidth - 1) / BlockDim.x + 1, (imgheight - 1) / BlockDim.y + 1);

    clock_t start, end;
    start = clock();

    // Argument order must follow the kernel signature: width first, then height.
    Img2Blurincuda<<<GridDim, BlockDim>>>(d_in, d_out, imgwidth, imgheight, imgchannel);
    CUDA_CHECK(cudaGetLastError());

    // The launch is asynchronous; wait for completion before stopping the clock.
    CUDA_CHECK(cudaDeviceSynchronize());

    end = clock();
    printf("cuda exec time is %.8f\n", (double)(end - start) / CLOCKS_PER_SEC);

    CUDA_CHECK(cudaMemcpy(blurImage.data, d_out, numBytes, cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaFree(d_in));
    CUDA_CHECK(cudaFree(d_out));

    imshow("blurImage", blurImage);
    waitKey(6000);
    return 0;
}
运行的结果如下:
原始图片:
模糊处理后: