(NEON) - Swapping color channels

在《NEON Programmer's Guide》中有一个 Swapping color channels 示例,展示了 NEON 技术的加速效果。

这里我编程实现了下,观察下加速效果,在两个平台上做对比试验。

  普通arm neon加速
nvidia tk1    
raspberry pi    
 1 #include <stdlib.h>
 2 #include <memory.h>
 3 #include <fstream>
 4 #include <sys/time.h>
 5 
 6 #include "/usr/lib/gcc-cross/arm-linux-gnueabihf/5/include/arm_neon.h"
 7 
 8 using namespace std;
 9 
10 int main(int argc, char **argv)
11 {
12     int width = 1920;
13     int height = 1080;
14 
15     int pixel_number = width * height;
16     int image_size = width * height * 3;
17 
18     struct timeval tstart, tend;
19 
20     unsigned char *rgb_buffer = (unsigned char *)malloc(image_size);
21     unsigned char *rgb_buffer_result = (unsigned char *)malloc(image_size);
22 
23     for (int i = 0; i < pixel_number; i++)
24     {
25         rgb_buffer[3 * i + 0] = 1;
26         rgb_buffer[3 * i + 1] = 2;
27         rgb_buffer[3 * i + 2] = 3;
28     }
29 
30     fstream wfile("./origin.dat", ios::binary | ios::out);
31     wfile.write((char *)rgb_buffer, image_size);
32     wfile.close();
33 
34     gettimeofday(&tstart, NULL);
35 
36     for (int i = 0; i < pixel_number; i++)
37     {
38         rgb_buffer_result[i] = rgb_buffer[i * 3];
39         rgb_buffer_result[i + pixel_number] = rgb_buffer[i * 3 + 1];
40         rgb_buffer_result[i + 2 * pixel_number] = rgb_buffer[i * 3 + 2];
41     }
42 
43     gettimeofday(&tend, NULL);
44     int timeuse = (1000000 * (tend.tv_sec - tstart.tv_sec) + (tend.tv_usec - tstart.tv_usec)) / 1000;
45     printf("tk1 de_inter with arm (1920*1080) cost time(ms) = %d\n", timeuse);
46 
47     fstream wfile1("./arm.dat", ios::binary | ios::out);
48     wfile1.write((char *)rgb_buffer_result, image_size);
49     wfile1.close();
50 
51     memset(rgb_buffer_result, 0 , image_size);
52 
53     uint8x16x3_t neon_1;
54 
55     gettimeofday(&tstart, NULL);
56 
57     for (int i = 0; i < image_size / 48; i++)
58     {
59         neon_1 = vld3q_u8((uint8_t *)rgb_buffer + i * 48);
60         vst1q_u8(rgb_buffer_result + i * 16, neon_1.val[0]);
61         vst1q_u8(rgb_buffer_result + pixel_number + i * 16, neon_1.val[1]);
62         vst1q_u8(rgb_buffer_result + 2 * pixel_number + i * 16, neon_1.val[2]);
63     }
64 
65     gettimeofday(&tend, NULL);
66     timeuse = (1000000 * (tend.tv_sec - tstart.tv_sec) + (tend.tv_usec - tstart.tv_usec)) / 1000;
67     printf("tk1 de_inter with neon (1920*1080) cost time(ms) = %d\n", timeuse);
68 
69     fstream wfile2("./neon.dat", ios::binary | ios::out);
70     wfile2.write((char *)rgb_buffer_result, image_size);
71     wfile2.close();
72 
73     return 0;
74 }
// Makefile
arm-linux-gnueabihf-g++ de_inter.cpp -o de_inter.out -mfpu=neon -mfloat-abi=hard


tk1 de_inter with arm (1920*1080) cost time(ms) = 43
tk1 de_inter with neon (1920*1080) cost time(ms) = 15

猜你喜欢

转载自www.cnblogs.com/aperolchen/p/9991253.html
今日推荐