片上单片机与PC通过UART相连并且可以通过PC对片上单片机进行相应的指令操作。
片上单片机可以控制相应的IP core从而用FPGA上的IP core进行相应的运算。
相关源码经过编译后生成可执行文件并拷入U盘,然后片上单片机可以通过挂载U盘来运行该可执行文件。
/*
 * dmaBench: feeds input data to an FPGA IP core through the /dev/axi-dma1
 * character device, reads the result back, and compares it against a
 * software reference computed on the CPU (cnn_simulate).
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

/* Shape of the reference computation:
 * out[CNN_OUT_ROWS][CNN_COLS] = weights[CNN_OUT_ROWS][CNN_IN_ROWS] * in[CNN_IN_ROWS][CNN_COLS]. */
enum {
    CNN_IN_ROWS = 27,
    CNN_OUT_ROWS = 16,
    CNN_COLS = 600,
    CNN_IN_ELEMS = CNN_IN_ROWS * CNN_COLS,
    CNN_OUT_ELEMS = CNN_OUT_ROWS * CNN_COLS
};

unsigned long tStart, tEnd;
unsigned long data;
float buf_in[CNN_IN_ELEMS] = {0};
float buf_out[CNN_OUT_ELEMS] = {0};
float weights[CNN_OUT_ROWS * CNN_IN_ROWS] = {0};

/*
 * Returns the current wall-clock time in microseconds.
 * The arithmetic is done in unsigned long so that it wraps instead of
 * overflowing a signed time_t; differences of two samples stay valid.
 */
unsigned long getTime(){
    struct timeval now;
    gettimeofday(&now, NULL);
    return (unsigned long)now.tv_sec * 1000000UL + (unsigned long)now.tv_usec;
}

/*
 * Prints one tab-separated result line (label, amount of data, elapsed time,
 * data/time ratio, dmaUsed) to stdout and appends the same line to
 * "report.dat" in the current directory.
 * If report.dat cannot be opened, the line is only printed to stdout.
 */
void report(char *msg, unsigned long data, unsigned long time, unsigned long dmaUsed){
    double rate = data * 1.0 / time;

    printf("%s\t%lu\t%lu\t%f\t%lu\n", msg, data, time, rate, dmaUsed);

    FILE *f = fopen("report.dat", "a");
    if (f == NULL) {
        perror("report.dat");
        return;
    }
    fprintf(f, "%s\t%lu\t%lu\t%f\t%lu\n", msg, data, time, rate, dmaUsed);
    fclose(f);
}

/*
 * Times the expression `f`, stores its value through dataPtr and reports it.
 * timeStart, timeEnd and dataPtr are pointers to unsigned long.
 * The expansion is a single braced block so it behaves as one statement;
 * note that the pointer arguments are evaluated more than once.
 */
#define REPORT(f, timeStart, timeEnd, dataPtr, msg, dmaUsed) { \
    *(timeStart) = getTime(); \
    *(dataPtr) = (f); \
    *(timeEnd) = getTime(); \
    report((msg), *(dataPtr), *(timeEnd) - *(timeStart), (dmaUsed)); \
}

/*
 * Compares two float arrays element by element and prints every index at
 * which they differ (exact comparison, no tolerance).
 */
void checkData(float *bufferIn, float *bufferOut, unsigned int elems){
    unsigned int i;
    for (i = 0; i < elems; i++) {
        if (bufferIn[i] != bufferOut[i]) {
            printf("[%u] %f->%f\n", i, bufferIn[i], bufferOut[i]);
        }
    }
}

/*
 * CPU byte-by-byte copy of `elems` elements of `size` bytes each.
 * Both buffers must hold at least elems * size bytes.
 * Returns the number of bytes copied (elems * size).
 */
unsigned long memCpy_ARM(char *bufferIn, char *bufferOut, unsigned long elems, size_t size){
    unsigned long total = elems * size;
    unsigned long i;

    /* Copy the full byte count that is reported back, not just `elems` bytes. */
    for (i = 0; i < total; i++) {
        bufferOut[i] = bufferIn[i];
    }
    return total;
}

#define WRITE_LEN (16632*4)
#define READ_LEN (9600*4)
#define FIFO_LEN (512*1024)
#define DMA_NUM 1

/*
 * Sends elems * size bytes from bufferIn to /dev/axi-dma1 in chunks of at
 * most FIFO_LEN bytes, then reads READ_LEN bytes back into bufferOut.
 *
 * bufferOut must have room for READ_LEN bytes: the read phase always
 * requests READ_LEN bytes, independent of elems and size.
 *
 * Each phase advances by the number of bytes actually transferred and stops
 * at the first failed (<= 0) transfer instead of silently skipping data.
 *
 * Returns 1 if the device cannot be opened, otherwise
 * elems * size * dmaToUse (unchanged from the original contract).
 */
unsigned long memCpy_DMA(char *bufferIn, char *bufferOut, unsigned long elems, size_t size, int dmaToUse){
    const unsigned long writeTotal = size * elems;
    const unsigned long readTotal = READ_LEN;
    unsigned long byteMoved;
    unsigned long byteToMove;
    ssize_t ret;
    int fd;

    fd = open("/dev/axi-dma1", O_RDWR);
    if (fd < 0) {
        printf("open axi-dma1 failed!\n");
        return 1;
    }

    byteMoved = 0;
    while (byteMoved != writeTotal) {
        byteToMove = writeTotal - byteMoved > FIFO_LEN ? FIFO_LEN : writeTotal - byteMoved;
        ret = write(fd, &bufferIn[byteMoved], byteToMove);
        if (ret < 0 || (unsigned long)ret != byteToMove) {
            printf("write DMA error %ld!\n", (long)ret);
        } else {
            printf("write DMA %ld\n", (long)ret);
        }
        if (ret <= 0) {
            break; /* no progress possible; avoid spinning or skipping data */
        }
        byteMoved += (unsigned long)ret;
    }
    printf("Write Total %lu\n", byteMoved);

    byteMoved = 0;
    while (byteMoved != readTotal) {
        byteToMove = readTotal - byteMoved > FIFO_LEN ? FIFO_LEN : readTotal - byteMoved;
        ret = read(fd, &bufferOut[byteMoved], byteToMove);
        if (ret < 0 || (unsigned long)ret != byteToMove) {
            printf("read DMA error %ld\n", (long)ret);
        } else {
            printf("read DMA %ld\n", (long)ret);
        }
        if (ret <= 0) {
            break;
        }
        byteMoved += (unsigned long)ret;
    }
    printf("Read Total %lu\n", byteMoved);

    close(fd);
    return elems * size * dmaToUse;
}

/*
 * Software reference: multiplies the global `weights` matrix
 * (CNN_OUT_ROWS x CNN_IN_ROWS, row-major) by `in`
 * (CNN_IN_ROWS x CNN_COLS, row-major) and stores the
 * CNN_OUT_ROWS x CNN_COLS result in `out`.
 *
 * `in` must hold CNN_IN_ELEMS floats and `out` CNN_OUT_ELEMS floats.
 * The work is staged through temporary buffers, so `in` and `out` may
 * overlap. If the temporaries cannot be allocated, a message is printed
 * and `out` is left untouched.
 */
void cnn_simulate(float *in, float *out) {
    float *stagedIn = malloc(sizeof *stagedIn * CNN_IN_ELEMS);
    float *stagedOut = malloc(sizeof *stagedOut * CNN_OUT_ELEMS);
    int row, col, k;

    if (stagedIn == NULL || stagedOut == NULL) {
        printf("cnn_simulate: out of memory\n");
        free(stagedIn);
        free(stagedOut);
        return;
    }

    for (k = 0; k < CNN_IN_ELEMS; k++) {
        stagedIn[k] = in[k];
    }

    for (row = 0; row < CNN_OUT_ROWS; row++) {
        for (col = 0; col < CNN_COLS; col++) {
            float result = 0;
            for (k = 0; k < CNN_IN_ROWS; k++) {
                float temp = weights[row * CNN_IN_ROWS + k] * stagedIn[k * CNN_COLS + col];
                result += temp;
            }
            stagedOut[row * CNN_COLS + col] = result;
        }
    }

    for (k = 0; k < CNN_OUT_ELEMS; k++) {
        out[k] = stagedOut[k];
    }

    free(stagedIn);
    free(stagedOut);
}

/*
 * Parses a non-negative decimal integer.
 * Returns 0 and stores the value on success, -1 on malformed, negative or
 * out-of-range input.
 */
static int parseCount(const char *text, unsigned long *value){
    char *end = NULL;
    unsigned long parsed;

    errno = 0;
    parsed = strtoul(text, &end, 10);
    /* strtoul silently negates "-N", so reject a minus sign explicitly. */
    if (end == text || *end != '\0' || errno == ERANGE || strchr(text, '-') != NULL) {
        return -1;
    }
    *value = parsed;
    return 0;
}

/*
 * Usage: ./dmaBench DATA_IN DATA_OUT
 *   DATA_IN  - number of floats written to the DMA device
 *   DATA_OUT - number of floats read back and compared
 *
 * Computes the software reference, pushes DATA_IN floats through
 * /dev/axi-dma1, reads DATA_OUT floats back and prints every element that
 * differs from the reference. Returns 0 on completion (mismatches included),
 * -1 on invalid arguments, allocation failure or if the device cannot be
 * opened.
 */
int main(int argc, char **argv) {
    const size_t cnnIn = CNN_IN_ELEMS;
    const size_t cnnOut = CNN_OUT_ELEMS;
    float *bufferIn = NULL;
    float *bufferOut_ARM = NULL;
    float *bufferOut_DMA = NULL;
    unsigned long size_in = 0;
    unsigned long size_out = 0;
    size_t inElems, outElems, copyElems, bytesIn, bytesOut;
    unsigned long i;
    ssize_t ret;
    int fd = -1;
    int rc = -1;

    if (argc != 3) {
        printf("Usage: ./dmaBench DATA_IN DATA_OUT\n");
        exit(0);
    }
    if (parseCount(argv[1], &size_in) != 0 || parseCount(argv[2], &size_out) != 0) {
        printf("DATA_IN and DATA_OUT must be non-negative decimal integers\n");
        return -1;
    }
    printf("in %lu, out %lu\n", size_in, size_out);

    /* cnn_simulate always touches CNN_IN_ELEMS / CNN_OUT_ELEMS floats, so the
     * buffers must be at least that large whatever sizes were requested. */
    inElems = size_in > cnnIn ? size_in : cnnIn;
    outElems = size_out > cnnOut ? size_out : cnnOut;

    /* calloc zero-fills and checks the element-count multiplication. */
    bufferIn = calloc(inElems, sizeof *bufferIn);
    bufferOut_DMA = calloc(outElems, sizeof *bufferOut_DMA);
    bufferOut_ARM = calloc(outElems, sizeof *bufferOut_ARM);
    if (bufferIn == NULL || bufferOut_DMA == NULL || bufferOut_ARM == NULL) {
        printf("out of memory\n");
        goto cleanup;
    }

    /* Never read past the end of the static buf_in array. */
    copyElems = size_in < cnnIn ? size_in : cnnIn;
    memcpy(bufferIn, buf_in, copyElems * sizeof *bufferIn);

    cnn_simulate(bufferIn, bufferOut_ARM);

    fd = open("/dev/axi-dma1", O_RDWR);
    if (fd < 0) {
        printf("open axi-dma1 failed!\n");
        goto cleanup;
    }

    bytesIn = size_in * sizeof *bufferIn;
    bytesOut = size_out * sizeof *bufferOut_DMA;

    ret = write(fd, (unsigned char *)bufferIn, bytesIn);
    if (ret < 0 || (size_t)ret != bytesIn) {
        printf("import image data failed %ld\n", (long)ret);
    }

    ret = read(fd, (unsigned char *)bufferOut_DMA, bytesOut);
    if (ret < 0 || (size_t)ret != bytesOut) {
        printf("read error %ld\n", (long)ret);
    }

    for (i = 0; i < size_out; i++) {
        if (bufferOut_ARM[i] != bufferOut_DMA[i]) {
            printf("[%lu] %f - > %f\n", i, bufferOut_ARM[i], bufferOut_DMA[i]);
        }
    }
    rc = 0;

cleanup:
    if (fd >= 0) {
        close(fd);
    }
    free(bufferIn);
    free(bufferOut_DMA);
    free(bufferOut_ARM);
    return rc;
}
1.关于主函数的参数
int main(int argc, char* argv[])
argc是argument count的简称,argv是argument vector的简称,也就是,argc指代参数的个数,argv是指向各个参数字符串的指针数组
- argv[0] 指向程序运行的全路径名
- argv[1] 指向在DOS命令行中执行程序名后的第一个字符串
- argv[2] 指向执行程序名后的第二个字符串
- argv[argc]为NULL。
例如:输入"test c:/testPic/01.jpg",就是在启动test.exe程序的同时,给该程序指定一个额外的参数“c:/testPic/01.jpg”。这里,argc也就是参数的个数,就是2个,test为第一个参数,空格之后的“c:/testPic/01.jpg”是第二个参数,换句话说,argc=2, argv[0]="test",argv[1]="c:/testPic/01.jpg"。
2.关于文件的操作
getc 从文件中读取字符 getc(fp),fp为文件指针
gets 从标准输入(stdin)中读取一行字符串(不检查缓冲区长度,已在C11中移除,应使用fgets代替)
fopen与open的区别,`fopen`是C标准函数,因此拥有良好的移植性;而`open`是UNIX系统调用,移植性有限。open返回文件描述符,而文件描述符是UNIX系统下的一个重要概念,UNIX下的一切设备都是以文件的形式操作。
所以fd = open("/dev/axi-dma1", O_RDWR); 就是以可读可写(O_RDWR)的方式打开AXI DMA对应的设备文件,之后通过返回的文件描述符fd对DMA进行读写操作。
write 函数 ssize_t write(int fd, const void *buf, size_t nbyte);
read函数 ssize_t read(int fd, void *buf, size_t count);
read()会从参数fd所指的文件中读取count个字节到buf指针所指的内存中。若参数count为0,则read()不会有作用并返回0。返回值为实际读取到的字节数,如果返回0,表示已到达文件尾或无可读取的数据。出错时返回-1,并设置errno。
3.memcmp函数
4.atoi函数
C 库函数 int atoi(const char *str) 把参数 str 所指向的字符串转换为一个整数(类型为 int 型)。该函数返回转换后的整数(int),如果没有执行有效的转换,则返回零。
unsigned long size_in = atoi(argv[1]);
unsigned long size_out = atoi(argv[2]);
根据主函数中此段程序,我们知道了命令行的第一个参数(argv[0])为程序名,第二个参数(argv[1])为输入的size,第三个参数(argv[2])为输出的size。
5.memset
所以程序就是运用open来打开dma的通信,然后用read和write来进行对IP core的读和写。在读懂了程序之后我们进行相应的程序编写,实现FPGA运算卷积的功能。