2D卷积 HLS C/C++实现

/*
 * Direct 2-D sliding-window filter (the filter is applied unflipped, i.e.
 * cross-correlation) over an int8 image stored row-major.
 *
 * in:     input image, row-major. Its row pitch is taken to be
 *         ncols*stride + k - 1 elements; the rows actually read are
 *         0 .. (nrows-1)*stride + k - 1.
 * out:    output image, row-major, nrows x ncols. Each weighted window sum
 *         is ADDED to the value already stored in out, so the caller must
 *         initialise it (e.g. to zero) beforehand. The result is narrowed
 *         to int8_t without saturation.
 * filter: k x k coefficients, row-major.
 * nrows:  number of output rows.
 * ncols:  number of output columns.
 * k:      filter side length (the window is k x k).
 * stride: step of the window between neighbouring output pixels.
 *
 * The three buffers are expected not to overlap.
 */
void _2DConv(int8_t *in,int8_t *out,int8_t *filter,uint32_t nrows,uint32_t ncols,uint8_t k,uint8_t stride)
{
	/* Row pitch of the input image, in elements. */
	const uint32_t ncols_i = ncols * stride + k - 1;

	/* Counters are 32-bit so they cannot wrap before reaching the
	 * 32-bit bounds nrows / ncols. */
	for (uint32_t r = 0; r < nrows; r++) {
		/* Strictly less than ncols: the output has exactly ncols columns. */
		for (uint32_t c = 0; c < ncols; c++) {
			int8_t *dst = out + r * ncols + c;

			/* Accumulate in 32 bits and touch the output once per pixel
			 * instead of a read-modify-write for every filter tap. */
			int32_t acc = *dst;

			for (uint32_t i = 0; i < k; i++) {
				const int8_t *in_row = in + (r * stride + i) * ncols_i + c * stride;
				const int8_t *f_row = filter + i * k;

				for (uint32_t j = 0; j < k; j++) {
					acc += (int32_t)in_row[j] * (int32_t)f_row[j];
				}
			}

			*dst = (int8_t)acc;
		}
	}
}

AXI

/*
 * Top-level HLS entry point: runs _2DConv on memory-mapped buffers.
 *
 * pixel_in:      AXI4 master port. Per the author's notes the accelerator
 *                reads both the input image and the filter from DDR through
 *                it; the filter coefficients are taken from
 *                pixel_in + ROWS_I*COLS_I, i.e. directly after the image.
 * pixel_out:     AXI4 master port through which the result is written to DDR.
 * addr_reserved: exposed on the AXI4-Lite bundle at offset 0xFFF0; the
 *                function body does not read it.
 */
void HW_2DConv_Mmap_1(int8_t *pixel_in,int8_t *pixel_out,int32_t addr_reserved)
{
	/* AXI4 data buses (m_axi): one bundle for reads, one for writes. */
	#pragma HLS INTERFACE m_axi depth =482*272+3*3 port=pixel_in offset=slave bundle=user_axi_in register 
	#pragma HLS INTERFACE m_axi depth =482*270     port=pixel_out offset=slave bundle=user_axi_out register

	/* AXI4-Lite control bus (s_axilite): carries the base addresses of the
	 * input/output buffers plus the block-level control (port=return). */
	#pragma HLS INTERFACE s_axilite port=pixel_in bundle=user_axi4lite_config register
	#pragma HLS INTERFACE s_axilite port=pixel_out bundle=user_axi4lite_config register
	#pragma HLS INTERFACE s_axilite port=addr_reserved offset=0xFFF0  bundle=user_axi4lite_config register
	#pragma HLS INTERFACE s_axilite port=return  bundle=user_axi4lite_config register

	/* The filter coefficients sit immediately behind the input image. */
	int8_t *coeffs = pixel_in + ROWS_I*COLS_I;

	_2DConv(pixel_in,
	        pixel_out,
	        coeffs,
	        ROWS_O,
	        COLS_O,
	        FILTER_SIZE,
	        STRIDE);
}

ref
https://blog.csdn.net/weixin_43134018/article/details/94577896

图像处理
https://www.cnblogs.com/Dinging006/p/9328000.html

发布了415 篇原创文章 · 获赞 251 · 访问量 68万+

猜你喜欢

转载自blog.csdn.net/qq_35608277/article/details/104620527
HLS