基于HOG特征的ANN车牌字符识别

车牌识别大致可分为车牌定位、车牌倾斜矫正、车牌字符切割、车牌字符识别几个步骤。以前做过一个车牌识别的项目，其中字符识别部分是采用模板匹配的方式，该方式效率较低，且准确率不够高，抗干扰能力差。

后将其改用基于HOG特征和ANN的识别方式，有效提高了识别效率、准确率及抗干扰能力。现将该方式分享给大家，自己也作一个备忘。

一、准备工作

收集足够的车牌字符图片（获取途径：网上下载或通过车牌大图进行车牌定位、倾斜矫正、字符切割等算法得到车牌字符图片），人工将其分好类，放置在不同的文件夹下，并将其resize为同样的大小(如16*32)。如下图所示：

为方便打标签（标注），每种字符各收集了200张图片，共37种字符。由于收集难度大（往往只能获取到本省或邻省的车牌图片），只有“湘”“鄂”两种字符全部通过摄像头识别真实车牌采集得到，其余字符一部分由摄像头识别采集，另一部分从网络图片中识别获取。

将文件夹名与汉字对应起来，共37种，如下图所示：

车牌训练集下载地址： https://download.csdn.net/download/fangye945a/12741133

二、车牌字符模型训练与识别预测

话不多说，直接上代码。如下代码基于opencv2.4.9，包含车牌汉字模型训练、预测（识别一张字符图片）、循环预测（识别大量测试集图片，用于测试准确率）功能。三种功能对应的命令行参数依次为：`-t 训练集目录 隐藏层节点数`、`-p 模型文件 图片路径`、`-c 模型文件 测试集目录`。

#include <cstdio>
#include <cstring>
#include <iostream>
#include <vector>
#include <stdlib.h>
#include <sys/time.h>
#include "opencv/cv.h"
#include "opencv/highgui.h"
#include "opencv/ml.h"
#include "opencv/cxcore.h"
#include "contrib.hpp"

// Operating mode of the program. The global `sta` holds the mode chosen by
// arg_type(); being a global it starts out as 0, which is none of the
// enumerators.
enum STATES_OPT
{
	TRAIN     = 1,	// train the network
	TEST      = 2,	// predict a single picture
	CYCLETEST = 3	// predict every picture found under a directory
};

STATES_OPT sta;

using namespace cv;
using namespace std;

//#define GAMMA		// uncomment to compile in the MyGammaCorrection calls guarded by #ifdef GAMMA

#define HOG_SIZE	256		// HOG descriptor length of one 32*16 character picture

#define CLASSNUM	37		// number of character classes

// Total number of training pictures (200 for each of the 37 classes).
// Parenthesized so the macro stays a single value inside larger expressions;
// the bare `200*37` turned e.g. `x / PIC_NUM` into `x / 200 * 37`.
#define	PIC_NUM		(200*37)

/*******************  Global variables **********************/
vector<float> descriptors;		// HOG descriptor of the most recently processed picture (filled by get_pic_hog)
float data[PIC_NUM][HOG_SIZE]={0};	// training features: packData stores one descriptor per row
float f[1][HOG_SIZE];			// feature row of the picture currently being predicted (filled by packData)
float dataCls[PIC_NUM][CLASSNUM]={0};	// training labels: packData sets the column of the sample's class to 1.0
int mClass;							// class index of the training samples currently being packed
int dNum;							// number of training samples packed so far

float fGamma = 1/2.2;				// gamma correction exponent passed to MyGammaCorrection

/********************************************************/
// Sub-directory name of every class: entry i is the decimal text of i, and the
// pictures of class i are read from the directory with that name.
char typetable[CLASSNUM][3]={
	"0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",  "8",  "9",
	"10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
	"20", "21", "22", "23", "24", "25", "26", "27", "28", "29",
	"30", "31", "32", "33", "34", "35", "36"
};

// UTF-8 text of the license-plate Chinese character of every class index.
// Each entry is a 3-byte UTF-8 sequence plus the terminating NUL. The bytes
// are written as hex-escaped string literals: initializing a `char` from a
// constant such as 0xE5 inside braces is a narrowing conversion, which is
// ill-formed since C++11 on platforms where `char` is signed.
char tablename[CLASSNUM][4]={
							"\xE5\xB7\x9D","\xE9\x84\x82",	//  0 川   1 鄂
							"\xE7\x94\x98","\xE8\xB5\xA3",	//  2 甘   3 赣
							"\xE8\xB4\xB5","\xE6\xA1\x82",	//  4 贵   5 桂
							"\xE9\xBB\x91","\xE6\xB2\xAA",	//  6 黑   7 沪
							"\xE5\x90\x89","\xE5\x86\x80",	//  8 吉   9 冀
							"\xE6\x99\x8B","\xE6\xB4\xA5",	// 10 晋  11 津
							"\xE4\xBA\xAC","\xE8\xBE\xBD",	// 12 京  13 辽
							"\xE9\xB2\x81","\xE8\x92\x99",	// 14 鲁  15 蒙
							"\xE9\x97\xBD","\xE5\xAE\x81",	// 16 闽  17 宁
							"\xE9\x9D\x92","\xE7\x90\xBC",	// 18 青  19 琼
							"\xE9\x99\x95","\xE8\x8B\x8F",	// 20 陕  21 苏
							"\xE7\x9A\x96","\xE6\xB9\x98",	// 22 皖  23 湘
							"\xE6\x96\xB0","\xE6\xB8\x9D",	// 24 新  25 渝
							"\xE8\xB1\xAB","\xE7\xB2\xA4",	// 26 豫  27 粤
							"\xE4\xBA\x91","\xE8\x97\x8F",	// 28 云  29 藏
							"\xE6\xB5\x99","\xE4\xBD\xBF",	// 30 浙  31 使
							"\xE6\xBE\xB3","\xE6\xB8\xAF",	// 32 澳  33 港
							"\xE8\xAD\xA6","\xE9\xA2\x86",	// 34 警  35 领
							"\xE5\xAD\xA6"					// 36 学
						};

// Reset the training state: zero the feature and label matrices, mark that no
// class has been started yet (mClass = -1) and that no sample is stored
// (dNum = 0).
// Returns 0; there is no failure path.
int init()
{
	memset(data,0,sizeof(data));
	memset(dataCls,0,sizeof(dataCls));
	mClass = -1;
	dNum = 0;
	return 0;	// the function is declared int; falling off the end was undefined behaviour
}

// Current wall-clock time from gettimeofday(), expressed in microseconds.
long getCurrentTime()
{
	const int kMicrosPerSecond = 1000000;

	struct timeval now;
	gettimeofday(&now, NULL);

	return now.tv_sec * kMicrosPerSecond + now.tv_usec;
}


// Apply gamma correction in place to an 8-bit image.
//   src    : image to correct; must hold data and have 8-bit unsigned depth
//   fGamma : exponent applied to every normalized intensity
// Returns `src` itself. Only 1- and 3-channel images are modified; any other
// channel count is returned untouched.
Mat& MyGammaCorrection(Mat& src, float fGamma)
{
	CV_Assert(src.data);  // raises an error when the image holds no data

	// The uchar iterators below are only valid for CV_8U pixels. The previous
	// check `depth() != sizeof(uchar)` compared against 1 (the value of CV_8S)
	// and therefore let 16-bit and floating-point images through.
	CV_Assert(src.depth() == CV_8U);

	// Lookup table: normalize (i/255), apply the power, scale back to 0..255.
	unsigned char lut[256];
	for( int i = 0; i < 256; i++ )
	{
		lut[i] = pow((float)(i/255.0), fGamma) * 255.0;
	}

	const int channels = src.channels();
	switch(channels)
	{
		case 1:
			{
				// walk every pixel through the matrix iterator
				MatIterator_<uchar> it, end;
				for( it = src.begin<uchar>(), end = src.end<uchar>(); it != end; ++it )
					*it = lut[(*it)];

				break;
			}
		case 3:
			{
				// same lookup, applied to each of the three channels
				MatIterator_<Vec3b> it, end;
				for( it = src.begin<Vec3b>(), end = src.end<Vec3b>(); it != end; ++it )
				{
					(*it)[0] = lut[((*it)[0])];
					(*it)[1] = lut[((*it)[1])];
					(*it)[2] = lut[((*it)[2])];
				}
				break;
			}
	}
	return src;
}

// Compute the HOG descriptor of `img` and store it in the global
// `descriptors`, replacing its previous content.
void get_pic_hog(Mat &img)
{
	// window 16x32, block 8x8, block stride 8x8, cell 4x4, 8 bins.
	// A stack object replaces the former new/delete pair, so the descriptor
	// is released even if compute() throws.
	HOGDescriptor hog(cvSize(16, 32), cvSize(8, 8), cvSize(8, 8), cvSize(4,4), 8);

	descriptors.clear();
	// window stride 16x32, no padding; the result is written to `descriptors`
	hog.compute(img, descriptors, Size(16,32), Size(0, 0));
}

// Copy the current global `descriptors` into the buffer that matches `sta`:
//   TRAIN            : row `dNum` of `data`; the sample's class column in
//                      `dataCls` is set to 1.0 and `dNum` is advanced
//   TEST / CYCLETEST : the single prediction row `f[0]`
// Any other value of `sta` does nothing.
void packData(STATES_OPT sta)
{
	// A row holds HOG_SIZE floats; never copy more than that, whatever size
	// the descriptor turned out to have.
	size_t count = descriptors.size();
	if(count > (size_t)HOG_SIZE)
		count = (size_t)HOG_SIZE;

	if(sta == TRAIN)
	{
		// refuse to write outside data[] / dataCls[]
		if(dNum < 0 || dNum >= PIC_NUM || mClass < 0 || mClass >= CLASSNUM)
			return;

		for(size_t i = 0; i < count; i++)
		{
			data[dNum][i] = descriptors[i];
		}
		dataCls[dNum++][mClass] = 1.0;
	}
	else if(sta == TEST || sta == CYCLETEST)
	{
		// both prediction modes fill the same single-row buffer
		for(size_t i = 0; i < count; i++)
		{
			f[0][i] = descriptors[i];
		}
	}
}

// Translate the first command-line option into an operating mode.
//   "-t" -> TRAIN, "-p" -> TEST, "-c" -> CYCLETEST; the mode is stored in the
//   global `sta` and returned.
//   "--help" prints the usage and returns 0.
// Any other text leaves `sta` unchanged and returns its current value.
int arg_type(char *arg)
{
	if( !strcmp(arg,"-t") )
	{
		sta = TRAIN;
	}
	else if( !strcmp(arg,"-p") )
	{
		sta = TEST;
	}
	else if( !strcmp(arg,"-c") )
	{
		sta = CYCLETEST;
	}
	else if( !strcmp(arg,"--help") )
	{
		// The usage used to list "-t" for cycle prediction and left out the
		// arguments that the three modes actually read from argv[2]/argv[3].
		printf("--help		帮助信息\n");
		printf("-t dirpath hidden_nodes	训练\n");
		printf("-p modelfile filepath	预测\n");
		printf("-c modelfile dirpath	循环预测\n");
		return 0;
	}
	return sta;
}

int train_pictrue(char *argv[])
{
	cv::Directory dir;
	cout<<"start training..."<<endl;
	for(int i=0;i<37;i++)
	{
		string dirpath = argv[2];
		string filetype = "*";
		bool addPath = false;
		char *p = typetable[i];
		string dirname = p;
	
		if(dirpath.at(dirpath.length()-1) != '/' )
		{
			cout<<"LINE:"<<__LINE__<<endl;
			dirpath += "/";
		}

		
		dirpath += dirname;
		cout<<"Dirpath = "<<dirpath<<endl;
		vector<string> filenames = dir.GetListFiles(dirpath, filetype, addPath);
		mClass++;
		cout<<"i="<<i<<" filenames.size = "<< filenames.size()<<" mClass = " <<mClass<<endl;	
		for(int j=0; j < filenames.size(); j++)
		{
			string path = dirpath + "/" + filenames[j];

			Mat imageMat = imread(path);
			if(imageMat.empty())
			{
				cout << "image load fail!"<<endl;
				return -1;
			}

#ifdef GAMMA
			MyGammaCorrection(imageMat, fGamma);
#endif

			get_pic_hog(imageMat);		//获取图片的hog特征

			if(dNum+1 > PIC_NUM)
			{
				cout<<"The train picture is more than "<<PIC_NUM<<endl;
				return 0;
			}

			packData(sta);
			//cout<<"dNum:"<<dNum<<" mClass:"<<mClass<<endl;
		}
	}
	
	cout<<endl<<"---------------Get picture finish ..."<<endl;
	
	CvANN_MLP bp;		//建立神经网络进行训练
	CvANN_MLP_TrainParams params;
	
	params.train_method=CvANN_MLP_TrainParams::BACKPROP; //训练方法为反向传播法
	params.bp_moment_scale=0.1;
	params.bp_dw_scale=0.1;

	Mat layerSizes = ( Mat_<int>(1,3) << HOG_SIZE, atoi(argv[3]), CLASSNUM);//三层神经网络
	
	Mat trainDate(PIC_NUM,HOG_SIZE,CV_32FC1,data);
	
	Mat trainLable(PIC_NUM,CLASSNUM,CV_32FC1,dataCls);
	
	bp.create(layerSizes, CvANN_MLP::SIGMOID_SYM, 1, 1);          		//激活函数sigmoid
	
	cout<<"Training..."<<endl;
	
	bp.train(trainDate,trainLable, Mat(),Mat(), params);  				//开始训练

	char charat_name[32]={0};
	
#ifndef GAMMA
	sprintf(charat_name,"zh_charat_%d_%s.xml",PIC_NUM/CLASSNUM,argv[3]);
#else
	sprintf(charat_name,"zh_charat_%d_%s_gamma.xml",PIC_NUM/CLASSNUM,argv[3]);
#endif
	bp.save(charat_name);

	cout << "training finish!!" <<endl;
	cout << dNum <<endl;
	
	return 0;
}

// Predict the character shown on one picture and print the score of every
// class, the winning class and the elapsed time in microseconds.
//   argv[2] : network model file
//   argv[3] : picture to classify
// Returns 0 on success, -1 on a wrong argument count or an unreadable picture.
int predict_a_pictrue(int argc,char *argv[])
{
	if(argc != 4)
	{
		cout << "param error"<<endl;
		return -1;
	}
	long time1,time2;
	time1 = getCurrentTime();
	CvANN_MLP bp;						// network used for the prediction
	cout<<"start predict!!"<<endl;
	bp.load(argv[2]);					// load the trained network

	Mat imageMat = imread(argv[3]);
	if(imageMat.empty())
	{
		cout << "image load fail!"<<endl;
		return -1;
	}

#ifdef GAMMA
	// Must run before the grayscale copy is made: the correction used to be
	// applied afterwards, to an image that was no longer used, so it never
	// reached the features.
	MyGammaCorrection(imageMat, fGamma);
#endif

	Mat testmat;
	resize(imageMat,testmat,Size(16,32));
	Mat binaryzation_image;
	cvtColor(testmat,binaryzation_image,CV_BGR2GRAY); // to grayscale

	get_pic_hog(binaryzation_image);	// HOG descriptor of the picture
	packData(sta);

	Mat nearest(1, CLASSNUM, CV_32FC1, Scalar(0));
	Mat charFearture( 1, HOG_SIZE, CV_32FC1, f);

	bp.predict(charFearture, nearest);
	time2 = getCurrentTime();
	printf("Predict used time:%ld\n",time2-time1);

	cout << "index\t"<<"charat\t" <<"value\t" <<endl;
	for(int i=0;i<CLASSNUM;i++)
		cout <<i <<"\t"<< tablename[i]<<"\t" << nearest.at<float>(0,i) <<endl;

	// the class with the highest output wins
	Point maxLoc;
	minMaxLoc(nearest, NULL, NULL, NULL, &maxLoc);
	cout<<"The predict Result: "<< "index = "<< maxLoc.x << " Charat = " << tablename[maxLoc.x] <<endl;
	return 0;
}


int predict_dir_pictrue(int argc,char *argv[])
{
	cv::Directory dir;
	if(argc != 4)
	{
		cout << "need a true path in param 3!"<<endl;
		return 0;
	}
	long time1,time2;
	int error_count=0,sum=0;
	time1 = getCurrentTime();
	CvANN_MLP bp;							//建立神经网络进行训练
	cout<<"start cycle predict!!"<<endl;
	bp.load(argv[2]);
	printf("LINE:%d\n",__LINE__);
	char *ptr = argv[3];
	string dirpath = ptr;
	//string dirpath = "../test_picture/";
	string filetype = "*";
	bool addPath = false;
	
	for(int type = 0;type<CLASSNUM;type++ )
	{	
		char *p = typetable[type];
		string dirname = p;
		string mydirpath;
		if( dirpath.at(dirpath.length()-1) == '/')
				mydirpath = dirpath + dirname;
		else
			mydirpath = dirpath + "/" +dirname;
		//cout << "dirpath ="<<mydirpath<<endl;
		vector<string> filenames = dir.GetListFiles(mydirpath, filetype, addPath);
		for(int i=0;i < filenames.size(); i++)
		{
			
	/*
			if(dirpath.at(dirpath.length))
			{
				string path = dirpath + filenames[i];
				printf("path:%s",path);
			}
			else
			{
				
				printf("path:%s",path);
			}
	*/		
			string path = mydirpath + "/" + filenames[i];
			Mat imageMat = imread(path);
			Mat testmat;
			resize(imageMat,testmat,Size(16,32));
			Mat binaryzation_image;				//获取灰度图片
			cvtColor(testmat,binaryzation_image,CV_BGR2GRAY); //灰度化

			get_pic_hog(binaryzation_image);		//获取图片的hog特征
			packData(sta);
			Mat nearest(1, CLASSNUM, CV_32FC1, Scalar(0));
			Mat charFearture( 1, HOG_SIZE, CV_32FC1, f);
			
			bp.predict(charFearture, nearest);	
			
			Point maxLoc;
			minMaxLoc(nearest, NULL, NULL, NULL, &maxLoc);
			
			if(maxLoc.x != type)
			{
				error_count++;
				cout<<"Predict flase! The Path:is "<<path<<"  predict="<<tablename[maxLoc.x]<<endl;
				cout<<"type="<<type<<endl;
				cout << "index\t"<<"charat\t" <<"value\t" <<endl;
				
				for(int k=0;k<CLASSNUM;k++)
				{
					cout <<k <<"\t"<< tablename[k]<<"\t" << nearest.at<float>(0,k) <<endl;
				}
				string error_pic_path = " ../error_zh/";
				error_pic_path += dirname;
				error_pic_path += "/";
				error_pic_path += filenames[i];
				cout<<error_pic_path<<endl;
				
				char cmd[256]="mv ";
				//char cmd[256]="cp ";
				
				strcat(cmd,path.c_str());
				strcat(cmd,error_pic_path.c_str());
				cout<<endl<<"cmd:"<<cmd<<endl;
				system(cmd);
				
			}
			sum++;
		}
	}
	time2 = getCurrentTime();
	printf("Predict %d picture used time:%ldus\n",sum,time2-time1);
	printf("Predict error count:%d\n",error_count);
	printf("Predict true rate = %f\n",1-1.0*error_count/sum);
}

int main(int argc,char *argv[])
{
	if(argc < 2 || argc > 4)
	{
		printf("param error!\n");
		return 0;
	}
	
	init();	//初始化各类参数
	int ret = arg_type(argv[1]);
	
	switch(ret)
	{
		case 1:																	//训练样本
		{
			int ret = train_pictrue(argv);
			if(ret == -1)
				cout<<"Train fail!!!"<<endl;
			break;
		}
		case 2:																	//预测单张图片
		{
			int ret = predict_a_pictrue(argc,argv);
			if(ret == -1)
				cout<<"Predict a pictrue fail!!!"<<endl;
			break;
		}
		case 3:																//预测目录下多张图片
		{
			int ret = predict_dir_pictrue(argc,argv);
			if(ret == -1)
				cout<<"Predict dir pictrue fail!!!"<<endl;
			break;
		}
		default:break;
	}	
	return 0;
}

经测试：采用相同方法采集的字符图片（获取途径单一），训练时调整适当的参数（中间隐藏节点数等参数），字符识别准确率能达到99.99%以上，该识别方法方便应用于嵌入式设备中，如车牌识别摄像机(硬识别)，大家感兴趣也可以自己参考尝试一下。

基于HOG特征的ANN车牌字符识别

猜你喜欢