今年春节在家里实现的,以前的博客网站不稳定,就发在这里吧。
===================================================================
自己实现了一下 BP 算法,无需依赖第三方库。在 MNIST 数据集上可以达到 95% 的识别率,感觉很难再升上去了,应该是算法本身的瓶颈。
- 矩阵、向量操作一起实现。
- #include<stdio.h>
- #include<vector>
- #include<stdlib.h>
- #include<math.h>
- #include<time.h>
- #include<windows.h>
- using namespace std;
- #define QUADRATIC
- #define MAX_LAYERS 6
- typedef vector<double> VEC;
- typedef vector<VEC> MAT;
/** Forward declarations of the VEC/MAT helper functions **/
//VEC
int vec_clear(VEC &vec); //zero all elements
double operator *(VEC &a,VEC &b); //dot product
VEC operator -(VEC &a,VEC &b); //element-wise subtraction
int operator -=(VEC &a,VEC &b);
VEC operator +(VEC &a,VEC &b); //element-wise addition
int operator +=(VEC &a,VEC &b); //in-place addition
int operator *=(VEC &a,MAT& b); //row-vector (1xk) times matrix (kxn); result replaces a
int operator *=(VEC &a,double w); //scale vector by a constant
int vec_mul(VEC &c,VEC &a,VEC &b);//Hadamard product: c[i]=a[i]*b[i],i=1..k
//MAT
int mat_resize(MAT &mat,int r,int c,double value); //resize matrix to r*c
int mat_clear(MAT &mat); //zero all entries
int operator +=(MAT &a,MAT &b);
int operator -=(MAT &a,MAT &b); //in-place matrix add/subtract
int operator *=(MAT &a,double w); //scale matrix by a constant
- //VEC implement
- int vec_clear(VEC &vec){for (int i=0;i<vec.size();i++)vec[i]=0; return 0;}
- int vec_mul(VEC &c,VEC &a,VEC &b){for (int i=0;i<c.size();i++) c[i]=a[i]*b[i];return 0;}
- int operator *=(VEC &a,MAT& b){
- int n=a.size(),m=b[0].size(); //1*n n*m
- VEC tmp;
- tmp.resize(m,0);
- for (int i=0;i<m;i++){
- for (int j=0;j<n;j++){
- tmp[i]+=a[j]*b[j][i];
- }
- }
- a=tmp;
- return 0;
- }
- double operator *(VEC &a,VEC &b){ //向量点乘
- double c=0;
- for (int i=0;i<a.size();i++)c+=a[i]*b[i];
- return c;
- }
- VEC operator +(VEC &a,VEC &b){ //向量加法
- VEC c;
- c.resize(a.size());
- for (int i=0;i<a.size();i++)c[i]=a[i]+b[i];
- return c;
- }
- VEC operator -(VEC &a,VEC &b){ //向量减法
- VEC c;
- c.resize(a.size());
- for (int i=0;i<a.size();i++)c[i]=a[i]-b[i];
- return c;
- }
- int operator +=(VEC &a,VEC &b){ for (int i=0;i<a.size();i++)a[i]+=b[i]; return 0;}
- int operator -=(VEC &a,VEC &b){ for (int i=0;i<a.size();i++)a[i]-=b[i]; return 0;}
- int operator *=(VEC &a,double w){for (int i=0;i<a.size();i++)a[i]*=w; return 0;}
- //MAT implement
- int mat_resize(MAT &mat,int r,int c,double value=0.0){ mat.resize(r);for (int i=0;i<r;i++) mat[i].resize(c,value); return 0;} //设置矩阵大小为r*c
- int mat_clear(MAT &mat){ for (int i=0;i<mat.size();i++) vec_clear(mat[i]);return 0;} //矩阵清0
- int operator +=(MAT &a,MAT &b){
- for (int r=0;r<a.size();r++)
- for (int c=0;c<a[0].size();c++)
- a[r][c]+=b[r][c];
- return 0;
- }
- int operator -=(MAT &a,MAT &b){
- for (int r=0;r<a.size();r++)
- for (int c=0;c<a[0].size();c++)
- a[r][c]-=b[r][c];
- return 0;
- }
- int operator *=(MAT &a,double w){
- for (int r=0;r<a.size();r++)
- for (int c=0;c<a[0].size();c++)
- a[r][c]*=w;
- return 0;
- }
//Input data set
// A labelled data set: n samples, each with an input vector in[i]
// (length a) and a desired-output vector y[i] (length b).
struct DATASET{
    int n;              // number of samples
    vector<VEC> in;     // inputs
    vector<VEC> y;      // expected outputs (one-hot vectors for MNIST)
    DATASET(int num,int a,int b){ //pre-allocate storage for the whole set
        n=num;
        in.resize(n);
        y.resize(n);
        for (int i=0;i<n;i++){
            in[i].resize(a);
            y[i].resize(b);
        }
    }
};
- //Class BP
- class BP{
- private:
- int lays; //网络层数
- vector<int> neus; //网络每层的神经元个数
- MAT w[MAX_LAYERS]; //网络权值weight
- VEC b[MAX_LAYERS]; //网络权值bias
- /* 以下变量用于当前输入 */
- MAT w2[MAX_LAYERS];
- VEC b2[MAX_LAYERS]; //当前一轮输入,网络权值w/b的调整值
- VEC z[MAX_LAYERS]; //对于当前输入,网络每一层神经元输入
- VEC a[MAX_LAYERS]; //对于当前输入,网络每一层神经元输出
- VEC delta[MAX_LAYERS]; //对于当前输入,网络每一层神经元对z求偏导时,其偏导值。
- VEC sigmal; //当前层神经元logistic偏导值
- double gauss(){ return sqrt(-2.0*log(1.0*rand()/32767))*sin(2*M_PI*rand()/32767);} //Box-Muller算法产生标准高斯分布N(0,1)随机数
- double sigmoid(double x){ return 1.0/(1+exp(-x)); } //logistic 函数
- double sigmoid_der(double x){ return sigmoid(x)*(1-sigmoid(x));} //logistic 函数导数
- int getsigmal(int l); //获取第l层Sigmal值(即l层层神经元logistic偏导值)
- int feedforward(VEC &in); //通过输入in,得到z,a
- int tune(VEC &in,VEC &y); //训练一组数据,将w/b调整值加入w2/b2中,costFun=0/1表示二次函数和对数函数
- public:
- BP(){}
- ~BP(){}
- void displayWB();
- int save(char* szFileName);
- int load(char* szFileName);
- int Network(vector<int>&neurons);
- int SGD(DATASET *training_data,int epochs,int mini_batch_size,double eta,double lambda,DATASET *test_data);
- int work(VEC &in,VEC &out); //接受输入in,返回输出out
- int MNISTAccuracy(DATASET *ds);
- };
// Allocate all per-layer storage for the given topology and initialize
// weights and biases with Gaussian noise scaled by 1/sqrt(fan-in).
int BP::Network(vector<int>&neurons){
    lays=neurons.size();
    neus=neurons;
    //w,b (layer 0 is the input layer and carries no weights)
    for (int i=1;i<lays;i++){
        mat_resize(w[i],neus[i],neus[i-1]);
        mat_resize(w2[i],neus[i],neus[i-1]);
        b[i].resize(neus[i]);
        b2[i].resize(neus[i]);
        z[i].resize(neus[i]);
        a[i].resize(neus[i]);
        delta[i].resize(neus[i]);
    }
    //random initialization (earlier uniform scheme left commented out)
    srand(time(NULL));
    for (int l=1;l<lays;l++)
        for (int j=0;j<neus[l];j++){
            for (int k=0;k<neus[l-1];k++)
                //w[l][j][k]=1.0*rand()/32768-0.5;
                //b[l][j]=1.0*rand()/32768-0.5;
                w[l][j][k]=gauss()/sqrt(1.0*neus[l-1]);
            b[l][j]=gauss()/sqrt(1.0*neus[l-1]);
        }
    return 0;
}
- int BP::feedforward(VEC &in){
- a[0]=in;
- for (int l=1;l<lays;l++){
- for (int j=0;j<neus[l];j++){
- z[l][j]=a[l-1]*w[l][j]+b[l][j];
- a[l][j]=sigmoid(z[l][j]);
- }
- }
- return 0;
- }
- int BP::getsigmal(int l){
- sigmal.resize(neus[l]);
- for (int j=0;j<neus[l];j++)
- sigmal[j]=sigmoid_der(z[l][j]);
- return 0;
- }
// Backpropagate one training sample (in,y): run a forward pass, compute
// the per-layer error terms delta, then accumulate the weight/bias
// gradients into w2/b2 (summed over the mini-batch; SGD divides by m).
int BP::tune(VEC& in,VEC& y){
    feedforward(in);
    /** First compute all the delta values **/
    // output layer
    int l=lays-1;
    getsigmal(l);
#ifdef QUADRATIC
    delta[l]=a[l]-y;                   // quadratic cost: delta = (a-y) .* sigmoid'(z)
    vec_mul(delta[l],delta[l],sigmal);
#else
    delta[l]=a[l]-y;//cross-entropy cost: the sigmoid' factor cancels out
#endif
    // hidden layers lays-2..1: delta[l] = (delta[l+1] * w[l+1]) .* sigmoid'(z[l])
    for (l=lays-2;l>=1;l--){
        getsigmal(l);
        delta[l]=delta[l+1];
        delta[l]*=w[l+1];
        vec_mul(delta[l],delta[l],sigmal);
    }
    /** Turn the delta values into gradient contributions w2/b2 **/
    for (int l=lays-1;l>=1;l--){
        for (int j=0;j<neus[l];j++){
            //w: dC/dw[l][j][k] = delta[l][j] * a[l-1][k]
            for (int k=0;k<neus[l-1];k++)
                w2[l][j][k]+=delta[l][j]*a[l-1][k];
            //b: dC/db[l][j] = delta[l][j]
            b2[l][j]+=delta[l][j];
        }
    }
    return 0;
}
// Serialize the network to a text file: layer count, layer sizes, then
// all weights and all biases with 12 decimal digits of precision.
// load() reads exactly this format back.
int BP::save(char* szFileName){
    FILE *f=fopen(szFileName,"w");
    if (f==NULL) exit(-1);  // abort if the file cannot be created
    fprintf(f,"%d\n",lays);
    for (int i=0;i<lays;i++) fprintf(f,"%d ",neus[i]);
    //w
    for (int l=1;l<lays;l++){
        for (int j=0;j<neus[l];j++){
            for (int k=0;k<neus[l-1];k++)
                fprintf(f,"%.12lf ",w[l][j][k]);
            fprintf(f,"\n");
        }
        fprintf(f,"\n");
    }
    //b
    for (int l=1;l<lays;l++){
        for (int j=0;j<neus[l];j++)
            fprintf(f,"%.12lf ",b[l][j]);
        fprintf(f,"\n");
    }
    fclose(f);
    return 0;
}
// Restore a network saved by save(): read the topology, re-allocate all
// storage via Network() (which also re-randomizes), then overwrite the
// weights and biases with the stored values.
int BP::load(char* szFileName){
    FILE *f=fopen(szFileName,"r");
    if (f==NULL) exit(-1);  // abort if the file is missing
    fscanf(f,"%d\n",&lays);
    neus.resize(lays);
    for (int i=0;i<lays;i++)
        fscanf(f,"%d",&neus[i]);
    Network(neus);
    //w
    for (int l=1;l<lays;l++){
        for (int j=0;j<neus[l];j++)
            for (int k=0;k<neus[l-1];k++)
                fscanf(f,"%lf",&w[l][j][k]);
    }
    //b
    for (int l=1;l<lays;l++)
        for (int j=0;j<neus[l];j++)
            fscanf(f,"%lf",&b[l][j]);
    fclose(f);
    return 0;
}
// Random integer in [0,n): combines two rand() calls for more than 15 bits
// of range. Unsigned arithmetic avoids the signed-overflow UB that
// (rand()<<16) causes on platforms where RAND_MAX exceeds 15 bits.
#define Rand(n) ((int)((((unsigned)rand()<<16)+(unsigned)rand())%(unsigned)(n)))
- int BP::SGD(DATASET *training_data,int epochs,int mini_batch_size,double eta,double lambda=5.0,DATASET *test_data=NULL){
- int n=training_data->n;
- int m=mini_batch_size;
- int idx[n];
- for (int i=0;i<n;i++) idx[i]=i;
- while (epochs--){
- //idx shuffle
- for (int i=1;i<n;i++)swap(idx[i],idx[Rand(i)]);
- int round=n/m; //当前epoch共调整n/m轮
- int cur=0; //training_data->in[idx[cur]]为当前要训练的数据
- while (round--){
- //清零w2/b2
- for (int l=0;l<lays;l++){
- mat_clear(w2[l]);
- vec_clear(b2[l]);
- }
- //训练
- for (int t=0;t<m;t++){ //随机选择训练数据
- tune(training_data->in[idx[cur]],training_data->y[idx[cur]]); //通过当前数据调整w2/b2大小
- cur++;
- }
- //调整权值
- for (int l=0;l<lays;l++){
- w[l]*=1-eta*lambda/n; //R2 Regularization
- w2[l]*=eta/m;
- b2[l]*=eta/m;
- w[l]-=w2[l];
- b[l]-=b2[l];
- }
- }
- static int epo=1;
- //int b=MNISTAccuracy(test_data);
- int a=MNISTAccuracy(training_data);
- //printf("[Epoch %d]Accuracy Rate: %d/%d (%.3lf)(Test Data)\n",epo,b,10000,1.0*a/10000);
- printf("[Epoch %d]Accuracy Rate: %d/%d (%.3lf)\n",epo++,a,n,1.0*a/n);
- }
- return 0;
- }
// Run the network on `in` and copy the output layer's activations into `out`.
int BP::work(VEC &in,VEC &out){
    feedforward(in);
    out=a[lays-1];
    return 0;
}
- int BP::MNISTAccuracy(DATASET* ds){ //手写数字识别准确率
- VEC out;
- int cnt=0;
- for (int i=0;i< ds->n;i++){
- feedforward(ds->in[i]);
- out=a[lays-1];
- int ans;
- double mx=0;
- for (int j=0;j<10;j++)
- if (out[j]>mx){
- mx=out[j];
- ans=j;
- }
- int desired;
- for (int j=0;j<10;j++)
- if (ds->y[i][j]==1)desired=j;
- if (ans==desired)cnt++;
- }
- return cnt;
- }
// One raw MNIST sample: 28x28 pixel bytes plus its label (0-9).
// BYTE comes from <windows.h>; `md` is the shared scratch instance the
// loader functions fill in.
struct MNIST_DATA{
    BYTE data[28*28];
    BYTE label;
    // ASCII dump: '0' for zero-valued pixels, '*' otherwise, 28 per row.
    void show(){
        printf("--------%d---------\n",label);
        for (int i=0;i<784;i++){
            if (data[i]==0)putchar('0');else putchar('*');
            if (i%28==27)putchar('\n');
        }
    }
}md;
// Load training sample #idx into the global `md` and binarize its pixels.
// The files stay open across calls (static handles, never closed).
void ReadTrainingIMG(int idx){ //28*28
    static FILE *fimg=NULL; //image file
    static FILE *flbl=NULL; //label file
    if (fimg==NULL){
        //SetCurrentDirectory("C:\\Users\\thinkt\\Documents\\MNIST Data Set");
        fimg=fopen("mnist_train_img","rb");
        flbl=fopen("mnist_train_label","rb");
        if (fimg==NULL||flbl==NULL) exit(-1);
    }
    fseek(fimg,16+28*28*idx,SEEK_SET);  // 16 bytes: IDX image-file header
    fread(&md.data,1,28*28,fimg);       // NOTE(review): fread result unchecked
    fseek(flbl,8+idx,SEEK_SET);         // 8 bytes: IDX label-file header
    fread(&md.label,1,1,flbl);
    // Threshold: values >128 become 0, the rest 1. ReadTestIMG uses the
    // same mapping, so training and evaluation are consistent.
    for (int i=0;i<784;i++) md.data[i]=(md.data[i]>128)?0:1;
}
// Load test sample #idx into the global `md` from the t10k IDX files,
// binarizing pixels the same way as ReadTrainingIMG.
void ReadTestIMG(int idx){
    static FILE *fimg=NULL; //image file
    static FILE *flbl=NULL; //label file
    if (fimg==NULL){
        //SetCurrentDirectory("C:\\Users\\thinkt\\Documents\\MNIST Data Set");
        fimg=fopen("t10k-images.idx3-ubyte","rb");
        flbl=fopen("t10k-labels.idx1-ubyte","rb");
        if (fimg==NULL||flbl==NULL) exit(-1);
    }
    fseek(fimg,16+28*28*idx,SEEK_SET);  // 16 bytes: IDX image-file header
    fread(&md.data,1,28*28,fimg);       // NOTE(review): fread result unchecked
    fseek(flbl,8+idx,SEEK_SET);         // 8 bytes: IDX label-file header
    fread(&md.label,1,1,flbl);
    // same binarization as the training loader: >128 -> 0, else 1
    for (int i=0;i<784;i++) md.data[i]=(md.data[i]>128)?0:1;
}
BP bp;             // the network
DATASET *ds;       // training set (50000 samples, filled by getDataset)
DATASET *ds2;      // test set (10000 samples, filled by getDataset)
vector<int> neus;  // layer sizes, set to 784-30-10 by getDataset
// Build the 784-30-10 topology and load MNIST into ds (50000 training
// images) and ds2 (10000 test images), converting each label into a
// one-hot 10-element target vector.
void getDataset(){
    neus.push_back(784);
    neus.push_back(30);
    neus.push_back(10);
    ds=new DATASET(50000,neus[0],neus[neus.size()-1]);
    ds2=new DATASET(10000,neus[0],neus[neus.size()-1]);
    for (int i=0;i<50000;i++){
        ReadTrainingIMG(i);
        for (int j=0;j<28*28;j++) ds->in[i][j]=md.data[j];
        for (int j=0;j<10;j++)ds->y[i][j]=(md.label==j)?1:0;
    }
    for (int i=0;i<10000;i++){
        ReadTestIMG(i);
        for (int j=0;j<28*28;j++) ds2->in[i][j]=md.data[j];
        for (int j=0;j<10;j++)ds2->y[i][j]=(md.label==j)?1:0;
    }
}
// Train on the MNIST training set, save the model, then loop forever:
// read an index from stdin, display that training image, and print the
// network's 10 output activations plus its prediction.
void MNIST(){
    bp.Network(neus);
    //bp.load("bp.txt");
    printf("Start Training...\n");
    bp.SGD(ds,20,10,0.3,0.5,ds2); //20 epochs, batch size 10, eta=0.3, lambda=0.5
    bp.save("bp.txt");
    VEC in,out;
    in.resize(784);out.resize(10);
    while (1){
        int idx;scanf("%d",&idx);
        ReadTrainingIMG(idx);
        md.show();
        for (int i=0;i<784;i++)in[i]=md.data[i];
        bp.work(in,out);
        int ans;
        double mx=0;
        for (int i=0;i<10;i++){
            printf("%.2lf ",out[i]);
            if (out[i]>mx){mx=out[i];ans=i;}
        }printf("ans=%d\n",ans);
    }
    exit(0); // unreachable: the loop above never exits
}
// Load a previously trained model, report its accuracy on both sets,
// step through every misclassified test image (pausing on each), then
// enter the same interactive query loop as MNIST().
void MNIST_LOAD(){
    //bp.load("MNIST_0.965.txt");
    bp.load("MNIST_0.976.txt");
    //bp.save("bp2.txt");
    printf("yes\n");
    vector<double>in,out;
    in.resize(784);out.resize(10);
    int k;
    k=bp.MNISTAccuracy(ds2);
    printf("accuracy:%d/10000 %.4lf\n",k,1.0*k/10000);
    k=bp.MNISTAccuracy(ds);
    printf("accuracy:%d/50000 %.4lf\n",k,1.0*k/50000);
    // show every test image the network gets wrong
    for (int idx=0;idx<10000;idx++){
        ReadTestIMG(idx);
        for (int i=0;i<784;i++)in[i]=md.data[i];
        bp.work(in,out);
        int ans;
        double mx=0;
        for (int i=0;i<10;i++){
            if (out[i]>mx){mx=out[i];ans=i;}
        }
        if (ans!=md.label){
            printf("ans=%d recog=%d\n",md.label,ans);
            md.show();
            for (int i=0;i<10;i++)printf("%.2lf ",out[i]);putchar('\n');
            system("pause"); // Windows-only pause
        }
    }
    // interactive query loop over training images
    while (1){
        int idx;scanf("%d",&idx);
        ReadTrainingIMG(idx);
        md.show();
        for (int i=0;i<784;i++)in[i]=md.data[i];
        bp.work(in,out);
        int ans;
        double mx=0;
        for (int i=0;i<10;i++){
            printf("%.2lf ",out[i]);
            if (out[i]>mx){mx=out[i];ans=i;}
        }
        printf("ans=%d\n",ans);
    }
}
// Entry point: load MNIST, then train from scratch (MNIST); evaluating
// a saved model (MNIST_LOAD) is currently disabled.
int main(){
    getDataset();
    MNIST();
    //MNIST_LOAD();
    return 0;
}