[C++] Implementing Backpropagation (BP) for Neural Networks

I wrote this at home over this year's Spring Festival break; my old blog host was unstable, so I'm posting it here.

===================================================================


2017-01-25

I implemented the BP algorithm myself, with no third-party library dependencies. It reaches about 95% accuracy on the MNIST dataset, and pushing it higher is hard; that seems to be the limit of this approach.
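For reference, these are the standard backpropagation equations that `tune()` in the listing below implements ($\sigma$ is the logistic function, $\odot$ the elementwise product, $L$ the output layer):

$$\delta^L = (a^L - y)\odot\sigma'(z^L)\quad\text{(quadratic cost; with cross-entropy this simplifies to }\delta^L = a^L - y\text{)}$$

$$\delta^l = \big((w^{l+1})^T \delta^{l+1}\big)\odot\sigma'(z^l),\qquad
\frac{\partial C}{\partial w^l_{jk}} = \delta^l_j\, a^{l-1}_k,\qquad
\frac{\partial C}{\partial b^l_j} = \delta^l_j$$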

  
  
Matrix and vector operations are implemented together. The full program follows; a small smoke-test sketch appears after the listing.
  
  
#include<stdio.h>
#include<vector>
#include<stdlib.h>
#include<math.h>
#include<time.h>
#include<algorithm> // for swap

#include<windows.h> // for BYTE (and the commented-out SetCurrentDirectory below)
using namespace std;

#ifndef M_PI
#define M_PI 3.14159265358979323846 // math.h does not guarantee M_PI on all compilers
#endif

#define QUADRATIC      // defined: quadratic cost; undefined: cross-entropy cost
#define MAX_LAYERS 6

typedef vector<double> VEC;
typedef vector<VEC> MAT;
/** VEC/MAT function declarations **/
//VEC
int vec_clear(VEC &vec);            // zero out a vector
double operator *(VEC &a,VEC &b);   // dot product
VEC operator -(VEC &a,VEC &b);      // vector subtraction
int operator -=(VEC &a,VEC &b);
VEC operator +(VEC &a,VEC &b);      // vector addition
int operator +=(VEC &a,VEC &b);
int operator *=(VEC &a,MAT &b);     // row vector times matrix (1xk times kxn)
int operator *=(VEC &a,double w);   // vector times scalar
int vec_mul(VEC &c,VEC &a,VEC &b);  // elementwise product: c[i]=a[i]*b[i]
//MAT
int mat_resize(MAT &mat,int r,int c,double value); // resize matrix to r x c
int mat_clear(MAT &mat);            // zero out a matrix
int operator +=(MAT &a,MAT &b);
int operator -=(MAT &a,MAT &b);     // matrix addition/subtraction
int operator *=(MAT &a,double w);   // matrix times scalar
//VEC implementation
int vec_clear(VEC &vec){ for (int i=0;i<vec.size();i++) vec[i]=0; return 0; }
int vec_mul(VEC &c,VEC &a,VEC &b){ for (int i=0;i<c.size();i++) c[i]=a[i]*b[i]; return 0; }
int operator *=(VEC &a,MAT &b){ // a := a*b (1xn row vector times n x m matrix)
    int n=a.size(),m=b[0].size();
    VEC tmp;
    tmp.resize(m,0);
    for (int i=0;i<m;i++){
        for (int j=0;j<n;j++){
            tmp[i]+=a[j]*b[j][i];
        }
    }
    a=tmp;
    return 0;
}

double operator *(VEC &a,VEC &b){ // dot product
    double c=0;
    for (int i=0;i<a.size();i++) c+=a[i]*b[i];
    return c;
}
VEC operator +(VEC &a,VEC &b){ // vector addition
    VEC c;
    c.resize(a.size());
    for (int i=0;i<a.size();i++) c[i]=a[i]+b[i];
    return c;
}
VEC operator -(VEC &a,VEC &b){ // vector subtraction
    VEC c;
    c.resize(a.size());
    for (int i=0;i<a.size();i++) c[i]=a[i]-b[i];
    return c;
}
int operator +=(VEC &a,VEC &b){ for (int i=0;i<a.size();i++) a[i]+=b[i]; return 0; }
int operator -=(VEC &a,VEC &b){ for (int i=0;i<a.size();i++) a[i]-=b[i]; return 0; }
int operator *=(VEC &a,double w){ for (int i=0;i<a.size();i++) a[i]*=w; return 0; }
//MAT implementation
int mat_resize(MAT &mat,int r,int c,double value=0.0){ mat.resize(r); for (int i=0;i<r;i++) mat[i].resize(c,value); return 0; } // resize matrix to r x c
int mat_clear(MAT &mat){ for (int i=0;i<mat.size();i++) vec_clear(mat[i]); return 0; } // zero out a matrix
int operator +=(MAT &a,MAT &b){
    for (int r=0;r<a.size();r++)
        for (int c=0;c<a[0].size();c++)
            a[r][c]+=b[r][c];
    return 0;
}
int operator -=(MAT &a,MAT &b){
    for (int r=0;r<a.size();r++)
        for (int c=0;c<a[0].size();c++)
            a[r][c]-=b[r][c];
    return 0;
}
int operator *=(MAT &a,double w){
    for (int r=0;r<a.size();r++)
        for (int c=0;c<a[0].size();c++)
            a[r][c]*=w;
    return 0;
}
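// Note: these overloads are all backprop needs. `a *= b` is the row-vector product
// a := a*b, which tune() uses as delta[l+1] * w[l+1] (the transpose-weights step),
// and vec_mul() is the elementwise (Hadamard) product applied with sigma'(z).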
// Training data set
struct DATASET{
    int n;              // number of samples
    vector<VEC> in;     // inputs
    vector<VEC> y;      // desired outputs
    DATASET(int num,int a,int b){ // pre-allocate: num samples, a inputs and b outputs each
        n=num;
        in.resize(n);
        y.resize(n);
        for (int i=0;i<n;i++){
            in[i].resize(a);
            y[i].resize(b);
        }
    }
};
//Class BP
class BP{
private:
    int lays;              // number of layers
    vector<int> neus;      // number of neurons in each layer
    MAT w[MAX_LAYERS];     // network weights
    VEC b[MAX_LAYERS];     // network biases

    /* The following members hold per-input state */
    MAT w2[MAX_LAYERS];
    VEC b2[MAX_LAYERS];    // accumulated w/b adjustments for the current mini-batch

    VEC z[MAX_LAYERS];     // weighted input of each neuron, per layer, for the current input
    VEC a[MAX_LAYERS];     // activation (output) of each neuron, per layer
    VEC delta[MAX_LAYERS]; // per-layer partial derivative of the cost w.r.t. z

    VEC sigmal;            // sigma'(z) values for the current layer

    // Box-Muller transform: standard normal N(0,1) sample. rand()+1 keeps the
    // argument of log() away from 0 (the original rand()/32767 could hit log(0)).
    double gauss(){ return sqrt(-2.0*log((rand()+1.0)/(RAND_MAX+1.0)))*sin(2*M_PI*rand()/RAND_MAX); }
    double sigmoid(double x){ return 1.0/(1+exp(-x)); }               // logistic function
    double sigmoid_der(double x){ return sigmoid(x)*(1-sigmoid(x)); } // its derivative

    int getsigmal(int l); // fill sigmal with sigma'(z) for layer l

    int feedforward(VEC &in); // compute z and a from input in
    int tune(VEC &in,VEC &y); // backprop one training example, accumulating the w/b
                              // adjustments into w2/b2 (cost function selected by the
                              // QUADRATIC macro: quadratic vs. cross-entropy)
public:
    BP(){}
    ~BP(){}
    void displayWB();
    int save(const char* szFileName);
    int load(const char* szFileName);
    int Network(vector<int>&neurons);
    int SGD(DATASET *training_data,int epochs,int mini_batch_size,double eta,double lambda,DATASET *test_data);
    int work(VEC &in,VEC &out); // run the network on input in, writing the output to out
    int MNISTAccuracy(DATASET *ds);
};
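// Note on initialization: gauss() uses the Box-Muller transform (for independent
// u1, u2 ~ Uniform(0,1], sqrt(-2*ln(u1)) * sin(2*pi*u2) is a standard normal sample),
// and Network() below divides each draw by sqrt(fan-in), which keeps the variance of
// a neuron's weighted input z roughly independent of the previous layer's width.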
int BP::Network(vector<int>&neurons){
    lays=neurons.size();
    neus=neurons;
    //w,b
    for (int i=1;i<lays;i++){
        mat_resize(w[i],neus[i],neus[i-1]);
        mat_resize(w2[i],neus[i],neus[i-1]);
        b[i].resize(neus[i]);
        b2[i].resize(neus[i]);
        z[i].resize(neus[i]);
        a[i].resize(neus[i]);
        delta[i].resize(neus[i]);
    }
    // random initialization: N(0,1) scaled by 1/sqrt(fan-in)
    srand(time(NULL));
    for (int l=1;l<lays;l++)
        for (int j=0;j<neus[l];j++){
            for (int k=0;k<neus[l-1];k++)
                //w[l][j][k]=1.0*rand()/32768-0.5;
                //b[l][j]=1.0*rand()/32768-0.5;
                w[l][j][k]=gauss()/sqrt(1.0*neus[l-1]);
            b[l][j]=gauss()/sqrt(1.0*neus[l-1]);
        }
    return 0;
}
int BP::feedforward(VEC &in){
    a[0]=in;
    for (int l=1;l<lays;l++){
        for (int j=0;j<neus[l];j++){
            z[l][j]=a[l-1]*w[l][j]+b[l][j]; // dot product of previous activations with row j of w[l]
            a[l][j]=sigmoid(z[l][j]);
        }
    }
    return 0;
}
int BP::getsigmal(int l){
    sigmal.resize(neus[l]);
    for (int j=0;j<neus[l];j++)
        sigmal[j]=sigmoid_der(z[l][j]);
    return 0;
}
int BP::tune(VEC& in,VEC& y){
    feedforward(in);
    /** 1. compute all delta values **/
    // output layer
    int l=lays-1;
    getsigmal(l);
#ifdef QUADRATIC
    delta[l]=a[l]-y;                  // quadratic cost: delta = (a-y) elementwise-times sigma'(z)
    vec_mul(delta[l],delta[l],sigmal);
#else
    delta[l]=a[l]-y;                  // cross-entropy cost: the sigma'(z) factor cancels out
#endif
    // hidden layers lays-2 .. 1
    for (l=lays-2;l>=1;l--){
        getsigmal(l);
        delta[l]=delta[l+1];
        delta[l]*=w[l+1];             // row vector delta[l+1] times w[l+1], i.e. W-transpose times delta
        vec_mul(delta[l],delta[l],sigmal);
    }
    /** 2. accumulate the w/b adjustments into w2/b2 **/
    for (int l=lays-1;l>=1;l--){
        for (int j=0;j<neus[l];j++){
            //w: dC/dw[l][j][k] = delta[l][j] * a[l-1][k]
            for (int k=0;k<neus[l-1];k++)
                w2[l][j][k]+=delta[l][j]*a[l-1][k];
            //b: dC/db[l][j] = delta[l][j]
            b2[l][j]+=delta[l][j];
        }
    }
    return 0;
}
int BP::save(const char* szFileName){
    FILE *f=fopen(szFileName,"w");
    if (f==NULL) exit(-1);
    fprintf(f,"%d\n",lays);
    for (int i=0;i<lays;i++) fprintf(f,"%d ",neus[i]);
    //w
    for (int l=1;l<lays;l++){
        for (int j=0;j<neus[l];j++){
            for (int k=0;k<neus[l-1];k++)
                fprintf(f,"%.12lf ",w[l][j][k]);
            fprintf(f,"\n");
        }
        fprintf(f,"\n");
    }
    //b
    for (int l=1;l<lays;l++){
        for (int j=0;j<neus[l];j++)
            fprintf(f,"%.12lf ",b[l][j]);
        fprintf(f,"\n");
    }
    fclose(f);
    return 0;
}
int BP::load(const char* szFileName){
    FILE *f=fopen(szFileName,"r");
    if (f==NULL) exit(-1);

    fscanf(f,"%d\n",&lays);
    neus.resize(lays);
    for (int i=0;i<lays;i++)
        fscanf(f,"%d",&neus[i]);
    Network(neus); // allocate storage (the random weights are overwritten below)
    //w
    for (int l=1;l<lays;l++){
        for (int j=0;j<neus[l];j++)
            for (int k=0;k<neus[l-1];k++)
                fscanf(f,"%lf",&w[l][j][k]);
    }
    //b
    for (int l=1;l<lays;l++)
        for (int j=0;j<neus[l];j++)
            fscanf(f,"%lf",&b[l][j]);
    fclose(f);
    return 0;
}
#define Rand(n) (((rand()<<16)+rand())%(n)) // pseudo-random value in [0,n); rand() alone covers only 15 bits
int BP::SGD(DATASET *training_data,int epochs,int mini_batch_size,double eta,double lambda=5.0,DATASET *test_data=NULL){
    int n=training_data->n;
    int m=mini_batch_size;
    vector<int> idx(n); // index permutation (a variable-length array is not standard C++)
    for (int i=0;i<n;i++) idx[i]=i;
    while (epochs--){
        // Fisher-Yates shuffle of idx (Rand(i+1) so element i can also stay in place)
        for (int i=1;i<n;i++) swap(idx[i],idx[Rand(i+1)]);

        int round=n/m; // n/m mini-batch updates per epoch
        int cur=0;     // training_data->in[idx[cur]] is the next sample to train on
        while (round--){
            // zero w2/b2
            for (int l=0;l<lays;l++){
                mat_clear(w2[l]);
                vec_clear(b2[l]);
            }
            // accumulate gradients over one mini-batch
            for (int t=0;t<m;t++){
                tune(training_data->in[idx[cur]],training_data->y[idx[cur]]); // add this sample's gradient to w2/b2
                cur++;
            }
            // apply the update
            for (int l=0;l<lays;l++){
                w[l]*=1-eta*lambda/n; // L2 regularization (weight decay)
                w2[l]*=eta/m;
                b2[l]*=eta/m;
                w[l]-=w2[l];
                b[l]-=b2[l];
            }
        }
        static int epo=1;
        //int test_correct=MNISTAccuracy(test_data);
        int correct=MNISTAccuracy(training_data);
        //printf("[Epoch %d]Accuracy Rate: %d/%d (%.3lf)(Test Data)\n",epo,test_correct,10000,1.0*test_correct/10000);
        printf("[Epoch %d]Accuracy Rate: %d/%d (%.3lf)\n",epo++,correct,n,1.0*correct/n);
    }
    return 0;
}
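// Summary of the mini-batch update applied above (m = batch size, n = training-set
// size, eta = learning rate, lambda = L2 regularization strength):
//   w <- (1 - eta*lambda/n) * w - (eta/m) * (sum of per-example gradients in w2)
//   b <- b - (eta/m) * (sum of per-example gradients in b2)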
int BP::work(VEC &in,VEC &out){
    feedforward(in);
    out=a[lays-1];
    return 0;
}
int BP::MNISTAccuracy(DATASET* ds){ // handwritten-digit accuracy (assumes 10 output classes)
    VEC out;
    int cnt=0;
    for (int i=0;i<ds->n;i++){
        feedforward(ds->in[i]);
        out=a[lays-1];
        int ans=0;     // index of the largest output = recognized digit
        double mx=0;
        for (int j=0;j<10;j++)
            if (out[j]>mx){
                mx=out[j];
                ans=j;
            }
        int desired=0; // index of the 1 in the one-hot label
        for (int j=0;j<10;j++)
            if (ds->y[i][j]==1) desired=j;
        if (ans==desired) cnt++;
    }
    return cnt;
}
struct MNIST_DATA{
    BYTE data[28*28];
    BYTE label;
    void show(){ // ASCII dump; after the binarization below, ink pixels (value 0) print as '0'
        printf("--------%d---------\n",label);
        for (int i=0;i<784;i++){
            if (data[i]==0) putchar('0'); else putchar('*');
            if (i%28==27) putchar('\n');
        }
    }
}md;
void ReadTrainingIMG(int idx){ // read one 28*28 training image plus its label into md
    static FILE *fimg=NULL;
    static FILE *flbl=NULL;
    if (fimg==NULL){
        //SetCurrentDirectory("C:\\Users\\thinkt\\Documents\\MNIST Data Set");
        fimg=fopen("mnist_train_img","rb");
        flbl=fopen("mnist_train_label","rb");
        if (fimg==NULL||flbl==NULL) exit(-1);
    }
    fseek(fimg,16+28*28*idx,SEEK_SET); // IDX image file: 16-byte header
    fread(&md.data,1,28*28,fimg);

    fseek(flbl,8+idx,SEEK_SET);        // IDX label file: 8-byte header
    fread(&md.label,1,1,flbl);
    // binarize: ink (>128) maps to 0, background to 1 (inverted, but applied
    // consistently to training and test data)
    for (int i=0;i<784;i++) md.data[i]=(md.data[i]>128)?0:1;
}
void ReadTestIMG(int idx){ // read one test image plus its label into md
    static FILE *fimg=NULL;
    static FILE *flbl=NULL;
    if (fimg==NULL){
        //SetCurrentDirectory("C:\\Users\\thinkt\\Documents\\MNIST Data Set");
        fimg=fopen("t10k-images.idx3-ubyte","rb");
        flbl=fopen("t10k-labels.idx1-ubyte","rb");
        if (fimg==NULL||flbl==NULL) exit(-1);
    }
    fseek(fimg,16+28*28*idx,SEEK_SET);
    fread(&md.data,1,28*28,fimg);

    fseek(flbl,8+idx,SEEK_SET);
    fread(&md.label,1,1,flbl);
    for (int i=0;i<784;i++) md.data[i]=(md.data[i]>128)?0:1;
}
BP bp;
DATASET *ds;  // 50000 training samples
DATASET *ds2; // 10000 test samples
vector<int> neus;
void getDataset(){
    neus.push_back(784); // 784-30-10 network
    neus.push_back(30);
    neus.push_back(10);

    ds=new DATASET(50000,neus[0],neus[neus.size()-1]);
    ds2=new DATASET(10000,neus[0],neus[neus.size()-1]);
    for (int i=0;i<50000;i++){
        ReadTrainingIMG(i);
        for (int j=0;j<28*28;j++) ds->in[i][j]=md.data[j];
        for (int j=0;j<10;j++) ds->y[i][j]=(md.label==j)?1:0; // one-hot label
    }
    for (int i=0;i<10000;i++){
        ReadTestIMG(i);
        for (int j=0;j<28*28;j++) ds2->in[i][j]=md.data[j];
        for (int j=0;j<10;j++) ds2->y[i][j]=(md.label==j)?1:0;
    }
}
void MNIST(){
    bp.Network(neus);
    //bp.load("bp.txt");
    printf("Start Training...\n");
    bp.SGD(ds,20,10,0.3,0.5,ds2); // 20 epochs, mini-batch 10, eta=0.3, lambda=0.5
    bp.save("bp.txt");

    VEC in,out;
    in.resize(784); out.resize(10);
    while (1){ // interactive: type a training-image index, see the image and the prediction
        int idx; scanf("%d",&idx);
        ReadTrainingIMG(idx);
        md.show();
        for (int i=0;i<784;i++) in[i]=md.data[i];
        bp.work(in,out);
        int ans=0;
        double mx=0;
        for (int i=0;i<10;i++){
            printf("%.2lf ",out[i]);
            if (out[i]>mx){ mx=out[i]; ans=i; }
        }
        printf("ans=%d\n",ans);
    }
    exit(0);
}
void MNIST_LOAD(){ // load a saved network and inspect its mistakes
    //bp.load("MNIST_0.965.txt");
    bp.load("MNIST_0.976.txt");
    //bp.save("bp2.txt");
    printf("yes\n");
    vector<double> in,out;
    in.resize(784); out.resize(10);
    int k;
    k=bp.MNISTAccuracy(ds2);
    printf("accuracy:%d/10000 %.4lf\n",k,1.0*k/10000);
    k=bp.MNISTAccuracy(ds);
    printf("accuracy:%d/50000 %.4lf\n",k,1.0*k/50000);

    for (int idx=0;idx<10000;idx++){ // show every misclassified test image
        ReadTestIMG(idx);
        for (int i=0;i<784;i++) in[i]=md.data[i];
        bp.work(in,out);
        int ans=0;
        double mx=0;
        for (int i=0;i<10;i++){
            if (out[i]>mx){ mx=out[i]; ans=i; }
        }
        if (ans!=md.label){
            printf("label=%d recog=%d\n",md.label,ans);
            md.show();
            for (int i=0;i<10;i++) printf("%.2lf ",out[i]);
            putchar('\n');
            system("pause");
        }
    }
    while (1){ // interactive lookup, as in MNIST()
        int idx; scanf("%d",&idx);
        ReadTrainingIMG(idx);
        md.show();
        for (int i=0;i<784;i++) in[i]=md.data[i];
        bp.work(in,out);
        int ans=0;
        double mx=0;
        for (int i=0;i<10;i++){
            printf("%.2lf ",out[i]);
            if (out[i]>mx){ mx=out[i]; ans=i; }
        }
        printf("ans=%d\n",ans);
    }
}
int main(){
    getDataset();
    MNIST();
    //MNIST_LOAD();
    return 0;
}
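If the MNIST files are not at hand, here is a minimal smoke-test sketch of the classes above. It is illustrative only, not part of the original program: the smoke_test name and the 10-16-10 topology are made up. It trains the network to map ten one-hot vectors to themselves; since MNISTAccuracy() hardcodes 10 output classes, the output layer must be exactly 10 wide.

// Hypothetical smoke test (not in the original program): learn the identity map
// on ten one-hot vectors. Call this from main() in place of getDataset()/MNIST().
void smoke_test(){
    vector<int> topo;
    topo.push_back(10); topo.push_back(16); topo.push_back(10); // 10-16-10 network
    BP net;
    net.Network(topo);
    DATASET d(10,10,10); // 10 samples, 10 inputs, 10 outputs
    for (int i=0;i<10;i++)
        for (int j=0;j<10;j++){
            d.in[i][j]=(i==j)?1:0;
            d.y[i][j]=(i==j)?1:0;
        }
    // 200 epochs, mini-batches of 5, eta=0.5, lambda=0 (no regularization), no test set;
    // the accuracy SGD prints each epoch should climb toward 10/10.
    net.SGD(&d,200,5,0.5,0.0,NULL);
}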



Reposted from blog.csdn.net/vcvycy/article/details/78636308