% Reference : https://github.com/voidbip/matlab_nn
clc; clear; clf;
X = [0 0 1 1; 0 1 0 1]; % x -> toy 2-D dataset (XOR layout; overwritten below)
flag = [0 1 1 0];       % y -> flag / label
n = 100; % total number of points across the first and second data sets
a = linspace(0, 2*pi, n/2); % angles at which the circles are sampled
u = [5*cos(a)+5, 10*cos(a)+5] + 1*rand(1,n);
v = [5*sin(a)+5, 10*sin(a)+5] + 1*rand(1,n);
X = [u; v]; % overwrite the XOR toy set with the two-circle data
flag = [zeros(1,n/2), ones(1,n/2)];
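% The result is two noisy concentric circles centered at (5,5) with radii
% 5 and 10; the inner circle is class 0 and the outer circle is class 1.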
classNum = length(unique(flag)); % number of classes
[row, col] = size(X);            % row -> dimension, col -> size of dataset
NNLayer = [row 20 classNum];     % the structure of our neural network
%[1] Initialize weights randomly
w = randInitWeights(NNLayer);
iteration = 10000;    % number of training iterations (epochs)
acMethod = 'SIGMOID'; % activation function
lambda = 0;           % L2 regularization strength
flagMatrix = zeros(classNum, col); % one-hot encoding of the labels
for i = 1:length(flag)
    flagMatrix(flag(i)+1, i) = 1;
end
%- Mini-Batch Gradient Descent Params -%
batchSize = 4;
%- Adam Params -%
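% Adam combines momentum (first moment) and RMSProp-style scaling
% (second moment), both bias-corrected by the step counter cnt:
%   momentum = gamma*momentum + (1-gamma)*g,  m_bar = momentum/(1-gamma^cnt)
%   s        = beta*s + (1-beta)*g.^2,        s_bar = s/(1-beta^cnt)
%   w        = w - eta*m_bar./sqrt(epsilon + s_bar)
% (this is exactly what the update steps inside the training loop implement)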
eta = 0.002;  % learning rate
s = 0;        % Adam second-moment accumulator
beta = 0.99;  % second-moment decay rate
momentum = 0; % Adam first-moment accumulator
gamma = 0.9;  % first-moment decay rate
cnt = 0;      % Adam step counter (for bias correction)
%- draw -%
Range = [-10,20; -10,20]; % plot range of the dataset: [xmin xmax; ymin ymax]
figure(1);
hold on;
posFlag = find(flag == 1);
negFlag = find(flag == 0);
plot(X(1,posFlag), X(2,posFlag), 'r+', 'linewidth', 2);
plot(X(1,negFlag), X(2,negFlag), 'bo', 'linewidth', 2);
[h_region1, h_region2] = drawRegion(Range, w, NNLayer, acMethod);
for i = 1:iteration
    % i % (uncomment to print the iteration counter)
    % cnt = cnt+1;
    if (mod(i,100) == 0) % refresh the decision-region plot every 100 iterations
        delete(h_region1); delete(h_region2);
        wFinal = w;
        [h_region1, h_region2] = drawRegion(Range, wFinal, NNLayer, acMethod);
        title('Data Fitting Using Neural Networks');
        legend('class 1', 'class 2', 'separated region');
        xlabel('x'); ylabel('y');
        drawnow;
    end
    % Mini-batch gradient descent + Adam (kept inline rather than factored into a function)
    dataSize = size(X,2);          % number of samples
    k = fix(dataSize/batchSize);   % number of full-size batches
    batchIdx = randperm(dataSize); % reshuffle every epoch for sample diversity
    % flagBatch = flagMatrix(:,batchIdx(1:batchSize)); % unused leftover
    batchIdx1 = reshape(batchIdx(1:k*batchSize), k, batchSize); % k full batches, one per row
    batchIdx2 = batchIdx(k*batchSize+1:end);                    % remaining samples (partial batch)
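    % With the settings above (dataSize = 100, batchSize = 4) this yields
    % k = 25 full batches and an empty remainder batchIdx2.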
    for b = 1:k
        valMatrix = ForwardPropagation(X(:,batchIdx1(b,:)), w, NNLayer, acMethod);
        [j, jw] = BackwardPropagation(flagMatrix(:,batchIdx1(b,:)), valMatrix, w, lambda, NNLayer, acMethod);
        cnt = cnt + 1;
        if j < 0.01
            break; % early stop (note: exits the batch loop only)
        end
        [sizeW, ~] = size(jw);
        epsilon = 1e-8*ones(sizeW,1); % numerical-stability term (avoids shadowing built-in eps)
        s = beta*s + (1-beta)*jw.*jw;             % update second moment
        momentum = gamma*momentum + (1-gamma)*jw; % update first moment
        momentum_bar = momentum/(1-gamma^cnt);    % bias-corrected first moment
        s_bar = s/(1-beta^cnt);                   % bias-corrected second moment
        w = w - eta./sqrt(epsilon+s_bar).*momentum_bar; % update parameters (theta)
    end
    if (~isempty(batchIdx2)) % process the leftover partial batch
        valMatrix = ForwardPropagation(X(:,batchIdx2), w, NNLayer, acMethod);
        [j, jw] = BackwardPropagation(flagMatrix(:,batchIdx2), valMatrix, w, lambda, NNLayer, acMethod);
        cnt = cnt + 1;
        % if j < 0.01, break; end
        [sizeW, ~] = size(jw);
        epsilon = 1e-8*ones(sizeW,1);
        s = beta*s + (1-beta)*jw.*jw;             % update second moment
        momentum = gamma*momentum + (1-gamma)*jw; % update first moment
        momentum_bar = momentum/(1-gamma^cnt);    % bias-corrected first moment
        s_bar = s/(1-beta^cnt);                   % bias-corrected second moment
        w = w - eta./sqrt(epsilon+s_bar).*momentum_bar; % update parameters (theta)
    end
    % Batch gradient descent (alternative to the mini-batch loop above):
    % valMatrix = ForwardPropagation(X, w, NNLayer, acMethod);
    % [j, jw] = BackwardPropagation(flagMatrix, valMatrix, w, lambda, NNLayer, acMethod);
    % w = w - eta*jw;
    % j
    % if j < 0.1, break; end
end
hold off;

%% Initialize Weights Randomly
% Example for NNLayer = [2 10 2]:
% layer1: 2 neurons + 1 bias; layer2: 10 neurons + 1 bias; layer3: 2 neurons.
function [w] = randInitWeights(NNLayer)
Len = length(NNLayer); % number of layers
shiftLayer = [0, ones(1,Len-1)+NNLayer(1:Len-1)]; % fan-in per layer: previous layer size + 1 (bias), shifted right by one
wCount = NNLayer.*shiftLayer; % number of weights feeding each layer: fan-in .* layer size
w = zeros(sum(wCount),1);     % initialize the weight vector
accWIdx = cumsum(wCount);     % cumulative offsets into the weight vector per layer
for i = 2:Len
    % Glorot/Xavier-style uniform range based on fan-in and fan-out
    epsInit = sqrt(6)/sqrt(NNLayer(i)+shiftLayer(i));
    w(accWIdx(i-1)+1:accWIdx(i)) = epsInit*(2*rand(wCount(i),1)-1);
end
end
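% A quick count for the default NNLayer = [row 20 classNum] = [2 20 2] above:
% shiftLayer = [0 3 21] and wCount = [0 60 42], so w has 102 entries; each
% layer's block reshapes to (fan-in x neurons) with the bias weight in the
% first row of every column (see ForwardPropagation below).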
%% FeedForward Propagation
function [valMatrix] = ForwardPropagation(X, w, NNLayer, acMethod)
[dim, num] = size(X);
Len = length(NNLayer); % number of layers
shiftLayer = [0, ones(1,Len-1)+NNLayer(1:Len-1)]; % fan-in per layer: previous layer size + 1 (bias)
accWIdx = NNLayer.*shiftLayer; % number of weights feeding each layer
ws = cumsum(accWIdx);          % cumulative offsets into the weight vector per layer
accValIdx = [0, cumsum(NNLayer)]; % cumulative offsets into valMatrix per layer
if (dim ~= NNLayer(1))
    error("dim of data != dim of input of NN");
end
valMatrix = zeros(sum(NNLayer), num); % activations of every neuron for every sample
valMatrix(1:dim,:) = X;
for i = 2:Len
    % curLayerW = reshape(w(ws(i-1)+1:ws(i)), NNLayer(i), shiftLayer(i))';
    curLayerW = reshape(w(ws(i-1)+1:ws(i)), shiftLayer(i), NNLayer(i));
    % prepend a row of ones for the bias, then apply the layer's weights
    valMatrix(accValIdx(i)+1:accValIdx(i+1),:) = activateFunc( ...
        curLayerW'*[ones(1,num); valMatrix(accValIdx(i-1)+1:accValIdx(i),:)], acMethod);
end
end
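% Minimal usage sketch (shapes taken from the script above): for a 2-D batch
% Xb of size 2 x m, valMatrix is sum(NNLayer) x m and the network outputs
% live in the last NNLayer(end) rows:
%   valMatrix = ForwardPropagation(Xb, w, NNLayer, acMethod);
%   y_hat = valMatrix(end-NNLayer(end)+1:end, :);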
%% Backward Propagation
function [CELoss, jw] = BackwardPropagation(y, valMatrix, w, lambda, NNLayer, acMethod)
Len = length(NNLayer);
[~, num] = size(y);
gradX = zeros(sum(NNLayer(2:end)), num); % gradients for every neuron except the input layer
jw = zeros(length(w), 1);
% Cross-entropy loss; the output values live in valMatrix(end-NNLayer(end)+1:end,:)
y_hat = valMatrix(end-NNLayer(end)+1:end,:) + 1e-7; % small shift avoids log(0)
CELoss = -sum(sum(y.*log(y_hat)+(1-y).*log(1-y_hat)))/num;
CELoss = CELoss + ((lambda*sum(w.^2))/(2*num)); % L2 regularization term
% Shortcut for a sigmoid output layer: the gradient of the cross-entropy
% w.r.t. the pre-activation is simply y_hat - y
gradX(end-NNLayer(end)+1:end,:) = y_hat - y;
% General form for other activations:
% gradCE = -(y./y_hat-(1-y)./(1-y_hat));
% gradX(end-NNLayer(end)+1:end,:) = gradCE.*calculateGrad(y_hat,'Sigmoid');
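% Why the shortcut holds: with a = sigmoid(z) and binary cross-entropy
%   L = -(y*log(a) + (1-y)*log(1-a)),
% dL/da = -(y./a - (1-y)./(1-a)) and da/dz = a.*(1-a), so the product
% telescopes to dL/dz = a - y, i.e. y_hat - y.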
shiftLayer = [0, ones(1,Len-1)+NNLayer(1:Len-1)]; % fan-in per layer: previous layer size + 1 (bias)
accWIdx = NNLayer.*shiftLayer; % number of weights feeding each layer
ws = cumsum(accWIdx);          % cumulative offsets into the weight vector per layer
gradIdx = [0, cumsum(NNLayer(2:end))]; % offsets into gradX (the first layer has no gradient)
ai = [0, cumsum(NNLayer)];             % offsets into valMatrix per layer
%-- Calculate the gradient of neurons --%
for i = Len:-1:3
    % curLayerW = reshape(w(ws(i-1)+1:ws(i),:), NNLayer(i), shiftLayer(i))';
    curLayerW = reshape(w(ws(i-1)+1:ws(i),:), shiftLayer(i), NNLayer(i)); % weights between layers i-1 and i
    curLayerW4X = curLayerW(2:end,:); % drop the bias row: biases receive no backward signal
    gradBack = gradX(gradIdx(i-1)+1:gradIdx(i),:); % gradients arriving from layer i
    % gradSigmoid = calculateGrad(valMatrix(ai(i-1)+1:ai(i),:), acMethod);
    % gradX(gradIdx(i-2)+1:gradIdx(i-1),:) = curLayerW4X*gradBack.*gradSigmoid;
    gradActiveFunc = calculateGrad(valMatrix(ai(i)+1:ai(i+1),:), acMethod);
    gradX(gradIdx(i-2)+1:gradIdx(i-1),:) = curLayerW4X*(gradActiveFunc.*gradBack); % gradient of neurons in layer i-1
end
%-- Calculate the gradient for weights --%
for i = Len:-1:2
    temp = zeros(accWIdx(i), num);
    for cnt = 1:num
        % temp(:,cnt) = kron([1;valMatrix(ai(i-1)+1:ai(i),cnt)], gradX(gradIdx(i-1)+1:gradIdx(i),cnt));
        % outer product of each neuron's gradient with the bias-augmented inputs
        temp(:,cnt) = kron(gradX(gradIdx(i-1)+1:gradIdx(i),cnt), [1;valMatrix(ai(i-1)+1:ai(i),cnt)]);
    end
    jw(1+ws(i-1):ws(i)) = sum(temp, 2);
end
jw = jw/num;            % average over the batch
jw = jw + lambda*w/num; % gradient of the L2 regularization term
end
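%% Numerical gradient check (added sketch, not part of the original script)
% Compares the analytic gradient from BackwardPropagation against central
% finite differences at a few randomly probed weights; for the sigmoid
% network above the two should agree to several significant digits (the
% 1e-7 shift in y_hat makes the match approximate). Call it manually when
% debugging, e.g.:
%   checkGradient(X(:,1:8), flagMatrix(:,1:8), w, lambda, NNLayer, acMethod)
function checkGradient(X, y, w, lambda, NNLayer, acMethod)
valMatrix = ForwardPropagation(X, w, NNLayer, acMethod);
[~, jw] = BackwardPropagation(y, valMatrix, w, lambda, NNLayer, acMethod);
h = 1e-5; % finite-difference step
for k = randperm(length(w), min(5, length(w)))
    wp = w; wp(k) = wp(k) + h;
    wm = w; wm(k) = wm(k) - h;
    % single-output call returns the (regularized) cross-entropy loss
    jp = BackwardPropagation(y, ForwardPropagation(X, wp, NNLayer, acMethod), wp, lambda, NNLayer, acMethod);
    jm = BackwardPropagation(y, ForwardPropagation(X, wm, NNLayer, acMethod), wm, lambda, NNLayer, acMethod);
    fprintf('w(%d): analytic %.6g, numeric %.6g\n', k, jw(k), (jp-jm)/(2*h));
end
end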
function val = activateFunc(x, acMethod)
switch acMethod
    case {'SIGMOID','sigmoid'}
        val = 1.0./(1.0+exp(-x));
    case {'TANH','tanh'}
        val = tanh(x);
    case {'ReLU','relu'}
        val = max(0, x);
    case {'tansig'}
        val = 2./(1+exp(-2*x))-1; % element-wise; mathematically identical to tanh(x)
    otherwise
        error('unknown activation function: %s', acMethod);
end
end
function val = calculateGrad(x, acMethod)
% x holds activation OUTPUTS, so each derivative is expressed in terms of
% the output value rather than the pre-activation
switch acMethod
    case {'SIGMOID','sigmoid'}
        val = (1-x).*x; % sigma'(z) = sigma(z)*(1-sigma(z))
    case {'TANH','tanh'}
        val = 1 - x.^2; % tanh'(z) = 1 - tanh(z)^2
    case {'ReLU','relu'}
        val = x > 0;
    case {'tansig'}
        val = 1 - x.^2; % tansig equals tanh, so the derivative is the same
    otherwise
        error('unknown activation function: %s', acMethod);
end
end
%% Draw the decision region
function [h_region1, h_region2] = drawRegion(Range, w, NNLayer, acMethod)
% Range is indexed column-major: Range(1)/Range(3) bound x, Range(2)/Range(4) bound y
x_draw = Range(1):0.1:Range(3);
y_draw = Range(2):0.1:Range(4);
[meshX, meshY] = meshgrid(x_draw, y_draw);
[row, col] = size(meshX);
classes = zeros(row, col);
for i = 1:row
    valMatrix = ForwardPropagation([meshX(i,:); meshY(i,:)], w, NNLayer, acMethod);
    val = valMatrix(end,:) - valMatrix(end-1,:); % class-2 score minus class-1 score
    classes(i,:) = (val > 0) - (val < 0);        % +1 -> class 2 region, -1 -> class 1 region
end
[row, col]=find(classes ==1);
h_region1 =scatter(x_draw(col),y_draw(row),'MarkerFaceColor','r','MarkerEdgeColor','r');
h_region1.MarkerFaceAlpha =0.03;
h_region1.MarkerEdgeAlpha = 0.03;
[row, col] = find(classes == -1);
h_region2 =scatter(x_draw(col),y_draw(row),'MarkerFaceColor','b','MarkerEdgeColor','b');
h_region2.MarkerFaceAlpha =0.03;
h_region2.MarkerEdgeAlpha =0.03;
end