Learning Notes: Convolutional Neural Networks with PyTorch

Copyright notice: https://blog.csdn.net/gwplovekimi/article/details/83476780

This post is my personal learning notes. Reference material: 《深度学习入门之——PyTorch》.

PyTorch Chinese community site: https://www.pytorchtutorial.com/

On deconvolution (transposed convolution): https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md

On the dilation parameter of the convolution and transposed-convolution functions: "dilation (int or tuple, optional) – spacing between kernel elements". It effectively makes the kernel sparser.
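A quick way to see the effect (my own sketch, not from the book) is to compare the output sizes of a normal and a dilated 3*3 convolution on the same input:

import torch
import torch.nn as nn

x = torch.randn(1, 1, 7, 7)                              # a single 7*7 one-channel input
normal  = nn.Conv2d(1, 1, kernel_size=3, dilation=1)     # kernel spans a 3*3 window
dilated = nn.Conv2d(1, 1, kernel_size=3, dilation=2)     # same 9 weights spread over a 5*5 window
print(normal(x).shape)    # torch.Size([1, 1, 5, 5])
print(dilated(x).shape)   # torch.Size([1, 1, 3, 3])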

      For a fully connected neural network, the number of parameters is simply too large. For a 28*28 input image, a single neuron in the first hidden layer already needs 28*28 = 784 weights. With a few more hidden layers, or a slightly larger input image, the parameter count becomes enormous.
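As a quick check of that count (my own sketch, assuming a hypothetical hidden layer of 100 neurons):

import torch.nn as nn

fc = nn.Linear(28 * 28, 100)   # each of the 100 neurons connects to all 784 input pixels
print(fc.weight.shape)                            # torch.Size([100, 784]): 78,400 weights
print(sum(p.numel() for p in fc.parameters()))    # 78,500 including the 100 biases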

       A convolutional neural network arranges its neurons in a 3D volume (width, height, depth). Convolutional layers and fully connected layers contain parameters, while activation and pooling layers do not. The parameters are updated by gradient descent (or Adam).

       The filter parameters in a convolutional layer are obtained by learning.

       The spatial extent that a neuron is connected to is called its receptive field. The size of the receptive field equals the filter size; the depth of the receptive field must match the depth of the input, while the depth of the output equals the number of filters.

CNN: parameter sharing and sparse connectivity (local connectivity)

When setting up the network, pay attention to the constraint on the stride: the chosen stride and padding must yield an integer output size.

Parameter sharing effectively reduces the number of parameters.
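As a rough comparison (my own sketch), the parameter count of a convolutional layer depends only on the filters, not on the input size, because the same filters are shared across every spatial position:

import torch.nn as nn

conv = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
# 32 filters * (3 channels * 3 * 3 weights) + 32 biases = 896, regardless of the image size
print(sum(p.numel() for p in conv.parameters()))   # 896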

Now let's build a simple convolutional neural network.

import torch
import numpy as np
import torch.nn as nn

#define the model
class SimpleCNN(nn.Module):
	"""docstring for SimpleCNN"""
	def __init__(self):
		super(SimpleCNN, self).__init__()
		layer1=nn.Sequential()# Sequential is a container; submodules can be added to it by name
		layer1.add_module('conv1',nn.Conv2d(in_channels=3,out_channels=32,kernel_size=3,stride=1,padding=1))
		layer1.add_module('relu1',nn.ReLU(True))
		layer1.add_module('pool1',nn.MaxPool2d(2,2))
		self.layer1=layer1

		layer2=nn.Sequential()
		layer2.add_module('conv2',nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,stride=1,padding=1))
		layer2.add_module('relu2',nn.ReLU(True))
		layer2.add_module('pool2',nn.MaxPool2d(2,2))
		self.layer2=layer2

		layer3=nn.Sequential()
		layer3.add_module('conv3',nn.Conv2d(in_channels=64,out_channels=128,kernel_size=3,stride=1,padding=1))
		layer3.add_module('relu3',nn.ReLU(True))
		layer3.add_module('pool3',nn.MaxPool2d(2,2))
		self.layer3=layer3

		layer4=nn.Sequential()
		layer4.add_module('fc1',nn.Linear(2048,512))
		layer4.add_module('fc_relu1',nn.ReLU(True))
		layer4.add_module('fc2',nn.Linear(512,64))
		layer4.add_module('fc_relu2',nn.ReLU(True))
		layer4.add_module('fc3',nn.Linear(64,10))
		self.layer4=layer4

	def forward(self,x):
		conv1=self.layer1(x)
		conv2=self.layer2(conv1)
		conv3=self.layer3(conv2)
		fc_input=conv3.view(conv3.size(0),-1)# flatten each sample's feature maps into one vector
		fc_out=self.layer4(fc_input)
		return fc_out

model=SimpleCNN()
print(model)

The output after running:


for param in model.named_parameters():# named_parameters() yields a (name, parameter) pair for each layer
	print(param[0])

The result is shown in the figure below.

Adding a 1*1 convolutional layer can reduce the depth (number of channels) of its input, which cuts down the number of parameters and therefore the complexity of the network.
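For example (a minimal sketch of the idea), a 1*1 convolution can compress a 256-channel feature map down to 64 channels while leaving the spatial size unchanged:

import torch
import torch.nn as nn

x = torch.randn(1, 256, 28, 28)                                       # 256-channel feature map
reduce = nn.Conv2d(in_channels=256, out_channels=64, kernel_size=1)   # 1*1 convolution
print(reduce(x).shape)   # torch.Size([1, 64, 28, 28]): depth reduced, spatial size unchanged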

torchvision.models in PyTorch provides many predefined networks, and most of them come with pretrained weights. For details see:

https://www.pytorchtutorial.com/docs/torchvision/torchvision-models/
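A minimal example of loading one of these predefined models (the pretrained=True flag downloads the ImageNet weights; this matches the torchvision API at the time of this post, while newer releases use a weights argument instead):

import torchvision.models as models

resnet18 = models.resnet18(pretrained=True)   # architecture plus pretrained ImageNet weights
vgg16 = models.vgg16()                        # architecture only, randomly initialized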

Next, let's implement a demo that classifies the handwritten digits in the MNIST dataset. MNIST is a handwritten-digit dataset covering the digits 0~9; the standard release has 60,000 training images and 10,000 test images (many tutorials hold out 5,000 training images as a validation set, leaving 55,000 for training), and every image is a 28*28 grayscale image.

import torch
from torch import optim
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets,transforms

torch.manual_seed(1)    # reproducible


#Hyperparameters
batch_size=50
learning_rate=1e-3
EPOCH=1


#Data preprocessing
data_tf=transforms.Compose([transforms.ToTensor(),transforms.Normalize([0.5],[0.5])])# chain the preprocessing steps together
#.ToTensor(): converts the image to a tensor and scales pixel values to [0,1]
#.Normalize(): subtracts the mean and divides by the standard deviation


#download the MNIST
train_dataset=datasets.MNIST(root='./MNIST_data',train=True,transform=data_tf,download=True)
test_data=datasets.MNIST(root='./MNIST_data',train=False,transform=data_tf)

train_loader=DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)# shuffle the training data

#####################################################################################################################
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.layer1=nn.Sequential(nn.Conv2d(in_channels=1,out_channels=16,kernel_size=3,stride=1,padding=0),#feature map: 16*26*26
                                  nn.BatchNorm2d(16),
                                  nn.ReLU(),)# ReLU(inplace=True) would overwrite the input tensor in place to save memory

        self.layer2=nn.Sequential(nn.Conv2d(in_channels=16,out_channels=32,kernel_size=3,stride=1,padding=0),#32*24*24
        							nn.BatchNorm2d(32),
        							nn.ReLU(),
        							nn.MaxPool2d(kernel_size=2,stride=2),)#32*12*12

        self.layer3=nn.Sequential(nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,stride=1,padding=0),#64*10*10
        							nn.BatchNorm2d(64),
        							nn.ReLU(),)

        self.layer4=nn.Sequential(nn.Conv2d(in_channels=64,out_channels=128,kernel_size=3,stride=1,padding=0),#128*8*8
        							nn.BatchNorm2d(128),
        							nn.ReLU(),
        							nn.MaxPool2d(kernel_size=2,stride=2),)#128*4*4

        self.fc=nn.Sequential(nn.Linear(128*4*4,1024),
        						nn.ReLU(),
        						nn.Linear(1024,128),
        						nn.ReLU(),
        						nn.Linear(128,10),)
        
    def forward(self,x):
        x=self.layer1(x)
        x=self.layer2(x)
        x=self.layer3(x)
        x=self.layer4(x)
        x=x.view(x.size(0),-1)# flatten to (batch_size, 128*4*4)
        output=self.fc(x)
        return output
###########################################################################################################

#train
model=CNN()
print(model)

if torch.cuda.is_available():
	model=model.cuda()

criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(),lr=learning_rate)

for epoch in range(EPOCH):
    for step,(img,label) in enumerate(train_loader):
        if torch.cuda.is_available():
            img=Variable(img).cuda()# wrap in Variable and move to the GPU (since PyTorch 0.4, Variable is merged into Tensor)
            label=Variable(label).cuda()
        else:
            img=Variable(img)
            label=Variable(label)
        output=model(img)
        loss=criterion(output,label)

        #reset gradients
        optimizer.zero_grad()
        #backward pass
        loss.backward()
        #update parameters
        optimizer.step()
        	

#test
model.eval()# switch to evaluation mode
#During testing, dropout is turned off and BatchNorm uses the running statistics accumulated during training,
#so the model should be put into evaluation mode before testing.

The code above has some issues when run, so a new version is given below.

(Reference: https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02-intermediate/convolutional_neural_network/main.py#L35-L56)

import torch
import torch.nn as nn
import torchvision
#torchvision bundles popular datasets, model architectures, and common image transforms.
import torchvision.transforms as transforms


#Device configuration
device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

#Hyper parameters
num_epochs=6
num_classes=10#number 0~9
batch_size=100
learning_rate=0.001

#MNIST dataset
train_dataset=torchvision.datasets.MNIST(root='./MNIST_data',train=True,transform=transforms.ToTensor(),download=True)
test_dataset=torchvision.datasets.MNIST(root='./MNIST_data',train=False,transform=transforms.ToTensor())

#Data loader
#It wraps the dataset samples into batched Tensors of size batch_size;
#after that they only need to be wrapped as Variables (or used directly as Tensors) to feed the model.
train_loader=torch.utils.data.DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
test_loader=torch.utils.data.DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False)

##########################################################
#define the CNN
class ConvNet(nn.Module):
    def __init__(self,num_classes=10):
        super(ConvNet,self).__init__()#input 1*28*28
        self.layer1=nn.Sequential(
                                  nn.Conv2d(in_channels=1,out_channels=16,kernel_size=5,stride=1,padding=2),#16*28*28
                                  nn.BatchNorm2d(16),
                                  nn.ReLU(),
                                  nn.MaxPool2d(kernel_size=2,stride=2)#16*14*14  
                                  )
        
        self.layer2=nn.Sequential(
                                  nn.Conv2d(in_channels=16,out_channels=32,kernel_size=5,stride=1,padding=2),#32*14*14
                                  nn.BatchNorm2d(32),
                                  nn.ReLU(),
                                  nn.MaxPool2d(kernel_size=2,stride=2)#32*7*7  
                                  )     
        
        self.fc=nn.Linear(7*7*32,num_classes)

        
    def forward(self,x):
        out=self.layer1(x)
        out=self.layer2(out)
        out=out.reshape(out.size(0),-1)
        out=self.fc(out)
        
        return out
        
model=ConvNet(num_classes).to(device)# move the model to the GPU if one is available, otherwise keep it on the CPU

#loss and optimizer
criterion=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(),lr=learning_rate)

#train the model
total_step=len(train_loader)# number of batches per epoch (each iteration consumes batch_size images)
for epoch in range(num_epochs):
    for i,(images,labels) in enumerate(train_loader):
        images=images.to(device)
        labels=labels.to(device)
        
        #Forward pass
        outputs=model(images)
        loss=criterion(outputs,labels)
        
        #backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if(i+1)%100==0:
            print('Epoch[{}/{}],Step[{}/{}],Loss:{:.4f}'
                  .format(epoch+1,num_epochs,i+1,total_step,loss.item()))
    
#################################################################################
#test the model
model.eval()# eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():# disable gradient tracking during inference to save memory
    correct=0
    total=0
    for images,labels in test_loader:
        images=images.to(device)
        labels=labels.to(device)
        
        outputs=model(images)
        _,predicted=torch.max(outputs.data,1)# returns (max values, indices) along dim=1; the indices are the predicted classes
        total+=labels.size(0)
        correct += (predicted == labels).sum().item()
        
    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))
        
# Save the model checkpoint
#torch.save(model.state_dict(), 'model.ckpt')

Screenshot of the run results.
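If the commented-out torch.save call is enabled, the saved weights can later be restored by building the same model and loading the state dict back (a usage sketch, reusing the names from the code above):

model = ConvNet(num_classes).to(device)
model.load_state_dict(torch.load('model.ckpt'))
model.eval()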

About DataLoader: https://blog.csdn.net/u014380165/article/details/79058479

This interface is mainly used to take the output of a custom data-reading interface, or of one of PyTorch's built-in dataset interfaces, and package it into Tensors of batch_size samples; afterwards it only needs to be wrapped as a Variable to be fed to the model.
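A small self-contained sketch of the idea, using a dummy TensorDataset instead of MNIST (the names here are just for illustration):

import torch
from torch.utils.data import TensorDataset, DataLoader

data = torch.randn(10, 1, 28, 28)       # 10 fake grayscale images
labels = torch.randint(0, 10, (10,))    # 10 fake class labels
loader = DataLoader(TensorDataset(data, labels), batch_size=4, shuffle=True)
for batch_imgs, batch_labels in loader:
    print(batch_imgs.shape, batch_labels.shape)   # e.g. torch.Size([4, 1, 28, 28]) torch.Size([4])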

About ReLU(inplace=True)
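The difference can be checked by seeing whether the input tensor itself gets overwritten (my own small check):

import torch
import torch.nn as nn

x = torch.tensor([-1.0, 2.0])
nn.ReLU(inplace=False)(x)   # returns a new tensor, x is untouched
print(x)                    # tensor([-1., 2.])
nn.ReLU(inplace=True)(x)    # modifies x in place, saving a memory allocation
print(x)                    # tensor([0., 2.])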

About setting an instantiated model to train/eval mode when training and testing with PyTorch:

https://www.cnblogs.com/king-lps/p/8570021.html
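In short, the usual pattern looks like this (a sketch; model stands for any network containing dropout or BatchNorm layers):

model.train()   # training mode: dropout active, BatchNorm uses per-batch statistics
# ... run the training loop ...
model.eval()    # evaluation mode: dropout off, BatchNorm uses its running statistics
# ... run the validation / test loop ...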

About optimizer.step()
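optimizer.step() applies one update to the parameters using the gradients that loss.backward() has accumulated. The usual three-step pattern, already used in the training loops above, is:

optimizer.zero_grad()   # clear the gradients left over from the previous iteration
loss.backward()         # compute gradients of the loss w.r.t. all parameters
optimizer.step()        # update the parameters with those gradients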

About torch.no_grad()
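torch.no_grad() is a context manager that disables gradient tracking, saving memory and computation during inference. A minimal self-contained check:

import torch

x = torch.ones(2, requires_grad=True)
with torch.no_grad():
    y = x * 2
print(y.requires_grad)   # False: operations inside the block are not recorded in the graph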

About torch.max
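torch.max with a dim argument returns both the maximum values and their indices; in the test loop above, the indices serve as the predicted class labels. A small illustration:

import torch

outputs = torch.tensor([[0.1, 2.0, 0.3],
                        [1.5, 0.2, 0.4]])
values, predicted = torch.max(outputs, 1)   # maximum over dimension 1 (the class dimension)
print(values)      # tensor([2.0000, 1.5000])
print(predicted)   # tensor([1, 0])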
