PyTorch学习笔记6--案例二：基于CNN的CIFAR10数据集的图像分类

基于CNN的CIFAR10图像分类

完整代码如下：

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision
import torchvision.transforms as transforms
import numpy as np
import time
from matplotlib import pyplot as plt

# ===========================================================================================
# 准备数据
# Compose的意思是将多个transform组合在一起用，ToTensor 将像素转化为[0,1]的数字，Normalize则正则化变为 [-1,1]
tf = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# 下载数据集，训练集：需要训练；测试集：不需要训练
train_set = torchvision.datasets.CIFAR10(root='./cifar10', train=True, download=True, transform=tf)
test_set = torchvision.datasets.CIFAR10(root='./cifar10', train=False, download=True, transform=tf)
# 指定十个类别的标签，有的数据集很大的回加载相应的标签文件(groundtruth)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'truck', 'ship')

# Training 共50000,取前20000作为训练集
n_training_sample = 50000
train_sample = SubsetRandomSampler(np.arange(n_training_sample, dtype=np.int64))
# Validation 取训练集中的[20000,20000+5000]作为验证集
# n_validation_sample = 5000
# validation_sample = SubsetRandomSampler(np.arange(n_training_sample, n_training_sample + n_validation_sample,dtype=np.int64))
# Testing 共10000,取前5000作为测试集
n_test_sample = 10000
test_sample = SubsetRandomSampler(np.arange(n_test_sample, dtype=np.int64))


# 开启shuffle就等于全集使用SubsetRandomSampler，都是随机采样,num_workers代表多线程加载数据,Windows上不能用(必须0),Linux可用
train_batch_size = 100
test_batch_size = 4
train_loader = torch.utils.data.DataLoader(train_set, batch_size=train_batch_size, sampler=train_sample, num_workers=0)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=test_batch_size, sampler=test_sample, num_workers=0)
#val_loader = torch.utils.data.DataLoader(train_set, batch_size=500, sampler=validation_sample, num_workers=0)

# ================================================================================================
# 2 建立模型 
#  MNIST案例的网络是卷积+全连接层的形式，这种结构的网络效果其实不好：
# 因为全连接层传递效率较低，同时会干扰到卷积层提取出的局部特征。
# 并且也没有用到BatchNorm和Dropout来防止过拟合的问题。
# 现在流行的网络结构大多采用全卷积层的结构：
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding = 1)
        self.conv2 = nn.Conv2d(64, 64, 3, padding = 1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding = 1)
        self.conv4 = nn.Conv2d(128, 128, 3, padding = 1)
        self.conv5 = nn.Conv2d(128, 256, 3, padding = 1)
        self.conv6 = nn.Conv2d(256, 256, 3, padding = 1)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.avgpool = nn.AvgPool2d(2, 2)
        self.globalavgpool = nn.AvgPool2d(8, 8)
        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(128)
        self.bn3 = nn.BatchNorm2d(256)
        self.dropout50 = nn.Dropout(0.5)
        self.dropout10 = nn.Dropout(0.1)
        self.fc = nn.Linear(256, 10)
 
    def forward(self, x):
        x = self.bn1(F.relu(self.conv1(x)))
        x = self.bn1(F.relu(self.conv2(x)))
        x = self.maxpool(x)
        x = self.dropout10(x)
        x = self.bn2(F.relu(self.conv3(x)))
        x = self.bn2(F.relu(self.conv4(x)))
        x = self.avgpool(x)
        x = self.dropout10(x)
        x = self.bn3(F.relu(self.conv5(x)))
        x = self.bn3(F.relu(self.conv6(x)))
        x = self.globalavgpool(x)
        x = self.dropout50(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
cnn = CNN()
# 如有GPU则自动使用GPU计算
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 
cnn.to(device)
# ===================================================================================
# 损失函数
loss_func = torch.nn.CrossEntropyLoss()  # 交叉熵损失函数
# 优化器
optimizer = optim.Adam(cnn.parameters(), lr=0.001)     # Adam 优化算法是随机梯度下降算法的扩展式
 
# ==========================================================================================
def trainNet(epoch):
    print('Epoch {}'.format(epoch))
    # 加载数据集上边的方法解释了获取训练数据
    training_start_time = time.time()   # 开始时间，为了后边统计一个训练花费时间
    #循环训练 n_epochs是5,也就是重复扫 五遍样本数据,CIFAR10数据集将50000条训练数据分为了五个batch，所以这个地方不要有疑惑
    start_time = time.time()
    train_loss = 0
    for step,(x_batch, y_batch) in enumerate(train_loader):
        x_batch = x_batch.cuda()
        y_batch = y_batch.cuda()
        # forward:前向传播
        outputs = cnn(x_batch)  #
        loss = loss_func(outputs, y_batch)
        train_loss += loss.item()
        # 在一个epoch里。每十组batchsize大小的数据输出一次结果，即以batch_size大小的数据为一组，到第10组，20组，30组...的时候输出
        if step % (len(train_loader)/100) == 0:
            print("epoch{}, {:d}% \t loss:{:.6f} took:{:.2f}s".format(epoch, int(100 * (step) / len(train_loader)),loss.item(), time.time()-start_time))
            start_time = time.time()
        #backward:后向传播
        optimizer.zero_grad()  # 将所有的梯度置零，原因是防止每次backward的时候梯度会累加
        loss.backward()  # 根据反向传播更新所有的参数
        optimizer.step()
    print("Training loss={}, took {:.2f}s".format(train_loss/(len(train_loader)),time.time() - training_start_time))  # 所有的Epoch结束，也就是训练结束，计算花费的时间

#使用以下方法保存和恢复网络参数
#torch.save(cnn, 'cifar10.pkl')
#cnn = torch.load('cifar10.pkl')

def test():
    correct = 0
    test_loss = 0
    cnn.eval()
    with torch.no_grad():
        for data in test_loader:
            # Forward pass
            x_batch,y_batch = data
            x_batch = x_batch.cuda()
            y_batch = y_batch.cuda()
            out = cnn(x_batch)
            loss = loss_func(out, y_batch)
            predicted = torch.max(out, 1)[1]
            correct += (predicted == y_batch).sum().item()
            test_loss += loss.item()
        print("test loss = {:.2f}, Accuracy={:.6f}".format(test_loss / len(test_loader),correct/len(test_loader)/test_batch_size))  # 求验证集的平均损失是多少
    


# 执行整个训练过程
for epoch in range(1,11):
    trainNet(epoch)
    test()

# 统计每类的分类准确率
cnn.eval()
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in test_loader:
        x_batch, y_batch = data
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        out = cnn(x_batch)
        predicted = torch.max(out, 1)[1]
        c = (predicted == y_batch).squeeze()
        # 
        for i in range(test_batch_size):
            label = y_batch[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
    for i in range(10):
        print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

cifar10教程补充内容

更优选的网络,类似VGG

这个网络比前面那个准确率更高一些.

class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.conv1 = nn.Conv2d(3,64,3,padding=1)
        self.conv2 = nn.Conv2d(64,64,3,padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()

        self.conv3 = nn.Conv2d(64,128,3,padding=1)
        self.conv4 = nn.Conv2d(128, 128, 3,padding=1)
        self.pool2 = nn.MaxPool2d(2, 2, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()

        self.conv5 = nn.Conv2d(128,128, 3,padding=1)
        self.conv6 = nn.Conv2d(128, 128, 3,padding=1)
        self.conv7 = nn.Conv2d(128, 128, 1,padding=1)
        self.pool3 = nn.MaxPool2d(2, 2, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()

        self.conv8 = nn.Conv2d(128, 256, 3,padding=1)
        self.conv9 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv10 = nn.Conv2d(256, 256, 1, padding=1)
        self.pool4 = nn.MaxPool2d(2, 2, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.relu4 = nn.ReLU()

        self.conv11 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv12 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv13 = nn.Conv2d(512, 512, 1, padding=1)
        self.pool5 = nn.MaxPool2d(2, 2, padding=1)
        self.bn5 = nn.BatchNorm2d(512)
        self.relu5 = nn.ReLU()

        self.fc14 = nn.Linear(512*4*4,1024)
        self.drop1 = nn.Dropout2d()
        self.fc15 = nn.Linear(1024,1024)
        self.drop2 = nn.Dropout2d()
        self.fc16 = nn.Linear(1024,10)


    def forward(self,x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        x = self.bn1(x)
        x = self.relu1(x)


        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool2(x)
        x = self.bn2(x)
        x = self.relu2(x)

        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.pool3(x)
        x = self.bn3(x)
        x = self.relu3(x)

        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.pool4(x)
        x = self.bn4(x)
        x = self.relu4(x)

        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        x = self.pool5(x)
        x = self.bn5(x)
        x = self.relu5(x)
        # print(" x shape ",x.size())
        x = x.view(-1,512*4*4)
        x = F.relu(self.fc14(x))
        x = self.drop1(x)
        x = F.relu(self.fc15(x))
        x = self.drop2(x)
        x = self.fc16(x)

        return x

显示图片及标签

显示一些训练集中的照片：

import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
dataiter = iter(trainloader)
images, labels = dataiter.next()
imshow(torchvision.utils.make_grid(images))
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

显示预测结果和实际结果：

dataiter = iter(testloader)
images, labels = dataiter.next()
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))
outputs = net(images)
predicted = torch.max(outputs, 1)[1]
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))

PyTorch学习笔记6--案例二：基于CNN的CIFAR10数据集的图像分类

基于CNN的CIFAR10图像分类

cifar10教程补充内容

更优选的网络,类似VGG

显示图片及标签

猜你喜欢