VGG16 (included with PyTorch) + CIFAR10

Supports training with multiple GPUs

import os
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets, models
from torchsummary import summary


class VGGNet(nn.Module):
    def __init__(self, num_classes=10):  # CIFAR-10 has 10 classes
        super(VGGNet, self).__init__()
        net = models.vgg16(pretrained=True)  # load VGG16 weights pretrained on ImageNet
        net.classifier = nn.Sequential()  # empty the original classifier; we replace it below
        self.features = net  # keep the VGG16 feature extractor (conv layers + avgpool)
        self.classifier = nn.Sequential(  # define our own classifier
            nn.Linear(512 * 7 * 7, 512),  # 512 * 7 * 7 is fixed by the VGG16 feature extractor; the hidden size (512) can be tuned
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 128),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


if __name__ == '__main__':

    '''Hyperparameters'''
    file_path = '/MyDisk/Experiment/DataSet/Pytorch/cifar10'
    batch_size = 512  # batch size
    num_epoches = 10  # number of passes over the training set

    '''Download the CIFAR-10 (10-class) training and test sets'''  # you can also download cifar-10-python.tar.gz yourself and place it in the target directory
    train_dataset = datasets.CIFAR10(file_path, train=True, transform=transforms.ToTensor(), download=True)
    test_dataset = datasets.CIFAR10(file_path, train=False, transform=transforms.ToTensor(), download=True)
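    # Optional tweak (not in the original post): since the backbone is pretrained on ImageNet,
    # normalizing the images with the ImageNet statistics may improve accuracy, e.g.:
    # transform = transforms.Compose([
    #     transforms.ToTensor(),
    #     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # ])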

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)  # no need to shuffle the test set

    '''Instantiate the model and check whether a GPU is available'''
    model = VGGNet()  # instantiate the model first
    summary(model, input_size=(3, 32, 32), device='cpu')  # print the model structure

    os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1'  # expose only GPUs 0 and 1 (optional)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.device_count() > 1:  # multiple GPUs are available
        print("Using %d GPUs" % torch.cuda.device_count())
        model = nn.DataParallel(model, device_ids=[0, 1])  # if device_ids is omitted, all visible GPUs are used by default

    model = model.to(device)

    '''Define the loss function and optimizer'''
    loss_func = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    '''   Training loop
    - compute the loss: loss = loss_func(out, batch_y)
    - clear gradients left over from the previous step: optimizer.zero_grad()
    - backpropagate the error: loss.backward()
    - apply the parameter updates to the model's parameters: optimizer.step()
    '''
    for epoch in range(num_epoches):
        model.train()
        print('\n', '*' * 25, 'epoch {}'.format(epoch + 1), '*' * 25)  # .format fills the {} placeholder with the epoch number
        running_loss = 0.0
        num_correct = 0.0
        for i, data in enumerate(train_loader, 0):
            img, label = data
            img, label = img.to(device), label.to(device)  # plain Tensors; the old Variable wrapper is no longer needed

            out = model(img)  # forward pass

            # backward pass
            loss = loss_func(out, label)  # compute the loss
            optimizer.zero_grad()  # clear gradients left over from the previous step
            loss.backward()  # backpropagate and compute gradients
            optimizer.step()  # apply the updates to the model's parameters

            # accumulate loss and accuracy
            running_loss += loss.item() * label.size(0)
            _, pred = torch.max(out, 1)  # predicted class = index of the largest logit
            num_correct += (pred == label).sum().item()  # count correct predictions
            # print('==> batch={}, running_loss={}, num_correct={}'.format(i + 1, running_loss, num_correct))

        print(
            'Train==> Finish {} epoch, Loss: {:.6f}, Acc: {:.6f}'.format(epoch + 1, running_loss / (len(train_dataset)), num_correct / (len(train_dataset))))

        # Evaluate the model
        model.eval()  # switch to evaluation mode (disables dropout)
        eval_loss = 0
        num_correct = 0
        with torch.no_grad():  # gradients are not needed during evaluation
            for data in test_loader:
                img, label = data
                img, label = img.to(device), label.to(device)

                out = model(img)
                loss = loss_func(out, label)
                eval_loss += loss.item() * label.size(0)
                _, pred = torch.max(out, 1)
                num_correct += (pred == label).sum().item()
        print('Test==>  Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len(test_dataset)), num_correct / (len(test_dataset))))

    # Save the model (when wrapped in nn.DataParallel, the parameters live under model.module,
    # so save model.module.state_dict() to keep the checkpoint free of the "module." prefix)
    state_dict = model.module.state_dict() if isinstance(model, nn.DataParallel) else model.state_dict()
    torch.save(state_dict, './VGGNet16_cifar10.pth')

Results:
************************* epoch 10 *************************
Train==> Finish 10 epoch, Loss: 0.521520, Acc: 0.831220
Test==> Loss: 0.657702, Acc: 0.795300
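
To reload the trained weights for inference, here is a minimal sketch (assuming the checkpoint was saved without the DataParallel "module." prefix, as in the code above):

model = VGGNet()
model.load_state_dict(torch.load('./VGGNet16_cifar10.pth', map_location='cpu'))
model.eval()  # switch dropout off for inference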

Origin blog.csdn.net/qq_42887760/article/details/111151493