Code source: https://github.com/yunjey/pytorch-tutorial, a very good tutorial.
1. Basics
1.1 linear_regression.py
#linear_regression.py
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
# Hyper-parameters
input_size = 1
output_size = 1
num_epochs = 60
learning_rate = 0.001
# Toy dataset
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168],
                    [9.779], [6.182], [7.59], [2.167], [7.042],
                    [10.791], [5.313], [7.997], [3.1]], dtype=np.float32)
print(x_train.shape)
y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573],
                    [3.366], [2.596], [2.53], [1.221], [2.827],
                    [3.465], [1.65], [2.904], [1.3]], dtype=np.float32)
print(y_train.shape)
# Linear regression model
model = nn.Linear(input_size, output_size)
# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Train the model
for epoch in range(num_epochs):
    # Convert numpy arrays to torch tensors
    inputs = torch.from_numpy(x_train)
    targets = torch.from_numpy(y_train)
    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch+1) % 5 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
# Plot the graph
predicted = model(torch.from_numpy(x_train)).detach().numpy()
plt.plot(x_train, y_train, 'ro', label='Original data')
plt.plot(x_train, predicted, label='Fitted line')
plt.legend()
plt.show()
# Save the model checkpoint
# torch.save(model.state_dict(), 'model.ckpt')
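The commented-out save line above only sketches the idea; a minimal save/load round trip with the state_dict API would look like the following (the filename 'model.ckpt' is just an illustrative choice, not part of the original script):

torch.save(model.state_dict(), 'model.ckpt')         # store only the learned parameters
restored = nn.Linear(input_size, output_size)        # rebuild the same architecture
restored.load_state_dict(torch.load('model.ckpt'))   # load the parameters back in
print(restored.weight, restored.bias)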
1.2 logistic_regression.py
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# Hyper-parameters
input_size = 28 * 28 # 784
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001
# MNIST dataset (images and labels)
train_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)
test_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data',
                                          train=False,
                                          transform=transforms.ToTensor())
# Data loader (input pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
# Logistic regression model
model = nn.Linear(input_size, num_classes)
# Loss and optimizer
# nn.CrossEntropyLoss() computes softmax internally
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
#print(total_step) # 600
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Reshape images to (batch_size, input_size)
        # torch.Size([100, 1, 28, 28]) ==> torch.Size([100, 784])
        # Key point: the whole batch of 100 images is trained at once, and each image maps to a 10-dim output.
        images = images.reshape(-1, input_size)
        '''
        print(labels)
        tensor([0, 8, 3, 4, 7, 0, 6, 3, 0, 7, 1, 1, 5, 3, 7, 1, 2, 6, 2, 1, 6, 3, 1, 1,
                4, 3, 1, 9, 5, 1, 4, 4, 6, 6, 3, 3, 9, 7, 9, 8, 8, 2, 6, 3, 3, 2, 0, 8,
                8, 8, 8, 3, 8, 7, 5, 0, 7, 8, 3, 5, 1, 1, 8, 6, 9, 5, 2, 2, 1, 4, 6, 3,
                0, 9, 2, 5, 9, 6, 3, 5, 4, 1, 1, 9, 3, 7, 3, 9, 3, 3, 8, 8, 8, 4, 3, 0,
                9, 1, 6, 3])
        print(labels.shape)
        torch.Size([100])
        print(images.shape)
        torch.Size([100, 1, 28, 28])
        '''
        # Forward pass
        outputs = model(images)
        # print(outputs.shape)  # torch.Size([100, 10])
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, input_size)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))
# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
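As the comment above notes, nn.CrossEntropyLoss() expects raw logits and applies the softmax (log-softmax, to be precise) internally. A small standalone check of that claim, with made-up toy tensors:

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 10)                                # raw, unnormalized outputs for 4 samples
labels = torch.tensor([0, 3, 7, 9])
ce = nn.CrossEntropyLoss()(logits, labels)                 # softmax + log applied internally
nll = nn.NLLLoss()(F.log_softmax(logits, dim=1), labels)   # same computation done explicitly
print(ce.item(), nll.item())                               # the two values should match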
1.3 feedforward_neural_network.py
#feedforward_neural_network.py
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)
test_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data',
                                          train=False,
                                          transform=transforms.ToTensor())
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
# print(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
# Save the model checkpoint
#torch.save(model.state_dict(), 'model.ckpt')
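One detail worth calling out from the test loop: torch.max(outputs, 1) returns a (values, indices) pair, and it is the indices that serve as the predicted class labels. A quick standalone illustration with made-up logits:

import torch

outputs = torch.randn(3, 10)                # fake logits for 3 samples and 10 classes
values, predicted = torch.max(outputs, 1)   # per-row maximum and the column index where it occurs
print(values.shape, predicted.shape)        # torch.Size([3]) torch.Size([3])
print(predicted)                            # predicted class index for each sample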
2. Intermediate
2.1 convolutional_neural_network.py
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.001
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data',
train=True,
transform=transforms.ToTensor(),
download=True)
test_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data',
train=False,
transform=transforms.ToTensor())
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
batch_size=batch_size,
shuffle=False)
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2))
            nn.AdaptiveAvgPool2d((14, 14))
        )
        # Conv2d: out_channels = 16
        #   28 - 5 + 4 + 1 = 28
        #   [100, 1, 28, 28] ==> [100, 16, 28, 28]
        # Pooling to 14x14:
        #   28 / 2 = 14
        #   [100, 16, 28, 28] ==> [100, 16, 14, 14]
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2))
            nn.AdaptiveAvgPool2d((7, 7))
        )
        # Conv2d: out_channels = 32
        #   14 - 5 + 4 + 1 = 14
        #   [100, 16, 14, 14] ==> [100, 32, 14, 14]
        # Pooling to 7x7:
        #   14 / 2 = 7
        #   [100, 32, 14, 14] ==> [100, 32, 7, 7]
        self.fc = nn.Linear(7*7*32, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)  # [100, 32*7*7]
        out = self.fc(out)
        return out
model = ConvNet(num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # print(images.shape)
        images = images.to(device)  # [100, 1, 28, 28]
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
# Test the model
model.eval() # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))
# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
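The shape comments inside ConvNet ([100, 1, 28, 28] ==> [100, 16, 14, 14] ==> [100, 32, 7, 7]) can be verified by pushing a random dummy batch through the two layers; a small sketch, assuming the ConvNet class defined above and running on CPU:

import torch

dummy = torch.randn(100, 1, 28, 28)   # fake batch matching the MNIST input shape
net = ConvNet(num_classes=10)
out1 = net.layer1(dummy)
out2 = net.layer2(out1)
print(out1.shape)                     # expected: torch.Size([100, 16, 14, 14])
print(out2.shape)                     # expected: torch.Size([100, 32, 7, 7])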
2.2 deep_residual_network.py
# ---------------------------------------------------------------------------- #
# An implementation of https://arxiv.org/pdf/1512.03385.pdf #
# See section 4.2 for the model architecture on CIFAR-10 #
# Some part of the code was referenced from below #
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py #
# ---------------------------------------------------------------------------- #
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
num_epochs = 80
batch_size = 100
learning_rate = 0.001
# The argument to Compose is just a list, and the elements of that list are the transform operations to apply, in order.
# Image preprocessing modules
transform = transforms.Compose([
    transforms.Pad(4),                  # pad each border by 4 pixels
    transforms.RandomHorizontalFlip(),  # horizontally flip with probability p (default 0.5)
    transforms.RandomCrop(32),          # randomly crop a 32x32 patch
    transforms.ToTensor()])             # convert to a tensor scaled to [0, 1]
# CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='/home/Dataset/cifar10',
                                             train=True,
                                             transform=transform,
                                             download=False)
test_dataset = torchvision.datasets.CIFAR10(root='/home/Dataset/cifar10',
                                            train=False,
                                            transform=transforms.ToTensor())
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)
# Residual block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
# ResNet
# conv3x3(3,16)
# Bn(16)
# Relu
# layers
# layer1
# in_channels = 16
# out_channels = 16 blocks [2] stride = 1
# downsample = None
# in_channels = out_channels = 16
# layers[block[16, 16]]
# nn.Sequential(block(16, 16))
# conv3x3
# bn
# relu
# conv3x3
# bn
# [100, 16, 32, 32]
# layer2
# in_channels = 16
# out_channels = 32 blocks [2] stride = 2
# if 16 != 32:
# downsample = nn.Sequential(conv3x3(16, 32, stride = 2))
# conv3x3 ==> (32 - 3 + 1) / 2 + 1 = 16
# [100, 32, 16, 16]
# in_channels = out_channels = 32
# layers.append(block(32, 32))
# nn.Sequential(block(32, 32))
# conv3x3
# bn
# relu
# conv3x3
# bn
#[100, 32, 16, 16]
# layer3
# in_channel = 32 out_channels =64 stride = 2
# downsample = nn.Sequential(conv3x3(32,64,2))
# (16 - 3 + 1) / 2 + 1 = 8
# in_channels = out_channels = 64
# layers.append(block(64, 64))
# nn.Sequential(block(64, 64))
# conv3x3
# bn
# relu
# conv3x3
# bn
# [100, 64, 8, 8]
# avg_pool: [100, 64, 8, 8] ==> [100, 64, 1, 1]
# fc: [100, 64] ==> [100, 10]
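# Note on the size arithmetic in the trace above: for a Conv2d with kernel k, padding p, and stride s,
# the output spatial size is floor((in + 2*p - k) / s) + 1. For conv3x3 (k=3, p=1) with stride 2:
#   32 -> floor((32 + 2 - 3) / 2) + 1 = 16
#   16 -> floor((16 + 2 - 3) / 2) + 1 = 8
# which matches the [100, 32, 16, 16] and [100, 64, 8, 8] shapes.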
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)  # [100, 3, 32, 32] ==> [100, 16, 32, 32]
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)  # [100, 64, 8, 8]
        self.avg_pool = nn.AvgPool2d(8)  # [100, 64, 8, 8] ==> [100, 64, 1, 1]
        self.fc = nn.Linear(64, num_classes)  # [100, 64] ==> [100, 10]
# layer1
# in_channels = 16
# out_channels = 16 blocks [2] stride = 1
# downsample = None
# in_channels = out_channels = 16
# layers[block[16, 16]]
# nn.Sequential(block(16, 16))
# [100, 16, 32, 32]
# layer2
# in_channels = 16
# out_channels = 32 blocks [2] stride = 2
# if 16 != 32:
# downsample = nn.Sequential(conv3x3(16, 32, stride = 2))
# (32 - 3 + 1) / 2 + 1 = 16
# [100, 32, 16, 16]
# in_channels = out_channels = 32
# layers.append(block(32, 32))
# nn.Sequential(block(32, 32))
# [100, 32, 16, 16]
# layer3
# in_channel = 32 out_channels =64 stride = 2
# downsample = nn.Sequential(conv3x3(32,64,2))
# (16 - 3 + 1) / 2 + 1 = 8
# [100, 64, 8, 8]
# in_channels = out_channels = 64
#
# [100, 64, 8, 8]
    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)
    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
model = ResNet(ResidualBlock, [2, 2, 2]).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# For updating learning rate
def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
# Train the model
total_step = len(train_loader)
curr_lr = learning_rate
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i+1) % 100 == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
    # Decay learning rate
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)
# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))
# Save the model checkpoint
torch.save(model.state_dict(), 'resnet.ckpt')
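The update_lr helper above divides the learning rate by 3 every 20 epochs by hand; the same schedule can also be expressed with the built-in torch.optim.lr_scheduler.StepLR. A rough sketch of how the training loop might look with it (a variation on the tutorial code, not part of it):

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=1/3)
for epoch in range(num_epochs):
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        loss = criterion(model(images), labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    scheduler.step()  # multiplies the lr by gamma once every step_size epochs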
3. binary_cross_entropy_with_logits
import torch
import torch.nn.functional as F
input = torch.randn(3, requires_grad=True)
# input:
# tensor([ 0.3107,  0.6120, -1.0163], requires_grad=True)
target = torch.empty(3).random_(2)
# target:
# tensor([1., 0., 0.])
loss = F.binary_cross_entropy_with_logits(input, target)  # reduction defaults to 'mean'
# loss:
# tensor(0.6347, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
# Manual implementation:
# first apply sigmoid to input,
# then average the element-wise binary cross entropy (note the leading minus sign)
pred = torch.sigmoid(input)
print(pred)
print(1.0 * torch.log(torch.tensor(0.5771)))      # tensor(-0.5497)
print(1.0 * torch.log(torch.tensor(1 - 0.6484)))  # tensor(-1.0453)
result = -torch.mean(target * torch.log(pred) + (1 - target) * torch.log(1 - pred))
# pred and result:
# tensor([0.5771, 0.6484, 0.2658], grad_fn=<SigmoidBackward>)
# tensor(0.6347, grad_fn=<NegBackward>)
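As the manual computation shows, binary_cross_entropy_with_logits(x, y) is the same as applying sigmoid first and then the plain binary cross entropy; this equivalence can be checked directly with the functional API (random values, so only the equality of the two results matters):

import torch
import torch.nn.functional as F

x = torch.randn(3)
y = torch.empty(3).random_(2)
loss_logits = F.binary_cross_entropy_with_logits(x, y)     # sigmoid applied internally
loss_probs = F.binary_cross_entropy(torch.sigmoid(x), y)   # sigmoid applied explicitly
print(loss_logits.item(), loss_probs.item())               # the two values should match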