1 Linear Regression
1.1 Implementation from Scratch
# import packages and modules
%matplotlib inline
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random

print(torch.__version__)

# set the number of input features
num_inputs = 2
# set the number of examples
num_examples = 1000
# set the true weight and bias used to generate the corresponding labels
true_w = [2, -3.4]
true_b = 4.2
features = torch.randn(num_examples, num_inputs, dtype=torch.float32)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# add Gaussian noise to the labels
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()),
                       dtype=torch.float32)

# visualize the generated data
plt.scatter(features[:, 1].numpy(), labels.numpy(), 1);

# read the dataset
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # read the samples in random order
    for i in range(0, num_examples, batch_size):
        # the last batch may be smaller than batch_size
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])
        yield features.index_select(0, j), labels.index_select(0, j)
batch_size = 10

for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
# initialize model parameters
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)), dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)
w.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)

# define the model
def linreg(X, w, b):
    return torch.mm(X, w) + b

# define the loss function
def squared_loss(y_hat, y):
    return (y_hat - y.view(y_hat.size())) ** 2 / 2

# define the optimization function
def sgd(params, lr, batch_size):
    for param in params:
        # use .data to update the parameter without gradient tracking
        param.data -= lr * param.grad / batch_size
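# A variant of the update step (a minimal sketch, assuming PyTorch >= 0.4):
# the update can be wrapped in torch.no_grad() instead of touching .data,
# which likewise keeps it out of the autograd graph.
def sgd_no_grad(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size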
# training: initialize hyperparameters
lr = 0.03
num_epochs = 5
net = linreg
loss = squared_loss
# training loop: repeats num_epochs times; in each epoch, every sample in the
# dataset is used once. X is the feature and y is the label of a batch.
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y).sum()  # loss of the batch samples
        l.backward()                     # compute the gradient of the batch loss
        sgd([w, b], lr, batch_size)      # update parameters with minibatch SGD
        # reset parameter gradients
        w.grad.data.zero_()
        b.grad.data.zero_()
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))

# print the learned parameters next to the true ones
print(w, true_w)
print(b, true_b)
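# Sanity check (a minimal sketch, assuming numpy is available): linear
# regression has a closed-form least-squares solution, so the learned
# parameters can be compared against it directly.
X_np = np.concatenate([features.numpy(),
                       np.ones((num_examples, 1), dtype=np.float32)], axis=1)
coef, _, _, _ = np.linalg.lstsq(X_np, labels.numpy(), rcond=None)
print(coef)  # first two entries approximate true_w, the last one true_b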
1.2 Concise Implementation
import torch
from torch import nn
import numpy as np

torch.manual_seed(1)
print(torch.__version__)
torch.set_default_tensor_type('torch.FloatTensor')

# generate the dataset
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = torch.tensor(np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)

# read the dataset
import torch.utils.data as Data
batch_size = 10

# combine the features and labels of the dataset
dataset = Data.TensorDataset(features, labels)

# put the dataset into a DataLoader
data_iter = Data.DataLoader(
    dataset=dataset,        # torch TensorDataset format
    batch_size=batch_size,  # mini batch size
    shuffle=True,           # whether to shuffle the data
    num_workers=2,          # number of worker processes reading the data
)

for X, y in data_iter:
    print(X, '\n', y)
    break

# define the model
class LinearNet(nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()  # call the parent constructor to initialize
        # function prototype: `torch.nn.Linear(in_features, out_features, bias=True)`
        self.linear = nn.Linear(n_feature, 1)

    def forward(self, x):
        y = self.linear(x)
        return y
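# For reference (a minimal sketch): the Data.TensorDataset used above simply
# implements the Dataset contract below, returning one (feature, label) pair
# per index; DataLoader only requires __getitem__ and __len__.
class MyTensorDataset(Data.Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __getitem__(self, index):
        return self.features[index], self.labels[index]

    def __len__(self):
        return len(self.features)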
net = LinearNet(num_inputs)
print(net)

# ways to build a multilayer network
# method one
net = nn.Sequential(
    nn.Linear(num_inputs, 1)
    # other layers can be added here
)

# method two
net = nn.Sequential()
net.add_module('linear', nn.Linear(num_inputs, 1))
# net.add_module ......

# method three
from collections import OrderedDict
net = nn.Sequential(OrderedDict([
    ('linear', nn.Linear(num_inputs, 1))
    # ......
]))

print(net)
print(net[0])

# initialize parameters
from torch.nn import init
init.normal_(net[0].weight, mean=0.0, std=0.01)
init.constant_(net[0].bias, val=0.0)
# or use `net[0].bias.data.fill_(0)` to modify it directly

for param in net.parameters():
    print(param)
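# Parameters can also be inspected together with their names (a minimal sketch):
for name, param in net.named_parameters():
    print(name, param.shape)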
# define the loss function
loss = nn.MSELoss()  # nn built-in squared loss function
# function prototype: `torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')`

# define the optimization function
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.03)  # built-in minibatch stochastic gradient descent
print(optimizer)
# function prototype: `torch.optim.SGD(params, lr=, momentum=0, dampening=0, weight_decay=0, nesterov=False)`
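# optim.SGD also accepts per-group settings (a minimal sketch; the learning
# rates here are purely illustrative): different parameter groups can be
# given different learning rates.
optimizer_grouped = optim.SGD([
    {'params': net[0].weight, 'lr': 0.03},
    {'params': net[0].bias, 'lr': 0.01}
])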
# training
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        output = net(X)
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad()  # reset gradients, equivalent to net.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))

# result comparison
dense = net[0]
print(true_w, dense.weight.data)
print(true_b, dense.bias.data)
2 Softmax and Classification Models
2.1 Implementation from Scratch
import torch
import torchvision
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)
print(torchvision.__version__)

# get the training and test datasets
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, root='/home/kesci/input/FashionMNIST2065')

# initialize model parameters
num_inputs = 784  # each image is 28 * 28 = 784 pixels
num_outputs = 10
W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)), dtype=torch.float)
b = torch.zeros(num_outputs, dtype=torch.float)
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
# operating on a multi-dimensional Tensor along a dimension
X = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(X.sum(dim=0, keepdim=True))   # dim=0: sum down each column, keep the dimension
print(X.sum(dim=1, keepdim=True))   # dim=1: sum across each row, keep the dimension
print(X.sum(dim=0, keepdim=False))  # dim=0: sum down each column, drop the dimension
print(X.sum(dim=1, keepdim=False))  # dim=1: sum across each row, drop the dimension

# define the softmax operation
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition  # broadcasting applies here

X = torch.rand((2, 5))
X_prob = softmax(X)
print(X_prob, '\n', X_prob.sum(dim=1))

# softmax regression model
def net(X):
    return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)
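# Numerical note (a minimal sketch): exp overflows for large logits; a stable
# variant subtracts the per-row maximum before exponentiating, which leaves
# the softmax output unchanged.
def stable_softmax(X):
    X_shift = X - X.max(dim=1, keepdim=True)[0]
    X_exp = X_shift.exp()
    return X_exp / X_exp.sum(dim=1, keepdim=True)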
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.LongTensor([0, 2])
y_hat.gather(1, y.view(-1, 1))

# define the cross-entropy loss
def cross_entropy(y_hat, y):
    return - torch.log(y_hat.gather(1, y.view(-1, 1)))

# define accuracy
def accuracy(y_hat, y):
    return (y_hat.argmax(dim=1) == y).float().mean().item()

print(accuracy(y_hat, y))

# This function is saved in the d2lzh_pytorch package for later use. It will be
# improved gradually; its full implementation appears in the "Image Augmentation" section.
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n

print(evaluate_accuracy(test_iter, net))
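# Quick check (a minimal sketch): gather(1, y.view(-1, 1)) picks y_hat[i, y[i]]
# for each row i, i.e. the predicted probability of the true class.
picked = y_hat.gather(1, y.view(-1, 1))
print(picked)  # tensor([[0.1000], [0.5000]])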
# train the model
num_epochs, lr = 5, 0.1

# This function is saved in the d2lzh_pytorch package for convenient later use
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # reset gradients
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)
# model prediction
X, y = next(iter(test_iter))

true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

d2l.show_fashion_mnist(X[0:9], titles[0:9])
2.2 Concise Implementation
# load packages and modules
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)

# initialize parameters and get the data
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, root='/home/kesci/input/FashionMNIST2065')

# define the network model
num_inputs = 784
num_outputs = 10

class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)

    def forward(self, x):  # x shape: (batch, 1, 28, 28)
        y = self.linear(x.view(x.shape[0], -1))
        return y

# net = LinearNet(num_inputs, num_outputs)

class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):  # x shape: (batch, *, *, ...)
        return x.view(x.shape[0], -1)

from collections import OrderedDict
net = nn.Sequential(
    # FlattenLayer(),
    # LinearNet(num_inputs, num_outputs)
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs))
    ])
    # or our own LinearNet(num_inputs, num_outputs) would also work
)
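# Alternative (a minimal sketch, assuming PyTorch >= 1.2): the custom
# FlattenLayer can be replaced by the built-in nn.Flatten().
net_alt = nn.Sequential(OrderedDict([
    ('flatten', nn.Flatten()),
    ('linear', nn.Linear(num_inputs, num_outputs))
]))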
# initialize model parameters
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)

# define the loss function
loss = nn.CrossEntropyLoss()
# function prototype:
# class torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')

# define the optimization function
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
# function prototype:
# class torch.optim.SGD(params, lr=, momentum=0, dampening=0, weight_decay=0, nesterov=False)
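# Note (a minimal sketch): nn.CrossEntropyLoss combines LogSoftmax and NLLLoss,
# which is why the network outputs raw logits and needs no explicit softmax
# layer. The equivalence can be checked directly:
import torch.nn.functional as F
logits = torch.randn(3, num_outputs)
target = torch.LongTensor([2, 0, 4])
print(loss(logits, target))
print(F.nll_loss(F.log_softmax(logits, dim=1), target))  # same value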
# training
num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

# show the results
X, y = next(iter(test_iter))
true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
d2l.show_fashion_mnist(X[0:9], titles[0:9])
3 Multilayer Perceptron
3.1 Implementation from Scratch
import torch
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)

# get the training set
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, root='/home/kesci/input/FashionMNIST2065')

# define model parameters
num_inputs, num_outputs, num_hiddens = 784, 10, 256

W1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)), dtype=torch.float)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)), dtype=torch.float)
b2 = torch.zeros(num_outputs, dtype=torch.float)
print(W1)

params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_(requires_grad=True)

# define the activation function
def relu(X):
    return torch.max(input=X, other=torch.tensor(0.0))

# define the network
def net(X):
    X = X.view((-1, num_inputs))
    H = relu(torch.matmul(X, W1) + b1)
    return torch.matmul(H, W2) + b2
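# Equivalence check (a minimal sketch): the hand-written relu matches the
# built-in alternatives torch.relu and clamp.
tmp = torch.tensor([[-1.0, 2.0], [0.5, -3.0]])
print(relu(tmp))
print(torch.relu(tmp))   # same result
print(tmp.clamp(min=0))  # same result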
# define the loss function
loss = torch.nn.CrossEntropyLoss()

# training
# train_ch3 is the same function defined in the softmax section (also saved
# in the d2lzh package). lr looks large because d2l.sgd divides the gradient
# by batch_size.
num_epochs, lr = 5, 100.0
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
3.2 Concise Implementation
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("/home/kesci/input")
import d2lzh1981 as d2l

print(torch.__version__)

# initialize the model and parameters
num_inputs, num_outputs, num_hiddens = 784, 10, 256

net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)

for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)
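# A common variant (a minimal sketch): sample only the weights and zero the
# biases, selecting parameters by name rather than initializing everything
# from the same normal distribution.
for name, param in net.named_parameters():
    if 'bias' in name:
        init.constant_(param, val=0)
    else:
        init.normal_(param, mean=0, std=0.01)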
# training
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, root='/home/kesci/input/FashionMNIST2065')

loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)