Contents
1. Re-implementing the feedforward neural network with predefined operators
2. Adding a hidden layer with 3 neurons, redoing the binary classification, and comparing with Part 1
3. Customizing the number of hidden layers and the neurons in each hidden layer, and searching for the best hyperparameters for the binary classification; the dataset may be modified where appropriate to make exploring hyperparameters easier
1. Re-implementing the feedforward neural network with predefined operators
(1) Re-implementing the binary classification task with PyTorch's predefined operators
Import the required libraries and modules:
from data import make_moons
from nndl import accuracy
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch
from Runner2_1 import RunnerV2_2
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
The network structure Model_MLP_L2_V2 is defined as follows:
class Model_MLP_L2_V2(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model_MLP_L2_V2, self).__init__()
        # First linear layer
        self.fc1 = nn.Linear(input_size, hidden_size)
        # Initialize the weights from a normal distribution
        self.fc1.weight.data = torch.normal(mean=0.0, std=1.0, size=self.fc1.weight.data.size())
        self.fc1.bias.data.fill_(0.0)  # constant-initialize the biases to 0
        # Second linear layer
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.fc2.weight.data = torch.normal(mean=0.0, std=1.0, size=self.fc2.weight.data.size())
        self.fc2.bias.data.fill_(0.0)  # constant-initialize the biases to 0
        # Logistic (sigmoid) activation function
        self.act_fn = torch.sigmoid
        # Kept for inspection; not used directly in forward()
        self.layers = [self.fc1, self.act_fn, self.fc2, self.act_fn]

    # Forward pass
    def forward(self, inputs):
        z1 = self.fc1(inputs)
        a1 = self.act_fn(z1)
        z2 = self.fc2(a1)
        a2 = self.act_fn(z2)
        return a2
Build and split the dataset:
# Build the dataset
n_samples = 1000
X, y = make_moons(n_samples=n_samples, shuffle=True, noise=0.2)
# Split the dataset
num_train = 640  # number of training samples
num_dev = 160    # number of validation samples
num_test = 200   # number of test samples
# Split according to the counts above
X_train, y_train = X[:num_train], y[:num_train]  # training set
X_dev, y_dev = X[num_train:num_train + num_dev], y[num_train:num_train + num_dev]  # validation set
X_test, y_test = X[num_train + num_dev:], y[num_train + num_dev:]  # test set
# Reshape the labels to [N, 1]
y_train = y_train.reshape([-1, 1])
y_dev = y_dev.reshape([-1, 1])
y_test = y_test.reshape([-1, 1])
Visualize the generated dataset:
plt.figure(figsize=(5, 5))  # figure size
plt.scatter(x=X[:, 0], y=X[:, 1], marker='*', c=y, cmap='viridis')  # scatter plot of the two classes
plt.xlim(-3, 4)  # x-axis range
plt.ylim(-3, 4)  # y-axis range
plt.grid(True, linestyle='--', alpha=0.3)  # add a grid
plt.show()  # show the figure
(2) Completing the Runner class
Building on the RunnerV2_1 class from the previous section, this section's RunnerV2_2 class uses automatic gradient computation during training; when saving the model it retrieves the parameters with the state_dict method, and when loading it restores them with load_state_dict.
import os
import torch

class RunnerV2_2(object):
    def __init__(self, model, optimizer, metric, loss_fn, **kwargs):
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.metric = metric
        # Record how the evaluation scores change during training
        self.train_scores = []
        self.dev_scores = []
        # Record how the loss values change during training
        self.train_loss = []
        self.dev_loss = []

    def train(self, train_set, dev_set, **kwargs):
        # Number of training epochs; defaults to 0 if not provided
        num_epochs = kwargs.get("num_epochs", 0)
        # Logging frequency; defaults to 100 if not provided
        log_epochs = kwargs.get("log_epochs", 100)
        # Model save path; defaults to "best_model.pdparams" if not provided
        save_path = kwargs.get("save_path", "best_model.pdparams")
        # Custom logging function; defaults to None if not provided
        custom_print_log = kwargs.get("custom_print_log", None)
        # Track the best score seen so far
        best_score = 0
        # Train for num_epochs epochs
        for epoch in range(num_epochs):
            # Switch the model back to training mode each epoch,
            # since evaluate() below switches it to eval mode
            self.model.train()
            X, y = train_set
            # Forward pass
            logits = self.model(X)
            # Compute the cross-entropy loss
            trn_loss = self.loss_fn(logits, y)
            self.train_loss.append(trn_loss.item())
            # Compute the evaluation metric
            trn_score = self.metric(logits, y)
            self.train_scores.append(trn_score)
            # Compute parameter gradients automatically
            trn_loss.backward()
            if custom_print_log is not None:
                # Print the gradient of each layer
                custom_print_log(self)
            # Update the parameters
            self.optimizer.step()
            # Clear the gradients
            self.optimizer.zero_grad()
            dev_score, dev_loss = self.evaluate(dev_set)
            # If the current score is the best so far, save the model
            if dev_score > best_score:
                self.save_model(save_path)
                print(f"[Evaluate] best accuracy performance has been updated: {best_score:.5f} --> {dev_score:.5f}")
                best_score = dev_score
            if log_epochs and epoch % log_epochs == 0:
                print(f"[Train] epoch: {epoch}/{num_epochs}, loss: {trn_loss.item()}")

    # Evaluation stage: use torch.no_grad() so gradients are neither computed nor stored
    @torch.no_grad()
    def evaluate(self, data_set):
        # Switch the model to evaluation mode
        self.model.eval()
        X, y = data_set
        # Compute the model output
        logits = self.model(X)
        # Compute the loss
        loss = self.loss_fn(logits, y).item()
        self.dev_loss.append(loss)
        # Compute the evaluation metric
        score = self.metric(logits, y)
        self.dev_scores.append(score)
        return score, loss

    # Test stage: use torch.no_grad() so gradients are neither computed nor stored
    @torch.no_grad()
    def predict(self, X):
        # Switch the model to evaluation mode
        self.model.eval()
        return self.model(X)

    # Save the model parameters obtained via model.state_dict()
    def save_model(self, saved_path):
        torch.save(self.model.state_dict(), saved_path)

    # Load the model parameters via model.load_state_dict()
    def load_model(self, model_path):
        state_dict = torch.load(model_path, weights_only=True)
        self.model.load_state_dict(state_dict)
(3) Model training
Instantiate the RunnerV2_2 class and pass in the training configuration:
# Training parameters
epoch_num = 1000  # number of training epochs
model_save_path = "best_model.pdparams"  # model save path
# Network parameters
input_size = 2   # input dimension: 2
hidden_size = 5  # hidden-layer dimension: 5
output_size = 1  # output dimension: 1
# Define the multilayer perceptron model
model = Model_MLP_L2_V2(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
# Define the loss function
loss_fn = F.binary_cross_entropy
# Define the optimizer and set the learning rate
learning_rate = 0.2
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
# Define the evaluation metric
metric = accuracy
# Instantiate the RunnerV2_2 class with the training configuration
runner = RunnerV2_2(model, optimizer, metric, loss_fn)
# Train the model (save_path is the keyword RunnerV2_2.train actually reads)
runner.train([X_train, y_train], [X_dev, y_dev], num_epochs=epoch_num, log_epochs=50, save_path=model_save_path)
Output:
[Evaluate] best accuracy performance has been updated: 0.00000 --> 0.48125
[Train] epoch: 0/1000, loss: 0.7482572793960571
[Evaluate] best accuracy performance has been updated: 0.48125 --> 0.50000
[Evaluate] best accuracy performance has been updated: 0.50000 --> 0.53750
[Evaluate] best accuracy performance has been updated: 0.53750 --> 0.60625
[Evaluate] best accuracy performance has been updated: 0.60625 --> 0.71250
[Evaluate] best accuracy performance has been updated: 0.71250 --> 0.73750
[Evaluate] best accuracy performance has been updated: 0.73750 --> 0.77500
[Evaluate] best accuracy performance has been updated: 0.77500 --> 0.78750
[Evaluate] best accuracy performance has been updated: 0.78750 --> 0.79375
[Evaluate] best accuracy performance has been updated: 0.79375 --> 0.80000
[Evaluate] best accuracy performance has been updated: 0.80000 --> 0.81250
[Train] epoch: 50/1000, loss: 0.4034937918186188
[Train] epoch: 100/1000, loss: 0.36812323331832886
[Train] epoch: 150/1000, loss: 0.3453332781791687
[Evaluate] best accuracy performance has been updated: 0.81250 --> 0.81875
[Evaluate] best accuracy performance has been updated: 0.81875 --> 0.82500
[Evaluate] best accuracy performance has been updated: 0.82500 --> 0.83125
[Evaluate] best accuracy performance has been updated: 0.83125 --> 0.83750
[Evaluate] best accuracy performance has been updated: 0.83750 --> 0.84375
[Train] epoch: 200/1000, loss: 0.32963332533836365
[Evaluate] best accuracy performance has been updated: 0.84375 --> 0.85000
[Evaluate] best accuracy performance has been updated: 0.85000 --> 0.85625
[Train] epoch: 250/1000, loss: 0.31850504875183105
[Train] epoch: 300/1000, loss: 0.3105042576789856
[Train] epoch: 350/1000, loss: 0.3047018349170685
[Evaluate] best accuracy performance has been updated: 0.85625 --> 0.86250
[Train] epoch: 400/1000, loss: 0.3004642128944397
[Evaluate] best accuracy performance has been updated: 0.86250 --> 0.86875
[Train] epoch: 450/1000, loss: 0.2973460555076599
[Train] epoch: 500/1000, loss: 0.29503050446510315
[Evaluate] best accuracy performance has been updated: 0.86875 --> 0.87500
[Train] epoch: 550/1000, loss: 0.2932911813259125
[Train] epoch: 600/1000, loss: 0.29196637868881226
[Train] epoch: 650/1000, loss: 0.29094070196151733
[Train] epoch: 700/1000, loss: 0.29013189673423767
[Train] epoch: 750/1000, loss: 0.28948113322257996
[Train] epoch: 800/1000, loss: 0.2889463007450104
[Train] epoch: 850/1000, loss: 0.2884971499443054
[Evaluate] best accuracy performance has been updated: 0.87500 --> 0.88125
[Train] epoch: 900/1000, loss: 0.28811198472976685
[Train] epoch: 950/1000, loss: 0.28777503967285156
Visualize how the accuracy on the training and dev sets evolves during training:
# Visualize how the metrics change on the training and dev sets
def plot(runner, fig_name):
    plt.figure(figsize=(10, 5))
    epochs = list(range(len(runner.train_scores)))
    plt.subplot(1, 2, 1)
    plt.plot(epochs, runner.train_loss, color='#e4007f', label="Train loss")
    plt.plot(epochs, runner.dev_loss, color='#f19ec2', linestyle='--', label="Dev loss")
    # Axes and legend
    plt.ylabel("loss", fontsize='large')
    plt.xlabel("epoch", fontsize='large')
    plt.legend(loc='upper right', fontsize='x-large')
    plt.subplot(1, 2, 2)
    plt.plot(epochs, runner.train_scores, color='#e4007f', label="Train accuracy")
    plt.plot(epochs, runner.dev_scores, color='#f19ec2', linestyle='--', label="Dev accuracy")
    # Axes and legend
    plt.ylabel("score", fontsize='large')
    plt.xlabel("epoch", fontsize='large')
    plt.legend(loc='lower right', fontsize='x-large')
    plt.savefig(fig_name)
    plt.show()

plot(runner, 'fw-acc.pdf')
(4) Performance evaluation
Evaluate the best model saved during training on the test set, and check its accuracy and loss:
# Load the best saved model for testing
runner.load_model("best_model.pdparams")
# Evaluate the model on the test set
score, loss = runner.evaluate([X_test, y_test])
# Print the test accuracy and loss
print("[Test] score/loss: {:.4f}/{:.4f}".format(score, loss))
Result:
[Test] score/loss: 0.8550/0.3316
The model achieves a fairly high accuracy on the test set.
Visualizing the decision boundary:
import math
# Generate a uniform grid of 40000 points
x1, x2 = torch.meshgrid(torch.linspace(-math.pi, math.pi, 200), torch.linspace(-math.pi, math.pi, 200), indexing='ij')
x = torch.stack([torch.flatten(x1), torch.flatten(x2)], dim=1)  # stack the grid points into a 2-D array
# Predict with the trained model
y = runner.predict(x)  # predicted probabilities
y = torch.squeeze((y >= 0.5).to(torch.float32), dim=-1)  # convert probabilities to class labels
# Plot the class regions
plt.ylabel('x2')  # y-axis label
plt.xlabel('x1')  # x-axis label
plt.scatter(x[:, 0].tolist(), x[:, 1].tolist(), c=y.tolist(), cmap=plt.cm.Spectral)  # class regions
# Overlay the dev and test sets
plt.scatter(X_dev[:, 0].tolist(), X_dev[:, 1].tolist(), marker='*', c=torch.squeeze(y_dev, dim=-1).tolist())  # dev set
plt.scatter(X_test[:, 0].tolist(), X_test[:, 1].tolist(), marker='*', c=torch.squeeze(y_test, dim=-1).tolist())  # test set
plt.show()  # show the final figure
Full code:
from data import make_moons
from nndl import accuracy
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch
from Runner2_1 import RunnerV2_2
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
class Model_MLP_L2_V2(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model_MLP_L2_V2, self).__init__()
        # First linear layer
        self.fc1 = nn.Linear(input_size, hidden_size)
        # Initialize the weights from a normal distribution
        self.fc1.weight.data = torch.normal(mean=0.0, std=1.0, size=self.fc1.weight.data.size())
        self.fc1.bias.data.fill_(0.0)  # constant-initialize the biases to 0
        # Second linear layer
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.fc2.weight.data = torch.normal(mean=0.0, std=1.0, size=self.fc2.weight.data.size())
        self.fc2.bias.data.fill_(0.0)  # constant-initialize the biases to 0
        # Logistic (sigmoid) activation function
        self.act_fn = torch.sigmoid
        self.layers = [self.fc1, self.act_fn, self.fc2, self.act_fn]

    # Forward pass
    def forward(self, inputs):
        z1 = self.fc1(inputs)
        a1 = self.act_fn(z1)
        z2 = self.fc2(a1)
        a2 = self.act_fn(z2)
        return a2
# Build the dataset
n_samples = 1000
X, y = make_moons(n_samples=n_samples, shuffle=True, noise=0.2)
# Split the dataset
num_train = 640  # number of training samples
num_dev = 160    # number of validation samples
num_test = 200   # number of test samples
# Split according to the counts above
X_train, y_train = X[:num_train], y[:num_train]  # training set
X_dev, y_dev = X[num_train:num_train + num_dev], y[num_train:num_train + num_dev]  # validation set
X_test, y_test = X[num_train + num_dev:], y[num_train + num_dev:]  # test set
# Reshape the labels to [N, 1]
y_train = y_train.reshape([-1, 1])
y_dev = y_dev.reshape([-1, 1])
y_test = y_test.reshape([-1, 1])
# Visualize the generated dataset
plt.figure(figsize=(5, 5))  # figure size
plt.scatter(x=X[:, 0], y=X[:, 1], marker='*', c=y, cmap='viridis')  # scatter plot of the two classes
plt.xlim(-3, 4)  # x-axis range
plt.ylim(-3, 4)  # y-axis range
plt.grid(True, linestyle='--', alpha=0.3)  # add a grid
plt.show()  # show the figure
# Training parameters
epoch_num = 1000  # number of training epochs
model_save_path = "best_model.pdparams"  # model save path
# Network parameters
input_size = 2   # input dimension: 2
hidden_size = 5  # hidden-layer dimension: 5
output_size = 1  # output dimension: 1
# Define the multilayer perceptron model
model = Model_MLP_L2_V2(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
# Define the loss function
loss_fn = F.binary_cross_entropy
# Define the optimizer and set the learning rate
learning_rate = 0.2
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
# Define the evaluation metric
metric = accuracy
# Instantiate the RunnerV2_2 class with the training configuration
runner = RunnerV2_2(model, optimizer, metric, loss_fn)
# Train the model
runner.train([X_train, y_train], [X_dev, y_dev], num_epochs=epoch_num, log_epochs=50, save_path=model_save_path)
# Load the best saved model for testing
runner.load_model("best_model.pdparams")
# Evaluate the model on the test set
score, loss = runner.evaluate([X_test, y_test])
# Print the test accuracy and loss
print("[Test] score/loss: {:.4f}/{:.4f}".format(score, loss))
import math
# Generate a uniform grid of 40000 points
x1, x2 = torch.meshgrid(torch.linspace(-math.pi, math.pi, 200), torch.linspace(-math.pi, math.pi, 200), indexing='ij')
x = torch.stack([torch.flatten(x1), torch.flatten(x2)], dim=1)  # stack the grid points into a 2-D array
# Predict with the trained model
y = runner.predict(x)  # predicted probabilities
y = torch.squeeze((y >= 0.5).to(torch.float32), dim=-1)  # convert probabilities to class labels
# Plot the class regions
plt.ylabel('x2')  # y-axis label
plt.xlabel('x1')  # x-axis label
plt.scatter(x[:, 0].tolist(), x[:, 1].tolist(), c=y.tolist(), cmap=plt.cm.Spectral)  # class regions
# Overlay the dev and test sets
plt.scatter(X_dev[:, 0].tolist(), X_dev[:, 1].tolist(), marker='*', c=torch.squeeze(y_dev, dim=-1).tolist())  # dev set
plt.scatter(X_test[:, 0].tolist(), X_test[:, 1].tolist(), marker='*', c=torch.squeeze(y_test, dim=-1).tolist())  # test set
plt.show()  # show the final figure
2. Adding a hidden layer with 3 neurons, redoing the binary classification, and comparing with Part 1
Adding a hidden layer requires changes in two places: the network definition and the arguments passed when the model is created.
Changes to the network:
class Model_MLP_L2_V2(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(Model_MLP_L2_V2, self).__init__()
        # First linear layer
        self.fc1 = nn.Linear(input_size, hidden_size1)
        # Initialize the weights from a normal distribution
        self.fc1.weight.data = torch.normal(mean=0.0, std=1.0, size=self.fc1.weight.data.size())
        self.fc1.bias.data.fill_(0.0)  # constant-initialize the biases to 0
        # Second linear layer
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc2.weight.data = torch.normal(mean=0.0, std=1.0, size=self.fc2.weight.data.size())
        self.fc2.bias.data.fill_(0.0)  # constant-initialize the biases to 0
        # Third linear layer
        self.fc3 = nn.Linear(hidden_size2, output_size)
        self.fc3.weight.data = torch.normal(mean=0.0, std=1.0, size=self.fc3.weight.data.size())
        self.fc3.bias.data.fill_(0.0)
        # Logistic (sigmoid) activation function
        self.act_fn = torch.sigmoid
        self.layers = [self.fc1, self.act_fn, self.fc2, self.act_fn, self.fc3, self.act_fn]

    # Forward pass
    def forward(self, inputs):
        z1 = self.fc1(inputs)
        a1 = self.act_fn(z1)
        z2 = self.fc2(a1)
        a2 = self.act_fn(z2)
        z3 = self.fc3(a2)
        a3 = self.act_fn(z3)
        return a3
Changes where the model is instantiated:
# Define the multilayer perceptron model
model = Model_MLP_L2_V2(input_size=input_size, hidden_size1=hidden_size1, hidden_size2=hidden_size2, output_size=output_size)
Full code:
from data import make_moons
from nndl import accuracy
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch
from Runner2_1 import RunnerV2_2
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
class Model_MLP_L2_V2(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(Model_MLP_L2_V2, self).__init__()
        # First linear layer
        self.fc1 = nn.Linear(input_size, hidden_size1)
        # Initialize the weights from a normal distribution
        self.fc1.weight.data = torch.normal(mean=0.0, std=1.0, size=self.fc1.weight.data.size())
        self.fc1.bias.data.fill_(0.0)  # constant-initialize the biases to 0
        # Second linear layer
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc2.weight.data = torch.normal(mean=0.0, std=1.0, size=self.fc2.weight.data.size())
        self.fc2.bias.data.fill_(0.0)  # constant-initialize the biases to 0
        # Third linear layer
        self.fc3 = nn.Linear(hidden_size2, output_size)
        self.fc3.weight.data = torch.normal(mean=0.0, std=1.0, size=self.fc3.weight.data.size())
        self.fc3.bias.data.fill_(0.0)
        # Logistic (sigmoid) activation function
        self.act_fn = torch.sigmoid
        self.layers = [self.fc1, self.act_fn, self.fc2, self.act_fn, self.fc3, self.act_fn]

    # Forward pass
    def forward(self, inputs):
        z1 = self.fc1(inputs)
        a1 = self.act_fn(z1)
        z2 = self.fc2(a1)
        a2 = self.act_fn(z2)
        z3 = self.fc3(a2)
        a3 = self.act_fn(z3)
        return a3
# Build the dataset
n_samples = 1000
X, y = make_moons(n_samples=n_samples, shuffle=True, noise=0.2)
# Split the dataset
num_train = 640  # number of training samples
num_dev = 160    # number of validation samples
num_test = 200   # number of test samples
# Split according to the counts above
X_train, y_train = X[:num_train], y[:num_train]  # training set
X_dev, y_dev = X[num_train:num_train + num_dev], y[num_train:num_train + num_dev]  # validation set
X_test, y_test = X[num_train + num_dev:], y[num_train + num_dev:]  # test set
# Reshape the labels to [N, 1]
y_train = y_train.reshape([-1, 1])
y_dev = y_dev.reshape([-1, 1])
y_test = y_test.reshape([-1, 1])
# Visualize the generated dataset
plt.figure(figsize=(5, 5))  # figure size
plt.scatter(x=X[:, 0], y=X[:, 1], marker='*', c=y, cmap='viridis')  # scatter plot of the two classes
plt.xlim(-3, 4)  # x-axis range
plt.ylim(-3, 4)  # y-axis range
plt.grid(True, linestyle='--', alpha=0.3)  # add a grid
plt.show()  # show the figure
# Fix the random seed so results are reproducible
torch.manual_seed(111)
# Training parameters
epoch_num = 1000  # number of training epochs
model_save_path = "best_model.pdparams"  # model save path
# Network parameters
input_size = 2    # input dimension: 2
hidden_size1 = 5  # first hidden layer: 5 neurons
hidden_size2 = 3  # second hidden layer: 3 neurons
output_size = 1   # output dimension: 1
# Define the multilayer perceptron model
model = Model_MLP_L2_V2(input_size=input_size, hidden_size1=hidden_size1, hidden_size2=hidden_size2, output_size=output_size)
# Define the loss function
loss_fn = F.binary_cross_entropy
# Define the optimizer and set the learning rate
learning_rate = 0.2
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
# Define the evaluation metric
metric = accuracy
# Instantiate the RunnerV2_2 class with the training configuration
runner = RunnerV2_2(model, optimizer, metric, loss_fn)
# Train the model
runner.train([X_train, y_train], [X_dev, y_dev], num_epochs=epoch_num, log_epochs=50, save_path=model_save_path)
# Visualize how the metrics change on the training and dev sets
def plot(runner, fig_name):
    plt.figure(figsize=(10, 5))
    epochs = list(range(len(runner.train_scores)))
    plt.subplot(1, 2, 1)
    plt.plot(epochs, runner.train_loss, color='#e4007f', label="Train loss")
    plt.plot(epochs, runner.dev_loss, color='#f19ec2', linestyle='--', label="Dev loss")
    # Axes and legend
    plt.ylabel("loss", fontsize='large')
    plt.xlabel("epoch", fontsize='large')
    plt.legend(loc='upper right', fontsize='x-large')
    plt.subplot(1, 2, 2)
    plt.plot(epochs, runner.train_scores, color='#e4007f', label="Train accuracy")
    plt.plot(epochs, runner.dev_scores, color='#f19ec2', linestyle='--', label="Dev accuracy")
    # Axes and legend
    plt.ylabel("score", fontsize='large')
    plt.xlabel("epoch", fontsize='large')
    plt.legend(loc='lower right', fontsize='x-large')
    plt.savefig(fig_name)
    plt.show()

plot(runner, 'fw-acc.pdf')
# Load the best saved model for testing
runner.load_model("best_model.pdparams")
# Evaluate the model on the test set
score, loss = runner.evaluate([X_test, y_test])
# Print the test accuracy and loss
print("[Test] score/loss: {:.4f}/{:.4f}".format(score, loss))
import math
# Generate a uniform grid of 40000 points
x1, x2 = torch.meshgrid(torch.linspace(-math.pi, math.pi, 200), torch.linspace(-math.pi, math.pi, 200), indexing='ij')
x = torch.stack([torch.flatten(x1), torch.flatten(x2)], dim=1)  # stack the grid points into a 2-D array
# Predict with the trained model
y = runner.predict(x)  # predicted probabilities
y = torch.squeeze((y >= 0.5).to(torch.float32), dim=-1)  # convert probabilities to class labels
# Plot the class regions
plt.ylabel('x2')  # y-axis label
plt.xlabel('x1')  # x-axis label
plt.scatter(x[:, 0].tolist(), x[:, 1].tolist(), c=y.tolist(), cmap=plt.cm.Spectral)  # class regions
# Overlay the dev and test sets
plt.scatter(X_dev[:, 0].tolist(), X_dev[:, 1].tolist(), marker='*', c=torch.squeeze(y_dev, dim=-1).tolist())  # dev set
plt.scatter(X_test[:, 0].tolist(), X_test[:, 1].tolist(), marker='*', c=torch.squeeze(y_test, dim=-1).tolist())  # test set
plt.show()  # show the final figure
Results:
Test 1: both networks with epoch=1000, learning rate=0.2
[Evaluate] best accuracy performance has been updated: 0.00000 --> 0.54375
[Train] epoch: 0/1000, loss: 0.8059021234512329
[Evaluate] best accuracy performance has been updated: 0.54375 --> 0.55000
[Evaluate] best accuracy performance has been updated: 0.55000 --> 0.58125
[Evaluate] best accuracy performance has been updated: 0.58125 --> 0.60000
[Evaluate] best accuracy performance has been updated: 0.60000 --> 0.63125
[Evaluate] best accuracy performance has been updated: 0.63125 --> 0.63750
[Evaluate] best accuracy performance has been updated: 0.63750 --> 0.64375
[Evaluate] best accuracy performance has been updated: 0.64375 --> 0.66250
[Evaluate] best accuracy performance has been updated: 0.66250 --> 0.66875
[Evaluate] best accuracy performance has been updated: 0.66875 --> 0.69375
[Evaluate] best accuracy performance has been updated: 0.69375 --> 0.70625
[Evaluate] best accuracy performance has been updated: 0.70625 --> 0.71250
[Evaluate] best accuracy performance has been updated: 0.71250 --> 0.72500
[Evaluate] best accuracy performance has been updated: 0.72500 --> 0.74375
[Evaluate] best accuracy performance has been updated: 0.74375 --> 0.75000
[Evaluate] best accuracy performance has been updated: 0.75000 --> 0.76250
[Evaluate] best accuracy performance has been updated: 0.76250 --> 0.76875
[Evaluate] best accuracy performance has been updated: 0.76875 --> 0.78750
[Evaluate] best accuracy performance has been updated: 0.78750 --> 0.80000
[Evaluate] best accuracy performance has been updated: 0.80000 --> 0.80625
[Train] epoch: 50/1000, loss: 0.6617299318313599
[Evaluate] best accuracy performance has been updated: 0.80625 --> 0.81250
[Evaluate] best accuracy performance has been updated: 0.81250 --> 0.81875
[Train] epoch: 100/1000, loss: 0.593410313129425
[Train] epoch: 150/1000, loss: 0.515239417552948
[Train] epoch: 200/1000, loss: 0.45084771513938904
[Train] epoch: 250/1000, loss: 0.40509477257728577
[Train] epoch: 300/1000, loss: 0.3738361597061157
[Train] epoch: 350/1000, loss: 0.3522436022758484
[Train] epoch: 400/1000, loss: 0.3372547924518585
[Train] epoch: 450/1000, loss: 0.32697561383247375
[Train] epoch: 500/1000, loss: 0.32002943754196167
[Train] epoch: 550/1000, loss: 0.31535804271698
[Train] epoch: 600/1000, loss: 0.31218963861465454
[Train] epoch: 650/1000, loss: 0.30999618768692017
[Train] epoch: 700/1000, loss: 0.3084310293197632
[Train] epoch: 750/1000, loss: 0.30727115273475647
[Train] epoch: 800/1000, loss: 0.30637437105178833
[Train] epoch: 850/1000, loss: 0.3056504726409912
[Train] epoch: 900/1000, loss: 0.3050422668457031
[Train] epoch: 950/1000, loss: 0.30451327562332153
[Test] score/loss: 0.8750/0.6421
Analysis: adding a hidden layer did not improve the classification result much.
Test 2: both networks with epoch=500, learning rate=0.2
Result with one hidden layer:
[Evaluate] best accuracy performance has been updated: 0.00000 --> 0.58750
[Train] epoch: 0/500, loss: 0.895308792591095
[Evaluate] best accuracy performance has been updated: 0.58750 --> 0.60000
[Evaluate] best accuracy performance has been updated: 0.60000 --> 0.64375
[Evaluate] best accuracy performance has been updated: 0.64375 --> 0.68750
[Evaluate] best accuracy performance has been updated: 0.68750 --> 0.71875
[Evaluate] best accuracy performance has been updated: 0.71875 --> 0.74375
[Evaluate] best accuracy performance has been updated: 0.74375 --> 0.75000
[Evaluate] best accuracy performance has been updated: 0.75000 --> 0.77500
[Evaluate] best accuracy performance has been updated: 0.77500 --> 0.78750
[Evaluate] best accuracy performance has been updated: 0.78750 --> 0.80625
[Evaluate] best accuracy performance has been updated: 0.80625 --> 0.81875
[Evaluate] best accuracy performance has been updated: 0.81875 --> 0.82500
[Evaluate] best accuracy performance has been updated: 0.82500 --> 0.83750
[Evaluate] best accuracy performance has been updated: 0.83750 --> 0.84375
[Evaluate] best accuracy performance has been updated: 0.84375 --> 0.85000
[Evaluate] best accuracy performance has been updated: 0.85000 --> 0.85625
[Evaluate] best accuracy performance has been updated: 0.85625 --> 0.86250
[Evaluate] best accuracy performance has been updated: 0.86250 --> 0.86875
[Evaluate] best accuracy performance has been updated: 0.86875 --> 0.87500
[Train] epoch: 50/500, loss: 0.5181225538253784
[Train] epoch: 100/500, loss: 0.41419583559036255
[Train] epoch: 150/500, loss: 0.3680029809474945
[Train] epoch: 200/500, loss: 0.3443925976753235
[Train] epoch: 250/500, loss: 0.3301316499710083
[Train] epoch: 300/500, loss: 0.32049936056137085
[Train] epoch: 350/500, loss: 0.3135680854320526
[Train] epoch: 400/500, loss: 0.3084009885787964
[Train] epoch: 450/500, loss: 0.3044664263725281
[Test] score/loss: 0.8350/0.5411
Result with two hidden layers:
[Evaluate] best accuracy performance has been updated: 0.00000 --> 0.48125
[Train] epoch: 0/500, loss: 0.7991162538528442
[Evaluate] best accuracy performance has been updated: 0.48125 --> 0.48750
[Evaluate] best accuracy performance has been updated: 0.48750 --> 0.51250
[Evaluate] best accuracy performance has been updated: 0.51250 --> 0.52500
[Evaluate] best accuracy performance has been updated: 0.52500 --> 0.54375
[Evaluate] best accuracy performance has been updated: 0.54375 --> 0.56250
[Evaluate] best accuracy performance has been updated: 0.56250 --> 0.57500
[Evaluate] best accuracy performance has been updated: 0.57500 --> 0.58750
[Evaluate] best accuracy performance has been updated: 0.58750 --> 0.59375
[Evaluate] best accuracy performance has been updated: 0.59375 --> 0.60000
[Evaluate] best accuracy performance has been updated: 0.60000 --> 0.61875
[Evaluate] best accuracy performance has been updated: 0.61875 --> 0.62500
[Evaluate] best accuracy performance has been updated: 0.62500 --> 0.65000
[Evaluate] best accuracy performance has been updated: 0.65000 --> 0.66250
[Evaluate] best accuracy performance has been updated: 0.66250 --> 0.67500
[Evaluate] best accuracy performance has been updated: 0.67500 --> 0.68750
[Evaluate] best accuracy performance has been updated: 0.68750 --> 0.69375
[Evaluate] best accuracy performance has been updated: 0.69375 --> 0.70625
[Evaluate] best accuracy performance has been updated: 0.70625 --> 0.71875
[Evaluate] best accuracy performance has been updated: 0.71875 --> 0.74375
[Evaluate] best accuracy performance has been updated: 0.74375 --> 0.75625
[Evaluate] best accuracy performance has been updated: 0.75625 --> 0.77500
[Evaluate] best accuracy performance has been updated: 0.77500 --> 0.79375
[Evaluate] best accuracy performance has been updated: 0.79375 --> 0.80000
[Train] epoch: 50/500, loss: 0.6597578525543213
[Evaluate] best accuracy performance has been updated: 0.80000 --> 0.80625
[Evaluate] best accuracy performance has been updated: 0.80625 --> 0.81250
[Evaluate] best accuracy performance has been updated: 0.81250 --> 0.81875
[Evaluate] best accuracy performance has been updated: 0.81875 --> 0.83125
[Evaluate] best accuracy performance has been updated: 0.83125 --> 0.83750
[Evaluate] best accuracy performance has been updated: 0.83750 --> 0.84375
[Evaluate] best accuracy performance has been updated: 0.84375 --> 0.85000
[Evaluate] best accuracy performance has been updated: 0.85000 --> 0.85625
[Evaluate] best accuracy performance has been updated: 0.85625 --> 0.86250
[Train] epoch: 100/500, loss: 0.5923366546630859
[Evaluate] best accuracy performance has been updated: 0.86250 --> 0.86875
[Train] epoch: 150/500, loss: 0.5159260630607605
[Train] epoch: 200/500, loss: 0.45305681228637695
[Train] epoch: 250/500, loss: 0.40876054763793945
[Train] epoch: 300/500, loss: 0.37871700525283813
[Train] epoch: 350/500, loss: 0.35801711678504944
[Train] epoch: 400/500, loss: 0.34363946318626404
[Evaluate] best accuracy performance has been updated: 0.86875 --> 0.87500
[Train] epoch: 450/500, loss: 0.3337659537792206
[Test] score/loss: 0.8850/0.3004
At epoch=100:
one hidden layer: [Test] score/loss: 0.7950/0.4711; two hidden layers: [Test] score/loss: 0.8700/0.6284
At epoch=50:
one hidden layer: [Test] score/loss: 0.7450/0.6170; two hidden layers: [Test] score/loss: 0.7750/0.6594
Analysis: comparing with the other variables held fixed, the two-hidden-layer network appears to be clearly better than the one-hidden-layer network when the number of training epochs is small.
Test 3: both networks with epoch=1000, learning rate=1
Two hidden layers: (training curves omitted)
One hidden layer: (training curves omitted)
Analysis: both networks score very well, and a learning rate of 1 works much better than the 0.2 used above (so raising the learning rate is worth trying in the hyperparameter search below).
3. Customizing the number of hidden layers and the neurons in each hidden layer, and searching for the best hyperparameters for the binary classification; the dataset may be modified where appropriate to make exploring hyperparameters easier
The hyperparameters involved are the number of hidden layers, the number of neurons in each hidden layer, the learning rate, and the number of training epochs. Let's first look at what each of these affects.
Number of hidden layers:
Adding hidden layers lets the model learn more complex relationships in the data, but it can also increase the risk of overfitting. Generally, larger datasets and more complex problems call for deeper networks with more hidden layers, while smaller datasets and simpler problems may need fewer; a model whose depth and widths are configurable makes this easy to explore (see the sketch below).
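Such a configurable model is easy to build with nn.Sequential. The sketch below is my own illustration (the MLP class name and the hidden_sizes argument are assumptions, not part of the code above); it keeps the same N(0, 1) weight initialization, zero biases, and sigmoid activations as the models in Parts 1 and 2:
import torch
import torch.nn as nn

class MLP(nn.Module):
    # An MLP whose depth and widths are given by a list of hidden-layer sizes
    def __init__(self, input_size, hidden_sizes, output_size):
        super(MLP, self).__init__()
        layers = []
        sizes = [input_size] + list(hidden_sizes) + [output_size]
        for in_dim, out_dim in zip(sizes[:-1], sizes[1:]):
            linear = nn.Linear(in_dim, out_dim)
            nn.init.normal_(linear.weight, mean=0.0, std=1.0)  # N(0, 1) weights, as above
            nn.init.constant_(linear.bias, 0.0)                # zero biases, as above
            layers.append(linear)
            layers.append(nn.Sigmoid())  # logistic activation after every layer
        self.net = nn.Sequential(*layers)

    def forward(self, inputs):
        return self.net(inputs)

# e.g. the two-hidden-layer network from Part 2 (5 and 3 neurons):
model = MLP(input_size=2, hidden_sizes=[5, 3], output_size=1)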
Number of neurons per hidden layer:
Adding neurons helps fit the data better, while using fewer helps curb overfitting and speeds up training. Typically, the number of neurons is increased until performance stops improving. Note that this parameter, too, can easily lead to overfitting.
Number of training epochs:
If the model is simple and the dataset small, fewer epochs may suffice; if the model is complex and the dataset large, more epochs are needed for the model to fit adequately. At the same time, too many epochs easily leads to overfitting, so the count has to be tuned within a reasonable range.
There are many ways to choose the number of epochs. One is to split the data into training, validation, and test sets, train on the training set, and monitor training with the validation set, stopping as soon as the validation error stops decreasing. This avoids problems like over- and under-fitting and improves generalization; a sketch of this early-stopping rule follows.
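As an illustration of that stopping rule, here is a minimal early-stopping sketch reusing the model, optimizer, loss_fn, and data variables defined earlier; the patience of 20 epochs and the 1e-5 improvement threshold are arbitrary choices of mine, not tuned values:
best_dev_loss = float('inf')
patience, bad_epochs = 20, 0  # stop after 20 epochs without improvement
for epoch in range(2000):
    model.train()
    loss = loss_fn(model(X_train), y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    model.eval()
    with torch.no_grad():
        dev_loss = loss_fn(model(X_dev), y_dev).item()
    if dev_loss < best_dev_loss - 1e-5:  # count only real improvements
        best_dev_loss, bad_epochs = dev_loss, 0
        torch.save(model.state_dict(), "best_model.pdparams")  # keep the best model
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            print(f"Early stopping at epoch {epoch}")
            break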
Learning rate:
This determines the step size with which the optimizer updates the model weights during training. A lower learning rate can slow convergence but tends to give a more accurate model; a larger one can converge faster at the cost of accuracy.
There are also many algorithms for scheduling the learning rate; I had skimmed them in video lectures before, and looked into them in more detail for this experiment.
- Learning rate decay
Applying a decay rate to the learning rate lets the network lower its learning rate automatically during training. The schedule is usually tied to the epoch count: at the start of training a learning rate of roughly 0.01 to 0.001 is appropriate; after a certain number of epochs it is gradually slowed; near the end of training the learning rate should have decayed by a factor of 100 or more. A scheduler sketch follows.
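PyTorch ships schedulers for this in torch.optim.lr_scheduler. As a sketch, ExponentialLR multiplies the learning rate by a fixed factor gamma after every epoch (the gamma=0.995 below is just an illustrative value, not tuned for this task):
optimizer = torch.optim.SGD(model.parameters(), lr=0.2)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
for epoch in range(1000):
    loss = loss_fn(model(X_train), y_train)  # same training step as before
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()  # decay the learning rate once per epoch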
- Adaptive learning-rate optimizers (see 【pytorch优化器】Adagrad、RMSProp优化算法详解)
The core idea of Adagrad: if a parameter's gradient has consistently been very large, its learning rate is made a bit smaller to prevent oscillation; if a parameter's gradient has consistently been very small, its learning rate is made a bit larger so that it updates faster. This is the heart of how Adagrad speeds up the training of deep neural networks. A limitation of AdaGrad is that it can leave each parameter with a very small step size (learning rate) by the end of the search, which can slow progress considerably and may mean the optimum is never found.
RMSProp can be viewed as an extension of AdaGrad designed to fix AdaGrad's sharply decaying learning rate: it uses an exponentially weighted moving average of the squared gradients instead of an ever-growing sum. RMSProp has only one more hyperparameter than AdaGrad; it acts much like momentum and is usually set to 0.9.
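Both optimizers are available in torch.optim and can be dropped in for the SGD optimizer used above; note that in PyTorch the RMSProp averaging coefficient is the alpha argument (the 0.9 below follows the rule of thumb just mentioned, while PyTorch's default is 0.99; the lr values are illustrative):
optimizer = torch.optim.Adagrad(model.parameters(), lr=0.1)
# or
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.01, alpha=0.9)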
Below are several of the parameter settings that worked well during my tuning:
Test 1:
# Network parameters
input_size = 2    # input dimension: 2
hidden_size1 = 5  # first hidden layer: 5 neurons
hidden_size2 = 4  # second hidden layer: 4 neurons
output_size = 1   # output dimension: 1
epoch_num = 1000  # number of training epochs
# Learning rate
learning_rate = 5
Test 2:
# Network parameters
input_size = 2   # input dimension: 2
hidden_size = 7  # hidden layer: 7 neurons
output_size = 1  # output dimension: 1
# Learning rate
learning_rate = 4.5
epoch_num = 1000  # number of training epochs
Test 3:
# Network parameters
input_size = 2   # input dimension: 2
hidden_size = 3  # hidden layer: 3 neurons
output_size = 1  # output dimension: 1
learning_rate = 4.5
epoch_num = 1000  # number of training epochs
Test 4:
# Network parameters
input_size = 2    # input dimension: 2
hidden_size1 = 5  # first hidden layer: 5 neurons
hidden_size2 = 3  # second hidden layer: 3 neurons
output_size = 1   # output dimension: 1
epoch_num = 1000  # number of training epochs
# Learning rate
learning_rate = 4.8
Test 5:
# Network parameters
input_size = 2    # input dimension: 2
hidden_size1 = 5  # first hidden layer: 5 neurons
hidden_size2 = 6  # second hidden layer: 6 neurons
output_size = 1   # output dimension: 1
epoch_num = 2000  # number of training epochs
# Learning rate
learning_rate = 5
After many runs, the settings of Test 5 worked best, reaching 99% accuracy on the test set.
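A search like this can also be automated instead of run by hand. Here is a sketch of a simple grid search, assuming the configurable MLP class sketched earlier and the same data and runner setup; the candidate architectures and learning rates below are arbitrary choices, and note that every run saves to the same default "best_model.pdparams" path:
best = (0.0, None)
for hidden_sizes in [[5], [7], [5, 3], [5, 6]]:  # candidate architectures
    for lr in [0.2, 1.0, 4.5, 5.0]:              # candidate learning rates
        model = MLP(input_size=2, hidden_sizes=hidden_sizes, output_size=1)
        optimizer = torch.optim.SGD(model.parameters(), lr=lr)
        runner = RunnerV2_2(model, optimizer, accuracy, F.binary_cross_entropy)
        runner.train([X_train, y_train], [X_dev, y_dev], num_epochs=1000, log_epochs=None)
        score, _ = runner.evaluate([X_dev, y_dev])  # select on the dev set, not the test set
        if score > best[0]:
            best = (score, (hidden_sizes, lr))
print("best dev accuracy:", best[0], "with (hidden_sizes, lr) =", best[1])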