1. Code Overview
This code implements a simple neural network with one hidden layer in Python to solve a multi-class classification problem. The main steps are: generating a multi-class dataset, one-hot encoding the labels, defining the neural network model, training the model, testing it, and visualizing the decision boundary.
2. Importing the Required Libraries
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
```
- `numpy`: used for numerical computation, such as matrix operations.
- `matplotlib.pyplot`: used for data visualization.
- `make_classification`: imported from `sklearn.datasets`; generates a multi-class dataset.
- `train_test_split`: imported from `sklearn.model_selection`; splits a dataset into training and test sets.
- `OneHotEncoder`: imported from `sklearn.preprocessing`; converts labels into one-hot encoding.
3. Generating the Multi-Class Dataset
```python
np.random.seed(42)
X, y = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0, n_clusters_per_class=1, n_classes=3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
```
- `np.random.seed(42)`: sets the random seed so the results are reproducible.
- `make_classification`: generates a classification dataset with 1000 samples, 2 features, and 3 classes (a quick visual check is sketched after this list).
- `train_test_split`: splits the dataset into training and test sets with an 80:20 ratio.
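As a quick sanity check, one can plot the raw data before training. This is an optional snippet, not part of the original pipeline; it reuses the same `make_classification` call as above:

```python
# Optional sanity check: visualize the three generated classes.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1,
                           n_classes=3, random_state=42)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral, edgecolors='k')
plt.title("Generated dataset")
plt.show()
```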
4. One-Hot Encoding the Labels
```python
encoder = OneHotEncoder()
y_train_onehot = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test_onehot = encoder.transform(y_test.reshape(-1, 1)).toarray()
```
- `OneHotEncoder`: creates a one-hot encoder object.
- `fit_transform`: fits the encoder on the training labels and converts them into a dense one-hot matrix (a minimal example follows this list).
- `transform`: converts the test labels using the encoder fitted on the training set.
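To make the encoding concrete, here is a minimal standalone example (with illustrative labels, not taken from the dataset above) showing what the encoder produces:

```python
import numpy as np
from sklearn.preprocessing import OneHotEncoder

labels = np.array([0, 1, 2, 1]).reshape(-1, 1)
encoder = OneHotEncoder()
print(encoder.fit_transform(labels).toarray())
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]
```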
5. Defining the Neural Network Class `SimpleNN`
```python
class SimpleNN:
    def __init__(self, input_size=2, hidden_size=10, output_size=3):
        self.w1 = np.random.randn(input_size, hidden_size)
        self.b1 = np.random.randn(hidden_size)
        self.w2 = np.random.randn(hidden_size, output_size)
        self.b2 = np.random.randn(output_size)
```
- `__init__`: the class constructor; initializes the network's weights and biases.
- `w1`: weight matrix from the input layer to the hidden layer.
- `b1`: bias vector of the hidden layer.
- `w2`: weight matrix from the hidden layer to the output layer.
- `b2`: bias vector of the output layer.
```python
    def relu(self, x):
        return np.maximum(0, x)
```
- `relu`: implements the ReLU activation function, which sets all negative inputs to 0.
```python
    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)
```
- `softmax`: implements the Softmax activation function, which converts the outputs into a probability distribution. For numerical stability, the row-wise maximum is subtracted first.
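The effect of the max subtraction is easy to demonstrate in isolation. The following snippet (an illustrative aside, not part of the model) shows the naive version overflowing on large logits while the shifted version stays stable; the subtraction does not change the result because softmax is invariant to adding a constant to every logit:

```python
import numpy as np

x = np.array([[1000.0, 1001.0, 1002.0]])

# Naive softmax: np.exp(1000) overflows to inf, so the result is nan.
naive = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
print(naive)   # [[nan nan nan]] (with overflow warnings)

# Stable softmax: subtract the row maximum first.
exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
print(exp_x / np.sum(exp_x, axis=1, keepdims=True))
# [[0.09003057 0.24472847 0.66524096]]
```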
```python
    def forward(self, x):
        self.z1 = np.dot(x, self.w1) + self.b1
        self.a1 = self.relu(self.z1)
        self.z2 = np.dot(self.a1, self.w2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2
```
- `forward`: implements the forward pass, computing the network's output.
- `z1`: pre-activation input to the hidden layer.
- `a1`: hidden-layer output after the ReLU activation.
- `z2`: pre-activation input to the output layer.
- `a2`: output-layer output after the Softmax activation.
```python
    def cross_entropy(self, y_true, y_pred):
        m = y_true.shape[0]
        log_likelihood = -np.log(y_pred[range(m), y_true.argmax(axis=1)])
        return np.mean(log_likelihood)
```
- `cross_entropy`: computes the cross-entropy loss, which measures the discrepancy between the model's predictions and the true labels.
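A tiny worked example clarifies the indexing trick `y_pred[range(m), y_true.argmax(axis=1)]`: for each sample it picks out the predicted probability of the true class. Here is an illustrative batch of two samples:

```python
import numpy as np

y_true = np.array([[1.0, 0.0, 0.0],    # sample 0: true class 0
                   [0.0, 0.0, 1.0]])   # sample 1: true class 2
y_pred = np.array([[0.7, 0.2, 0.1],
                   [0.1, 0.2, 0.7]])

m = y_true.shape[0]
picked = y_pred[range(m), y_true.argmax(axis=1)]  # [0.7, 0.7]
print(np.mean(-np.log(picked)))  # -ln(0.7) ≈ 0.3567
```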
```python
    def gradient(self, x, y_true, y_pred):
        m = x.shape[0]
        d_z2 = y_pred - y_true
        d_w2 = np.dot(self.a1.T, d_z2) / m
        d_b2 = np.sum(d_z2, axis=0) / m
        d_a1 = np.dot(d_z2, self.w2.T)
        d_z1 = d_a1 * (self.z1 > 0)
        d_w1 = np.dot(x.T, d_z1) / m
        d_b1 = np.sum(d_z1, axis=0) / m
        return d_w1, d_b1, d_w2, d_b2
```
- `gradient`: implements backpropagation, computing the gradients of the weights and biases (a numerical gradient check is sketched after this list).
- `d_z2`: error at the output layer.
- `d_w2`: gradient of the hidden-to-output weights.
- `d_b2`: gradient of the output-layer biases.
- `d_a1`: error propagated back to the hidden layer.
- `d_z1`: error at the hidden layer's pre-activation, taking the ReLU derivative into account.
- `d_w1`: gradient of the input-to-hidden weights.
- `d_b1`: gradient of the hidden-layer biases.
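A standard way to verify a hand-written backward pass is a finite-difference gradient check. The sketch below is illustrative, assuming the `SimpleNN` class and the `X_train` / `y_train_onehot` arrays from above are in scope; it perturbs one entry of `w1` and compares the numerical slope of the loss against the analytic gradient:

```python
# Gradient check on w1[0, 0] via central differences (a sketch).
eps = 1e-5
check = SimpleNN(input_size=2, hidden_size=10, output_size=3)
x_small, y_small = X_train[:20], y_train_onehot[:20]

# Analytic gradient from backprop (forward() also caches z1/a1).
d_w1, _, _, _ = check.gradient(x_small, y_small, check.forward(x_small))

# Numerical gradient.
check.w1[0, 0] += eps
loss_plus = check.cross_entropy(y_small, check.forward(x_small))
check.w1[0, 0] -= 2 * eps
loss_minus = check.cross_entropy(y_small, check.forward(x_small))
check.w1[0, 0] += eps  # restore the original weight

numeric = (loss_plus - loss_minus) / (2 * eps)
print(numeric, d_w1[0, 0])  # the two values should agree closely
```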
```python
    def train(self, x, y, lr=0.01, epochs=1000):
        for epoch in range(epochs):
            y_pred = self.forward(x)
            dw1, db1, dw2, db2 = self.gradient(x, y, y_pred)
            self.w1 -= lr * dw1
            self.b1 -= lr * db1
            self.w2 -= lr * dw2
            self.b2 -= lr * db2
            if (epoch + 1) % 100 == 0:
                loss = self.cross_entropy(y, y_pred)
                acc = self.accuracy(y, y_pred)
                print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss:.4f}, Accuracy: {acc:.4f}')
```
- `train`: trains the network, updating the weights and biases with gradient descent (a mini-batch variant is sketched below).
- `lr`: learning rate, controlling the step size of each update.
- `epochs`: number of training epochs.
- The loss and accuracy are printed every 100 epochs.
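Note that `train` performs full-batch gradient descent: every epoch computes gradients over the entire training set. For larger datasets, mini-batch updates are a common alternative. The helper below is an illustrative sketch, not part of the original class; it reuses the model's existing `forward` and `gradient` methods:

```python
def train_minibatch(model, x, y, lr=0.01, epochs=200, batch_size=64):
    # Mini-batch gradient descent sketch using SimpleNN's own methods.
    n = x.shape[0]
    for epoch in range(epochs):
        perm = np.random.permutation(n)  # reshuffle every epoch
        for start in range(0, n, batch_size):
            idx = perm[start:start + batch_size]
            xb, yb = x[idx], y[idx]
            y_pred = model.forward(xb)
            dw1, db1, dw2, db2 = model.gradient(xb, yb, y_pred)
            model.w1 -= lr * dw1
            model.b1 -= lr * db1
            model.w2 -= lr * dw2
            model.b2 -= lr * db2
```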
```python
    def accuracy(self, y_true, y_pred):
        y_true_class = np.argmax(y_true, axis=1)
        y_pred_class = np.argmax(y_pred, axis=1)
        return np.mean(y_true_class == y_pred_class)
```
- `accuracy`: computes the classification accuracy by comparing the predicted classes against the true classes.
6. Training the Model
```python
model = SimpleNN(input_size=2, hidden_size=10, output_size=3)
model.train(X_train, y_train_onehot, lr=0.01, epochs=2000)
```
- Create an instance of the `SimpleNN` class with an input size of 2, a hidden size of 10, and an output size of 3.
- Call the `train` method to train the model with a learning rate of 0.01 for 2000 epochs.
7. Testing the Model
```python
y_test_pred = model.forward(X_test)
test_acc = model.accuracy(y_test_onehot, y_test_pred)
print(f'Test Accuracy: {test_acc:.4f}')
```
- Run a forward pass on the test set to obtain the predictions.
- Compute and print the test-set accuracy (a confusion-matrix follow-up is sketched below).
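Beyond a single accuracy figure, a confusion matrix shows which classes the model mixes up. Since scikit-learn is already a dependency, one optional follow-up (not in the original script) is:

```python
from sklearn.metrics import confusion_matrix, classification_report

y_pred_class = np.argmax(y_test_pred, axis=1)
print(confusion_matrix(y_test, y_pred_class))
print(classification_report(y_test, y_pred_class))
```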
8. Visualizing the Decision Boundary
```python
def plot_decision_boundary(model, X, y):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01),
                         np.arange(y_min, y_max, 0.01))
    grid = np.c_[xx.ravel(), yy.ravel()]
    probs = model.forward(grid)
    preds = np.argmax(probs, axis=1)
    preds = preds.reshape(xx.shape)
    plt.contourf(xx, yy, preds, alpha=0.8, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap=plt.cm.Spectral)
    plt.title("Decision Boundary")
    plt.show()

plot_decision_boundary(model, X_test, y_test)
```
- `plot_decision_boundary`: a function for visualizing the decision boundary.
- It generates a grid of points and runs a forward pass on each grid point to get its predicted class.
- `contourf` draws the decision regions, and `scatter` overlays the test samples.
- Finally, the figure is displayed.
9. Complete Code
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# 1. Generate a multi-class dataset
np.random.seed(42)
X, y = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0, n_clusters_per_class=1, n_classes=3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the labels to one-hot encoding
encoder = OneHotEncoder()
y_train_onehot = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()  # convert to a dense matrix
y_test_onehot = encoder.transform(y_test.reshape(-1, 1)).toarray()  # convert to a dense matrix

# 2. Define the neural network (a multi-class model with one hidden layer)
class SimpleNN:
    def __init__(self, input_size=2, hidden_size=10, output_size=3):
        # Initialize weights and biases
        self.w1 = np.random.randn(input_size, hidden_size)  # input-to-hidden weights
        self.b1 = np.random.randn(hidden_size)  # hidden-layer biases
        self.w2 = np.random.randn(hidden_size, output_size)  # hidden-to-output weights
        self.b2 = np.random.randn(output_size)  # output-layer biases

    def relu(self, x):
        return np.maximum(0, x)  # ReLU activation

    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))  # numerical-stability trick
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)  # Softmax activation

    def forward(self, x):
        # Forward pass
        self.z1 = np.dot(x, self.w1) + self.b1  # hidden-layer input
        self.a1 = self.relu(self.z1)  # hidden-layer output (ReLU applied)
        self.z2 = np.dot(self.a1, self.w2) + self.b2  # output-layer input
        self.a2 = self.softmax(self.z2)  # output-layer output (Softmax applied)
        return self.a2

    def cross_entropy(self, y_true, y_pred):
        # Cross-entropy loss
        m = y_true.shape[0]
        log_likelihood = -np.log(y_pred[range(m), y_true.argmax(axis=1)])
        return np.mean(log_likelihood)

    def gradient(self, x, y_true, y_pred):
        # Backpropagation: compute the gradients
        m = x.shape[0]
        # Output-layer gradients
        d_z2 = y_pred - y_true
        d_w2 = np.dot(self.a1.T, d_z2) / m
        d_b2 = np.sum(d_z2, axis=0) / m
        # Hidden-layer gradients
        d_a1 = np.dot(d_z2, self.w2.T)
        d_z1 = d_a1 * (self.z1 > 0)  # ReLU derivative
        d_w1 = np.dot(x.T, d_z1) / m
        d_b1 = np.sum(d_z1, axis=0) / m
        return d_w1, d_b1, d_w2, d_b2

    def train(self, x, y, lr=0.01, epochs=1000):
        for epoch in range(epochs):
            y_pred = self.forward(x)
            dw1, db1, dw2, db2 = self.gradient(x, y, y_pred)
            # Update weights and biases
            self.w1 -= lr * dw1
            self.b1 -= lr * db1
            self.w2 -= lr * dw2
            self.b2 -= lr * db2
            if (epoch + 1) % 100 == 0:
                loss = self.cross_entropy(y, y_pred)
                acc = self.accuracy(y, y_pred)
                print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss:.4f}, Accuracy: {acc:.4f}')

    def accuracy(self, y_true, y_pred):
        # Classification accuracy
        y_true_class = np.argmax(y_true, axis=1)
        y_pred_class = np.argmax(y_pred, axis=1)
        return np.mean(y_true_class == y_pred_class)

# 3. Train the model
model = SimpleNN(input_size=2, hidden_size=10, output_size=3)
model.train(X_train, y_train_onehot, lr=0.01, epochs=2000)

# 4. Test the model
y_test_pred = model.forward(X_test)
test_acc = model.accuracy(y_test_onehot, y_test_pred)
print(f'Test Accuracy: {test_acc:.4f}')

# 5. Visualize the decision boundary
def plot_decision_boundary(model, X, y):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01),
                         np.arange(y_min, y_max, 0.01))
    grid = np.c_[xx.ravel(), yy.ravel()]
    probs = model.forward(grid)
    preds = np.argmax(probs, axis=1)
    preds = preds.reshape(xx.shape)
    plt.contourf(xx, yy, preds, alpha=0.8, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap=plt.cm.Spectral)
    plt.title("Decision Boundary")
    plt.show()

plot_decision_boundary(model, X_test, y_test)
```