import csv
import numpy as np
def readData(filename):
    """Load the iris CSV file and split its rows by class label.

    Each data row is expected to hold four numeric feature columns followed
    by the class label in the fifth column. A constant ``1.0`` is appended
    to every feature vector so that the bias can be learned as part of the
    weight vector (i.e. each sample is ``[x; 1]``).

    Rows that are completely empty (for example a trailing blank line at the
    end of the file) are skipped instead of raising ``IndexError``.

    :param filename: path of the CSV data file
    :return: ``X1, X2, X3, y1, y2, y3`` (in this order)
        X1 / y1: samples and labels of ``'Iris-setosa'``
        X2 / y2: samples and labels of ``'Iris-versicolor'``
        X3 / y3: samples and labels of every other label
        Each X is a list of 5-element float lists, each y a list of the
        label strings read from the file.
    """
    X1, X2, X3 = [], [], []
    y1, y2, y3 = [], [], []
    # Map the two explicitly named classes to their output lists; any other
    # label falls through to the third group, as in the original if/elif/else.
    groups = {
        'Iris-setosa': (X1, y1),
        'Iris-versicolor': (X2, y2),
    }
    # newline='' lets the csv module do its own newline handling.
    with open(filename, 'r', newline='') as f:
        for row in csv.reader(f):
            # Skip blank lines (csv.reader yields [] for them).
            if not any(field.strip() for field in row):
                continue
            label = row[4]
            features, labels = groups.get(label, (X3, y3))
            features.append([float(value) for value in row[:4]] + [1.0])
            labels.append(label)
    return X1, X2, X3, y1, y2, y3
def tenfolddata(X1, X2, n_folds=10, per_class=5):
    """Build the folds used for cross-validation.

    Fold ``i`` contains samples ``i*per_class .. (i+1)*per_class - 1`` of the
    positive class followed by the same index range of the negative class.
    With the defaults this yields ten folds of 5 positives and 5 negatives.

    The label list of each fold is derived from the number of samples that
    were actually taken, so labels always line up with the fold contents
    even when a class has fewer than ``n_folds * per_class`` samples.

    :param X1: list of positive samples (each sample a feature list)
    :param X2: list of negative samples (each sample a feature list)
    :param n_folds: number of folds to produce (default 10)
    :param per_class: samples taken from each class per fold (default 5)
    :return: ``folds, y``
        folds: list of ``n_folds`` lists of samples
        y: list of ``n_folds`` label lists (1 = positive, 0 = negative)
    """
    folds = []
    y = []
    for i in range(n_folds):
        start, stop = i * per_class, (i + 1) * per_class
        positives = list(X1[start:stop])
        negatives = list(X2[start:stop])
        folds.append(positives + negatives)
        y.append([1] * len(positives) + [0] * len(negatives))
    return folds, y
def LR(X, y, lr=0.01, n_iter=150):
    """Train a logistic regression model with batch gradient descent.

    :param X: array-like with shape [N, d]; inputs, including the constant
        1 column used for the bias
    :param y: array-like with N labels (0 or 1); shape [N, 1] or [N]
    :param lr: learning rate (default 0.01); it has a strong influence on
        the result
    :param n_iter: number of gradient descent steps (default 150)
    :return: beta, np.array with shape [1, d] (the bias is included), the
        parameters after ``n_iter`` gradient descent steps
    """
    X = np.asarray(X, dtype=float)
    # Force a column vector so that (y - p1) never broadcasts to [N, N].
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    _, d = X.shape
    beta = np.full((1, d), 0.1)
    for _ in range(n_iter):
        z = X.dot(beta.T)  # shape [N, 1]
        # Sigmoid written as exp(-log(1 + exp(-z))): unlike
        # exp(z) / (1 + exp(z)) it cannot produce inf/inf = NaN for large z.
        p1 = np.exp(-np.logaddexp(0.0, -z))  # shape [N, 1]
        # Gradient of the negative log-likelihood with respect to beta.
        gradient = -np.sum(X * (y - p1), axis=0, keepdims=True)  # shape [1, d]
        beta -= lr * gradient
    return beta
def testing(beta, X, y):
"""
基于逻辑回归进行分类任务测试
:param beta: np.array with shape[1,d], 逻辑回归参数
:param X: np.array wiht shape[N,d], testing instances
:param y: np.array with shape[N,1], testing labels
:return: error_num, LR算法分类错误个数
"""
predicts = (X.dot(beta.T) >= 0) # shape[N,1]
error_num = np.sum(predicts != y)
return error_num
def tenFoldCrossValidation(folds, y):
    """Run k-fold cross-validation over the given folds.

    Every fold is used once as the validation set while the model is
    trained on all remaining folds. The number of folds and the number of
    feature columns are taken from the data instead of being fixed to 10
    and 5, so rows are never re-cut to a wrong width. Folds are expected to
    be equally sized lists of samples.

    :param folds: list of folds, each a list of samples (feature lists),
        e.g. shape [10, 10, 5]
    :param y: list of label lists matching ``folds``, e.g. shape [10, 10]
    :return: ten_fold_error_nums, total number of misclassified validation
        samples summed over all folds
    """
    ten_fold_error_nums = 0
    for i in range(len(folds)):
        val_X = np.array(folds[i])
        # Width of one sample; used to flatten the stacked training folds
        # into a 2-D [n_samples, n_features] matrix.
        n_features = val_X.shape[-1]
        val_X = val_X.reshape(-1, n_features)
        val_y = np.array(y[i]).reshape(-1, 1)
        # All folds except the i-th one form the training set.
        train_X = np.array(folds[:i] + folds[i + 1:]).reshape(-1, n_features)
        train_y = np.array(y[:i] + y[i + 1:]).reshape(-1, 1)
        beta = LR(train_X, train_y)
        ten_fold_error_nums += testing(beta, val_X, val_y)
    return ten_fold_error_nums
def Loo(X, y):
    """Estimate the error with leave-one-out cross-validation.

    Each sample is held out once; the model is trained on all other samples
    and tested on the held-out one. The number of samples and the number of
    feature columns are taken from the data instead of being fixed to 100
    and 5. The inputs are converted to arrays once, outside the loop.

    :param X: list of samples (feature lists), e.g. shape [100, 5]
    :param y: list of labels (0 or 1), e.g. shape [100]
    :return: loo_error_nums, number of held-out samples that were
        misclassified (0 for empty input)
    """
    samples = np.asarray(X)
    n_samples = samples.shape[0]
    if n_samples == 0:
        return 0
    samples = samples.reshape(n_samples, -1)
    labels = np.asarray(y).reshape(-1, 1)
    indices = np.arange(n_samples)
    loo_error_nums = 0
    for i in range(n_samples):
        keep = indices != i  # boolean mask selecting every sample but the i-th
        beta = LR(samples[keep], labels[keep])
        # Slicing with i:i + 1 keeps the held-out sample two-dimensional.
        loo_error_nums += testing(beta, samples[i:i + 1], labels[i:i + 1])
    return loo_error_nums
if __name__ == '__main__':
    import sys

    # Path of the iris CSV file. The first command-line argument, when
    # given, overrides the built-in default location.
    dataset = sys.argv[1] if len(sys.argv) > 1 else 'C:\\Users\\14399\\Desktop\\iris.csv'
    X1, X2, X3, y1, y2, y3 = readData(dataset)

    # The three binary problems, each as (positive class, negative class):
    # X1 vs X2, X1 vs X3 and X2 vs X3.
    class_pairs = [(X1, X2), (X1, X3), (X2, X3)]

    # Ten-fold cross-validation: print the error count of each problem.
    for positive, negative in class_pairs:
        folds, y = tenfolddata(positive, negative)
        print(tenFoldCrossValidation(folds, y))

    # Leave-one-out: print the error count of each problem.
    for positive, negative in class_pairs:
        X = positive + negative
        y = [1] * len(positive) + [0] * len(negative)
        print(Loo(X, y))
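# Results: ten-fold cross-validation: 0 0 15  (after extending X with the constant 1 the results improved, to 0 0 3 and 0 0 4 respectively)
# Leave-one-out: 0 0 11
# Dataset: UCI iris dataset
# Link: https://pan.baidu.com/s/1CWMvPZdsYsKYncJsl0P5bQ  extraction code: lx4r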