Logistic Regression 更多的是从概率的层面进行建模,但在实际应用中,更多的是用于二元分类(二分类)问题。
案例:
例子:如上图所示,有两类数据:方形=0,圆形=1
坐标轴 类型: X1 ,X2 ,0/1
2 ,2 , 0
2 ,4 , 0
3 ,3 , 0
4 ,5 , 0
8 ,1 , 1
8.5,3.5 , 1
9 ,1 , 1
10 ,4 , 1
# prediction function
from math import exp
def prediction(row, coefficients):
    """Return the logistic (sigmoid) prediction for one sample.

    ``row`` holds the feature values followed by the label (the last
    element is ignored); ``coefficients`` is ``[bias, w1, w2, ...]``.
    """
    z = coefficients[0] + sum(w * x for w, x in zip(coefficients[1:], row[:-1]))
    return 1 / (1 + exp(-z))
# Training data: each row is [x1, x2, label] with label 0 (square) or 1 (circle).
dataset = [
    [2, 2, 0],
    [2, 4, 0],
    [3, 3, 0],
    [4, 5, 0],
    [8, 1, 1],
    [8.5, 3.5, 1],
    [9, 1, 1],
    [10, 4, 1],
]
# An arbitrarily chosen starting coefficient vector: [bias, w1, w2].
coef = [-0.5, 0.9, -1.5]
# prediction function
from math import exp
def prediction(row, coefficients):
    """Predict P(label == 1) for a sample via the logistic function.

    The last element of ``row`` (the label) is not used; the first
    coefficient is the intercept, the rest are feature weights.
    """
    linear = coefficients[0]
    for idx, feature in enumerate(row[:-1]):
        linear += coefficients[idx + 1] * feature
    return 1 / (1 + exp(-linear))
# Compare each sample's true label against the model's probability
# (rounded to the nearest class).
for sample in dataset:
    probability = prediction(sample, coef)
    print("真实类别%.3f, 预测类别 %.3f [%d]" % (sample[-1], probability, round(probability)))
使用梯度下降
from math import exp
def prediction(row, coefficients):
    """Sigmoid of the linear combination: bias + sum(w_i * x_i).

    ``row`` is [x1, ..., xk, label]; the label is ignored here.
    """
    z = coefficients[0]
    for weight, feature in zip(coefficients[1:], row[:-1]):
        z += weight * feature
    return 1 / (1 + exp(-z))


def using_sgd_method_to_calculate_coefficients(training_dataset, learning_rate, n_times_epoch):
    """Fit logistic-regression coefficients with stochastic gradient descent.

    Coefficients start at zero and are updated once per training row for
    ``n_times_epoch`` passes, using the squared-error gradient
    ``error * y_hat * (1 - y_hat)``.  The per-epoch sum of squared errors
    is printed for progress tracking.  Returns the coefficient list
    ``[bias, w1, ..., wk]``.
    """
    coefficients = [0.0] * len(training_dataset[0])
    for epoch in range(n_times_epoch):
        squared_error_sum = 0
        for row in training_dataset:
            y_hat = prediction(row, coefficients)
            error = row[-1] - y_hat
            squared_error_sum += error ** 2
            # Shared update factor; the bias takes it directly, each
            # weight additionally scales it by its feature value.
            step = learning_rate * error * y_hat * (1.0 - y_hat)
            coefficients[0] = coefficients[0] + step
            for i in range(len(row) - 1):
                coefficients[i + 1] = coefficients[i + 1] + step * row[i]
        print("第 【%d】步,我们使用的学习率是 【%.3f】,误差是 【%.3f】" % (
            epoch, learning_rate, squared_error_sum))
    return coefficients
# Same two-class toy data as above: [x1, x2, label].
dataset = [
    [2, 2, 0],
    [2, 4, 0],
    [3, 3, 0],
    [4, 5, 0],
    [8, 1, 1],
    [8.5, 3.5, 1],
    [9, 1, 1],
    [10, 4, 1],
]
learning_rate = 0.1
n_times_epoch = 1000
# Train and show the learned coefficients.
coef = using_sgd_method_to_calculate_coefficients(dataset, learning_rate, n_times_epoch)
print(coef)
# The evaluation data: [x1, x2, label].
dataset = [
    [2, 2, 0],
    [2, 4, 0],
    [3, 3, 0],
    [4, 5, 0],
    [8, 1, 1],
    [8.5, 3.5, 1],
    [9, 1, 1],
    [10, 4, 1],
]
# Coefficients produced by the gradient-descent run above.
coef = [-1.15556711935848, 1.4830491764089766, -2.307569789313787]
# prediction function
from math import exp
def prediction(row, coefficients):
    """Compute the logistic-regression probability for one sample.

    ``coefficients[0]`` is the intercept; ``coefficients[i + 1]``
    multiplies feature ``row[i]``.  The trailing label in ``row`` is
    not consumed.
    """
    features = row[:-1]
    z = coefficients[0]
    for position, value in enumerate(features):
        z += coefficients[position + 1] * value
    return 1 / (1 + exp(-z))
# Report true class vs. predicted probability (and its rounded class)
# for every sample.
for sample in dataset:
    p = prediction(sample, coef)
    print("真实类别%.3f, 预测类别 %.3f [%d]" % (sample[-1], p, round(p)))