机器学习之逻辑回归实现二分类器
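损失函数
设 $h_\theta(x) = \dfrac{1}{1 + e^{-\theta^{T}x}}$,$m$ 为样本数,则
$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[\,y^{(i)}\log h_\theta\bigl(x^{(i)}\bigr) + \bigl(1 - y^{(i)}\bigr)\log\bigl(1 - h_\theta\bigl(x^{(i)}\bigr)\bigr)\right]$$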
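对损失函数求theta的偏导
$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\bigl(h_\theta\bigl(x^{(i)}\bigr) - y^{(i)}\bigr)\,x_j^{(i)}$$
其中 $h_\theta(x) = 1 / (1 + e^{-\theta^{T}x})$ 为sigmoid函数的输出,$m$ 为样本数。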
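类似于线性回归,根据偏导和学习率 $\alpha$ 更新参数theta:$\theta_j := \theta_j - \alpha\,\dfrac{\partial J(\theta)}{\partial \theta_j}$。(注意:下方代码中的梯度是对全部样本求和,没有除以样本数,并且额外加了系数为0.01的权重衰减项。)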
python实现
#导入相关类库和数据集
import numpy as np
from sklearn import datasets
# Load the iris dataset that ships with scikit-learn.
data = datasets.load_iris()
# Binary problem: keep only the first 100 samples (two classes, 50 samples each).
binary_rows = slice(0, 100)
train_x = data.data[binary_rows]
# Matching class labels, shaped as a column vector (n_samples, 1).
train_y = data.target[binary_rows][:, np.newaxis]
# Bring in the logisticRegression class; its implementation is listed further below.
from logisticRegression import logisticRegression
lR = logisticRegression(train_x, train_y, alpha=0.1, epoches=20)
# Run the training loop; train() also displays the prediction, label and cost plots.
lR.train()
logisticRegression.py
import numpy as np
import matplotlib.pyplot as plt
class logisticRegression(object):
    """Binary logistic-regression classifier trained with batch gradient descent.

    The model keeps one weight per feature and no separate bias term.  Each
    epoch performs a single full-batch update.  The gradient is the *sum* over
    all samples (it is not divided by the sample count), so the effective step
    size grows with the number of training samples.

    Args:
        train_x: Feature matrix of shape (n_samples, n_features).
        train_y: Labels in {0, 1}; either shape (n_samples,) or (n_samples, 1).
        alpha: Learning rate.
        epoches: Number of full-batch updates performed by ``train``.
        reg: Weight-decay factor added to the gradient at every update.

    Raises:
        ValueError: If ``train_x`` is not 2-D or the number of labels does not
            match the number of samples.
    """

    # Small constant added inside log() so that log(0) is never evaluated.
    _EPS = 0.00001

    def __init__(self, train_x, train_y, alpha, epoches=100, reg=0.01):
        features = np.asarray(train_x, dtype=float)
        if features.ndim != 2:
            raise ValueError(
                "train_x must be 2-D with shape (n_samples, n_features)")
        # Force a column vector: with 1-D labels, `y_hat - y` would silently
        # broadcast to an (n, n) matrix instead of an (n, 1) error vector.
        labels = np.asarray(train_y, dtype=float).reshape(-1, 1)
        if labels.shape[0] != features.shape[0]:
            raise ValueError(
                "train_x and train_y must contain the same number of samples")
        self._x = features
        self._y = labels
        # Small random initial weights, one per feature.
        self._w = 0.1 * np.random.randn(features.shape[1], 1)
        self._alpha = alpha
        self._epoches = epoches
        self._reg = reg
        self._cost = []  # cost recorded before each update, one entry per epoch

    def hx2(self, x):
        """Return hard 0/1 predictions (as floats) for the samples in ``x``.

        A sample is labelled 1 when its predicted probability is >= 0.5.
        """
        return (self.hx(x) >= 0.5).astype(float)

    def hx(self, x):
        """Return the sigmoid of ``x.dot(w)``, i.e. the probability of class 1.

        The two branches are algebraically identical; each one only ever
        exponentiates a non-positive number, so ``np.exp`` cannot overflow.
        """
        z = np.asarray(x, dtype=float).dot(self._w)
        prob = np.empty_like(z)
        non_negative = z >= 0
        prob[non_negative] = 1.0 / (1.0 + np.exp(-z[non_negative]))
        exp_z = np.exp(z[~non_negative])
        prob[~non_negative] = exp_z / (1.0 + exp_z)
        return prob

    def network(self,):
        """Run one epoch: record the current cost, then update the weights."""
        y_hat = self.hx(self._x)
        err = y_hat - self._y
        # Mean cross-entropy over the samples.  Averaging per-sample terms
        # (rather than taking the mean of a 1x1 dot product) makes this a real
        # mean, independent of the number of samples.
        per_sample = (self._y * np.log(y_hat + self._EPS) +
                      (1 - self._y) * np.log(1 - y_hat + self._EPS))
        self._cost.append(float(-np.mean(per_sample)))
        # Summed (not averaged) gradient plus weight decay.
        delta_w = self._x.T.dot(err)
        self._w -= self._alpha * (delta_w + self._reg * self._w)
        # Progress output: the weights after this update.
        print(self._w)

    def plotCost(self,):
        """Plot the recorded cost against the epoch index and show the figure."""
        plt.plot(self._cost)
        plt.xlabel("epoches")
        plt.ylabel("cost")
        plt.title("LogisticRegression")
        plt.show()

    def plot_y_hat(self, y_hat):
        """Scatter-plot the training samples coloured by the labels ``y_hat``."""
        self._plot(y_hat)
        plt.title("predict_label")
        plt.legend(["predict_label-1", "predict_label-0"])
        plt.show()

    def plot_y(self,):
        """Scatter-plot the training samples coloured by their true labels."""
        self._plot(self._y)
        plt.title("true_label")
        plt.legend(["true_label-1", "true_label-0"])
        plt.show()

    def _plot(self, y):
        """Scatter the first two features, split by label value in ``y``.

        Samples labelled 1 are drawn first (blue '+'), then samples labelled 0
        (yellow 'o'); the legends set by the callers rely on that order.
        """
        labels = np.asarray(y).reshape(-1)
        position_one = labels == 1
        position_zero = labels == 0
        plt.scatter(self._x[position_one, 0], self._x[position_one, 1],
                    marker='+', label='+', color='b')
        plt.scatter(self._x[position_zero, 0], self._x[position_zero, 1],
                    marker='o', label='-', color='y')
        plt.xlabel("feature_1")
        plt.ylabel("feature_2")

    def acc(self, y_hat, y):
        """Return the fraction of entries where ``y_hat`` equals ``y``.

        Both arguments are flattened first, so (n,) and (n, 1) inputs can be
        mixed freely.

        Raises:
            ValueError: If the two inputs hold a different number of labels.
        """
        predicted = np.asarray(y_hat).reshape(-1)
        actual = np.asarray(y).reshape(-1)
        if predicted.size != actual.size:
            raise ValueError("y_hat and y must have the same number of labels")
        return int(np.count_nonzero(predicted == actual)) / actual.size

    def train(self,):
        """Run all epochs, then show the prediction, true-label and cost plots."""
        for _ in range(self._epoches):
            self.network()
        self.plot_y_hat(self.hx2(self._x))
        self.plot_y()
        self.plotCost()
运行结果
样本的预测类别
样本的真实类别
损失函数追踪
总结
从三张图不难看出逻辑回归的效果还是很不错的,大概七八步就能收敛,这也得益于iris数据集中前两类样本本身就比较容易区分。