loss函数的定义: $loss = \frac{1}{N}\sum_{i=1}^{N}\bigl(w x_i + b - y_i\bigr)^2$
为防止loss求和后过大,通常求和后除以样本数量 $N$,即取平均值
优化过程中新w,b的计算: $w' = w - lr \cdot \frac{\partial loss}{\partial w}$, $b' = b - lr \cdot \frac{\partial loss}{\partial b}$
lr为学习率
为防止梯度过大,一般求和后会除以样本数量 $N$,即取平均值
代码:
import numpy as np
# Mean squared error: loss = mean over i of (y_i - (w*x_i + b))**2
def points_avg2_error(b, w, points):
    """Return the mean squared error of the line y = w*x + b over the samples.

    Args:
        b: intercept of the candidate line.
        w: slope of the candidate line.
        points: (N, 2) array; column 0 holds x values, column 1 holds y values.

    Returns:
        The average of the squared residuals (y_i - (w*x_i + b))**2.
    """
    n = len(points)
    squared_residuals = sum(
        (points[i, 1] - (w * points[i, 0] + b)) ** 2 for i in range(n)
    )
    return squared_residuals / n
# One gradient-descent step on the MSE loss.
def step_graident(b, w, points, learning_rate):
    """Return (b, w) after a single gradient-descent update.

    Derivatives of loss = (1/N) * sum((w*x_i + b - y_i)**2):
        d(loss)/db = (2/N) * sum(w*x_i + b - y_i)
        d(loss)/dw = (2/N) * sum(x_i * (w*x_i + b - y_i))

    Args:
        b: current intercept.
        w: current slope.
        points: (N, 2) array; column 0 is x, column 1 is y.
        learning_rate: step size for the update.

    Returns:
        Tuple (new_b, new_w) moved one step against the gradient.
    """
    n = len(points)
    grad_b = 0
    grad_w = 0
    for i in range(n):
        x_i = points[i, 0]
        y_i = points[i, 1]
        diff = (w * x_i + b) - y_i
        # Accumulate the averaged partial derivatives term by term.
        grad_b += (2 / n) * diff
        grad_w += (2 / n) * x_i * diff
    # Step opposite the gradient toward a (local) minimum.
    new_b = b - learning_rate * grad_b
    new_w = w - learning_rate * grad_w
    return new_b, new_w
# Gradient descent driver: optimize (b, w) to minimize the MSE loss.
def gradient_descent_runner(points, b, w, learning_rate, num_iterations):
    """Run `num_iterations` gradient-descent steps starting from (b, w).

    Args:
        points: (N, 2) array-like of samples; column 0 is x, column 1 is y.
        b: initial intercept.
        w: initial slope.
        learning_rate: step size passed to each update.
        num_iterations: number of descent steps to perform.

    Returns:
        Tuple (b, w) after all iterations.
    """
    # Convert once up front; the original rebuilt the same array with
    # np.array(points) on every iteration, which is loop-invariant work.
    points = np.array(points)
    for _ in range(num_iterations):
        b, w = step_graident(b, w, points, learning_rate)
    return b, w
def run():
    """Load data.csv, fit a line by gradient descent, and print the MSE before/after."""
    # Each row of data.csv is one "x,y" sample.
    data = np.genfromtxt("data.csv", delimiter=",")
    lr = 0.0001        # learning rate
    initial_b = 0      # starting intercept
    initial_w = 0      # starting slope
    iterations = 1000  # number of descent steps
    print("梯度下降开始前 b = {0}, w = {1}, error(均方误差) = {2}".format(
        initial_b, initial_w, points_avg2_error(initial_b, initial_w, data)))
    print("Running....")
    # Optimized parameters after gradient descent.
    final_b, final_w = gradient_descent_runner(data, initial_b, initial_w, lr, iterations)
    print("梯度下降开始后 b = {0}, w = {1}, error(均方误差) = {2}".format(
        final_b, final_w, points_avg2_error(final_b, final_w, data)))


if __name__ == "__main__":
    run()