Regression (supplement 1): when the target function is linear in one variable, its least-squares loss function is a quadratic function of two variables (W and B). This post minimizes that loss in Python (using TensorFlow) with full-batch gradient descent, per-sample stochastic gradient descent, and mini-batch gradient descent.

Copyright notice: this article is the author's original work; please obtain the blogger's permission before reproducing it. https://blog.csdn.net/qq_22828175/article/details/83380254
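
To see why the loss is a quadratic function of the two variables W and B, expand the sum of squared errors (this expansion is exactly what the coefficients a through f in the code below implement):

    loss(W, B) = sum_i (W*x_i + B - y_i)^2
               = (sum_i x_i^2)*W^2 + n*B^2 + (2*sum_i x_i)*W*B - (2*sum_i x_i*y_i)*W - (2*sum_i y_i)*B + sum_i y_i^2

where n = len(x_data), giving a = sum x_i^2, b = n, c = 2*sum x_i, d = -2*sum x_i*y_i, e = -2*sum y_i, f = sum y_i^2.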
import tensorflow as tf
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
from datetime import datetime
import random

t0 = datetime.now()
x_data = np.random.randn(int(1.0e2)).astype(np.float32)
y_data = x_data * 0.3 + 0.15  # the target function is linear in one variable, so its least-squares loss is a quadratic in the two variables W and B

# In the loss function loss1 below, lowercase letters are constants and uppercase letters are the variables (W, B) and the function value.
# The more elements x_data has, the larger the absolute values of the coefficients and the steeper the loss surface; with fewer elements the surface is flatter.
a = np.sum(x_data**2)
b = float(len(x_data))
c = 2*np.sum(x_data)
d = -2*np.sum(x_data*y_data)
e = -2*np.sum(y_data)
f = np.sum(y_data**2)
# For a quadratic function of two variables: if the coefficient matrix of its quadratic form is positive definite,
# the bowl opens upward and the function has a minimum; if it is negative definite, the bowl opens downward and
# the function has a maximum; if it is indefinite, the surface is a saddle.
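# Optional sanity check of the statement above (a sketch; M_check, w_min, b_min are illustrative names,
# not from the original script): the coefficient matrix of the quadratic form in loss1 is
# M = [[a, c/2], [c/2, b]]. Both eigenvalues should be positive (positive definite, bowl opens upward),
# and the unique minimum solves 2*M*[W, B]^T = -[d, e]^T.
M_check = np.array([[a, c / 2], [c / 2, b]])
print('Eigenvalues of the quadratic-form matrix:', np.linalg.eigvalsh(M_check))
w_min, b_min = np.linalg.solve(2 * M_check, -np.array([d, e]))
print('Analytic minimum of loss1: W = %.4f, B = %.4f' % (w_min, b_min))  # expected to be close to (0.3, 0.15)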

# Plot the loss function loss1
fig = plt.figure()
ax = Axes3D(fig)
W = np.arange(-100, 100+0.1, 1)
B = np.arange(-100, 100+0.1, 1)
W, B = np.meshgrid(W, B)  # create the coordinate grid (the key step); W comes first and is the x coordinate, B comes second and is the y coordinate
loss1 = a*W**2 + b*B**2 + c*W*B + d*W + e*B + f
ax.set_xlabel('W')
ax.set_ylabel('B')
ax.plot_surface(W, B, loss1, rstride=1, cstride=1, cmap='rainbow')
plt.show()

# A placeholder with no shape specified adapts automatically to the shape of whatever data is fed in
x_ph = tf.placeholder(tf.float32)
y_ph = tf.placeholder(tf.float32)

# Set the starting point for the loss-function variables; in this example the minimum lies at (0.3, 0.15)
weight_initial = 1.0e4
bias_initial = 1.0e4
weight = tf.Variable(weight_initial)
bias = tf.Variable(bias_initial)
y_model = weight * x_ph + bias

loss2 = tf.reduce_sum((y_model - y_ph)**2)  # sum of squared errors over whatever batch is fed in
loss2_mean = loss2/len(x_data)  # mean squared error when the full data set is fed in; used only for the stopping test below
learning_rate = 1e-1  # the choice of learning_rate is critical: too small and convergence is slow, too large and it diverges
train_op = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss2)
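
# Optional back-of-the-envelope check (a sketch; H_full and the printed bound are illustrative additions):
# for full-batch gradient descent on loss2 the Hessian with respect to (weight, bias) is the constant
# matrix [[2a, c], [c, 2b]], and plain gradient descent converges only when
# learning_rate < 2 / (largest eigenvalue of the Hessian). With ~100 samples that bound works out to
# roughly 1e-2, so learning_rate = 1e-1 would typically make the full-batch variant (algorithm 1) diverge;
# the per-sample and mini-batch variants see much smaller per-step Hessians and tolerate larger rates.
H_full = np.array([[2 * a, c], [c, 2 * b]])
print('Full-batch gradient descent is stable only for learning_rate < %.4f'
      % (2 / np.linalg.eigvalsh(H_full).max()))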

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

print('Initial point: ({}, {})'.format(weight_initial, bias_initial))
step = 0  # iteration counter
loop = 0  # epoch (full-pass) counter
threshold = 1e-5  # tolerance for the stopping test
algorithm = 3  # 1 = full-batch gradient descent, 2 = per-sample stochastic gradient descent, 3 = mini-batch (stochastic) gradient descent
mini_batch_size = 5  # number of samples per mini-batch; it must divide len(x_data) evenly, otherwise the indexing loop below runs past the end of the array
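# Added guard for the divisibility requirement noted above (the original relied on the comment alone):
assert len(x_data) % mini_batch_size == 0, 'mini_batch_size must divide len(x_data) evenly'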
flag = True
while flag:
    if algorithm == 1:
        # Feed all of x_data and y_data in at once: full-batch gradient descent; learning_rate should be chosen smaller here
        sess.run(train_op, feed_dict={x_ph: x_data, y_ph: y_data})
        step += 1
        print('Iteration %s: W is %.2f, B is %.2f' % (step, weight.eval(sess), bias.eval(sess)))
    elif algorithm == 2:
        # Shuffle the data and feed one (x, y) pair at a time: per-sample stochastic gradient descent; learning_rate can be chosen larger here
        random.shuffle(x_data)
        y_data = x_data * 0.3 + 0.15  # recompute y_data after every shuffle, otherwise x_data and y_data would no longer be correctly paired
        # print('x_data are:', x_data)
        # print('y_data are:', y_data)
        for (x, y) in zip(x_data, y_data):
            sess.run(train_op, feed_dict={x_ph: x, y_ph: y})
            step += 1
            print('Iteration %s: W is %.2f, B is %.2f' % (step, weight.eval(sess), bias.eval(sess)))
        loop += 1
        print('Finished epoch %s' % loop)
    elif algorithm == 3:
        # Shuffle the data and feed one mini-batch of (x, y) pairs at a time: mini-batch (stochastic) gradient descent; learning_rate should be a moderate value
        random.shuffle(x_data)
        y_data = x_data * 0.3 + 0.15  # recompute y_data after every shuffle, otherwise x_data and y_data would no longer be correctly paired
        for i in range(0, len(x_data), mini_batch_size):
            x_mini_batch = []
            y_mini_batch = []
            for j in range(i, i + mini_batch_size, 1):
                x_mini_batch.append(x_data[j])
                y_mini_batch.append(y_data[j])
            sess.run(train_op, feed_dict={x_ph: x_mini_batch, y_ph: y_mini_batch})
            step += 1
            print('Iteration %s: W is %.2f, B is %.2f' % (step, weight.eval(sess), bias.eval(sess)))
        loop += 1
        print('Finished epoch %s' % loop)
    if sess.run(loss2_mean, feed_dict={x_ph: x_data, y_ph: y_data}) <= threshold:
        print('Accuracy requirement met')
        break  # exit the enclosing loop, i.e. the while loop
    if abs(weight.eval(sess)) > weight_initial*2 or abs(bias.eval(sess)) > bias_initial*2:
        print('learning_rate was chosen too large')
        break

plt.plot(x_data, y_data, 'ro', label='Original data')
plt.plot(x_data, sess.run(weight) * x_data + sess.run(bias), label='Fitted line')
plt.legend()
plt.show()
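
# Optional comparison with the closed-form least-squares fit (a sketch; slope_ref and intercept_ref are
# illustrative names). np.polyfit returns [slope, intercept], which the trained (weight, bias) should approach.
slope_ref, intercept_ref = np.polyfit(x_data, y_data, 1)
print('Closed-form fit: W = %.4f, B = %.4f' % (slope_ref, intercept_ref))
print('Trained fit:     W = %.4f, B = %.4f' % (sess.run(weight), sess.run(bias)))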

t1 = datetime.now()
print('Elapsed time:', t1 - t0)
