Python: Hand-Writing a Least-Squares Objective Function and Optimizer with scipy.optimize

This post implements least squares in two equivalent forms, a matrix (vectorized) version and a for-loop version. In both, the parameters are learned with the L-BFGS-B algorithm via scipy's fmin_l_bfgs_b.
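For reference (this summary is my addition, not in the original post): with feature matrix $X$, target matrix $Y$, and weight matrix $\Theta$, both versions minimize

L(\Theta) = \tfrac{1}{2}\,\lVert X\Theta - Y\rVert_F^2,
\qquad
\nabla_\Theta L = X^\top (X\Theta - Y),

which is exactly what obj_fun and prime compute below.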

(1) Matrix implementation; the code is as follows:

import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt

points = []
np.random.seed(1)

# Objective function: squared-error loss over all outputs
def obj_fun(theta, x, y_):
    theta = theta.reshape(3, 3)
    pre_dis = np.dot(x, theta)
    loss = np.sum((pre_dis - y_)**2) / 2
    points.append(loss)   # record the loss at each call so the descent curve can be plotted
    return loss

# Analytic gradient of the loss with respect to theta
def prime(theta, x, y_):
    theta = theta.reshape(3, 3)
    pre_dis = np.dot(x, theta)
    gradient = np.dot(np.transpose(x), (pre_dis - y_))
    return np.ravel(gradient)   # flatten the 2-D gradient matrix into a 1-D vector

if __name__ == "__main__":
    feature = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [1, 5, 9]])
    dis_ = np.array([[0.1, 0.3, 0.6], [0.2, 0.4, 0.4], [0.3, 0.2, 0.5], [0.1, 0.3, 0.6]])
    #init_theta = np.ones([3, 3])
    init_theta = np.random.normal(size=(3, 3))
    print(prime(init_theta, x=feature, y_=dis_))   # sanity-check the analytic gradient at the initial point


    result = opt.fmin_l_bfgs_b(obj_fun, init_theta, prime, args=(feature, dis_))
    # Note: with fmin_l_bfgs_b the optimized parameters are in result[0]; other optimizers return them in result directly
    m_theta = np.array(result[0]).reshape(3, 3)
    print(np.dot(feature, m_theta))
    print(obj_fun(m_theta, feature, dis_))

    plt.plot(np.arange(len(points)), points)
    plt.show()
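Before trusting the optimizer, it is worth verifying that prime really is the gradient of obj_fun. A quick sketch using scipy.optimize.check_grad (this check is my addition; it assumes the obj_fun, prime, feature, and dis_ defined above):

import numpy as np
import scipy.optimize as opt

# check_grad compares the analytic gradient against a finite-difference estimate;
# a value that is tiny relative to the gradient norm means prime is consistent
theta0 = np.ravel(np.random.normal(size=(3, 3)))
print(opt.check_grad(obj_fun, prime, theta0, feature, dis_))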

(2) For-loop implementation; the code is as follows:

import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt

points = []
np.random.seed(1)

# Objective function: sum the squared-error loss over each output column
def obj_fun(theta, x, y_, theta_shape):
    (rows, cols) = theta_shape
    theta = theta.reshape([rows, cols])
    loss = 0
    for i in range(cols):
        loss += per_obj_func(theta[:, i], x, y_[:, i])
    points.append(loss)   # record the loss so the descent curve can be plotted
    return loss

# Squared-error loss for a single output column
def per_obj_func(theta, x, y_):
    loss = 0
    for i in range(len(x)):
        y = 0
        for j in range(len(theta)):
            y += theta[j] * x[i][j]
        loss += (y - y_[i])**2/2
    return loss

# Gradient: build the partial derivatives column by column, then flatten
def prime(theta, x, y_, theta_shape):
    lst = []
    (rows, cols) = theta_shape
    theta = theta.reshape([rows, cols])
    for i in range(cols):
        lst.append(per_prime(theta[:, i], x, y_[:, i]))
    np_lst = np.transpose(np.array(lst))   # each entry of lst is one column of the gradient matrix
    return np.ravel(np_lst)

# Partial derivatives of the loss with respect to one column of W
def per_prime(theta, x, y_):
    lst = []
    for j in range(len(theta)):
        pre_dis = 0
        for i in range(len(x)):
            y = 0
            for k in range(len(x[0])):
                y += x[i][k] * theta[k]
            pre_dis += (y - y_[i]) * x[i][j]
        lst.append(pre_dis)
    return lst

if __name__ == "__main__":
    feature = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [1, 5, 9]])
    dis_ = np.array([[0.1, 0.3, 0.6], [0.2, 0.4, 0.4], [0.3, 0.2, 0.5], [0.1, 0.3, 0.6]])
    #init_theta = np.ones([3, 3])
    init_theta = np.random.normal(size=(3, 3))
    shape = init_theta.shape

    result = opt.fmin_l_bfgs_b(obj_fun, init_theta, prime, args=(feature, dis_, shape))
    # Note: with fmin_l_bfgs_b the optimized parameters are in result[0]; other optimizers return them in result directly
    m_theta = np.array(result[0]).reshape(3, 3)
    print(np.dot(feature, m_theta))
    print(obj_fun(m_theta, feature, dis_, shape))
    plt.plot(np.arange(len(points)), points)
    plt.show()


Program output:

As can be seen, the loss-descent plots of the two versions are identical, so the implementations agree.
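As an extra cross-check (my addition, not in the original post), the same problem has a closed-form solution via np.linalg.lstsq. Note that this particular feature matrix has rank 2, so the minimizing theta is not unique; compare the fitted values and the loss rather than the raw parameters:

import numpy as np

feature = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [1, 5, 9]], dtype=float)
dis_ = np.array([[0.1, 0.3, 0.6], [0.2, 0.4, 0.4], [0.3, 0.2, 0.5], [0.1, 0.3, 0.6]])

# lstsq minimizes ||feature @ theta - dis_||^2 for each output column in closed form
theta_star, _, rank, _ = np.linalg.lstsq(feature, dis_, rcond=None)
print(rank)                                              # 2: rank-deficient, so the minimizer is not unique
print(feature @ theta_star)                              # fitted values; should match the L-BFGS-B result
print(np.sum((feature @ theta_star - dis_) ** 2) / 2)    # the minimal loss value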

Notes:

1. The shape is passed into the functions because opt.fmin_l_bfgs_b flattens the init_theta argument from (3, 3) into a length-9 1-D vector, so W has to be reshaped back to (3, 3) inside the objective and gradient functions.

2. The args tuple of opt.fmin_l_bfgs_b is passed to both the objective (func) and the gradient (fprime), so the two functions must share exactly the same parameter list; their signatures cannot differ. A minimal demonstration of both points follows.
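A minimal toy sketch (my own example, not from the original post) showing that x0 is flattened and that args feeds both functions:

import numpy as np
import scipy.optimize as opt

def f(w, a):
    # w arrives as a flat length-4 vector even though x0 was shaped (2, 2)
    w = w.reshape(2, 2)
    return np.sum((w - a) ** 2)

def g(w, a):
    # must take the same arguments as f, since args is passed to both
    w = w.reshape(2, 2)
    return np.ravel(2 * (w - a))

a = np.arange(4.0).reshape(2, 2)
w_opt, f_min, info = opt.fmin_l_bfgs_b(f, np.zeros((2, 2)), g, args=(a,))
print(w_opt.reshape(2, 2))   # recovers a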

Since I am still fairly inexperienced, when writing the loop version I split the W matrix into several 3 x 1 single-column vectors and handled each one separately, which lowers the chance of making a mistake.
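This column-by-column split is legitimate because the squared-error loss decomposes over the output columns (a one-line justification I am adding here):

\frac{1}{2}\lVert XW - Y\rVert_F^2 = \sum_{i=1}^{3} \frac{1}{2}\lVert X w_i - y_i\rVert_2^2,

where w_i and y_i are the i-th columns of W and Y, so each column can be fitted independently.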

Reference for the matrix approach:

https://blog.csdn.net/Ce1estial/article/details/77893760

Reposted from blog.csdn.net/z2539329562/article/details/89312941