[机器学习]线性回归预测14年房价（含python代码动态展示梯度下降过程）

数据集如下图所示：
在这里插入图片描述
原始数据中的Year为2000——2013，但由于Year和Price的数量级相差过大，梯度下降难以很快收敛，所以先将Year减去2000，处理成0——13后再进行拟合。
代码首先会展示两张动态图，展示梯度下降代价函数的下降过程以及直线的拟合过程。
拟合完毕（收敛后）会显示闭式解法拟合出的直线。
同时，两种方法的拟合图中都会标出预测的2014年房价。

python代码：

import csv
from numpy import *
import matplotlib.pyplot as plt
from matplotlib.pyplot import MultipleLocator

def loadDataSet(filename='data.csv'):
    """Load the regression samples from a CSV file.

    The first row is treated as a header and skipped. Every following
    non-empty row contributes one sample: column 0 is the single feature
    and column 1 is the target value.

    Args:
        filename: Path of the CSV file to read. Defaults to 'data.csv'.

    Returns:
        A tuple ``(data, label)``. ``data`` is a list of ``[1.0, feature]``
        rows (the leading 1.0 is the bias term) and ``label`` is the list
        of target values as floats.

    Raises:
        ValueError: If a field cannot be converted to float.
        IndexError: If a non-empty row has fewer than two columns.
    """
    data = []   # one feature per sample, plus the constant bias column
    label = []
    # 'utf-8-sig' transparently drops a leading BOM; newline='' is the mode
    # the csv module asks for. The with-block guarantees the file is closed.
    with open(filename, encoding='utf-8-sig', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        next(csv_reader, None)  # skip the header row (no-op on an empty file)
        for row in csv_reader:
            if not row:  # tolerate blank lines, e.g. a trailing newline
                continue
            data.append([1.0, float(row[0])])
            label.append(float(row[1]))
    return data, label

def closeFormSolution(data, label):
    """Fit linear regression in closed form via the normal equation.

    Solves ``(X^T X) theta = X^T y`` for ``theta``, where ``X`` is built
    from ``data`` and ``y`` from ``label``.

    Args:
        data: Sequence of feature rows (each row already contains the
            bias term), n rows by m columns.
        label: Sequence of n target values.

    Returns:
        A ``numpy.matrix`` of shape (m, 1) holding the fitted coefficients.

    Raises:
        numpy.linalg.LinAlgError: If ``X^T X`` is singular.
    """
    # asmatrix is used instead of mat: the `mat` alias no longer exists in
    # the NumPy 2.x namespace, while asmatrix is available in 1.x and 2.x.
    dataMat = asmatrix(data)
    labelMat = asmatrix(label).transpose()
    gram = dataMat.transpose() * dataMat        # X^T X
    moment = dataMat.transpose() * labelMat     # X^T y
    # Solving the linear system avoids forming an explicit inverse, which
    # is both cheaper and numerically better behaved.
    return asmatrix(linalg.solve(gram, moment))

def gradientDescent(data, label, alpha=0.0001, maxCycle=10000,
                    epsilon=0.0002, plot=True):
    """Fit linear regression with batch gradient descent.

    Minimises ``cost = 1/2 * ||X * theta - y||^2`` starting from an
    all-ones ``theta``. Iteration stops when the cost changes by less than
    ``epsilon`` between two consecutive steps, or after ``maxCycle`` steps.

    Args:
        data: Sequence of feature rows (each row already contains the
            bias term), n samples by m features.
        label: Sequence of n target values.
        alpha: Learning rate.
        maxCycle: Maximum number of update steps.
        epsilon: Convergence threshold on the absolute change in cost.
        plot: When true (the default), animate the cost curve in figure 1
            and the current fitted line in figure 2 after every step,
            pausing 0.1 s per step. When false, no plotting call is made.

    Returns:
        A ``numpy.matrix`` of shape (m, 1) holding the fitted coefficients.
    """
    # asmatrix is used instead of mat: the `mat` alias no longer exists in
    # the NumPy 2.x namespace, while asmatrix is available in 1.x and 2.x.
    dataMat = asmatrix(data)
    labelMat = asmatrix(label).transpose()
    m = shape(dataMat)[1]               # number of features
    theta = asmatrix(ones((m, 1)))      # initial guess
    error = dataMat * theta - labelMat
    # Pull the scalar out of the 1x1 matrix so comparisons are on floats.
    precost = (0.5 * error.transpose() * error)[0, 0]

    if plot:
        plt.ion()
        # Each step draws one segment of the cost curve:
        # (previous step, previous cost) -> (current step, current cost).
        xs = [0, 0]
        ys = [0, precost]

    for k in range(maxCycle):
        # The gradient of the cost with respect to theta is X^T * error.
        theta = theta - alpha * (dataMat.transpose() * error)
        error = dataMat * theta - labelMat
        cost = (0.5 * error.transpose() * error)[0, 0]

        if plot:
            xs[0] = xs[1]
            ys[0] = ys[1]
            xs[1] = k
            ys[1] = cost
            plt.figure(1)
            plt.title('costFunction', fontsize=14)
            plt.xlabel('num of iterations', fontsize=8)
            plt.ylabel('cost', fontsize=8)
            plt.plot(xs, ys, color='red')
            plt.figure(2)
            plotRegression(data, label, theta, 'gradientDescent')

            plt.pause(0.1)

        if abs(precost - cost) < epsilon:   # cost barely changed: converged
            break
        precost = cost

    return theta

def plotRegression(data, label, theta, title):
    """Draw the samples, the fitted line and the value predicted at x = 14.

    Clears the current matplotlib figure and redraws it from scratch, so
    it can be called repeatedly to animate a fit.

    Args:
        data: Sequence of ``[bias, x]`` rows; only the second column is
            plotted.
        label: Sequence of target values, one per row of ``data``.
        theta: Coefficients ``(intercept, slope)``; a (2, 1) matrix or any
            array-like whose first two flattened entries are those values.
        title: Title shown above the plot.
    """
    plt.clf()
    # Flatten theta so matrix, ndarray and list inputs are handled alike.
    coeffs = asarray(theta, dtype=float).ravel()
    intercept, slope = coeffs[0], coeffs[1]
    x = arange(0, 20)
    y = intercept + slope * x
    prediction = y[14]      # x = 14 is labelled 2014 on the axis
    ax = plt.subplot()
    plt.title(title, fontsize=14)
    plt.xlabel('year(+2000)', fontsize=8)
    plt.xticks(x, tuple(range(2000, 2020)))
    plt.ylabel('price', fontsize=8)
    x_major_locator = MultipleLocator(1)
    y_major_locator = MultipleLocator(2)
    ax.xaxis.set_major_locator(x_major_locator)
    ax.yaxis.set_major_locator(y_major_locator)
    plt.xlim(0, 16)
    plt.ylim(0, 16)
    # Plot every sample that was loaded instead of assuming exactly 14 rows.
    plt.plot([row[1] for row in data], label, "ob")
    plt.plot(14, prediction, 'om')
    plt.text(14, prediction + 0.8, '2014_prediction', color='b', fontsize=10)
    plt.text(14, prediction, '%.2f' % prediction, color='b', fontsize=10)
    plt.plot(x, y, color='red')


def main():
    """Fit the data with both methods, print the coefficients, show the plots.

    Gradient descent runs first (it animates its own figures); afterwards
    the closed-form fit is drawn in figure 3 and the windows are shown.
    """
    samples, targets = loadDataSet()

    gd_theta = gradientDescent(samples, targets)
    print("梯度下降theta", gd_theta, sep="\n")

    # Leave interactive mode before the final plt.show() call.
    plt.ioff()

    plt.figure(3)
    closed_theta = closeFormSolution(samples, targets)
    plotRegression(samples, targets, closed_theta, 'closeFormSolution')
    print("闭式解theta", closed_theta, sep="\n")
    plt.show()


if __name__ == "__main__":
    main()

猜你喜欢

目录

热门文章