用tensorflow预测房价
1.使用软件版本
①Anaconda3.6.5
②PyCharm Community Edition 2021.2
③tensorflow1.10.0
2.房价影响因素文件(housing.data.csv,housing.data.test.csv)
已上传至资源,请自行下载
3.训练代码实现
①读取数据(data_manager.py)
import pandas as pd
def read_data(path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        path: filesystem path of the CSV file.

    Returns:
        pandas.DataFrame holding the file contents.
    """
    return pd.read_csv(path)
def get_X_data_Y_data(path):
    """Split a CSV file into features and target.

    The last column is treated as the target; every other column is a
    feature.

    Args:
        path: filesystem path of the CSV file.

    Returns:
        Tuple ``(X_data, Y_data)`` — X_data is a DataFrame of the feature
        columns, Y_data a numpy array of shape (n_samples, 1).
    """
    frame = pd.read_csv(path)
    features = frame.iloc[:, :-1]
    # iloc[:, -1] yields a pandas Series; convert it to an (n, 1) numpy
    # column vector so it can be fed straight into a placeholder.
    target = frame.iloc[:, -1].values.reshape(-1, 1)
    return features, target
②构建模型(model.py)
import tensorflow as tf
def inference(x, shape):
    """Build the linear-regression graph: y = x @ weight + bias.

    Args:
        x: input tensor/placeholder with `shape` feature columns.
        shape: number of feature columns (int).

    Returns:
        Tensor of predicted values with one column per sample row.
    """
    weight = tf.Variable(
        initial_value=tf.truncated_normal(shape=[shape, 1]), name='weight')
    bias = tf.Variable(
        initial_value=tf.truncated_normal(shape=[1]), name='bias')
    # Linear model: matrix product plus broadcast bias.
    return tf.matmul(x, weight) + bias
def losses(y_pred, y):
    """Return the mean-squared-error loss between predictions and targets."""
    return tf.reduce_mean(tf.square(y_pred - y), name='loss')
def trainning(loss, learning_rate):
    """Return a training op that minimizes `loss` with the Adam optimizer."""
    return tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
③开始训练(train.py)
import data_manager
import tensorflow as tf
import model
import os
learning_rate=0.01 #learning rate for the Adam optimizer
training_epochs=10000 #total number of training steps
display_step=100 #print the training loss every 100 steps
save_step=1000 #save a checkpoint every 1000 steps
logs_dir='logs' #directory where checkpoints are written
def run_train():
    """Train the linear-regression model and periodically checkpoint it.

    Loads the training data from 'housing.data.csv', builds the graph,
    runs `training_epochs` optimization steps, prints the loss every
    `display_step` steps and saves a checkpoint every `save_step` steps
    (plus a final one) under `logs_dir`.
    """
    x_data, y_data = data_manager.get_X_data_Y_data('housing.data.csv')
    # Use a variable batch dimension (None) instead of pinning the
    # placeholder to the training set's exact shape, so the same graph
    # can be fed a dataset of any size.
    x_pl = tf.placeholder(tf.float32, shape=[None, x_data.shape[1]], name='X')
    y_pl = tf.placeholder(tf.float32, shape=[None, 1], name='Y')
    y_pred = model.inference(x_pl, x_data.shape[1])
    loss = model.losses(y_pred, y_pl)
    train_op = model.trainning(loss, learning_rate)
    saver = tf.train.Saver()
    with tf.Session() as sess:  # run the graph inside a session
        sess.run(tf.global_variables_initializer())
        for epoch in range(training_epochs):
            # Nothing trains until the ops are run with data fed in.
            _, train_loss = sess.run([train_op, loss],
                                     feed_dict={x_pl: x_data, y_pl: y_data})
            if epoch % display_step == 0 or (epoch + 1) == training_epochs:
                print("step:{},train_losses:{}".format(epoch, train_loss))
            if epoch % save_step == 0 or (epoch + 1) == training_epochs:
                checkpoint_path = os.path.join(logs_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=epoch)
                print('step:{},save model'.format(epoch))
def get_wb():
    """Restore the newest checkpoint and return the learned (w, b).

    The meta-graph path is derived from `tf.train.latest_checkpoint`
    instead of the hard-coded 'model.ckpt-9999.meta', so the function
    keeps working if `training_epochs` (and hence the final step number)
    changes.

    Raises:
        FileNotFoundError: if `logs_dir` contains no checkpoint.
    """
    ckpt = tf.train.latest_checkpoint(logs_dir)
    if ckpt is None:
        raise FileNotFoundError('no checkpoint found in {}'.format(logs_dir))
    with tf.Session() as sess:
        # TF1 writes '<ckpt>.meta' next to every checkpoint it saves.
        saver = tf.train.import_meta_graph(ckpt + '.meta')
        saver.restore(sess, ckpt)
        w, b = sess.run(['weight:0', 'bias:0'])  # fetch variables by graph name
    return w, b
# Train and report the learned parameters only when executed as a script,
# so importing this module has no side effects.
if __name__ == '__main__':
    run_train()
    print(get_wb())
④运行结果
4.计算测试集中的损失值
①代码实现(valid.py)
#计算测试集中的损失值
import tensorflow as tf
import data_manager
import model
logs_dir='logs'
def run_vaild():
    """Evaluate the trained model's MSE loss on the held-out test set.

    Rebuilds the same inference graph as training, restores the latest
    checkpoint from `logs_dir`, and prints the loss computed on
    'housing.data.test.csv'. If no checkpoint exists the evaluation is
    skipped with a message instead of silently scoring the
    randomly-initialized weights.
    """
    x_data, y_data = data_manager.get_X_data_Y_data("housing.data.test.csv")
    x_pl = tf.placeholder(tf.float32, shape=x_data.shape, name='X')
    y_pl = tf.placeholder(tf.float32, shape=y_data.shape, name='Y')
    y_pred = model.inference(x_pl, x_data.shape[1])
    valid_loss = model.losses(y_pred, y_pl)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.latest_checkpoint(logs_dir)
        if not ckpt:
            # Without trained weights the loss would be meaningless.
            print('no checkpoint found in {}, run train.py first'.format(logs_dir))
            return
        saver.restore(sess, ckpt)
        print("restore from the checkpoint {0}".format(ckpt))
        loss = sess.run([valid_loss], feed_dict={
            x_pl: x_data, y_pl: y_data})
        print('valid_losses:{}'.format(loss))
run_vaild()
②结果截图