【PyTorch深度学习】利用图卷积神经网络和出租车轨迹时间序列预测交通速度(附源码)

需要源码请点赞关注收藏后评论区留言私信~~~

1:数据准备

使用的数据集为深圳市2015年1月1日至1月31日的出租车轨迹数据集。实验数据主要包括两部分,一个是156×156邻接矩阵,描述道路之间的空间关系。每行代表一条道路,矩阵中的值代表道路之间的连通性。另一个是特征矩阵,它描述了每条道路上的速度随时间的变化。每一行代表一条路,每一列是不同时间段道路上的交通速度。每15分钟汇总一次每条路上的交通速度,数据维度为2976×156,使用过去10个时间步的数据预测未来1个时间步的数据。选取80%的数据作为训练集,20%数据作为测试集,又将训练集中10%的数据作为验证集,对交通速度进行实时预测

自定义函数的参数含义见下表

构建一个将数据集划分为训练集、验证集和测试集

在PyTorch中,DataLoader是进行数据载入的部件,必须将数据载入后再进行深度学习模型的训练

 2:模型构建

构建GCN层,首先计算公式中的固定值 $\hat{D}^{-1/2}\hat{A}\hat{D}^{-1/2}$(即对称归一化的邻接矩阵)

初始化GCN层时,确定每个节点的输入特征个数in_features以及输出特征个数out_features

两层GCN网络以及三层全连接网络堆叠

3:模型终止与评价

模型终止部分采用early stopping技术

使用了均方根误差RMSE,决定系数R2,平均绝对误差MAE,加权平均绝对百分比误差WMAPE四个指标对模型进行评价

4:模型训练及测试

在模型训练部分:加载数据,运用model得到预测值与loss,并返回损失值进行参数更新

在模型测试部分,首先需要利用torch.load函数将训练过程中保存的模型导入进来,然后利用model.load_state_dict将保存的参数字典加载到模型中

5:结果展示

结果展示如下 可见模型对于大部分时候的交通速度预测得十分精确,但是在部分极大值和极小值上预测的误差十分大,这也是模型改进的目标方向

 6:代码

最后 部分代码如下 需要全部代码和数据集请点赞关注收藏后评论区留言私信~~~

 项目结构如下

 

import torch
from torch.utils.data import Dataset
import numpy as np

"""
Parameter:
time_interval, time_lag, tg_in_one_day, forecast_day_number, is_train=True, is_val=False, val_rate=0.1, pre_len
"""


class Traffic_speed(Dataset):
	"""Road-speed dataset slicing a (roads, time_steps) speed matrix into samples.

	Each sample X is the `time_lag` most recent normalized readings for every
	road (shape roads x time_lag); the target Y is the following `pre_len`
	step(s) (shape roads x pre_len).

	Parameters
	----------
	time_interval : minutes aggregated into one time step (stored for
		reference only; not used by the slicing logic).
	time_lag : number of past time steps per input sample.
	tg_in_one_day : time steps per day.
	forecast_day_number : days reserved at the tail of the series for testing.
	speed_data : path to a CSV file; its transpose is (roads, time_steps).
	pre_len : number of future time steps to predict.
	is_train, is_val, val_rate : split selection; the last `val_rate` fraction
		of the training samples becomes the validation set.
	"""

	def __init__(self, time_interval, time_lag, tg_in_one_day, forecast_day_number, speed_data, pre_len, is_train=True, is_val=False, val_rate=0.1):
		super().__init__()
		self.time_interval = time_interval
		self.time_lag = time_lag
		self.tg_in_one_day = tg_in_one_day
		self.forecast_day_number = forecast_day_number
		self.tg_in_one_week = self.tg_in_one_day * self.forecast_day_number
		self.speed_data = np.loadtxt(speed_data, delimiter=",").T  # transpose -> (roads, time_steps)
		self.max_speed = np.max(self.speed_data)
		self.min_speed = np.min(self.speed_data)
		self.is_train = is_train
		self.is_val = is_val
		self.val_rate = val_rate
		self.pre_len = pre_len

		# Min-max normalization to [0, 1], rounded to 5 decimals.
		# Vectorized: replaces the original O(roads * steps) Python double loop
		# with one numpy expression (same round-half-even results).
		self.speed_data_norm = np.round(
			(self.speed_data - self.min_speed) / (self.max_speed - self.min_speed), 5)

		total_steps = self.speed_data.shape[1]
		if self.is_train:
			# Training window: skip the warm-up prefix, stop before the test tail.
			self.start_index = self.tg_in_one_week + time_lag
			self.end_index = total_steps - self.tg_in_one_day * self.forecast_day_number - self.pre_len
		else:
			# Test window: the last forecast_day_number days of the series.
			self.start_index = total_steps - self.tg_in_one_day * self.forecast_day_number
			self.end_index = total_steps - self.pre_len

		self.X = []
		self.Y = []
		self.Y_original = []
		for index in range(self.start_index, self.end_index):
			# time_lag most recent steps as input, next pre_len step(s) as target
			self.X.append(self.speed_data_norm[:, index - self.time_lag: index].tolist())
			self.Y.append(self.speed_data_norm[:, index: index + self.pre_len])
		self.X = torch.from_numpy(np.array(self.X))  # (num, roads, time_lag)
		self.Y = torch.from_numpy(np.array(self.Y))  # (num, roads, pre_len)

		# Carve the validation split off the tail of the training samples.
		# Guarded on val_len (not val_rate) so a tiny dataset where
		# int(val_rate * len) == 0 is left untouched instead of "split" into
		# a full-size slice (X[-0:] would silently return everything).
		val_len = int(self.val_rate * len(self.X))
		if val_len > 0:
			train_len = len(self.X) - val_len
			if self.is_val:
				self.X, self.Y = self.X[-val_len:], self.Y[-val_len:]
			else:
				self.X, self.Y = self.X[:train_len], self.Y[:train_len]
		print("X.shape", self.X.shape, "Y.shape", self.Y.shape)

		if not self.is_train:
			# Un-normalized targets for evaluation on the original scale.
			# NOTE(review): built over the full index range and never sliced by
			# the val split — pass val_rate=0 when is_train=False (the loader
			# factory does) so X/Y and Y_original stay aligned.
			for index in range(self.start_index, self.end_index):
				self.Y_original.append(self.speed_data[:, index: index + self.pre_len])
			self.Y_original = torch.from_numpy(np.array(self.Y_original))

	def get_max_min_speed(self):
		"""Return (max, min) of the raw speed matrix, for de-normalization."""
		return self.max_speed, self.min_speed

	def __getitem__(self, item):
		if self.is_train:
			return self.X[item], self.Y[item]
		return self.X[item], self.Y[item], self.Y_original[item]

	def __len__(self):
		return len(self.X)
from data.datasets import Traffic_speed
from torch.utils.data import DataLoader


speed_data = "./data/sz_speed-论文数据.csv"

def get_speed_dataloader(time_interval=15, time_lag=5, tg_in_one_day=72, forecast_day_number=5, pre_len=1, batch_size=32):
	"""Build the train / validation / test DataLoaders for the speed dataset.

	Returns (train_loader, val_loader, test_loader, max_speed, min_speed);
	the max/min come from the training dataset and are used later to undo
	the min-max normalization.
	"""
	def _build(is_train, is_val, val_rate):
		# One dataset + loader per split; no shuffling so the time order is kept.
		dataset = Traffic_speed(
			time_interval=time_interval, time_lag=time_lag,
			tg_in_one_day=tg_in_one_day, forecast_day_number=forecast_day_number,
			pre_len=pre_len, speed_data=speed_data,
			is_train=is_train, is_val=is_val, val_rate=val_rate)
		return dataset, DataLoader(dataset, batch_size=batch_size, shuffle=False)

	print("train speed")
	speed_train, speed_data_loader_train = _build(is_train=True, is_val=False, val_rate=0.1)
	max_speed, min_speed = speed_train.get_max_min_speed()

	print("val speed")
	_, speed_data_loader_val = _build(is_train=True, is_val=True, val_rate=0.1)

	print("test speed")
	_, speed_data_loader_test = _build(is_train=False, is_val=False, val_rate=0)

	return speed_data_loader_train, speed_data_loader_val, speed_data_loader_test, max_speed, min_speed
import numpy as np
import os, time, torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from utils.utils import GetLaplacian
from model.main_model import Model
import matplotlib.pyplot as plt
from utils.metrics import Metrics, Metrics_1d
from data.get_dataloader import get_speed_dataloader

# Use the first GPU when available; everything below is moved to this device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# ----- hyperparameters -----
epoch_num = 1000  # max training epochs (not used in this evaluation script)
lr = 0.001  # Adam learning rate
time_interval = 15  # minutes aggregated into one time step
time_lag = 10  # past time steps fed to the model per sample
tg_in_one_day = 72  # time steps per "day" -- NOTE(review): 72 x 15 min = 18 h, not 24 h; confirm the aggregation window
forecast_day_number = 15  # days reserved for the test tail -- NOTE(review): differs from the loader default of 5; confirm intended split
pre_len = 1  # future time steps predicted
batch_size = 32
station_num = 156  # number of roads (rows of the adjacency matrix)
# model_type = 'ours'
# TIMESTAMP = str(time.strftime("%Y_%m_%d_%H_%M_%S"))
# save_dir = './save_model/' + model_type + '_' + TIMESTAMP
# if not os.path.exists(save_dir):
# 	os.makedirs(save_dir)

# Build the three DataLoaders plus the raw min/max speeds used to undo normalization.
speed_data_loader_train, speed_data_loader_val, speed_data_loader_test, max_speed, min_speed = \
	get_speed_dataloader(time_interval=time_interval, time_lag=time_lag, tg_in_one_day=tg_in_one_day, forecast_day_number=forecast_day_number, pre_len=pre_len, batch_size=batch_size)

# get normalized adj
adjacency = np.loadtxt('./data/sz_adj1.csv', delimiter=",")
adjacency = torch.tensor(GetLaplacian(adjacency).get_normalized_adj(station_num)).type(torch.float32).to(device)

global_start_time = time.time()
writer = SummaryWriter()

model = Model(time_lag, pre_len, station_num, device)

if torch.cuda.is_available():
	model.cuda()

# Redundant with model.cuda() above when a GPU exists, harmless on CPU.
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)  # NOTE(review): discarded -- re-created after the checkpoint load below
mse = torch.nn.MSELoss().to(device)

# Restore the trained weights (a state_dict saved during training).
path = './save_model/ours_2021_08_25_08_40_23/model_dict_checkpoint_2929_0.00017515.pth'
checkpoint = torch.load(path)
model.load_state_dict(checkpoint, strict=True)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# ---------------------------------------------------------------------------
# Test: run the trained model over the test loader, de-normalize the
# predictions and score them against the raw (un-normalized) targets.
# ---------------------------------------------------------------------------
result = []  # de-normalized predictions, one (roads, pre_len) array per sample
result_original = []  # raw ground-truth speeds, same layout
if not os.path.exists('result/prediction'):
	os.makedirs('result/prediction/')
if not os.path.exists('result/original'):
	os.makedirs('result/original')
with torch.no_grad():
	model.eval()
	test_loss = 0
	for i_batch, (test_speed_X, test_speed_Y, test_speed_Y_original) in enumerate(speed_data_loader_test):
		test_speed_X, test_speed_Y = test_speed_X.type(torch.float32).to(device), test_speed_Y.type(torch.float32).to(device)
		target = model(test_speed_X, adjacency)
		loss = mse(input=test_speed_Y, target=target)
		test_loss += loss.item()

		# De-normalize back to the original speed scale.
		# FIX: the dataset normalizes with (x - min) / (max - min), so the
		# inverse is x * (max - min) + min; the original multiplied by max
		# only, which is correct only when min_speed == 0.
		clone_prediction = target.cpu().detach().numpy().copy() * (max_speed - min_speed) + min_speed
		for sample in clone_prediction:  # (roads, pre_len) per sample
			result.append(sample)

		# Ground truth was stored un-normalized in the dataset.
		test_speed_Y_original = test_speed_Y_original.cpu().detach().numpy()
		for sample in test_speed_Y_original:
			result_original.append(sample)

	print(np.array(result).shape, np.array(result_original).shape)  # (num, roads, pre_len)
	# Truncate to integers and clamp negative speeds to 0.
	# FIX: np.int was removed in NumPy 1.24 -- use the builtin int.
	result = np.array(result).astype(int)
	result[result < 0] = 0
	result_original = np.array(result_original).astype(int)
	result_original[result_original < 0] = 0

	# Collect prediction (x) / ground-truth (y) time series for five sample roads.
	x = [[], [], [], [], []]
	y = [[], [], [], [], []]
	sample_roads = (4, 18, 30, 60, 94)
	for i in range(result.shape[0]):
		for k, road in enumerate(sample_roads):
			x[k].append(result[i][road][0])
			y[k].append(result_original[i][road][0])

	# NOTE(review): this reshape mixes sample and road axes, so rows are NOT
	# per-road series; both arrays are reshaped identically, however, so the
	# element-wise pairing (and thus the metrics below) is unaffected.
	result = np.array(result).reshape(station_num, -1)
	result_original = result_original.reshape(station_num, -1)

	# Global metrics over every road/time step.
	RMSE, R2, MAE, WMAPE = Metrics(result_original, result).evaluate_performance()

	avg_test_loss = test_loss / len(speed_data_loader_test)
	print('test Loss:', avg_test_loss)

	# Per-road metrics for the five sampled roads.
	RMSE_y0, R2_y0, MAE_y0, WMAPE_y0 = Metrics_1d(y[0], x[0]).evaluate_performance()
	RMSE_y1, R2_y1, MAE_y1, WMAPE_y1 = Metrics_1d(y[1], x[1]).evaluate_performance()
	RMSE_y2, R2_y2, MAE_y2, WMAPE_y2 = Metrics_1d(y[2], x[2]).evaluate_performance()
	RMSE_y3, R2_y3, MAE_y3, WMAPE_y3 = Metrics_1d(y[3], x[3]).evaluate_performance()
	RMSE_y4, R2_y4, MAE_y4, WMAPE_y4 = Metrics_1d(y[4], x[4]).evaluate_performance()

# L3, = plt.plot(x[0], color="r")
# L4, = plt.plot(y[0], color="b")
# plt.legend([L3, L4], ["L3-prediction", "L4-true"], loc='best')
# plt.show()

# Bundle the metrics: one overall triple plus one per sampled road.
ALL = [RMSE, MAE, WMAPE]
y0_ALL = [RMSE_y0, MAE_y0, WMAPE_y0]
y1_ALL = [RMSE_y1, MAE_y1, WMAPE_y1]
y2_ALL = [RMSE_y2, MAE_y2, WMAPE_y2]
y3_ALL = [RMSE_y3, MAE_y3, WMAPE_y3]
y4_ALL = [RMSE_y4, MAE_y4, WMAPE_y4]
per_road = [y0_ALL, y1_ALL, y2_ALL, y3_ALL, y4_ALL]

# Persist everything under result/ with the run's lr/batch size in the name.
file_prefix = 'result/lr_' + str(lr) + '_batch_size_' + str(batch_size)
np.savetxt(file_prefix + '_ALL.txt', ALL)
for road_idx, triple in enumerate(per_road):
	np.savetxt(file_prefix + '_y' + str(road_idx) + '_ALL.txt', triple)
np.savetxt('result/X_original.txt', x)
np.savetxt('result/Y_prediction.txt', y)

print("ALL:", ALL)
for road_idx, triple in enumerate(per_road):
	print("y" + str(road_idx) + "_ALL:", triple)

print("end")

# Plot prediction vs. ground truth for the second sampled road.
x = x[1]
y = y[1]
plt.xlabel("Time granularity=15min")
plt.ylabel("Speed")
L1, = plt.plot(x, color="r")
L2, = plt.plot(y, color="y")
plt.legend([L1, L2], ["pre", "actual"], loc='best')
plt.show()

 创作不易 觉得有帮助请点赞关注收藏~~~

猜你喜欢

转载自blog.csdn.net/jiebaoshayebuhui/article/details/130448954