前面的文章记录了深度学习挖短线股的数据预处理、模型训练、结果预测过程,本文将记录应用预测结果进行回测的步骤。
策略回测
在策略回测实现中,主要应用了backtrader的Data Feeds扩展功能 ,在笔记(25)中,记录了使用Data Feeds扩展进行单只股票回测的过程,本文则是基于深度学习预测结果,使用Data Feeds扩展进行多股回测。
预测结果正则化
笔记(43)中得到每只股票每日的预测结果,取值范围在0到1之间,值越接近于1,表明其越有在短期暴涨的可能。
经过观察发现,每只股票预测值的取值范围差异较大。例如,000705最大的预测值仅为0.35,而300483最大预测值为0.97。考虑到我们是对每只个股分别建立模型进行预测的,跨股票直接比较预测值的意义较小,因此在多股同时回测前,先对每只股票的预测结果进行正则化(更准确地说是min-max归一化),即将每只股票的预测值线性拉伸到0到1的范围内。实际运行结果表明,经过正则化处理后的回测收益要高于未经正则化处理的回测收益。
for stk in stklist:
    datapath = os.path.join(modpath, 'baostock', 'predict_results', stk + 'res.csv')
    outpath = os.path.join(modpath, 'baostock', 'normalization', stk + 'res.csv')
    indf = pd.read_csv(datapath)
    # Work on an explicit copy: adding a column to a slice of indf triggers
    # pandas' SettingWithCopy warning and may not behave as intended.
    outdf = indf[['date', 'open', 'high', 'low', 'close', 'buy', 'predict_result']].copy()
    lowest = outdf['predict_result'].min()
    delta = outdf['predict_result'].max() - lowest
    if delta > 0:
        # Min-max scaling: stretch this stock's predictions onto [0, 1].
        outdf['nor'] = (outdf['predict_result'] - lowest) / delta
    else:
        # All predictions are identical (or missing): avoid dividing by zero.
        outdf['nor'] = 0.0
    # 'nor' is appended as the last column (index 7) of the output CSV.
    outdf.to_csv(outpath, index=False)
回测关键步骤
数据加载
我们只选择深市的股票进行回测:
for stk in stklist:
    # Only Shenzhen-listed stocks are backtested: skip codes starting with 'sh'.
    if stk.startswith('sh'):
        continue
    datapath = os.path.join(modpath, 'baostock', 'normalization', stk + 'res.csv')
    # Build the data feed for this stock.
    data = GenericCSVDataEx(
        dataname=datapath,
        fromdate=datetime.datetime(2017, 12, 31),
        todate=datetime.datetime(2019, 12, 31),
        nullvalue=0.0,
        dtformat='%Y-%m-%d',
        # Column positions inside the CSV file; -1 marks a field as absent.
        datetime=0,
        open=1,
        high=2,
        low=3,
        close=4,
        volume=-1,
        openinterest=-1,
        # Column 7 is the normalized prediction ('nor') appended by the
        # normalization step, not the raw 'predict_result' column.
        predict_result=7,
    )
    # Register the feed with Cerebro under the stock code.
    cerebro.adddata(data, name=stk)
Data Feeds扩展
自定义GenericCSVData的子类,指明所使用字段在csv文件中的位置(哪一列):
# Extended data feed
class GenericCSVDataEx(GenericCSVData):
    """GenericCSVData with one extra line carrying the model prediction."""

    # Extra line exposed to strategies as `data.predict_result`.
    lines = ('predict_result', )
    # Default CSV column index for the extra line. 24 is only this class's
    # default; callers are expected to pass the real column position
    # (the backtest in this article passes predict_result=7).
    params = (('predict_result', 24),)
买单
最多同时持有3只股票,在未持有的股票中选择(正则化后)预测值最高的股票进行购买:
free_slots = HOLD_STOCK_NUMBER - len(self.hold_stocks)
if free_slots > 0:
    # Fix the per-slot budget once. The buy orders below are not filled yet,
    # so cash does not shrink inside the loop; recomputing cash / free slots
    # per order would commit cash/3 + cash/2 + cash/1 when three slots are free.
    budget = self.broker.getcash() / free_slots
    # Rank the stocks that are not held by prediction score, highest first.
    candidates = [d for d in self.datas if d._name not in self.hold_stocks]
    candidates.sort(key=lambda d: d.predict_result[0], reverse=True)
    for d in candidates:
        price = d.close[0]
        # Skip missing prices (0.0 / NaN) instead of dividing by zero.
        if not price > 0:
            continue
        # Round down to whole lots of 100 shares.
        stake = int(budget // (price * 100)) * 100
        # Too expensive for one lot: do not reserve a slot for an order
        # that would never be created.
        if stake <= 0:
            continue
        self.hold_stocks.append(d._name)
        self.buy(data=d, size=stake)
        if len(self.hold_stocks) >= HOLD_STOCK_NUMBER:
            break
卖单
使用StopTrail订单进行卖出:
for stk in self.hold_stocks:
    # No trailing-stop sell order has been placed for this holding yet.
    if stk not in self.orders:
        data = self.getdatabyname(stk)
        print(data._name)
        order = self.close(
            data=data,
            exectype=bt.Order.StopTrail,
            trailamount=0,
            trailpercent=self.p.trailpercent,
        )
        # close() may return None (e.g. the buy order has not been filled
        # yet, so there is no position to close). Storing None would mark
        # the stock as "already has a stop order" forever, so retry later.
        if order is not None:
            self.orders[stk] = order
回测结果
在深市1609只股票上进行回测,回测周期为2017年12月31日至2019年12月31日,收益率为37.2%(未统计最大回撤)。
以上记录了深度学习挖短线股的多股回测过程,下一篇将记录依据该策略进行个股筛选的过程。
# 正则化
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import datetime # 用于datetime对象操作
import os.path # 用于管理路径
import sys # 用于在argvTo[0]中找到脚本名称
import pandas as pd
def normalize_predictions(indf):
    """Return the backtest columns of *indf* plus a min-max scaled 'nor' column.

    The result holds 'date', 'open', 'high', 'low', 'close', 'buy',
    'predict_result' and, as the last column, 'nor': 'predict_result'
    linearly stretched onto [0, 1]. *indf* itself is not modified.
    If all predictions are identical (or there are none), 'nor' is 0.0.
    """
    # Explicit copy: adding a column to a slice of indf triggers pandas'
    # SettingWithCopy warning and may not behave as intended.
    outdf = indf[['date', 'open', 'high', 'low', 'close', 'buy', 'predict_result']].copy()
    lowest = outdf['predict_result'].min()
    delta = outdf['predict_result'].max() - lowest
    if delta > 0:
        outdf['nor'] = (outdf['predict_result'] - lowest) / delta
    else:
        # Constant or empty series: avoid dividing by zero.
        outdf['nor'] = 0.0
    return outdf


if __name__ == '__main__':
    # Locate the data relative to the script itself, so the script can be
    # started from any working directory.
    modpath = os.path.dirname(os.path.abspath(sys.argv[0]))
    stkfilepath = os.path.join(modpath, 'stk_data', 'dp_stock_list.csv')
    stklist = pd.read_csv(stkfilepath)['code'].tolist()
    for stk in stklist:
        datapath = os.path.join(modpath, 'baostock', 'predict_results', stk + 'res.csv')
        outpath = os.path.join(modpath, 'baostock', 'normalization', stk + 'res.csv')
        outdf = normalize_predictions(pd.read_csv(datapath))
        outdf.to_csv(outpath, index=False)
# 多股回测
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import datetime # 用于datetime对象操作
import os.path # 用于管理路径
import sys # 用于在argvTo[0]中找到脚本名称
import backtrader as bt # 引入backtrader框架
from backtrader.feeds import GenericCSVData # 用于扩展DataFeed
import pandas as pd
# Maximum number of stocks the strategy holds at the same time.
HOLD_STOCK_NUMBER = 3
# Extended data feed
class GenericCSVDataEx(GenericCSVData):
    """GenericCSVData with one extra line carrying the model prediction."""

    # Extra line exposed to strategies as `data.predict_result`.
    lines = ('predict_result', )
    # Default CSV column index for the extra line. 24 is only this class's
    # default; callers are expected to pass the real column position
    # (the backtest below passes predict_result=7).
    params = (('predict_result', 24),)
# Strategy
class DPStrategy(bt.Strategy):
    """Hold up to HOLD_STOCK_NUMBER stocks picked by prediction score.

    On every bar the free slots are filled with the not-yet-held stocks
    that have the highest `predict_result`; every holding is protected by
    a StopTrail sell order using the `trailpercent` parameter.
    """

    params = (
        ('trailpercent', 0.05),
    )

    def log(self, txt, dt=None):
        """Print *txt* prefixed with *dt* (default: current date of the first feed)."""
        dt = dt or self.datas[0].datetime.date(0)
        print('%s, %s' % (dt.isoformat(), txt))

    def __init__(self):
        # Names of stocks that are held or have a buy order in flight.
        self.hold_stocks = []
        # Stock name -> its pending StopTrail sell order.
        self.orders = {}

    def notify_order(self, order):
        """Keep `hold_stocks` / `orders` in sync with order status changes."""
        name = order.data._name
        if order.status in [bt.Order.Completed]:
            if order.isbuy():
                print('{}: BUY {} EXECUTED, Price: {:.2f}'.format(
                    self.datetime.date(), name, order.executed.price))
            else:  # Sell
                # Position closed: free the slot and forget the stop order.
                self.orders.pop(name, None)
                if name in self.hold_stocks:
                    self.hold_stocks.remove(name)
                print('{}: SELL {} EXECUTED, Price: {:.2f}'.format(
                    self.datetime.date(), name, order.executed.price))
        elif order.status in [bt.Order.Rejected, bt.Order.Margin,
                              bt.Order.Cancelled, bt.Order.Expired]:
            if order.isbuy():
                # The buy never happened: release the reserved slot.
                if name in self.hold_stocks:
                    self.hold_stocks.remove(name)
            else:
                # The stop order died but the position is still open: keep
                # the slot and drop the stale order so next() places a new one.
                self.orders.pop(name, None)
            print('{}: order {} failed!'.format(self.datetime.date(), name))

    def notify_trade(self, trade):
        """Print gross and net profit once a trade is closed."""
        if not trade.isclosed:
            return
        print('{}: TRADING {} OPERATION PROFIT, GROSS {:.2f}, NET {:.2f}'.format(
            self.datetime.date(), trade.data._name, trade.pnl, trade.pnlcomm))

    def next(self):
        """Place missing stop orders, then fill free slots with top-ranked stocks."""
        # Print the current holdings.
        print(self.hold_stocks)
        # Make sure every holding has a StopTrail sell order.
        for stk in self.hold_stocks:
            # No trailing-stop sell order has been placed for this holding yet.
            if stk not in self.orders:
                data = self.getdatabyname(stk)
                print(data._name)
                order = self.close(
                    data=data,
                    exectype=bt.Order.StopTrail,
                    trailamount=0,
                    trailpercent=self.p.trailpercent,
                )
                # close() may return None (e.g. the buy order has not been
                # filled yet, so there is no position to close). Storing None
                # would block any later stop order, so retry on the next bar.
                if order is not None:
                    self.orders[stk] = order

        free_slots = HOLD_STOCK_NUMBER - len(self.hold_stocks)
        if free_slots <= 0:
            return
        # Fix the per-slot budget once. The buy orders below are not filled
        # yet, so cash does not shrink inside the loop; recomputing
        # cash / free slots per order would commit more cash than available.
        budget = self.broker.getcash() / free_slots
        # Rank the stocks that are not held by prediction score, highest first.
        candidates = [d for d in self.datas if d._name not in self.hold_stocks]
        candidates.sort(key=lambda d: d.predict_result[0], reverse=True)
        for d in candidates:
            price = d.close[0]
            # Skip missing prices (0.0 / NaN) instead of dividing by zero.
            if not price > 0:
                continue
            # Round down to whole lots of 100 shares.
            stake = int(budget // (price * 100)) * 100
            # Too expensive for one lot: do not reserve a slot for an order
            # that would never be created.
            if stake <= 0:
                continue
            self.hold_stocks.append(d._name)
            self.buy(data=d, size=stake)
            if len(self.hold_stocks) >= HOLD_STOCK_NUMBER:
                break
if __name__ == '__main__':
    # Locate the data relative to the script itself, so the script can be
    # started from any working directory.
    modpath = os.path.dirname(os.path.abspath(sys.argv[0]))
    stkfilepath = os.path.join(modpath, 'stk_data', 'dp_stock_list.csv')
    stklist = pd.read_csv(stkfilepath)['code'].tolist()

    # Create the Cerebro engine and register the strategy.
    cerebro = bt.Cerebro()
    cerebro.addstrategy(DPStrategy)

    for stk in stklist:
        # Only Shenzhen-listed stocks are backtested: skip codes starting with 'sh'.
        if stk.startswith('sh'):
            continue
        datapath = os.path.join(modpath, 'baostock', 'normalization', stk + 'res.csv')
        # Build the data feed for this stock.
        data = GenericCSVDataEx(
            dataname=datapath,
            fromdate=datetime.datetime(2017, 12, 31),
            todate=datetime.datetime(2019, 12, 31),
            nullvalue=0.0,
            dtformat='%Y-%m-%d',
            # Column positions inside the CSV file; -1 marks a field as absent.
            datetime=0,
            open=1,
            high=2,
            low=3,
            close=4,
            volume=-1,
            openinterest=-1,
            # Column 7 is the normalized prediction ('nor') appended by the
            # normalization script, not the raw 'predict_result' column.
            predict_result=7,
        )
        # Register the feed with Cerebro under the stock code.
        cerebro.adddata(data, name=stk)

    # Starting cash.
    cerebro.broker.setcash(100000.0)
    # Commission of 0.1% per trade.
    cerebro.broker.setcommission(commission=0.001)
    print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
    # Run the backtest over all feeds.
    cerebro.run()
    print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())
欢迎大家关注、点赞、转发、留言,感谢支持!
为了便于相互交流学习,已建微信群,感兴趣的读者请加微信。
近期新建QQ群:676186743,方便资料共享,定期发布学习任务,欢迎加入!