# 优化一下SIMILAR (Optimize SIMILAR)

# -*- coding:utf-8 -*-

import pandas as pd
import pymysql
pymysql.install_as_MySQLdb()
from sqlalchemy import create_engine

# conn = create_engine('mysql+mysqldb://root:password@localhost:3306/databasename?charset=utf8')  

# Path of the HDF5 file handed to HqReader as `fn` in main(); HqReader reads
# it with pd.read_hdf in get_df_from_h5.
RAWTS = './h5data/SHSZ_RAWTS_HIS.h5'


def df_col_max(c1, c2):
    """Return the larger of two values.

    ``c1`` is returned only when ``c1 > c2`` is true; in every other case
    (ties, or a comparison that evaluates false such as one involving NaN)
    ``c2`` is returned.
    """
    return c1 if c1 > c2 else c2


def df_col_min(c1, c2):
    """Return the smaller of two values.

    ``c1`` is returned only when ``c1 < c2`` is true; in every other case
    (ties, or a comparison that evaluates false such as one involving NaN)
    ``c2`` is returned.
    """
    return c1 if c1 < c2 else c2


class DB(object):
    """Holder for MySQL connection settings and the handles built from them.

    Attributes:
        user, pswd, host, db: connection settings.
        dbcon: SQLAlchemy engine created by ``_create_engine_`` (or None).
        con: pymysql connection created by ``_db_con_`` (or None).
        cur: initialised to None; not used inside this class.
        eFlag: True once ``_create_engine_`` has succeeded.
        dFlag: True once ``_db_con_`` has succeeded.
    """

    # NOTE(review): the default credentials are hard-coded in the signatures
    # below; callers should pass their own values explicitly.
    def __init__(self, user="root", pswd="10111011", host="localhost", db="jswk"):
        """Store the connection settings; nothing is connected yet."""
        self.user = user
        self.pswd = pswd
        self.host = host
        self.db = db
        self.dbcon = None
        self.con = None
        self.cur = None
        self.dFlag = False
        self.eFlag = False

    def __del__(self):
        """Release the database handles when the object is collected."""
        try:
            self.close()
        except Exception:
            # A finaliser must never raise (it may run on a half-built
            # object or during interpreter shutdown).
            pass

    def close(self):
        """Close the pymysql connection and dispose the engine, if any.

        Safe to call more than once. Errors raised while closing are printed,
        not propagated, because this is best-effort cleanup.
        """
        con = getattr(self, 'con', None)
        if con is not None:
            try:
                con.close()
            except Exception as exc:
                print('Closing connection failed: ', exc)
        self.con = None
        self.dFlag = False

        engine = getattr(self, 'dbcon', None)
        if engine is not None:
            try:
                engine.dispose()
            except Exception as exc:
                print('Disposing engine failed: ', exc)
        self.dbcon = None
        self.eFlag = False

    def set_db_pars(self, user="root", pswd="10111011", host="127.0.0.1", db="jswk"):
        """Replace the stored connection settings.

        Existing ``con`` / ``dbcon`` handles are left untouched; call
        ``_create_engine_`` / ``_db_con_`` again to use the new settings.
        """
        self.user = user
        self.pswd = pswd
        self.host = host
        self.db = db

    def _create_engine_(self):
        """Create the SQLAlchemy engine and store it in ``self.dbcon``.

        Sets ``self.eFlag`` to True on success; on failure the error is
        printed and ``self.eFlag`` is left unchanged.
        """
        # Port 3306 and the utf8 charset are fixed in the URL.
        cnnstr = 'mysql+mysqldb://{u}:{p}@{h}:3306/{db}?charset=utf8'.format(
            u=self.user, p=self.pswd, h=self.host, db=self.db)
        try:
            self.dbcon = create_engine(cnnstr)
        except Exception as exc:
            print('Create engine to [', self.host, '] failed! ')
            print('ErrorCode: ', exc)
        else:
            # NOTE(review): per the SQLAlchemy docs create_engine() does not
            # open a connection, so "Created!" does not prove the server is
            # reachable - confirm if that matters to callers.
            print('Created!')
            self.eFlag = True

    def _db_con_(self):
        """Open a pymysql connection and store it in ``self.con``.

        Sets ``self.dFlag`` to True on success; on failure the error is
        printed and ``self.dFlag`` is left unchanged.
        """
        try:
            self.con = pymysql.connect(database=self.db,
                                       user=self.user,
                                       password=self.pswd,
                                       host=self.host,
                                       charset="utf8")
        except Exception as exc:
            print('Connecting to MySql [', self.host, '] failed! ')
            print('ErrorCode: ', exc)
        else:
            print('Connected')
            self.dFlag = True


class HqReader(DB):
    """Read daily quotes and store k-day "shape" feature rows in MySQL.

    For every security code the reader loads rows from ``t_his_hq``, derives
    relative-change columns (prefixed ``_r_``), flattens every run of ``k``
    consecutive rows into one wide row and appends the result to the table
    named by ``self.tb`` (``shape_moder_k{k}f{f}``).
    """

    def __init__(self,
                 user="root",
                 pswd="10111011",
                 host="localhost",
                 db="jswk",
                 fn='.',
                 fieldnames=None):
        """Store settings.

        Args:
            user, pswd, host, db: forwarded to ``DB.__init__``.
            fn: path of the HDF5 file used by ``get_df_from_h5``.
            fieldnames: column names kept for the HDF5 reader; defaults to
                ``['open', 'high', 'low', 'close']``. A copy is stored so the
                caller's list is never shared.
        """
        super(HqReader, self).__init__(user, pswd, host, db)
        self.__fn = fn
        if fieldnames is None:
            fieldnames = ['open', 'high', 'low', 'close']
        self.__fieldNames = list(fieldnames)
        self.tb = 'shape_moder_k{k}f{f}'
        self.fromdt = None
        self.todt = None
        self.codelist = []
        # Labels dropped for the default k=7, f=3; get_k_mode derives its own
        # list from the k and f it is called with.
        self.droplist = self._drop_labels(7, 3)

    @staticmethod
    def _drop_labels(k, f):
        """Return the ``_r_cpf{x}_{y}`` labels for x in 1..f and y in 1..k-1.

        Dropping them leaves the ``_r_cpf*`` values of window position k only.
        """
        return ['_r_cpf' + str(x) + '_' + str(y)
                for x in range(1, f + 1)
                for y in range(1, k)]

    def set_pars(self, fromdt, todt):
        """Set the date range used by the readers."""
        self.fromdt = fromdt
        self.todt = todt

    def get_all_codes(self):
        """Load every ``Fsecode`` from ``t_stock_pinyin`` into ``self.codelist``."""
        df = pd.read_sql(sql='select Fsecode from t_stock_pinyin order by Fsecode',
                         con=self.con)
        # Series.get_values() was removed in pandas 1.0; tolist() works on
        # both old and new versions.
        self.codelist = df['Fsecode'].tolist()

    def get_df_from_h5(self, secode):
        """Read key ``secode`` from the HDF5 file, limited to [fromdt, todt].

        Returns an empty DataFrame when nothing matches.
        """
        df = pd.read_hdf(path_or_buf=self.__fn,
                         key=secode,
                         # columns=self.__fieldNames,
                         where=['index>=Timestamp("%s")' % self.fromdt,
                                'index<=Timestamp("%s")' % self.todt])
        return pd.DataFrame() if df.empty else df

    def get_df_from_db(self, secode):
        """Read the rows of ``secode`` from ``t_his_hq`` with Ftdate >= fromdt.

        Only rows with ``Fvol > 0`` are returned. Returns an empty DataFrame
        when nothing matches.
        """
        # NOTE(review): self.todt is not applied here (only the HDF5 reader
        # uses it) - confirm whether an upper date bound is wanted.
        # Values are passed as driver parameters instead of being formatted
        # into the SQL text.
        isql = ("select Ftdate as tdate,Fop,Fhp,Flp,Fcp,Fpcp,Fchg,Fchgpct,Fvol "
                "from t_his_hq where Fsecode=%(secode)s and Ftdate>=%(st)s "
                "and Fvol>0")
        df = pd.read_sql(sql=isql,
                         con=self.con,
                         params={'secode': secode, 'st': self.fromdt})
        return pd.DataFrame() if df.empty else df

    def get_k_mode(self, secode, k=7, f=3):
        """Build the k-row window features of ``secode`` and append them to MySQL.

        Args:
            secode: security code to process.
            k: number of consecutive rows flattened into one output row.
            f: number of ``_r_cpf{i}`` columns (i = 1..f) to compute.

        Returns:
            Always True. Database errors on insert are printed, not raised.
        """
        df = self.get_df_from_db(secode)
        if df.empty:
            return True

        # Newest row first: shift(-1) below reads the next-older row and
        # shift(+n) reads the row n steps newer.
        df = df.sort_values(by='tdate', ascending=False).reset_index(drop=True)

        # Fq is the running product of Fpcp / (next-older row's Fcp), moved
        # down one row and starting at 1.0 for the newest row.
        # Presumably a price-adjustment factor - TODO confirm.
        df['Fchg'] = df['Fcp'].shift(-1)
        df['Fchgpct'] = (df['Fpcp'] / df['Fchg']).shift(1).fillna(1.00)
        df['Fq'] = df['Fchgpct'].cumprod()

        price_cols = ['Fop', 'Fhp', 'Flp', 'Fcp']
        for c in price_cols:
            df[c] = df[c] * df['Fq']
        df['Fvol'] = df['Fvol'] / df['Fq']
        df['Fvols'] = df['Fvol'].shift(-1)

        # Relative change of Fcp between this row and the row fi steps newer.
        for fi in range(1, f + 1):
            df['_r_cpf' + str(fi)] = df['Fcp'].shift(fi) / df['Fcp'] - 1.0

        # NOTE(review): the price columns were scaled by Fq above but Fpcp
        # was not - confirm that dividing by the unscaled Fpcp is intended.
        for c in price_cols:
            df['_r_' + c] = df[c] / df['Fpcp'] - 1.0
        df['_r_Fvol'] = df['Fvol'] / df['Fvols'] - 1.0

        # Element-wise larger / smaller of _r_Fcp and _r_Fop. Series.where
        # keeps _r_Fcp where the comparison holds and takes _r_Fop otherwise.
        rcp, rop = df['_r_Fcp'], df['_r_Fop']
        df['_r_maxco'] = rcp.where(rcp > rop, rop)
        df['_r_minco'] = rcp.where(rcp < rop, rop)

        names = ['Fop', 'Fhp', 'Flp', 'Fcp', 'Fvol', 'maxco', 'minco']
        names += ['cpf' + str(i) for i in range(1, f + 1)]
        usecol = ['tdate'] + ['_r_' + c for c in names]

        # Keep complete rows only, oldest first, with a clean 0..n-1 index so
        # the windows below can be cut by position.
        df = df.loc[:, usecol].dropna()
        df = df.sort_values(by='tdate').reset_index(drop=True)
        nrow = len(df)
        print(nrow)

        droplist = self._drop_labels(k, f)
        positions = [str(i) for i in range(1, k + 1)]
        frames = []
        # Every run of k consecutive rows, including the most recent one.
        for start in range(nrow - k + 1):
            dfm = df.iloc[start:start + k].copy()
            # The window is labelled with its latest date; rows are numbered
            # '1'..'k' from oldest to newest.
            dfm['mkdate'] = max(dfm['tdate'].values)
            dfm['tdate'] = positions
            dfm.set_index(keys=['mkdate', 'tdate'], inplace=True)
            dfm = dfm.unstack()
            dfm.columns = ['_'.join(col) for col in dfm.columns]
            dfm.drop(labels=droplist, axis=1, inplace=True)
            frames.append(dfm)

        if not frames:
            # Writing an empty frame could create the table with a wrong
            # schema, so skip the insert entirely.
            print('Not enough rows for a window', secode)
            return True

        dfs = pd.concat(objs=frames, axis=0).round(4)
        dfs.insert(0, 'secode', secode)

        try:
            dfs.to_sql(name=self.tb.format(k=k, f=f), con=self.dbcon, if_exists='append')
        except Exception as exc:
            print('Error', exc)
        else:
            print('Inserted to db', secode)

        return True

    def run_all(self):
        """Connect, load the code list and process the codes.

        Does nothing (apart from printing a message) when ``fromdt`` is not
        set or when the engine / connection could not be created.
        """
        if not self.fromdt:
            print('Must set from date!')
            return
        self._create_engine_()
        self._db_con_()
        if not (self.eFlag and self.dFlag):
            print('Database is not available!')
            return
        self.get_all_codes()
        # NOTE(review): only the first 10 codes are processed - looks like a
        # debugging limit; confirm before removing.
        for code in self.codelist[:10]:
            self.get_k_mode(code)


def main():
    """Run the whole pipeline for the fixed 2017-10-01 .. 2018-05-10 range."""
    reader = HqReader(fn=RAWTS)
    reader.set_pars(todt='2018-05-10', fromdt='2017-10-01')
    reader.run_all()


# Run the pipeline only when this file is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()

# 猜你喜欢
#
# 转载自blog.csdn.net/sphinxrascal168/article/details/80135384