# python 101_formulaic_1-40_factor
#
# 101 Formulaic Alphas: the first forty factors, implemented entirely in Python code

# -*- coding: utf-8 -*-
"""
Created on Tue Aug  6 19:23:12 2019

@author: win10
"""

# Alpha#1: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5))-0.5)
factor = 'Alpha#1'
print(factor)


def Alpha01_fun(data):
    """Add the time-series inputs of Alpha#1 to one ``code`` group.

    Reads the columns ``rtn`` and ``adj_close`` and adds, in place:

    * ``part1`` -- squared 20-row rolling std of ``rtn`` on rows where
      ``rtn < 0``, squared ``adj_close`` on all other rows.
    * ``part2`` -- position (0..4) of the largest ``part1`` inside each
      complete 5-row window; NaN until five valid rows are available.

    Returns the same DataFrame object.
    """
    is_down = data['rtn'] < 0
    # pd.rolling_std and DataFrame.ix were removed from pandas; the
    # method-style rolling API and Series.where produce the same values.
    rtn_std20 = data['rtn'].rolling(window=20).std()
    data['part1'] = data['adj_close'].where(~is_down, rtn_std20) ** 2
    data['part2'] = data['part1'].rolling(window=5, min_periods=5).apply(lambda x: x.argmax())

    return data


# Per stock (grouped by 'code'): add the time-series columns part1 / part2.
data_factor1 = data_clean.groupby('code').apply(Alpha01_fun)

# Per date: percentile-rank part2 across the rows of that date and subtract 0.5.
# 'code' is collected with the same grouping so both columns share one index.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(lambda x: x['part2'].rank(pct=True) - 0.5)
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Join the factor back onto data_clean by (date, code), then keep only rows
# whose factor value is neither NaN nor +/-inf.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# Flip the sign of the factor before it is analysed.
data_factor[factor] *= -1

# risk_corr_plot, pure_factor, t_capital and zz500 are defined outside this section.
barra_list = risk_corr_plot(data_factor, factor)
print(factor, barra_list)
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
# Replace the grouped index with a plain 0..n-1 index.
data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#2:(-1 * correlation(rank(delta(log(volume), 2)), rank(((close - open) / open)), 6))
factor = 'Alpha#2'
print(factor)


def Alpha02_fun(data):
    """Add ``part1`` = delta(log(adj_volume), 2) to one ``code`` group.

    Reads ``adj_volume`` and writes ``part1`` in place; the first two rows
    are NaN.  Returns the same DataFrame object.
    """
    # The formula takes the 2-period change of log volume; the earlier code
    # used diff(1).  No temporary 'log_volume' column is left behind.
    data['part1'] = np.log(data['adj_volume']).diff(2)

    return data


# Per stock: add part1 (change of log volume).
data_factor1 = data_clean.groupby('code').apply(Alpha02_fun)

# Per date: cross-sectional percentile ranks of both correlation inputs.
data_factor2 = pd.DataFrame()
data_factor2['_part1'] = data_factor1.groupby('date').apply(lambda x: x['part1'].rank(pct=True))
data_factor2['_part2'] = data_factor1.groupby('date').apply(
    lambda x: ((x['adj_close'] - x['adj_open']) / x['adj_open']).rank(pct=True))
data_factor2['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor2.reset_index(inplace=True)
del data_factor1

# Per stock: 6-row rolling correlation of the two rank series.
# Series.rolling(...).corr replaces pd.rolling_corr, which was removed from pandas.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor2.groupby('code').apply(
    lambda x: x['_part1'].rolling(window=6).corr(x['_part2']))
data_factor['date'] = data_factor2.groupby('code').apply(lambda x: x['date'])
data_factor.reset_index(inplace=True)
del data_factor2

# Join back onto data_clean, drop NaN / +-inf factor values, flip the sign.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#3: (-1 * correlation(rank(open), rank(volume), 10))
factor = 'Alpha#3'
print(factor)

# Per date: cross-sectional percentile ranks of open and volume.
data_factor2 = pd.DataFrame()
data_factor2['_part1'] = data_clean.groupby('date').apply(lambda x: x['adj_open'].rank(pct=True))
data_factor2['_part2'] = data_clean.groupby('date').apply(lambda x: x['adj_volume'].rank(pct=True))
data_factor2['code'] = data_clean.groupby('date').apply(lambda x: x['code'])
data_factor2.reset_index(inplace=True)

# Per stock: 10-row rolling correlation of the two rank series.
# Series.rolling(...).corr replaces pd.rolling_corr, which was removed from pandas.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor2.groupby('code').apply(
    lambda x: x['_part1'].rolling(window=10).corr(x['_part2']))
data_factor['date'] = data_factor2.groupby('code').apply(lambda x: x['date'])
data_factor.reset_index(inplace=True)
del data_factor2

# Join back onto data_clean, drop NaN / +-inf factor values, flip the sign.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1

data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#4: (-1 * Ts_Rank(rank(low), 9))
factor = 'Alpha#4'
print(factor)

# Per date: cross-sectional percentile rank of adj_low.
data_factor2 = pd.DataFrame()
data_factor2['part1'] = data_clean.groupby('date').apply(lambda x: x['adj_low'].rank(pct=True))
data_factor2['code'] = data_clean.groupby('date').apply(lambda x: x['code'])
data_factor2.reset_index(inplace=True)

# Per stock: rank of the newest value inside each 9-row window, divided by 9.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor2.groupby('code').apply(
    lambda x: x['part1'].rolling(window=9).apply(lambda x: st.rankdata(x)[-1] / 9.0))
data_factor['date'] = data_factor2.groupby('code').apply(lambda x: x['date'])
data_factor.reset_index(inplace=True)
del data_factor2

# Join back onto data_clean, drop NaN / +-inf factor values, flip the sign.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
# pure_factor, t_capital and zz500 are defined outside this section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#5:(rank((open - (sum(vwap, 10) / 10))) * (-1 * abs(rank((close - vwap)))))
factor = 'Alpha#5'
print(factor)


def Alpha05_fun(data):
    """Add ``mean_vwap``, the 10-row rolling mean of ``adj_vwap``.

    Called once per ``code`` group; the first nine rows are NaN.
    Returns the same DataFrame object.
    """
    # pd.rolling_mean was removed from pandas; Series.rolling(...).mean()
    # is the equivalent method-style call.
    data['mean_vwap'] = data['adj_vwap'].rolling(window=10).mean()

    return data


# Per stock: add mean_vwap (rolling mean of adj_vwap).
data_factor1 = data_clean.groupby('code').apply(Alpha05_fun)

# Per date: cross-sectional percentile ranks of (open - mean_vwap) and (close - vwap).
data_factor = pd.DataFrame()
data_factor['_part1'] = data_factor1.groupby('date').apply(lambda x: (x['adj_open'] - x['mean_vwap']).rank(pct=True))
data_factor['_part2'] = data_factor1.groupby('date').apply(lambda x: (x['adj_close'] - x['adj_vwap']).rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Product of the two ranks; the formula's -1 is applied by the sign flip below.
data_factor[factor] = data_factor['_part1'] * data_factor['_part2']

# Join back onto data_clean, drop NaN / +-inf factor values, flip the sign.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
# pure_factor, t_capital and zz500 are defined outside this section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# -*- coding: utf-8 -*-
"""
Created on Mon Aug 12 13:22:47 2019

@author: win10
"""

# Alpha#6: (-1 * correlation(open, volume, 10))
factor = 'Alpha#6'
print(factor)

# correlation(open, volume, 10) is a time-series correlation, so it has to be
# rolled along each stock's own history (grouped by 'code').  Grouping by
# 'date' rolled the window across different stocks on the same day.
# Series.rolling(...).corr replaces the removed pd.rolling_corr.
# The leading -1 of the formula is not applied in this section.
data_factor = pd.DataFrame()
data_factor[factor] = data_clean.groupby('code').apply(
    lambda x: x['adj_open'].rolling(window=10).corr(x['adj_volume']))
data_factor['date'] = data_clean.groupby('code').apply(lambda x: x['date'])
data_factor.reset_index(inplace=True)

# Alpha#7: ((adv20 < volume) ? ((-1 * ts_rank(abs(delta(close, 7)), 60)) * sign(delta(close, 7))) : (-1* 1))
factor = 'Alpha#7'
print(factor)


def Alpha07_fun(data):
    """Compute Alpha#7 (without the formula's -1) for one ``code`` group.

    Reads ``adj_volume`` and ``adj_close`` and adds, in place:

    * ``mean_volume`` -- 20-row rolling mean of ``adj_volume`` (adv20).
    * ``diff7`` -- 7-row change of ``adj_close``.
    * ``ts_rank`` -- rank (1..60, not normalised) of the newest ``|diff7|``
      inside each complete 60-row window.
    * ``factor`` column -- ``ts_rank * sign(diff7)`` where
      ``mean_volume < adj_volume``; 1 everywhere else, which includes rows
      where ``ts_rank`` is still NaN.

    Both branches are the negation of the formula above (no -1 is applied
    here).  Returns the same DataFrame object.
    """
    # pd.rolling_mean and DataFrame.ix were removed from pandas.
    data['mean_volume'] = data['adj_volume'].rolling(window=20).mean()
    data['diff7'] = data['adj_close'].diff(7)
    data['ts_rank'] = data['diff7'].abs().rolling(window=60).apply(lambda x: st.rankdata(x)[-1])
    condition = data['mean_volume'] < data['adj_volume']
    signed_rank = data['ts_rank'] * np.sign(data['diff7'])
    # Rows outside the condition, and rows with an undefined rank, get 1.
    data[factor] = signed_rank.where(condition).fillna(1)

    return data


# Apply Alpha07_fun to each 'code' group; no filtering or analysis follows in this section.
data_factor = data_clean.groupby('code').apply(Alpha07_fun)

# Alpha#8: (-1 * rank(((sum(open, 5) * sum(returns, 5)) - delay((sum(open, 5) * sum(returns, 5)),10))))
factor = 'Alpha#8'
print(factor)


def Alpha08_fun(data):
    """Add the pre-rank term of Alpha#8 to one ``code`` group.

    Reads ``adj_open`` and ``rtn`` and adds, in place:

    * ``sum_open_rtn`` -- 5-row sum of ``adj_open`` times 5-row sum of ``rtn``.
    * ``part1`` -- ``sum_open_rtn`` minus its value 10 rows earlier.

    Returns the same DataFrame object.
    """
    # pd.rolling_sum was removed from pandas; use the method-style API.
    open_sum5 = data['adj_open'].rolling(window=5).sum()
    rtn_sum5 = data['rtn'].rolling(window=5).sum()
    data['sum_open_rtn'] = open_sum5 * rtn_sum5
    # x - delay(x, 10) is the 10-row difference.
    data['part1'] = data['sum_open_rtn'].diff(10)

    return data


# Per stock: add sum_open_rtn and part1.
data_factor1 = data_clean.groupby('code').apply(Alpha08_fun)

# Per date: cross-sectional percentile rank of part1.  The leading -1 of the
# formula is not applied in this section.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(lambda x: (x['part1']).rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Alpha#9: ((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1) : ((ts_max(delta(close, 1), 5) < 0) ?delta(close, 1) : (-1 * delta(close, 1))))
factor = 'Alpha#9'
print(factor)


def Alpha09_fun(data):
    """Compute Alpha#9 for one ``code`` group.

    Reads ``adj_close`` and adds, in place:

    * ``delta_close`` -- 1-row change of ``adj_close``.
    * ``ts_min`` / ``ts_max`` -- 5-row rolling min / max of ``delta_close``.
    * ``factor`` column -- ``delta_close`` when the last five changes were
      all positive (``ts_min > 0``) or all negative (``ts_max < 0``),
      otherwise ``-delta_close``.

    While ``ts_min`` / ``ts_max`` are still NaN both comparisons are False,
    so those rows take the ``-delta_close`` branch.  Returns the same
    DataFrame object.
    """
    # pd.rolling_min / pd.rolling_max and DataFrame.ix were removed from pandas.
    data['delta_close'] = data['adj_close'].diff(1)
    data['ts_min'] = data['delta_close'].rolling(window=5).min()
    data['ts_max'] = data['delta_close'].rolling(window=5).max()
    one_way = (data['ts_min'] > 0) | (data['ts_max'] < 0)
    # The final branch of the formula is -1 * delta; the earlier code assigned
    # +delta there, which made all three branches identical.
    data[factor] = data['delta_close'].where(one_way, -1 * data['delta_close'])

    return data


# Apply Alpha09_fun to each 'code' group; no filtering or analysis follows in this section.
data_factor = data_clean.groupby('code').apply(Alpha09_fun)

# Alpha#10: rank(((0 < ts_min(delta(close, 1), 4)) ? delta(close, 1) : ((ts_max(delta(close, 1), 4) < 0)? delta(close, 1) : (-1 * delta(close, 1)))))
factor = 'Alpha#10'
print(factor)


def Alpha10_fun(data):
    """Compute the pre-rank value of Alpha#10 for one ``code`` group.

    Reads ``adj_close`` and adds, in place:

    * ``delta_close`` -- 1-row change of ``adj_close``.
    * ``ts_min`` / ``ts_max`` -- 4-row rolling min / max of ``delta_close``.
    * ``factor`` column -- ``delta_close`` when the last four changes were
      all positive (``ts_min > 0``) or all negative (``ts_max < 0``),
      otherwise ``-delta_close``.

    While ``ts_min`` / ``ts_max`` are still NaN both comparisons are False,
    so those rows take the ``-delta_close`` branch.  The outer rank() of the
    formula is not applied here.  Returns the same DataFrame object.
    """
    # pd.rolling_min / pd.rolling_max and DataFrame.ix were removed from pandas.
    data['delta_close'] = data['adj_close'].diff(1)
    data['ts_min'] = data['delta_close'].rolling(window=4).min()
    data['ts_max'] = data['delta_close'].rolling(window=4).max()
    one_way = (data['ts_min'] > 0) | (data['ts_max'] < 0)
    # The final branch of the formula is -1 * delta; the earlier code assigned
    # +delta there, which made all three branches identical.
    data[factor] = data['delta_close'].where(one_way, -1 * data['delta_close'])

    return data


# Apply Alpha10_fun to each 'code' group.
# NOTE(review): the outer cross-sectional rank() of the Alpha#10 formula is not applied here.
data_factor = data_clean.groupby('code').apply(Alpha10_fun)

# Alpha#11: ((rank(ts_max((vwap - close), 3)) + rank(ts_min((vwap - close), 3))) *rank(delta(volume, 3)))
factor = 'Alpha#11'
print(factor)


def Alpha11_fun(data):
    """Add the three pre-rank terms of Alpha#11 to one ``code`` group.

    Reads ``adj_vwap``, ``adj_close`` and ``adj_volume`` and adds, in place:

    * ``part1`` -- 3-row rolling max of ``adj_vwap - adj_close``.
    * ``part2`` -- 3-row rolling min of ``adj_vwap - adj_close``.
    * ``part3`` -- 3-row change of ``adj_volume``.

    Returns the same DataFrame object.
    """
    # pd.rolling_max / pd.rolling_min were removed from pandas.
    spread = (data['adj_vwap'] - data['adj_close']).rolling(window=3)
    data['part1'] = spread.max()
    data['part2'] = spread.min()
    data['part3'] = data['adj_volume'].diff(3)

    return data


# Per stock: add part1 / part2 / part3.
data_factor1 = data_clean.groupby('code').apply(Alpha11_fun)

# Per date: (rank(part1) + rank(part2)) * rank(part3), all percentile ranks.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(
    lambda x: (x['part1'].rank(pct=True) + x['part2'].rank(pct=True)) * x['part3'].rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Alpha#12: (sign(delta(volume, 1)) * (-1 * delta(close, 1)))
factor = 'Alpha#12'
print(factor)


def Alpha12_fun(data):
    """Compute sign(delta(volume, 1)) * delta(close, 1) for one ``code`` group.

    Adds ``delta_volume``, ``delta_close`` and the ``factor`` column in place
    and returns the same DataFrame object.  The -1 of the formula is not
    applied here.
    """
    volume_change = data['adj_volume'].diff(1)
    close_change = data['adj_close'].diff(1)

    data['delta_volume'] = volume_change
    data['delta_close'] = close_change
    data[factor] = close_change * np.sign(volume_change)

    return data


# Apply Alpha12_fun to each 'code' group; no filtering or analysis follows in this section.
data_factor = data_clean.groupby('code').apply(Alpha12_fun)

# Alpha#13: (-1 * rank(covariance(rank(close), rank(volume), 5)))
factor = 'Alpha#13'
print(factor)

# Step 1 (per date): cross-sectional percentile ranks of close and volume.
data_factor1 = pd.DataFrame()
data_factor1['part1'] = data_clean.groupby('date').apply(lambda x: (x['adj_close']).rank(pct=True))
data_factor1['part2'] = data_clean.groupby('date').apply(lambda x: (x['adj_volume']).rank(pct=True))
data_factor1['code'] = data_clean.groupby('date').apply(lambda x: x['code'])
data_factor1.reset_index(inplace=True)

# Step 2 (per stock): 5-row rolling covariance of the two rank series.
# Series.rolling(...).cov replaces pd.rolling_cov, which was removed from pandas.
data_factor2 = pd.DataFrame()
data_factor2[factor] = data_factor1.groupby('code').apply(
    lambda x: x['part1'].rolling(window=5).cov(x['part2']))
data_factor2['date'] = data_factor1.groupby('code').apply(lambda x: x['date'])
data_factor2.reset_index(inplace=True)
del data_factor1

# Step 3 (per date): cross-sectional percentile rank of the covariance.
# The leading -1 of the formula is not applied in this section.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor2.groupby('date').apply(lambda x: (x[factor]).rank(pct=True))
data_factor['code'] = data_factor2.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor2

# Alpha#14: ((-1 * rank(delta(returns, 3))) * correlation(open, volume, 10))
factor = 'Alpha#14'
print(factor)

# Step 1 (per stock): 10-row rolling correlation of open and volume, and the
# 3-row change of returns.
# Series.rolling(...).corr replaces pd.rolling_corr, which was removed from pandas.
data_factor1 = pd.DataFrame()
data_factor1['part1'] = data_clean.groupby('code').apply(
    lambda x: x['adj_open'].rolling(window=10).corr(x['adj_volume']))
data_factor1['part2'] = data_clean.groupby('code').apply(lambda x: x['rtn'].diff(3))
data_factor1['date'] = data_clean.groupby('code').apply(lambda x: x['date'])
data_factor1.reset_index(inplace=True)

# Step 2 (per date): rank(part2) * part1.  The -1 of the formula is not
# applied in this section.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(lambda x: (x['part2']).rank(pct=True) * x['part1'])
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Alpha#15: (-1 * sum(rank(correlation(rank(high), rank(volume), 3)), 3))
factor = 'Alpha#15'
print(factor)

# Step 1 (per date): cross-sectional percentile ranks of high and volume.
data_factor1 = pd.DataFrame()
data_factor1['_part1'] = data_clean.groupby('date').apply(lambda x: (x['adj_high']).rank(pct=True))
data_factor1['_part2'] = data_clean.groupby('date').apply(lambda x: (x['adj_volume']).rank(pct=True))
data_factor1['code'] = data_clean.groupby('date').apply(lambda x: x['code'])
data_factor1.reset_index(inplace=True)

# Step 2 (per stock): 3-row rolling correlation of the two rank series.
# Series.rolling(...).corr / .sum replace the removed pd.rolling_corr / pd.rolling_sum.
data_factor2 = pd.DataFrame()
data_factor2['part1'] = data_factor1.groupby('code').apply(
    lambda x: x['_part1'].rolling(window=3).corr(x['_part2']))
data_factor2['date'] = data_factor1.groupby('code').apply(lambda x: x['date'])
data_factor2.reset_index(inplace=True)
del data_factor1

# Step 3 (per date): cross-sectional percentile rank of the correlation.
data_factor3 = pd.DataFrame()
data_factor3['__part1'] = data_factor2.groupby('date').apply(lambda x: (x['part1']).rank(pct=True))
data_factor3['code'] = data_factor2.groupby('date').apply(lambda x: x['code'])
data_factor3.reset_index(inplace=True)
del data_factor2

# Step 4 (per stock): 3-row rolling sum of the ranked correlation.
# The leading -1 of the formula is not applied in this section.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor3.groupby('code').apply(lambda x: x['__part1'].rolling(window=3).sum())
data_factor['date'] = data_factor3.groupby('code').apply(lambda x: x['date'])
data_factor.reset_index(inplace=True)
del data_factor3

# Alpha#16: (-1 * rank(covariance(rank(high), rank(volume), 5)))
factor = 'Alpha#16'
print(factor)

# Step 1 (per date): cross-sectional percentile ranks of high and volume.
data_factor1 = pd.DataFrame()
data_factor1['part1'] = data_clean.groupby('date').apply(lambda x: (x['adj_high']).rank(pct=True))
data_factor1['part2'] = data_clean.groupby('date').apply(lambda x: (x['adj_volume']).rank(pct=True))
data_factor1['code'] = data_clean.groupby('date').apply(lambda x: x['code'])
data_factor1.reset_index(inplace=True)

# Step 2 (per stock): 5-row rolling covariance of the two rank series.
# Series.rolling(...).cov replaces pd.rolling_cov, which was removed from pandas.
data_factor2 = pd.DataFrame()
data_factor2[factor] = data_factor1.groupby('code').apply(
    lambda x: x['part1'].rolling(window=5).cov(x['part2']))
data_factor2['date'] = data_factor1.groupby('code').apply(lambda x: x['date'])
data_factor2.reset_index(inplace=True)
del data_factor1

# Step 3 (per date): cross-sectional percentile rank of the covariance.
# The leading -1 of the formula is not applied in this section.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor2.groupby('date').apply(lambda x: (x[factor]).rank(pct=True))
data_factor['code'] = data_factor2.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor2

# Alpha#17: (((-1 * rank(ts_rank(close, 10))) * rank(delta(delta(close, 1), 1))) *rank(ts_rank((volume / adv20), 5)))
factor = 'Alpha#17'
print(factor)


def Alpha17_fun(data):
    """Add the three pre-rank terms of Alpha#17 to one ``code`` group.

    Reads ``adj_close`` and ``adj_volume`` and adds, in place:

    * ``part1`` -- rank of the newest close in each 10-row window, / 10.
    * ``part2`` -- second difference of ``adj_close``.
    * ``part3`` -- rank of the newest ``adj_volume / adv20`` in each 5-row
      window, / 5, where adv20 is the 20-row rolling mean of ``adj_volume``.

    Returns the same DataFrame object.
    """
    data['part1'] = data['adj_close'].rolling(window=10).apply(lambda x: st.rankdata(x)[-1] / 10.0)
    data['part2'] = data['adj_close'].diff(1).diff(1)
    # pd.rolling_mean was removed from pandas; use the method-style API.
    relative_volume = data['adj_volume'] / data['adj_volume'].rolling(window=20).mean()
    data['part3'] = relative_volume.rolling(window=5).apply(lambda x: st.rankdata(x)[-1] / 5.0)

    return data


# Per stock: add part1 / part2 / part3.
data_factor1 = data_clean.groupby('code').apply(Alpha17_fun)

# Per date: product of the three cross-sectional percentile ranks.
# The -1 of the formula is not applied in this section.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(
    lambda x: x['part1'].rank(pct=True) * x['part2'].rank(pct=True) * x['part3'].rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Alpha#18: (-1 * rank(((stddev(abs((close - open)), 5) + (close - open)) + correlation(close, open,10))))
factor = 'Alpha#18'
print(factor)


def Alpha18_fun(data):
    """Add the pre-rank value of Alpha#18 to one ``code`` group.

    Reads ``adj_close`` and ``adj_open`` and adds, in place:

    * ``part1`` -- 5-row rolling std of ``|adj_close - adj_open|``.
    * ``part2`` -- ``adj_close - adj_open``.
    * ``part3`` -- 10-row rolling correlation of ``adj_close`` and ``adj_open``.
    * ``factor`` column -- ``part1 + part2 + part3``.

    Returns the same DataFrame object.
    """
    body = data['adj_close'] - data['adj_open']
    # pd.rolling_std / pd.rolling_corr were removed from pandas.
    data['part1'] = body.abs().rolling(window=5).std()
    data['part2'] = body
    data['part3'] = data['adj_close'].rolling(window=10).corr(data['adj_open'])
    data[factor] = data['part1'] + data['part2'] + data['part3']

    return data


# Per stock: add part1 / part2 / part3 and their sum.
data_factor1 = data_clean.groupby('code').apply(Alpha18_fun)

# Per date: cross-sectional percentile rank of the sum.
# The leading -1 of the formula is not applied in this section.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(lambda x: x[factor].rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Alpha#19: ((-1 * sign(((close - delay(close, 7)) + delta(close, 7)))) * (1 + rank((1 + sum(returns,250)))))
factor = 'Alpha#19'
print(factor)


def Alpha19_fun(data):
    """Add the time-series terms of Alpha#19 to one ``code`` group.

    Reads ``adj_close`` and ``rtn`` and adds, in place:

    * ``part1`` -- twice the 7-row change of ``adj_close``
      (``close - delay(close, 7)`` and ``delta(close, 7)`` are the same term).
    * ``part2`` -- 250-row rolling sum of ``rtn``.

    Returns the same DataFrame object.
    """
    data['part1'] = 2 * data['adj_close'].diff(7)
    # pd.rolling_sum was removed from pandas; use the method-style API.
    data['part2'] = data['rtn'].rolling(window=250).sum()

    return data


# Per stock: add part1 / part2.
data_factor1 = data_clean.groupby('code').apply(Alpha19_fun)

# Per date: sign(part1) * (1 + rank(1 + part2)) with a percentile rank.
# The -1 of the formula is not applied in this section.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(
    lambda x: np.sign(x['part1']) * (1 + (1 + x['part2']).rank(pct=True)))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Alpha#20: (((-1 * rank((open - delay(high, 1)))) * rank((open - delay(close, 1)))) * rank((open -delay(low, 1))))
factor = 'Alpha#20'
print(factor)


def Alpha20_fun(data):
    """Add the three opening-gap terms of Alpha#20 to one ``code`` group.

    ``part1`` / ``part2`` / ``part3`` are ``adj_open`` minus the previous
    row's ``adj_high`` / ``adj_close`` / ``adj_low``.  The columns are added
    in place and the same DataFrame object is returned.
    """
    todays_open = data['adj_open']
    previous_row_sources = (
        ('part1', 'adj_high'),
        ('part2', 'adj_close'),
        ('part3', 'adj_low'),
    )
    for target, source in previous_row_sources:
        data[target] = todays_open - data[source].shift(1)

    return data


# Per stock: add the three opening-gap terms part1 / part2 / part3.
data_factor1 = data_clean.groupby('code').apply(Alpha20_fun)

# Per date: product of the three cross-sectional percentile ranks, as in the
# Alpha#20 formula.  The earlier expression was the Alpha#19 one
# (sign(part1) * (1 + rank(1 + part2))) and never used part3.
# The -1 of the formula is not applied in this section.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(
    lambda x: x['part1'].rank(pct=True) * x['part2'].rank(pct=True) * x['part3'].rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# -*- coding: utf-8 -*-
"""
Created on Wed Aug 21 15:44:56 2019

@author: win10
"""

# Alpha#21: ((((sum(close, 8) / 8) + stddev(close, 8)) < (sum(close, 2) / 2)) ? (-1 * 1) : (((sum(close, 2) / 2) < ((sum(close, 8) / 8) - stddev(close, 8))) ? 1 : (((1 < (volume / adv20)) || ((volume / adv20) == 1)) ? 1 : (-1 * 1))))
factor = 'Alpha#21'
print(factor)


def Alpha21_fun(data):
    """Compute Alpha#21 for one ``code`` group.

    Reads ``adj_close`` and ``adj_volume`` and writes the ``factor`` column
    in place:

    * -1 when the 2-row mean close is above the 8-row mean plus the 8-row std;
    * otherwise 1 when the 2-row mean close is below the 8-row mean minus
      the 8-row std;
    * otherwise 1 when ``adj_volume`` is at least its 20-row mean;
    * -1 in every remaining case, including rows where the rolling inputs
      are still NaN (comparisons with NaN are False).

    Returns the same DataFrame object.
    """
    # pd.rolling_mean / pd.rolling_std and DataFrame.ix were removed from pandas.
    close = data['adj_close']
    mean_close8 = close.rolling(window=8).mean()
    std_close = close.rolling(window=8).std()
    mean_close2 = close.rolling(window=2).mean()
    mean_v = data['adj_volume'] / data['adj_volume'].rolling(window=20).mean()
    condition1 = (mean_close8 + std_close) < mean_close2
    condition2 = mean_close2 < (mean_close8 - std_close)
    condition3 = mean_v >= 1

    # The value is +1 exactly when condition1 fails and condition2 or
    # condition3 holds; every other row is -1.
    data[factor] = np.where(~condition1 & (condition2 | condition3), 1.0, -1.0)

    return data


# Apply Alpha21_fun to each 'code' group; no filtering or analysis follows in this section.
data_factor = data_clean.groupby('code').apply(Alpha21_fun)

# Alpha#22: (-1 * (delta(correlation(high, volume, 5), 5) * rank(stddev(close, 20))))
factor = 'Alpha#22'
print(factor)


def Alpha22_fun(data):
    """Add the time-series terms of Alpha#22 to one ``code`` group.

    Reads ``adj_high``, ``adj_volume`` and ``adj_close`` and adds, in place:

    * ``part1`` -- 5-row change of the 5-row rolling correlation between
      ``adj_high`` and ``adj_volume``.
    * ``part2`` -- 20-row rolling std of ``adj_close``.

    Returns the same DataFrame object.
    """
    # pd.rolling_corr / pd.rolling_std were removed from pandas.
    high_volume_corr = data['adj_high'].rolling(window=5).corr(data['adj_volume'])
    data['part1'] = high_volume_corr.diff(5)
    data['part2'] = data['adj_close'].rolling(window=20).std()

    return data


# Per stock: add part1 / part2.
data_factor1 = data_clean.groupby('code').apply(Alpha22_fun)

# Per date: part1 * cross-sectional percentile rank of part2.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(lambda x: x['part1'] * (x['part2']).rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Join back onto data_clean, drop NaN / +-inf factor values, flip the sign.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
# pure_factor is defined outside this section; no t_capital analysis is run for this factor.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

# Alpha#23: (((sum(high, 20) / 20) < high) ? (-1 * delta(high, 2)) : 0)
factor = 'Alpha#23'
print(factor)


def Alpha23_fun(data):
    """Compute Alpha#23 (without the formula's -1) for one ``code`` group.

    Writes the ``factor`` column in place: the 2-row change of ``adj_high``
    on rows where ``adj_high`` is above its 20-row rolling mean, 0 on every
    other row (including rows where the rolling mean is still NaN).
    Returns the same DataFrame object.
    """
    # pd.rolling_mean and DataFrame.ix were removed from pandas.
    above_mean = data['adj_high'].rolling(window=20).mean() < data['adj_high']
    data[factor] = data['adj_high'].diff(2).where(above_mean).fillna(0)

    return data


# Per stock: compute the factor column directly on the data_clean rows.
data_factor = data_clean.groupby('code').apply(Alpha23_fun)

# Drop NaN / +-inf factor values, then flip the sign (the formula's -1).
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
# pure_factor, t_capital and zz500 are defined outside this section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#24: ((((delta((sum(close, 100) / 100), 100) / delay(close, 100)) < 0.05) ||
# ((delta((sum(close, 100) / 100), 100) / delay(close, 100)) == 0.05)) ? (-1 * (close - ts_min(close,
# 100))) : (-1 * delta(close, 3)))
factor = 'Alpha#24'
print(factor)


def Alpha24_fun(data):
    """Compute Alpha#24 (without the formula's -1) for one ``code`` group.

    The earlier body was a copy of Alpha22_fun and never evaluated the
    Alpha#24 formula.  Reads ``adj_close`` and writes the ``factor`` column
    in place:

    * ``growth`` = 100-row change of the 100-row mean close, divided by the
      close 100 rows ago;
    * where ``growth <= 0.05``: ``adj_close`` minus its 100-row minimum;
    * otherwise: the 3-row change of ``adj_close``;
    * NaN while ``growth`` is undefined (the first 199 rows).

    Both branches of the formula carry a -1, which the caller applies with
    ``*= -1``.  Returns the same DataFrame object.
    """
    close = data['adj_close']
    mean_close100 = close.rolling(window=100).mean()
    growth = mean_close100.diff(100) / close.shift(100)
    above_low = close - close.rolling(window=100).min()
    value = above_low.where(growth <= 0.05, close.diff(3))
    # Do not let the warm-up rows fall through to the else branch.
    data[factor] = value.where(growth.notnull())

    return data


# Per stock: compute the factor column directly on the data_clean rows.
data_factor = data_clean.groupby('code').apply(Alpha24_fun)

# Drop NaN / +-inf factor values, then flip the sign (the formula's -1).
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

# Alpha#25: rank(((((-1 * returns) * adv20) * vwap) * (high - close)))

factor = 'Alpha#25'
print(factor)

# Step 1 (per stock): returns * adv20 * vwap * (high - close), where adv20 is
# the 20-row rolling mean of volume (Series.rolling replaces the removed
# pd.rolling_mean).
data_factor1 = pd.DataFrame()
data_factor1['part1'] = data_clean.groupby('code').apply(
    lambda x: x['rtn'] * x['adj_volume'].rolling(window=20).mean() * x['adj_vwap'] * (
        x['adj_high'] - x['adj_close']))
data_factor1['date'] = data_clean.groupby('code').apply(lambda x: x['date'])
data_factor1.reset_index(inplace=True)

# Step 2 (per date): cross-sectional percentile rank.  The earlier code
# ranked inside the per-stock group, i.e. along each stock's own history,
# unlike every other rank() in this file.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(lambda x: x['part1'].rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Join back onto data_clean, drop NaN / +-inf factor values, flip the sign.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#26: (-1 * ts_max(correlation(ts_rank(volume, 5), ts_rank(high, 5), 5), 3))

factor = 'Alpha#26'
print(factor)


def Alpha26_fun(data):
    """Compute Alpha#26 (without the formula's -1) for one ``code`` group.

    Reads ``adj_volume`` and ``adj_high`` and adds, in place:

    * ``part1`` / ``part2`` -- rank of the newest volume / high inside each
      5-row window, divided by 5.
    * ``part1_2`` -- 5-row rolling correlation of ``part1`` and ``part2``.
    * ``factor`` column -- 3-row rolling max of ``part1_2``.

    Returns the same DataFrame object.
    """
    data['part1'] = data['adj_volume'].rolling(window=5).apply(lambda x: st.rankdata(x)[-1] / 5.0)
    data['part2'] = data['adj_high'].rolling(window=5).apply(lambda x: st.rankdata(x)[-1] / 5.0)
    # pd.rolling_corr / pd.rolling_max were removed from pandas.
    data['part1_2'] = data['part1'].rolling(window=5).corr(data['part2'])
    data[factor] = data['part1_2'].rolling(window=3).max()

    return data


# Per stock: compute the factor column directly on the data_clean rows.
data_factor = data_clean.groupby('code').apply(Alpha26_fun)

# Drop NaN / +-inf factor values, then flip the sign (the formula's -1).
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
# pure_factor, t_capital and zz500 are defined outside this section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#27: ((0.5 < rank((sum(correlation(rank(volume), rank(vwap), 6), 2) / 2.0))) ? (-1 * 1) : 1)

factor = 'Alpha#27'
print(factor)


def Alpha27_fun(data):
    """Add the time-series terms used for Alpha#27 to one ``code`` group.

    Reads ``adj_volume`` and ``adj_vwap`` and adds, in place:

    * ``part1`` -- 6-row rolling correlation of ``adj_volume`` and ``adj_vwap``.
    * ``part2`` -- 2-row rolling mean of ``part1``.

    NOTE(review): the formula correlates the cross-sectional ranks of volume
    and vwap; this function correlates the raw columns -- confirm intended.
    Returns the same DataFrame object.
    """
    # pd.rolling_corr / pd.rolling_mean were removed from pandas.
    data['part1'] = data['adj_volume'].rolling(window=6).corr(data['adj_vwap'])
    data['part2'] = data['part1'].rolling(window=2).mean()

    return data


# Per stock: add part1 / part2.
data_factor1 = data_clean.groupby('code').apply(Alpha27_fun)

# Per date: cross-sectional percentile rank of part2.
# NOTE(review): the formula's (0.5 < rank) ? -1 : 1 threshold is not applied;
# the rank itself is used as the factor value.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(lambda x: (x['part2']).rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Join back onto data_clean, drop NaN / +-inf factor values, flip the sign.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
# pure_factor, t_capital and zz500 are defined outside this section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#28: scale(((correlation(adv20, low, 5) + ((high + low) / 2)) - close))

factor = 'Alpha#28'
print(factor)


def Alpha28_fun(data):
    """Compute the Alpha#28 variant used by this script for one ``code`` group.

    Reads ``adj_volume``, ``adj_low``, ``adj_high`` and ``adj_close`` and
    adds, in place:

    * ``part1`` -- 5-row rolling correlation between adv20 (20-row rolling
      mean of ``adj_volume``) and ``adj_low``.
    * ``part2`` -- ``(adj_high + adj_low) / 2 / adj_close``.
    * ``factor`` column -- ``part1 + part2``.

    NOTE(review): the formula subtracts close from the mid price and applies
    scale(); this code divides by close and does not scale -- confirm intended.
    Returns the same DataFrame object.
    """
    # pd.rolling_mean / pd.rolling_corr were removed from pandas.
    adv20 = data['adj_volume'].rolling(window=20).mean()
    data['part1'] = adv20.rolling(window=5).corr(data['adj_low'])
    data['part2'] = (data['adj_high'] + data['adj_low']) / 2 / data['adj_close']
    data[factor] = data['part1'] + data['part2']

    return data


# Per stock: compute the factor column directly on the data_clean rows.
data_factor = data_clean.groupby('code').apply(Alpha28_fun)

# Drop NaN / +-inf factor values, then flip the sign.
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
# pure_factor, t_capital and zz500 are defined outside this section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#29: (min(product(rank(rank(scale(log(sum(ts_min(rank(rank((-1 * rank(delta((close - 1),5))))), 2), 1))))), 1), 5) + ts_rank(delay((-1 * returns), 6), 5))
factor = 'Alpha#29'
print(factor)


def Alpha29_fun(data):
    """Add the two time-series terms used for Alpha#29 to one ``code`` group.

    * ``part1`` -- 5-row change of ``adj_close``.
    * ``part2`` -- rank of the newest value of ``-rtn`` delayed by 6 rows
      inside each 5-row window, divided by 5.

    Both columns are added in place; the same DataFrame object is returned.
    """

    def _newest_rank_share(window):
        # Rank of the last element among the five window values, as a fraction.
        return st.rankdata(window)[-1] / 5.0

    delayed_negative_rtn = -1 * data['rtn'].shift(6)

    data['part1'] = data['adj_close'].diff(5)
    data['part2'] = delayed_negative_rtn.rolling(window=5).apply(_newest_rank_share)

    return data


# Per stock: add part1 (5-row close change) and part2 (ts_rank term).
data_factor1 = data_clean.groupby('code').apply(Alpha29_fun)

# Per date: cross-sectional percentile rank of part1; part2 is carried along.
data_factor2 = pd.DataFrame()
data_factor2['_part1'] = data_factor1.groupby('date').apply(lambda x: x['part1'].rank(pct=True))
data_factor2['part2'] = data_factor1.groupby('date').apply(lambda x: x['part2'])
data_factor2['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor2.reset_index(inplace=True)
del data_factor1

# Per stock: 5-row rolling minimum of the ranked part1.
# Series.rolling(...).min replaces pd.rolling_min, which was removed from pandas.
data_factor3 = pd.DataFrame()
data_factor3['__part1'] = data_factor2.groupby('code').apply(lambda x: x['_part1'].rolling(window=5).min())
data_factor3['date'] = data_factor2.groupby('code').apply(lambda x: x['date'])
data_factor3['part2'] = data_factor2.groupby('code').apply(lambda x: x['part2'])
data_factor3.reset_index(inplace=True)
del data_factor2

# Per date: sum of the two terms.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor3.groupby('date').apply(lambda x: x['__part1'] + x['part2'])
data_factor['code'] = data_factor3.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor3

# Join back onto data_clean, drop NaN / +-inf factor values, flip the sign.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor[factor] *= -1
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#30: (((1.0 - rank(((sign((close - delay(close, 1))) + sign((delay(close, 1) - delay(close, 2)))) +sign((delay(close, 2) - delay(close, 3)))))) * sum(volume, 5)) / sum(volume, 20))
factor = 'Alpha#30'
print(factor)


def Alpha30_fun(data):
    """Add the time-series terms of Alpha#30 to one ``code`` group.

    Reads ``adj_close`` and ``adj_volume`` and adds, in place:

    * ``part1`` -- sum of the signs of the last three 1-row close changes.
    * ``part2`` -- 5-row mean of ``adj_volume`` divided by its 20-row mean
      (4 times the sum(volume, 5) / sum(volume, 20) ratio of the formula).

    Returns the same DataFrame object.
    """
    close = data['adj_close']
    data['part1'] = (np.sign(close.diff(1))
                     + np.sign(close.shift(1) - close.shift(2))
                     + np.sign(close.shift(2) - close.shift(3)))
    # pd.rolling_mean was removed from pandas; use the method-style API.
    volume = data['adj_volume']
    data['part2'] = volume.rolling(window=5).mean() / volume.rolling(window=20).mean()

    return data


# Per stock: add part1 (sum of signs) and part2 (volume ratio).
data_factor1 = data_clean.groupby('code').apply(Alpha30_fun)

# Per date: (1 - rank(part1)) * part2 with a percentile rank.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(lambda x: (1 - x['part1'].rank(pct=True)) * x['part2'])
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
del data_factor1

# Join back onto data_clean and drop NaN / +-inf factor values.
# Unlike most other sections, the factor sign is not flipped here.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# pure_factor, t_capital and zz500 are defined outside this section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)

data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#31: ((rank(rank(rank(decay_linear((-1 * rank(rank(delta(close, 10)))), 10)))) + rank((-1 *delta(close, 3)))) + sign(scale(correlation(adv20, low, 12))))
factor = 'Alpha#31'
print(factor)


def Alpha31_fun(data):
    """Add the time-series terms used for Alpha#31 to one ``code`` group.

    Reads ``adj_close``, ``adj_volume`` and ``adj_low`` and adds, in place:

    * ``part1`` -- 10-row change of ``adj_close``.
    * ``part2`` -- 3-row change of ``adj_close``.
    * ``part3`` -- sign of the 12-row rolling correlation between adv20
      (20-row rolling mean of ``adj_volume``) and ``adj_low``.

    Returns the same DataFrame object.
    """
    data['part1'] = data['adj_close'].diff(10)
    data['part2'] = data['adj_close'].diff(3)
    # pd.rolling_mean / pd.rolling_corr were removed from pandas.
    adv20 = data['adj_volume'].rolling(window=20).mean()
    data['part3'] = np.sign(adv20.rolling(window=12).corr(data['adj_low']))

    return data


# Per stock: add part1 / part2 / part3.
data_factor1 = data_clean.groupby('code').apply(Alpha31_fun)

# Per date: cross-sectional percentile ranks of part1 and part2; part3 is carried along.
data_factor2 = pd.DataFrame()
data_factor2['part1'] = data_factor1.groupby('date').apply(lambda x: x['part1'].rank(pct=True))
data_factor2['part2'] = data_factor1.groupby('date').apply(lambda x: x['part2'].rank(pct=True))
data_factor2['part3'] = data_factor1.groupby('date').apply(lambda x: x['part3'])
data_factor2['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor2.reset_index(inplace=True)
del data_factor1

# Per stock: 10-row rolling mean of the ranked part1 (an equal-weight mean
# is used where the formula has decay_linear).
# Series.rolling(...).mean replaces pd.rolling_mean, which was removed from pandas.
data_factor = pd.DataFrame()
data_factor['_part1'] = data_factor2.groupby('code').apply(lambda x: x['part1'].rolling(window=10).mean())
data_factor['_part2'] = data_factor2.groupby('code').apply(lambda x: x['part2'])
data_factor['_part3'] = data_factor2.groupby('code').apply(lambda x: x['part3'])
data_factor['date'] = data_factor2.groupby('code').apply(lambda x: x['date'])
data_factor.reset_index(inplace=True)
data_factor[factor] = data_factor['_part1'] + data_factor['_part2'] + data_factor['_part3']
del data_factor2

# Join back onto data_clean, flip the sign, drop NaN / +-inf factor values.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
data_factor[factor] *= -1
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#32: (scale(((sum(close, 7) / 7) - close)) + (20 * scale(correlation(vwap, delay(close, 5),230))))
factor = 'Alpha#32'
print(factor)


def Alpha32_fun(data):
    """Compute the Alpha#32 variant used by this script for one ``code`` group.

    Reads ``adj_close`` and ``adj_vwap`` and adds, in place:

    * ``part1`` -- 7-row rolling mean of ``adj_close`` divided by ``adj_close``.
    * ``part2`` -- 20-row rolling correlation between ``adj_vwap`` and
      ``adj_close`` delayed by 5 rows.
    * ``factor`` column -- ``part1 + part2``.

    NOTE(review): the formula uses a difference (not a ratio), a 230-row
    correlation window, scale() and a weight of 20 -- confirm that the
    simplifications here are intended.
    Returns the same DataFrame object.
    """
    # pd.rolling_mean / pd.rolling_corr were removed from pandas.
    data['part1'] = data['adj_close'].rolling(window=7).mean() / data['adj_close']
    data['part2'] = data['adj_vwap'].rolling(window=20).corr(data['adj_close'].shift(5))
    data[factor] = data['part1'] + data['part2']

    return data


# Per stock: compute the factor column directly on the data_clean rows.
data_factor = data_clean.groupby('code').apply(Alpha32_fun)

# Flip the sign, then drop NaN / +-inf factor values.
data_factor[factor] *= -1
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# pure_factor, t_capital and zz500 are defined outside this section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#33: rank((-1 * ((1 - (open / close))^1)))
factor = 'Alpha#33'
print(factor)


def Alpha33_fun(data):
    """Write ``1 - adj_open / adj_close`` into the ``factor`` column.

    The column is added in place and the same DataFrame object is returned;
    neither the -1 nor the rank() of the formula is applied here.
    """
    open_to_close = data['adj_open'] / data['adj_close']
    data[factor] = 1 - open_to_close

    return data


# Per stock: compute the factor column directly on the data_clean rows.
# NOTE(review): the cross-sectional rank() of the formula is not applied in this section.
data_factor = data_clean.groupby('code').apply(Alpha33_fun)

# Flip the sign (the formula's -1), then drop NaN / +-inf factor values.
data_factor[factor] *= -1
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# pure_factor, t_capital and zz500 are defined outside this section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
data_factor.index = range(len(data_factor))
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#34: rank(((1 - rank((stddev(returns, 2) / stddev(returns, 5)))) + (1 - rank(delta(close, 1)))))
factor = 'Alpha#34'
print(factor)


def Alpha34_fun(data):
    """Add the time-series terms of Alpha#34 to one ``code`` group.

    Reads ``rtn`` and ``adj_close`` and adds, in place:

    * ``part1`` -- 2-row rolling std of ``rtn`` divided by its 5-row rolling std.
    * ``part2`` -- 1-row change of ``adj_close``.

    Returns the same DataFrame object.
    """
    # pd.rolling_std was removed from pandas; use the method-style API.
    data['part1'] = data['rtn'].rolling(window=2).std() / data['rtn'].rolling(window=5).std()
    data['part2'] = data['adj_close'].diff(1)

    return data


# Per-stock pass: Alpha34_fun adds 'part1' and 'part2' to each stock's rows.
data_factor1 = data_clean.groupby('code').apply(Alpha34_fun)

# Cross-sectional pass 1: within each date, turn both components into
# (1 - percentile rank).  'code' is carried along so rows can be re-keyed.
data_factor2 = pd.DataFrame()
data_factor2['_part1'] = data_factor1.groupby('date').apply(lambda x: 1 - x['part1'].rank(pct=True))
data_factor2['_part2'] = data_factor1.groupby('date').apply(lambda x: 1 - x['part2'].rank(pct=True))
data_factor2['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor2.reset_index(inplace=True)
del data_factor1

# Cross-sectional pass 2: within each date, percentile-rank the sum of the two components.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor2.groupby('date').apply(lambda x: (x['_part1'] + x['_part2']).rank(pct=True))
data_factor['code'] = data_factor2.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
# Release the intermediate frame, the same way data_factor1 is released above.
del data_factor2

# Attach the remaining data_clean columns for every (date, code) pair.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
# No sign flip is applied to this factor.
# Keep only rows whose factor value is neither NaN nor +/-inf.
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# Run pure_factor (defined outside this section) on each date's cross-section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
data_factor.index = range(len(data_factor))
# Evaluate the factor with t_capital (defined outside this section).
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#35: ((Ts_Rank(volume, 32) * (1 - Ts_Rank(((close + high) - low), 16))) * (1 -Ts_Rank(returns, 32)))
factor = 'Alpha#35'
print(factor)


def Alpha35_fun(data):
    """Add the three Ts_Rank-based components of Alpha#35 to ``data`` in place.

    Columns written:
        part1: Ts_Rank of ``adj_volume`` over 32 rows.
        part2: 1 - Ts_Rank of ``adj_close + adj_high - adj_low`` over 16 rows.
        part3: 1 - Ts_Rank of ``rtn`` over 32 rows.

    Returns:
        The same DataFrame object that was passed in.  The product of the
        three parts is formed by the caller.
    """

    def _ts_rank(series, window):
        # Rank of the newest value inside its trailing window, divided by the
        # window length.
        return series.rolling(window=window).apply(
            lambda w: st.rankdata(w)[-1] / float(window))

    price_span = data['adj_close'] + data['adj_high'] - data['adj_low']

    data['part1'] = _ts_rank(data['adj_volume'], 32)
    data['part2'] = 1 - _ts_rank(price_span, 16)
    data['part3'] = 1 - _ts_rank(data['rtn'], 32)

    return data


# Per-stock pass: Alpha35_fun adds 'part1', 'part2' and 'part3' to each stock's rows.
data_factor1 = data_clean.groupby('code').apply(Alpha35_fun)

# Build the factor frame date by date.  The product is row-wise; 'code' is
# carried along in the same way so that rows can be re-keyed afterwards.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(lambda x: (x['part1'] * x['part2'] * x['part3']))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
# Move the index levels left by groupby/apply into columns; the merge below
# needs 'date' as a column (presumably supplied by the group key -- verify).
data_factor.reset_index(inplace=True)
# Release the per-stock intermediate frame.
del data_factor1

# Attach the remaining data_clean columns for every (date, code) pair.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
# The sign flip is disabled for this factor (left commented out below).
# data_factor[factor]*=-1
# Keep only rows whose factor value is neither NaN nor +/-inf.
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# Run pure_factor (defined outside this section) on each date's cross-section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
# Replace whatever index groupby/apply produced with a plain 0..n-1 range.
data_factor.index = range(len(data_factor))
# Evaluate the factor with t_capital (defined outside this section) and show the result.
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#36: (((((2.21 * rank(correlation((close - open), delay(volume, 1), 15))) + (0.7 * rank((open
# - close)))) + (0.73 * rank(Ts_Rank(delay((-1 * returns), 6), 5)))) + rank(abs(correlation(vwap,
# adv20, 6)))) + (0.6 * rank((((sum(close, 200) / 200) - open) * (close - open)))))
factor = 'Alpha#36'
print(factor)


def Alpha36_fun(data):
    """Add the time-series inputs of Alpha#36 to ``data`` in place.

    Called once per stock via ``groupby('code').apply``.

    Columns written:
        part1: 15-row rolling correlation of ``adj_close - adj_open`` with
            ``adj_volume`` lagged by one row.
        part3: Ts_Rank over 5 rows of ``-rtn`` lagged by 6 rows.
        part4: absolute 6-row rolling correlation of ``adj_vwap`` with the
            20-row rolling mean of ``adj_volume`` (adv20).
        part5: ``(200-row mean of adj_close - adj_open) * (adj_close - adj_open)``.

    There is no ``part2``: the ``open - close`` term needs no time-series
    step and is formed by the caller.

    Returns:
        The same DataFrame object that was passed in.
    """
    candle_body = data['adj_close'] - data['adj_open']
    # pd.rolling_corr / pd.rolling_mean no longer exist in pandas; the
    # .rolling() accessor is the equivalent (min_periods == window).
    adv20 = data['adj_volume'].rolling(window=20).mean()

    data['part1'] = candle_body.rolling(window=15).corr(data['adj_volume'].shift(1))
    data['part3'] = (-1 * data['rtn'].shift(6)).rolling(window=5).apply(lambda x: st.rankdata(x)[-1] / 5.0)
    data['part4'] = data['adj_vwap'].rolling(window=6).corr(adv20).abs()
    data['part5'] = (data['adj_close'].rolling(window=200).mean() - data['adj_open']) * candle_body

    return data


# Per-stock pass: Alpha36_fun adds 'part1', 'part3', 'part4' and 'part5' to each stock's rows.
data_factor1 = data_clean.groupby('code').apply(Alpha36_fun)

# Cross-sectional pass: within each date, percentile-rank every one of the five
# terms before weighting and summing them, as the formula requires.  Previously
# part3, part4 and part5 were added unranked, so terms on different scales
# were mixed with the ranked ones.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(
    lambda x: 2.21 * x['part1'].rank(pct=True) + 0.7 * (x['adj_open'] - x['adj_close']).rank(pct=True) +
              0.73 * x['part3'].rank(pct=True) + x['part4'].rank(pct=True) +
              0.6 * x['part5'].rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
# Release the per-stock intermediate frame.
del data_factor1

# Attach the remaining data_clean columns for every (date, code) pair.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
# No sign flip is applied to this factor.
# Keep only rows whose factor value is neither NaN nor +/-inf.
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# Run pure_factor (defined outside this section) on each date's cross-section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
data_factor.index = range(len(data_factor))
# Evaluate the factor with t_capital (defined outside this section).
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#37: (rank(correlation(delay((open - close), 1), close, 200)) + rank((open - close)))
factor = 'Alpha#37'
print(factor)


def Alpha37_fun(data):
    """Add the rolling-correlation input of Alpha#37 to ``data`` in place.

    Called once per stock via ``groupby('code').apply``.

    Column written:
        part1: 200-row rolling correlation between ``adj_open - adj_close``
            lagged by one row and ``adj_close``.

    Returns:
        The same DataFrame object that was passed in.  Both cross-sectional
        ranks from the formula are applied by the caller.
    """
    lagged_gap = (data['adj_open'] - data['adj_close']).shift(1)
    # pd.rolling_corr no longer exists in pandas; Series.rolling(...).corr()
    # is the equivalent call (min_periods == window).
    data['part1'] = lagged_gap.rolling(window=200).corr(data['adj_close'])

    return data


# Per-stock pass: Alpha37_fun adds 'part1' to each stock's rows.
data_factor1 = data_clean.groupby('code').apply(Alpha37_fun)

# Cross-sectional pass: within each date, add the percentile rank of 'part1'
# to the percentile rank of (open - close).
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(
    lambda x: x['part1'].rank(pct=True) + (x['adj_open'] - x['adj_close']).rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
data_factor.reset_index(inplace=True)
# Release the per-stock intermediate frame, as the other factor sections do.
del data_factor1

# Attach the remaining data_clean columns for every (date, code) pair.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
# No sign flip is applied to this factor.
# Keep only rows whose factor value is neither NaN nor +/-inf.
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# Run pure_factor (defined outside this section) on each date's cross-section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
data_factor.index = range(len(data_factor))
# Evaluate the factor with t_capital (defined outside this section).
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#38: ((-1 * rank(Ts_Rank(close, 10))) * rank((close / open)))
factor = 'Alpha#38'
print(factor)


def Alpha38_fun(data):
    """Add the Ts_Rank(close, 10) input of Alpha#38 to ``data`` in place.

    Column written:
        part1: rank of the newest ``adj_close`` within its trailing 10-row
            window, divided by 10.

    Returns:
        The same DataFrame object that was passed in.
    """
    window = 10

    def _latest_rank_fraction(values):
        # Position of the last element among the window's values, as a fraction.
        return st.rankdata(values)[-1] / window

    data['part1'] = data['adj_close'].rolling(window=window).apply(_latest_rank_fraction)

    return data


# Per-stock pass: Alpha38_fun adds 'part1' to each stock's rows.
data_factor1 = data_clean.groupby('code').apply(Alpha38_fun)

# Cross-sectional pass: within each date, multiply the percentile rank of
# 'part1' by the percentile rank of close / open.  'code' is carried along in
# the same way so that rows can be re-keyed afterwards.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(
    lambda x: x['part1'].rank(pct=True) * (x['adj_close'] / x['adj_open']).rank(pct=True))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
# Move the index levels left by groupby/apply into columns; the merge below
# needs 'date' as a column (presumably supplied by the group key -- verify).
data_factor.reset_index(inplace=True)
# Release the per-stock intermediate frame.
del data_factor1

# Attach the remaining data_clean columns for every (date, code) pair.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
# Apply the formula's leading -1.
data_factor[factor] *= -1
# Keep only rows whose factor value is neither NaN nor +/-inf.
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# Run pure_factor (defined outside this section) on each date's cross-section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
# Replace whatever index groupby/apply produced with a plain 0..n-1 range.
data_factor.index = range(len(data_factor))
# Evaluate the factor with t_capital (defined outside this section) and show the result.
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#39: ((-1 * rank((delta(close, 7) * (1 - rank(decay_linear((volume / adv20), 9)))))) * (1 +rank(sum(returns, 250))))
factor = 'Alpha#39'
print(factor)


def Alpha39_fun(data):
    """Add the three time-series inputs of Alpha#39 to ``data`` in place.

    Called once per stock via ``groupby('code').apply``.

    Columns written:
        part1: 7-row difference of ``adj_close``.
        part2: linearly decaying weighted average over 9 rows of
            ``adj_volume`` divided by its 20-row rolling mean (adv20).
        part3: 250-row rolling sum of ``rtn``.

    Returns:
        The same DataFrame object that was passed in.
    """
    volume = data['adj_volume']
    # pd.rolling_mean / pd.rolling_sum no longer exist in pandas; the
    # .rolling() accessor is the equivalent (min_periods == window).
    relative_volume = volume / volume.rolling(window=20).mean()

    # decay_linear: weights 1..9 (newest row weighted 9), rescaled to sum to 1.
    # A plain rolling mean was used before, which weights all rows equally and
    # is not what the formula specifies.
    decay_weights = np.arange(1, 10, dtype=float)
    decay_weights /= decay_weights.sum()

    data['part1'] = data['adj_close'].diff(7)
    data['part2'] = relative_volume.rolling(window=9).apply(
        lambda x: float(np.dot(x, decay_weights)))
    data['part3'] = data['rtn'].rolling(window=250).sum()

    return data


# Per-stock pass: Alpha39_fun adds 'part1', 'part2' and 'part3' to each stock's rows.
data_factor1 = data_clean.groupby('code').apply(Alpha39_fun)

# Cross-sectional pass: within each date compute
#   rank(part1 * (1 - rank(part2))) * (1 + rank(part3))
# using percentile ranks.  'code' is carried along in the same way so that
# rows can be re-keyed afterwards.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(
    lambda x: (x['part1'] * (1 - x['part2'].rank(pct=True))).rank(pct=True) * (1 + x['part3'].rank(pct=True)))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
# Move the index levels left by groupby/apply into columns; the merge below
# needs 'date' as a column (presumably supplied by the group key -- verify).
data_factor.reset_index(inplace=True)
# Release the per-stock intermediate frame.
del data_factor1

# Attach the remaining data_clean columns for every (date, code) pair.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
# Apply the formula's leading -1.
data_factor[factor] *= -1
# Keep only rows whose factor value is neither NaN nor +/-inf.
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# Run pure_factor (defined outside this section) on each date's cross-section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
# Replace whatever index groupby/apply produced with a plain 0..n-1 range.
data_factor.index = range(len(data_factor))
# Evaluate the factor with t_capital (defined outside this section) and show the result.
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)

# Alpha#40: ((-1 * rank(stddev(high, 10))) * correlation(high, volume, 10))
factor = 'Alpha#40'
print(factor)


def Alpha40_fun(data):
    """Add the two time-series inputs of Alpha#40 to ``data`` in place.

    Called once per stock via ``groupby('code').apply``.

    Columns written:
        part1: 10-row rolling standard deviation of ``adj_high``.
        part2: 10-row rolling correlation of ``adj_high`` with ``adj_volume``.

    Returns:
        The same DataFrame object that was passed in.
    """
    high = data['adj_high']
    # pd.rolling_std / pd.rolling_corr no longer exist in pandas; the
    # .rolling() accessor is the equivalent (min_periods == window).
    data['part1'] = high.rolling(window=10).std()
    # The formula specifies a 10-row correlation window; 9 was used before.
    data['part2'] = high.rolling(window=10).corr(data['adj_volume'])

    return data


# Per-stock pass: Alpha40_fun adds 'part1' and 'part2' to each stock's rows.
data_factor1 = data_clean.groupby('code').apply(Alpha40_fun)

# Cross-sectional pass: within each date, multiply the percentile rank of
# 'part1' by the raw 'part2' value.  'code' is carried along in the same way
# so that rows can be re-keyed afterwards.
data_factor = pd.DataFrame()
data_factor[factor] = data_factor1.groupby('date').apply(lambda x: (x['part1']).rank(pct=True) * (x['part2']))
data_factor['code'] = data_factor1.groupby('date').apply(lambda x: x['code'])
# Move the index levels left by groupby/apply into columns; the merge below
# needs 'date' as a column (presumably supplied by the group key -- verify).
data_factor.reset_index(inplace=True)
# Release the per-stock intermediate frame.
del data_factor1

# Attach the remaining data_clean columns for every (date, code) pair.
data_factor = pd.merge(data_factor, data_clean, on=['date', 'code'])
# Apply the formula's leading -1.
data_factor[factor] *= -1
# Keep only rows whose factor value is neither NaN nor +/-inf.
data_factor = data_factor[
    (data_factor[factor].notnull()) & (data_factor[factor] != np.inf) & (data_factor[factor] != -np.inf)]
# Run pure_factor (defined outside this section) on each date's cross-section.
data_factor = data_factor.groupby('date').apply(pure_factor, factor)
# Replace whatever index groupby/apply produced with a plain 0..n-1 range.
data_factor.index = range(len(data_factor))
# Evaluate the factor with t_capital (defined outside this section) and show the result.
ana_df = t_capital(factor, data_factor, zz500, asc=True, window=1)
print(ana_df)



发布了127 篇原创文章 · 获赞 31 · 访问量 12万+

猜你喜欢

转载自blog.csdn.net/lzz781699880/article/details/102610181
今日推荐