-
Notifications
You must be signed in to change notification settings - Fork 36
/
Copy pathoutput_pandas.py
158 lines (119 loc) · 4.59 KB
/
output_pandas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# this code is auto generated by the expr_codegen
# https://github.com/wukan1986/expr_codegen
# This code was generated automatically by expr_codegen; issues and pull requests are welcome
import re
import numpy as np
import pandas as pd
import talib as ta
from loguru import logger
# TODO: load the data here, or have it passed in from outside.
# NOTE(review): `df_input` is not defined anywhere in this file; presumably
# the caller injects it into the namespace before this script runs - confirm.
df = df_input
def signed_power(x, y):
    """Raise the magnitude of ``x`` to the power ``y`` while keeping its sign.

    Args:
        x: Numeric input - a pandas Series/DataFrame, a NumPy array, or a
            scalar.
        y: Exponent applied to ``abs(x)``.

    Returns:
        ``sign(x) * abs(x) ** y``; pandas inputs come back as pandas objects
        with their index preserved.
    """
    # pandas objects have no ``.sign()`` method, so the NumPy ufuncs are used
    # instead; they dispatch on Series/DataFrame and also accept plain arrays
    # and scalars.
    return np.sign(x) * (np.abs(x) ** y)
def scale(x, scale=1):
    """Rescale ``x`` so that its absolute values sum to ``scale``.

    Args:
        x: Object exposing ``.abs().sum()`` and division (e.g. a pandas
            Series).
        scale: Target for the sum of absolute values; defaults to 1.

    Returns:
        ``x`` divided by the sum of its absolute values, multiplied by
        ``scale``. No guard is applied when that sum is zero.
    """
    abs_total = x.abs().sum()
    return x / abs_total * scale
def neutralize(x):
    """Center ``x`` by subtracting its mean from every element.

    Args:
        x: Object exposing ``.mean()`` and subtraction (e.g. a pandas Series).

    Returns:
        ``x - x.mean()``.
    """
    center = x.mean()
    return x - center
def func_0_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
    """Add the first layer of 10-row rolling columns, ordered by ``date``.

    Args:
        df: Frame with ``date``, ``OPEN`` and ``CLOSE`` columns. Presumably a
            single asset's rows, as the function name suggests.

    Returns:
        A date-sorted copy of ``df`` with ``_x_0``, ``expr_6``, ``expr_7``,
        ``_x_1`` and ``expr_5`` appended, in that order. The input frame is
        not modified.
    """
    ordered = df.sort_values(by=["date"])
    open_px = ordered["OPEN"]
    close_px = ordered["CLOSE"]
    # One rolling window over OPEN serves both the mean and the correlation.
    open_window = open_px.rolling(10)

    # _x_0 = ts_mean(OPEN, 10)
    ordered["_x_0"] = open_window.mean()
    # expr_6 = ts_delta(OPEN, 10)
    ordered["expr_6"] = open_px.diff(10)
    # expr_7 = ts_rank(OPEN + 1, 10)
    shifted_open = open_px + 1
    ordered["expr_7"] = shifted_open.rolling(10).rank(pct=True)
    # _x_1 = ts_mean(CLOSE, 10)
    ordered["_x_1"] = close_px.rolling(10).mean()
    # expr_5 = -ts_corr(OPEN, CLOSE, 10)
    open_close_corr = open_window.corr(close_px, ddof=0)
    ordered["expr_5"] = -open_close_corr
    return ordered
def func_0_cs__date(df: pd.DataFrame) -> pd.DataFrame:
    """Add ``_x_5``, the fractional rank of ``OPEN`` within ``df``.

    Args:
        df: Frame with an ``OPEN`` column. Presumably the rows of a single
            date, as the function name suggests.

    Returns:
        A new frame equal to ``df`` plus the ``_x_5`` column
        (``rank(pct=True)`` of ``OPEN``). The input frame is left untouched.
    """
    # _x_5 = cs_rank(OPEN)
    # ``assign`` builds a new frame instead of writing into the group handed
    # in by ``groupby(...).apply``; pandas does not support UDFs that mutate
    # the object they receive.
    return df.assign(_x_5=df["OPEN"].rank(pct=True))
def func_1_cs__date(df: pd.DataFrame) -> pd.DataFrame:
    """Add ``_x_2`` and ``_x_3``, the fractional ranks of ``_x_0`` and ``_x_1``.

    Args:
        df: Frame with ``_x_0`` and ``_x_1`` columns. Presumably the rows of
            a single date, as the function name suggests.

    Returns:
        A new frame equal to ``df`` plus ``_x_2`` and ``_x_3`` (in that
        order). The input frame is left untouched.
    """
    # ``assign`` builds a new frame instead of writing into the group handed
    # in by ``groupby(...).apply``; pandas does not support UDFs that mutate
    # the object they receive.
    return df.assign(
        # _x_2 = cs_rank(_x_0)
        _x_2=df["_x_0"].rank(pct=True),
        # _x_3 = cs_rank(_x_1)
        _x_3=df["_x_1"].rank(pct=True),
    )
def func_1_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
    """Add the second layer of 10-row rolling columns, ordered by ``date``.

    Args:
        df: Frame with ``date``, ``_x_5`` and ``expr_7`` columns. Presumably
            a single asset's rows, as the function name suggests.

    Returns:
        A date-sorted copy of ``df`` with ``_x_6`` and ``expr_8`` appended,
        in that order. The input frame is not modified.
    """
    ordered = df.sort_values(by=["date"])

    # _x_6 = ts_mean(_x_5, 10)
    rank_window = ordered["_x_5"].rolling(10)
    ordered["_x_6"] = rank_window.mean()

    # expr_8 = ts_rank(expr_7 + 1, 10)
    shifted_rank = ordered["expr_7"] + 1
    ordered["expr_8"] = shifted_rank.rolling(10).rank(pct=True)
    return ordered
def func_2_cl(df: pd.DataFrame) -> pd.DataFrame:
    """Add ``expr_2 = _x_2 - |log(_x_1)|`` as a row-wise column.

    Args:
        df: Frame with ``_x_1`` and ``_x_2`` columns.

    Returns:
        The same ``df`` object; ``expr_2`` is written into it in place.
    """
    # expr_2 = _x_2 - Abs(log(_x_1))
    log_magnitude = np.log(df["_x_1"]).abs()
    df["expr_2"] = df["_x_2"] - log_magnitude
    return df
def func_2_ts__asset__date(df: pd.DataFrame) -> pd.DataFrame:
    """Add the third layer of 10-row rolling columns, ordered by ``date``.

    Args:
        df: Frame with ``date``, ``_x_2`` and ``_x_3`` columns. Presumably a
            single asset's rows, as the function name suggests.

    Returns:
        A date-sorted copy of ``df`` with ``expr_3`` and ``expr_1`` appended,
        in that order. The input frame is not modified.
    """
    ordered = df.sort_values(by=["date"])
    # One rolling window over _x_2 serves both the mean and the correlation.
    open_rank_window = ordered["_x_2"].rolling(10)

    # expr_3 = ts_mean(_x_2, 10)
    ordered["expr_3"] = open_rank_window.mean()
    # expr_1 = -ts_corr(_x_2, _x_3, 10)
    rank_corr = open_rank_window.corr(ordered["_x_3"], ddof=0)
    ordered["expr_1"] = -rank_corr
    return ordered
def func_2_cs__date(df: pd.DataFrame) -> pd.DataFrame:
    """Add ``expr_4``, the fractional rank of ``_x_6`` within ``df``.

    Args:
        df: Frame with an ``_x_6`` column. Presumably the rows of a single
            date, as the function name suggests.

    Returns:
        A new frame equal to ``df`` plus the ``expr_4`` column
        (``rank(pct=True)`` of ``_x_6``). The input frame is left untouched.
    """
    # expr_4 = cs_rank(_x_6)
    # ``assign`` builds a new frame instead of writing into the group handed
    # in by ``groupby(...).apply``; pandas does not support UDFs that mutate
    # the object they receive.
    return df.assign(expr_4=df["_x_6"].rank(pct=True))
# logger.info("start...")
# Put the rows in (date, asset) order and rebuild a 0..n-1 index.
df = df.sort_values(by=["date", "asset"]).reset_index(drop=True)
# Run the generated stages. The order matters: each stage reads columns
# written by an earlier one (e.g. func_1_cs__date ranks the _x_0/_x_1 columns
# produced by func_0_ts__asset__date). Stages grouped by "asset" compute
# rolling columns along dates; stages grouped by "date" compute ranks.
# NOTE(review): recent pandas releases deprecate handing the grouping columns
# to the function given to groupby(...).apply - confirm this still behaves as
# intended on the pandas version in use.
df = df.groupby(by=["asset"], group_keys=False).apply(func_0_ts__asset__date)
df = df.groupby(by=["date"], group_keys=False).apply(func_0_cs__date)
df = df.groupby(by=["date"], group_keys=False).apply(func_1_cs__date)
df = df.groupby(by=["asset"], group_keys=False).apply(func_1_ts__asset__date)
# Row-wise stage: needs no grouping, so it is called on the whole frame.
df = func_2_cl(df)
df = df.groupby(by=["asset"], group_keys=False).apply(func_2_ts__asset__date)
df = df.groupby(by=["date"], group_keys=False).apply(func_2_cs__date)
# #========================================func_0_ts__asset__date
# _x_0 = ts_mean(OPEN, 10)
# expr_6 = ts_delta(OPEN, 10)
# expr_7 = ts_rank(OPEN + 1, 10)
# _x_1 = ts_mean(CLOSE, 10)
# expr_5 = -ts_corr(OPEN, CLOSE, 10)
# #========================================func_0_cs__date
# _x_5 = cs_rank(OPEN)
# #========================================func_1_cs__date
# _x_2 = cs_rank(_x_0)
# _x_3 = cs_rank(_x_1)
# #========================================func_1_ts__asset__date
# _x_6 = ts_mean(_x_5, 10)
# expr_8 = ts_rank(expr_7 + 1, 10)
# #========================================func_2_cl
# expr_2 = _x_2 - Abs(log(_x_1))
# #========================================func_2_ts__asset__date
# expr_3 = ts_mean(_x_2, 10)
# expr_1 = -ts_corr(_x_2, _x_3, 10)
# #========================================func_2_cs__date
# expr_4 = cs_rank(_x_6)
"""
[OPEN, CLOSE, expr_7]
"""
"""
expr_1 = -ts_corr(cs_rank(ts_mean(OPEN, 10)), cs_rank(ts_mean(CLOSE, 10)), 10)
expr_2 = cs_rank(ts_mean(OPEN, 10)) - Abs(log(ts_mean(CLOSE, 10)))
expr_3 = ts_mean(cs_rank(ts_mean(OPEN, 10)), 10)
expr_4 = cs_rank(ts_mean(cs_rank(OPEN), 10))
expr_5 = -ts_corr(OPEN, CLOSE, 10)
expr_6 = ts_delta(OPEN, 10)
expr_8 = ts_rank(expr_7 + 1, 10)
expr_7 = ts_rank(OPEN + 1, 10)
"""
# Drop the intermediate columns, i.e. those whose name starts with "_x_"
# followed by at least one digit.
df = df.drop(columns=[name for name in df.columns if re.search(r"^_x_\d+", name)])
# logger.info('done')
# save
# df.to_parquet('output.parquet', compression='zstd')
# print(df.tail(5))
# Expose the result to the outside under the name `df_output`.
df_output = df