Skip to content

Commit

Permalink
添加IC相关性
Browse files Browse the repository at this point in the history
  • Loading branch information
wukan1986 committed Mar 7, 2024
1 parent 6d2a744 commit 1740b29
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 45 deletions.
2 changes: 1 addition & 1 deletion alphainspect/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.5"
__version__ = "0.2.6"
34 changes: 28 additions & 6 deletions alphainspect/ic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import itertools
from typing import Sequence, Literal
from math import sqrt, floor, ceil
from typing import Sequence, Literal, Dict

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -42,21 +43,30 @@ def calc_ic(df_pl: pl.DataFrame, factor: str, forward_returns: Sequence[str]) ->
).sort(_DATE_).fill_nan(None)


def calc_ic_mean(df_pl: pl.DataFrame) -> pl.DataFrame:
    """Mean of every IC column, with the date column excluded."""
    ic_cols = pl.exclude(_DATE_)
    return df_pl.select(ic_cols.mean())


def calc_ic_ir(df_pl: pl.DataFrame) -> pl.DataFrame:
    """IR (information ratio) per IC column: mean / population std (ddof=0).

    Missing values must be null rather than NaN, otherwise the
    polars mean/std aggregations will propagate NaN.
    """
    ic_cols = pl.exclude(_DATE_)
    return df_pl.select(ic_cols.mean() / ic_cols.std(ddof=0))


def calc_ic_corr(df_pl: pl.DataFrame, factors: Sequence[str], forward_returns: Sequence[str]) -> Dict[str, pd.DataFrame]:
    """Cross-factor correlation of IC series, one matrix per forward return.

    Columns in `df_pl` are named `{factor}__{forward_return}`; for each
    forward return the matching columns are selected, renamed back to the
    bare factor name, and their pairwise Pearson correlation is computed.
    Useful for spotting multicollinearity between factors.
    """
    result: Dict[str, pd.DataFrame] = {}
    for ret in forward_returns:
        # strip the trailing "__{ret}" suffix (2 underscores + the name)
        suffix_len = len(ret) + 2
        wide = df_pl.select([f'{f}__{ret}' for f in factors])
        wide = wide.select(pl.all().name.map(lambda c, n=suffix_len: c[:-n]))
        result[ret] = wide.to_pandas().corr()
    return result


def row_unstack(df_pl: pl.DataFrame, factors: Sequence[str], forward_returns: Sequence[str]) -> pd.DataFrame:
    """Reshape a single-row frame into a factors x forward_returns table.

    The flat values are laid out row-major: one row per factor, one
    column per forward return.
    """
    values = df_pl.to_numpy().reshape(len(factors), len(forward_returns))
    return pd.DataFrame(values, index=factors, columns=forward_returns)


def mutual_info_func(xx):
def mutual_info_func(xx) -> float:
yx = np.vstack(xx).T
# 跳过nan
mask = np.any(np.isnan(yx), axis=1)
Expand Down Expand Up @@ -130,7 +140,7 @@ def plot_ic_hist(df_pl: pl.DataFrame, col: str,
a = df_pl[col].to_pandas().replace([-np.inf, np.inf], np.nan).dropna()

mean = a.mean()
std = a.std()
std = a.std(ddof=0)
skew = a.skew()
kurt = a.kurt()

Expand Down Expand Up @@ -237,8 +247,20 @@ def create_ic2_sheet(df_pl: pl.DataFrame, factors: Sequence[str], forward_return
plot_ic2_heatmap(df_ir, title='IR', ax=axes[1])
fig.tight_layout()

# IC之间相关性,可用于检查多重共线性
corrs = calc_ic_corr(df_pl, factors, forward_returns)
len_sqrt = sqrt(len(corrs))
row, col = ceil(len_sqrt), floor(len_sqrt)
if row * col < len(corrs):
col += 1
fig, axes = plt.subplots(row, col, figsize=(12, 9), squeeze=False)
axes = axes.flatten()
for i, (k, v) in enumerate(corrs.items()):
plot_ic2_heatmap(v, title=f'{k} IC Corr', ax=axes[i])
fig.tight_layout()

# 画ic时序图
fig, axes = plt.subplots(len(factors), len(forward_returns), figsize=(12, 9))
fig, axes = plt.subplots(len(factors), len(forward_returns), figsize=(12, 9), squeeze=False)
axes = axes.flatten()
logger.info('IC TimeSeries: {}', '=' * 60)
for i, (x, y) in enumerate(itertools.product(factors, forward_returns)):
Expand Down
8 changes: 4 additions & 4 deletions alphainspect/portfolio.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def calc_cum_return_by_quantile(df_pl: pl.DataFrame, fwd_ret_1: str, period: int
out = pd.DataFrame(index=rr[_DATE_])
rr = rr.select(pl.exclude(_DATE_)).to_numpy() + 1 # 日收益
qq = qq.select(pl.exclude(_DATE_)).to_numpy() # 分组编号
logger.info('累计收益准备数据')
logger.info('累计收益准备数据,period={}', period)

np.seterr(divide='ignore', invalid='ignore')
for i in range(int(q_max) + 1):
Expand All @@ -30,7 +30,7 @@ def calc_cum_return_by_quantile(df_pl: pl.DataFrame, fwd_ret_1: str, period: int
# !!!直接减是错误的,因为两资金是独立的,资金减少的一份由于资金不足对冲比例已经不再是1:1
# out['spread'] = out[f'G{q_max}'] - out[f'G0']

logger.info('累计收益计算完成 \n{}', out.iloc[-1])
logger.info('累计收益计算完成,period={}\n{}', period, out.iloc[-1])
return out


Expand All @@ -42,7 +42,7 @@ def calc_cum_return_spread(df_pl: pl.DataFrame, fwd_ret_1: str, period: int = 5)
out = pd.DataFrame(index=rr[_DATE_])
rr = rr.select(pl.exclude(_DATE_)).to_numpy() + 1 # 日收益
qq = qq.select(pl.exclude(_DATE_)).to_numpy() # 分组编号
logger.info('多空收益准备数据')
logger.info('多空收益准备数据,period={}', period)

np.seterr(divide='ignore', invalid='ignore')

Expand All @@ -68,7 +68,7 @@ def calc_cum_return_spread(df_pl: pl.DataFrame, fwd_ret_1: str, period: int = 5)
out[f'G{q_max} w=+1'] = cumulative_returns(rr, b9, funds=period, freq=period)
# 资金是共享的,每次调仓时需要将资金平分成两份
out[f'G{q_max}~G0 w=+.5/-.5'] = cumulative_returns(rr, bb, funds=period, freq=period, init_cash=1.0)
logger.info('多空收益计算完成 \n{}', out.iloc[-1])
logger.info('多空收益计算完成,period={}\n{}', period, out.iloc[-1])
return out


Expand Down
22 changes: 13 additions & 9 deletions alphainspect/reports.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from math import ceil
from pathlib import Path
from typing import Sequence
from typing import Sequence, Tuple

import polars as pl
from loguru import logger
Expand Down Expand Up @@ -82,7 +83,7 @@ def create_2x2_sheet(df_pl: pl.DataFrame,
factor: str,
forward_return: str, fwd_ret_1: str,
*,
period: int = 5,
periods: Tuple = (2, 5, 10),
axvlines: Sequence[str] = ()) -> None:
"""画2*2的图表。含IC时序、IC直方图、IC热力图、累积收益图
Expand All @@ -94,24 +95,27 @@ def create_2x2_sheet(df_pl: pl.DataFrame,
用于记算IC的远期收益率
fwd_ret_1:str
用于记算累计收益的1期远期收益率
period:int
periods:Tuple
累计收益时持仓天数与资金份数
axvlines
"""
fig, axes = plt.subplots(2, 2, figsize=(12, 9))
count = len(periods) + 3
fig, axes = plt.subplots(ceil(count / 2), 2, figsize=(12, 9), squeeze=False)
axes = axes.flatten()

# 画IC信息
logger.info('计算IC')
df_ic = calc_ic(df_pl, factor, [forward_return])
plot_ic_ts(df_ic, forward_return, axvlines=axvlines, ax=axes[0, 0])
plot_ic_hist(df_ic, forward_return, ax=axes[0, 1])
plot_ic_heatmap(df_ic, forward_return, ax=axes[1, 0])
plot_ic_ts(df_ic, forward_return, axvlines=axvlines, ax=axes[0])
plot_ic_hist(df_ic, forward_return, ax=axes[1])
plot_ic_heatmap(df_ic, forward_return, ax=axes[2])

# 画累计收益
logger.info('计算累计收益')
df_cum_ret = calc_cum_return_by_quantile(df_pl, fwd_ret_1, period)
plot_quantile_portfolio(df_cum_ret, fwd_ret_1, period, axvlines=axvlines, ax=axes[1, 1])
for i, period in enumerate(periods):
df_cum_ret = calc_cum_return_by_quantile(df_pl, fwd_ret_1, period)
plot_quantile_portfolio(df_cum_ret, fwd_ret_1, period, axvlines=axvlines, ax=axes[3 + i])

fig.tight_layout()

Expand Down
92 changes: 71 additions & 21 deletions codes/forward_returns.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd # noqa
import polars as pl # noqa
import polars.selectors as cs # noqa
# from loguru import logger # noqa
from loguru import logger # noqa

# ===================================
# 导入优先级,例如:ts_RSI在ta与talib中都出现了,优先使用ta
Expand All @@ -22,27 +22,39 @@
# ===================================

_ = (
"OPEN",
"CLOSE",
r"OPEN",
r"CLOSE",
)
(
OPEN,
CLOSE,
) = (pl.col(i) for i in _)

_ = (
"_x_0",
"RETURN_CC_1",
"RETURN_OO_1",
"RETURN_OO_2",
"RETURN_OO_5",
r"_x_0",
r"_x_2",
r"_x_3",
r"_x_1",
r"RETURN_CC_1",
r"RETURN_CO_1",
r"RETURN_OC_1",
r"RETURN_OO_1",
r"RETURN_OO_2",
r"RETURN_OO_5",
r"RETURN_OO_10",
)
(
_x_0,
_x_2,
_x_3,
_x_1,
RETURN_CC_1,
RETURN_CO_1,
RETURN_OC_1,
RETURN_OO_1,
RETURN_OO_2,
RETURN_OO_5,
RETURN_OO_10,
) = (pl.col(i) for i in _)

_DATE_ = "date"
def func_0_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
    """Per-asset time-series pass: snapshot the next bar's open/close.

    Sorts by date within the asset group, then stores the 1-bar-forward
    open and close as intermediate columns consumed by `func_0_cl`.
    """
    df = df.sort(by=[_DATE_])
    # ========================================
    df = df.with_columns(
        _x_0=ts_delay(OPEN, -1),   # next bar's open
        _x_2=ts_delay(CLOSE, -1),  # next bar's close
    )
    return df


def func_0_cl(df: pl.DataFrame) -> pl.DataFrame:
    """Cross-sectional pass: derive 1-bar forward returns from the
    intermediates produced by `func_0_ts__asset` (_x_0 = next open,
    _x_2 = next close).

    The three `with_columns` calls are deliberately separate: polars
    expressions within one call cannot reference columns created in the
    same call, so each stage may only use columns from earlier stages.
    """
    # ======================================== stage 1: reciprocal of today's close
    df = df.with_columns(
        _x_3=1 / CLOSE,
    )
    # ======================================== stage 2: uses _x_3 from stage 1
    df = df.with_columns(
        _x_1=1 / _x_0,                    # reciprocal of next open (reused downstream)
        RETURN_CC_1=_x_2 * _x_3 - 1,      # close-to-close: next_close/close - 1
        RETURN_CO_1=_x_0 * _x_3 - 1,      # close-to-open: next_open/close - 1
    )
    # ======================================== stage 3: uses _x_1 from stage 2
    df = df.with_columns(
        RETURN_OC_1=_x_1 * _x_2 - 1,      # open-to-close: next_close/next_open - 1
    )
    return df


def func_1_ts__asset(df: pl.DataFrame) -> pl.DataFrame:
    """Per-asset time-series pass: open-to-open forward returns.

    For horizon N the return is open[t+N+1] / open[t+1] - 1, i.e. enter at
    the next bar's open and exit N bars later at the open (_x_1 is the
    reciprocal of the next bar's open, computed in `func_0_cl`).
    """
    df = df.sort(by=[_DATE_])
    # ========================================
    horizons = (1, 2, 5, 10)
    df = df.with_columns([
        (_x_1 * ts_delay(OPEN, -(n + 1)) - 1).alias(f'RETURN_OO_{n}')
        for n in horizons
    ])
    return df


"""
#========================================func_0_ts__asset
_x_0 = 1/ts_delay(OPEN, -1)
RETURN_CC_1 = (-CLOSE + ts_delay(CLOSE, -1))/CLOSE
#========================================func_0_ts__asset
RETURN_OO_1 = _x_0*ts_delay(OPEN, -2) - 1
RETURN_OO_2 = _x_0*ts_delay(OPEN, -3) - 1
RETURN_OO_5 = _x_0*ts_delay(OPEN, -6) - 1
_x_0 = ts_delay(OPEN, -1)
_x_2 = ts_delay(CLOSE, -1)
#========================================func_0_cl
_x_3 = 1/CLOSE
#========================================func_0_cl
_x_1 = 1/_x_0
RETURN_CC_1 = _x_2*_x_3 - 1
RETURN_CO_1 = _x_0*_x_3 - 1
#========================================func_0_cl
RETURN_OC_1 = _x_1*_x_2 - 1
#========================================func_1_ts__asset
RETURN_OO_1 = _x_1*ts_delay(OPEN, -2) - 1
RETURN_OO_2 = _x_1*ts_delay(OPEN, -3) - 1
RETURN_OO_5 = _x_1*ts_delay(OPEN, -6) - 1
RETURN_OO_10 = _x_1*ts_delay(OPEN, -11) - 1
"""

"""
RETURN_OO_1 = ts_delay(OPEN, -2)/ts_delay(OPEN, -1) - 1
RETURN_OO_2 = ts_delay(OPEN, -3)/ts_delay(OPEN, -1) - 1
RETURN_OO_5 = ts_delay(OPEN, -6)/ts_delay(OPEN, -1) - 1
RETURN_OO_10 = ts_delay(OPEN, -11)/ts_delay(OPEN, -1) - 1
RETURN_OC_1 = ts_delay(CLOSE, -1)/ts_delay(OPEN, -1) - 1
RETURN_CC_1 = -1 + ts_delay(CLOSE, -1)/CLOSE
RETURN_CO_1 = -1 + ts_delay(OPEN, -1)/CLOSE
"""


def main(df: pl.DataFrame):
def main(df: pl.DataFrame) -> pl.DataFrame:
# logger.info("start...")

df = df.sort(by=[_DATE_, _ASSET_])
df = df.group_by(_ASSET_).map_groups(func_0_ts__asset)
df = func_0_cl(df)
df = df.group_by(_ASSET_).map_groups(func_1_ts__asset)

# drop intermediate columns
df = df.drop(columns=list(filter(lambda x: re.search(r"^_x_\d+", x), df.columns)))
df = df.select(pl.exclude(r"^_x_\d+$"))

# shrink
df = df.select(cs.all().shrink_dtype())
Expand Down
2 changes: 1 addition & 1 deletion examples/demo1.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
df_output = with_factor_quantile(df_output, factor, quantiles=10)

# %%
create_2x2_sheet(df_output, factor, forward_return, fwd_ret_1, period=period, axvlines=axvlines)
create_2x2_sheet(df_output, factor, forward_return, fwd_ret_1, periods=(5, 10), axvlines=axvlines)
# %%
create_3x2_sheet(df_output, factor, forward_return, fwd_ret_1, period=period, axvlines=axvlines)

Expand Down
5 changes: 2 additions & 3 deletions examples/demo5.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
period = 5
axvlines = ('2020-01-01',)

factors = ['STD_010', 'STD_020', 'SMA_010', 'SMA_020'] # 考察因子
forward_returns = ['RETURN_CC_1', 'RETURN_OO_1', 'RETURN_OO_2', 'RETURN_OO_5'] # 同一因子,不同持有期对比

factors = ['STD_010', 'STD_020', 'STD_060', 'SMA_010', 'SMA_020', 'SMA_060'] # 考察因子
forward_returns = ['RETURN_CC_1', 'RETURN_OO_1', 'RETURN_OO_2', 'RETURN_OO_5', 'RETURN_OO_10'] # 同一因子,不同持有期对比
create_ic2_sheet(df_output, factors, forward_returns)
plt.show()

0 comments on commit 1740b29

Please sign in to comment.