重新整理

wukan1986 · Dec 13, 2024 · 041a26e · 041a26e
1 parent 969efbf
commit 041a26e
Show file tree

Hide file tree

Showing 14 changed files with 248 additions and 282 deletions.
diff --git a/README.md b/README.md
@@ -18,10 +18,10 @@ pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade alphainspect
         1. 多期收益率。如果移动因子，会导致一个因子就要移动多次
         2. 因子一般成百上千，全移动要的工作量非常大，而收益率则少很多
     4. 推荐大家使用`expr_codegen`和`polars_ta`等项目
-2. 运行`examples/demo1.py`示例弹出简易图表
-3. 运行`examples/demo2.py`示例弹出完整图表
-4. 运行`examples/demo3.py`示例多进程并行输出HTML网页报表
-5. 运行`examples/demo4.py`示例事件图表
+2. 运行`examples/factor_analysis.py`示例单因子分析
+3. 运行`examples/reports_html.py`示例多进程并行输出HTML网页报表
+4. 运行`examples/reports_notebook.py`示例多进程并行转Notebook报表
+5. 运行`examples/events_study.py`示例事件分析
 
 ## 部分图示
 

diff --git a/alphainspect/deprecated.py b/alphainspect/deprecated.py
@@ -0,0 +1,119 @@
+"""
+废除
+"""
+import numpy as np
+import pandas as pd
+import polars as pl
+from loguru import logger
+from matplotlib import pyplot as plt
+
+from alphainspect import _QUANTILE_, _DATE_, _ASSET_, _WEIGHT_
+from alphainspect.portfolio import plot_quantile_portfolio
+from alphainspect.utils import cumulative_returns
+
+
+def _calc_cum_return_by_quantile(df: pl.DataFrame, fwd_ret_1: str, period: int = 5, factor_quantile: str = _QUANTILE_) -> pd.DataFrame:
+    """Compute cumulative returns per quantile group, equally weighted within each group.
+
+    Parameters
+    ----------
+    df
+        Long-format data; pivoted here with `_DATE_` as index and `_ASSET_` as columns.
+    fwd_ret_1
+        Name of the return column (presumably the 1-period forward return -- confirm).
+    period
+        Passed to `cumulative_returns` as both `funds` and `freq`.
+    factor_quantile
+        Name of the column holding the group number.
+
+    Returns
+    -------
+    pd.DataFrame
+        Indexed by the pivoted date column, with one column `G{i}` for every i
+        from 0 to the largest group number found in `factor_quantile`.
+
+    """
+    # Largest group number present in the data
+    q_max = df.select(pl.max(factor_quantile)).to_series(0)[0]
+    # Wide date x asset tables of returns and of group numbers, both sorted by date
+    rr = df.pivot(index=_DATE_, columns=_ASSET_, values=fwd_ret_1, aggregate_function='first', sort_columns=True).sort(_DATE_)
+    qq = df.pivot(index=_DATE_, columns=_ASSET_, values=factor_quantile, aggregate_function='first', sort_columns=True).sort(_DATE_)
+
+    out = pd.DataFrame(index=rr[_DATE_])
+    rr = rr.select(pl.exclude(_DATE_)).to_numpy() + 1  # daily return, shifted by +1
+    qq = qq.select(pl.exclude(_DATE_)).to_numpy()  # group number
+    # logger.info('累计收益准备数据,period={}', period)
+
+    # Silence divide-by-zero / invalid-value warnings from the weight division below.
+    # NOTE(review): np.seterr is not restored afterwards, so the setting outlives this call.
+    np.seterr(divide='ignore', invalid='ignore')
+    for i in range(int(q_max) + 1):
+        # Equal weight: membership mask divided by the member count of each row
+        b = qq == i
+        w = b / b.sum(axis=1).reshape(-1, 1)
+        # Rows whose weights are all zero are set to NaN
+        w[(w == 0).all(axis=1), :] = np.nan
+        # Absolute weights sum to 1
+        out[f'G{i}'] = cumulative_returns(rr, w, funds=period, freq=period)
+    # !!! Subtracting the curves directly is wrong: the two pools of capital are independent,
+    # so once one side has shrunk, the hedge ratio is no longer 1:1 because of insufficient funds
+    # out['spread'] = out[f'G{q_max}'] - out[f'G0']
+    logger.info('累计收益计算完成,period={}\n{}', period, out.tail(1).to_string())
+    return out
+
+
+def calc_cum_return_spread(df: pl.DataFrame, fwd_ret_1: str, period: int = 5, factor_quantile: str = _QUANTILE_) -> pd.DataFrame:
+    """Compare several ways of computing the long/short return of the top and bottom groups.
+
+    Assets are split into groups by `factor_quantile` and equally weighted within a group.
+    Only group 0 and the largest group are used.
+
+    Parameters
+    ----------
+    df
+        Long-format data; pivoted here with `_DATE_` as index and `_ASSET_` as columns.
+    fwd_ret_1
+        Name of the return column (presumably the 1-period forward return -- confirm).
+    period
+        Passed to `cumulative_returns` as both `funds` and `freq`.
+    factor_quantile
+        Name of the column holding the group number.
+
+    Returns
+    -------
+    pd.DataFrame
+        Indexed by the pivoted date column, with the columns `1-G0,w=+1`, `G0-1,w=-1`,
+        `G{q_max},w=+1` and `G{q_max}~G0,w=+.5/-.5`.
+
+    """
+
+    # Largest group number present in the data
+    q_max = df.select(pl.max(factor_quantile)).to_series(0)[0]
+    # NOTE(review): fill_nan replaces NaN values, not nulls; cells missing from the pivot
+    # are presumably null -- confirm these fills have the intended effect.
+    rr = df.pivot(index=_DATE_, columns=_ASSET_, values=fwd_ret_1, aggregate_function='first', sort_columns=True).sort(_DATE_).fill_nan(0)
+    qq = df.pivot(index=_DATE_, columns=_ASSET_, values=factor_quantile, aggregate_function='first', sort_columns=True).sort(_DATE_).fill_nan(-1)
+
+    out = pd.DataFrame(index=rr[_DATE_])
+    rr = rr.select(pl.exclude(_DATE_)).to_numpy() + 1  # daily return, shifted by +1
+    qq = qq.select(pl.exclude(_DATE_)).to_numpy()  # group number
+    logger.info('多空收益准备数据,period={}', period)
+
+    # Silence divide-by-zero / invalid-value warnings from the weight divisions below.
+    # NOTE(review): np.seterr is not restored afterwards, so the setting outlives this call.
+    np.seterr(divide='ignore', invalid='ignore')
+
+    # Equal weight: membership mask divided by the member count of each row
+    w0 = qq == 0
+    w9 = qq == q_max
+    w0 = w0 / w0.sum(axis=1).reshape(-1, 1)
+    w0 = np.where(w0 == w0, w0, 0)  # x == x is False only for NaN, so NaN becomes 0
+    w9 = w9 / w9.sum(axis=1).reshape(-1, 1)
+    w9 = np.where(w9 == w9, w9, 0)  # same NaN -> 0 replacement
+    ww = (w9 - w0) / 2  # divide by 2: absolute weights must sum to 1, otherwise the later calculation is wrong
+
+    # Rows that are entirely 0 are set to NaN; the later calculation uses this to detect such rows
+    ww[(ww == 0).all(axis=1), :] = np.nan
+    w0[(w0 == 0).all(axis=1), :] = np.nan
+    w9[(w9 == 0).all(axis=1), :] = np.nan
+
+    # Flip the curve
+    out['1-G0,w=+1'] = 1 - cumulative_returns(rr, w0, funds=period, freq=period)
+    # Flip the weights. The capital changes; if capital is not shared, a full hedge is impossible
+    out['G0-1,w=-1'] = cumulative_returns(rr, -w0, funds=period, freq=period) - 1
+
+    out[f'G{q_max},w=+1'] = cumulative_returns(rr, w9, funds=period, freq=period)
+    # Capital is shared; at every rebalance it has to be split into two equal halves
+    out[f'G{q_max}~G0,w=+.5/-.5'] = cumulative_returns(rr, ww, funds=period, freq=period, init_cash=1.0)
+    logger.info('多空收益计算完成,period={}\n{}', period, out.tail(1).to_string())
+    return out
+
+
+def calc_cum_return_weights(df: pl.DataFrame, fwd_ret_1: str, period: int = 1) -> pd.DataFrame:
+    """Compute per-asset cumulative returns from externally specified weights.
+
+    No grouping into quantiles is done, and capital is not split into portions.
+
+    Parameters
+    ----------
+    df
+        Long-format data containing the `_DATE_`, `_ASSET_` and `_WEIGHT_` columns.
+    fwd_ret_1
+        Name of the return column (presumably the 1-period forward return -- confirm).
+    period
+        Only used in the log messages of this function.
+
+    Returns
+    -------
+    pd.DataFrame
+        Indexed by the pivoted date column, one column per asset, holding the
+        cumulative product of `return * weight + 1` down the rows.
+
+    """
+    # Wide date x asset tables of returns and of weights, both sorted by date
+    rr = df.pivot(index=_DATE_, columns=_ASSET_, values=fwd_ret_1, aggregate_function='first', sort_columns=True).sort(_DATE_)
+    ww = df.pivot(index=_DATE_, columns=_ASSET_, values=_WEIGHT_, aggregate_function='first', sort_columns=True).sort(_DATE_)
+
+    # columns[1:] skips the first pivot column (the date index column)
+    out = pd.DataFrame(index=rr[_DATE_], columns=rr.columns[1:])
+    rr = rr.select(pl.exclude(_DATE_)).to_numpy()  # daily return
+    ww = ww.select(pl.exclude(_DATE_)).to_numpy()  # weight
+    logger.info('权重收益准备数据,period={}', period)
+
+    # NOTE(review): np.seterr is not restored afterwards, so the setting outlives this call.
+    np.seterr(divide='ignore', invalid='ignore')
+
+    # x == x is False only for NaN, so NaN returns become 0.0
+    rr = np.where(rr == rr, rr, 0.0)
+    # Cumulative return is kept per asset; capital is not shared between assets
+    # Positions are rebalanced every day, so the inaccurate short-side calculation problem does not arise
+    out[:] = np.cumprod(rr * ww + 1, axis=0)
+
+    logger.info('权重收益计算完成,period={}\n{}', period, out.tail(1).to_string())
+    return out
+
+
+def create_portfolio2_sheet(df: pl.DataFrame,
+                            fwd_ret_1: str,
+                            *,
+                            axvlines=()) -> None:
+    """Plot per-asset cumulative returns and their cross-asset mean.
+
+    Weights are specified externally (read by `calc_cum_return_weights`), and capital
+    is kept isolated per asset.
+
+    Parameters
+    ----------
+    df
+        Input data, forwarded to `calc_cum_return_weights`.
+    fwd_ret_1
+        Name of the return column; also forwarded to `plot_quantile_portfolio`.
+    axvlines
+        Forwarded to `plot_quantile_portfolio`.
+
+    """
+    # Per-asset returns; with many assets the chart becomes sluggish
+    df_cum_ret = calc_cum_return_weights(df, fwd_ret_1, 1)
+
+    # Two stacked panels: per-asset curves on top, their mean below
+    fig, axes = plt.subplots(2, 1, figsize=(12, 9), squeeze=False)
+    axes = axes.flatten()
+    # Per-asset returns
+    plot_quantile_portfolio(df_cum_ret, fwd_ret_1, axvlines=axvlines, ax=axes[0])
+
+    # Mean across assets, which is equivalent to equal weighting
+    s = df_cum_ret.mean(axis=1)
+    s.name = 'portfolio'
+    plot_quantile_portfolio(s, fwd_ret_1, axvlines=axvlines, ax=axes[1])
+    fig.tight_layout()
diff --git a/alphainspect/events.py b/alphainspect/events.py
@@ -10,6 +10,7 @@
 from alphainspect import _QUANTILE_, _DATE_, _ASSET_
 from alphainspect.portfolio import calc_cum_return_by_quantile, plot_quantile_portfolio
 
+# Pure digits with a leading + or - sign; hopefully this does not collide with other column names
 _REG_AROUND_ = r'^[+-]\d+$'
 _COL_AROUND_ = pl.col(_REG_AROUND_)
 
@@ -21,18 +22,16 @@ def make_around_columns(periods_before: int = 3, periods_after: int = 15) -> Lis
 
 
 def with_around_price(df: pl.DataFrame, price: str, periods_before: int = 5, periods_after: int = 15) -> pl.DataFrame:
-    """添加事件前后复权价
+    """添加前后复权价
 
     Parameters
     ----------
     df
     price
+        收盘价或均价
     periods_before
     periods_after
 
-    Returns
-    -------
-
     """
 
     def _func_ts(df: pl.DataFrame,
@@ -61,7 +60,7 @@ def _func_ts(df: pl.DataFrame,
 
 
 def plot_events_errorbar(df: pl.DataFrame, factor_quantile: str = _QUANTILE_, ax=None) -> None:
-    """事件前后误差条"""
+    """事件前后误差条。只显示最大分组"""
     min_max = df.select(pl.min(factor_quantile).alias('min'), pl.max(factor_quantile).alias('max'))
     min_max = min_max.to_dicts()[0]
     _min, _max = min_max['min'], min_max['max']
@@ -72,6 +71,7 @@ def plot_events_errorbar(df: pl.DataFrame, factor_quantile: str = _QUANTILE_, ax
     std_pl = df.group_by(factor_quantile).agg(pl.std(_REG_AROUND_)).sort(factor_quantile)
     std_pd: pd.DataFrame = std_pl.to_pandas().set_index(factor_quantile).T
 
+    # 取最大分组
     a = mean_pd.loc[:, _max]
     b = std_pd.loc[:, _max]
 

diff --git a/alphainspect/plotting.py b/alphainspect/plotting.py
@@ -47,6 +47,38 @@ def plot_heatmap_monthly_mean(df: pl.DataFrame, col: str,
     plot_heatmap(df_pd[col].unstack(), title=f"{col},Monthly Mean", ax=ax)
 
 
+def plot_heatmap_monthly_diff(df: pd.DataFrame, col='G9',
+                              *, ax=None) -> None:
+    """Monthly heatmap of the last value of each month minus the first.
+
+    Parameters
+    ----------
+    df
+        Must contain the `_DATE_` column and `col`.
+        NOTE(review): annotated as pd.DataFrame, but the body calls `.select` with
+        polars expressions and `.to_pandas()`, so a polars frame appears to be
+        expected -- confirm and fix the annotation.
+    col
+        Column whose last-minus-first value is computed per (year, month) group.
+    ax
+        Forwarded to `plot_heatmap`.
+
+    """
+    # Keep date and value, derive year and month, and sort by date before grouping
+    # (first/last below presumably rely on this order -- confirm)
+    df = df.select([_DATE_, col,
+                    pl.col(_DATE_).dt.year().alias('year'),
+                    pl.col(_DATE_).dt.month().alias('month')
+                    ]).sort(_DATE_)
+    df = df.group_by('year', 'month').agg(pl.last(col) - pl.first(col))
+    df_pd = df.to_pandas().set_index(['year', 'month'])
+
+    # unstack() turns the (year, month) index into a year x month grid for the heatmap
+    plot_heatmap(df_pd[col].unstack(), title=f"{col},Monthly Last-First", ax=ax)
+
+    # Earlier pandas-based implementation, kept for reference:
+    # out = pd.DataFrame(index=df.index)
+    # out['year'] = out.index.year
+    # out['month'] = out.index.month
+    # out['first'] = df[col]
+    # out['last'] = df[col]
+    # out = out.groupby(by=['year', 'month']).agg({'first': 'first', 'last': 'last'})
+    # # Cumulative return changed from a cumulative product to a cumulative sum, so the algorithm here had to change too
+    # # out['cum_ret'] = out['last'] / out['first'] - 1
+    # out['cum_ret'] = out['last'] - out['first']
+    # plot_heatmap(out['cum_ret'].unstack(), title=f"{col},Monthly Return", ax=ax)
+
+
 def plot_ts(df: pl.DataFrame, col: str,
             *,
             axvlines=(), ax=None) -> Dict[str, float]: