V0.9.33 update a batch of code (#173)
* 0.9.33 start coding

* 0.9.33 add time-series factor preprocessing

* 0.9.33 fix show_symbol_factor_layering

* 0.9.33 update streamlit components
zengbin93 authored Oct 22, 2023
1 parent a1145a9 commit 9e333d7
Showing 6 changed files with 148 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
@@ -5,7 +5,7 @@ name: Python package

on:
push:
-    branches: [ master, V0.9.32 ]
+    branches: [ master, V0.9.33 ]
pull_request:
branches: [ master ]

6 changes: 4 additions & 2 deletions czsc/__init__.py
@@ -90,6 +90,7 @@
show_sectional_ic,
show_factor_returns,
show_factor_layering,
show_symbol_factor_layering,
)

from czsc.utils.bi_info import (
@@ -99,12 +100,13 @@

from czsc.utils.features import (
normalize_feature,
normalize_ts_feature,
)

__version__ = "0.9.32"
__version__ = "0.9.33"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20231013"
__date__ = "20231018"



1 change: 1 addition & 0 deletions czsc/signals/__init__.py
@@ -205,6 +205,7 @@
tas_macd_bc_V230803,
tas_macd_bc_V230804,
tas_macd_bc_ubi_V230804,
tas_slope_V231019,
)

from czsc.signals.pos import (
70 changes: 60 additions & 10 deletions czsc/signals/tas.py
@@ -21,7 +21,7 @@
from czsc.analyze import CZSC
from czsc.objects import Signal, Direction, BI, RawBar, FX, Mark, ZS
from czsc.traders.base import CzscSignals
-from czsc.utils import get_sub_elements, fast_slow_cross, count_last_same, create_single_signal
+from czsc.utils import get_sub_elements, fast_slow_cross, count_last_same, create_single_signal, single_linear
from czsc.utils.sig import cross_zero_axis, cal_cross_num, down_cross_count


@@ -2789,7 +2789,7 @@ def tas_atr_V230630(c: CZSC, **kwargs) -> OrderedDict:
**Signal logic:**
The ratio of ATR to the close price measures the relative size of price swings; this value is bucketed into layers.
**Signal list:**
- Signal('日线_D1ATR14_波动V230630_第7层_任意_任意_0')
@@ -2802,7 +2802,7 @@ def tas_atr_V230630(c: CZSC, **kwargs) -> OrderedDict:
- Signal('日线_D1ATR14_波动V230630_第3层_任意_任意_0')
- Signal('日线_D1ATR14_波动V230630_第2层_任意_任意_0')
- Signal('日线_D1ATR14_波动V230630_第1层_任意_任意_0')
:param c: CZSC object
:param kwargs:
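For intuition only, here is a rough standalone sketch of the ATR-to-close layering idea described above. ATR is computed with a plain pandas rolling mean of the true range, and the quantile bucketing is an assumption for illustration; the real signal relies on czsc's cached indicators and its own layering scheme.

import pandas as pd

def atr_close_layer(df: pd.DataFrame, timeperiod: int = 14, n_layers: int = 10) -> pd.Series:
    """Bucket ATR(timeperiod) / close into n_layers quantile layers (illustrative sketch only)."""
    prev_close = df["close"].shift(1)
    tr = pd.concat([
        df["high"] - df["low"],
        (df["high"] - prev_close).abs(),
        (df["low"] - prev_close).abs(),
    ], axis=1).max(axis=1)                      # true range
    atr = tr.rolling(timeperiod).mean()         # simple moving-average ATR
    ratio = atr / df["close"]                   # price amplitude relative to price level
    return pd.qcut(ratio, q=n_layers, labels=False, duplicates="drop") + 1  # layers 1..n_layers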
@@ -2860,15 +2860,15 @@ def tas_rumi_V230704(c: CZSC, **kwargs) -> OrderedDict:
rumi_window = int(kwargs.get('rumi_window', 30))
timeperiod1 = int(kwargs.get('timeperiod1', 3))
timeperiod2 = int(kwargs.get('timeperiod2', 50))

assert rumi_window < timeperiod2, "rumi_window 必须小于 timeperiod2"
freq = c.freq.value
k1, k2, k3 = f"{freq}_D{di}F{timeperiod1}S{timeperiod2}R{rumi_window}_BS辅助V230704".split('_')
v1 = '其他'

if len(c.bars_raw) < di + timeperiod2:
return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)

key1 = update_ma_cache(c, ma_type='SMA', timeperiod=timeperiod1)
key2 = update_ma_cache(c, ma_type='WMA', timeperiod=timeperiod2)
bars = get_sub_elements(c.bars_raw, di=di, n=timeperiod2)
@@ -3204,14 +3204,14 @@ def tas_angle_V230802(c: CZSC, **kwargs) -> OrderedDict:
- n: number of bi (strokes) to include in the calculation
- di: which bi to take, counting back from the most recent
:return: signal recognition result
"""
di = int(kwargs.get('di', 1))
n = int(kwargs.get('n', 9))
th = int(kwargs.get('th', 50))
assert 300 > th > 30, "th 取值范围为 30 ~ 300"

freq = c.freq.value
k1, k2, k3 = f"{freq}_D{di}N{n}T{th}_笔角度V230802".split('_')
v1 = '其他'
@@ -3309,7 +3309,7 @@ def tas_macd_bc_V230804(c: CZSC, **kwargs) -> OrderedDict:
od_dif = max([x.cache[cache_key]['dif'] for x in b1.fx_b.raw_bars + b3.fx_b.raw_bars])
if 0 < b5_dif < od_dif:
v1 = '空头'

if b5.direction == Direction.Down and b5.low < (dd + (gg - dd) / 4):
b5_dif = min([x.cache[cache_key]['dif'] for x in b5.fx_b.raw_bars])
od_dif = min([x.cache[cache_key]['dif'] for x in b1.fx_b.raw_bars + b3.fx_b.raw_bars])
@@ -3358,11 +3358,61 @@ def tas_macd_bc_ubi_V230804(c: CZSC, **kwargs) -> OrderedDict:
od_dif = max([x.cache[cache_key]['dif'] for x in b2.fx_b.raw_bars + b4.fx_b.raw_bars])
if 0 < b5_dif < od_dif:
v1 = '空头'

if ubi['direction'] == Direction.Down and ubi['low'] < (dd + (gg - dd) / 4):
b5_dif = min([x.cache[cache_key]['dif'] for x in ubi['raw_bars'][-5:]])
od_dif = min([x.cache[cache_key]['dif'] for x in b2.fx_b.raw_bars + b4.fx_b.raw_bars])
if 0 > b5_dif > od_dif:
v1 = '多头'

return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)


def tas_slope_V231019(c: CZSC, **kwargs) -> OrderedDict:
"""DIF趋势线斜率判断多空
参数模板:"{freq}_D{di}DIF{n}斜率T{th}_BS辅助V231019"
**信号逻辑:**
取最近 N 根K线的DIF值计算斜率,然后取 N * 10 根K线的斜率值,计算斜率值的分位数,
如果分位数大于th,则看多,小于1-th,则看空。
**信号列表:**
- Signal('60分钟_D1DIF10斜率T80_BS辅助V231019_看多_任意_任意_0')
- Signal('60分钟_D1DIF10斜率T80_BS辅助V231019_看空_任意_任意_0')
:param c: CZSC object
:param kwargs: parameter dict
:return: signal result
"""
di = int(kwargs.get('di', 1))
n = int(kwargs.get('n', 10))
th = int(kwargs.get('th', 80))
assert th > 50 and th < 100, 'th 参数取值范围为 50 ~ 100'

freq = c.freq.value
cache_key = update_macd_cache(c, fastperiod=12, slowperiod=26, signalperiod=9)
k1, k2, k3 = f"{freq}_D{di}DIF{n}斜率T{th}_BS辅助V231019".split('_')
v1 = '其他'
if len(c.bars_raw) < 50:
return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)

cache_slope_key = f"tas_slope_V231019_{di}_{n}"
for i, bar in enumerate(c.bars_raw):
if i < n:
continue

if cache_slope_key not in bar.cache:
dif = [x.cache[cache_key]['dif'] for x in c.bars_raw[i - n: i]]
bar.cache[cache_slope_key] = single_linear(dif)['slope']

bars = get_sub_elements(c.bars_raw, di=di, n=n * 10)
dif_slope = [x.cache.get(cache_slope_key, 0) for x in bars]
q = (dif_slope[-1] - min(dif_slope)) / (max(dif_slope) - min(dif_slope))
if q > th / 100:
v1 = '看多'
elif q < 1 - th / 100:
v1 = '看空'
return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)
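For reference, a rough standalone sketch of the slope-and-rank logic used by tas_slope_V231019, with numpy.polyfit standing in for czsc's single_linear and a synthetic DIF series. Everything here (names, data) is illustrative, and like the committed code it uses the min-max position of the latest slope rather than a true percentile.

import numpy as np

def slope_position_signal(dif: np.ndarray, n: int = 10, th: int = 80) -> str:
    """Classify the latest DIF slope against the last n*10 slope values (illustrative only)."""
    slopes = []
    for i in range(n, len(dif) + 1):
        window = dif[i - n:i]
        slope, _ = np.polyfit(np.arange(n), window, deg=1)  # linear fit: slope, intercept
        slopes.append(slope)
    recent = np.array(slopes[-n * 10:])            # last n*10 slope values
    lo, hi = recent.min(), recent.max()
    if hi == lo:                                   # degenerate window, no signal
        return "其他"
    q = (recent[-1] - lo) / (hi - lo)              # min-max position of the latest slope
    if q > th / 100:
        return "看多"
    if q < 1 - th / 100:
        return "看空"
    return "其他"

if __name__ == "__main__":
    rng = np.random.default_rng(42)
    fake_dif = np.cumsum(rng.normal(0, 0.1, 300))  # synthetic DIF-like series
    print(slope_position_signal(fake_dif, n=10, th=80))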
65 changes: 61 additions & 4 deletions czsc/utils/features.py
@@ -5,6 +5,7 @@
create_dt: 2023/10/06 15:01
describe: factor (feature) processing
"""
import pandas as pd
from loguru import logger
from sklearn.preprocessing import scale

@@ -19,10 +20,66 @@ def normalize_feature(df, x_col, **kwargs):
- q: float, winsorization (tail-clipping) proportion, default 0.05
"""
df = df.copy()
if df[x_col].isna().sum() > 0:
logger.warning(f"因子列 {x_col} 存在缺失值,已自动剔除,这有可能导致后续分析结果不准确")
df = df.dropna(subset=[x_col])

assert df[x_col].isna().sum() == 0, "因子有缺失值,缺失数量为:{}".format(df[x_col].isna().sum())
q = kwargs.get("q", 0.05)  # winsorization proportion
df[x_col] = df.groupby("dt")[x_col].transform(lambda x: scale(x.clip(lower=x.quantile(q), upper=x.quantile(1 - q))))
return df
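For context, a minimal sketch of the cross-sectional transform that normalize_feature applies within each dt group: winsorize at the q / 1-q quantiles, then z-score with sklearn's scale. The toy frame and its dt/symbol/factor columns are made up for illustration.

import numpy as np
import pandas as pd
from sklearn.preprocessing import scale

rng = np.random.default_rng(0)
toy = pd.DataFrame({
    "dt": np.repeat(pd.date_range("2023-10-01", periods=3), 50),
    "symbol": [f"S{i:03d}" for i in range(50)] * 3,
    "factor": rng.normal(size=150),
})

q = 0.05  # winsorization proportion, same default as normalize_feature
toy["factor"] = toy.groupby("dt")["factor"].transform(
    lambda x: scale(x.clip(lower=x.quantile(q), upper=x.quantile(1 - q))))
print(toy.groupby("dt")["factor"].agg(["mean", "std"]))  # roughly 0 mean, unit std per date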


def normalize_ts_feature(df, x_col, n=10, **kwargs):
"""对时间序列数据进行归一化处理
:param df: 因子数据,必须包含 dt, x_col 列,其中 dt 为日期,x_col 为因子值,数据样例:
:param x_col: 因子列名
:param n: 分层数量,默认为10
:param kwargs:
- method: 分层方法,expanding 或 rolling,默认为 expanding
- min_periods: expanding 时的最小样本数量,默认为300
:return: df, 添加了 x_col_norm, x_col_qcut, x_col分层 列
"""
assert df[x_col].nunique() > n, "因子值的取值数量必须大于分层数量"
assert df[x_col].isna().sum() == 0, "因子有缺失值,缺失数量为:{}".format(df[x_col].isna().sum())
method = kwargs.get("method", "expanding")
min_periods = kwargs.get("min_periods", 300)

if f"{x_col}_norm" not in df.columns:
if method == "expanding":
df[f"{x_col}_norm"] = df[x_col].expanding(min_periods=min_periods).apply(
lambda x: (x.iloc[-1] - x.mean()) / x.std(), raw=False)

elif method == "rolling":
df[f"{x_col}_norm"] = df[x_col].rolling(min_periods=min_periods, window=min_periods).apply(
lambda x: (x.iloc[-1] - x.mean()) / x.std(), raw=False)

else:
raise ValueError("method 必须为 expanding 或 rolling")

# fill the leading rows (before min_periods) of the normalized column by standardizing the raw values of that segment
na_x = df[df[f"{x_col}_norm"].isna()][x_col].values
df.loc[df[f"{x_col}_norm"].isna(), f"{x_col}_norm"] = (na_x - na_x.mean()) / na_x.std()

if f"{x_col}_qcut" not in df.columns:
if method == "expanding":
df[f'{x_col}_qcut'] = df[x_col].expanding(min_periods=min_periods).apply(
lambda x: pd.qcut(x, q=n, labels=False, duplicates='drop', retbins=False).values[-1], raw=False)

elif method == "rolling":
df[f'{x_col}_qcut'] = df[x_col].rolling(min_periods=min_periods, window=min_periods).apply(
lambda x: pd.qcut(x, q=n, labels=False, duplicates='drop', retbins=False).values[-1], raw=False)

else:
raise ValueError("method 必须为 expanding 或 rolling")

# fill the leading rows of the qcut column by bucketing the raw values of that segment with pd.qcut
na_x = df[df[f"{x_col}_qcut"].isna()][x_col].values
df.loc[df[f"{x_col}_qcut"].isna(), f"{x_col}_qcut"] = pd.qcut(na_x, q=n, labels=False, duplicates='drop', retbins=False)

if df[f'{x_col}_qcut'].isna().sum() > 0:
logger.warning(f"因子 {x_col} 分层存在 {df[f'{x_col}_qcut'].isna().sum()} 个缺失值,已使用前值填充")
df[f'{x_col}_qcut'] = df[f'{x_col}_qcut'].ffill()

df[f'{x_col}分层'] = df[f'{x_col}_qcut'].apply(lambda x: f'第{str(int(x+1)).zfill(2)}层')

return df
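A minimal usage sketch of the new normalize_ts_feature (assumes czsc 0.9.33 is installed; the dt/factor column names and the synthetic series are only for illustration):

import numpy as np
import pandas as pd
from czsc.utils.features import normalize_ts_feature  # new in 0.9.33

rng = np.random.default_rng(1)
df = pd.DataFrame({
    "dt": pd.date_range("2022-01-01", periods=800, freq="D"),
    "factor": np.cumsum(rng.normal(size=800)),   # random-walk style factor
})

df = normalize_ts_feature(df, x_col="factor", n=10, method="expanding", min_periods=300)
print(df[["factor", "factor_norm", "factor_qcut", "factor分层"]].tail())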
33 changes: 21 additions & 12 deletions czsc/utils/st_components.py
@@ -41,8 +41,16 @@ def _stats(df_, type_='持有日'):

df = df.cumsum()
fig = px.line(df, y=df.columns.to_list(), title="日收益累计曲线")
fig.update_xaxes(title='')

# add a vertical line at the first date of each year
for year in range(df.index.year.min(), df.index.year.max() + 1):
first_date = df[df.index.year == year].index.min()
fig.add_vline(x=first_date, line_dash='dash', line_color='red')

for col in kwargs.get("legend_only_cols", []):
fig.update_traces(visible="legendonly", selector=dict(name=col))

st.plotly_chart(fig, use_container_width=True)


@@ -183,15 +191,16 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
if df[y_col].max() > 100:  # if the return column looks like basis points (BP), convert to fractional returns
df[y_col] = df[y_col] / 10000

-    if df[x_col].nunique() > n:
-        df[f'{x_col}分层'] = pd.qcut(df[x_col], q=n, labels=False, duplicates='drop')
-        df[f'{x_col}分层'] = df[f'{x_col}分层'].apply(lambda x: f'第{str(x+1).zfill(2)}层')
-    else:
-        # if the factor has fewer distinct values than layers, rank the distinct values directly as layers
-        x_rank = sorted(df[x_col].unique())
-        x_rank = {x_rank[i]: f'第{str(i+1).zfill(2)}层' for i in range(len(x_rank))}
-        st.success(f"因子值分层对应关系:{x_rank}")
-        df[f'{x_col}分层'] = df[x_col].apply(lambda x: x_rank[x])
+    if f'{x_col}分层' not in df.columns:
+        # if the layering column does not exist yet, compute the factor layers first
+        if df[x_col].nunique() > n:
+            czsc.normalize_ts_feature(df, x_col, n=n)
+        else:
+            # if the factor has fewer distinct values than layers, rank the distinct values directly as layers
+            x_rank = sorted(df[x_col].unique())
+            x_rank = {x_rank[i]: f'第{str(i+1).zfill(2)}层' for i in range(len(x_rank))}
+            st.success(f"因子值分层对应关系:{x_rank}")
+            df[f'{x_col}分层'] = df[x_col].apply(lambda x: x_rank[x])

for i in range(n):
df[f'第{str(i+1).zfill(2)}层'] = np.where(df[f'{x_col}分层'] == f'第{str(i+1).zfill(2)}层', df[y_col], 0)
@@ -210,7 +219,7 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
long = col1.multiselect("多头组合", layering_cols, default=["第02层"], key="symbol_factor_long")
short = col2.multiselect("空头组合", layering_cols, default=["第01层"], key="symbol_factor_short")
dfr = mrr.copy()
-    dfr['多头'] = dfr[long].mean(axis=1)
-    dfr['空头'] = -dfr[short].mean(axis=1)
-    dfr['多空'] = (dfr['多头'] + dfr['空头']) / 2
+    dfr['多头'] = dfr[long].sum(axis=1)
+    dfr['空头'] = -dfr[short].sum(axis=1)
+    dfr['多空'] = dfr['多头'] + dfr['空头']
show_daily_return(dfr[['多头', '空头', '多空']])
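To see what the switch from mean to sum changes in the long/short aggregation above, a tiny sketch with hypothetical layer returns (the values are made up for illustration):

import pandas as pd

# hypothetical per-layer daily returns (n1b already converted to fractions)
mrr = pd.DataFrame({
    "第01层": [0.002, -0.001, 0.003],
    "第02层": [-0.001, 0.002, 0.001],
}, index=pd.date_range("2023-10-16", periods=3))

long, short = ["第02层"], ["第01层"]
dfr = mrr.copy()
dfr["多头"] = dfr[long].sum(axis=1)      # long side: sum of the selected layer returns
dfr["空头"] = -dfr[short].sum(axis=1)    # short side: negated sum
dfr["多空"] = dfr["多头"] + dfr["空头"]  # long-short spread, no longer divided by 2
print(dfr[["多头", "空头", "多空"]])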
