V0.9.33 update a batch of code (#173)
* 0.9.33 start coding

* 0.9.33 add time-series factor preprocessing

* 0.9.33 fix show_symbol_factor_layering

* 0.9.33 update streamlit components
zengbin93 authored Oct 22, 2023
1 parent a1145a9 commit 9e333d7
Showing 6 changed files with 148 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
@@ -5,7 +5,7 @@ name: Python package

on:
push:
-    branches: [ master, V0.9.32 ]
+    branches: [ master, V0.9.33 ]
pull_request:
branches: [ master ]

6 changes: 4 additions & 2 deletions czsc/__init__.py
@@ -90,6 +90,7 @@
show_sectional_ic,
show_factor_returns,
show_factor_layering,
show_symbol_factor_layering,
)

from czsc.utils.bi_info import (
@@ -99,12 +100,13 @@

from czsc.utils.features import (
normalize_feature,
normalize_ts_feature,
)

__version__ = "0.9.32"
__version__ = "0.9.33"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20231013"
__date__ = "20231018"



1 change: 1 addition & 0 deletions czsc/signals/__init__.py
@@ -205,6 +205,7 @@
tas_macd_bc_V230803,
tas_macd_bc_V230804,
tas_macd_bc_ubi_V230804,
tas_slope_V231019,
)

from czsc.signals.pos import (
70 changes: 60 additions & 10 deletions czsc/signals/tas.py
@@ -21,7 +21,7 @@
from czsc.analyze import CZSC
from czsc.objects import Signal, Direction, BI, RawBar, FX, Mark, ZS
from czsc.traders.base import CzscSignals
-from czsc.utils import get_sub_elements, fast_slow_cross, count_last_same, create_single_signal
+from czsc.utils import get_sub_elements, fast_slow_cross, count_last_same, create_single_signal, single_linear
from czsc.utils.sig import cross_zero_axis, cal_cross_num, down_cross_count


@@ -2789,7 +2789,7 @@ def tas_atr_V230630(c: CZSC, **kwargs) -> OrderedDict:
**Signal logic:**
The ratio of ATR to the close price measures the relative size of price swings; this value is bucketed into layers.
**Signal list:**
- Signal('日线_D1ATR14_波动V230630_第7层_任意_任意_0')
@@ -2802,7 +2802,7 @@ def tas_atr_V230630(c: CZSC, **kwargs) -> OrderedDict:
- Signal('日线_D1ATR14_波动V230630_第3层_任意_任意_0')
- Signal('日线_D1ATR14_波动V230630_第2层_任意_任意_0')
- Signal('日线_D1ATR14_波动V230630_第1层_任意_任意_0')
:param c: CZSC object
:param kwargs:
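For intuition only, here is a rough standalone sketch of the ATR-to-close layering idea described above. ATR is computed with a plain pandas rolling mean of the true range, and the quantile bucketing is an assumption for illustration; the real signal relies on czsc's cached indicators and its own layering scheme.

import pandas as pd

def atr_close_layer(df: pd.DataFrame, timeperiod: int = 14, n_layers: int = 10) -> pd.Series:
    """Bucket ATR(timeperiod) / close into n_layers quantile layers (illustrative sketch only)."""
    prev_close = df["close"].shift(1)
    tr = pd.concat([
        df["high"] - df["low"],
        (df["high"] - prev_close).abs(),
        (df["low"] - prev_close).abs(),
    ], axis=1).max(axis=1)                      # true range
    atr = tr.rolling(timeperiod).mean()         # simple moving-average ATR
    ratio = atr / df["close"]                   # price amplitude relative to price level
    return pd.qcut(ratio, q=n_layers, labels=False, duplicates="drop") + 1  # layers 1..n_layers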
@@ -2860,15 +2860,15 @@ def tas_rumi_V230704(c: CZSC, **kwargs) -> OrderedDict:
rumi_window = int(kwargs.get('rumi_window', 30))
timeperiod1 = int(kwargs.get('timeperiod1', 3))
timeperiod2 = int(kwargs.get('timeperiod2', 50))

assert rumi_window < timeperiod2, "rumi_window 必须小于 timeperiod2"
freq = c.freq.value
k1, k2, k3 = f"{freq}_D{di}F{timeperiod1}S{timeperiod2}R{rumi_window}_BS辅助V230704".split('_')
v1 = '其他'

if len(c.bars_raw) < di + timeperiod2:
return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)

key1 = update_ma_cache(c, ma_type='SMA', timeperiod=timeperiod1)
key2 = update_ma_cache(c, ma_type='WMA', timeperiod=timeperiod2)
bars = get_sub_elements(c.bars_raw, di=di, n=timeperiod2)
@@ -3204,14 +3204,14 @@ def tas_angle_V230802(c: CZSC, **kwargs) -> OrderedDict:
- n: number of bi (strokes) to include in the calculation
- di: which bi to take, counting back from the most recent
:return: signal recognition result
"""
di = int(kwargs.get('di', 1))
n = int(kwargs.get('n', 9))
th = int(kwargs.get('th', 50))
assert 300 > th > 30, "th 取值范围为 30 ~ 300"

freq = c.freq.value
k1, k2, k3 = f"{freq}_D{di}N{n}T{th}_笔角度V230802".split('_')
v1 = '其他'
@@ -3309,7 +3309,7 @@ def tas_macd_bc_V230804(c: CZSC, **kwargs) -> OrderedDict:
od_dif = max([x.cache[cache_key]['dif'] for x in b1.fx_b.raw_bars + b3.fx_b.raw_bars])
if 0 < b5_dif < od_dif:
v1 = '空头'

if b5.direction == Direction.Down and b5.low < (dd + (gg - dd) / 4):
b5_dif = min([x.cache[cache_key]['dif'] for x in b5.fx_b.raw_bars])
od_dif = min([x.cache[cache_key]['dif'] for x in b1.fx_b.raw_bars + b3.fx_b.raw_bars])
@@ -3358,11 +3358,61 @@ def tas_macd_bc_ubi_V230804(c: CZSC, **kwargs) -> OrderedDict:
od_dif = max([x.cache[cache_key]['dif'] for x in b2.fx_b.raw_bars + b4.fx_b.raw_bars])
if 0 < b5_dif < od_dif:
v1 = '空头'

if ubi['direction'] == Direction.Down and ubi['low'] < (dd + (gg - dd) / 4):
b5_dif = min([x.cache[cache_key]['dif'] for x in ubi['raw_bars'][-5:]])
od_dif = min([x.cache[cache_key]['dif'] for x in b2.fx_b.raw_bars + b4.fx_b.raw_bars])
if 0 > b5_dif > od_dif:
v1 = '多头'

return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)


def tas_slope_V231019(c: CZSC, **kwargs) -> OrderedDict:
"""DIF趋势线斜率判断多空
参数模板:"{freq}_D{di}DIF{n}斜率T{th}_BS辅助V231019"
**信号逻辑:**
取最近 N 根K线的DIF值计算斜率,然后取 N * 10 根K线的斜率值,计算斜率值的分位数,
如果分位数大于th,则看多,小于1-th,则看空。
**信号列表:**
- Signal('60分钟_D1DIF10斜率T80_BS辅助V231019_看多_任意_任意_0')
- Signal('60分钟_D1DIF10斜率T80_BS辅助V231019_看空_任意_任意_0')
:param c: CZSC object
:param kwargs: parameter dict
:return: signal result
"""
di = int(kwargs.get('di', 1))
n = int(kwargs.get('n', 10))
th = int(kwargs.get('th', 80))
assert th > 50 and th < 100, 'th 参数取值范围为 50 ~ 100'

freq = c.freq.value
cache_key = update_macd_cache(c, fastperiod=12, slowperiod=26, signalperiod=9)
k1, k2, k3 = f"{freq}_D{di}DIF{n}斜率T{th}_BS辅助V231019".split('_')
v1 = '其他'
if len(c.bars_raw) < 50:
return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)

cache_slope_key = f"tas_slope_V231019_{di}_{n}"
for i, bar in enumerate(c.bars_raw):
if i < n:
continue

if cache_slope_key not in bar.cache:
dif = [x.cache[cache_key]['dif'] for x in c.bars_raw[i - n: i]]
bar.cache[cache_slope_key] = single_linear(dif)['slope']

bars = get_sub_elements(c.bars_raw, di=di, n=n * 10)
dif_slope = [x.cache.get(cache_slope_key, 0) for x in bars]
q = (dif_slope[-1] - min(dif_slope)) / (max(dif_slope) - min(dif_slope))
if q > th / 100:
v1 = '看多'
elif q < 1 - th / 100:
v1 = '看空'
return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)
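For reference, a rough standalone sketch of the slope-and-rank logic used by tas_slope_V231019, with numpy.polyfit standing in for czsc's single_linear and a synthetic DIF series. Everything here (names, data) is illustrative, and like the committed code it uses the min-max position of the latest slope rather than a true percentile.

import numpy as np

def slope_position_signal(dif: np.ndarray, n: int = 10, th: int = 80) -> str:
    """Classify the latest DIF slope against the last n*10 slope values (illustrative only)."""
    slopes = []
    for i in range(n, len(dif) + 1):
        window = dif[i - n:i]
        slope, _ = np.polyfit(np.arange(n), window, deg=1)  # linear fit: slope, intercept
        slopes.append(slope)
    recent = np.array(slopes[-n * 10:])            # last n*10 slope values
    lo, hi = recent.min(), recent.max()
    if hi == lo:                                   # degenerate window, no signal
        return "其他"
    q = (recent[-1] - lo) / (hi - lo)              # min-max position of the latest slope
    if q > th / 100:
        return "看多"
    if q < 1 - th / 100:
        return "看空"
    return "其他"

if __name__ == "__main__":
    rng = np.random.default_rng(42)
    fake_dif = np.cumsum(rng.normal(0, 0.1, 300))  # synthetic DIF-like series
    print(slope_position_signal(fake_dif, n=10, th=80))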
65 changes: 61 additions & 4 deletions czsc/utils/features.py
@@ -5,6 +5,7 @@
create_dt: 2023/10/06 15:01
describe: factor (feature) processing
"""
import pandas as pd
from loguru import logger
from sklearn.preprocessing import scale

@@ -19,10 +20,66 @@ def normalize_feature(df, x_col, **kwargs):
- q: float, winsorization (tail-clipping) proportion, default 0.05
"""
df = df.copy()
if df[x_col].isna().sum() > 0:
logger.warning(f"因子列 {x_col} 存在缺失值,已自动剔除,这有可能导致后续分析结果不准确")
df = df.dropna(subset=[x_col])

assert df[x_col].isna().sum() == 0, "因子有缺失值,缺失数量为:{}".format(df[x_col].isna().sum())
q = kwargs.get("q", 0.05)  # winsorization proportion
df[x_col] = df.groupby("dt")[x_col].transform(lambda x: scale(x.clip(lower=x.quantile(q), upper=x.quantile(1 - q))))
return df
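For context, a minimal sketch of the cross-sectional transform that normalize_feature applies within each dt group: winsorize at the q / 1-q quantiles, then z-score with sklearn's scale. The toy frame and its dt/symbol/factor columns are made up for illustration.

import numpy as np
import pandas as pd
from sklearn.preprocessing import scale

rng = np.random.default_rng(0)
toy = pd.DataFrame({
    "dt": np.repeat(pd.date_range("2023-10-01", periods=3), 50),
    "symbol": [f"S{i:03d}" for i in range(50)] * 3,
    "factor": rng.normal(size=150),
})

q = 0.05  # winsorization proportion, same default as normalize_feature
toy["factor"] = toy.groupby("dt")["factor"].transform(
    lambda x: scale(x.clip(lower=x.quantile(q), upper=x.quantile(1 - q))))
print(toy.groupby("dt")["factor"].agg(["mean", "std"]))  # roughly 0 mean, unit std per date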


def normalize_ts_feature(df, x_col, n=10, **kwargs):
"""对时间序列数据进行归一化处理
:param df: 因子数据,必须包含 dt, x_col 列,其中 dt 为日期,x_col 为因子值,数据样例:
:param x_col: 因子列名
:param n: 分层数量,默认为10
:param kwargs:
- method: 分层方法,expanding 或 rolling,默认为 expanding
- min_periods: expanding 时的最小样本数量,默认为300
:return: df, 添加了 x_col_norm, x_col_qcut, x_col分层 列
"""
assert df[x_col].nunique() > n, "因子值的取值数量必须大于分层数量"
assert df[x_col].isna().sum() == 0, "因子有缺失值,缺失数量为:{}".format(df[x_col].isna().sum())
method = kwargs.get("method", "expanding")
min_periods = kwargs.get("min_periods", 300)

if f"{x_col}_norm" not in df.columns:
if method == "expanding":
df[f"{x_col}_norm"] = df[x_col].expanding(min_periods=min_periods).apply(
lambda x: (x.iloc[-1] - x.mean()) / x.std(), raw=False)

elif method == "rolling":
df[f"{x_col}_norm"] = df[x_col].rolling(min_periods=min_periods, window=min_periods).apply(
lambda x: (x.iloc[-1] - x.mean()) / x.std(), raw=False)

else:
raise ValueError("method 必须为 expanding 或 rolling")

# fill the leading rows (before min_periods) of the normalized column by standardizing the raw values of that segment
na_x = df[df[f"{x_col}_norm"].isna()][x_col].values
df.loc[df[f"{x_col}_norm"].isna(), f"{x_col}_norm"] = (na_x - na_x.mean()) / na_x.std()

if f"{x_col}_qcut" not in df.columns:
if method == "expanding":
df[f'{x_col}_qcut'] = df[x_col].expanding(min_periods=min_periods).apply(
lambda x: pd.qcut(x, q=n, labels=False, duplicates='drop', retbins=False).values[-1], raw=False)

elif method == "rolling":
df[f'{x_col}_qcut'] = df[x_col].rolling(min_periods=min_periods, window=min_periods).apply(
lambda x: pd.qcut(x, q=n, labels=False, duplicates='drop', retbins=False).values[-1], raw=False)

else:
raise ValueError("method 必须为 expanding 或 rolling")

# fill the leading rows of the qcut column by bucketing the raw values of that segment with pd.qcut
na_x = df[df[f"{x_col}_qcut"].isna()][x_col].values
df.loc[df[f"{x_col}_qcut"].isna(), f"{x_col}_qcut"] = pd.qcut(na_x, q=n, labels=False, duplicates='drop', retbins=False)

if df[f'{x_col}_qcut'].isna().sum() > 0:
logger.warning(f"因子 {x_col} 分层存在 {df[f'{x_col}_qcut'].isna().sum()} 个缺失值,已使用前值填充")
df[f'{x_col}_qcut'] = df[f'{x_col}_qcut'].ffill()

df[f'{x_col}分层'] = df[f'{x_col}_qcut'].apply(lambda x: f'第{str(int(x+1)).zfill(2)}层')

return df
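A minimal usage sketch of the new normalize_ts_feature (assumes czsc 0.9.33 is installed; the dt/factor column names and the synthetic series are only for illustration):

import numpy as np
import pandas as pd
from czsc.utils.features import normalize_ts_feature  # new in 0.9.33

rng = np.random.default_rng(1)
df = pd.DataFrame({
    "dt": pd.date_range("2022-01-01", periods=800, freq="D"),
    "factor": np.cumsum(rng.normal(size=800)),   # random-walk style factor
})

df = normalize_ts_feature(df, x_col="factor", n=10, method="expanding", min_periods=300)
print(df[["factor", "factor_norm", "factor_qcut", "factor分层"]].tail())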
33 changes: 21 additions & 12 deletions czsc/utils/st_components.py
@@ -41,8 +41,16 @@ def _stats(df_, type_='持有日'):

df = df.cumsum()
fig = px.line(df, y=df.columns.to_list(), title="日收益累计曲线")
fig.update_xaxes(title='')

# add a vertical line at the first date of each year
for year in range(df.index.year.min(), df.index.year.max() + 1):
first_date = df[df.index.year == year].index.min()
fig.add_vline(x=first_date, line_dash='dash', line_color='red')

for col in kwargs.get("legend_only_cols", []):
fig.update_traces(visible="legendonly", selector=dict(name=col))

st.plotly_chart(fig, use_container_width=True)


@@ -183,15 +191,16 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
if df[y_col].max() > 100:  # if the return column looks like basis points (BP), convert to fractional returns
df[y_col] = df[y_col] / 10000

-    if df[x_col].nunique() > n:
-        df[f'{x_col}分层'] = pd.qcut(df[x_col], q=n, labels=False, duplicates='drop')
-        df[f'{x_col}分层'] = df[f'{x_col}分层'].apply(lambda x: f'第{str(x+1).zfill(2)}层')
-    else:
-        # if the factor has fewer distinct values than layers, rank the distinct values directly as layers
-        x_rank = sorted(df[x_col].unique())
-        x_rank = {x_rank[i]: f'第{str(i+1).zfill(2)}层' for i in range(len(x_rank))}
-        st.success(f"因子值分层对应关系:{x_rank}")
-        df[f'{x_col}分层'] = df[x_col].apply(lambda x: x_rank[x])
+    if f'{x_col}分层' not in df.columns:
+        # if the layering column does not exist yet, compute the factor layers first
+        if df[x_col].nunique() > n:
+            czsc.normalize_ts_feature(df, x_col, n=n)
+        else:
+            # if the factor has fewer distinct values than layers, rank the distinct values directly as layers
+            x_rank = sorted(df[x_col].unique())
+            x_rank = {x_rank[i]: f'第{str(i+1).zfill(2)}层' for i in range(len(x_rank))}
+            st.success(f"因子值分层对应关系:{x_rank}")
+            df[f'{x_col}分层'] = df[x_col].apply(lambda x: x_rank[x])

for i in range(n):
df[f'第{str(i+1).zfill(2)}层'] = np.where(df[f'{x_col}分层'] == f'第{str(i+1).zfill(2)}层', df[y_col], 0)
@@ -210,7 +219,7 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
long = col1.multiselect("多头组合", layering_cols, default=["第02层"], key="symbol_factor_long")
short = col2.multiselect("空头组合", layering_cols, default=["第01层"], key="symbol_factor_short")
dfr = mrr.copy()
-    dfr['多头'] = dfr[long].mean(axis=1)
-    dfr['空头'] = -dfr[short].mean(axis=1)
-    dfr['多空'] = (dfr['多头'] + dfr['空头']) / 2
+    dfr['多头'] = dfr[long].sum(axis=1)
+    dfr['空头'] = -dfr[short].sum(axis=1)
+    dfr['多空'] = dfr['多头'] + dfr['空头']
show_daily_return(dfr[['多头', '空头', '多空']])
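To see what the switch from mean to sum changes in the long/short aggregation above, a tiny sketch with hypothetical layer returns (the values are made up for illustration):

import pandas as pd

# hypothetical per-layer daily returns (n1b already converted to fractions)
mrr = pd.DataFrame({
    "第01层": [0.002, -0.001, 0.003],
    "第02层": [-0.001, 0.002, 0.001],
}, index=pd.date_range("2023-10-16", periods=3))

long, short = ["第02层"], ["第01层"]
dfr = mrr.copy()
dfr["多头"] = dfr[long].sum(axis=1)      # long side: sum of the selected layer returns
dfr["空头"] = -dfr[short].sum(axis=1)    # short side: negated sum
dfr["多空"] = dfr["多头"] + dfr["空头"]  # long-short spread, no longer divided by 2
print(dfr[["多头", "空头", "多空"]])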
