Skip to content

Commit

Permalink
0.9.33 更新 streamlit 组件
Browse files: browse the repository at this point in the history
  • Loading branch information
zengbin93 committed Oct 21, 2023
1 parent 91edcfa commit 8dc7b25
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 21 deletions.
1 change: 1 addition & 0 deletions czsc/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -90,6 +90,7 @@
show_sectional_ic,
show_factor_returns,
show_factor_layering,
show_symbol_factor_layering,
)

from czsc.utils.bi_info import (
Expand Down
40 changes: 30 additions & 10 deletions czsc/utils/features.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,10 +20,7 @@ def normalize_feature(df, x_col, **kwargs):
- q: float,缩尾比例, 默认 0.05
"""
df = df.copy()
if df[x_col].isna().sum() > 0:
logger.warning(f"因子列 {x_col} 存在缺失值,已自动剔除,这有可能导致后续分析结果不准确")
df = df.dropna(subset=[x_col])

assert df[x_col].isna().sum() == 0, "因子有缺失值,缺失数量为:{}".format(df[x_col].isna().sum())
q = kwargs.get("q", 0.05) # 缩尾比例
df[x_col] = df.groupby("dt")[x_col].transform(lambda x: scale(x.clip(lower=x.quantile(q), upper=x.quantile(1 - q))))
return df
Expand All @@ -37,29 +34,52 @@ def normalize_ts_feature(df, x_col, n=10, **kwargs):
:param n: 分层数量,默认为10
:param kwargs:
- method: 分层方法,expanding 或 rolling,默认为 expanding
- min_periods: expanding 时的最小样本数量,默认为300
:return: df, 添加了 x_col_norm, x_col_qcut, x_col分层 列
"""
assert df[x_col].nunique() > n, "因子值的取值数量必须大于分层数量"
assert df[x_col].isna().sum() == 0, "因子有缺失值,缺失数量为:{}".format(df[x_col].isna().sum())
method = kwargs.get("method", "expanding")
min_periods = kwargs.get("min_periods", 300)
if df[x_col].isna().sum() > 0:
logger.warning(f"因子列 {x_col} 存在缺失值,请注意!建议先对因子缺失值进行填充")

if f"{x_col}_norm" not in df.columns:
df[f"{x_col}_norm"] = df[x_col].expanding(min_periods=min_periods).apply(
lambda x: (x.iloc[-1] - x.mean()) / x.std(), raw=False)
if method == "expanding":
df[f"{x_col}_norm"] = df[x_col].expanding(min_periods=min_periods).apply(
lambda x: (x.iloc[-1] - x.mean()) / x.std(), raw=False)

elif method == "rolling":
df[f"{x_col}_norm"] = df[x_col].rolling(min_periods=min_periods, window=min_periods).apply(
lambda x: (x.iloc[-1] - x.mean()) / x.std(), raw=False)

else:
raise ValueError("method 必须为 expanding 或 rolling")

# 用标准化后的值填充原始值中的缺失值
na_x = df[df[f"{x_col}_norm"].isna()][x_col].values
df.loc[df[f"{x_col}_norm"].isna(), f"{x_col}_norm"] = na_x - na_x.mean() / na_x.std()

if f"{x_col}_qcut" not in df.columns:
df[f'{x_col}_qcut'] = df[x_col].expanding(min_periods=min_periods).apply(
lambda x: pd.qcut(x, q=n, labels=False, duplicates='drop', retbins=False).values[-1], raw=False)
if method == "expanding":
df[f'{x_col}_qcut'] = df[x_col].expanding(min_periods=min_periods).apply(
lambda x: pd.qcut(x, q=n, labels=False, duplicates='drop', retbins=False).values[-1], raw=False)

elif method == "rolling":
df[f'{x_col}_qcut'] = df[x_col].rolling(min_periods=min_periods, window=min_periods).apply(
lambda x: pd.qcut(x, q=n, labels=False, duplicates='drop', retbins=False).values[-1], raw=False)

else:
raise ValueError("method 必须为 expanding 或 rolling")

# 用分位数后的值填充原始值中的缺失值
na_x = df[df[f"{x_col}_qcut"].isna()][x_col].values
df.loc[df[f"{x_col}_qcut"].isna(), f"{x_col}_qcut"] = pd.qcut(na_x, q=n, labels=False, duplicates='drop', retbins=False)

if df[f'{x_col}_qcut'].isna().sum() > 0:
logger.warning(f"因子 {x_col} 分层存在 {df[f'{x_col}_qcut'].isna().sum()} 个缺失值,已使用前值填充")
df[f'{x_col}_qcut'] = df[f'{x_col}_qcut'].ffill()

df[f'{x_col}分层'] = df[f'{x_col}_qcut'].apply(lambda x: f'第{str(int(x+1)).zfill(2)}层')

return df
32 changes: 21 additions & 11 deletions czsc/utils/st_components.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -41,8 +41,16 @@ def _stats(df_, type_='持有日'):

df = df.cumsum()
fig = px.line(df, y=df.columns.to_list(), title="日收益累计曲线")
fig.update_xaxes(title='')

# 添加每年的开始第一个日期的竖线
for year in range(df.index.year.min(), df.index.year.max() + 1):
first_date = df[df.index.year == year].index.min()
fig.add_vline(x=first_date, line_dash='dash', line_color='red')

for col in kwargs.get("legend_only_cols", []):
fig.update_traces(visible="legendonly", selector=dict(name=col))

st.plotly_chart(fig, use_container_width=True)


Expand Down Expand Up @@ -183,14 +191,16 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
if df[y_col].max() > 100: # 如果收益率单位为BP, 转换为万分之一
df[y_col] = df[y_col] / 10000

if df[x_col].nunique() > n:
czsc.normalize_ts_feature(df, x_col)
else:
# 如果因子值的取值数量小于分层数量,直接使用因子独立值排序作为分层
x_rank = sorted(df[x_col].unique())
x_rank = {x_rank[i]: f'第{str(i+1).zfill(2)}层' for i in range(len(x_rank))}
st.success(f"因子值分层对应关系:{x_rank}")
df[f'{x_col}分层'] = df[x_col].apply(lambda x: x_rank[x])
if f'{x_col}分层' not in df.columns:
# 如果因子分层列不存在,先计算因子分层
if df[x_col].nunique() > n:
czsc.normlize_ts_feature(df, x_col, n=n)
else:
# 如果因子值的取值数量小于分层数量,直接使用因子独立值排序作为分层
x_rank = sorted(df[x_col].unique())
x_rank = {x_rank[i]: f'第{str(i+1).zfill(2)}层' for i in range(len(x_rank))}
st.success(f"因子值分层对应关系:{x_rank}")
df[f'{x_col}分层'] = df[x_col].apply(lambda x: x_rank[x])

for i in range(n):
df[f'第{str(i+1).zfill(2)}层'] = np.where(df[f'{x_col}分层'] == f'第{str(i+1).zfill(2)}层', df[y_col], 0)
Expand All @@ -209,7 +219,7 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
long = col1.multiselect("多头组合", layering_cols, default=["第02层"], key="symbol_factor_long")
short = col2.multiselect("空头组合", layering_cols, default=["第01层"], key="symbol_factor_short")
dfr = mrr.copy()
dfr['多头'] = dfr[long].mean(axis=1)
dfr['空头'] = -dfr[short].mean(axis=1)
dfr['多空'] = (dfr['多头'] + dfr['空头']) / 2
dfr['多头'] = dfr[long].sum(axis=1)
dfr['空头'] = -dfr[short].sum(axis=1)
dfr['多空'] = dfr['多头'] + dfr['空头']
show_daily_return(dfr[['多头', '空头', '多空']])

0 comments on commit 8dc7b25

Please sign in to comment.