Skip to content

Commit

Permalink
0.9.47 first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
zengbin93 committed Mar 29, 2024
1 parent ff0eef0 commit 9575914
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python package

on:
push:
branches: [ master, V0.9.46 ]
branches: [ master, V0.9.47 ]
pull_request:
branches: [ master ]

Expand Down
5 changes: 3 additions & 2 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,13 @@
rolling_slope,
rolling_tanh,
feature_adjust,
normalize_corr,
)

__version__ = "0.9.46"
__version__ = "0.9.47"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20240318"
__date__ = "20240328"


def welcome():
Expand Down
5 changes: 5 additions & 0 deletions czsc/connectors/cooperation.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ def get_symbols(name, **kwargs):
kline = dc.future_klines(trade_date="20231101")
return kline['code'].unique().tolist()

if name.upper() == "ALL":
symbols = get_symbols("股票") + get_symbols("ETF")
symbols += get_symbols("A股指数") + get_symbols("南华指数") + get_symbols("期货主力")
return symbols

raise ValueError(f"{name} 分组无法识别,获取标的列表失败!")


Expand Down
51 changes: 51 additions & 0 deletions czsc/features/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,52 @@ def __lr_slope(x):
return df


def normalize_corr(df: pd.DataFrame, fcol, ycol=None, **kwargs):
    """Flip the sign of a factor so that it tends to correlate positively with returns.

    For every symbol, the factor is multiplied by the sign of its correlation
    with a target return series.

    **Notes:**

    1. In ``simple`` mode the sign comes from the correlation over the whole
       sample of a symbol, so the result contains look-ahead information; take
       care when using it in a backtest. If that correlation is undefined
       (NaN), the factor of that symbol becomes NaN.
    2. In ``rolling`` mode the sign comes from a rolling correlation and is
       lagged by three rows before being applied; rows without a usable sign
       are set to 0. The outcome depends on ``window`` and the adjusted factor
       may still be negatively correlated with returns.

    :param df: pd.DataFrame, must contain ``dt`` and ``symbol`` columns, the
        factor column, and either a ``price`` column or the ``ycol`` column.
        The input frame is never modified.
    :param fcol: str, name of the factor column.
    :param ycol: str or None, name of an existing column holding the target
        return for each row. When None (default), the target is the
        next-row percentage change of ``price`` within each symbol.
    :param kwargs: dict

        - window: int, rolling window size (default 1000)
        - min_periods: int, minimum observations for the rolling correlation (default 5)
        - mode: str, ``rolling`` (default) or ``simple``; case-insensitive
        - copy: bool, accepted for backward compatibility; it has no effect
          because sorting already produces a new DataFrame

    :return: pd.DataFrame, a new frame sorted by ``symbol`` and ``dt`` with a
        fresh integer index, in which ``fcol`` holds the adjusted factor.
        No columns are added.
    :raises ValueError: if ``mode`` is not recognised or ``ycol`` is given but
        is not a column of ``df``.
    """
    window = kwargs.get("window", 1000)
    min_periods = kwargs.get("min_periods", 5)
    mode = kwargs.get("mode", "rolling")

    # Validate up front so that bad arguments fail even when df has no rows.
    method = str(mode).lower()
    if method not in ("rolling", "simple"):
        raise ValueError(f"Unknown mode: {mode}")
    if ycol is not None and ycol not in df.columns:
        raise ValueError(f"Column {ycol!r} not found in df")

    # sort_values / reset_index return a new frame, so the caller's df is untouched.
    df = df.sort_values(["symbol", "dt"], ascending=True).reset_index(drop=True)

    for _, dfg in df.groupby("symbol"):
        # Work on standalone Series instead of adding columns to the group
        # sub-frame, which would be a chained assignment on a slice of df.
        factor = dfg[fcol]
        if ycol is None:
            target = dfg["price"].pct_change().shift(-1)
        else:
            target = dfg[ycol]

        if method == "rolling":
            rolling_corr = factor.rolling(window=window, min_periods=min_periods).corr(target)
            # NOTE(review): the 3-row lag is presumably there to keep the applied
            # sign free of information from the row's own future return - confirm.
            adjusted = (np.sign(rolling_corr).shift(3) * factor).fillna(0)
        else:
            adjusted = np.sign(factor.corr(target)) * factor

        # The index is unique after reset_index, so the group's labels address
        # exactly its own rows; no per-symbol boolean mask is needed.
        df.loc[dfg.index, fcol] = adjusted
    return df


def feature_adjust_V230101(df: pd.DataFrame, fcol, **kwargs):
"""特征调整函数:对特征进行调整,使其符合持仓权重的定义
Expand Down Expand Up @@ -312,6 +358,7 @@ def feature_adjust(df: pd.DataFrame, fcol, method, **kwargs):
:param fcol: str, 因子列名
:param method: str, 调整方法
- KEEP: 直接使用原始因子值作为权重
- V230101: 对因子进行滚动相关系数计算,然后对因子值用 maxabs_scale 进行归一化,最后乘以滚动相关系数的符号
- V240323: 对因子进行滚动相关系数计算,然后对因子值用 scale + tanh 进行归一化,最后乘以滚动相关系数的符号
Expand All @@ -322,6 +369,10 @@ def feature_adjust(df: pd.DataFrame, fcol, method, **kwargs):
:return: pd.DataFrame, 新增 weight 列
"""
if method == "KEEP":
df["weight"] = df[fcol]
return df

if method == "V230101":
return feature_adjust_V230101(df, fcol, **kwargs)
elif method == "V240323":
Expand Down
3 changes: 2 additions & 1 deletion czsc/utils/st_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,7 @@ def show_out_in_compare(df, ret_col, mid_dt, **kwargs):
df = df[[ret_col]].copy().fillna(0)
df.sort_index(inplace=True, ascending=True)

mid_dt = pd.to_datetime(mid_dt)
dfi = df[df.index < mid_dt].copy()
dfo = df[df.index >= mid_dt].copy()

Expand Down Expand Up @@ -807,7 +808,7 @@ def show_out_in_compare(df, ret_col, mid_dt, **kwargs):
'新高占比': '{:.2%}',
}
)
st.dataframe(df_stats, use_container_width=True)
st.dataframe(df_stats, use_container_width=True, hide_index=True)


def show_optuna_study(study: optuna.Study, **kwargs):
Expand Down
34 changes: 34 additions & 0 deletions test/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,37 @@ def test_rolling_tanh():
result_df = rolling_tanh(df, 'col1', new_col='col1_tanh3', window=100, min_periods=50)
assert 'col1_tanh3' in result_df.columns
assert result_df['col1_tanh3'].between(-1, 1).all()


def test_normalize_corr():
    """Check normalize_corr on a seeded random single-symbol frame.

    Rolling mode is run with three window sizes and simple mode once; each run
    must keep the input shape, and the sign of the factor/return correlation is
    compared with that of the raw factor.
    """
    from czsc.features.utils import normalize_corr

    np.random.seed(123)
    n_rows = 3000
    # Build a fake single-symbol frame; price is drawn before factor.
    df = pd.DataFrame(
        {
            "dt": pd.date_range(start="1/1/2021", periods=n_rows),
            "symbol": ["AAPL"] * n_rows,
            "price": np.random.rand(n_rows),
            "factor": np.random.rand(n_rows),
        }
    )

    # Next-bar return, used as the reference series for every correlation below.
    df["n1b"] = df["price"].shift(-1) / df["price"] - 1
    raw_corr = df["n1b"].corr(df["factor"])

    # (window, multiplier applied to the raw sign that the adjusted sign must equal)
    rolling_cases = ((600, -1), (300, 1), (2000, -1))
    for window, multiplier in rolling_cases:
        result = normalize_corr(df, fcol="factor", copy=True, mode="rolling", window=window)
        corr1 = result["n1b"].corr(result["factor"])
        assert result.shape == df.shape and np.sign(corr1) == multiplier * np.sign(raw_corr)

    # Simple mode mirrors the factor, so the correlation is exactly negated here.
    result = normalize_corr(df, fcol="factor", copy=True, mode="simple")
    corr2 = result["n1b"].corr(result["factor"])
    assert result.shape == df.shape and corr2 == -raw_corr

0 comments on commit 9575914

Please sign in to comment.