Skip to content

Commit

Permalink
0.9.49 新增K线质量检查函数
Browse files: browse the repository at this point in the history
  • Loading branch information
zengbin93 committed Apr 30, 2024
1 parent 572aa03 commit 790d264
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 0 deletions.
9 changes: 9 additions & 0 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,15 @@
normalize_corr,
)


from czsc.utils.kline_quality import (
check_high_low,
check_price_gap,
check_abnormal_volume,
check_zero_volume,
)


# Package metadata: release version string, author name and contact e-mail.
__version__ = "0.9.49"
__author__ = "zengbin93"
__email__ = "[email protected]"
Expand Down
67 changes: 67 additions & 0 deletions czsc/utils/kline_quality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
author: zengbin93
email: [email protected]
create_dt: 2024/4/27 15:01
describe: K线质量评估工具函数
https://hailuoai.com/?chat=241699282914746375
"""

import pandas as pd


def check_high_low(df):
    """Check for K-lines whose high price is lower than the low price.

    The check runs on a new frame, so the caller's DataFrame is not modified
    (no ``high_low_error`` column is written back onto the input).

    :param df: K-line data with at least ``high`` and ``low`` columns.
    :return: ``(error_rate, error_klines)`` -- the fraction of rows where
        ``high < low`` and a copy of those rows, which carries an extra
        boolean ``high_low_error`` column.
    """
    # ``assign`` returns a new DataFrame, leaving the input untouched.
    flagged = df.assign(high_low_error=df["high"] < df["low"])
    error_rate = flagged["high_low_error"].mean()
    error_klines = flagged[flagged["high_low_error"]].copy()
    return error_rate, error_klines


def check_price_gap(df, **kwargs):
    """Check for opening gaps that are abnormally large for their symbol.

    For every symbol the gap of a K-line is ``abs(open - previous close)``.
    A K-line is reported when its gap exceeds the symbol's
    ``mean(gap) + n_std * std(gap)``.

    :param df: K-line data with ``dt``, ``symbol``, ``open`` and ``close``
        columns. The input is copied and never modified.
    :param kwargs: optional settings

        - n_std: number of standard deviations above the mean gap used as
          the threshold, default 3.

    :return: ``(error_rate, error_klines)`` -- the number of reported rows
        divided by the total number of rows, and the reported rows with the
        helper columns ``last_close`` and ``price_gap``. When nothing is
        reported (or ``df`` is empty) the rate is 0 and the frame is empty.
    """
    n_std = kwargs.get("n_std", 3)
    df = df.copy().sort_values(["dt", "symbol"]).reset_index(drop=True)
    errors = []
    for symbol in df["symbol"].unique():
        # Explicit copy: the helper columns must not be written onto a slice of df.
        symbol_df = df[df["symbol"] == symbol].copy()
        symbol_df["last_close"] = symbol_df["close"].shift(1)
        symbol_df["price_gap"] = (symbol_df["open"] - symbol_df["last_close"]).abs()
        gap_th = symbol_df["price_gap"].mean() + n_std * symbol_df["price_gap"].std()
        error_ = symbol_df[symbol_df["price_gap"] > gap_th]
        if len(error_) > 0:
            errors.append(error_)

    if errors:
        error_klines = pd.concat(errors)
    else:
        # pd.concat raises ValueError on an empty list; return an empty frame
        # with the same columns a non-empty result would have.
        error_klines = df.iloc[0:0].assign(
            last_close=pd.Series(dtype="float64"),
            price_gap=pd.Series(dtype="float64"),
        )

    # Guard the division for an empty input.
    error_rate = len(error_klines) / len(df) if len(df) > 0 else 0.0
    return error_rate, error_klines


def check_abnormal_volume(df, **kwargs):
    """Check for K-lines whose volume is abnormally high for their symbol.

    A K-line is reported when its ``vol`` exceeds the symbol's
    ``mean(vol) + n_std * std(vol)``.

    :param df: K-line data with ``dt``, ``symbol`` and ``vol`` columns.
        The input is copied and never modified.
    :param kwargs: optional settings

        - n_std: number of standard deviations above the mean volume used
          as the threshold, default 3.

    :return: ``(error_rate, error_klines)`` -- the number of reported rows
        divided by the total number of rows, and the reported rows. When
        nothing is reported (or ``df`` is empty) the rate is 0 and the
        frame is empty.
    """
    n_std = kwargs.get("n_std", 3)
    df = df.copy().sort_values(["dt", "symbol"]).reset_index(drop=True)
    errors = []
    for symbol in df["symbol"].unique():
        symbol_df = df[df["symbol"] == symbol]
        volume_threshold = symbol_df["vol"].mean() + n_std * symbol_df["vol"].std()
        error_ = symbol_df[symbol_df["vol"] > volume_threshold].copy()
        if len(error_) > 0:
            errors.append(error_)

    # pd.concat raises ValueError on an empty list, so fall back to an
    # empty frame with the input's columns when nothing was flagged.
    error_klines = pd.concat(errors) if errors else df.iloc[0:0].copy()

    # Guard the division for an empty input.
    error_rate = len(error_klines) / len(df) if len(df) > 0 else 0.0
    return error_rate, error_klines


def check_zero_volume(df):
    """Compute the share of K-lines with zero volume.

    :param df: K-line data with ``dt``, ``symbol`` and ``vol`` columns;
        a sorted copy is used, the input itself is not modified.
    :return: ``(error_rate, error_klines)`` -- the fraction of rows whose
        ``vol`` equals 0 and a copy of those rows, taken from the frame
        sorted by ``dt`` then ``symbol`` with a fresh integer index.
    """
    ordered = df.copy().sort_values(["dt", "symbol"]).reset_index(drop=True)
    is_zero = ordered["vol"].eq(0)
    error_klines = ordered[is_zero].copy()
    error_rate = is_zero.sum() / len(ordered)
    return error_rate, error_klines
39 changes: 39 additions & 0 deletions test/test_kline_quality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import pandas as pd
from czsc.utils.kline_quality import (
check_high_low,
check_price_gap,
check_abnormal_volume,
check_zero_volume,
)
from test.test_analyze import read_daily


def test_check_high_low():
    """The daily sample bars must contain no K-line with high < low."""
    bars = read_daily()
    frame = pd.DataFrame([bar.__dict__ for bar in bars])
    result = check_high_low(frame)
    error_rate, error_klines = result
    assert error_rate == 0


def test_check_price_gap():
    """The price-gap error rate of the daily sample bars rounds to 0.0183."""
    bars = read_daily()
    frame = pd.DataFrame([bar.__dict__ for bar in bars])
    result = check_price_gap(frame)
    error_rate, error_klines = result
    assert round(error_rate, 4) == 0.0183
    print(error_klines)


def test_check_abnormal_volume():
    """The abnormal-volume error rate of the daily sample bars rounds to 0.0306."""
    bars = read_daily()
    frame = pd.DataFrame([bar.__dict__ for bar in bars])
    result = check_abnormal_volume(frame)
    error_rate, error_klines = result
    assert round(error_rate, 4) == 0.0306
    print(error_klines)


def test_check_zero_volume():
    """The daily sample bars must contain no zero-volume K-line."""
    bars = read_daily()
    frame = pd.DataFrame([bar.__dict__ for bar in bars])
    result = check_zero_volume(frame)
    error_rate, error_klines = result
    assert error_rate == 0
    print(error_klines)

0 comments on commit 790d264

Please sign in to comment.