"""
K-line (candlestick bar) quality assessment helpers.

author: zengbin93
email: zeng_bin8888@163.com
create_dt: 2024/4/27 15:01

Reconstructed from patch 790d2643 (release 0.9.49), which adds this module as
``czsc/utils/kline_quality.py`` and re-exports the four ``check_*`` functions
from the top-level ``czsc`` package.

Every function returns a ``(error_rate, error_klines)`` tuple, where
``error_rate`` is the share of rows flagged and ``error_klines`` is a
DataFrame holding the flagged rows. The input DataFrame is never modified.

https://hailuoai.com/?chat=241699282914746375
"""

import pandas as pd


def check_high_low(df):
    """Flag bars whose ``high`` is lower than their ``low``.

    :param df: DataFrame with at least the columns ``high`` and ``low``.
    :return: ``(error_rate, error_klines)``; ``error_klines`` carries an extra
        boolean column ``high_low_error`` (always True for the returned rows).
        ``error_rate`` is 0.0 for an empty input.
    """
    # ``assign`` works on a copy, so the caller's frame does not silently
    # gain a ``high_low_error`` column.
    flagged = df.assign(high_low_error=df["high"] < df["low"])
    error_klines = flagged[flagged["high_low_error"]].copy()
    if flagged.empty:
        return 0.0, error_klines
    return float(flagged["high_low_error"].mean()), error_klines


def check_price_gap(df, **kwargs):
    """Flag bars whose opening gap is abnormally large for their symbol.

    The gap of a bar is ``abs(open - previous close)`` within the same symbol
    (bars ordered by ``dt``). A bar is flagged when its gap exceeds
    ``mean + n_std * std`` of all gaps of that symbol.

    :param df: DataFrame with the columns ``dt``, ``symbol``, ``open`` and
        ``close``.
    :param kwargs:
        - n_std: number of standard deviations above the mean used as the
          threshold, default 3.
    :return: ``(error_rate, error_klines)``; ``error_klines`` carries the
        extra columns ``last_close`` and ``price_gap``. When nothing is
        flagged the rate is 0.0 and the frame is empty.
    """
    n_std = kwargs.get("n_std", 3)
    # sort_values / reset_index both return new objects; the input is untouched.
    df = df.sort_values(["dt", "symbol"]).reset_index(drop=True)
    if df.empty:
        return 0.0, df.assign(last_close=float("nan"), price_gap=float("nan"))

    # Shift inside each symbol so the first bar of a symbol has no previous
    # close (NaN gap) instead of borrowing one from another symbol.
    df["last_close"] = df.groupby("symbol")["close"].shift(1)
    df["price_gap"] = (df["open"] - df["last_close"]).abs()

    errors = []
    # sort=False keeps symbols in order of first appearance.
    for _, symbol_df in df.groupby("symbol", sort=False):
        gaps = symbol_df["price_gap"]
        gap_th = gaps.mean() + n_std * gaps.std()
        outliers = symbol_df[gaps > gap_th]
        if not outliers.empty:
            errors.append(outliers)

    # pd.concat raises ValueError on an empty list, so handle "no outliers".
    if not errors:
        return 0.0, df.iloc[:0].copy()

    error_klines = pd.concat(errors)
    return len(error_klines) / len(df), error_klines


def check_abnormal_volume(df, **kwargs):
    """Flag bars whose volume is abnormally high for their symbol.

    A bar is flagged when its ``vol`` exceeds ``mean + n_std * std`` of the
    volumes of the same symbol.

    :param df: DataFrame with the columns ``dt``, ``symbol`` and ``vol``.
    :param kwargs:
        - n_std: number of standard deviations above the mean used as the
          threshold, default 3.
    :return: ``(error_rate, error_klines)``. When nothing is flagged the rate
        is 0.0 and the frame is empty.
    """
    n_std = kwargs.get("n_std", 3)
    df = df.sort_values(["dt", "symbol"]).reset_index(drop=True)

    errors = []
    for _, symbol_df in df.groupby("symbol", sort=False):
        volume_threshold = symbol_df["vol"].mean() + n_std * symbol_df["vol"].std()
        outliers = symbol_df[symbol_df["vol"] > volume_threshold]
        if not outliers.empty:
            errors.append(outliers)

    # pd.concat raises ValueError on an empty list, so handle "no outliers"
    # (this also covers an empty input frame).
    if not errors:
        return 0.0, df.iloc[:0].copy()

    error_klines = pd.concat(errors)
    return len(error_klines) / len(df), error_klines


def check_zero_volume(df):
    """Compute the share of bars with zero volume.

    :param df: DataFrame with the columns ``dt``, ``symbol`` and ``vol``.
    :return: ``(error_rate, error_klines)`` where ``error_klines`` holds the
        bars whose ``vol`` equals 0. ``error_rate`` is 0.0 for an empty input.
    """
    df = df.sort_values(["dt", "symbol"]).reset_index(drop=True)
    is_zero = df["vol"].eq(0)
    error_klines = df[is_zero].copy()
    if df.empty:
        return 0.0, error_klines
    return float(is_zero.sum() / len(df)), error_klines