# Source code for czsc.utils.kline_quality

"""
author: zengbin93
email: zeng_bin8888@163.com
create_dt: 2024/4/27 15:01
describe: K线质量评估工具函数

https://hailuoai.com/?chat=241699282914746375
"""

import pandas as pd


def check_high_low(df):
    """Check for K-lines whose ``high`` is lower than their ``low``.

    :param df: DataFrame with ``high`` and ``low`` columns. It is not
        modified; the flag column is added to an internal copy only.
    :return: tuple ``(error_rate, error_klines)`` where ``error_rate`` is the
        mean of the boolean flag (the share of offending rows) and
        ``error_klines`` is a copy of the offending rows, including the
        boolean ``high_low_error`` column.
    """
    # ``assign`` returns a new frame, so the caller's DataFrame does not
    # silently gain a ``high_low_error`` column.
    flagged = df.assign(high_low_error=df["high"] < df["low"])
    error_rate = flagged["high_low_error"].mean()
    error_klines = flagged[flagged["high_low_error"]].copy()
    return error_rate, error_klines
def check_price_gap(df, **kwargs):
    """Check for opening gaps that are abnormally large for their symbol.

    Rows are ordered by ``dt`` and ``symbol``. For each symbol the gap of a
    bar is ``abs(open - previous close)``; a bar is flagged when its gap is
    greater than that symbol's mean gap plus three standard deviations.

    :param df: DataFrame with ``dt``, ``symbol``, ``open`` and ``close``
        columns. It is not modified.
    :param kwargs: accepted for call compatibility; currently unused.
    :return: tuple ``(error_rate, error_klines)``. ``error_klines`` holds the
        flagged rows with the extra ``last_close`` and ``price_gap`` columns,
        grouped by symbol in order of first appearance. ``error_rate`` is the
        number of flagged rows divided by the number of input rows, or
        ``0.0`` when nothing is flagged (including for an empty input).
    """
    df = df.copy().sort_values(["dt", "symbol"]).reset_index(drop=True)

    errors = []
    for symbol in df["symbol"].unique():
        # Explicit copy: columns are added below, and assigning onto a
        # boolean-mask slice triggers pandas' SettingWithCopyWarning.
        symbol_df = df[df["symbol"] == symbol].copy()
        symbol_df["last_close"] = symbol_df["close"].shift(1)
        symbol_df["price_gap"] = (symbol_df["open"] - symbol_df["last_close"]).abs()
        gap_th = symbol_df["price_gap"].mean() + 3 * symbol_df["price_gap"].std()

        error_ = symbol_df[symbol_df["price_gap"] > gap_th]
        if len(error_) > 0:
            errors.append(error_)

    if not errors:
        # pd.concat([]) raises ValueError, so the "no outliers" case is
        # answered directly with an empty frame of the same column layout.
        error_klines = df.iloc[:0].copy()
        error_klines["last_close"] = pd.Series(dtype="float64")
        error_klines["price_gap"] = pd.Series(dtype="float64")
        return 0.0, error_klines

    error_klines = pd.concat(errors)
    error_rate = len(error_klines) / len(df)
    return error_rate, error_klines
def check_abnormal_volume(df, **kwargs):
    """Check for K-lines with abnormally large volume for their symbol.

    Rows are ordered by ``dt`` and ``symbol``. For each symbol a bar is
    flagged when its ``vol`` is greater than that symbol's mean volume plus
    three standard deviations.

    :param df: DataFrame with ``dt``, ``symbol`` and ``vol`` columns. It is
        not modified.
    :param kwargs: accepted for call compatibility; currently unused.
    :return: tuple ``(error_rate, error_klines)``. ``error_klines`` holds the
        flagged rows grouped by symbol in order of first appearance.
        ``error_rate`` is the number of flagged rows divided by the number of
        input rows, or ``0.0`` when nothing is flagged (including for an
        empty input).
    """
    df = df.copy().sort_values(["dt", "symbol"]).reset_index(drop=True)

    errors = []
    for symbol in df["symbol"].unique():
        symbol_df = df[df["symbol"] == symbol]
        volume_threshold = symbol_df["vol"].mean() + 3 * symbol_df["vol"].std()

        error_ = symbol_df[symbol_df["vol"] > volume_threshold].copy()
        if len(error_) > 0:
            errors.append(error_)

    if not errors:
        # pd.concat([]) raises ValueError, so the "no outliers" case is
        # answered directly with an empty frame of the same column layout.
        return 0.0, df.iloc[:0].copy()

    error_klines = pd.concat(errors)
    error_rate = len(error_klines) / len(df)
    return error_rate, error_klines
def check_zero_volume(df):
    """Compute the share of K-lines whose volume is zero.

    :param df: DataFrame with ``dt``, ``symbol`` and ``vol`` columns. It is
        not modified; the work is done on a sorted, re-indexed copy.
    :return: tuple ``(error_rate, error_klines)`` where ``error_rate`` is the
        count of rows with ``vol == 0`` divided by the total row count and
        ``error_klines`` is a copy of those rows, ordered by ``dt`` then
        ``symbol``.
    """
    ordered = df.copy()
    ordered = ordered.sort_values(["dt", "symbol"])
    ordered = ordered.reset_index(drop=True)

    is_zero = ordered["vol"].eq(0)
    zero_count = is_zero.sum()
    total_rows = len(ordered)

    error_klines = ordered[is_zero].copy()
    error_rate = zero_count / total_rows
    return error_rate, error_klines