# 飞书文档:https://s0cqcxuy3p.feishu.cn/wiki/AATuw5vN7iN9XbkVPuwcE186n9f
import czsc
import hashlib
import optuna
import numpy as np
import pandas as pd
import streamlit as st
import plotly.express as px
import statsmodels.api as sm
import plotly.graph_objects as go
from deprecated import deprecated
from sklearn.linear_model import LinearRegression
def __stats_style(stats):
stats = stats.style.background_gradient(cmap="RdYlGn_r", axis=None, subset=["年化"])
stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["绝对收益"])
stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["夏普"])
stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["最大回撤"])
stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["卡玛"])
stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["年化波动率"])
stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["下行波动率"])
stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["盈亏平衡点"])
stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["日胜率"])
stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["日盈亏比"])
stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["日赢面"])
stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["非零覆盖"])
stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["新高间隔"])
stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["回撤风险"])
stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["新高占比"])
stats = stats.format(
{
"盈亏平衡点": "{:.2f}",
"年化波动率": "{:.2%}",
"下行波动率": "{:.2%}",
"最大回撤": "{:.2%}",
"卡玛": "{:.2f}",
"年化": "{:.2%}",
"夏普": "{:.2f}",
"非零覆盖": "{:.2%}",
"绝对收益": "{:.2%}",
"日胜率": "{:.2%}",
"日盈亏比": "{:.2f}",
"日赢面": "{:.2%}",
"新高间隔": "{:.2f}",
"回撤风险": "{:.2f}",
"新高占比": "{:.2%}",
}
)
return stats
[docs]def show_daily_return(df: pd.DataFrame, **kwargs):
"""用 streamlit 展示日收益
:param df: pd.DataFrame,数据源
:param kwargs:
- sub_title: str,标题
- stat_hold_days: bool,是否展示持有日绩效指标,默认为 True
- legend_only_cols: list,仅在图例中展示的列名
- use_st_table: bool,是否使用 st.table 展示绩效指标,默认为 False
- plot_cumsum: bool,是否展示日收益累计曲线,默认为 True
- yearly_days: int,年交易天数,默认为 252
- show_dailys: bool,是否展示日收益数据详情,默认为 False
"""
if not df.index.dtype == "datetime64[ns]":
df["dt"] = pd.to_datetime(df["dt"])
df.set_index("dt", inplace=True)
assert df.index.dtype == "datetime64[ns]", "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换"
yearly_days = kwargs.get("yearly_days", 252)
df = df.copy().fillna(0)
df.sort_index(inplace=True, ascending=True)
def _stats(df_, type_="持有日"):
df_ = df_.copy()
stats = []
for _col in df_.columns:
if type_ == "持有日":
col_stats = czsc.daily_performance([x for x in df_[_col] if x != 0], yearly_days=yearly_days)
else:
assert type_ == "交易日", "type_ 参数必须是 持有日 或 交易日"
col_stats = czsc.daily_performance(df_[_col], yearly_days=yearly_days)
col_stats["日收益名称"] = _col
stats.append(col_stats)
stats = pd.DataFrame(stats).set_index("日收益名称")
stats = __stats_style(stats)
# stats = stats.style.background_gradient(cmap="RdYlGn_r", axis=None, subset=["年化"])
# stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["绝对收益"])
# stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["夏普"])
# stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["最大回撤"])
# stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["卡玛"])
# stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["年化波动率"])
# stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["下行波动率"])
# stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["盈亏平衡点"])
# stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["日胜率"])
# stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["日盈亏比"])
# stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["日赢面"])
# stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["非零覆盖"])
# stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["新高间隔"])
# stats = stats.background_gradient(cmap="RdYlGn", axis=None, subset=["回撤风险"])
# stats = stats.background_gradient(cmap="RdYlGn_r", axis=None, subset=["新高占比"])
# stats = stats.format(
# {
# "盈亏平衡点": "{:.2f}",
# "年化波动率": "{:.2%}",
# "下行波动率": "{:.2%}",
# "最大回撤": "{:.2%}",
# "卡玛": "{:.2f}",
# "年化": "{:.2%}",
# "夏普": "{:.2f}",
# "非零覆盖": "{:.2%}",
# "绝对收益": "{:.2%}",
# "日胜率": "{:.2%}",
# "日盈亏比": "{:.2f}",
# "日赢面": "{:.2%}",
# "新高间隔": "{:.2f}",
# "回撤风险": "{:.2f}",
# "新高占比": "{:.2%}",
# }
# )
return stats
use_st_table = kwargs.get("use_st_table", False)
with st.container():
sub_title = kwargs.get("sub_title", "")
if sub_title:
st.subheader(sub_title, divider="rainbow", anchor=sub_title)
if kwargs.get("show_dailys", False):
with st.expander("日收益数据详情", expanded=False):
st.dataframe(df, use_container_width=True)
with st.expander("交易日绩效指标", expanded=True):
if use_st_table:
st.table(_stats(df, type_="交易日"))
else:
st.dataframe(_stats(df, type_="交易日"), use_container_width=True)
st.caption("交易日:交易所指定的交易日,或者有收益发生变化的日期")
if kwargs.get("stat_hold_days", True):
with st.expander("持有日绩效指标", expanded=False):
st.dataframe(_stats(df, type_="持有日"), use_container_width=True)
st.caption("持有日:在交易日的基础上,将收益率为0的日期删除")
if kwargs.get("plot_cumsum", True):
df = df.cumsum()
fig = px.line(df, y=df.columns.to_list(), title="日收益累计曲线")
fig.update_xaxes(title="")
# 添加每年的开始第一个日期的竖线
for year in range(df.index.year.min(), df.index.year.max() + 1):
first_date = df[df.index.year == year].index.min()
fig.add_vline(x=first_date, line_dash="dash", line_color="red")
for col in kwargs.get("legend_only_cols", []):
fig.update_traces(visible="legendonly", selector=dict(name=col))
# fig.update_layout(legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
fig.update_layout(margin=dict(l=0, r=0, b=0))
st.plotly_chart(fig, use_container_width=True)
[docs]def show_monthly_return(df, ret_col="total", sub_title="月度累计收益", **kwargs):
"""展示指定列的月度累计收益
:param df: pd.DataFrame,数据源
:param ret_col: str,收益列名
:param sub_title: str,标题
:param kwargs:
"""
assert isinstance(df, pd.DataFrame), "df 必须是 pd.DataFrame 类型"
if not df.index.dtype == "datetime64[ns]":
df["dt"] = pd.to_datetime(df["dt"])
df.set_index("dt", inplace=True)
assert df.index.dtype == "datetime64[ns]", "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换"
df = df.copy().fillna(0)
df.sort_index(inplace=True, ascending=True)
if sub_title:
st.subheader(sub_title, divider="rainbow", anchor=sub_title)
monthly = df[[ret_col]].resample("ME").sum()
monthly["year"] = monthly.index.year
monthly["month"] = monthly.index.month
monthly = monthly.pivot_table(index="year", columns="month", values=ret_col)
month_cols = [f"{x}月" for x in monthly.columns]
monthly.columns = month_cols
monthly["年收益"] = monthly.sum(axis=1)
# 计算月度胜率和月度盈亏比
win_rate = monthly.apply(lambda x: (x > 0).sum() / len(x), axis=0)
ykb = monthly.apply(lambda x: x[x > 0].sum() / -x[x < 0].sum() if min(x) < 0 else 10, axis=0)
mean_ret = monthly.mean(axis=0)
dfy = pd.DataFrame([win_rate, ykb, mean_ret], index=["胜率", "盈亏比", "平均收益"])
monthly = monthly.style.background_gradient(cmap="RdYlGn_r", axis=None, subset=month_cols)
monthly = monthly.background_gradient(cmap="RdYlGn_r", axis=None, subset=["年收益"])
monthly = monthly.format("{:.2%}", na_rep="-")
st.dataframe(monthly, use_container_width=True)
dfy = dfy.style.background_gradient(cmap="RdYlGn_r", axis=1).format("{:.2%}", na_rep="-")
st.dataframe(dfy, use_container_width=True)
st.caption(
"注:月度收益为累计收益,胜率为月度收益大于0的占比,盈亏比为月度盈利总额与月度亏损总额的比值,如果月度亏损总额为0,则盈亏比为10"
)
[docs]def show_correlation(df, cols=None, method="pearson", **kwargs):
"""用 streamlit 展示相关性
:param df: pd.DataFrame,数据源
:param cols: list,分析相关性的字段
:param method: str,计算相关性的方法,可选 pearson 和 spearman
:param kwargs:
- use_st_table: bool,是否使用 st.table 展示相关性,默认为 False
- use_container_width: bool,是否使用容器宽度,默认为 True
"""
cols = cols or df.columns.to_list()
dfr = df[cols].corr(method=method)
dfr["average"] = (dfr.sum(axis=1) - 1) / (len(cols) - 1)
dfr = dfr.style.background_gradient(cmap="RdYlGn_r", axis=None).format("{:.4f}", na_rep="MISS")
if kwargs.get("use_st_table", False):
st.table(dfr)
else:
st.dataframe(dfr, use_container_width=kwargs.get("use_container_width", True))
[docs]def show_sectional_ic(df, x_col, y_col, method="pearson", **kwargs):
"""使用 streamlit 展示截面IC
:param df: pd.DataFrame,数据源
:param x_col: str,因子列名
:param y_col: str,收益列名
:param method: str,计算IC的方法,可选 pearson 和 spearman
:param kwargs:
- show_cumsum_ic: bool,是否展示累计IC曲线,默认为 True
- show_factor_histgram: bool,是否展示因子数据分布图,默认为 False
"""
dfc, res = czsc.cross_sectional_ic(df, x_col=x_col, y_col=y_col, dt_col="dt", method=method)
col1, col2, col3, col4 = st.columns([1, 1, 1, 5])
col1.metric("IC均值", res["IC均值"])
col1.metric("IC标准差", res["IC标准差"])
col1.metric("累计IC回归R2", res["累计IC回归R2"])
col2.metric("ICIR", res["ICIR"])
col2.metric("IC胜率", res["IC胜率"])
col2.metric("累计IC回归斜率", res["累计IC回归斜率"])
col3.metric("IC绝对值>2%占比", res["IC绝对值>2%占比"])
col3.metric("品种数量", df["symbol"].nunique())
col3.metric("交易日数量", df["dt"].nunique())
dfc[["year", "month"]] = dfc.dt.apply(lambda x: pd.Series([x.year, x.month]))
dfc["month"] = dfc["month"].apply(lambda x: f"{x:02d}月")
dfm = dfc.groupby(["year", "month"]).agg({"ic": "mean"}).reset_index()
dfm = pd.pivot_table(dfm, index="year", columns="month", values="ic")
# 在 dfm 上增加一列,用于计算每年的平均IC
dfm["年度"] = dfc.groupby("year").agg({"ic": "mean"})
col4.write("月度IC分析结果:")
col4.dataframe(
dfm.style.background_gradient(cmap="RdYlGn_r", axis=None).format("{:.4f}", na_rep="MISS"),
use_container_width=True,
)
if kwargs.get("show_factor_histgram", False):
fig = px.histogram(df, x=x_col, marginal="box", title="因子数据分布图")
st.plotly_chart(fig, use_container_width=True)
if kwargs.get("show_cumsum_ic", True):
dfc["ic_cumsum"] = dfc["ic"].cumsum()
fig = go.Figure()
fig.add_trace(go.Bar(x=dfc["dt"], y=dfc["ic"], name="IC", yaxis="y"))
fig.add_trace(
go.Scatter(x=dfc["dt"], y=dfc["ic_cumsum"], mode="lines", name="累计IC", yaxis="y2", line=dict(color="red"))
)
fig.update_layout(
yaxis=dict(title="IC"),
yaxis2=dict(title="累计IC", overlaying="y", side="right"),
title="截面IC曲线",
margin=dict(l=0, r=0, b=0),
)
st.plotly_chart(fig, use_container_width=True)
[docs]@deprecated("请使用 czsc.show_feature_returns")
def show_factor_returns(df, x_col, y_col):
"""使用 streamlit 展示因子收益率
:param df: pd.DataFrame,数据源
:param x_col: str,因子列名
:param y_col: str,收益列名
"""
assert "dt" in df.columns, "时间列必须为 dt"
res = []
for dt, dfg in df.groupby("dt"):
dfg = dfg.copy().dropna(subset=[x_col, y_col])
X = dfg[x_col].values.reshape(-1, 1)
y = dfg[y_col].values.reshape(-1, 1)
model = LinearRegression(fit_intercept=False).fit(X, y)
res.append([dt, model.coef_[0][0]])
res = pd.DataFrame(res, columns=["dt", "因子收益率"])
res["dt"] = pd.to_datetime(res["dt"])
col1, col2 = st.columns(2)
fig = px.bar(res, x="dt", y="因子收益率", title="因子逐K收益率")
col1.plotly_chart(fig, use_container_width=True)
res["因子累计收益率"] = res["因子收益率"].cumsum()
fig = px.line(res, x="dt", y="因子累计收益率", title="因子累计收益率")
col2.plotly_chart(fig, use_container_width=True)
[docs]def show_feature_returns(df, factor, target="n1b", **kwargs):
"""使用 streamlit 展示因子收益率
:param df: pd.DataFrame, 必须包含 dt、symbol、factor, target 列
:param factor: str, 因子列名
:param target: str, 预测目标收益率列名
:param kwargs:
- fit_intercept: bool, 是否拟合截距,默认为 False
- fig_title: str, 图表标题,默认为 "因子收益率分析"
"""
assert "dt" in df.columns, "时间列必须为 dt"
assert "symbol" in df.columns, "标的列必须为 symbol"
assert factor in df.columns, f"因子列 {factor} 不存在"
assert target in df.columns, f"目标列 {target} 不存在"
fit_intercept = kwargs.get("fit_intercept", False)
dft = czsc.feature_returns(df, factor, target, fit_intercept=fit_intercept)
dft.columns = ["dt", "因子收益率"]
dft["累计收益率"] = dft["因子收益率"].cumsum()
fig_title = kwargs.get("fig_title", "因子截面收益率分析")
# 将因子逐K收益率 和 因子累计收益率 分左右轴,绘制在一张图上
fig = go.Figure()
fig.add_trace(go.Bar(x=dft["dt"], y=dft["因子收益率"], name="因子收益率", yaxis="y"))
fig.add_trace(
go.Scatter(
x=dft["dt"], y=dft["累计收益率"], mode="lines", name="累计收益率", yaxis="y2", line=dict(color="red")
)
)
fig.update_layout(
yaxis=dict(title="因子收益率"),
yaxis2=dict(title="累计收益率", overlaying="y", side="right"),
title=fig_title,
margin=dict(l=0, r=0, b=0),
)
st.plotly_chart(fig, use_container_width=True)
[docs]def show_factor_layering(df, factor, target="n1b", **kwargs):
"""使用 streamlit 绘制因子分层收益率图
:param df: 因子数据
:param factor: 因子列名
:param target: 收益列名
:param kwargs:
- n: 分层数量,默认为10
"""
n = kwargs.get("n", 10)
df = czsc.feture_cross_layering(df, factor, n=n)
mr = df.groupby(["dt", f"{factor}分层"])[target].mean().reset_index()
mrr = mr.pivot(index="dt", columns=f"{factor}分层", values=target).fillna(0)
if "第00层" in mrr.columns:
mrr.drop(columns=["第00层"], inplace=True)
# 计算每层的累计收益率
dfc = mrr.sum(axis=0).to_frame("绝对收益")
dfc["text"] = dfc["绝对收益"].apply(lambda x: f"{x:.2%}")
fig = px.bar(
dfc,
y="绝对收益",
title="因子分层绝对收益 | 单调性:{:.2%}".format(czsc.monotonicity(dfc["绝对收益"])),
color="绝对收益",
color_continuous_scale="RdYlGn_r",
text="text",
)
st.plotly_chart(fig, use_container_width=True)
czsc.show_daily_return(
mrr,
stat_hold_days=False,
yearly_days=kwargs.get("yearly_days", 252),
show_dailys=kwargs.get("show_dailys", False),
)
[docs]def show_symbol_factor_layering(df, x_col, y_col="n1b", **kwargs):
"""使用 streamlit 绘制单个标的上的因子分层收益率图
:param df: 因子数据,必须包含 dt, x_col, y_col 列,其中 dt 为日期,x_col 为因子值,y_col 为收益率,数据样例:
=================== ============ ============
dt intercept n1b
=================== ============ ============
2017-01-03 00:00:00 0 0.00716081
2017-01-04 00:00:00 -0.00154541 0.000250816
2017-01-05 00:00:00 0.000628884 -0.0062695
2017-01-06 00:00:00 -0.00681021 0.00334212
2017-01-09 00:00:00 0.00301077 -0.00182963
=================== ============ ============
:param x_col: 因子列名
:param y_col: 收益列名
:param kwargs:
- n: 分层数量,默认为10
"""
df = df.copy()
n = kwargs.get("n", 10)
if df[y_col].max() > 100: # 如果收益率单位为BP, 转换为万分之一
df[y_col] = df[y_col] / 10000
if df[x_col].nunique() < n * 2:
st.error(f"因子值数量小于{n*2},无法进行分层")
if f"{x_col}分层" not in df.columns:
czsc.normalize_ts_feature(df, x_col, n=n)
for i in range(n):
df[f"第{str(i+1).zfill(2)}层"] = np.where(df[f"{x_col}分层"] == f"第{str(i+1).zfill(2)}层", df[y_col], 0)
layering_cols = [f"第{str(i).zfill(2)}层" for i in range(1, n + 1)]
mrr = df[["dt"] + layering_cols].copy()
mrr.set_index("dt", inplace=True)
tabs = st.tabs(["分层收益率", "多空组合"])
with tabs[0]:
show_daily_return(mrr, stat_hold_days=False)
with tabs[1]:
col1, col2 = st.columns(2)
long = col1.multiselect("多头组合", layering_cols, default=["第02层"], key="symbol_factor_long")
short = col2.multiselect("空头组合", layering_cols, default=["第01层"], key="symbol_factor_short")
dfr = mrr.copy()
dfr["多头"] = dfr[long].sum(axis=1)
dfr["空头"] = -dfr[short].sum(axis=1)
dfr["多空"] = dfr["多头"] + dfr["空头"]
show_daily_return(dfr[["多头", "空头", "多空"]])
[docs]def show_weight_backtest(dfw, **kwargs):
"""展示权重回测结果
:param dfw: 回测数据,任何字段都不允许有空值;数据样例:
=================== ======== ======== =======
dt symbol weight price
=================== ======== ======== =======
2019-01-02 09:01:00 DLi9001 0.5 961.695
2019-01-02 09:02:00 DLi9001 0.25 960.72
2019-01-02 09:03:00 DLi9001 0.25 962.669
2019-01-02 09:04:00 DLi9001 0.25 960.72
2019-01-02 09:05:00 DLi9001 0.25 961.695
=================== ======== ======== =======
:param kwargs:
- fee: 单边手续费,单位为BP,默认为2BP
- digits: 权重小数位数,默认为2
- show_drawdowns: bool,是否展示最大回撤,默认为 False
- show_daily_detail: bool,是否展示每日收益详情,默认为 False
- show_backtest_detail: bool,是否展示回测详情,默认为 False
- show_splited_daily: bool,是否展示分段日收益表现,默认为 False
- show_yearly_stats: bool,是否展示年度绩效指标,默认为 False
- show_monthly_return: bool,是否展示月度累计收益,默认为 False
- n_jobs: int, 并行计算的进程数,默认为 1
"""
fee = kwargs.get("fee", 2)
digits = kwargs.get("digits", 2)
if (dfw.isnull().sum().sum() > 0) or (dfw.isna().sum().sum() > 0):
st.warning("show_weight_backtest :: 持仓权重数据中存在空值,请检查数据后再试;空值数据如下:")
st.dataframe(dfw[dfw.isnull().sum(axis=1) > 0], use_container_width=True)
st.stop()
wb = czsc.WeightBacktest(dfw, fee_rate=fee / 10000, digits=digits, n_jobs=kwargs.get("n_jobs", 1))
stat = wb.results["绩效评价"]
st.divider()
c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
c1.metric("盈亏平衡点", f"{stat['盈亏平衡点']:.2%}")
c2.metric("单笔收益(BP)", f"{stat['单笔收益']}")
c3.metric("交易胜率", f"{stat['交易胜率']:.2%}")
c4.metric("持仓K线数", f"{stat['持仓K线数']}")
c5.metric("最大回撤", f"{stat['最大回撤']:.2%}")
c6.metric("年化收益率", f"{stat['年化']:.2%}")
c7.metric("夏普比率", f"{stat['夏普']:.2f}")
c8.metric("卡玛比率", f"{stat['卡玛']:.2f}")
c9.metric("年化波动率", f"{stat['年化波动率']:.2%}")
c10.metric("多头占比", f"{stat['多头占比']:.2%}")
c11.metric("空头占比", f"{stat['空头占比']:.2%}")
st.divider()
dret = wb.results["品种等权日收益"].copy()
dret["dt"] = pd.to_datetime(dret["date"])
dret = dret.set_index("dt").drop(columns=["date"])
# dret.index = pd.to_datetime(dret.index)
show_daily_return(dret, legend_only_cols=dfw["symbol"].unique().tolist(), **kwargs)
if kwargs.get("show_drawdowns", False):
show_drawdowns(dret, ret_col="total", sub_title="")
if kwargs.get("show_backtest_detail", False):
c1, c2 = st.columns([1, 1])
with c1.expander("品种等权日收益", expanded=False):
df_ = wb.results["品种等权日收益"].copy()
st.dataframe(df_.style.background_gradient(cmap="RdYlGn_r").format("{:.2%}"), use_container_width=True)
with c2.expander("查看开平交易对", expanded=False):
dfp = pd.concat([v["pairs"] for k, v in wb.results.items() if k in wb.symbols], ignore_index=True)
st.dataframe(dfp, use_container_width=True)
if kwargs.get("show_splited_daily", False):
with st.expander("品种等权日收益分段表现", expanded=False):
show_splited_daily(dret[["total"]].copy(), ret_col="total")
if kwargs.get("show_yearly_stats", False):
with st.expander("年度绩效指标", expanded=False):
show_yearly_stats(dret, ret_col="total")
if kwargs.get("show_monthly_return", False):
with st.expander("月度累计收益", expanded=False):
show_monthly_return(dret, ret_col="total", sub_title="")
return wb
[docs]def show_splited_daily(df, ret_col, **kwargs):
"""展示分段日收益表现
:param df: pd.DataFrame
:param ret_col: str, df 中的列名,指定收益列
:param kwargs:
sub_title: str, 子标题
"""
yearly_days = kwargs.get("yearly_days", 252)
if not df.index.dtype == "datetime64[ns]":
df["dt"] = pd.to_datetime(df["dt"])
df.set_index("dt", inplace=True)
assert df.index.dtype == "datetime64[ns]", "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换"
df = df.copy().fillna(0)
df.sort_index(inplace=True, ascending=True)
sub_title = kwargs.get("sub_title", "")
if sub_title:
st.subheader(sub_title, divider="rainbow", anchor=sub_title)
last_dt = df.index[-1]
sdt_map = {
"过去1周": last_dt - pd.Timedelta(days=7),
"过去2周": last_dt - pd.Timedelta(days=14),
"过去1月": last_dt - pd.Timedelta(days=30),
"过去3月": last_dt - pd.Timedelta(days=90),
"过去6月": last_dt - pd.Timedelta(days=180),
"过去1年": last_dt - pd.Timedelta(days=365),
"今年以来": pd.to_datetime(f"{last_dt.year}-01-01"),
"成立以来": df.index[0],
}
rows = []
for name, sdt in sdt_map.items():
df1 = df.loc[sdt:last_dt].copy()
row = {
"收益名称": name,
"开始日期": sdt.strftime("%Y-%m-%d"),
"结束日期": last_dt.strftime("%Y-%m-%d"),
}
row_ = czsc.daily_performance(df1[ret_col], yearly_days=yearly_days)
row.update(row_)
rows.append(row)
dfv = pd.DataFrame(rows).set_index("收益名称")
dfv = __stats_style(dfv)
st.dataframe(dfv, use_container_width=True)
[docs]def show_yearly_stats(df, ret_col, **kwargs):
"""按年计算日收益表现
:param df: pd.DataFrame,数据源
:param ret_col: str,收益列名
:param kwargs:
- sub_title: str, 子标题
"""
if not df.index.dtype == "datetime64[ns]":
df["dt"] = pd.to_datetime(df["dt"])
df.set_index("dt", inplace=True)
assert df.index.dtype == "datetime64[ns]", "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换"
df = df.copy().fillna(0)
df.sort_index(inplace=True, ascending=True)
df["年份"] = df.index.year
yearly_days = max(len(df_) for year, df_ in df.groupby("年份"))
_stats = []
for year, df_ in df.groupby("年份"):
_yst = czsc.daily_performance(df_[ret_col].to_list(), yearly_days=yearly_days)
_yst["年份"] = year
_stats.append(_yst)
stats = pd.DataFrame(_stats).set_index("年份")
stats = __stats_style(stats)
sub_title = kwargs.get("sub_title", "")
if sub_title:
st.subheader(sub_title, divider="rainbow", anchor=sub_title)
st.dataframe(stats, use_container_width=True)
[docs]def show_ts_rolling_corr(df, col1, col2, **kwargs):
"""时序上按 rolling 的方式计算相关系数
:param df: pd.DataFrame, 必须包含列 dt 和 col1, col2
:param col1: str, df 中的列名
:param col2: str, df 中的列名
:param kwargs:
- min_periods: int, 最小滑动窗口长度
- window: int, 滑动窗口长度,0 表示按 expanding 方式滑动
- corr_method: str, 相关系数计算方法,可选 pearson, kendall, spearman
- sub_title: str, 子标题
"""
if col1 not in df.columns or col2 not in df.columns:
st.error(f"列 {col1} 或 {col2} 不存在,请重新输入")
return
if not isinstance(df.index, pd.DatetimeIndex):
df["dt"] = pd.to_datetime(df["dt"])
df = df.set_index("dt")
df = df[[col1, col2]].copy()
if df.isnull().sum().sum() > 0:
st.dataframe(df[df.isnull().sum(axis=1) > 0])
st.error(f"列 {col1} 或 {col2} 中存在缺失值,请先处理缺失值")
return
sub_title = kwargs.get("sub_title", None)
if sub_title:
st.subheader(sub_title, divider="rainbow", anchor=hashlib.md5(sub_title.encode("utf-8")).hexdigest()[:8])
min_periods = kwargs.get("min_periods", 300)
window = kwargs.get("window", 2000)
corr_method = kwargs.get("corr_method", "pearson")
corr_result = df[col1].rolling(window=window, min_periods=min_periods).corr(df[col2], pairwise=True)
corr_result = corr_result.dropna()
corr_result = corr_result.rename("corr")
line = go.Scatter(x=corr_result.index, y=corr_result, mode="lines", name="corr")
layout = go.Layout(
title="滑动相关系数",
xaxis=dict(title=""),
yaxis=dict(title="corr"),
annotations=[
dict(
x=0.0,
y=1.05,
showarrow=False,
xref="paper",
yref="paper",
font=dict(size=12),
text=f"滑动窗口长度:{window},最小滑动窗口长度:{min_periods},相关系数计算方法:{corr_method}",
)
],
)
fig = go.Figure(data=[line], layout=layout)
st.plotly_chart(fig, use_container_width=True)
[docs]def show_ts_self_corr(df, col, **kwargs):
"""展示时序上单因子的自相关性分析结果,贡献者:guo
:param df: pd.DataFrame, 必须包含列 dt 和 col
:param col: str, df 中的列名
"""
if not isinstance(df.index, pd.DatetimeIndex):
df["dt"] = pd.to_datetime(df["dt"])
df = df.set_index("dt")
df = df.sort_index(ascending=True)
if df[col].isnull().sum() > 0:
st.dataframe(df[df[col].isnull()])
st.error(f"列 {col} 中存在缺失值,请先处理缺失值")
return
col1, col2 = st.columns(2)
with col1:
sub_title = f"自相关系数分析({col})"
st.subheader(sub_title, divider="rainbow", anchor=hashlib.md5(sub_title.encode("utf-8")).hexdigest()[:8])
c1, c2, c3 = st.columns([2, 2, 1])
nlags = int(c1.number_input("最大滞后阶数", value=20, min_value=1, max_value=100, step=1))
method = c2.selectbox("选择分析方法", ["acf", "pacf"], index=0)
if method == "acf":
acf_result, conf_int = sm.tsa.acf(df[[col]].copy(), nlags=nlags, alpha=0.05, missing="raise")
else:
acf_result, conf_int = sm.tsa.pacf(df[[col]].copy(), nlags=nlags, alpha=0.05)
bar = go.Bar(x=list(range(len(acf_result))), y=acf_result, name="自相关系数")
upper = go.Scatter(x=list(range(len(acf_result))), y=conf_int[:, 1], mode="lines", name="95%置信区间上界")
lower = go.Scatter(x=list(range(len(acf_result))), y=conf_int[:, 0], mode="lines", name="95%置信区间下界")
layout = go.Layout(title=method.upper(), xaxis=dict(title="滞后阶数"), yaxis=dict(title="自相关系数"))
fig = go.Figure(data=[bar, upper, lower], layout=layout)
st.plotly_chart(fig, use_container_width=True)
with col2:
sub_title = f"滞后N阶滑动相关性({col})"
st.subheader(sub_title, divider="rainbow", anchor=hashlib.md5(sub_title.encode("utf-8")).hexdigest()[:8])
c1, c2, c3, c4 = st.columns(4)
min_periods = int(c1.number_input("最小滑动窗口长度", value=20, min_value=0, step=1))
window = int(c2.number_input("滑动窗口长度", value=200, step=1))
corr_method = c3.selectbox("相关系数计算方法", ["pearson", "kendall", "spearman"])
n = int(c4.number_input("自相关滞后阶数", value=1, min_value=1, step=1))
df[f"{col}_lag{n}"] = df[col].shift(-n)
df.dropna(subset=[f"{col}_lag{n}"], inplace=True)
show_ts_rolling_corr(df, col, f"{col}_lag{n}", min_periods=min_periods, window=window, corr_method=corr_method)
[docs]def show_stoploss_by_direction(dfw, **kwargs):
"""按方向止损分析的展示
:param dfw: pd.DataFrame, 包含权重数据
:param kwargs: dict, 其他参数
- stoploss: float, 止损比例
- show_detail: bool, 是否展示详细信息
- digits: int, 价格小数位数, 默认2
- fee_rate: float, 手续费率, 默认0.0002
:return: None
"""
dfw = dfw.copy()
stoploss = kwargs.pop("stoploss", 0.08)
dfw1 = czsc.stoploss_by_direction(dfw, stoploss=stoploss)
# 找出逐笔止损点
rows = []
for symbol, dfg in dfw1.groupby("symbol"):
for order_id, dfg1 in dfg.groupby("order_id"):
if dfg1["is_stop"].any():
row = {
"symbol": symbol,
"order_id": order_id,
"交易方向": "多头" if dfg1["weight"].iloc[0] > 0 else "空头",
"开仓时间": dfg1["dt"].iloc[0],
"平仓时间": dfg1["dt"].iloc[-1],
"平仓收益": dfg1["hold_returns"].iloc[-1],
"止损时间": dfg1[dfg1["is_stop"]]["dt"].iloc[0],
"止损收益": dfg1[dfg1["is_stop"]]["hold_returns"].iloc[0],
}
rows.append(row)
dfr = pd.DataFrame(rows)
with st.expander("逐笔止损点", expanded=False):
st.dataframe(dfr, use_container_width=True)
if kwargs.pop("show_detail", False):
cols = [
"dt",
"symbol",
"raw_weight",
"weight",
"price",
"hold_returns",
"min_hold_returns",
"returns",
"order_id",
"is_stop",
]
dfs = dfw1[dfw1["is_stop"]][cols].copy()
with st.expander("止损点详情", expanded=False):
st.dataframe(dfs, use_container_width=True)
czsc.show_weight_backtest(dfw1[["dt", "symbol", "weight", "price"]].copy(), **kwargs)
[docs]def show_cointegration(df, col1, col2, **kwargs):
"""分析两个时间序列协整性,贡献者:珠峰
:param df: pd.DataFrame, 必须包含列 dt 和 col1, col2
:param col1: str, df 中的列名
:param col2: str, df 中的列名
:param kwargs: dict, 其他参数
- sub_header: str, default '', 子标题
- docs: bool, default False, 是否显示协整检验的原理与使用说明
"""
from statsmodels.tsa.stattools import coint
if col1 not in df.columns or col2 not in df.columns:
st.error(f"列 {col1} 或 {col2} 不存在,请重新输入")
return
if not isinstance(df.index, pd.DatetimeIndex):
df["dt"] = pd.to_datetime(df["dt"])
df = df.set_index("dt")
df = df[[col1, col2]].copy()
if df.isnull().sum().sum() > 0:
st.warning(f"列 {col1} 或 {col2} 中存在缺失值,请先处理缺失值!!!")
st.dataframe(df[df.isnull().sum(axis=1) > 0], use_container_width=True)
return
sub_header = kwargs.get("sub_header", "")
if sub_header:
st.subheader(sub_header, divider="rainbow")
if kwargs.get("docs", False):
with st.expander("协整检验原理与使用说明", expanded=False):
st.markdown(
"""
##### 协整检验原理
简而言之:两个不平稳的时间序列,如果它们的线性组合是平稳的,那么它们就是协整的。
平稳的时间序列是指均值和方差不随时间变化的时间序列。而平稳的时间序列便可以用来进行统计分析。
举例:两只股票的收盘价满足协整关系,那么它们的线性组合就是平稳的,进而可以进行配对交易等。
##### 协整检验使用说明
教条式地解释:协整检验p值的含义是两个时间序列**不协整**的概率。一般取临界值5%来判断是否协整,低于5%则可以认为两个时间序列协整。
协整检验原理与使用说明参考链接:[Cointegration](https://en.wikipedia.org/wiki/Cointegration)
"""
)
l1, l2, l3 = st.columns(3)
t, p, crit_value = coint(df[col1], df[col2])
l1.metric("协整检验统计量", str(round(t, 3)), help="单位根检验的T统计量。")
l2.metric(
"协整检验P值(不协整的概率)",
f"{p:.2%}",
help="两个时间序列不协整的概率,低于5%则可以认为两个时间序列协整。",
)
fig = px.line(df, x=df.index, y=[col1, col2])
fig.update_layout(title=f"{col1} 与 {col2} 的曲线图对比", xaxis_title="", yaxis_title="value")
st.plotly_chart(fig, use_container_width=True)
[docs]def show_out_in_compare(df, ret_col, mid_dt, **kwargs):
"""展示样本内外表现对比"""
assert isinstance(df, pd.DataFrame), "df 必须是 pd.DataFrame 类型"
if not df.index.dtype == "datetime64[ns]":
df["dt"] = pd.to_datetime(df["dt"])
df.set_index("dt", inplace=True)
assert df.index.dtype == "datetime64[ns]", "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换"
df = df[[ret_col]].copy().fillna(0)
df.sort_index(inplace=True, ascending=True)
mid_dt = pd.to_datetime(mid_dt)
dfi = df[df.index < mid_dt].copy()
dfo = df[df.index >= mid_dt].copy()
stats_i = czsc.daily_performance(dfi[ret_col].to_list())
stats_i["标记"] = "样本内"
stats_i["开始日期"] = dfi.index[0].strftime("%Y-%m-%d")
stats_i["结束日期"] = dfi.index[-1].strftime("%Y-%m-%d")
stats_o = czsc.daily_performance(dfo[ret_col].to_list())
stats_o["标记"] = "样本外"
stats_o["开始日期"] = dfo.index[0].strftime("%Y-%m-%d")
stats_o["结束日期"] = dfo.index[-1].strftime("%Y-%m-%d")
df_stats = pd.DataFrame([stats_i, stats_o])
df_stats = df_stats[
[
"标记",
"开始日期",
"结束日期",
"年化",
"最大回撤",
"夏普",
"卡玛",
"日胜率",
"年化波动率",
"非零覆盖",
"盈亏平衡点",
"新高间隔",
"新高占比",
"回撤风险",
]
]
sub_title = kwargs.get("sub_title", "样本内外表现对比")
if sub_title:
st.subheader(sub_title, divider="rainbow")
df_stats = df_stats.style.background_gradient(cmap="RdYlGn_r", subset=["年化"])
df_stats = df_stats.background_gradient(cmap="RdYlGn_r", subset=["夏普"])
df_stats = df_stats.background_gradient(cmap="RdYlGn", subset=["最大回撤"])
df_stats = df_stats.background_gradient(cmap="RdYlGn_r", subset=["卡玛"])
df_stats = df_stats.background_gradient(cmap="RdYlGn", subset=["年化波动率"])
df_stats = df_stats.background_gradient(cmap="RdYlGn", subset=["盈亏平衡点"])
df_stats = df_stats.background_gradient(cmap="RdYlGn_r", subset=["日胜率"])
df_stats = df_stats.background_gradient(cmap="RdYlGn_r", subset=["非零覆盖"])
df_stats = df_stats.background_gradient(cmap="RdYlGn", subset=["新高间隔"])
df_stats = df_stats.background_gradient(cmap="RdYlGn", subset=["回撤风险"])
df_stats = df_stats.background_gradient(cmap="RdYlGn_r", subset=["新高占比"])
df_stats = df_stats.format(
{
"盈亏平衡点": "{:.2f}",
"年化波动率": "{:.2%}",
"最大回撤": "{:.2%}",
"卡玛": "{:.2f}",
"年化": "{:.2%}",
"夏普": "{:.2f}",
"非零覆盖": "{:.2%}",
"日胜率": "{:.2%}",
"新高间隔": "{:.2f}",
"回撤风险": "{:.2f}",
"新高占比": "{:.2%}",
}
)
st.dataframe(df_stats, use_container_width=True, hide_index=True)
[docs]def show_optuna_study(study: optuna.Study, **kwargs):
# https://optuna.readthedocs.io/en/stable/reference/visualization/index.html
# https://zh-cn.optuna.org/reference/visualization.html
from czsc.utils.optuna import optuna_good_params
sub_title = kwargs.pop("sub_title", "Optuna Study Visualization")
if sub_title:
anchor = hashlib.md5(sub_title.encode("utf-8")).hexdigest().upper()[:6]
st.subheader(sub_title, divider="rainbow", anchor=anchor)
fig = optuna.visualization.plot_contour(study)
st.plotly_chart(fig, use_container_width=True)
fig = optuna.visualization.plot_slice(study)
st.plotly_chart(fig, use_container_width=True)
with st.expander("最佳参数列表", expanded=False):
params = optuna_good_params(study, keep=kwargs.pop("keep", 0.2))
st.dataframe(params, use_container_width=True)
return study
[docs]def show_drawdowns(df: pd.DataFrame, ret_col, **kwargs):
"""展示最大回撤分析
:param df: pd.DataFrame, columns: cells, index: dates
:param ret_col: str, 回报率列名称
:param kwargs:
- sub_title: str, optional, 子标题
- top: int, optional, 默认10, 返回最大回撤的数量
"""
if not df.index.dtype == "datetime64[ns]":
df["dt"] = pd.to_datetime(df["dt"])
df.set_index("dt", inplace=True)
assert df.index.dtype == "datetime64[ns]", "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换"
df = df[[ret_col]].copy().fillna(0)
df.sort_index(inplace=True, ascending=True)
df["cum_ret"] = df[ret_col].cumsum()
df["cum_max"] = df["cum_ret"].cummax()
df["drawdown"] = df["cum_ret"] - df["cum_max"]
sub_title = kwargs.get("sub_title", "最大回撤分析")
if sub_title:
st.subheader(sub_title, divider="rainbow")
top = kwargs.get("top", 10)
if top is not None:
with st.expander(f"TOP{top} 最大回撤详情", expanded=False):
dft = czsc.top_drawdowns(df[ret_col].copy(), top=10)
dft = dft.style.background_gradient(cmap="RdYlGn_r", subset=["净值回撤"])
dft = dft.background_gradient(cmap="RdYlGn", subset=["回撤天数", "恢复天数", "新高间隔"])
dft = dft.format({"净值回撤": "{:.2%}", "回撤天数": "{:.0f}", "恢复天数": "{:.0f}", "新高间隔": "{:.0f}"})
st.dataframe(dft, use_container_width=True)
# 画图: 净值回撤
# 颜色表:https://www.codeeeee.com/color/rgb.html
drawdown = go.Scatter(
x=df.index,
y=df["drawdown"],
fillcolor="salmon",
line=dict(color="salmon"),
fill="tozeroy",
mode="lines",
name="回测曲线",
)
fig = go.Figure(drawdown)
# 增加 10% 分位数线,30% 分位数线,50% 分位数线,同时增加文本标记
for q in [0.1, 0.3, 0.5]:
y1 = df["drawdown"].quantile(q)
fig.add_hline(y=y1, line_dash="dot", line_color="green", line_width=2)
fig.add_annotation(x=df.index[-1], y=y1, text=f"{q:.1%} (DD: {y1:.2%})", showarrow=False, yshift=10)
fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
fig.update_layout(title="", xaxis_title="", yaxis_title="净值回撤", legend_title="回撤曲线")
# 限制 绘制高度
fig.update_layout(height=300)
st.plotly_chart(fig, use_container_width=True)
[docs]def show_event_return(df, factor, **kwargs):
"""分析事件因子的收益率特征
:param df: pd.DataFrame, 数据源
:param factor: str, 事件因子名称
:param kwargs: dict, 其他参数
- sub_title: str, 子标题
- max_unique: int, 因子独立值最大数量
"""
sub_title = kwargs.get("sub_title", "事件收益率特征")
max_unique = kwargs.get("max_unique", 20)
if sub_title:
st.subheader(sub_title, divider="rainbow")
if df[factor].nunique() > max_unique:
st.warning(f"因子分布过于离散,无法进行分析,请检查!!!因子独立值数量:{df[factor].nunique()}")
return
df = df.copy()
c1, c2, c3, c4 = st.columns([1, 1, 1, 1])
agg_method = c1.selectbox(
"聚合方法",
[
"平均收益率",
"收益中位数",
"盈亏比",
"交易胜率",
"前20%平均收益率",
"后20%平均收益率",
],
index=0,
key=f"agg_method_{factor}",
)
sdt = pd.to_datetime(c2.date_input("开始时间", value=df["dt"].min()))
edt = pd.to_datetime(c3.date_input("结束时间", value=df["dt"].max()))
max_overlap = c4.number_input("最大重叠次数", value=3, min_value=1, max_value=20)
df[factor] = df[factor].astype(str)
df = czsc.overlap(df, factor, new_col="overlap", max_overlap=max_overlap)
df = df[(df["dt"] >= sdt) & (df["dt"] <= edt)].copy()
sdt = df["dt"].min().strftime("%Y-%m-%d")
edt = df["dt"].max().strftime("%Y-%m-%d")
st.write(f"时间范围:{sdt} ~ {edt};聚合方法:{agg_method}")
nb_cols = [x for x in df.columns.to_list() if x.startswith("n") and x.endswith("b")]
if agg_method == "平均收益率":
agg_method = lambda x: np.mean(x)
if agg_method == "收益中位数":
agg_method = lambda x: np.median(x)
if agg_method == "前20%平均收益率":
agg_method = lambda x: np.mean(sorted(x)[int(len(x) * 0.8) :])
if agg_method == "后20%平均收益率":
agg_method = lambda x: np.mean(sorted(x)[: int(len(x) * 0.2)])
if agg_method == "盈亏比":
agg_method = lambda x: np.mean([y for y in x if y > 0]) / abs(np.mean([y for y in x if y < 0]))
if agg_method == "交易胜率":
agg_method = lambda x: len([y for y in x if y > 0]) / len(x)
def __markout(dfm, cols):
if isinstance(cols, str):
cols = [cols]
dfy = dfm.groupby(cols).agg({x: agg_method for x in nb_cols}).reset_index()
dfy["出现次数"] = dfm.groupby(cols).size().values
dfy["覆盖率"] = dfm.groupby(cols).size().values / len(dfm)
dfy = dfy[cols + ["出现次数", "覆盖率"] + nb_cols]
dfy = dfy.style.background_gradient(cmap="RdYlGn_r", subset=nb_cols, axis=None)
dfy = dfy.background_gradient(cmap="RdYlGn_r", subset=["出现次数"], axis=None)
dfy = dfy.background_gradient(cmap="RdYlGn_r", subset=["覆盖率"], axis=None)
dfy = dfy.background_gradient(cmap="RdYlGn_r", subset=["overlap"], axis=None)
format_ = {x: "{:.3%}" for x in nb_cols}
format_["出现次数"] = "{:.0f}"
format_["覆盖率"] = "{:.2%}"
format_["overlap"] = "{:.0f}"
dfy = dfy.format(format_, na_rep="MISS")
return dfy
dfy1 = __markout(df.copy(), [factor, "overlap"])
st.dataframe(dfy1, use_container_width=True)
if len(df["symbol"].unique()) > 1:
with st.expander("查看品种详细数据", expanded=False):
symbol = st.selectbox("选择品种", df["symbol"].unique().tolist(), index=0, key=f"ms1_{agg_method}")
dfx = __markout(df[df["symbol"] == symbol].copy(), ["symbol", factor, "overlap"])
st.dataframe(dfx, use_container_width=True)
[docs]def show_psi(df, factor, segment, **kwargs):
"""PSI分布稳定性
:param df: pd.DataFrame, 数据源
:param factor: str, 分组因子
:param segment: str, 分段字段
:param kwargs:
- sub_title: str, 子标题
"""
sub_title = kwargs.get("sub_title", "PSI分布稳定性")
if sub_title:
st.subheader(sub_title, divider="rainbow", anchor=f"{factor}_{segment}_PSI")
dfi = czsc.psi(df, factor, segment)
segs = df[segment].unique().tolist()
segs_psi = [x for x in dfi.columns if x.endswith("_PSI")]
dfi = dfi.style.background_gradient(cmap="RdYlGn_r", subset=segs_psi, axis=None)
dfi = dfi.background_gradient(cmap="RdYlGn_r", subset=segs, axis=None)
dfi = dfi.background_gradient(cmap="RdYlGn_r", subset=["PSI"], axis=None)
dfi = dfi.format("{:.2%}", na_rep="MISS")
st.table(dfi)
[docs]def show_strategies_dailys(df, **kwargs):
"""展示多策略多品种日收益率数据:按策略等权日收益
:param df: N策略M品种日收益率数据,columns=['dt', 'strategy', 'symbol', 'returns'],样例如下:
=================== ========== ======== ============
dt strategy symbol returns
=================== ========== ======== ============
2021-01-04 00:00:00 FUT001 SFT9001 -0.00240078
2021-01-05 00:00:00 FUT001 SFT9001 -0.00107012
2021-01-06 00:00:00 FUT001 SFT9001 0.00122168
2021-01-07 00:00:00 FUT001 SFT9001 0.0020896
2021-01-08 00:00:00 FUT001 SFT9001 0.000510725
=================== ========== ======== ============
:param kwargs:
- sub_title: str, 子标题
"""
sub_title = kwargs.get("sub_title", "按策略等权日收益")
if sub_title:
st.subheader(sub_title, divider="rainbow")
strategies = sorted(df["strategy"].unique().tolist())
strategies = st.multiselect("选择策略", strategies, default=strategies)
# st.write(f"策略:{strategies}")
df = df[df["strategy"].isin(strategies)].copy().reset_index(drop=True)
symbols = sorted(df["symbol"].unique().tolist())
symbols = st.multiselect("选择品种", symbols, default=symbols)
df = df[df["symbol"].isin(symbols)].copy().reset_index(drop=True)
with st.expander("每个品种的策略覆盖情况", expanded=False):
dfc_ = df.groupby("symbol")["strategy"].unique().to_frame().reset_index()
dfc_["count"] = dfc_["strategy"].apply(lambda x: len(x))
st.dataframe(dfc_[["symbol", "count", "strategy"]], use_container_width=True)
assert isinstance(df, pd.DataFrame), "df 必须是 pd.DataFrame"
df["dt"] = pd.to_datetime(df["dt"])
df1 = (
df.groupby(["dt", "strategy"])
.apply(lambda x: x["returns"].mean(), include_groups=False)
.to_frame("returns")
.reset_index()
)
df1 = df1.pivot(index="dt", columns="strategy", values="returns").fillna(0)
df1["等权组合"] = df1.mean(axis=1)
czsc.show_daily_return(df1, stat_hold_days=False, legend_only_cols=strategies)
st.write("策略最近表现")
czsc.show_splited_daily(df1, ret_col="等权组合", sub_title="")
st.write("年度绩效统计")
czsc.show_yearly_stats(df1.copy(), ret_col="等权组合", sub_title="")
st.write("策略相关性")
czsc.show_correlation(df1)
st.write("月度收益率")
czsc.show_monthly_return(df1.copy(), ret_col="等权组合", sub_title="")
mid_dt = kwargs.get("mid_dt")
if mid_dt:
st.write("样本内外对比")
mid_dt = pd.to_datetime(mid_dt).strftime("%Y%m%d")
czsc.show_out_in_compare(df1.copy(), ret_col="等权组合", sub_title="", mid_dt=mid_dt)
[docs]def show_strategies_symbol(df, **kwargs):
"""展示多策略多品种日收益率数据:按品种等权日收益
:param df: N策略M品种日收益率数据,columns=['dt', 'strategy', 'symbol', 'returns'],样例如下:
=================== ========== ======== ============
dt strategy symbol returns
=================== ========== ======== ============
2021-01-04 00:00:00 FUT001 SFT9001 -0.00240078
2021-01-05 00:00:00 FUT001 SFT9001 -0.00107012
2021-01-06 00:00:00 FUT001 SFT9001 0.00122168
2021-01-07 00:00:00 FUT001 SFT9001 0.0020896
2021-01-08 00:00:00 FUT001 SFT9001 0.000510725
=================== ========== ======== ============
:param kwargs:
"""
sub_title = kwargs.get("sub_title", "按品种等权日收益")
if sub_title:
st.subheader(sub_title, divider="rainbow")
strategies = sorted(df["strategy"].unique().tolist())
strategies = st.multiselect("选择策略", strategies, default=strategies, key="strategies_symbol")
df = df[df["strategy"].isin(strategies)].copy().reset_index(drop=True)
symbols = sorted(df["symbol"].unique().tolist())
symbols = st.multiselect("选择品种", symbols, default=symbols, key="strategies_symbol_x")
df = df[df["symbol"].isin(symbols)].copy().reset_index(drop=True)
with st.expander("每个品种的策略覆盖情况", expanded=False):
dfc_ = df.groupby("symbol")["strategy"].unique().to_frame().reset_index()
dfc_["count"] = dfc_["strategy"].apply(lambda x: len(x))
st.dataframe(dfc_[["symbol", "count", "strategy"]], use_container_width=True)
assert isinstance(df, pd.DataFrame), "df 必须是 pd.DataFrame"
df["dt"] = pd.to_datetime(df["dt"])
df2 = (
df.groupby(["dt", "symbol"])
.apply(lambda x: x["returns"].mean(), include_groups=False)
.to_frame("returns")
.reset_index()
)
df2 = df2.pivot(index="dt", columns="symbol", values="returns").fillna(0)
df2["等权组合"] = df2.mean(axis=1)
show_daily_return(df2, stat_hold_days=False, legend_only_cols=symbols)
st.write("策略最近表现")
show_splited_daily(df2, ret_col="等权组合", sub_title="")
st.write("年度绩效统计")
show_yearly_stats(df2.copy(), ret_col="等权组合", sub_title="")
st.write("品种相关性")
show_correlation(df2)
st.write("月度收益率")
show_monthly_return(df2.copy(), ret_col="等权组合", sub_title="")
mid_dt = kwargs.get("mid_dt")
if mid_dt:
st.write("样本内外对比")
mid_dt = pd.to_datetime(mid_dt).strftime("%Y%m%d")
show_out_in_compare(df2.copy(), ret_col="等权组合", sub_title="", mid_dt=mid_dt)
[docs]def show_holds_backtest(df, **kwargs):
"""分析持仓组合的回测结果
:param df: 回测数据,任何字段都不允许有空值;建议 weight 列在截面的和为 1;数据样例:
=================== ======== ======== =======
dt symbol weight n1b
=================== ======== ======== =======
2019-01-02 09:01:00 DLi9001 0.5 961.695
2019-01-02 09:02:00 DLi9001 0.25 960.72
2019-01-02 09:03:00 DLi9001 0.25 962.669
2019-01-02 09:04:00 DLi9001 0.25 960.72
2019-01-02 09:05:00 DLi9001 0.25 961.695
=================== ======== ======== =======
:param kwargs:
- fee: 单边手续费,单位为BP,默认为2BP
- digits: 权重小数位数,默认为2
- show_drawdowns: 是否展示最大回撤分析,默认为True
- show_splited_daily: 是否展示分段收益表现,默认为False
- show_yearly_stats: 是否展示年度绩效指标,默认为True
- show_monthly_return: 是否展示月度累计收益,默认为True
"""
fee = kwargs.get("fee", 2)
digits = kwargs.get("digits", 2)
if (df.isnull().sum().sum() > 0) or (df.isna().sum().sum() > 0):
st.warning("show_holds_backtest :: 数据中存在空值,请检查数据后再试;空值数据如下:")
st.dataframe(df[df.isnull().sum(axis=1) > 0], use_container_width=True)
st.stop()
# 计算每日收益、交易成本、净收益
sdt = df["dt"].min().strftime("%Y-%m-%d")
edt = df["dt"].max().strftime("%Y-%m-%d")
dfr = czsc.holds_performance(df, fee=fee, digits=digits)
st.write(f"回测时间:{sdt} ~ {edt}; 单边年换手率:{dfr['change'].mean() * 252:.2f} 倍; 单边费率:{fee}BP")
daily = dfr[["date", "edge_post_fee"]].copy()
daily.columns = ["dt", "return"]
daily["dt"] = pd.to_datetime(daily["dt"])
daily = daily.sort_values("dt").reset_index(drop=True)
czsc.show_daily_return(daily, stat_hold_days=False)
if kwargs.get("show_drawdowns", True):
st.write("最大回撤分析")
czsc.show_drawdowns(daily, ret_col="return", sub_title="")
if kwargs.get("show_splited_daily", False):
st.write("分段收益表现")
czsc.show_splited_daily(daily, ret_col="return")
if kwargs.get("show_yearly_stats", True):
st.write("年度绩效指标")
czsc.show_yearly_stats(daily, ret_col="return", sub_title="")
if kwargs.get("show_monthly_return", True):
st.write("月度累计收益")
czsc.show_monthly_return(daily, ret_col="return", sub_title="")
[docs]def show_symbols_corr(df, factor, target="n1b", method="pearson", **kwargs):
"""展示品种相关性分析
:param df: pd.DataFrame, 数据源,columns=['dt', 'symbol', factor, target]
:param factor: str, 因子名称
:param target: str, 目标列名称
:param method: str, 相关性计算方法,默认为 pearson
:param kwargs:
- fig_title: str, 图表标题
"""
dfc = df.copy().sort_values(["dt", "symbol"]).reset_index(drop=True)
dfr = (
dfc.groupby("symbol")
.apply(lambda x: x[factor].corr(x[target], method=method), include_groups=False)
.reset_index()
)
dfr.columns = ["symbol", "corr"]
dfr = dfr.sort_values("corr", ascending=False)
fig_title = kwargs.get("fig_title", f"{factor} 在品种上的相关性分布")
fig = px.bar(dfr, x="symbol", y="corr", title=fig_title, orientation="v")
st.plotly_chart(fig, use_container_width=True)
[docs]def show_czsc_trader(trader: czsc.CzscTrader, max_k_num=300, **kwargs):
"""显示缠中说禅交易员详情
:param trader: CzscTrader 对象
:param max_k_num: 最大显示 K 线数量
:param kwargs: 其他参数
"""
from czsc.utils.ta import MACD
sub_title = kwargs.get("sub_title", "缠中说禅交易员详情")
if sub_title:
st.subheader(sub_title, divider="rainbow")
if not trader.freqs or not trader.kas or not trader.positions:
st.error("当前 trader 没有回测数据")
return
freqs = czsc.freqs_sorted(trader.freqs)
st.write(f"交易品种: {trader.symbol}")
tabs = st.tabs(freqs + ["策略详情"])
for freq, tab in zip(freqs, tabs[:-1]):
c = trader.kas[freq]
sdt = c.bars_raw[-max_k_num].dt if len(c.bars_raw) > max_k_num else c.bars_raw[0].dt
df = pd.DataFrame(c.bars_raw)
df["DIFF"], df["DEA"], df["MACD"] = MACD(df["close"], fastperiod=12, slowperiod=26, signalperiod=9)
df = df[df["dt"] >= sdt].copy()
kline = czsc.KlineChart(n_rows=3, row_heights=(0.5, 0.3, 0.2), title="", width="100%", height=800)
kline.add_kline(df, name="")
if len(c.bi_list) > 0:
bi = pd.DataFrame(
[{"dt": x.fx_a.dt, "bi": x.fx_a.fx} for x in c.bi_list]
+ [{"dt": c.bi_list[-1].fx_b.dt, "bi": c.bi_list[-1].fx_b.fx}]
)
fx = pd.DataFrame([{"dt": x.dt, "fx": x.fx} for x in c.fx_list])
fx = fx[fx["dt"] >= sdt]
bi = bi[bi["dt"] >= sdt]
kline.add_scatter_indicator(
fx["dt"],
fx["fx"],
name="分型",
row=1,
line_width=1.2,
visible=True,
mode="lines",
line_dash="dot",
marker_color="white",
)
kline.add_scatter_indicator(bi["dt"], bi["bi"], name="笔", row=1, line_width=1.5)
kline.add_sma(df, ma_seq=(5, 20, 60), row=1, visible=False, line_width=1)
kline.add_vol(df, row=2, line_width=1)
kline.add_macd(df, row=3, line_width=1)
# 在基础周期上绘制交易信号
if freq == trader.base_freq:
for pos in trader.positions:
bs_df = pd.DataFrame([x for x in pos.operates if x["dt"] >= sdt])
if bs_df.empty:
continue
open_ops = [czsc.Operate.LO, czsc.Operate.SO]
bs_df["tag"] = bs_df["op"].apply(lambda x: "triangle-up" if x in open_ops else "triangle-down")
bs_df["color"] = bs_df["op"].apply(lambda x: "red" if x in open_ops else "white")
kline.add_scatter_indicator(
bs_df["dt"],
bs_df["price"],
name=pos.name,
text=bs_df["op_desc"],
row=1,
mode="markers",
marker_size=15,
marker_symbol=bs_df["tag"],
marker_color=bs_df["color"],
visible=False,
hover_template="价格: %{y:.2f}<br>时间: %{x}<br>操作: %{text}<extra></extra>",
)
with tab:
config = {
"scrollZoom": True,
"displayModeBar": True,
"displaylogo": False,
"modeBarButtonsToRemove": [
"toggleSpikelines",
"select2d",
"zoomIn2d",
"zoomOut2d",
"lasso2d",
"autoScale2d",
"hoverClosestCartesian",
"hoverCompareCartesian",
],
}
st.plotly_chart(kline.fig, use_container_width=True, config=config)
with tabs[-1]:
with st.expander("查看最新信号", expanded=False):
if len(trader.s):
s = {k: v for k, v in trader.s.items() if len(k.split("_")) == 3}
st.write(s)
else:
st.warning("当前没有信号配置信息")
for pos in trader.positions:
st.divider()
st.write(pos.name)
st.json(pos.dump(with_data=False))
[docs]def show_strategies_recent(df, **kwargs):
"""展示最近 N 天的策略表现
:param df: pd.DataFrame, columns=['dt', 'strategy', 'returns'], 样例如下:
=================== ========== ============
dt strategy returns
=================== ========== ============
2021-01-04 00:00:00 STK001 -0.00240078
2021-01-05 00:00:00 STK001 -0.00107012
2021-01-06 00:00:00 STK001 0.00122168
2021-01-07 00:00:00 STK001 0.0020896
2021-01-08 00:00:00 STK001 0.000510725
=================== ========== ============
:param kwargs: dict
- nseq: tuple, optional, 默认为 (1, 3, 5, 10, 20, 30, 60, 90, 120, 180, 240, 360),展示的天数序列
"""
nseq = kwargs.get("nseq", (1, 3, 5, 10, 20, 30, 60, 90, 120, 180, 240, 360))
dfr = df.copy()
dfr = pd.pivot_table(dfr, index="dt", columns="strategy", values="returns", aggfunc="sum").fillna(0)
rows = []
for n in nseq:
for k, v in dfr.iloc[-n:].sum(axis=0).to_dict().items():
rows.append({"天数": f"近{n}天", "策略": k, "收益": v})
n_rets = pd.DataFrame(rows).pivot_table(index="策略", columns="天数", values="收益")
n_rets = n_rets[[f"近{x}天" for x in nseq]]
st.dataframe(
n_rets.style.background_gradient(cmap="RdYlGn_r").format("{:.2%}", na_rep="-"),
use_container_width=True,
hide_index=False,
)
# 计算每个时间段的盈利策略数量
win_count = n_rets.applymap(lambda x: 1 if x > 0 else 0).sum(axis=0)
win_rate = n_rets.applymap(lambda x: 1 if x > 0 else 0).sum(axis=0) / n_rets.shape[0]
dfs = pd.DataFrame({"盈利策略数量": win_count, "盈利策略比例": win_rate}).T
dfs = dfs.style.background_gradient(cmap="RdYlGn_r", axis=1).format("{:.4f}", na_rep="-")
st.dataframe(dfs, use_container_width=True)
st.caption(f"统计截止日期:{dfr.index[-1].strftime('%Y-%m-%d')};策略数量:{dfr.shape[1]}")
[docs]def show_factor_value(df, factor, **kwargs):
"""展示因子值可视化
:param df: pd.DataFrame, columns=['dt', ‘open’, ‘close’, ‘high’, ‘low’, ‘vol’, factor]
:param factor: str, 因子名称
:param kwargs: dict, 其他参数
- height: int, 可视化高度,默认为 600
- row_heights: list, 默认为 [0.6, 0.1, 0.3]
- title: str, 默认为 f"{factor} 可视化"
"""
if factor not in df.columns:
st.warning(f"因子 {factor} 不存在,请检查")
return
height = kwargs.get("height", 600)
row_heights = kwargs.get("row_heights", [0.6, 0.1, 0.3])
title = kwargs.get("title", f"{factor} 可视化")
chart = czsc.KlineChart(n_rows=3, height=height, row_heights=row_heights, title=title)
chart.add_kline(df)
chart.add_sma(df, visible=True, line_width=1)
chart.add_vol(df, row=2)
chart.add_scatter_indicator(df["dt"], df[factor], name=factor, row=3, line_width=1.5)
plotly_config = {
"scrollZoom": True,
"displayModeBar": True,
"displaylogo": False,
"modeBarButtonsToRemove": [
"toggleSpikelines",
"select2d",
"zoomIn2d",
"zoomOut2d",
"lasso2d",
"autoScale2d",
"hoverClosestCartesian",
"hoverCompareCartesian",
],
}
st.plotly_chart(chart.fig, use_container_width=True, config=plotly_config)
[docs]def show_code_editor(default: str = None, **kwargs):
"""用户自定义 Python 代码编辑器
:param default: str, 默认代码
:param kwargs: dict, 额外参数
- language: str, 编辑器语言,默认为 "python"
- use_expander: bool, 是否使用折叠面板,默认为 True
- expander_title: str, 折叠面板标题,默认为 "PYTHON代码编辑"
- exec: bool, 是否执行代码,默认为 True
- theme: str, 编辑器主题,默认为 "gruvbox"
- keybinding: str, 编辑器快捷键,默认为 "vscode"
:return: tuple
- code: str, 编辑器中的代码
- namespace: dict, 执行代码后的命名空间
"""
if default is None:
default = """
# 代码示例
import czsc
import pandas as pd
import numpy as np
"""
try:
from streamlit_ace import st_ace, THEMES, KEYBINDINGS, LANGUAGES
except ImportError:
st.error("请先安装 streamlit-ace 库,执行命令:pip install streamlit-ace")
return
default_language = kwargs.get("language", "python")
default_theme = kwargs.get("theme", "gruvbox")
default_keybinding = kwargs.get("keybinding", "vscode")
def __editor():
c1, c2 = st.columns([10, 1])
with c2:
language = c2.selectbox("语言", LANGUAGES, index=LANGUAGES.index(default_language))
height = c2.number_input("编辑器高度", value=550, min_value=100, max_value=2000, step=50)
font_size = c2.number_input("字体大小", value=16, min_value=8, max_value=32)
theme = c2.selectbox("主题", THEMES, index=THEMES.index(default_theme))
keybinding = c2.selectbox("快捷键", KEYBINDINGS, index=KEYBINDINGS.index(default_keybinding))
wrap = c2.checkbox("自动换行", value=True)
show_gutter = c2.checkbox("显示行号", value=True)
readonly = c2.checkbox("只读模式", value=False)
with c1:
_code = st_ace(
language=language,
value=default,
height=height,
font_size=font_size,
theme=theme,
show_gutter=show_gutter,
keybinding=keybinding,
markers=None,
tab_size=4,
wrap=wrap,
show_print_margin=False,
readonly=readonly,
key="python_editor",
)
return _code
use_expander = kwargs.get("use_expander", True)
expander_title = kwargs.get("expander_title", "代码编辑器")
if not use_expander:
code = __editor()
else:
with st.expander(expander_title, expanded=True):
code = __editor()
return code
[docs]def show_classify(df, col1, col2, n=10, method="cut", **kwargs):
"""显示 col1 对 col2 的分类作用
:param df: 数据,pd.DataFrame
:param col1: 分层列
:param col2: 统计列
:param n: 分层数量
:param method: 分层方法,cut 或 qcut
:param kwargs:
- show_bar: bool, 是否展示柱状图,默认为 False
"""
df = df[[col1, col2]].copy()
if method == "cut":
df[f"{col1}_分层"] = pd.cut(df[col1], bins=n, duplicates="drop")
elif method == "qcut":
df[f"{col1}_分层"] = pd.qcut(df[col1], q=n, duplicates="drop")
else:
raise ValueError("method must be 'cut' or 'qcut'")
dfg = df.groupby(f"{col1}_分层", observed=True)[col2].describe().reset_index()
dfx = dfg.copy()
info = (
f"{col1} 分层对应 {col2} 的均值单调性::red[{czsc.monotonicity(dfx['mean']):.2%}]; "
f"最后一层的均值::red[{dfx['mean'].iloc[-1]:.4f}];"
f"第一层的均值::red[{dfx['mean'].iloc[0]:.4f}]"
)
st.markdown(info)
if kwargs.get("show_bar", False):
dfx["标记"] = dfx[f"{col1}_分层"].astype(str)
dfx["text"] = dfx["mean"].apply(lambda x: f"{x:.4f}")
fig = px.bar(dfx, x="标记", y="mean", text="text", color="mean", color_continuous_scale="RdYlGn_r")
fig.update_xaxes(title=None)
fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
st.plotly_chart(fig, use_container_width=True)
dfg = dfg.style.background_gradient(cmap="RdYlGn_r", axis=None, subset=["count"])
dfg = dfg.background_gradient(cmap="RdYlGn_r", axis=None, subset=["mean", "std", "min", "25%", "50%", "75%", "max"])
dfg = dfg.format(
{
"count": "{:.0f}",
"mean": "{:.4f}",
"std": "{:.2%}",
"min": "{:.4f}",
"25%": "{:.4f}",
"50%": "{:.4f}",
"75%": "{:.4f}",
"max": "{:.4f}",
}
)
st.dataframe(dfg, use_container_width=True)