|
@@ -33,6 +33,9 @@ def parse_arguments():
|
|
|
parser.add_argument('--seed', type=int, default=42, help='随机种子')
|
|
|
parser.add_argument('--reg_alpha', type=float, default=0.45, help='L1正则化')
|
|
|
parser.add_argument('--reg_lambda', type=float, default=1.29, help='L2正则化')
|
|
|
+ parser.add_argument('--booster', type=str, default='gbtree', help='提升器类型')
|
|
|
+ parser.add_argument('--tree_method', type=str, default='auto', help='树构建方法')
|
|
|
+ parser.add_argument('--max_delta_step', type=int, default=0, help='最大步长')
|
|
|
|
|
|
# 其他参数
|
|
|
parser.add_argument('--num_boost_round', type=int, default=1000, help='提升迭代次数')
|
|
@@ -63,10 +66,10 @@ TARGET_NAME = '美国RBOB汽油裂解'
|
|
|
CLASSIFICATION = '原油'
|
|
|
MODEL_FRAMEWORK = 'XGBoost'
|
|
|
CREATOR = '张立舟'
|
|
|
-PRED_DATE = '2024/11/11'
|
|
|
+#PRED_DATE = '2024/11/11'
|
|
|
FREQUENCY = '月度'
|
|
|
OUTPUT_PATH = 'update.xlsx'
|
|
|
-
|
|
|
+SIGNIFICANT_DIGITS = 5
|
|
|
|
|
|
# XGBoost默认参数,将在main函数中从命令行参数更新
|
|
|
DEFAULT_PARAMS = {
|
|
@@ -81,6 +84,9 @@ DEFAULT_PARAMS = {
|
|
|
'seed': 42,
|
|
|
'reg_alpha': 0.45,
|
|
|
'reg_lambda': 1.29,
|
|
|
+ 'max_delta_step': 0,
|
|
|
+ 'booster': 'gbtree',
|
|
|
+ 'tree_method': 'auto'
|
|
|
}
|
|
|
|
|
|
# —— 因子预处理相关配置 ——
|
|
@@ -402,9 +408,10 @@ def merge_and_prepare_df(train, test, future, y_te_pred, y_fu_pred):
|
|
|
monthly_df['方向准确率'] = np.where(valid & (pred_dir == true_dir), '正确',
|
|
|
np.where(valid & (pred_dir != true_dir), '错误', ''))
|
|
|
|
|
|
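+ # Deviation is now relative: |predicted - actual| / actual, stored as a fraction (multiplied by 100 only when formatted for output). NOTE(review): an actual value of 0 yields inf/NaN here - confirm whether that can occur.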
|
|
|
monthly_df['绝对偏差'] = np.where(
|
|
|
monthly_df[TARGET_COL].notna() & monthly_df['实际值'].notna(),
|
|
|
- (monthly_df[TARGET_COL] - monthly_df['实际值']).abs(),
|
|
|
+ abs((monthly_df[TARGET_COL] - monthly_df['实际值']) / monthly_df['实际值']),
|
|
|
np.nan)
|
|
|
|
|
|
monthly_df = monthly_df.sort_values('Date', ascending=False).reset_index(drop=True)
|
|
@@ -424,22 +431,43 @@ def generate_and_fill_excel(
|
|
|
classification, # 列表页-分类
|
|
|
model_framework, # 列表页-模型框架
|
|
|
creator, # 列表页-创建人
|
|
|
- pred_date, # 列表页-预测日期
|
|
|
+# pred_date, # 列表页-预测日期
|
|
|
frequency, # 列表页-预测频度
|
|
|
+ significant_digits=5,
|
|
|
output_path='update.xlsx'
|
|
|
):
|
|
|
with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
|
|
|
workbook = writer.book
|
|
|
|
|
|
+ # 获取monthly_df的第一个日期作为预测日期
|
|
|
+ actual_pred_date = pd.to_datetime(monthly_df['Date'].iloc[0]).strftime('%Y/%m/%d')
|
|
|
+
|
|
|
+ # 格式化数值的辅助函数 - 用于测试值
|
|
|
+ def format_test_value(x, sig_digits=significant_digits):
|
|
|
+ if pd.isna(x):
|
|
|
+ return ""
|
|
|
+ return f"{float(x):.{sig_digits}g}"
|
|
|
+
|
|
|
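+ # Percentage formatter for the summary direction accuracy and mean deviation rate: multiplies by 100 and keeps 2 fixed decimal places (e.g. 12.34%); NaN becomes an empty string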
|
|
|
+ def format_percentage(x):
|
|
|
+ if pd.isna(x):
|
|
|
+ return ""
|
|
|
+ return f"{float(x*100):.2f}%"
|
|
|
+
|
|
|
+ # 格式化训练指标的辅助函数 - 用于训练结果页(6位有效数)
|
|
|
+ def format_metrics(x):
|
|
|
+ if pd.isna(x) or x == '':
|
|
|
+ return ""
|
|
|
+ return f"{float(x):.6g}"
|
|
|
+
|
|
|
# —— 计算三个汇总值 ——
|
|
|
- # 1) 测试值:最新月度的预测值
|
|
|
- test_value = monthly_df[TARGET_COL].iloc[0]
|
|
|
- # 2) 方向准确率:正确数 / 有效数
|
|
|
+ test_value = format_test_value(monthly_df[TARGET_COL].iloc[0])
|
|
|
+
|
|
|
total = monthly_df['方向准确率'].notna().sum()
|
|
|
correct = (monthly_df['方向准确率'] == '正确').sum()
|
|
|
- direction_accuracy = f"{correct/total:.2%}" if total > 0 else ""
|
|
|
- # 3) 平均绝对偏差
|
|
|
- absolute_deviation = monthly_df['绝对偏差'].mean()
|
|
|
+ direction_accuracy = format_percentage(correct/total) if total > 0 else ""
|
|
|
+
|
|
|
+ absolute_deviation = format_percentage(monthly_df['绝对偏差'].mean())
|
|
|
|
|
|
# ========= 列表页 =========
|
|
|
ws_list = workbook.add_worksheet('列表页')
|
|
@@ -452,7 +480,7 @@ def generate_and_fill_excel(
|
|
|
classification,
|
|
|
model_framework,
|
|
|
creator,
|
|
|
- pred_date,
|
|
|
+ actual_pred_date,
|
|
|
test_value,
|
|
|
frequency,
|
|
|
direction_accuracy,
|
|
@@ -462,6 +490,16 @@ def generate_and_fill_excel(
|
|
|
# ========= 详情页 =========
|
|
|
detail_df = monthly_df[['Date', '实际值', TARGET_COL, '方向准确率', '绝对偏差']].copy()
|
|
|
detail_df.columns = ['指标日期','实际值','预测值','方向','偏差率']
|
|
|
+
|
|
|
+ # 格式化日期为年/月/日
|
|
|
+ detail_df['指标日期'] = pd.to_datetime(detail_df['指标日期']).dt.strftime('%Y/%m/%d')
|
|
|
+
|
|
|
+ # 格式化实际值和预测值列(使用传入的significant_digits)
|
|
|
+ detail_df['实际值'] = detail_df['实际值'].apply(format_test_value)
|
|
|
+ detail_df['预测值'] = detail_df['预测值'].apply(format_test_value)
|
|
|
+
|
|
|
+ detail_df['偏差率'] = detail_df['偏差率'].apply(
|
|
|
+ lambda x: f"{float(x*100):.3g}%" if pd.notnull(x) else "")
|
|
|
|
|
|
detail_df.to_excel(writer,sheet_name='详情页',index=False,header=False,startrow=2)
|
|
|
|
|
@@ -472,7 +510,11 @@ def generate_and_fill_excel(
|
|
|
# ========= 日度数据表 =========
|
|
|
daily_out = daily_df[['Date', '实际值', TARGET_COL]].copy()
|
|
|
daily_out.columns = ['指标日期','实际值','预测值']
|
|
|
-
|
|
|
+
|
|
|
+ # 格式化日期为年/月/日
|
|
|
+ daily_out['指标日期'] = pd.to_datetime(daily_out['指标日期']).dt.strftime('%Y/%m/%d')
|
|
|
+
|
|
|
+ # 日度数据表不限制有效数字
|
|
|
daily_out.to_excel(writer,sheet_name='日度数据表',index=False,header=False,startrow=2)
|
|
|
|
|
|
ws_daily = writer.sheets['日度数据表']
|
|
@@ -487,10 +529,10 @@ def generate_and_fill_excel(
|
|
|
ws_metrics.write_row(0, 0, metrics_headers)
|
|
|
|
|
|
metrics_rows = [
|
|
|
- ['训练集 MSE', metrics['train_mse']],
|
|
|
- ['测试集 MSE', metrics['test_mse']],
|
|
|
- ['训练集 R²', metrics['train_r2']],
|
|
|
- ['测试集 R²', metrics['test_r2'] if metrics['test_r2'] is not None else '']
|
|
|
+ ['训练集 MSE', format_metrics(metrics['train_mse'])],
|
|
|
+ ['测试集 MSE', format_metrics(metrics['test_mse'])],
|
|
|
+ ['训练集 R²', format_metrics(metrics['train_r2'])],
|
|
|
+ ['测试集 R²', format_metrics(metrics['test_r2']) if metrics['test_r2'] is not None else '']
|
|
|
]
|
|
|
|
|
|
for i, row in enumerate(metrics_rows, start=1):
|
|
@@ -562,6 +604,9 @@ def main():
|
|
|
'seed': args.seed,
|
|
|
'reg_alpha': args.reg_alpha,
|
|
|
'reg_lambda': args.reg_lambda,
|
|
|
+ 'max_delta_step': args.max_delta_step,
|
|
|
+ 'booster': args.booster,
|
|
|
+ 'tree_method':args.tree_method
|
|
|
}
|
|
|
|
|
|
# print("使用参数:")
|
|
@@ -590,13 +635,14 @@ def main():
|
|
|
generate_and_fill_excel(
|
|
|
daily_df,
|
|
|
monthly_df,
|
|
|
- metrics, # 新增参数
|
|
|
- target_name=TARGET_NAME,
|
|
|
- classification=CLASSIFICATION,
|
|
|
- model_framework=MODEL_FRAMEWORK,
|
|
|
- creator=CREATOR,
|
|
|
- pred_date=PRED_DATE,
|
|
|
+ metrics,
|
|
|
+ target_name=TARGET_NAME,
|
|
|
+ classification=CLASSIFICATION,
|
|
|
+ model_framework=MODEL_FRAMEWORK,
|
|
|
+ creator=CREATOR,
|
|
|
+ # pred_date=PRED_DATE,
|
|
|
frequency=FREQUENCY,
|
|
|
+ significant_digits= SIGNIFICANT_DIGITS, # number of significant digits for actual/predicted values (SIGNIFICANT_DIGITS, currently 5)
|
|
|
output_path=OUTPUT_PATH
|
|
|
)
|
|
|
|