Browse Source

version two 日期问题解决 存储数据设置有效位数防止 数据过长

ziqidai11 2 weeks ago
parent
commit
95614022a9
2 changed files with 68 additions and 22 deletions
  1. 68 22
      WTI/Rbob.py
  2. BIN
      WTI/update.xlsx

+ 68 - 22
WTI/Rbob.py

@@ -33,6 +33,9 @@ def parse_arguments():
     parser.add_argument('--seed', type=int, default=42, help='随机种子')
     parser.add_argument('--reg_alpha', type=float, default=0.45, help='L1正则化')
     parser.add_argument('--reg_lambda', type=float, default=1.29, help='L2正则化')
+    parser.add_argument('--booster', type=str, default='gbtree', help='提升器类型')
+    parser.add_argument('--tree_method', type=str, default='auto', help='树构建方法')
+    parser.add_argument('--max_delta_step', type=int, default=0, help='最大步长')
 
     # 其他参数
     parser.add_argument('--num_boost_round', type=int, default=1000, help='提升迭代次数')
@@ -63,10 +66,10 @@ TARGET_NAME = '美国RBOB汽油裂解'
 CLASSIFICATION = '原油'
 MODEL_FRAMEWORK = 'XGBoost'
 CREATOR = '张立舟'
-PRED_DATE = '2024/11/11'           
+#PRED_DATE = '2024/11/11'           
 FREQUENCY = '月度'
 OUTPUT_PATH = 'update.xlsx'     
-
+SIGNIFICANT_DIGITS = 5
 
 # XGBoost默认参数,将在main函数中从命令行参数更新
 DEFAULT_PARAMS = {
@@ -81,6 +84,9 @@ DEFAULT_PARAMS = {
     'seed': 42,
     'reg_alpha': 0.45,
     'reg_lambda': 1.29,
+    'max_delta_step': 0,
+    'booster': 'gbtree',
+    'tree_method': 'auto'
 }
 
 # —— 因子预处理相关配置 —— 
@@ -402,9 +408,10 @@ def merge_and_prepare_df(train, test, future, y_te_pred, y_fu_pred):
     monthly_df['方向准确率'] = np.where(valid & (pred_dir == true_dir), '正确',
                                    np.where(valid & (pred_dir != true_dir), '错误', ''))
     
+    # 绝对偏差改为相对偏差:|预测值 - 实际值| / 实际值(此处为比例值,写入Excel时再格式化为百分比)
     monthly_df['绝对偏差'] = np.where(
         monthly_df[TARGET_COL].notna() & monthly_df['实际值'].notna(),
-        (monthly_df[TARGET_COL] - monthly_df['实际值']).abs(),
+        abs((monthly_df[TARGET_COL] - monthly_df['实际值']) / monthly_df['实际值']),  
         np.nan)
 
     monthly_df = monthly_df.sort_values('Date', ascending=False).reset_index(drop=True)
@@ -424,22 +431,43 @@ def generate_and_fill_excel(
     classification,     # 列表页-分类
     model_framework,    # 列表页-模型框架
     creator,            # 列表页-创建人
-    pred_date,          # 列表页-预测日期
+#    pred_date,          # 列表页-预测日期
     frequency,          # 列表页-预测频度
+    significant_digits=5,
     output_path='update.xlsx'
 ):
     with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
         workbook = writer.book
 
+        # 获取monthly_df的第一个日期作为预测日期
+        actual_pred_date = pd.to_datetime(monthly_df['Date'].iloc[0]).strftime('%Y/%m/%d')
+        
+        # 格式化数值的辅助函数 - 用于测试值
+        def format_test_value(x, sig_digits=significant_digits):
+            if pd.isna(x):
+                return ""
+            return f"{float(x):.{sig_digits}g}"
+            
+        # 格式化百分比的辅助函数 - 用于方向准确率和偏差率(保留2位小数)
+        def format_percentage(x):
+            if pd.isna(x):
+                return ""
+            return f"{float(x*100):.2f}%"
+            
+        # 格式化训练指标的辅助函数 - 用于训练结果页(6位有效数)
+        def format_metrics(x):
+            if pd.isna(x) or x == '':
+                return ""
+            return f"{float(x):.6g}"
+
         # —— 计算三个汇总值 —— 
-        # 1) 测试值:最新月度的预测值
-        test_value = monthly_df[TARGET_COL].iloc[0]
-        # 2) 方向准确率:正确数 / 有效数
+        test_value = format_test_value(monthly_df[TARGET_COL].iloc[0])
+        
         total = monthly_df['方向准确率'].notna().sum()
         correct = (monthly_df['方向准确率'] == '正确').sum()
-        direction_accuracy = f"{correct/total:.2%}" if total > 0 else ""
-        # 3) 平均绝对偏差
-        absolute_deviation = monthly_df['绝对偏差'].mean()
+        direction_accuracy = format_percentage(correct/total) if total > 0 else ""
+        
+        absolute_deviation = format_percentage(monthly_df['绝对偏差'].mean())
 
         # ========= 列表页 =========
         ws_list = workbook.add_worksheet('列表页')
@@ -452,7 +480,7 @@ def generate_and_fill_excel(
             classification,
             model_framework,
             creator,
-            pred_date,
+            actual_pred_date,
             test_value,
             frequency,
             direction_accuracy,
@@ -462,6 +490,16 @@ def generate_and_fill_excel(
         # ========= 详情页 =========
         detail_df = monthly_df[['Date', '实际值', TARGET_COL, '方向准确率', '绝对偏差']].copy()
         detail_df.columns = ['指标日期','实际值','预测值','方向','偏差率']
+        
+        # 格式化日期为年/月/日
+        detail_df['指标日期'] = pd.to_datetime(detail_df['指标日期']).dt.strftime('%Y/%m/%d')
+        
+        # 格式化实际值和预测值列(使用传入的significant_digits)
+        detail_df['实际值'] = detail_df['实际值'].apply(format_test_value)
+        detail_df['预测值'] = detail_df['预测值'].apply(format_test_value)
+
+        detail_df['偏差率'] = detail_df['偏差率'].apply(
+            lambda x: f"{float(x*100):.3g}%" if pd.notnull(x) else "")
 
         detail_df.to_excel(writer,sheet_name='详情页',index=False,header=False,startrow=2)
 
@@ -472,7 +510,11 @@ def generate_and_fill_excel(
         # ========= 日度数据表 =========
         daily_out = daily_df[['Date', '实际值', TARGET_COL]].copy()
         daily_out.columns = ['指标日期','实际值','预测值']
-
+        
+        # 格式化日期为年/月/日
+        daily_out['指标日期'] = pd.to_datetime(daily_out['指标日期']).dt.strftime('%Y/%m/%d')
+        
+        # 日度数据表不限制有效数字
         daily_out.to_excel(writer,sheet_name='日度数据表',index=False,header=False,startrow=2)
 
         ws_daily = writer.sheets['日度数据表']
@@ -487,10 +529,10 @@ def generate_and_fill_excel(
         ws_metrics.write_row(0, 0, metrics_headers)
         
         metrics_rows = [
-            ['训练集 MSE', metrics['train_mse']],
-            ['测试集 MSE', metrics['test_mse']],
-            ['训练集 R²', metrics['train_r2']],
-            ['测试集 R²', metrics['test_r2'] if metrics['test_r2'] is not None else '']
+            ['训练集 MSE', format_metrics(metrics['train_mse'])],
+            ['测试集 MSE', format_metrics(metrics['test_mse'])],
+            ['训练集 R²', format_metrics(metrics['train_r2'])],
+            ['测试集 R²', format_metrics(metrics['test_r2']) if metrics['test_r2'] is not None else '']
         ]
         
         for i, row in enumerate(metrics_rows, start=1):
@@ -562,6 +604,9 @@ def main():
         'seed': args.seed,
         'reg_alpha': args.reg_alpha,
         'reg_lambda': args.reg_lambda,
+        'max_delta_step': args.max_delta_step,
+        'booster': args.booster,
+        'tree_method':args.tree_method
     }
     
 #    print("使用参数:")
@@ -590,13 +635,14 @@ def main():
     generate_and_fill_excel(
         daily_df,
         monthly_df,
-        metrics,           # 新增参数
-        target_name=TARGET_NAME,          
-        classification=CLASSIFICATION,
-        model_framework=MODEL_FRAMEWORK,
-        creator=CREATOR,
-        pred_date=PRED_DATE,           
+        metrics,           
+        target_name=TARGET_NAME,        
+        classification=CLASSIFICATION,     
+        model_framework=MODEL_FRAMEWORK,    
+        creator=CREATOR,            
+    #    pred_date=PRED_DATE,    
         frequency=FREQUENCY,
+        significant_digits= SIGNIFICANT_DIGITS,  # 有效数字位数由 SIGNIFICANT_DIGITS 指定(当前为5)
         output_path=OUTPUT_PATH     
     )
     

BIN
WTI/update.xlsx