
version one 2025/4/11

ziqidai11 committed 1 month ago
commit 07332be1a6
4 changed files with 60 additions and 30 deletions
  1. WTI/1.2Rbob.py  (+60 −30)
  2. WTI/eta/1.WTI_update_data.xlsx  (BIN)
  3. WTI/eta/RBOB_Daily.xlsx  (BIN)
  4. WTI/eta/RBOB_Monthly.xlsx  (BIN)

WTI/1.2Rbob.py  (+60 −30)

@@ -11,7 +11,6 @@ from Dcel import update_excel_data
 from Dtool import fill_missing_values, reverse_column
 
 # ------------ Global configuration -----------
-# File paths and output settings
 FILE_PATH = 'data_input/RBOB.xlsx'
 OUTPUT_DAILY = 'eta/RBOB_Daily.xlsx'
 OUTPUT_MONTHLY = 'eta/RBOB_Monthly.xlsx'
@@ -21,22 +20,14 @@ UPDATE_IDENTIFIER = "RBOB"
 
 NUM_BOOST_ROUND = 1000
 RANDOM_STATE = 42
-# Whether to perform hyperparameter tuning
-USE_HYPERPARAM_TUNING = True
+USE_HYPERPARAM_TUNING = True    # if False, use the default parameters directly
 
-# Prediction target variable
 TARGET_COL = '美国RBOB汽油裂解'  # prediction target
+TEST_PERIOD = 20                 # number of samples in the test set
 
-# Number of test-set samples (TEST_PERIOD); currently defaults to 1
-TEST_PERIOD = 20
+SEARCH_MODE = "random"           # one of "grid" / "bayesian" / "random"
+SHOW_PLOTS = True                # whether to show the final prediction chart
 
-# Search mode: "grid" (grid search), "bayesian" (Bayesian search) or "random" (random search)
-SEARCH_MODE = "random"  # can be changed to "bayesian" or "random"
-
-# Whether to show the final prediction chart: True to show, False to hide
-SHOW_PLOTS = True
-
-# Default model parameters (used when tuning is disabled)
 DEFAULT_PARAMS = {
     'objective': 'reg:squarederror',
     'learning_rate': 0.1309,
@@ -172,6 +163,7 @@ def detect_outliers_weights(X, weight_normal=1.0, weight_outlier=0.05, threshold
     weights = np.where(outlier_mask, weight_outlier, weight_normal)
     return weights
 
+# ------------ Model training ------------
 def train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weights, use_tuning=True):
     if use_tuning:
         param_dist = {
@@ -187,45 +179,50 @@ def train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weig
         xgb_reg = XGBRegressor(objective='reg:squarederror', eval_metric='rmse',
                                n_estimators=NUM_BOOST_ROUND, seed=RANDOM_STATE)
         tscv = TimeSeriesSplit(n_splits=3)
-        
+        # Extra fit parameters; verbose=200 prints validation metrics every 200 rounds
+        extra_fit_params = {
+            'eval_set': [(X_train_scaled, y_train), (X_test_scaled, y_test)],
+            'early_stopping_rounds': 20,
+            'verbose': 200
+        }
         if SEARCH_MODE == "grid":
             search = GridSearchCV(
                 estimator=xgb_reg,
                 param_grid=param_dist,
                 scoring='neg_mean_squared_error',
                 cv=tscv,
-                verbose=1,     # show detailed progress
+                verbose=2,
                 n_jobs=-1
             )
         elif SEARCH_MODE == "bayesian":
             search = BayesSearchCV(
                 estimator=xgb_reg,
                 search_spaces=param_dist,
-                n_iter=50,    # increase the number of search iterations
+                n_iter=50,
                 scoring='neg_mean_squared_error',
                 cv=tscv,
                 random_state=RANDOM_STATE,
-                verbose=1,    # more detailed training progress output
+                verbose=2,
                 n_jobs=-1
             )
         else:
             search = RandomizedSearchCV(
                 estimator=xgb_reg,
                 param_distributions=param_dist,
-                n_iter=50,    # increase the number of search iterations
+                n_iter=50,
                 scoring='neg_mean_squared_error',
                 cv=tscv,
                 random_state=RANDOM_STATE,
-                verbose=1,
+                verbose=2,
                 n_jobs=-1
             )
         search.fit(X_train_scaled, y_train, sample_weight=weights)
         best_model = search.best_estimator_
         print("调优后的最佳参数:", search.best_params_)
-        # Changed to use early_stopping_rounds
+        # Refit using extra_fit_params so early_stopping_rounds and verbose control the evaluation output
         best_model.fit(X_train_scaled, y_train,
-                      eval_set=[(X_test_scaled, y_test)],
-                      verbose=1)
+                       # unpack eval_set, early_stopping_rounds and verbose defined above
+                       **extra_fit_params)
     else:
         dtrain = xgb.DMatrix(X_train_scaled, label=y_train, weight=weights)
         dtest = xgb.DMatrix(X_test_scaled, label=y_test)
@@ -234,14 +231,12 @@ def train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weig
                                verbose_eval=False)
     return best_model
 
-
 # ------------ Model evaluation and prediction ------------
 def evaluate_and_predict(model, scaler, X_train, y_train, X_test, y_test, X_future, use_tuning=True):
     X_train_trans = scaler.transform(X_train)
     X_test_trans = scaler.transform(X_test)
     X_future_trans = scaler.transform(X_future)
     
-    # Branch on the model type
     if isinstance(model, xgb.Booster):
         y_train_pred = model.predict(xgb.DMatrix(X_train_trans))
         y_test_pred = model.predict(xgb.DMatrix(X_test_trans))
@@ -267,13 +262,17 @@ def evaluate_and_predict(model, scaler, X_train, y_train, X_test, y_test, X_futu
     
     return y_train_pred, y_test_pred, y_future_pred
 
-
 # ------------ Result post-processing and saving ------------
 def merge_and_save_results(train_data, test_data, future_data, y_test_pred, y_future_pred):
+    # Update the prediction columns of test_data and future_data;
+    # if future_data already contains '完整数据_预测值', prefer it over y_future_pred
     test_data = test_data.copy()
     future_data = future_data.copy()
     test_data['预测值'] = y_test_pred
-    future_data['预测值'] = y_future_pred
+    if '完整数据_预测值' in future_data.columns:
+        future_data['预测值'] = future_data['完整数据_预测值']
+    else:
+        future_data['预测值'] = y_future_pred
 
     train_data_2023 = train_data[train_data['Date'].dt.year >= 2023][['Date', TARGET_COL]]
     test_actual = test_data[['Date', TARGET_COL]]
@@ -327,6 +326,9 @@ def plot_final_predictions(train_data, y_train, y_train_pred,
     plt.plot(test_data['Date'], y_test, label='Test True', color='orange', alpha=0.7)
     plt.plot(test_data['Date'], y_test_pred, label='Test Predicted', color='red')
     plt.plot(future_data['Date'], future_data['预测值'], label='Future Prediction', color='purple')
+    # Also plot the prediction from the model trained on the full data, drawn as a black line
+    if '完整数据_预测值' in future_data.columns:
+        plt.plot(future_data['Date'], future_data['完整数据_预测值'], label='Full Model Future Prediction', color='black')
     plt.axvline(x=test_data['Date'].iloc[0], color='black', linestyle='--', label='Train/Test Split')
     plt.axvline(x=last_day, color='gray', linestyle='--', label='Future Split')
     plt.title('Prediction Visualization')
@@ -336,6 +338,27 @@ def plot_final_predictions(train_data, y_train, y_train_pred,
     plt.grid(True)
     plt.show()
 
+# ------------ Full-data training and future prediction ------------
+def train_full_model_and_predict(X_train, y_train, X_test, y_test, X_future):
+    # Combine the training and test sets
+    X_full = pd.concat([X_train, X_test])
+    y_full = pd.concat([y_train, y_test])
+    scaler_full = StandardScaler().fit(X_full)
+    X_full_scaled = scaler_full.transform(X_full)
+    X_future_scaled = scaler_full.transform(X_future)
+    
+    params = None
+    if USE_HYPERPARAM_TUNING:
+        # The parameters found during tuning could be reused here; this demo still falls back to the defaults
+        params = None
+    if params is None:
+        params = DEFAULT_PARAMS
+    # Only set n_estimators here; the seed parameter is not set again
+    full_model = XGBRegressor(**params, n_estimators=NUM_BOOST_ROUND)
+    full_model.fit(X_full_scaled, y_full)
+    y_future_full_pred = full_model.predict(X_future_scaled)
+    return full_model, y_future_full_pred, scaler_full
+
 # ------------ Main function ------------
 def main():
     df_daily, last_day = load_and_preprocess_data(FILE_PATH)
@@ -343,26 +366,33 @@ def main():
     scaler, X_train_scaled, X_test_scaled, X_future_scaled = scale_and_weight_features(X_train, X_test, X_future)
     weights = detect_outliers_weights(X_train, weight_normal=1.0, weight_outlier=0.05, threshold=3)
     
-    # USE_HYPERPARAM_TUNING decides whether hyperparameter tuning is performed
     model = train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weights,
                                     use_tuning=USE_HYPERPARAM_TUNING)
     
     y_train_pred, y_test_pred, y_future_pred = evaluate_and_predict(model, scaler, X_train, y_train, X_test, y_test, X_future,
                                                                     use_tuning=USE_HYPERPARAM_TUNING)
     
-    # Append the prediction results to the data frame
-    future_data = future_data.copy()
-    future_data['预测值'] = y_future_pred
+    # Append the prediction results to test_data and future_data
     test_data = test_data.copy()
     test_data['预测值'] = y_test_pred
+    future_data = future_data.copy()
+    future_data['预测值'] = y_future_pred
+    
+    # Train a model on the full data set and predict the future period
+    full_model, y_future_full_pred, scaler_full = train_full_model_and_predict(X_train, y_train, X_test, y_test, X_future)
+    future_data['完整数据_预测值'] = y_future_full_pred
     
     merged_df = merge_and_save_results(train_data, test_data, future_data, y_test_pred, y_future_pred)
     update_excel(merged_df)
     
+
+    # Plot the final predictions
     if SHOW_PLOTS:
         plot_final_predictions(train_data, y_train, y_train_pred,
                                test_data, y_test, y_test_pred,
                                future_data, last_day)
+    
+    print("全数据模型对未来数据的预测结果:", y_future_full_pred)
 
 if __name__ == '__main__':
     main()

WTI/eta/1.WTI_update_data.xlsx  (BIN)

WTI/eta/RBOB_Daily.xlsx  (BIN)

WTI/eta/RBOB_Monthly.xlsx  (BIN)