Browse Source

version one adding readme 2025/4/11

ziqidai11 4 weeks ago
parent
commit
02c307febf
7 changed files with 103 additions and 77 deletions
  1. 42 0
      README.md
  2. 20 28
      WTI/1.2Rbob.py
  3. BIN
      WTI/data_input/RBOB.xlsx
  4. BIN
      WTI/eta/1.WTI_update_data.xlsx
  5. BIN
      WTI/eta/RBOB_Daily.xlsx
  6. BIN
      WTI/eta/RBOB_Monthly.xlsx
  7. 41 49
      WTI/run_all.py

+ 42 - 0
README.md

@@ -0,0 +1,42 @@
+## 主要功能
+- 自动从ETA数据库获取最新市场数据
+- 数据预处理和特征工程
+- XGBoost模型训练与优化
+- 预测结果可视化
+- 自动更新Excel报表
+
+## 项目结构
+
+## 安装说明
+
+1. 克隆项目到本地
+2. 安装依赖包:
+
+```bash
+pip install -r requirements.txt
+```
+
+## 使用说明
+1. 文件介绍:
+   - `产品_api.py`:通过 API 从 ETA 数据库获取数据;在文件中的 `indicator_ids` 里选取因子和预测目标
+   - `产品(英文名字).py`:对该产品进行预测的脚本
+   - `run_all.py`:入口脚本,运行后会依次调用 `产品_api.py` 和 `产品(英文名字).py` 进行预测
+   - `Dtool.py`:数据处理工具,用于数据处理
+   - `Dcel.py`:Excel 处理工具,用于处理日度数据,并自动更新到 `1.产品_update_data.xlsx`
+
+2. 运行预测系统:
+```bash
+python run_all.py
+```
+
+3. 查看结果(重要):
+   - 预测结果将保存在 `eta/` 目录下
+   - 日度预测数据:`产品(英文名字)_Daily.xlsx`(例如 `RBOB_Daily.xlsx`)
+   - 月度预测数据:`产品(英文名字)_Monthly.xlsx`(例如 `RBOB_Monthly.xlsx`)
+   - 上传 ETA 的预测数据:`1.产品_update_data.xlsx`
+
+
+
+## 作者
+LOL D
+

+ 20 - 28
WTI/1.2Rbob.py

@@ -20,13 +20,15 @@ UPDATE_IDENTIFIER = "RBOB"
 
 NUM_BOOST_ROUND = 1000
 RANDOM_STATE = 42
-USE_HYPERPARAM_TUNING = True    # 若 False 则直接使用默认参数
+USE_HYPERPARAM_TUNING = False    # 若 False 则直接使用默认参数
 
 TARGET_COL = '美国RBOB汽油裂解'  # 预测目标
 TEST_PERIOD = 20                 # 测试集样本数量
 
 SEARCH_MODE = "random"           # 可选 "grid"/"bayesian"/"random"
-SHOW_PLOTS = True               # 是否显示最终预测图表
+SHOW_PLOTS = True                # 是否显示最终预测图表
+
+ADJUST_FULL_PREDICTIONS = True
 
 DEFAULT_PARAMS = {
     'objective': 'reg:squarederror',
@@ -99,7 +101,6 @@ def load_and_preprocess_data(file_path):
     df_daily.rename(columns={'index': 'Date'}, inplace=True)
 
     df_daily = fill_missing_values(df_daily, FILL_METHODS, return_only_filled=False)
-
     for col, shift_days, new_col in SHIFT_CONFIG:
         df_daily[new_col] = df_daily[col].shift(shift_days)
 
@@ -109,19 +110,15 @@ def load_and_preprocess_data(file_path):
 
     df_daily = df_daily[(df_daily['Date'] >= '2009-08-01') & (df_daily['Date'] <= last_day_ext)]
     df_daily = df_daily[df_daily['Date'].dt.dayofweek < 5]
-
     for base_col, new_col in REVERSE_CONFIG:
         df_daily[new_col] = reverse_column(df_daily, base_col)
-
     for special_col, config in SPECIAL_REVERSE.items():
         base_col = config['base_column']
         condition_date = config['condition_date']
         df_daily[special_col] = np.where(df_daily['Date'] >= condition_date,
                                          df_daily[base_col],
                                          np.nan)
-
     df_daily = df_daily[(df_daily['Date'] > last_day) | df_daily[TARGET_COL].notna()]
-
     return df_daily, last_day
 
 # ------------ 数据划分与特征构建 ------------
@@ -140,13 +137,11 @@ def split_and_build_features(df_daily, last_day):
         '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年_提前30天',
         '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序_2022-01-01'
     ]
-
     X_train = train_data[feature_columns]
     y_train = train_data[TARGET_COL]
     X_test = test_data[feature_columns]
     y_test = test_data[TARGET_COL]
     X_future = future_data[feature_columns]
-    
     return X_train, y_train, X_test, y_test, X_future, train_data, test_data, future_data
 
 # ------------ 特征缩放与异常值检测 ------------
@@ -179,11 +174,10 @@ def train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weig
         xgb_reg = XGBRegressor(objective='reg:squarederror', eval_metric='rmse',
                                n_estimators=NUM_BOOST_ROUND, seed=RANDOM_STATE)
         tscv = TimeSeriesSplit(n_splits=3)
-        # 设置额外参数,其中 verbose=200 表示每200轮输出一次验证指标
         extra_fit_params = {
             'eval_set': [(X_train_scaled, y_train), (X_test_scaled, y_test)],
             'early_stopping_rounds': 20,
-            'verbose': 200
+            'verbose': 200  # 每200轮输出一次验证指标
         }
         if SEARCH_MODE == "grid":
             search = GridSearchCV(
@@ -191,7 +185,7 @@ def train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weig
                 param_grid=param_dist,
                 scoring='neg_mean_squared_error',
                 cv=tscv,
-                verbose=2,
+                verbose=1,
                 n_jobs=-1
             )
         elif SEARCH_MODE == "bayesian":
@@ -202,7 +196,7 @@ def train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weig
                 scoring='neg_mean_squared_error',
                 cv=tscv,
                 random_state=RANDOM_STATE,
-                verbose=2,
+                verbose=1,
                 n_jobs=-1
             )
         else:
@@ -213,13 +207,12 @@ def train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weig
                 scoring='neg_mean_squared_error',
                 cv=tscv,
                 random_state=RANDOM_STATE,
-                verbose=2,
+                verbose=1,
                 n_jobs=-1
             )
         search.fit(X_train_scaled, y_train, sample_weight=weights)
         best_model = search.best_estimator_
         print("调优后的最佳参数:", search.best_params_)
-        # 在二次拟合时使用 extra_fit_params 中的 early_stopping_rounds 和 verbose 控制输出频率
         best_model.fit(X_train_scaled, y_train,
                        eval_set=[(X_test_scaled, y_test)],
                        verbose=200)
@@ -264,8 +257,6 @@ def evaluate_and_predict(model, scaler, X_train, y_train, X_test, y_test, X_futu
 
 # ------------ 结果后处理与保存 ------------
 def merge_and_save_results(train_data, test_data, future_data, y_test_pred, y_future_pred):
-    # 更新 test_data 和 future_data 的预测列,
-    # 如果 future_data 包含 "完整模型预测值" 则优先使用
     test_data = test_data.copy()
     future_data = future_data.copy()
     test_data['预测值'] = y_test_pred
@@ -316,6 +307,12 @@ def update_excel(merged_df):
     else:
         print("数据更新失败,请检查错误信息")
 
+def adjust_full_predictions(y_test, future_data):
+    """Shift the full-data forecast so it starts at the last observed test value.
+
+    The offset between the final value of ``y_test`` and the first value of
+    ``future_data['完整数据_预测值']`` is added to every entry of that column.
+    The column is overwritten on the frame that was passed in (no copy is
+    made), and the shifted column is printed.
+
+    Args:
+        y_test: pandas Series of observed target values (indexed with
+            ``.iloc``); its last element is the anchor.
+        future_data: DataFrame containing a '完整数据_预测值' column.
+
+    Returns:
+        ``future_data`` with the shifted column. The frame is returned
+        unchanged when either input is empty or the offset is NaN.
+    """
+    full_pred = future_data['完整数据_预测值']
+    # With no anchor or nothing to shift, .iloc[-1] / .iloc[0] would raise
+    # IndexError, so leave the frame as it is.
+    if len(y_test) == 0 or len(full_pred) == 0:
+        return future_data
+    gap = y_test.iloc[-1] - full_pred.iloc[0]
+    # A NaN offset would turn every forecast into NaN; keep the raw values.
+    if pd.isna(gap):
+        return future_data
+    future_data['完整数据_预测值'] = full_pred + gap
+    print(future_data['完整数据_预测值'])
+    return future_data
+
 # ------------ 最终预测结果可视化 ------------
 def plot_final_predictions(train_data, y_train, y_train_pred,
                            test_data, y_test, y_test_pred,
@@ -323,10 +320,9 @@ def plot_final_predictions(train_data, y_train, y_train_pred,
     plt.figure(figsize=(15, 6))
     plt.plot(train_data['Date'], y_train, label='Train True', color='blue')
     plt.plot(train_data['Date'], y_train_pred, label='Train Predicted', color='green')
-    plt.plot(test_data['Date'], y_test, label='Test True', color='orange', alpha=0.7)
+    plt.plot(test_data['Date'], y_test, label='Test True', color='blue', alpha=0.7)
     plt.plot(test_data['Date'], y_test_pred, label='Test Predicted', color='red')
     plt.plot(future_data['Date'], future_data['预测值'], label='Future Prediction', color='purple')
-    # 新增使用全数据训练的预测结果,用黑线显示
     if '完整数据_预测值' in future_data.columns:
         plt.plot(future_data['Date'], future_data['完整数据_预测值'], label='Full Model Future Prediction', color='black')
     plt.axvline(x=test_data['Date'].iloc[0], color='black', linestyle='--', label='Train/Test Split')
@@ -340,7 +336,6 @@ def plot_final_predictions(train_data, y_train, y_train_pred,
 
 # ------------ 全数据训练及未来预测 ------------
 def train_full_model_and_predict(X_train, y_train, X_test, y_test, X_future):
-    # 合并训练集和测试集
     X_full = pd.concat([X_train, X_test])
     y_full = pd.concat([y_train, y_test])
     scaler_full = StandardScaler().fit(X_full)
@@ -349,11 +344,9 @@ def train_full_model_and_predict(X_train, y_train, X_test, y_test, X_future):
     
     params = None
     if USE_HYPERPARAM_TUNING:
-        # 此处可使用之前调优获得的参数;演示中仍使用默认参数
         params = None
     if params is None:
         params = DEFAULT_PARAMS
-    # 只设置n_estimators,不再重复设置seed参数
     full_model = XGBRegressor(**params, n_estimators=NUM_BOOST_ROUND)
     full_model.fit(X_full_scaled, y_full)
     y_future_full_pred = full_model.predict(X_future_scaled)
@@ -368,25 +361,24 @@ def main():
     
     model = train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weights,
                                     use_tuning=USE_HYPERPARAM_TUNING)
-    
     y_train_pred, y_test_pred, y_future_pred = evaluate_and_predict(model, scaler, X_train, y_train, X_test, y_test, X_future,
                                                                     use_tuning=USE_HYPERPARAM_TUNING)
     
-    # 将预测结果添加到 test_data 和 future_data 中
     test_data = test_data.copy()
     test_data['预测值'] = y_test_pred
     future_data = future_data.copy()
     future_data['预测值'] = y_future_pred
     
-    # 训练全数据模型并预测未来
     full_model, y_future_full_pred, scaler_full = train_full_model_and_predict(X_train, y_train, X_test, y_test, X_future)
     future_data['完整数据_预测值'] = y_future_full_pred
     
-    merged_df = merge_and_save_results(train_data, test_data, future_data, y_test_pred, y_future_pred)
+
+    if ADJUST_FULL_PREDICTIONS:
+        future_data = adjust_full_predictions(y_test, future_data)
+    
+    merged_df = merge_and_save_results(train_data, test_data, future_data, y_test_pred, y_future_full_pred)
     update_excel(merged_df)
     
-
-    ### 画图展示 
     if SHOW_PLOTS:
         plot_final_predictions(train_data, y_train, y_train_pred,
                                test_data, y_test, y_test_pred,

BIN
WTI/data_input/RBOB.xlsx


BIN
WTI/eta/1.WTI_update_data.xlsx


BIN
WTI/eta/RBOB_Daily.xlsx


BIN
WTI/eta/RBOB_Monthly.xlsx


+ 41 - 49
WTI/run_all.py

@@ -1,56 +1,48 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
 import subprocess
 import sys
+import os
 
-# 按照指定顺序的文件列表
-files = [
-    "1.1美国RBOB汽油裂解api.ipynb",
-    "1.2美国RBOB汽油裂解.ipynb",
-    "2.1WTI_连1-连4月差-残差项api.ipynb",
-    "2.2WTI_连1-连4月差-残差项.ipynb",
-    "3.1wti_连1-4api.ipynb",
-    "3.2wti_连1-4.ipynb",
-    "4.1wti_残差项api.ipynb",
-    "4.2wti_残差项.ipynb",
-    "5.1wti_原油合约价格api.ipynb",
-    "5.3wti_原油合约价格_final_日度.ipynb",
-    "5.4wti_原油合约价格_final_月度.ipynb",
-    "6.1Brent-WTI价差api.ipynb",
-    "6.2Brent-WTI价差.ipynb",
-    '7.1布伦特迪拜掉期EFS_api.ipynb',
-    '7.2布伦特迪拜掉期EFS.ipynb',
-    '8.1迪拜油_api.ipynb',
-    '8.2迪拜油.ipynb'    
+# 要按顺序执行的脚本列表
+SCRIPTS_TO_RUN = [
+    "1.1Rbob_api.py",
+    "1.2Rbob.py"
 ]
 
-def run_file(file):
-    print("正在执行文件:", file)
-    # 根据文件类型构造不同的命令
-    if file.endswith('.py'):
-        cmd = ["python", file]
-    elif file.endswith('.ipynb'):
-        # 使用 jupyter nbconvert 执行 notebook 文件,同步执行并直接写回原文件(避免产生新文件),使用 --inplace 参数
-        cmd = ["jupyter", "nbconvert", "--to", "notebook", "--execute", "--inplace", file]
-    else:
-        print("未知文件类型:", file)
-        return
-
-    try:
-        # 执行命令,capture_output=True 用于捕获输出信息,check=True 如有错误会抛出 CalledProcessError
-        result = subprocess.run(cmd, check=True, text=True, capture_output=True)
-        print(result.stdout)
-    except subprocess.CalledProcessError as e:
-        # 捕获到错误后,打印错误信息并退出
-        print("执行文件时出现错误:", file)
-        print("错误输出:", e.stderr)
-        sys.exit(1)
-
-def main():
-    for file in files:
-        run_file(file)
-    print("所有文件执行完毕!")
+def run_scripts():
+    """Run every script in SCRIPTS_TO_RUN in order, stopping at the first failure.
+
+    Each script name is resolved relative to the directory containing this
+    file and executed with the current interpreter (``sys.executable``).
+    The child's stdout/stderr are not captured, so its output appears on the
+    console as it is produced.
+
+    Returns:
+        int: 0 when every script ran successfully; 1 when a script file is
+        missing, exits with a non-zero status, or cannot be launched.
+    """
+    script_dir = os.path.dirname(__file__)
+
+    for script in SCRIPTS_TO_RUN:
+        print(f"\n开始执行: {script}")
+
+        # Resolve the script next to this file rather than the caller's cwd.
+        script_path = os.path.join(script_dir, script)
+        if not os.path.exists(script_path):
+            print(f"错误: 找不到脚本 {script_path}")
+            return 1
+
+        try:
+            # Output is deliberately not captured so it streams live. Because
+            # of that, CalledProcessError.stderr is always None here; the
+            # child's own stderr on the console is the error detail.
+            subprocess.run([sys.executable, script_path], check=True)
+        except subprocess.CalledProcessError as e:
+            print(f"\n执行 {script} 时出错:")
+            print(f"返回代码: {e.returncode}")
+            return 1
+        except Exception as e:
+            # Top-level boundary: report launch failures (e.g. OSError) and
+            # turn them into a non-zero exit code instead of a traceback.
+            print(f"\n执行 {script} 时发生异常:")
+            print(str(e))
+            return 1
+
+        print(f"成功执行: {script}")
+
+    print("\n所有脚本执行完成")
+    return 0
 
 if __name__ == "__main__":
-    main()
+    sys.exit(run_scripts())