
3.Rbob.py is the final file in use 2025/5/13

ziqidai11 3 weeks ago
parent · commit 9cf1e2440d

+ 0 - 390
WTI/1.2Rbob.py

@@ -1,390 +0,0 @@
-import pandas as pd
-import numpy as np
-import xgboost as xgb
-from xgboost import XGBRegressor
-from sklearn.metrics import mean_squared_error, r2_score
-import matplotlib.pyplot as plt
-from skopt import BayesSearchCV
-from sklearn.preprocessing import StandardScaler
-from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit
-from Dcel import update_excel_data
-from Dtool import fill_missing_values, reverse_column
-
-# ------------ Global configuration ------------
-FILE_PATH = 'data_input/RBOB.xlsx'
-OUTPUT_DAILY = 'eta/RBOB_Daily.xlsx'
-OUTPUT_MONTHLY = 'eta/RBOB_Monthly.xlsx'
-UPDATE_FILE_PATH = "eta/1.WTI_update_data.xlsx"
-UPDATE_SHEET_NAME = "日度数据表"
-UPDATE_IDENTIFIER = "RBOB"
-
-NUM_BOOST_ROUND = 1000
-RANDOM_STATE = 42
-USE_HYPERPARAM_TUNING = False    # if False, train directly with the default parameters
-
-TARGET_COL = '美国RBOB汽油裂解'  # prediction target
-TEST_PERIOD = 20                 # number of test-set samples
-
-SEARCH_MODE = "random"           # one of "grid"/"bayesian"/"random"
-SHOW_PLOTS = True                # whether to show the final prediction chart
-
-ADJUST_FULL_PREDICTIONS = True
-
-DEFAULT_PARAMS = {
-    'objective': 'reg:squarederror',
-    'learning_rate': 0.1309,
-    'max_depth': 8,
-    'min_child_weight': 3,
-    'gamma': 2,
-    'subsample': 0.85,
-    'colsample_bytree': 0.75,
-    'eval_metric': 'rmse',
-    'seed': RANDOM_STATE,
-    'reg_alpha': 0.45,
-    'reg_lambda': 1.29,
-}
-
-# —— Factor preprocessing configuration ——
-FILL_METHODS = {
-    '美国2年通胀预期': 'rolling_mean_5',
-    '美国首次申领失业金人数/4WMA': 'interpolate',
-    '道琼斯旅游与休闲/工业平均指数': 'interpolate',
-    '美国EIA成品油总库存(预测/供应需求3年季节性)': 'interpolate',
-    '美国成品车用汽油倒推产量(预测/汽油库存维持上年季节性)/8WMA': 'interpolate',
-    '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年': 'interpolate',
-    '美国炼厂可用产能(路透)(预测)': 'interpolate',
-    '美国炼厂CDU装置检修量(新)': 'interpolate',
-    '美湾单位辛烷值价格(预测/季节性)': 'interpolate',
-    '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年': 'interpolate'
-}
-
-SHIFT_CONFIG = [
-    ('美国2年通胀预期', 56, '美国2年通胀预期_提前56天'),
-    ('美国首次申领失业金人数/4WMA', 100, '美国首次申领失业金人数/4WMA_提前100天'),
-    ('美国首次申领失业金人数/4WMA', 112, '美国首次申领失业金人数/4WMA_提前112天'),
-    ('道琼斯旅游与休闲/工业平均指数', 14, '道琼斯旅游与休闲/工业平均指数_提前14天'),
-    ('美国EIA成品油总库存(预测/供应需求3年季节性)', 15, '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天'),
-    ('美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年', 30,
-     '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年_提前30天'),
-    ('美国炼厂CDU装置检修量(新)', 30, '美国炼厂CDU装置检修量(新)_提前30天'),
-    ('美国炼厂可用产能(路透)(预测)', 100, '美国炼厂可用产能(路透)(预测)_提前100天')
-]
-
-REVERSE_CONFIG = [
-    ('美国首次申领失业金人数/4WMA', '美国首次申领失业金人数/4WMA_逆序'),
-    ('美国首次申领失业金人数/4WMA_提前100天', '美国首次申领失业金人数/4WMA_提前100天_逆序'),
-    ('美国首次申领失业金人数/4WMA_提前112天', '美国首次申领失业金人数/4WMA_提前112天_逆序'),
-    ('美国EIA成品油总库存(预测/供应需求3年季节性)', '美国EIA成品油总库存(预测/供应需求3年季节性)_逆序'),
-    ('美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天', '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天_逆序'),
-    ('美国炼厂可用产能(路透)(预测)_提前100天', '美国炼厂可用产能(路透)(预测)_逆序'),
-    ('美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年', '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序')
-]
-
-SPECIAL_REVERSE = {
-    '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序_2022-01-01': {
-        'base_column': '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序',
-        'condition_date': pd.Timestamp('2022-01-01')
-    }
-}
-
-# ------------ Data loading and preprocessing ------------
-def load_and_preprocess_data(file_path):
-    excel_data = pd.ExcelFile(file_path)
-    df = excel_data.parse('Sheet1')
-    df.rename(columns={'DataTime': 'Date'}, inplace=True)
-    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
-    df.set_index('Date', inplace=True)
-
-    full_date_range = pd.date_range(start=df.index.min(), end=df.index.max(), freq='D')
-    df_daily = df.reindex(full_date_range)
-    df_daily.reset_index(inplace=True)
-    df_daily.rename(columns={'index': 'Date'}, inplace=True)
-
-    df_daily = fill_missing_values(df_daily, FILL_METHODS, return_only_filled=False)
-    for col, shift_days, new_col in SHIFT_CONFIG:
-        df_daily[new_col] = df_daily[col].shift(shift_days)
-
-    last_valid_idx = df_daily[TARGET_COL].last_valid_index()
-    last_day = df_daily['Date'].iloc[last_valid_idx]
-    last_day_ext = last_day + pd.Timedelta(days=30)
-
-    df_daily = df_daily[(df_daily['Date'] >= '2009-08-01') & (df_daily['Date'] <= last_day_ext)]
-    df_daily = df_daily[df_daily['Date'].dt.dayofweek < 5]
-    for base_col, new_col in REVERSE_CONFIG:
-        df_daily[new_col] = reverse_column(df_daily, base_col)
-    for special_col, config in SPECIAL_REVERSE.items():
-        base_col = config['base_column']
-        condition_date = config['condition_date']
-        df_daily[special_col] = np.where(df_daily['Date'] >= condition_date,
-                                         df_daily[base_col],
-                                         np.nan)
-    df_daily = df_daily[(df_daily['Date'] > last_day) | df_daily[TARGET_COL].notna()]
-    return df_daily, last_day
-
-# ------------ Data split and feature construction ------------
-def split_and_build_features(df_daily, last_day):
-    train_data = df_daily[df_daily['Date'] <= last_day].copy()
-    test_data = train_data[-TEST_PERIOD:].copy()
-    train_data = train_data[:-TEST_PERIOD].copy()
-    future_data = df_daily[df_daily['Date'] > last_day].copy()
-
-    feature_columns = [
-        '美湾单位辛烷值价格(预测/季节性)',
-        '美国炼厂CDU装置检修量(新)_提前30天', 
-        '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天_逆序', 
-        '美国首次申领失业金人数/4WMA_提前100天_逆序',
-        '美国成品车用汽油倒推产量(预测/汽油库存维持上年季节性)/8WMA',
-        '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年_提前30天',
-        '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序_2022-01-01'
-    ]
-    X_train = train_data[feature_columns]
-    y_train = train_data[TARGET_COL]
-    X_test = test_data[feature_columns]
-    y_test = test_data[TARGET_COL]
-    X_future = future_data[feature_columns]
-    return X_train, y_train, X_test, y_test, X_future, train_data, test_data, future_data
-
-# ------------ Feature scaling and outlier detection ------------
-def scale_and_weight_features(X_train, X_test, X_future):
-    scaler = StandardScaler()
-    X_train_scaled = scaler.fit_transform(X_train)
-    X_test_scaled = scaler.transform(X_test)
-    X_future_scaled = scaler.transform(X_future)
-    return scaler, X_train_scaled, X_test_scaled, X_future_scaled
-
-def detect_outliers_weights(X, weight_normal=1.0, weight_outlier=0.05, threshold=3):
-    z_scores = np.abs((X - X.mean()) / X.std())
-    outlier_mask = (z_scores > threshold).any(axis=1)
-    weights = np.where(outlier_mask, weight_outlier, weight_normal)
-    return weights
-
-# ------------ Model training ------------
-def train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weights, use_tuning=True):
-    if use_tuning:
-        param_dist = {
-            'learning_rate': list(np.arange(0.01, 0.11, 0.01)),
-            'max_depth': list(range(4, 11)),
-            'min_child_weight': list(range(1, 6)),
-            'gamma': list(np.arange(0, 0.6, 0.1)),
-            'subsample': list(np.arange(0.5, 1.01, 0.05)),
-            'colsample_bytree': list(np.arange(0.5, 1.01, 0.05)),
-            'reg_alpha': [0, 0.1, 0.2, 0.3, 0.4, 0.45, 0.5],
-            'reg_lambda': list(np.arange(1.0, 1.6, 0.1))
-        }
-        xgb_reg = XGBRegressor(objective='reg:squarederror', eval_metric='rmse',
-                               n_estimators=NUM_BOOST_ROUND, seed=RANDOM_STATE)
-        tscv = TimeSeriesSplit(n_splits=3)
-        extra_fit_params = {
-            'eval_set': [(X_train_scaled, y_train), (X_test_scaled, y_test)],
-            'early_stopping_rounds': 20,
-            'verbose': 200  # print validation metrics every 200 rounds
-        }
-        if SEARCH_MODE == "grid":
-            search = GridSearchCV(
-                estimator=xgb_reg,
-                param_grid=param_dist,
-                scoring='neg_mean_squared_error',
-                cv=tscv,
-                verbose=1,
-                n_jobs=-1
-            )
-        elif SEARCH_MODE == "bayesian":
-            search = BayesSearchCV(
-                estimator=xgb_reg,
-                search_spaces=param_dist,
-                n_iter=50,
-                scoring='neg_mean_squared_error',
-                cv=tscv,
-                random_state=RANDOM_STATE,
-                verbose=1,
-                n_jobs=-1
-            )
-        else:
-            search = RandomizedSearchCV(
-                estimator=xgb_reg,
-                param_distributions=param_dist,
-                n_iter=50,
-                scoring='neg_mean_squared_error',
-                cv=tscv,
-                random_state=RANDOM_STATE,
-                verbose=1,
-                n_jobs=-1
-            )
-        search.fit(X_train_scaled, y_train, sample_weight=weights)
-        best_model = search.best_estimator_
-        print("调优后的最佳参数:", search.best_params_)
-        best_model.fit(X_train_scaled, y_train,
-                       eval_set=[(X_test_scaled, y_test)],
-                       verbose=200)
-    else:
-        dtrain = xgb.DMatrix(X_train_scaled, label=y_train, weight=weights)
-        dtest = xgb.DMatrix(X_test_scaled, label=y_test)
-        best_model = xgb.train(DEFAULT_PARAMS, dtrain, num_boost_round=NUM_BOOST_ROUND,
-                               evals=[(dtrain, 'Train'), (dtest, 'Test')],
-                               verbose_eval=False)
-    return best_model
-
-# ------------ Model evaluation and prediction ------------
-def evaluate_and_predict(model, scaler, X_train, y_train, X_test, y_test, X_future, use_tuning=True):
-    X_train_trans = scaler.transform(X_train)
-    X_test_trans = scaler.transform(X_test)
-    X_future_trans = scaler.transform(X_future)
-    
-    if isinstance(model, xgb.Booster):
-        y_train_pred = model.predict(xgb.DMatrix(X_train_trans))
-        y_test_pred = model.predict(xgb.DMatrix(X_test_trans))
-        y_future_pred = model.predict(xgb.DMatrix(X_future_trans))
-    else:
-        y_train_pred = model.predict(X_train_trans)
-        y_test_pred = model.predict(X_test_trans)
-        y_future_pred = model.predict(X_future_trans)
-    
-    train_mse = mean_squared_error(y_train, y_train_pred)
-    test_mse = mean_squared_error(y_test, y_test_pred)
-    
-    if len(y_test) < 2:
-        train_r2 = r2_score(y_train, y_train_pred)
-        test_r2 = None
-        print("测试集样本不足,R² 无法计算")
-    else:
-        train_r2 = r2_score(y_train, y_train_pred)
-        test_r2 = r2_score(y_test, y_test_pred)
-        
-    print(f"Train MSE: {train_mse}, Train R²: {train_r2}")
-    print(f"Test MSE: {test_mse}, Test R²: {test_r2}")
-    
-    return y_train_pred, y_test_pred, y_future_pred
-
-# ------------ Post-processing and saving of results ------------
-def merge_and_save_results(train_data, test_data, future_data, y_test_pred, y_future_pred):
-    test_data = test_data.copy()
-    future_data = future_data.copy()
-    test_data['预测值'] = y_test_pred
-    if '完整数据_预测值' in future_data.columns:
-        future_data['预测值'] = future_data['完整数据_预测值']
-    else:
-        future_data['预测值'] = y_future_pred
-
-    train_data_2023 = train_data[train_data['Date'].dt.year >= 2023][['Date', TARGET_COL]]
-    test_actual = test_data[['Date', TARGET_COL]]
-    historical_actual = pd.concat([train_data_2023, test_actual])
-    historical_actual.columns = ['Date', '实际值']
-    
-    future_pred = future_data[future_data['Date'] >= '2022-08-01'][['Date', '预测值']].copy()
-    future_pred.rename(columns={'预测值': TARGET_COL}, inplace=True)
-    last_actual_value = float(historical_actual.iloc[-1]['实际值'])
-    future_pred.iloc[0, future_pred.columns.get_loc(TARGET_COL)] = np.float32(last_actual_value)
-
-    merged_df = pd.merge(historical_actual, future_pred, on='Date', how='outer')
-    merged_df = merged_df.sort_values('Date', ascending=False)
-    merged_df['Date'] = merged_df['Date'].dt.strftime('%Y/%m/%d')
-    merged_df.to_excel(OUTPUT_DAILY, index=False, float_format='%.2f')
-    
-    actual_values = pd.concat([
-        train_data[train_data['Date'].dt.year >= 2023][['Date', TARGET_COL]],
-        test_actual
-    ])
-    actual_values.columns = ['Date', '实际值']
-    predictions = pd.concat([
-        test_data[['Date', '预测值']],
-        future_pred.rename(columns={TARGET_COL: '预测值'})
-    ], ignore_index=True)
-    monthly_df = pd.merge(actual_values, predictions, on='Date', how='outer')
-    monthly_df['Date'] = pd.to_datetime(monthly_df['Date'])
-    monthly_df.set_index('Date', inplace=True)
-    monthly_df = monthly_df.resample('ME').mean()
-    monthly_df.reset_index(inplace=True)
-    monthly_df = monthly_df.sort_values('Date', ascending=False)
-    monthly_df['Date'] = monthly_df['Date'].dt.strftime('%Y/%m/%d')
-    monthly_df.to_excel(OUTPUT_MONTHLY, index=False, float_format='%.2f')
-    
-    return merged_df
-
-def update_excel(merged_df):
-    success = update_excel_data(merged_df, UPDATE_FILE_PATH, UPDATE_SHEET_NAME, UPDATE_IDENTIFIER)
-    if success:
-        print("数据已成功更新到Excel文件")
-    else:
-        print("数据更新失败,请检查错误信息")
-
-def adjust_full_predictions(y_test, future_data):
-    gap = y_test.iloc[-1] - future_data['完整数据_预测值'].iloc[0]
-    future_data['完整数据_预测值'] = future_data['完整数据_预测值'] + gap
-    print(future_data['完整数据_预测值'])
-    return future_data
-
-# ------------ Visualization of final predictions ------------
-def plot_final_predictions(train_data, y_train, y_train_pred,
-                           test_data, y_test, y_test_pred,
-                           future_data, last_day):
-    plt.figure(figsize=(15, 6))
-    plt.plot(train_data['Date'], y_train, label='Train True', color='blue')
-    plt.plot(train_data['Date'], y_train_pred, label='Train Predicted', color='green')
-    plt.plot(test_data['Date'], y_test, label='Test True', color='blue', alpha=0.7)
-    plt.plot(test_data['Date'], y_test_pred, label='Test Predicted', color='red')
-    plt.plot(future_data['Date'], future_data['预测值'], label='Future Prediction', color='purple')
-    if '完整数据_预测值' in future_data.columns:
-        plt.plot(future_data['Date'], future_data['完整数据_预测值'], label='Full Model Future Prediction', color='black')
-    plt.axvline(x=test_data['Date'].iloc[0], color='black', linestyle='--', label='Train/Test Split')
-    plt.axvline(x=last_day, color='gray', linestyle='--', label='Future Split')
-    plt.title('Prediction Visualization')
-    plt.xlabel('Date')
-    plt.ylabel('Target Value')
-    plt.legend()
-    plt.grid(True)
-    plt.show()
-
-# ------------ Full-data training and future prediction ------------
-def train_full_model_and_predict(X_train, y_train, X_test, y_test, X_future):
-    X_full = pd.concat([X_train, X_test])
-    y_full = pd.concat([y_train, y_test])
-    scaler_full = StandardScaler().fit(X_full)
-    X_full_scaled = scaler_full.transform(X_full)
-    X_future_scaled = scaler_full.transform(X_future)
-    
-    params = None
-    if USE_HYPERPARAM_TUNING:
-        params = None
-    if params is None:
-        params = DEFAULT_PARAMS
-    full_model = XGBRegressor(**params, n_estimators=NUM_BOOST_ROUND)
-    full_model.fit(X_full_scaled, y_full)
-    y_future_full_pred = full_model.predict(X_future_scaled)
-    return full_model, y_future_full_pred, scaler_full
-
-# ------------ Main ------------
-def main():
-    df_daily, last_day = load_and_preprocess_data(FILE_PATH)
-    X_train, y_train, X_test, y_test, X_future, train_data, test_data, future_data = split_and_build_features(df_daily, last_day)
-    scaler, X_train_scaled, X_test_scaled, X_future_scaled = scale_and_weight_features(X_train, X_test, X_future)
-    weights = detect_outliers_weights(X_train, weight_normal=1.0, weight_outlier=0.05, threshold=3)
-    
-    model = train_model_with_tuning(X_train_scaled, y_train, X_test_scaled, y_test, weights,
-                                    use_tuning=USE_HYPERPARAM_TUNING)
-    y_train_pred, y_test_pred, y_future_pred = evaluate_and_predict(model, scaler, X_train, y_train, X_test, y_test, X_future,
-                                                                    use_tuning=USE_HYPERPARAM_TUNING)
-    
-    test_data = test_data.copy()
-    test_data['预测值'] = y_test_pred
-    future_data = future_data.copy()
-    future_data['预测值'] = y_future_pred
-    
-    full_model, y_future_full_pred, scaler_full = train_full_model_and_predict(X_train, y_train, X_test, y_test, X_future)
-    future_data['完整数据_预测值'] = y_future_full_pred
-    
-
-    if ADJUST_FULL_PREDICTIONS:
-        future_data = adjust_full_predictions(y_test, future_data)
-    
-    merged_df = merge_and_save_results(train_data, test_data, future_data, y_test_pred, y_future_full_pred)
-    update_excel(merged_df)
-    
-    if SHOW_PLOTS:
-        plot_final_predictions(train_data, y_train, y_train_pred,
-                               test_data, y_test, y_test_pred,
-                               future_data, last_day)
-    
-    print("全数据模型对未来数据的预测结果:", y_future_full_pred)
-
-if __name__ == '__main__':
-    main()

+ 450 - 0
WTI/1.Rbob.py

@@ -0,0 +1,450 @@
+import pandas as pd
+import numpy as np
+import xgboost as xgb
+from xgboost import XGBRegressor
+from sklearn.metrics import mean_squared_error, r2_score
+import matplotlib.pyplot as plt
+from skopt import BayesSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit
+
+from Dtool import fill_missing_values, reverse_column
+from api import fetch_data_by_indicators
+
+
+# Usage example: fetch the raw indicator data
+indicators = ["RBWTICKMc1", "C2406121350446455",'USGGBE02 Index', "Cinjcjc4 index",'injcjc4 index','C2201059138_241106232710','C2406036178','C22411071623523660','C2312081670','REFOC-T-EIA_241114135248','C2304065621_241024124344','C22503031424010431']
+df = fetch_data_by_indicators(indicators, "data_input/RBOB.xlsx")
+
+
+# ------------ Global configuration ------------
+FILE_PATH = 'data_input/RBOB.xlsx'
+
+NUM_BOOST_ROUND = 1000
+RANDOM_STATE = 42
+USE_HYPERPARAM_TUNING = False    # if False, train directly with xgb.train
+TARGET_COL = '美国RBOB汽油裂解'
+TEST_PERIOD = 20
+SEARCH_MODE = 'random'           # one of 'grid' / 'bayesian' / 'random'
+SHOW_PLOTS = False
+ADJUST_FULL_PREDICTIONS = True
+
+DEFAULT_PARAMS = {
+    'objective': 'reg:squarederror',
+    'learning_rate': 0.1,
+    'max_depth': 8,
+    'min_child_weight': 3,
+    'gamma': 2,
+    'subsample': 0.85,
+    'colsample_bytree': 0.75,
+    'eval_metric': 'rmse',
+    'seed': 42,
+    'reg_alpha': 0.45,
+    'reg_lambda': 1.29,
+}
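+# DEFAULT_PARAMS drives both the native xgb.train call (when tuning is disabled) and, unpacked
+# into XGBRegressor, the full-data model in train_full_model_and_predict below.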
+
+# —— Factor preprocessing configuration ——
+FILL_METHODS = {
+    '美国2年通胀预期': 'rolling_mean_5',
+    '美国首次申领失业金人数/4WMA': 'interpolate',
+    '道琼斯旅游与休闲/工业平均指数': 'interpolate',
+    '美国EIA成品油总库存(预测/供应需求3年季节性)': 'interpolate',
+    '美国成品车用汽油倒推产量(预测/汽油库存维持上年季节性)/8WMA': 'interpolate',
+    '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年': 'interpolate',
+    '美国炼厂可用产能(路透)(预测)': 'interpolate',
+    '美国炼厂CDU装置检修量(新)': 'interpolate',
+    '美湾单位辛烷值价格(预测/季节性)': 'interpolate',
+    '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年': 'interpolate'
+}
+
+SHIFT_CONFIG = [
+    ('美国2年通胀预期', 56, '美国2年通胀预期_提前56天'),
+    ('美国首次申领失业金人数/4WMA', 100, '美国首次申领失业金人数/4WMA_提前100天'),
+    ('美国首次申领失业金人数/4WMA', 112, '美国首次申领失业金人数/4WMA_提前112天'),
+    ('道琼斯旅游与休闲/工业平均指数', 14, '道琼斯旅游与休闲/工业平均指数_提前14天'),
+    ('美国EIA成品油总库存(预测/供应需求3年季节性)', 15,
+     '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天'),
+    ('美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年',
+     30,
+     '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年_提前30天'),
+    ('美国炼厂CDU装置检修量(新)', 30, '美国炼厂CDU装置检修量(新)_提前30天'),
+    ('美国炼厂可用产能(路透)(预测)', 100,
+     '美国炼厂可用产能(路透)(预测)_提前100天')
+]
+
+REVERSE_CONFIG = [
+    ('美国首次申领失业金人数/4WMA',
+     '美国首次申领失业金人数/4WMA_逆序'),
+    ('美国首次申领失业金人数/4WMA_提前100天',
+     '美国首次申领失业金人数/4WMA_提前100天_逆序'),
+    ('美国首次申领失业金人数/4WMA_提前112天',
+     '美国首次申领失业金人数/4WMA_提前112天_逆序'),
+    ('美国EIA成品油总库存(预测/供应需求3年季节性)',
+     '美国EIA成品油总库存(预测/供应需求3年季节性)_逆序'),
+    ('美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天',
+     '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天_逆序'),
+    ('美国炼厂可用产能(路透)(预测)_提前100天',
+     '美国炼厂可用产能(路透)(预测)_逆序'),
+    ('美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年',
+     '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序')
+]
+
+SPECIAL_REVERSE = {
+    '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序_2022-01-01': {
+        'base_column': '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序',
+        'condition_date': pd.Timestamp('2022-01-01')
+    }
+}
+
+
+# ------------ Data loading and preprocessing ------------
+def load_and_preprocess_data(file_path):
+    df = pd.read_excel(file_path, sheet_name='Sheet1')
+    print(df)
+    df.rename(columns={'DataTime': 'Date'}, inplace=True)
+    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
+    df.set_index('Date', inplace=True)
+
+    full_range = pd.date_range(df.index.min(),df.index.max(),freq='D')
+    df_daily = df.reindex(full_range).reset_index()
+    df_daily.rename(columns={'index': 'Date'}, inplace=True)
+    df_daily = fill_missing_values(df_daily,FILL_METHODS,return_only_filled=False)
+    for col, days, new_col in SHIFT_CONFIG:
+        df_daily[new_col] = df_daily[col].shift(days)
+
+    last_idx = df_daily[TARGET_COL].last_valid_index()
+    last_day = df_daily.loc[last_idx, 'Date']
+
+    df_daily = df_daily[(df_daily['Date'] >= '2009-08-01') &(df_daily['Date'] <= last_day +pd.Timedelta(days=30))]
+    df_daily = df_daily[df_daily['Date'].dt.weekday < 5]
+
+    for base, new in REVERSE_CONFIG:
+        df_daily[new] = reverse_column(df_daily, base)
+    for col, cfg in SPECIAL_REVERSE.items():
+        df_daily[col] = np.where(df_daily['Date'] >= cfg['condition_date'],df_daily[cfg['base_column']],np.nan)
+
+    df_daily = df_daily[(df_daily['Date'] > last_day)|df_daily[TARGET_COL].notna()]
+
+    return df_daily, last_day
+
+
+# ------------ Split and feature construction ------------
+def split_and_build_features(df_daily, last_day):
+    train = df_daily[df_daily['Date'] <= last_day].copy()
+    test = train.tail(TEST_PERIOD).copy()
+    train = train.iloc[:-TEST_PERIOD].copy()
+    future = df_daily[df_daily['Date'] > last_day].copy()
+
+    feature_columns = [
+        '美湾单位辛烷值价格(预测/季节性)',
+        '美国炼厂CDU装置检修量(新)_提前30天',
+        '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天_逆序',
+        '美国首次申领失业金人数/4WMA_提前100天_逆序',
+        '美国成品车用汽油倒推产量(预测/汽油库存维持上年季节性)/8WMA',
+        '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年_提前30天',
+        '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序_2022-01-01'
+    ]
+
+    X_train = train[feature_columns]
+    y_train = train[TARGET_COL]
+    X_test = test[feature_columns]
+    y_test = test[TARGET_COL]
+    X_future = future[feature_columns]
+
+    return X_train, y_train, X_test, y_test, X_future, train, test, future
+
+
+# ------------ Feature scaling and outlier weighting ------------
+def scale_and_weight_features(X_train, X_test, X_future):
+    scaler = StandardScaler()
+    X_tr = scaler.fit_transform(X_train)
+    X_te = scaler.transform(X_test)
+    X_fu = scaler.transform(X_future)
+    return scaler, X_tr, X_te, X_fu
+
+
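+# Rows whose features contain any |z-score| above `threshold` are assigned `weight_outlier`
+# (0.05) instead of `weight_normal` (1.0), so outliers contribute little to the training loss;
+# the resulting weights are passed into xgb.DMatrix / sample_weight during training.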
+def detect_outliers_weights(X,weight_normal=1.0,weight_outlier=0.05,threshold=3):
+    z = np.abs((X - X.mean()) / X.std())
+    mask = (z > threshold).any(axis=1)
+    return np.where(mask, weight_outlier, weight_normal)
+
+
+# ------------ Model training ------------
+def train_model_with_tuning(X_tr, y_tr, X_te, y_te, weights, use_tuning):
+    if use_tuning:
+        param_dist = {
+            'learning_rate': list(np.arange(0.01, 0.11, 0.01)),
+            'max_depth': list(range(4, 11)),
+            'min_child_weight': list(range(1, 6)),
+            'gamma': list(np.arange(0, 0.6, 0.1)),
+            'subsample': list(np.arange(0.5, 1.01, 0.05)),
+            'colsample_bytree': list(np.arange(0.5, 1.01, 0.05)),
+            'reg_alpha': [0, 0.1, 0.2, 0.3, 0.4, 0.45, 0.5],
+            'reg_lambda': list(np.arange(1.0, 1.6, 0.1))
+        }
+        xgb_reg = XGBRegressor(objective='reg:squarederror',
+                               eval_metric='rmse',
+                               n_estimators=NUM_BOOST_ROUND,
+                               seed=RANDOM_STATE)
+        tscv = TimeSeriesSplit(n_splits=3)
+        if SEARCH_MODE == 'grid':
+            search = GridSearchCV(xgb_reg,
+                                  param_grid=param_dist,
+                                  scoring='neg_mean_squared_error',
+                                  cv=tscv,
+                                  verbose=1,
+                                  n_jobs=-1)
+        elif SEARCH_MODE == 'bayesian':
+            search = BayesSearchCV(xgb_reg,
+                                  search_spaces=param_dist,
+                                  n_iter=50,
+                                  scoring='neg_mean_squared_error',
+                                  cv=tscv,
+                                  random_state=RANDOM_STATE,
+                                  verbose=1,
+                                  n_jobs=-1)
+        else:
+            search = RandomizedSearchCV(xgb_reg,
+                                        param_distributions=param_dist,
+                                        n_iter=50,
+                                        scoring='neg_mean_squared_error',
+                                        cv=tscv,
+                                        random_state=RANDOM_STATE,
+                                        verbose=1,
+                                        n_jobs=-1)
+        search.fit(X_tr, y_tr, sample_weight=weights)
+        best_model = search.best_estimator_
+        print("调优后的最佳参数:", search.best_params_)
+        best_model.fit(X_tr, y_tr,
+                       eval_set=[(X_te, y_te)],
+                       early_stopping_rounds=20,
+                       verbose=200)
+    else:
+        dtrain = xgb.DMatrix(X_tr, label=y_tr, weight=weights)
+        dtest = xgb.DMatrix(X_te, label=y_te)
+        best_model = xgb.train(DEFAULT_PARAMS,
+                               dtrain,
+                               num_boost_round=NUM_BOOST_ROUND,
+                               evals=[(dtrain, 'Train'),
+                                      (dtest, 'Test')],
+                               verbose_eval=False)
+    return best_model
+
+
+# ------------ Evaluation and prediction ------------
+def evaluate_and_predict(model, scaler, X_tr, y_tr, X_te, y_te, X_fu,
+                         use_tuning):
+    X_tr_s = scaler.transform(X_tr)
+    X_te_s = scaler.transform(X_te)
+    X_fu_s = scaler.transform(X_fu)
+
+    if isinstance(model, xgb.Booster):
+        y_tr_pred = model.predict(xgb.DMatrix(X_tr_s))
+        y_te_pred = model.predict(xgb.DMatrix(X_te_s))
+        y_fu_pred = model.predict(xgb.DMatrix(X_fu_s))
+    else:
+        y_tr_pred = model.predict(X_tr_s)
+        y_te_pred = model.predict(X_te_s)
+        y_fu_pred = model.predict(X_fu_s)
+
+    print("Train MSE:", mean_squared_error(y_tr, y_tr_pred),
+          "Test MSE:", mean_squared_error(y_te, y_te_pred))
+    if len(y_te) >= 2:
+        print("Train R2:", r2_score(y_tr, y_tr_pred),
+              "Test R2:", r2_score(y_te, y_te_pred))
+    else:
+        print("Test 样本不足,跳过 R² 计算")
+
+    return y_tr_pred, y_te_pred, y_fu_pred
+
+
+# ------------ Post-processing (build the daily & monthly DataFrames) ------------
+def merge_and_prepare_df(train, test, future, y_te_pred, y_fu_pred):
+    # merge historical actuals with future predictions
+    test = test.copy()
+    future = future.copy()
+    test['预测值'] = y_te_pred
+    future['预测值'] = y_fu_pred
+
+    hist_actual = pd.concat([
+        train[train['Date'].dt.year >= 2023][['Date', TARGET_COL]],
+        test[['Date', TARGET_COL]]
+    ])
+    hist_actual.columns = ['Date', '实际值']
+
+    future_pred = future[future['Date'] >= '2022-08-01'][['Date', '预测值']].rename(columns={'预测值': TARGET_COL}).copy()
+
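+    # pin the first future point to the last observed actual so the daily series joins continuously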
+    last_val = float(hist_actual.iloc[-1]['实际值'])
+    future_pred.iloc[0, 1] = last_val
+
+    merged = pd.merge(hist_actual, future_pred,on='Date', how='outer').sort_values('Date', ascending=False)
+    daily_df = merged.copy()
+
+    # monthly resampling
+    monthly_df = daily_df.copy()
+    monthly_df['Date'] = pd.to_datetime(monthly_df['Date'])
+    monthly_df.set_index('Date', inplace=True)
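+    # 'ME' (month-end) is the pandas >= 2.2 frequency alias; older pandas versions use 'M'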
+    monthly_df = monthly_df.resample('ME').mean().reset_index()
+
+    # direction accuracy: computed only where both actual and predicted values are present
+    pred_dir = np.sign(monthly_df[TARGET_COL].diff())
+    true_dir = np.sign(monthly_df['实际值'].diff())
+    valid = monthly_df[TARGET_COL].notna() & monthly_df['实际值'].notna()
+    monthly_df['方向准确率'] = np.where(valid & (pred_dir == true_dir), '正确',
+                                   np.where(valid & (pred_dir != true_dir), '错误', None))
+    # absolute deviation
+    monthly_df['绝对偏差'] = (monthly_df[TARGET_COL] - monthly_df['实际值']).abs()
+    # sort by date, descending
+    monthly_df = monthly_df.sort_values('Date', ascending=False).reset_index(drop=True)
+
+    return daily_df, monthly_df
+
+
+
+def generate_and_fill_excel(
+    daily_df,
+    monthly_df,
+    target_name,        # display name written as the prediction target ("预测标的")
+    classification,     # summary sheet ("列表页"): classification
+    model_framework,    # summary sheet: model framework
+    creator,            # summary sheet: creator
+    pred_date,          # summary sheet: prediction date
+    frequency,          # summary sheet: prediction frequency
+    output_path='update.xlsx'
+):
+    with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
+        workbook = writer.book
+
+        # —— compute the three summary values ——
+        # 1) test value: the most recent monthly prediction
+        test_value = monthly_df[TARGET_COL].iloc[0]
+        # 2) direction accuracy: correct count / valid count
+        total = monthly_df['方向准确率'].notna().sum()
+        correct = (monthly_df['方向准确率'] == '正确').sum()
+        direction_accuracy = f"{correct/total:.2%}" if total > 0 else ""
+        # 3) mean absolute deviation
+        absolute_deviation = monthly_df['绝对偏差'].mean()
+
+        # ========= summary sheet (列表页) =========
+        ws_list = workbook.add_worksheet('列表页')
+        writer.sheets['列表页'] = ws_list
+
+        headers = ['预测标的','分类','模型框架','创建人','预测日期','测试值','预测频度','方向准确率','绝对偏差']
+
+        ws_list.write_row(0, 0, headers)
+        ws_list.write_row(1, 0, [
+            target_name,
+            classification,
+            model_framework,
+            creator,
+            pred_date,
+            test_value,
+            frequency,
+            direction_accuracy,
+            absolute_deviation
+        ])
+
+        # ========= detail sheet (详情页) =========
+        detail_df = monthly_df[['Date', '实际值', TARGET_COL, '方向准确率', '绝对偏差']].copy()
+        detail_df.columns = ['指标日期','实际值','预测值','方向','偏差率']
+
+        detail_df.to_excel(writer,sheet_name='详情页',index=False,header=False,startrow=2)
+
+        ws_detail = writer.sheets['详情页']
+        ws_detail.write(0, 0, target_name)
+        ws_detail.write_row(1, 0, ['指标日期','实际值','预测值','方向','偏差率'])
+
+        # ========= daily data sheet (日度数据表) =========
+        daily_out = daily_df[['Date', '实际值', TARGET_COL]].copy()
+        daily_out.columns = ['指标日期','实际值','预测值']
+
+        daily_out.to_excel(writer,sheet_name='日度数据表',index=False,header=False,startrow=2)
+        
+        ws_daily = writer.sheets['日度数据表']
+        ws_daily.write(0, 0, target_name)
+        ws_daily.write_row(1, 0, ['指标日期','实际值','预测值'])
+
+    print(f"已生成并填充 {output_path}")
+
+
+# ------------ Full-data training and prediction ------------
+def train_full_model_and_predict(X_train, y_train, X_test, y_test, X_future):
+    X_all = pd.concat([X_train, X_test])
+    y_all = pd.concat([y_train, y_test])
+    scaler_all = StandardScaler().fit(X_all)
+    X_all_s = scaler_all.transform(X_all)
+    X_fu_s = scaler_all.transform(X_future)
+
+    model = XGBRegressor(**DEFAULT_PARAMS, n_estimators=NUM_BOOST_ROUND)
+    model.fit(X_all_s, y_all)
+    y_fu_full = model.predict(X_fu_s)
+
+    return model, y_fu_full, scaler_all
+
+
+# ------------ Visualization ------------
+def plot_final_predictions(train, y_tr, y_tr_pred, test, y_te, y_te_pred,
+                           future, last_day):
+    plt.figure(figsize=(15, 6))
+    plt.plot(train['Date'], y_tr, label='Train True')
+    plt.plot(train['Date'], y_tr_pred, label='Train Pred')
+    plt.plot(test['Date'], y_te, label='Test True', alpha=0.7)
+    plt.plot(test['Date'], y_te_pred, label='Test Pred')
+    plt.plot(future['Date'], future['预测值'], label='Future Pred')
+    plt.axvline(test['Date'].iloc[0], color='gray', linestyle='--')
+    plt.axvline(last_day, color='black', linestyle='--')
+    plt.legend()
+    plt.xlabel('Date')
+    plt.ylabel(TARGET_COL)
+    plt.title('Prediction Visualization')
+    plt.grid(True)
+    plt.show()
+
+
+# ------------ Main ------------
+def main():
+    df_daily, last_day = load_and_preprocess_data(FILE_PATH)
+
+    X_tr, y_tr, X_te, y_te, X_fu, train, test, future = split_and_build_features(df_daily, last_day)
+
+    scaler, X_tr_s, X_te_s, X_fu_s = scale_and_weight_features(X_tr, X_te, X_fu)
+
+    weights = detect_outliers_weights(X_tr_s)
+
+    model = train_model_with_tuning(X_tr_s, y_tr, X_te_s, y_te, weights,USE_HYPERPARAM_TUNING)
+
+    y_tr_pred, y_te_pred, y_fu_pred = evaluate_and_predict(model, scaler, X_tr, y_tr, X_te, y_te, X_fu,USE_HYPERPARAM_TUNING)
+
+    daily_df, monthly_df = merge_and_prepare_df(train, test, future,y_te_pred, y_fu_pred)
+
+    print(monthly_df)
+    print(daily_df)
+
+    generate_and_fill_excel(
+        daily_df,
+        monthly_df,
+        target_name='美国RBOB汽油裂解',        
+        classification='原油',
+        model_framework='XGBoost',
+        creator='张立舟',
+        pred_date='2024/11/11',             
+        frequency='月度',
+        output_path='update.xlsx'    
+    )
+    
+    full_model, y_fu_full, scaler_full = train_full_model_and_predict(X_tr, y_tr, X_te, y_te, X_fu)
+
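+    # ADJUST_FULL_PREDICTIONS applies a pure level shift: the full-data model's future path is
+    # anchored so its first point equals the last observed test value, preserving its shape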
+    if ADJUST_FULL_PREDICTIONS:
+        offset = y_te.iloc[-1] - y_fu_full[0]
+        y_fu_full += offset
+
+    if SHOW_PLOTS:
+        plot_final_predictions(
+            train, y_tr, y_tr_pred, test, y_te, y_te_pred,
+            future.assign(预测值=y_fu_full), last_day)
+
+    return daily_df, monthly_df
+
+if __name__ == '__main__':
+    daily_df, monthly_df = main()

BIN
WTI/1234_update.xlsx


+ 458 - 0
WTI/2.Rbob.py

@@ -0,0 +1,458 @@
+import pandas as pd
+import numpy as np
+import xgboost as xgb
+from xgboost import XGBRegressor
+from sklearn.metrics import mean_squared_error, r2_score
+import matplotlib.pyplot as plt
+from skopt import BayesSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit
+
+from Dtool import fill_missing_values, reverse_column
+from api import fetch_data_by_indicators
+
+
+# Indicator IDs to fetch via the api module
+INDICATOR_IDS = ["RBWTICKMc1", "C2406121350446455",'USGGBE02 Index', "Cinjcjc4 index",'injcjc4 index','C2201059138_241106232710','C2406036178','C22411071623523660','C2312081670','REFOC-T-EIA_241114135248','C2304065621_241024124344','C22503031424010431']
+
+USE_HYPERPARAM_TUNING = False    # if False, train directly with xgb.train
+NUM_BOOST_ROUND = 1000
+RANDOM_STATE = 42
+TARGET_COL = '美国RBOB汽油裂解'
+TEST_PERIOD = 20
+SEARCH_MODE = 'random'           # one of 'grid' / 'bayesian' / 'random'
+SHOW_PLOTS = False
+ADJUST_FULL_PREDICTIONS = True
+
+
+TARGET_NAME = '美国RBOB汽油裂解'
+CLASSIFICATION = '原油'
+MODEL_FRAMEWORK = 'XGBoost'
+CREATOR = '张立舟'
+PRED_DATE = '2024/11/11'           
+FREQUENCY = '月度'
+OUTPUT_PATH = 'update.xlsx'     
+
+
+DEFAULT_PARAMS = {
+    'objective': 'reg:squarederror',
+    'learning_rate': 0.1,
+    'max_depth': 8,
+    'min_child_weight': 3,
+    'gamma': 2,
+    'subsample': 0.85,
+    'colsample_bytree': 0.75,
+    'eval_metric': 'rmse',
+    'seed': 42,
+    'reg_alpha': 0.45,
+    'reg_lambda': 1.29,
+}
+
+# —— Factor preprocessing configuration ——
+FILL_METHODS = {
+    '美国2年通胀预期': 'rolling_mean_5',
+    '美国首次申领失业金人数/4WMA': 'interpolate',
+    '道琼斯旅游与休闲/工业平均指数': 'interpolate',
+    '美国EIA成品油总库存(预测/供应需求3年季节性)': 'interpolate',
+    '美国成品车用汽油倒推产量(预测/汽油库存维持上年季节性)/8WMA': 'interpolate',
+    '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年': 'interpolate',
+    '美国炼厂可用产能(路透)(预测)': 'interpolate',
+    '美国炼厂CDU装置检修量(新)': 'interpolate',
+    '美湾单位辛烷值价格(预测/季节性)': 'interpolate',
+    '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年': 'interpolate'
+}
+
+SHIFT_CONFIG = [
+    ('美国2年通胀预期', 56, '美国2年通胀预期_提前56天'),
+    ('美国首次申领失业金人数/4WMA', 100, '美国首次申领失业金人数/4WMA_提前100天'),
+    ('美国首次申领失业金人数/4WMA', 112, '美国首次申领失业金人数/4WMA_提前112天'),
+    ('道琼斯旅游与休闲/工业平均指数', 14, '道琼斯旅游与休闲/工业平均指数_提前14天'),
+    ('美国EIA成品油总库存(预测/供应需求3年季节性)', 15,
+     '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天'),
+    ('美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年',
+     30,
+     '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年_提前30天'),
+    ('美国炼厂CDU装置检修量(新)', 30, '美国炼厂CDU装置检修量(新)_提前30天'),
+    ('美国炼厂可用产能(路透)(预测)', 100,
+     '美国炼厂可用产能(路透)(预测)_提前100天')
+]
+
+REVERSE_CONFIG = [
+    ('美国首次申领失业金人数/4WMA',
+     '美国首次申领失业金人数/4WMA_逆序'),
+    ('美国首次申领失业金人数/4WMA_提前100天',
+     '美国首次申领失业金人数/4WMA_提前100天_逆序'),
+    ('美国首次申领失业金人数/4WMA_提前112天',
+     '美国首次申领失业金人数/4WMA_提前112天_逆序'),
+    ('美国EIA成品油总库存(预测/供应需求3年季节性)',
+     '美国EIA成品油总库存(预测/供应需求3年季节性)_逆序'),
+    ('美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天',
+     '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天_逆序'),
+    ('美国炼厂可用产能(路透)(预测)_提前100天',
+     '美国炼厂可用产能(路透)(预测)_逆序'),
+    ('美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年',
+     '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序')
+]
+
+SPECIAL_REVERSE = {
+    '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序_2022-01-01': {
+        'base_column': '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序',
+        'condition_date': pd.Timestamp('2022-01-01')
+    }
+}
+
+
+# ------------ Data loading and preprocessing ------------
+def load_and_preprocess_data():
+    # fetch the data directly from the API
+    df = fetch_data_by_indicators(INDICATOR_IDS)
+#    print("Initial DataFrame columns:", df.columns)
+    df.index = pd.to_datetime(df.index)
+    df_daily = df.copy()
+    df_daily['Date'] = df_daily.index
+    df_daily = df_daily.reset_index(drop=True)
+    
+    # preprocessing pipeline
+    df_daily = fill_missing_values(df_daily, FILL_METHODS, return_only_filled=False)
+    for col, days, new_col in SHIFT_CONFIG:
+        df_daily[new_col] = df_daily[col].shift(days)
+
+    last_idx = df_daily[TARGET_COL].last_valid_index()
+    last_day = df_daily.loc[last_idx, 'Date']
+
+    df_daily = df_daily[(df_daily['Date'] >= '2009-08-01') & (df_daily['Date'] <= last_day + pd.Timedelta(days=30))]
+    df_daily = df_daily[df_daily['Date'].dt.weekday < 5]
+
+    for base, new in REVERSE_CONFIG:
+        df_daily[new] = reverse_column(df_daily, base)
+    for col, cfg in SPECIAL_REVERSE.items():
+        df_daily[col] = np.where(df_daily['Date'] >= cfg['condition_date'],
+                                df_daily[cfg['base_column']],
+                                np.nan)
+
+    df_daily = df_daily[(df_daily['Date'] > last_day)|df_daily[TARGET_COL].notna()]
+
+    return df_daily, last_day
+
+
+# ------------ Split and feature construction ------------
+def split_and_build_features(df_daily, last_day):
+    train = df_daily[df_daily['Date'] <= last_day].copy()
+    test = train.tail(TEST_PERIOD).copy()
+    train = train.iloc[:-TEST_PERIOD].copy()
+    future = df_daily[df_daily['Date'] > last_day].copy()
+
+    feature_columns = [
+        '美湾单位辛烷值价格(预测/季节性)',
+        '美国炼厂CDU装置检修量(新)_提前30天',
+        '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天_逆序',
+        '美国首次申领失业金人数/4WMA_提前100天_逆序',
+        '美国成品车用汽油倒推产量(预测/汽油库存维持上年季节性)/8WMA',
+        '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年_提前30天',
+        '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序_2022-01-01'
+    ]
+
+    X_train = train[feature_columns]
+    y_train = train[TARGET_COL]
+    X_test = test[feature_columns]
+    y_test = test[TARGET_COL]
+    X_future = future[feature_columns]
+
+    return X_train, y_train, X_test, y_test, X_future, train, test, future
+
+
+# ------------ Feature scaling and outlier weighting ------------
+def scale_and_weight_features(X_train, X_test, X_future):
+    scaler = StandardScaler()
+    X_tr = scaler.fit_transform(X_train)
+    X_te = scaler.transform(X_test)
+    X_fu = scaler.transform(X_future)
+    return scaler, X_tr, X_te, X_fu
+
+
+def detect_outliers_weights(X,weight_normal=1.0,weight_outlier=0.05,threshold=3):
+    z = np.abs((X - X.mean()) / X.std())
+    mask = (z > threshold).any(axis=1)
+    return np.where(mask, weight_outlier, weight_normal)
+
+
+# ------------ Model training ------------
+def train_model_with_tuning(X_tr, y_tr, X_te, y_te, weights, use_tuning):
+    print(f"超参数调优状态: {'启用' if use_tuning else '禁用'}")
+    if use_tuning:
+        param_dist = {
+            'learning_rate': list(np.arange(0.01, 0.11, 0.01)),
+            'max_depth': list(range(4, 11)),
+            'min_child_weight': list(range(1, 6)),
+            'gamma': list(np.arange(0, 0.6, 0.1)),
+            'subsample': list(np.arange(0.5, 1.01, 0.05)),
+            'colsample_bytree': list(np.arange(0.5, 1.01, 0.05)),
+            'reg_alpha': [0, 0.1, 0.2, 0.3, 0.4, 0.45, 0.5],
+            'reg_lambda': list(np.arange(1.0, 1.6, 0.1))
+        }
+        xgb_reg = XGBRegressor(objective='reg:squarederror',
+                               eval_metric='rmse',
+                               n_estimators=NUM_BOOST_ROUND,
+                               seed=RANDOM_STATE)
+        tscv = TimeSeriesSplit(n_splits=3)
+        if SEARCH_MODE == 'grid':
+            search = GridSearchCV(xgb_reg,
+                                  param_grid=param_dist,
+                                  scoring='neg_mean_squared_error',
+                                  cv=tscv,
+                                  verbose=1,
+                                  n_jobs=-1)
+        elif SEARCH_MODE == 'bayesian':
+            search = BayesSearchCV(xgb_reg,
+                                  search_spaces=param_dist,
+                                  n_iter=50,
+                                  scoring='neg_mean_squared_error',
+                                  cv=tscv,
+                                  random_state=RANDOM_STATE,
+                                  verbose=1,
+                                  n_jobs=-1)
+        else:
+            search = RandomizedSearchCV(xgb_reg,
+                                        param_distributions=param_dist,
+                                        n_iter=50,
+                                        scoring='neg_mean_squared_error',
+                                        cv=tscv,
+                                        random_state=RANDOM_STATE,
+                                        verbose=1,
+                                        n_jobs=-1)
+        search.fit(X_tr, y_tr, sample_weight=weights)
+        best_model = search.best_estimator_
+        print("调优后的最佳参数:", search.best_params_)
+        best_model.fit(X_tr, y_tr,
+                       eval_set=[(X_te, y_te)],
+                       early_stopping_rounds=20,
+                       verbose=200)
+    else:
+        print("使用默认参数进行训练...")
+        dtrain = xgb.DMatrix(X_tr, label=y_tr, weight=weights)
+        dtest = xgb.DMatrix(X_te, label=y_te)
+        best_model = xgb.train(DEFAULT_PARAMS,
+                               dtrain,
+                               num_boost_round=NUM_BOOST_ROUND,
+                               evals=[(dtrain, 'Train'),
+                                      (dtest, 'Test')],
+                               verbose_eval=False)
+    return best_model
+
+
+# ------------ Evaluation and prediction ------------
+def evaluate_and_predict(model, scaler, X_tr, y_tr, X_te, y_te, X_fu, use_tuning):
+    X_tr_s = scaler.transform(X_tr)
+    X_te_s = scaler.transform(X_te)
+    X_fu_s = scaler.transform(X_fu)
+
+    if isinstance(model, xgb.Booster):
+        y_tr_pred = model.predict(xgb.DMatrix(X_tr_s))
+        y_te_pred = model.predict(xgb.DMatrix(X_te_s))
+        y_fu_pred = model.predict(xgb.DMatrix(X_fu_s))
+    else:
+        y_tr_pred = model.predict(X_tr_s)
+        y_te_pred = model.predict(X_te_s)
+        y_fu_pred = model.predict(X_fu_s)
+
+    print("Train MSE:", mean_squared_error(y_tr, y_tr_pred),
+          "Test MSE:", mean_squared_error(y_te, y_te_pred))
+    if len(y_te) >= 2:
+        print("Train R2:", r2_score(y_tr, y_tr_pred),
+              "Test R2:", r2_score(y_te, y_te_pred))
+    else:
+        print("Test 样本不足,跳过 R² 计算")
+
+    return y_tr_pred, y_te_pred, y_fu_pred
+
+
+# ------------ Post-processing (build the daily & monthly DataFrames) ------------
+def merge_and_prepare_df(train, test, future, y_te_pred, y_fu_pred):
+    # merge historical actuals with future predictions
+    test = test.copy()
+    future = future.copy()
+    test['预测值'] = y_te_pred
+    future['预测值'] = y_fu_pred
+
+    hist_actual = pd.concat([
+        train[train['Date'].dt.year >= 2023][['Date', TARGET_COL]],
+        test[['Date', TARGET_COL]]
+    ])
+    hist_actual.columns = ['Date', '实际值']
+
+    future_pred = future[future['Date'] >= '2022-08-01'][['Date', '预测值']].rename(columns={'预测值': TARGET_COL}).copy()
+
+    last_val = float(hist_actual.iloc[-1]['实际值'])
+    future_pred.iloc[0, 1] = last_val
+
+    # daily frame
+    merged = pd.merge(hist_actual, future_pred,on='Date', how='outer').sort_values('Date', ascending=False)
+    daily_df = merged.copy()
+
+    # monthly resampling
+    monthly_df = daily_df.copy()
+    monthly_df['Date'] = pd.to_datetime(monthly_df['Date'])
+    monthly_df.set_index('Date', inplace=True)
+    monthly_df = monthly_df.resample('ME').mean().reset_index()
+
+    # direction accuracy
+    pred_dir = np.sign(monthly_df[TARGET_COL].diff())
+    true_dir = np.sign(monthly_df['实际值'].diff())
+    valid = monthly_df[TARGET_COL].notna() & monthly_df['实际值'].notna()
+    monthly_df['方向准确率'] = np.where(valid & (pred_dir == true_dir), '正确',
+                                   np.where(valid & (pred_dir != true_dir), '错误', None))
+    # absolute deviation
+    monthly_df['绝对偏差'] = (monthly_df[TARGET_COL] - monthly_df['实际值']).abs()
+
+    monthly_df = monthly_df.sort_values('Date', ascending=False).reset_index(drop=True)
+    return daily_df, monthly_df
+
+
+
+def generate_and_fill_excel(
+    daily_df,
+    monthly_df,
+    target_name,        # display name written as the prediction target ("预测标的")
+    classification,     # summary sheet ("列表页"): classification
+    model_framework,    # summary sheet: model framework
+    creator,            # summary sheet: creator
+    pred_date,          # summary sheet: prediction date
+    frequency,          # summary sheet: prediction frequency
+    output_path='update.xlsx'
+):
+    with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
+        workbook = writer.book
+
+        # —— compute the three summary values ——
+        # 1) test value: the most recent monthly prediction
+        test_value = monthly_df[TARGET_COL].iloc[0]
+        # 2) direction accuracy: correct count / valid count
+        total = monthly_df['方向准确率'].notna().sum()
+        correct = (monthly_df['方向准确率'] == '正确').sum()
+        direction_accuracy = f"{correct/total:.2%}" if total > 0 else ""
+        # 3) mean absolute deviation
+        absolute_deviation = monthly_df['绝对偏差'].mean()
+
+        # ========= summary sheet (列表页) =========
+        ws_list = workbook.add_worksheet('列表页')
+        writer.sheets['列表页'] = ws_list
+
+        headers = ['预测标的','分类','模型框架','创建人','预测日期','测试值','预测频度','方向准确率','绝对偏差']
+        ws_list.write_row(0, 0, headers)
+        ws_list.write_row(1, 0, [
+            target_name,
+            classification,
+            model_framework,
+            creator,
+            pred_date,
+            test_value,
+            frequency,
+            direction_accuracy,
+            absolute_deviation
+        ])
+
+        # ========= detail sheet (详情页) =========
+        detail_df = monthly_df[['Date', '实际值', TARGET_COL, '方向准确率', '绝对偏差']].copy()
+        detail_df.columns = ['指标日期','实际值','预测值','方向','偏差率']
+
+        detail_df.to_excel(writer,sheet_name='详情页',index=False,header=False,startrow=2)
+
+        ws_detail = writer.sheets['详情页']
+        ws_detail.write(0, 0, target_name)
+        ws_detail.write_row(1, 0, ['指标日期','实际值','预测值','方向','偏差率'])
+
+        # ========= daily data sheet (日度数据表) =========
+        daily_out = daily_df[['Date', '实际值', TARGET_COL]].copy()
+        daily_out.columns = ['指标日期','实际值','预测值']
+
+        daily_out.to_excel(writer,sheet_name='日度数据表',index=False,header=False,startrow=2)
+
+        ws_daily = writer.sheets['日度数据表']
+        ws_daily.write(0, 0, target_name)
+        ws_daily.write_row(1, 0, ['指标日期','实际值','预测值'])
+
+    print(f"已生成并填充 {output_path}")
+
+
+
+
+# ------------ Full-data training and prediction ------------
+def train_full_model_and_predict(X_train, y_train, X_test, y_test, X_future):
+    X_all = pd.concat([X_train, X_test])
+    y_all = pd.concat([y_train, y_test])
+    scaler_all = StandardScaler().fit(X_all)
+    X_all_s = scaler_all.transform(X_all)
+    X_fu_s = scaler_all.transform(X_future)
+
+    model = XGBRegressor(**DEFAULT_PARAMS, n_estimators=NUM_BOOST_ROUND)
+    model.fit(X_all_s, y_all)
+    y_fu_full = model.predict(X_fu_s)
+
+    return model, y_fu_full, scaler_all
+
+
+# ------------ Visualization ------------
+def plot_final_predictions(train, y_tr, y_tr_pred, test, y_te, y_te_pred,
+                           future, last_day):
+    plt.figure(figsize=(15, 6))
+    plt.plot(train['Date'], y_tr, label='Train True')
+    plt.plot(train['Date'], y_tr_pred, label='Train Pred')
+    plt.plot(test['Date'], y_te, label='Test True', alpha=0.7)
+    plt.plot(test['Date'], y_te_pred, label='Test Pred')
+    plt.plot(future['Date'], future['预测值'], label='Future Pred')
+    plt.axvline(test['Date'].iloc[0], color='gray', linestyle='--')
+    plt.axvline(last_day, color='black', linestyle='--')
+    plt.legend()
+    plt.xlabel('Date')
+    plt.ylabel(TARGET_COL)
+    plt.title('Prediction Visualization')
+    plt.grid(True)
+    plt.show()
+
+
+# ------------ Main ------------
+def main():
+    df_daily, last_day = load_and_preprocess_data()
+
+    X_tr, y_tr, X_te, y_te, X_fu, train, test, future = split_and_build_features(df_daily, last_day)
+
+    scaler, X_tr_s, X_te_s, X_fu_s = scale_and_weight_features(X_tr, X_te, X_fu)
+
+    weights = detect_outliers_weights(X_tr_s)
+
+    model = train_model_with_tuning(X_tr_s, y_tr, X_te_s, y_te, weights,USE_HYPERPARAM_TUNING)
+
+    y_tr_pred, y_te_pred, y_fu_pred = evaluate_and_predict(model, scaler, X_tr, y_tr, X_te, y_te, X_fu,USE_HYPERPARAM_TUNING)
+
+    daily_df, monthly_df = merge_and_prepare_df(train, test, future,y_te_pred, y_fu_pred)
+
+    print(monthly_df)
+    print(daily_df)
+
+    generate_and_fill_excel(
+        daily_df,
+        monthly_df,
+        target_name= TARGET_NAME,          
+        classification= CLASSIFICATION,
+        model_framework= MODEL_FRAMEWORK,
+        creator= CREATOR,
+        pred_date= PRED_DATE,           
+        frequency= FREQUENCY,
+        output_path= OUTPUT_PATH     
+    )
+    
+    full_model, y_fu_full, scaler_full = train_full_model_and_predict(X_tr, y_tr, X_te, y_te, X_fu)
+
+    if ADJUST_FULL_PREDICTIONS:
+        offset = y_te.iloc[-1] - y_fu_full[0]
+        y_fu_full += offset
+
+    if SHOW_PLOTS:
+        plot_final_predictions(
+            train, y_tr, y_tr_pred, test, y_te, y_te_pred,
+            future.assign(预测值=y_fu_full), last_day)
+
+    return daily_df, monthly_df
+
+if __name__ == '__main__':
+    daily_df, monthly_df = main()

+ 604 - 0
WTI/3.Rbob.py

@@ -0,0 +1,604 @@
+import pandas as pd
+import numpy as np
+import xgboost as xgb
+from xgboost import XGBRegressor
+from sklearn.metrics import mean_squared_error, r2_score
+import matplotlib.pyplot as plt
+from skopt import BayesSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit
+import argparse  # used to parse command-line parameters
+import itertools
+import random
+from skopt.space import Real, Integer, Categorical
+import json
+
+from Dtool import fill_missing_values, reverse_column
+from api import fetch_data_by_indicators
+
+
+# Command-line argument parsing
+def parse_arguments():
+    parser = argparse.ArgumentParser(description='RBOB汽油裂解预测模型')
+    
+    # XGBoost parameters
+    parser.add_argument('--objective', type=str, default='reg:squarederror', help='XGBoost目标函数')
+    parser.add_argument('--learning_rate', type=float, default=0.1, help='学习率')
+    parser.add_argument('--max_depth', type=int, default=8, help='最大树深度')
+    parser.add_argument('--min_child_weight', type=int, default=3, help='最小子权重')
+    parser.add_argument('--gamma', type=float, default=2, help='gamma参数')
+    parser.add_argument('--subsample', type=float, default=0.85, help='子样本比例')
+    parser.add_argument('--colsample_bytree', type=float, default=0.75, help='每棵树的列采样率')
+    parser.add_argument('--eval_metric', type=str, default='rmse', help='评估指标')
+    parser.add_argument('--seed', type=int, default=42, help='随机种子')
+    parser.add_argument('--reg_alpha', type=float, default=0.45, help='L1正则化')
+    parser.add_argument('--reg_lambda', type=float, default=1.29, help='L2正则化')
+
+    # other parameters
+    parser.add_argument('--num_boost_round', type=int, default=1000, help='提升迭代次数')
+    parser.add_argument('--use_hyperparam_tuning', type=str, default='False', help='是否使用超参数调优')
+    parser.add_argument('--output_prefix', type=str, default='', help='输出文件前缀,如传入1234则生成1234_update.xlsx')
+
+    args = parser.parse_args()
+    return args
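+# Example invocation (values here are illustrative; per the --output_prefix help text, passing
+# 1234 produces 1234_update.xlsx, the binary file included in this commit):
+#   python 3.Rbob.py --learning_rate 0.05 --max_depth 6 --output_prefix 1234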
+
+
+
+# Indicator IDs to fetch via the api module
+INDICATOR_IDS = ["RBWTICKMc1", "C2406121350446455",'USGGBE02 Index', "Cinjcjc4 index",'injcjc4 index','C2201059138_241106232710','C2406036178','C22411071623523660','C2312081670','REFOC-T-EIA_241114135248','C2304065621_241024124344','C22503031424010431']
+
+
+# These variables are updated from the command-line arguments in main()
+NUM_BOOST_ROUND = 1000
+RANDOM_STATE = 42
+USE_HYPERPARAM_TUNING = False    # if False, train directly with xgb.train
+TARGET_COL = '美国RBOB汽油裂解'
+TEST_PERIOD = 20
+SEARCH_MODE = 'random'           # one of 'grid' / 'bayesian' / 'random'
+SHOW_PLOTS = False
+ADJUST_FULL_PREDICTIONS = True
+
+
+TARGET_NAME = '美国RBOB汽油裂解'
+CLASSIFICATION = '原油'
+MODEL_FRAMEWORK = 'XGBoost'
+CREATOR = '张立舟'
+PRED_DATE = '2024/11/11'           
+FREQUENCY = '月度'
+OUTPUT_PATH = 'update.xlsx'     
+
+
+# XGBoost default parameters; updated from the command-line arguments in main()
+DEFAULT_PARAMS = {
+    'objective': 'reg:squarederror',
+    'learning_rate': 0.1,
+    'max_depth': 8,
+    'min_child_weight': 3,
+    'gamma': 2,
+    'subsample': 0.85,
+    'colsample_bytree': 0.75,
+    'eval_metric': 'rmse',
+    'seed': 42,
+    'reg_alpha': 0.45,
+    'reg_lambda': 1.29,
+}
+
+# —— Factor preprocessing configuration ——
+FILL_METHODS = {
+    '美国2年通胀预期': 'rolling_mean_5',
+    '美国首次申领失业金人数/4WMA': 'interpolate',
+    '道琼斯旅游与休闲/工业平均指数': 'interpolate',
+    '美国EIA成品油总库存(预测/供应需求3年季节性)': 'interpolate',
+    '美国成品车用汽油倒推产量(预测/汽油库存维持上年季节性)/8WMA': 'interpolate',
+    '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年': 'interpolate',
+    '美国炼厂可用产能(路透)(预测)': 'interpolate',
+    '美国炼厂CDU装置检修量(新)': 'interpolate',
+    '美湾单位辛烷值价格(预测/季节性)': 'interpolate',
+    '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年': 'interpolate'
+}
+
+SHIFT_CONFIG = [
+    ('美国2年通胀预期', 56, '美国2年通胀预期_提前56天'),
+    ('美国首次申领失业金人数/4WMA', 100, '美国首次申领失业金人数/4WMA_提前100天'),
+    ('美国首次申领失业金人数/4WMA', 112, '美国首次申领失业金人数/4WMA_提前112天'),
+    ('道琼斯旅游与休闲/工业平均指数', 14, '道琼斯旅游与休闲/工业平均指数_提前14天'),
+    ('美国EIA成品油总库存(预测/供应需求3年季节性)', 15,
+     '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天'),
+    ('美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年',
+     30,
+     '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年_提前30天'),
+    ('美国炼厂CDU装置检修量(新)', 30, '美国炼厂CDU装置检修量(新)_提前30天'),
+    ('美国炼厂可用产能(路透)(预测)', 100,
+     '美国炼厂可用产能(路透)(预测)_提前100天')
+]
+
+REVERSE_CONFIG = [
+    ('美国首次申领失业金人数/4WMA',
+     '美国首次申领失业金人数/4WMA_逆序'),
+    ('美国首次申领失业金人数/4WMA_提前100天',
+     '美国首次申领失业金人数/4WMA_提前100天_逆序'),
+    ('美国首次申领失业金人数/4WMA_提前112天',
+     '美国首次申领失业金人数/4WMA_提前112天_逆序'),
+    ('美国EIA成品油总库存(预测/供应需求3年季节性)',
+     '美国EIA成品油总库存(预测/供应需求3年季节性)_逆序'),
+    ('美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天',
+     '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天_逆序'),
+    ('美国炼厂可用产能(路透)(预测)_提前100天',
+     '美国炼厂可用产能(路透)(预测)_逆序'),
+    ('美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年',
+     '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序')
+]
+
+SPECIAL_REVERSE = {
+    '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序_2022-01-01': {
+        'base_column': '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序',
+        'condition_date': pd.Timestamp('2022-01-01')
+    }
+}
+
+
+# ------------ Data loading and preprocessing ------------
+def load_and_preprocess_data():
+    # fetch the data directly from the API
+    df = fetch_data_by_indicators(INDICATOR_IDS)
+#    print("Initial DataFrame columns:", df.columns)
+    df.index = pd.to_datetime(df.index)
+    df_daily = df.copy()
+    df_daily['Date'] = df_daily.index
+    df_daily = df_daily.reset_index(drop=True)
+    
+    # preprocessing pipeline
+    df_daily = fill_missing_values(df_daily, FILL_METHODS, return_only_filled=False)
+    for col, days, new_col in SHIFT_CONFIG:
+        df_daily[new_col] = df_daily[col].shift(days)
+
+    last_idx = df_daily[TARGET_COL].last_valid_index()
+    last_day = df_daily.loc[last_idx, 'Date']
+
+    df_daily = df_daily[(df_daily['Date'] >= '2009-08-01') & (df_daily['Date'] <= last_day + pd.Timedelta(days=30))]
+    df_daily = df_daily[df_daily['Date'].dt.weekday < 5]
+
+    for base, new in REVERSE_CONFIG:
+        df_daily[new] = reverse_column(df_daily, base)
+    for col, cfg in SPECIAL_REVERSE.items():
+        df_daily[col] = np.where(df_daily['Date'] >= cfg['condition_date'],
+                                df_daily[cfg['base_column']],
+                                np.nan)
+
+    df_daily = df_daily[(df_daily['Date'] > last_day)|df_daily[TARGET_COL].notna()]
+
+    return df_daily, last_day
+
+
+# ------------ Split and feature construction ------------
+def split_and_build_features(df_daily, last_day):
+    train = df_daily[df_daily['Date'] <= last_day].copy()
+    test = train.tail(TEST_PERIOD).copy()
+    train = train.iloc[:-TEST_PERIOD].copy()
+    future = df_daily[df_daily['Date'] > last_day].copy()
+
+    feature_columns = [
+        '美湾单位辛烷值价格(预测/季节性)',
+        '美国炼厂CDU装置检修量(新)_提前30天',
+        '美国EIA成品油总库存(预测/供应需求3年季节性)_提前15天_逆序',
+        '美国首次申领失业金人数/4WMA_提前100天_逆序',
+        '美国成品车用汽油倒推产量(预测/汽油库存维持上年季节性)/8WMA',
+        '美国成品车用汽油炼厂与调和装置净产量/4WMA(预测/上年季节性)超季节性/5年_提前30天',
+        '美国汽油调和组分RBOB库存(预测/线性外推)超季节性/3年_逆序_2022-01-01'
+    ]
+
+    X_train = train[feature_columns]
+    y_train = train[TARGET_COL]
+    X_test = test[feature_columns]
+    y_test = test[TARGET_COL]
+    X_future = future[feature_columns]
+
+    return X_train, y_train, X_test, y_test, X_future, train, test, future
+
+
+# ------------ Feature scaling & outlier weights ------------
+def scale_and_weight_features(X_train, X_test, X_future):
+    scaler = StandardScaler()
+    X_tr = scaler.fit_transform(X_train)
+    X_te = scaler.transform(X_test)
+    X_fu = scaler.transform(X_future)
+    return scaler, X_tr, X_te, X_fu
+
+
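+# Down-weight apparent outliers: any row whose standardized features contain a
+# |z-score| above `threshold` gets `weight_outlier` instead of `weight_normal`.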
+def detect_outliers_weights(X, weight_normal=1.0, weight_outlier=0.05, threshold=3):
+    z = np.abs((X - X.mean()) / X.std())
+    mask = (z > threshold).any(axis=1)
+    return np.where(mask, weight_outlier, weight_normal)
+
+
+# ------------ Model training ------------
+def train_model_with_tuning(X_tr, y_tr, X_te, y_te, weights, use_tuning):
+    if use_tuning:
+        # Local imports used only by the search branches below
+        # (harmless no-ops if they are already imported at module level)
+        import itertools
+        import random
+
+        param_dist = {
+            'learning_rate': list(np.arange(0.01, 0.11, 0.01)),
+            'max_depth': list(range(4, 11)),
+            'min_child_weight': list(range(1, 6)),
+            'gamma': list(np.arange(0, 0.6, 0.1)),
+            'subsample': list(np.arange(0.5, 1.01, 0.05)),
+            'colsample_bytree': list(np.arange(0.5, 1.01, 0.05)),
+            'reg_alpha': [0, 0.1, 0.2, 0.3, 0.4, 0.45, 0.5],
+            'reg_lambda': list(np.arange(1.0, 1.6, 0.1))
+        }
+        
+        # 将数据转换为DMatrix格式
+        dtrain = xgb.DMatrix(X_tr, label=y_tr, weight=weights)
+        dtest = xgb.DMatrix(X_te, label=y_te)
+        
+        # 基础参数设置
+        base_params = {
+            'objective': 'reg:squarederror',
+            'eval_metric': 'rmse',
+            'seed': RANDOM_STATE
+        }
+        
+        best_score = float('inf')
+        best_params = None
+        
+        # Grid search over the full cartesian product of param_dist (can be very large)
+        if SEARCH_MODE == 'grid':
+            param_combinations = [dict(zip(param_dist.keys(), v)) 
+                               for v in itertools.product(*param_dist.values())]
+            for params in param_combinations:
+                curr_params = {**base_params, **params}
+                cv_results = xgb.cv(curr_params, dtrain, 
+                                  num_boost_round=NUM_BOOST_ROUND,
+                                  nfold=3,
+                                  early_stopping_rounds=20,
+                                  verbose_eval=False)
+                score = cv_results['test-rmse-mean'].min()
+                if score < best_score:
+                    best_score = score
+                    best_params = curr_params
+        # Bayesian search
+        elif SEARCH_MODE == 'bayesian':
+            from skopt import gp_minimize
+            from skopt.space import Real, Integer
+
+            search_spaces = {
+                'learning_rate': Real(0.01, 0.11, prior='uniform'),
+                'max_depth': Integer(4, 11),
+                'min_child_weight': Integer(1, 6),
+                'gamma': Real(0.0, 0.6, prior='uniform'),
+                # subsample / colsample_bytree must stay within (0, 1]
+                'subsample': Real(0.5, 1.0, prior='uniform'),
+                'colsample_bytree': Real(0.5, 1.0, prior='uniform'),
+                'reg_alpha': Real(0.0, 0.5, prior='uniform'),
+                'reg_lambda': Real(1.0, 1.6, prior='uniform')
+            }
+
+            def objective(param_values):
+                # gp_minimize passes a plain list of values, ordered like `dimensions`
+                params = dict(zip(search_spaces.keys(), param_values))
+                curr_params = {**base_params, **params}
+                cv_results = xgb.cv(curr_params, dtrain,
+                                    num_boost_round=NUM_BOOST_ROUND,
+                                    nfold=3,
+                                    early_stopping_rounds=20,
+                                    verbose_eval=False)
+                return cv_results['test-rmse-mean'].min()
+
+            # Run the Bayesian optimisation
+            result = gp_minimize(
+                objective,
+                dimensions=list(search_spaces.values()),
+                n_calls=50,
+                random_state=RANDOM_STATE
+            )
+
+            best_params = {**base_params, **dict(zip(search_spaces.keys(), result.x))}
+            best_score = result.fun
+
+        # Random search: sample 50 random configurations from param_dist
+        else:
+            for _ in range(50):  
+                params = {k: random.choice(v) for k, v in param_dist.items()}
+                curr_params = {**base_params, **params}
+                cv_results = xgb.cv(curr_params, dtrain,
+                                  num_boost_round=NUM_BOOST_ROUND,
+                                  nfold=3,
+                                  early_stopping_rounds=20,
+                                  verbose_eval=False)
+                score = cv_results['test-rmse-mean'].min()
+                if score < best_score:
+                    best_score = score
+                    best_params = curr_params
+        
+        print("调优后的最佳参数:", best_params)
+        print("最佳得分:", best_score)
+        
+        # Retrain the final model with the best parameters found
+        best_model = xgb.train(best_params,
+                              dtrain,
+                              num_boost_round=NUM_BOOST_ROUND,
+                              evals=[(dtrain, 'Train'), (dtest, 'Test')],
+                              early_stopping_rounds=20,
+                              verbose_eval=False)
+    else:
+        # Train directly with the default parameters
+        dtrain = xgb.DMatrix(X_tr, label=y_tr, weight=weights)
+        dtest = xgb.DMatrix(X_te, label=y_te)
+        best_model = xgb.train(DEFAULT_PARAMS,
+                              dtrain,
+                              num_boost_round=NUM_BOOST_ROUND,
+                              evals=[(dtrain, 'Train'),
+                                    (dtest, 'Test')],
+                              verbose_eval=False)
+    return best_model
+
+
+# ------------ Evaluation & prediction ------------
+def evaluate_and_predict(model, scaler, X_tr, y_tr, X_te, y_te, X_fu, use_tuning):
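+    # X_tr / X_te / X_fu arrive unscaled, so reuse the scaler fitted on the training split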
+    X_tr_s = scaler.transform(X_tr)
+    X_te_s = scaler.transform(X_te)
+    X_fu_s = scaler.transform(X_fu)
+
+    if isinstance(model, xgb.Booster):
+        y_tr_pred = model.predict(xgb.DMatrix(X_tr_s))
+        y_te_pred = model.predict(xgb.DMatrix(X_te_s))
+        y_fu_pred = model.predict(xgb.DMatrix(X_fu_s))
+    else:
+        y_tr_pred = model.predict(X_tr_s)
+        y_te_pred = model.predict(X_te_s)
+        y_fu_pred = model.predict(X_fu_s)
+
+    # Compute evaluation metrics, keeping 4 significant figures
+    train_mse = float(f"{mean_squared_error(y_tr, y_tr_pred):.4g}")
+    test_mse = float(f"{mean_squared_error(y_te, y_te_pred):.4g}")
+    train_r2 = float(f"{r2_score(y_tr, y_tr_pred):.4g}")
+    test_r2 = float(f"{r2_score(y_te, y_te_pred):.4g}") if len(y_te) >= 2 else None
+
+    print("Train MSE:", train_mse, "Test MSE:", test_mse)
+    if len(y_te) >= 2:
+        print("Train R2:", train_r2, "Test R2:", test_r2)
+    else:
+        print("Test 样本不足,跳过 R² 计算")
+
+    metrics = {
+        'train_mse': train_mse,
+        'test_mse': test_mse,
+        'train_r2': train_r2,
+        'test_r2': test_r2
+    }
+
+    # Append the metrics to a JSON log
+    import json  # local import; harmless if json is already imported at module level
+    json_path = 'model_metrics.json'
+    try:
+        with open(json_path, 'r', encoding='utf-8') as f:
+            existing_metrics = json.load(f)
+    except FileNotFoundError:
+        existing_metrics = []
+    
+    existing_metrics.append(metrics)
+    
+    with open(json_path, 'w', encoding='utf-8') as f:
+        json.dump(existing_metrics, f, ensure_ascii=False, indent=4)
+    print(f"评估指标已保存至 {json_path}")
+
+    return y_tr_pred, y_te_pred, y_fu_pred
+
+
+# ------------ Post-processing (build daily & monthly DataFrames) ------------
+def merge_and_prepare_df(train, test, future, y_te_pred, y_fu_pred):
+    # Merge historical actuals with the test-window and future predictions
+    test = test.copy()
+    future = future.copy()
+    test['预测值'] = y_te_pred
+    future['预测值'] = y_fu_pred
+
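+    # Historical actuals: training actuals from 2023 onward plus the held-out test window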
+    hist_actual = pd.concat([
+        train[train['Date'].dt.year >= 2023][['Date', TARGET_COL]],
+        test[['Date', TARGET_COL]]
+    ])
+    hist_actual.columns = ['Date', '实际值']
+
+    future_pred = future[future['Date'] >= '2022-08-01'][['Date', '预测值']].rename(columns={'预测值': TARGET_COL}).copy()
+
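+    # Anchor the first forecast point to the last observed actual so the two curves join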
+    last_val = hist_actual.iloc[-1]['实际值']
+    future_pred[TARGET_COL] = future_pred[TARGET_COL].astype(last_val.dtype)
+    future_pred.iloc[0, 1] = last_val
+
+    # Combine actuals and forecasts into one daily frame, newest dates first
+    merged = pd.merge(hist_actual, future_pred, on='Date', how='outer').sort_values('Date', ascending=False)
+    daily_df = merged.copy()
+
+    # Monthly resampling: calendar-month mean ('ME' = month-end, requires pandas >= 2.2)
+    monthly_df = daily_df.copy()
+    monthly_df['Date'] = pd.to_datetime(monthly_df['Date'])
+    monthly_df.set_index('Date', inplace=True)
+    monthly_df = monthly_df.resample('ME').mean().reset_index()
+
+    # Month-over-month direction accuracy: did the forecast move the same way as the actual?
+    pred_dir = np.sign(monthly_df[TARGET_COL].diff())
+    true_dir = np.sign(monthly_df['实际值'].diff())
+    valid = monthly_df[TARGET_COL].notna() & monthly_df['实际值'].notna()
+    # Use None rather than np.nan for months without both series; np.where would otherwise
+    # coerce the missing entries to the string 'nan', which later counts as non-null.
+    monthly_df['方向准确率'] = np.where(valid & (pred_dir == true_dir), '正确',
+                                   np.where(valid & (pred_dir != true_dir), '错误', None))
+    # Absolute deviation between forecast and actual
+    monthly_df['绝对偏差'] = (monthly_df[TARGET_COL] - monthly_df['实际值']).abs()
+
+    monthly_df = monthly_df.sort_values('Date', ascending=False).reset_index(drop=True)
+    return daily_df, monthly_df
+
+
+
+def generate_and_fill_excel(
+    daily_df,
+    monthly_df,
+    target_name,        # display name written as the forecast target
+    classification,     # list sheet: classification
+    model_framework,    # list sheet: model framework
+    creator,            # list sheet: creator
+    pred_date,          # list sheet: forecast date
+    frequency,          # list sheet: forecast frequency
+    output_path='update.xlsx'
+):
+    with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
+        workbook = writer.book
+
+        # —— Compute the three summary values ——
+        # 1) Test value: the most recent monthly forecast
+        test_value = monthly_df[TARGET_COL].iloc[0]
+        # 2) Direction accuracy: correct months / months with a valid comparison
+        total = monthly_df['方向准确率'].notna().sum()
+        correct = (monthly_df['方向准确率'] == '正确').sum()
+        direction_accuracy = f"{correct/total:.2%}" if total > 0 else ""
+        # 3) Mean absolute deviation
+        absolute_deviation = monthly_df['绝对偏差'].mean()
+
+        # ========= '列表页' (summary list) sheet =========
+        ws_list = workbook.add_worksheet('列表页')
+        writer.sheets['列表页'] = ws_list
+
+        headers = ['预测标的','分类','模型框架','创建人','预测日期','测试值','预测频度','方向准确率','绝对偏差']
+        ws_list.write_row(0, 0, headers)
+        ws_list.write_row(1, 0, [
+            target_name,
+            classification,
+            model_framework,
+            creator,
+            pred_date,
+            test_value,
+            frequency,
+            direction_accuracy,
+            absolute_deviation
+        ])
+
+        # ========= '详情页' (monthly detail) sheet =========
+        detail_df = monthly_df[['Date', '实际值', TARGET_COL, '方向准确率', '绝对偏差']].copy()
+        detail_df.columns = ['指标日期','实际值','预测值','方向','偏差率']
+
+        detail_df.to_excel(writer,sheet_name='详情页',index=False,header=False,startrow=2)
+
+        ws_detail = writer.sheets['详情页']
+        ws_detail.write(0, 0, target_name)
+        ws_detail.write_row(1, 0, ['指标日期','实际值','预测值','方向','偏差率'])
+
+        # ========= '日度数据表' (daily data) sheet =========
+        daily_out = daily_df[['Date', '实际值', TARGET_COL]].copy()
+        daily_out.columns = ['指标日期','实际值','预测值']
+
+        daily_out.to_excel(writer,sheet_name='日度数据表',index=False,header=False,startrow=2)
+
+        ws_daily = writer.sheets['日度数据表']
+        ws_daily.write(0, 0, target_name)
+        ws_daily.write_row(1, 0, ['指标日期','实际值','预测值'])
+
+    print(f"已生成并填充 {output_path}")
+
+
+
+# ------------ Full-sample training & prediction ------------
+def train_full_model_and_predict(X_train, y_train, X_test, y_test, X_future):
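+    # Refit on train + test combined so the production forecast uses every observed point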
+    X_all = pd.concat([X_train, X_test])
+    y_all = pd.concat([y_train, y_test])
+    scaler_all = StandardScaler().fit(X_all)
+    X_all_s = scaler_all.transform(X_all)
+    X_fu_s = scaler_all.transform(X_future)
+
+    model = XGBRegressor(**DEFAULT_PARAMS, n_estimators=NUM_BOOST_ROUND)
+    model.fit(X_all_s, y_all)
+    y_fu_full = model.predict(X_fu_s)
+
+    return model, y_fu_full, scaler_all
+
+
+# ------------ Visualization ------------
+def plot_final_predictions(train, y_tr, y_tr_pred, test, y_te, y_te_pred,
+                           future, last_day):
+    plt.figure(figsize=(15, 6))
+    plt.plot(train['Date'], y_tr, label='Train True')
+    plt.plot(train['Date'], y_tr_pred, label='Train Pred')
+    plt.plot(test['Date'], y_te, label='Test True', alpha=0.7)
+    plt.plot(test['Date'], y_te_pred, label='Test Pred')
+    plt.plot(future['Date'], future['预测值'], label='Future Pred')
+    plt.axvline(test['Date'].iloc[0], color='gray', linestyle='--')
+    plt.axvline(last_day, color='black', linestyle='--')
+    plt.legend()
+    plt.xlabel('Date')
+    plt.ylabel(TARGET_COL)
+    plt.title('Prediction Visualization')
+    plt.grid(True)
+    plt.show()
+
+
+# ------------ Main ------------
+def main():
+    # Parse command-line arguments
+    args = parse_arguments()
+    
+    # Update module-level configuration from the parsed arguments
+    global NUM_BOOST_ROUND, USE_HYPERPARAM_TUNING, OUTPUT_PATH, DEFAULT_PARAMS
+    
+    NUM_BOOST_ROUND = args.num_boost_round
+    USE_HYPERPARAM_TUNING = args.use_hyperparam_tuning.lower() == 'true'
+    
+    # Derive the output path from the prefix, if one was given
+    if args.output_prefix:
+        OUTPUT_PATH = f"{args.output_prefix}_update.xlsx"
+    
+    # Update the XGBoost parameters from the command line
+    DEFAULT_PARAMS = {
+        'objective': args.objective,
+        'learning_rate': args.learning_rate,
+        'max_depth': args.max_depth,
+        'min_child_weight': args.min_child_weight,
+        'gamma': args.gamma,
+        'subsample': args.subsample,
+        'colsample_bytree': args.colsample_bytree,
+        'eval_metric': args.eval_metric,
+        'seed': args.seed,
+        'reg_alpha': args.reg_alpha,
+        'reg_lambda': args.reg_lambda,
+    }
+    
+#    print("使用参数:")
+#    print(f"NUM_BOOST_ROUND: {NUM_BOOST_ROUND}")
+#    print(f"USE_HYPERPARAM_TUNING: {USE_HYPERPARAM_TUNING}")
+#    print(f"OUTPUT_PATH: {OUTPUT_PATH}")
+#    print("DEFAULT_PARAMS:", DEFAULT_PARAMS)
+    
+    df_daily, last_day = load_and_preprocess_data()
+
+    X_tr, y_tr, X_te, y_te, X_fu, train, test, future = split_and_build_features(df_daily, last_day)
+
+    scaler, X_tr_s, X_te_s, X_fu_s = scale_and_weight_features(X_tr, X_te, X_fu)
+
+    weights = detect_outliers_weights(X_tr_s)
+
+    model = train_model_with_tuning(X_tr_s, y_tr, X_te_s, y_te, weights, USE_HYPERPARAM_TUNING)
+
+    y_tr_pred, y_te_pred, y_fu_pred = evaluate_and_predict(model, scaler, X_tr, y_tr, X_te, y_te, X_fu, USE_HYPERPARAM_TUNING)
+
+    daily_df, monthly_df = merge_and_prepare_df(train, test, future, y_te_pred, y_fu_pred)
+
+#    print(monthly_df)
+#    print(daily_df)
+
+    generate_and_fill_excel(
+        daily_df,
+        monthly_df,
+        target_name=TARGET_NAME,
+        classification=CLASSIFICATION,
+        model_framework=MODEL_FRAMEWORK,
+        creator=CREATOR,
+        pred_date=PRED_DATE,
+        frequency=FREQUENCY,
+        output_path=OUTPUT_PATH
+    )
+    
+    full_model, y_fu_full, scaler_full = train_full_model_and_predict(X_tr, y_tr, X_te, y_te, X_fu)
+
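+    # Optionally level-shift the full-sample forecast so its first point matches the last test actual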
+    if ADJUST_FULL_PREDICTIONS:
+        offset = y_te.iloc[-1] - y_fu_full[0]
+        y_fu_full += offset
+
+    if SHOW_PLOTS:
+        plot_final_predictions(
+            train, y_tr, y_tr_pred, test, y_te, y_te_pred,
+            future.assign(预测值=y_fu_full), last_day)
+
+    return daily_df, monthly_df
+
+if __name__ == '__main__':
+    daily_df, monthly_df = main()

+ 0 - 94
WTI/Dcel.py

@@ -1,94 +0,0 @@
-def update_excel_data(input_data, excel_file_path, sheet_name, identifier):
-    """
-    将数据框更新到指定Excel文件的特定工作表中
-    
-    参数:
-        input_data (DataFrame): 要更新的数据,需包含三列(日期、实际值、预测值)
-        excel_file_path (str): Excel文件路径
-        sheet_name (str): 工作表名称
-        identifier (str): 标识符,用于定位更新位置
-        
-    返回:
-        bool: 更新是否成功
-    """
-    try:
-        import pandas as pd
-        from openpyxl import load_workbook
-        from openpyxl.utils import get_column_letter
-        import os
-        from datetime import datetime
-        
-        # 检查文件是否存在
-        if not os.path.exists(excel_file_path):
-            print(f"错误:文件 {excel_file_path} 不存在")
-            return False
-            
-        # 加载工作簿(不使用 data_only,这样我们可以访问公式)
-        wb = load_workbook(excel_file_path)
-
-        if sheet_name not in wb.sheetnames:
-            print(f"错误:工作表 {sheet_name} 不存在")
-            return False
-            
-        ws = wb[sheet_name]
-        
-        # 查找标识符位置
-        start_row = None
-        identifier_col = None
-        for row in range(1, ws.max_row + 1):
-            for col in range(1, ws.max_column + 1):
-                cell_value = ws.cell(row=row, column=col).value
-                if str(cell_value).strip() == str(identifier).strip():
-                    start_row = row + 2  # 假设数据从标识符下方两行开始
-                    identifier_col = col
-                    break
-            if start_row:
-                break
-                
-        if not start_row:
-            print(f"错误:未找到标识符 {identifier}")
-            return False
-            
-        # 确定写入的列
-        date_col = identifier_col  # 日期列在标识符下方
-        actual_col = date_col + 1  # 真实值列
-        pred_col = date_col + 2    # 预测值列
-        
-        # 只清除数据,不覆盖公式
-        last_row = ws.max_row
-        for row in range(start_row, last_row + 1):
-            for col in [date_col, actual_col, pred_col]:
-                cell = ws.cell(row=row, column=col)
-                if cell.value is not None:
-                    # 清除数据,但保留公式
-                    if not isinstance(cell.value, str) or not cell.value.startswith('='):
-                        cell.value = None
-        
-        # 写入新数据
-        for i, row_data in enumerate(input_data.values):
-            current_row = start_row + i
-            
-            # 处理日期格式
-            date_value = row_data[0]
-            if isinstance(date_value, (datetime, pd.Timestamp)):
-                formatted_date = date_value.strftime('%Y/%m/%d')
-            else:
-                # 如果已经是字符串,尝试解析并重新格式化
-                try:
-                    formatted_date = pd.to_datetime(date_value).strftime('%Y/%m/%d')
-                except:
-                    formatted_date = str(date_value)  # 如果无法解析,保持原样
-            
-            # 更新日期、实际值、预测值
-            ws.cell(row=current_row, column=date_col).value = formatted_date
-            ws.cell(row=current_row, column=actual_col).value = row_data[1]
-            ws.cell(row=current_row, column=pred_col).value = row_data[2]
-        
-        # 保存文件
-        wb.save(excel_file_path)
-        print(f"成功更新 {sheet_name} 中的数据")
-        return True
-        
-    except Exception as e:
-        print(f"更新数据时出错: {str(e)}")
-        return False

BIN
WTI/__pycache__/Dcel.cpython-312.pyc


BIN
WTI/__pycache__/Dtool.cpython-312.pyc


BIN
WTI/__pycache__/api.cpython-312.pyc


+ 24 - 25
WTI/1.1Rbob_api.py → WTI/api.py

@@ -89,47 +89,46 @@ def fetch_indicator_name(indicator_id):
             f"Failed to fetch data for ID {indicator_id}, status code: {response.status_code}")
         return None
 
-def main():
-    # List of indicator IDs you want to fetch
-    indicator_ids = [
-        "RBWTICKMc1",
-        "C2406121350446455",
-        'USGGBE02 Index', 
-        "Cinjcjc4 index",
-        'injcjc4 index',
-        'C2201059138_241106232710',
-        'C2406036178',
-        'C22411071623523660',
-        'C2312081670',
-        'REFOC-T-EIA_241114135248',
-        'C2304065621_241024124344',
-        'REFOC-T-EIA_241114135248',
-        'C22503031424010431'
-        ]  # Add more IDs as needed
-
+def fetch_data_by_indicators(indicator_ids, output_path=None):
+    """
+    根据提供的indicator IDs获取数据并返回DataFrame
+    
+    参数:
+    indicator_ids (list): 指标ID列表
+    output_path (str, optional): 如果提供,将结果保存为Excel文件的路径
+    
+    返回:
+    pandas.DataFrame: 包含所有指标数据的DataFrame
+    """
     # Dictionary to store DataFrames for each indicator
     data_frames = {}
 
     for indicator_id in indicator_ids:
         data = fetch_indicator_details(indicator_id)
         if data:
-            # Create a DataFrame with DataTime as index
             df = pd.DataFrame(data)
             df['DataTime'] = pd.to_datetime(df['DataTime'])
             df.set_index('DataTime', inplace=True)
             df.sort_index(inplace=True)
-            # Only keep the 'Value' column and rename it to the indicator ID
             df = df[['Value']].rename(columns={'Value': fetch_indicator_name(indicator_id)})
             data_frames[indicator_id] = df
 
     # Concatenate all DataFrames along the columns
     if data_frames:
         result_df = pd.concat(data_frames.values(), axis=1)
-        print(result_df.info())
-        result_df.to_excel("data_input/RBOB.xlsx")
-        print("Data saved successfully as 'RBOB.xlsx'")
-
+        
+        if output_path:
+            result_df.to_excel(output_path)
+            print(f"Data saved successfully as '{output_path}'")
+            
+        return result_df
+    return None
 
 
+'''
+# Example usage
 if __name__ == "__main__":
-    main()
+    example_indicators = ["RBWTICKMc1", "C2406121350446455"]
+    df = fetch_data_by_indicators(example_indicators, "data_input/RBOB.xlsx")
+    print(df.info())
+'''

BIN
WTI/data_input/RBOB.xlsx


BIN
WTI/eta/1.WTI_update_data.xlsx


BIN
WTI/eta/RBOB_Daily.xlsx


BIN
WTI/eta/RBOB_Monthly.xlsx


+ 0 - 48
WTI/run_all.py

@@ -1,48 +0,0 @@
-import subprocess
-import sys
-import os
-
-# 要按顺序执行的脚本列表
-SCRIPTS_TO_RUN = [
-    "1.1Rbob_api.py",
-    "1.2Rbob.py"
-]
-
-def run_scripts():
-    """按顺序执行脚本,遇到错误就停止"""
-    for script in SCRIPTS_TO_RUN:
-        print(f"\n开始执行: {script}")
-        
-        # 获取脚本的完整路径
-        script_path = os.path.join(os.path.dirname(__file__), script)
-        
-        if not os.path.exists(script_path):
-            print(f"错误: 找不到脚本 {script_path}")
-            return 1
-            
-        try:
-            # 执行脚本并实时显示输出
-            process = subprocess.run(
-                [sys.executable, script_path],
-                check=True,  # 如果脚本返回非零状态码,会抛出异常
-                text=True
-            )
-        except subprocess.CalledProcessError as e:
-            print(f"\n执行 {script} 时出错:")
-            print(f"返回代码: {e.returncode}")
-            if e.stderr:
-                print("错误信息:")
-                print(e.stderr)
-            return 1
-        except Exception as e:
-            print(f"\n执行 {script} 时发生异常:")
-            print(str(e))
-            return 1
-            
-        print(f"成功执行: {script}")
-    
-    print("\n所有脚本执行完成")
-    return 0
-
-if __name__ == "__main__":
-    sys.exit(run_scripts())

BIN
WTI/update.xlsx


+ 10 - 0
model_metrics.json

@@ -0,0 +1,10 @@
+[
+    {
+        "train_mse": 0.6056,
+        "test_mse": 1.442,
+        "train_r2": 0.9912,
+        "test_r2": -0.1592,
+        "evaluation_date": "2025-05-13 15:11:24",
+        "model_type": "XGBoost without tuning"
+    }
+]

BIN
update.xlsx