edb_data_calculate_stl.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. package services
  2. import (
  3. "encoding/json"
  4. "eta/eta_index_lib/models"
  5. "eta/eta_index_lib/utils"
  6. "fmt"
  7. "os"
  8. "os/exec"
  9. "path/filepath"
  10. "strconv"
  11. "time"
  12. "github.com/tealeg/xlsx"
  13. )
  14. const (
  15. ALL_DATE = iota + 1
  16. LAST_N_YEARS
  17. RANGE_DATE
  18. RANGE_DATE_TO_NOW
  19. )
  20. type EdbStlConfig struct {
  21. EdbInfoId int `description:"指标ID"`
  22. CalculateStlConfigId int `description:"计算的STL配置ID"`
  23. DataRangeType int `description:"数据时间类型:1-全部时间,2-最近N年,3-区间设置,4-区间设置(至今)"`
  24. StartDate string `description:"开始日期"`
  25. EndDate string `description:"结束日期"`
  26. LastNYear int `description:"最近N年"`
  27. Period int `description:"数据的周期,根据频率设置"`
  28. Seasonal int `description:"季节性成分窗口大小,一般为period+1,可以设置为大于period的正奇数"`
  29. Trend int `description:"趋势成分窗口大小,一般为period+1,可以设置为大于period的正奇数"`
  30. Fraction float64 `description:"趋势项的平滑系数,默认为0.2,区间为[0-1]"`
  31. Robust bool `description:"是否使用稳健方法: true(使用) false(不使用) "`
  32. TrendDeg int `description:"分解中趋势多项式次数,默认为1,不超过5的正整数"`
  33. SeasonalDeg int `description:"分解中季节性多项次数,默认为1,不超过5的正整数"`
  34. LowPassDeg int `description:"分解中低通滤波器次数,默认为1,不超过5的正整数"`
  35. }
  36. type ChartEdbInfo struct {
  37. EdbInfoId int
  38. Title string
  39. Unit string
  40. Frequency string
  41. MaxData float64
  42. MinData float64
  43. ClassifyId int
  44. ClassifyPath string
  45. DataList []*EdbData
  46. }
  47. type EdbData struct {
  48. Value float64
  49. DataTime string
  50. DataTimestamp int64
  51. }
  52. func RefreshStlData(edbInfoId int) (msg string, err error) {
  53. calculateStl, err := models.GetEdbInfoCalculateMappingDetail(edbInfoId)
  54. if err != nil {
  55. return
  56. }
  57. fromEdbInfo, err := models.GetEdbInfoById(calculateStl.FromEdbInfoId)
  58. if err != nil {
  59. return
  60. }
  61. var stlConfig EdbStlConfig
  62. if err = json.Unmarshal([]byte(calculateStl.CalculateFormula), &stlConfig); err != nil {
  63. return
  64. }
  65. var condition string
  66. var pars []interface{}
  67. switch stlConfig.DataRangeType {
  68. case ALL_DATE:
  69. case LAST_N_YEARS:
  70. condition += " AND data_time >=?"
  71. year := time.Now().Year()
  72. lastDate := time.Date(year-stlConfig.LastNYear, 1, 1, 0, 0, 0, 0, time.Local)
  73. pars = append(pars, lastDate)
  74. case RANGE_DATE:
  75. condition = " AND data_time >=? AND data_time <=?"
  76. pars = append(pars, stlConfig.StartDate, stlConfig.EndDate)
  77. case RANGE_DATE_TO_NOW:
  78. condition = " AND data_time >=?"
  79. pars = append(pars, stlConfig.StartDate)
  80. }
  81. condition += " AND edb_code =?"
  82. pars = append(pars, fromEdbInfo.EdbCode)
  83. edbData, err := models.GetEdbDataByCondition(fromEdbInfo.Source, fromEdbInfo.SubSource, condition, pars)
  84. if err != nil {
  85. return
  86. }
  87. var condMsg string
  88. if stlConfig.Period < 2 || stlConfig.Period > len(edbData) {
  89. condMsg += "period必须是一个大于等于2的正整数,且必须小于时间序列的长度"
  90. }
  91. if stlConfig.Seasonal < 3 || stlConfig.Seasonal%2 == 0 || stlConfig.Seasonal <= stlConfig.Period {
  92. if condMsg != "" {
  93. condMsg += "\n"
  94. }
  95. condMsg += "seasonal必须是一个大于等于3的奇整数,且必须大于period"
  96. }
  97. if stlConfig.Trend < 3 || stlConfig.Trend%2 == 0 || stlConfig.Trend <= stlConfig.Period {
  98. if condMsg != "" {
  99. condMsg += "\n"
  100. }
  101. condMsg += "trend必须是一个大于等于3的奇整数,且必须大于period"
  102. }
  103. if stlConfig.Fraction < 0 || stlConfig.Fraction > 1 {
  104. if condMsg != "" {
  105. condMsg += "\n"
  106. }
  107. condMsg += "fraction必须是一个介于[0-1]之间"
  108. }
  109. if 1 > stlConfig.TrendDeg || stlConfig.TrendDeg > 5 {
  110. if condMsg != "" {
  111. condMsg += "\n"
  112. }
  113. condMsg += "trend_deg请设置成1-5的整数"
  114. }
  115. if 1 > stlConfig.SeasonalDeg || stlConfig.SeasonalDeg > 5 {
  116. if condMsg != "" {
  117. condMsg += "\n"
  118. }
  119. condMsg += "seasonal_deg请设置成1-5的整数"
  120. }
  121. if 1 > stlConfig.LowPassDeg || stlConfig.LowPassDeg > 5 {
  122. if condMsg != "" {
  123. condMsg += "\n"
  124. }
  125. condMsg += "low_pass_deg请设置成1-5的整数"
  126. }
  127. if condMsg != "" {
  128. msg = condMsg
  129. err = fmt.Errorf("参数错误")
  130. return
  131. }
  132. dir, _ := os.Executable()
  133. exPath := filepath.Dir(dir) + "/static/stl_tmp"
  134. err = CheckOsPathAndMake(exPath)
  135. if err != nil {
  136. msg = "计算失败"
  137. return
  138. }
  139. loadFilePath := exPath + "/" + strconv.Itoa(fromEdbInfo.SysUserId) + "_" + time.Now().Format(utils.FormatDateTimeUnSpace) + ".xlsx"
  140. err = SaveToExcel(edbData, loadFilePath)
  141. if err != nil {
  142. msg = "保存数据到Excel失败"
  143. return
  144. }
  145. defer os.Remove(loadFilePath)
  146. saveFilePath := exPath + "/" + strconv.Itoa(fromEdbInfo.SysUserId) + "_" + time.Now().Format(utils.FormatDateTimeUnSpace) + "_res" + ".xlsx"
  147. err = execStlPythonCode(loadFilePath, saveFilePath, stlConfig.Period, stlConfig.Seasonal, stlConfig.Trend, stlConfig.TrendDeg, stlConfig.SeasonalDeg, stlConfig.LowPassDeg, stlConfig.Fraction, stlConfig.Robust)
  148. if err != nil {
  149. msg = "执行Python代码失败"
  150. return
  151. }
  152. trendChart, seasonalChart, residualChart, err := ParseStlExcel(saveFilePath)
  153. if err != nil {
  154. msg = "解析Excel失败"
  155. return
  156. }
  157. defer os.Remove(saveFilePath)
  158. edbInfo, err := models.GetEdbInfoById(edbInfoId)
  159. if err != nil {
  160. msg = "获取指标信息失败"
  161. return
  162. }
  163. err = SyncUpdateRelationEdbInfo(edbInfo, stlConfig, trendChart, seasonalChart, residualChart)
  164. if err != nil {
  165. msg = "更新关联指标失败"
  166. return
  167. }
  168. return
  169. }
  170. func SyncUpdateRelationEdbInfo(edbInfo *models.EdbInfo, config EdbStlConfig, trendData, seasonalData, residualData ChartEdbInfo) (err error) {
  171. configId, err := models.GetCalculateStlConfigMappingIdByEdbInfoId(edbInfo.EdbInfoId)
  172. if err != nil {
  173. return
  174. }
  175. mappingList, err := models.GetCalculateStlConfigMappingByConfigId(configId)
  176. if err != nil {
  177. return
  178. }
  179. for _, v := range mappingList {
  180. edbInfo, er := models.GetEdbInfoById(v.EdbInfoId)
  181. if er != nil {
  182. continue
  183. }
  184. switch v.StlEdbType {
  185. case 1:
  186. // 趋势指标
  187. er = UpdateStlEdbData(edbInfo, config, edbInfo.EdbCode, trendData)
  188. case 2:
  189. // 季节性指标
  190. er = UpdateStlEdbData(edbInfo, config, edbInfo.EdbCode, seasonalData)
  191. case 3:
  192. // 残差指标
  193. er = UpdateStlEdbData(edbInfo, config, edbInfo.EdbCode, residualData)
  194. default:
  195. utils.FileLog.Info("未知的stlEdbType类型, mapping:%v", v)
  196. continue
  197. }
  198. if er != nil {
  199. utils.FileLog.Error("更新指标数据失败, edbInfoId:%v, err:%v", v.EdbInfoId, er)
  200. err = er
  201. continue
  202. }
  203. }
  204. // 同步更新计算配置
  205. newStlConf := &models.CalculateStlConfig{
  206. CalculateStlConfigId: configId,
  207. Config: edbInfo.CalculateFormula,
  208. ModifyTime: time.Now(),
  209. }
  210. err = newStlConf.Update([]string{"config", "modify_time"})
  211. return
  212. }
  213. func UpdateStlEdbData(edbInfo *models.EdbInfo, config EdbStlConfig, edbCode string, edbData ChartEdbInfo) (err error) {
  214. var dataList []*models.EdbDataCalculateStl
  215. for _, v := range edbData.DataList {
  216. dataTime, _ := time.Parse(utils.FormatDate, v.DataTime)
  217. dataList = append(dataList, &models.EdbDataCalculateStl{
  218. EdbInfoId: edbData.EdbInfoId,
  219. EdbCode: edbCode,
  220. DataTime: dataTime,
  221. Value: v.Value,
  222. CreateTime: time.Now(),
  223. ModifyTime: time.Now(),
  224. DataTimestamp: dataTime.UnixMilli(),
  225. })
  226. }
  227. err = models.DeleteAndInsertEdbDataCalculateStl(edbCode, dataList)
  228. if err != nil {
  229. return
  230. }
  231. models.ModifyEdbInfoDataStatus(int64(edbInfo.EdbInfoId), edbInfo.Source, edbInfo.SubSource, edbInfo.EdbCode)
  232. maxAndMinItem, _ := models.GetEdbInfoMaxAndMinInfo(edbInfo.Source, edbInfo.SubSource, edbInfo.EdbCode)
  233. if maxAndMinItem != nil {
  234. err = models.ModifyEdbInfoMaxAndMinInfo(edbInfo.EdbInfoId, maxAndMinItem)
  235. if err != nil {
  236. return
  237. }
  238. }
  239. bconfig, _ := json.Marshal(config)
  240. edbInfo.CalculateFormula = string(bconfig)
  241. edbInfo.ModifyTime = time.Now()
  242. err = edbInfo.Update([]string{"calculate_formula", "modify_time"})
  243. if err != nil {
  244. return
  245. }
  246. return
  247. }
  248. func CheckOsPathAndMake(path string) (err error) {
  249. if _, er := os.Stat(path); os.IsNotExist(er) {
  250. err = os.MkdirAll(path, os.ModePerm)
  251. }
  252. return
  253. }
  254. func SaveToExcel(data []*models.EdbInfoSearchData, filePath string) (err error) {
  255. xlsxFile := xlsx.NewFile()
  256. sheetNew, err := xlsxFile.AddSheet("Tmp")
  257. if err != nil {
  258. return
  259. }
  260. titleRow := sheetNew.AddRow()
  261. titleRow.AddCell().SetString("日期")
  262. titleRow.AddCell().SetString("值")
  263. for i, d := range data {
  264. row := sheetNew.Row(i + 1)
  265. row.AddCell().SetString(d.DataTime)
  266. row.AddCell().SetFloat(d.Value)
  267. }
  268. err = xlsxFile.Save(filePath)
  269. if err != nil {
  270. return
  271. }
  272. return
  273. }
  274. func ParseStlExcel(excelPath string) (TrendChart, SeasonalChart, ResidualChart ChartEdbInfo, err error) {
  275. file, err := xlsx.OpenFile(excelPath)
  276. if err != nil {
  277. return
  278. }
  279. for _, sheet := range file.Sheets {
  280. switch sheet.Name {
  281. case "季节":
  282. var MinData, MaxData float64
  283. for i, row := range sheet.Rows {
  284. if i == 0 {
  285. continue
  286. }
  287. var date string
  288. var dataTimestamp int64
  289. if row.Cells[0].Type() == xlsx.CellTypeNumeric {
  290. dataNum, _ := strconv.ParseFloat(row.Cells[0].Value, 64)
  291. tmpTime := xlsx.TimeFromExcelTime(dataNum, false)
  292. date = tmpTime.Format(utils.FormatDate)
  293. dataTimestamp = tmpTime.UnixMilli()
  294. } else {
  295. timeDate, _ := time.Parse(utils.FormatDateTime, date)
  296. date = timeDate.Format(utils.FormatDate)
  297. dataTimestamp = timeDate.UnixMilli()
  298. }
  299. fv, _ := row.Cells[1].Float()
  300. if MinData == 0 || fv < MinData {
  301. MinData = fv
  302. }
  303. if MaxData == 0 || fv > MaxData {
  304. MaxData = fv
  305. }
  306. SeasonalChart.DataList = append(SeasonalChart.DataList, &EdbData{DataTime: date, Value: fv, DataTimestamp: dataTimestamp})
  307. }
  308. SeasonalChart.MinData = MinData
  309. SeasonalChart.MaxData = MaxData
  310. case "趋势":
  311. var MinData, MaxData float64
  312. for i, row := range sheet.Rows {
  313. if i == 0 {
  314. continue
  315. }
  316. var date string
  317. var dataTimestamp int64
  318. if row.Cells[0].Type() == xlsx.CellTypeNumeric {
  319. dataNum, _ := strconv.ParseFloat(row.Cells[0].Value, 64)
  320. tmpTime := xlsx.TimeFromExcelTime(dataNum, false)
  321. date = tmpTime.Format(utils.FormatDate)
  322. dataTimestamp = tmpTime.UnixMilli()
  323. } else {
  324. timeDate, _ := time.Parse(utils.FormatDateTime, date)
  325. date = timeDate.Format(utils.FormatDate)
  326. dataTimestamp = timeDate.UnixMilli()
  327. }
  328. fv, _ := row.Cells[1].Float()
  329. if MinData == 0 || fv < MinData {
  330. MinData = fv
  331. }
  332. if MaxData == 0 || fv > MaxData {
  333. MaxData = fv
  334. }
  335. TrendChart.DataList = append(TrendChart.DataList, &EdbData{DataTime: date, Value: fv, DataTimestamp: dataTimestamp})
  336. }
  337. TrendChart.MaxData = MaxData
  338. TrendChart.MinData = MinData
  339. case "残差":
  340. var MinData, MaxData float64
  341. for i, row := range sheet.Rows {
  342. if i == 0 {
  343. continue
  344. }
  345. var date string
  346. var dataTimestamp int64
  347. if row.Cells[0].Type() == xlsx.CellTypeNumeric {
  348. dataNum, _ := strconv.ParseFloat(row.Cells[0].Value, 64)
  349. tmpTime := xlsx.TimeFromExcelTime(dataNum, false)
  350. date = tmpTime.Format(utils.FormatDate)
  351. dataTimestamp = tmpTime.UnixMilli()
  352. } else {
  353. timeDate, _ := time.Parse(utils.FormatDateTime, date)
  354. date = timeDate.Format(utils.FormatDate)
  355. dataTimestamp = timeDate.UnixMilli()
  356. }
  357. fv, _ := row.Cells[1].Float()
  358. if MinData == 0 || fv < MinData {
  359. MinData = fv
  360. }
  361. if MaxData == 0 || fv > MaxData {
  362. MaxData = fv
  363. }
  364. ResidualChart.DataList = append(ResidualChart.DataList, &EdbData{DataTime: date, Value: fv, DataTimestamp: dataTimestamp})
  365. }
  366. ResidualChart.MaxData = MaxData
  367. ResidualChart.MinData = MinData
  368. }
  369. }
  370. return
  371. }
  372. func execStlPythonCode(path, toPath string, period, seasonal, trend, trendDeg, seasonalDeg, lowPassDeg int, fraction float64, robust bool) (err error) {
  373. pythonCode := `
  374. import pandas as pd
  375. from statsmodels.tsa.seasonal import STL
  376. from statsmodels.nonparametric.smoothers_lowess import lowess
  377. from statsmodels.tsa.stattools import adfuller
  378. from statsmodels.stats.diagnostic import acorr_ljungbox
  379. import numpy as np
  380. import json
  381. import warnings
  382. warnings.filterwarnings('ignore')
  383. file_path = r"%s"
  384. df = pd.read_excel(file_path, parse_dates=['日期'])
  385. df.set_index('日期', inplace=True)
  386. period = %d
  387. seasonal = %d
  388. trend = %d
  389. fraction = %g
  390. seasonal_deg = %d
  391. trend_deg = %d
  392. low_pass_deg = %d
  393. robust = %s
  394. stl = STL(
  395. df['值'],
  396. period=period,
  397. seasonal=seasonal,
  398. trend=trend,
  399. low_pass=None,
  400. seasonal_deg=seasonal_deg,
  401. trend_deg=trend_deg,
  402. low_pass_deg=low_pass_deg,
  403. seasonal_jump=1,
  404. trend_jump=1,
  405. low_pass_jump=1,
  406. robust=robust
  407. )
  408. result = stl.fit()
  409. smoothed = lowess(df['值'], df.index, frac=fraction)
  410. trend_lowess = smoothed[:, 1]
  411. # 季节图
  412. seasonal_component = result.seasonal
  413. # 趋势图
  414. trend_lowess_series = pd.Series(trend_lowess, index=df.index)
  415. # 残差图
  416. residual_component = df['值'] - trend_lowess - seasonal_component
  417. # 计算打印残差的均值
  418. residual_mean = np.mean(residual_component)
  419. # 计算打印残差的方差
  420. residual_var = np.std(residual_component)
  421. # 计算打印残差的ADF检验结果, 输出p-value
  422. adf_result = adfuller(residual_component)
  423. # 根据p-value判断是否平稳
  424. lb_test = acorr_ljungbox(residual_component, lags=period, return_df=True)
  425. output_file = r"%s"
  426. with pd.ExcelWriter(output_file) as writer:
  427. # 保存季节图
  428. pd.Series(seasonal_component, index=df.index, name='值').to_frame().reset_index().rename(columns={'index': '日期'}).to_excel(writer, sheet_name='季节', index=False)
  429. # 保存趋势图
  430. trend_lowess_series.to_frame(name='值').reset_index().rename(columns={'index': '日期'}).to_excel(writer, sheet_name='趋势', index=False)
  431. # 保存残差图
  432. pd.Series(residual_component, index=df.index, name='值').to_frame().reset_index().rename(columns={'index': '日期'}).to_excel(writer, sheet_name='残差', index=False)
  433. output = json.dumps({
  434. 'residual_mean': residual_mean,
  435. 'residual_var': residual_var,
  436. 'adf_p_value': adf_result[1],
  437. 'lb_test_p_value': lb_test['lb_pvalue'].values[0],
  438. 'lb_test_stat': lb_test['lb_stat'].values[0]
  439. })
  440. print(output)
  441. `
  442. robustStr := "True"
  443. if !robust {
  444. robustStr = "False"
  445. }
  446. pythonCode = fmt.Sprintf(pythonCode, path, period, seasonal, trend, fraction, seasonalDeg, trendDeg, lowPassDeg, robustStr, toPath)
  447. // cmd := exec.Command(`python3`, "-c", pythonCode)
  448. cmd := exec.Command(`D:\conda\envs\py311\python`, "-c", pythonCode)
  449. _, err = cmd.CombinedOutput()
  450. if err != nil {
  451. return
  452. }
  453. defer cmd.Process.Kill()
  454. return
  455. }