data_processor.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. package ruizide
  2. import (
  3. "context"
  4. "encoding/json"
  5. "errors"
  6. "eta/eta_data_analysis/models"
  7. "eta/eta_data_analysis/utils"
  8. "fmt"
  9. "github.com/xuri/excelize/v2"
  10. "io"
  11. "log"
  12. "os"
  13. "path/filepath"
  14. "time"
  15. "github.com/chromedp/chromedp"
  16. )
  17. // 定义选择器
  18. var (
  19. excelDir = utils.RZD_EXCEL_PATH
  20. downloadDir = utils.RZD_DOWNLOAD_PATH
  21. rzdLoginPath = utils.RZD_LOGIN_PATH
  22. rzdBatchSize = 500
  23. tableNameList = []string{
  24. "Oil_Demand_Signals_Weekly_Report",
  25. "Oil_Supply_Analysis",
  26. "Supply_Revision_Analysis",
  27. "Oil_Demand_Analysis_Product_Detail",
  28. "Oil_Demand_Analysis_Region",
  29. "Oil_Demand_Analysis_Scenario",
  30. "Oil_Demand_Analysis_Continent",
  31. "Oil_Demand_Analysis_Country",
  32. "Oil_Demand_Analysis_Product_Category",
  33. "Oil_Demand_Analysis_Sector_Category",
  34. "Oil_Demand_Analysis_Sector_Detail",
  35. "Oil_Market_Cube_Upstream_Supply_Oil_Quality_API",
  36. "Oil_Market_Cube_Upstream_Supply_Oil_Quality_Sulphur",
  37. "Oil_Market_Cube_Upstream_Supply_Capacity_Capacity",
  38. "Oil_Market_Cube_Upstream_Supply_Production",
  39. "Oil_Market_Cube_Upstream_Supply_Production_Wo_Seasonality",
  40. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Reference_Production",
  41. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Target_Production",
  42. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Target_Cut",
  43. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Actual_Cut",
  44. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Compliance",
  45. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Production_Subject_To_Cut",
  46. "Oil_Market_Cube_Products_Demand_Products_Demand_Mean",
  47. "Oil_Market_Cube_Products_Demand_Products_Demand+Sigma",
  48. "Oil_Market_Cube_Products_Demand_Products_Demand-Sigma",
  49. "Oil_Market_Cube_Balances_Total_Liquids_Balances",
  50. "Oil_Market_Cube_Geography_Latitude",
  51. "Oil_Market_Cube_Geography_Longitude",
  52. }
  53. )
  54. // 处理数据下载的步骤
  55. func downloadData(ctx context.Context) error {
  56. // Analytics Library
  57. if err := chromedp.Run(ctx,
  58. chromedp.Sleep(5*time.Second), // 考虑移除这一行,如果不必要的话
  59. chromedp.Navigate(rzdLoginPath),
  60. chromedp.WaitVisible(`a.mat-tooltip-trigger.analytics.home__link-card-icon[href="/analytics"]`, chromedp.ByQuery), // 等待元素可见
  61. chromedp.Click(`a.mat-tooltip-trigger.analytics.home__link-card-icon[href="/analytics"]`, chromedp.ByQuery), // 点击链接
  62. chromedp.Sleep(5*time.Second),
  63. ); err != nil {
  64. return fmt.Errorf("下载 Analytics Library 数据错误: %v", err)
  65. }
  66. if err := chromedp.Run(ctx,
  67. chromedp.WaitVisible(`#intro-home-page-step1`, chromedp.ByID), // 等待 input 元素可见
  68. chromedp.SetValue(`#intro-home-page-step1`, "oil demand signals weekly report", chromedp.ByID), // 设置值
  69. chromedp.SendKeys(`#intro-home-page-step1`, "\u000D", chromedp.ByID), // 模拟按下回车键 (\u000D 是回车的 Unicode)
  70. chromedp.Sleep(5*time.Second),
  71. ); err != nil {
  72. return fmt.Errorf("设置 input 标签值时发生错误: %v", err)
  73. }
  74. if err := chromedp.Run(ctx,
  75. // 等待第一个 download-btns div 和下载按钮可见
  76. chromedp.WaitVisible(`//div[contains(@class, 'download-btns')][1]//ul//li//button[contains(@class, 'mat-tooltip-trigger') and contains(@class, 'download__excel') and normalize-space(text())='Data']`, chromedp.BySearch),
  77. // 点击第一个 div 中的 Excel 下载按钮
  78. chromedp.Click(`//div[contains(@class, 'download-btns')][1]//ul//li//button[contains(@class, 'mat-tooltip-trigger') and contains(@class, 'download__excel') and normalize-space(text())='Data']`, chromedp.BySearch),
  79. // 可选:等待下载完成,可以调整等待时间
  80. chromedp.Sleep(5*time.Second), // 根据下载时间设置
  81. ); err != nil {
  82. return fmt.Errorf("点击第一个下载按钮时发生错误: %v", err)
  83. }
  84. // 解析文件移动到目标目录
  85. if err := WaitAndRenameDownloadedFile("Oil_Demand_Signals_Weekly_Report_"+utils.GetCurrentYearMonth()+".xlsx", excelDir); err != nil {
  86. return err
  87. }
  88. return nil
  89. }
  90. // 等待下载文件并重命名
  91. func WaitAndRenameDownloadedFile(newFileName, targetDir string) error {
  92. // 等待一段时间以确保文件下载完成
  93. time.Sleep(100 * time.Second) // 可能需要根据实际情况调整
  94. // 查找下载目录中的文件
  95. files, err := filepath.Glob(filepath.Join(downloadDir, "*.xlsx"))
  96. if err != nil {
  97. return fmt.Errorf("查找文件时出错: %v", err)
  98. }
  99. // 如果没有找到文件,返回错误
  100. if len(files) == 0 {
  101. return fmt.Errorf("未找到任何下载的文件")
  102. }
  103. // 找到最新的文件
  104. var latestFile string
  105. var latestTime time.Time
  106. for _, file := range files {
  107. info, err := os.Stat(file)
  108. if err != nil {
  109. return fmt.Errorf("获取文件信息时出错: %v", err)
  110. }
  111. if info.ModTime().After(latestTime) {
  112. latestTime = info.ModTime()
  113. latestFile = file
  114. }
  115. }
  116. // 目标文件的完整路径
  117. targetFilePath := filepath.Join(targetDir, newFileName)
  118. // 重命名并移动到目标目录
  119. if latestFile != "" {
  120. if err := moveFile(latestFile, targetFilePath); err != nil {
  121. return fmt.Errorf("重命名文件时出错: %v", err)
  122. }
  123. // 打印重命名后的文件名
  124. utils.FileLog.Info("文件重命名并移动到: ", targetFilePath)
  125. fmt.Printf("文件重命名并移动到: %s\n", targetFilePath)
  126. }
  127. return nil
  128. }
  // moveFile copies the contents of source into destination, creating or
  // truncating destination, and then pauses for 60 seconds before returning.
  //
  // Despite its name the source file is NOT removed: the deletion step is
  // intentionally disabled, so the original stays in place.
  //
  // The destination is closed explicitly and its Close error is reported,
  // because a failed close can mean the copied data never reached disk.
  func moveFile(source, destination string) (err error) {
  	srcFile, err := os.Open(source)
  	if err != nil {
  		return fmt.Errorf("打开源文件时出错: %w", err)
  	}
  	// The source is only read, so its Close error carries no information.
  	defer func() {
  		_ = srcFile.Close()
  	}()

  	dstFile, err := os.Create(destination)
  	if err != nil {
  		return fmt.Errorf("创建目标文件时出错: %w", err)
  	}

  	if _, err = io.Copy(dstFile, srcFile); err != nil {
  		// The copy error is the one worth reporting; still release the handle.
  		_ = dstFile.Close()
  		return fmt.Errorf("复制文件时出错: %w", err)
  	}
  	// Close before sleeping so the handle is not held open during the pause
  	// and a failed flush is surfaced to the caller.
  	if err = dstFile.Close(); err != nil {
  		return fmt.Errorf("关闭目标文件时出错: %w", err)
  	}

  	// NOTE(review): purpose of this fixed pause is not evident from this file
  	// (source deletion is disabled) — kept to preserve existing pacing.
  	time.Sleep(60 * time.Second)
  	return nil
  }
  155. func createPath(paths []string) (err error) {
  156. for _, path := range paths {
  157. if path == "" {
  158. continue
  159. }
  160. _, err = os.Stat(path)
  161. if err != nil {
  162. if os.IsNotExist(err) {
  163. if err = os.MkdirAll(path, os.ModePerm); err != nil {
  164. fmt.Printf("睿咨得创建目录时出错: %v\n", err)
  165. utils.FileLog.Error("睿咨得创建目录时出错: %v", err)
  166. return
  167. }
  168. } else {
  169. return
  170. }
  171. }
  172. }
  173. return
  174. }
  175. // ResolverNet 解析网页数据,下载文件
  176. func ResolverNet(_ context.Context) (err error) {
  177. if err = createPath([]string{excelDir}); err != nil {
  178. return
  179. }
  180. // 创建 chromedp 执行上下文
  181. options := append(chromedp.DefaultExecAllocatorOptions[:3],
  182. //关闭无头模式
  183. chromedp.Flag("headless", false),
  184. chromedp.Flag("disable-blink-features", "AutomationControlled"),
  185. chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36`),
  186. chromedp.Flag("safebrowsing.enabled", true), // 启用安全浏览
  187. )
  188. parentCtx, cancel := chromedp.NewExecAllocator(context.Background(), options...)
  189. ctx, ctxCancel := chromedp.NewContext(parentCtx)
  190. defer func() {
  191. cancel()
  192. ctxCancel()
  193. }()
  194. // 启动 Chrome 实例
  195. if err = chromedp.Run(ctx); err != nil {
  196. utils.FileLog.Error("睿咨得启动 Chrome 实例时出错:", err.Error())
  197. fmt.Printf("睿咨得启动 Chrome 实例时出错: %v\n", err)
  198. return
  199. }
  200. // 登录操作
  201. if err = login(ctx); err != nil {
  202. utils.FileLog.Error("睿咨得登录错误:", err.Error())
  203. fmt.Printf("睿咨得登录错误: %v\n", err)
  204. return
  205. }
  206. fmt.Printf("登录成功")
  207. // 下载数据
  208. if err = downloadData(ctx); err != nil {
  209. utils.FileLog.Error("睿咨得数据下载错误:", err.Error())
  210. fmt.Printf("睿咨得数据下载错误: %v\n", err)
  211. return
  212. }
  213. utils.FileLog.Info("睿咨得数据下载完成")
  214. // 解析表格 读取数据
  215. FileResolverV2()
  216. return
  217. }
  218. // 解析本地文件
  219. func FileResolver() error {
  220. for _, tableName := range tableNameList {
  221. var fileName string
  222. // 解析表格
  223. fileName = tableName + "_" + utils.GetCurrentYearMonth() + ".xlsx"
  224. filePath := filepath.Join(excelDir, fileName)
  225. if _, err := os.Stat(filePath); os.IsNotExist(err) {
  226. utils.FileLog.Error("文件不存在: %v", err)
  227. continue
  228. }
  229. // 打开 Excel 文件
  230. f, err := excelize.OpenFile(filePath)
  231. if err != nil {
  232. utils.FileLog.Error("无法打开 Excel 文件: %v", err)
  233. return err
  234. }
  235. // 获取所有工作表
  236. sheetNames := f.GetSheetList()
  237. for _, sheetName := range sheetNames {
  238. if tableName == "Oil_Demand_Signals_Weekly_Report" && sheetName == "Content" {
  239. utils.FileLog.Info(tableName+"跳过工作表:", sheetName)
  240. continue
  241. }
  242. utils.FileLog.Info("读取工作表: %s\n", sheetName)
  243. // 获取工作表的最大行数
  244. maxRow, err := f.GetRows(sheetName) // 直接获取所有行数据
  245. if err != nil {
  246. utils.FileLog.Error("获取工作表数据时出错: %v", err)
  247. continue
  248. }
  249. // 遍历行并打印内容
  250. var indexData []models.BaseFromRzdData
  251. for rowIndex, rowData := range maxRow {
  252. if tableName == "Oil_Supply_Analysis" && rowIndex == 0 {
  253. if rowData[0] == "Viz Date" && rowData[1] == "OilAndGasCategory" && rowData[2] == "supply_kbbld" {
  254. sheetName = "Chart1"
  255. }
  256. if rowData[0] == "Viz Date" && rowData[1] == "supply_kbbld" && rowData[2] == "Region" {
  257. sheetName = "Chart2"
  258. }
  259. if rowData[0] == "Viz Date" && rowData[1] == "CapacityDetail" && rowData[2] == "Capacity_kbbld" {
  260. sheetName = "Chart3"
  261. }
  262. if rowData[0] == "Viz Date" && rowData[1] == "Oil Classification Group" && rowData[2] == "supply_kbbld" {
  263. sheetName = "Chart4"
  264. }
  265. }
  266. processor, err := GetProcessor(tableName, sheetName)
  267. if err != nil {
  268. utils.FileLog.Error("无法获取处理器: %v", err)
  269. continue
  270. }
  271. baseFromRzdDataList, err := processor.Process(tableName, sheetName, rowIndex, rowData)
  272. if err != nil {
  273. utils.FileLog.Error("处理数据失败: %v", err)
  274. return err
  275. }
  276. fmt.Printf("处理数据成功: %v\n", baseFromRzdDataList)
  277. indexData = append(indexData, baseFromRzdDataList...)
  278. }
  279. fmt.Printf("读取工作表完成: %s\n", sheetName)
  280. utils.FileLog.Info("读取工作表完成:", sheetName)
  281. // 新增数据源指标数据
  282. if len(indexData) > 0 {
  283. for i := 0; i < len(indexData); i += rzdBatchSize {
  284. // 计算当前批次的结束索引
  285. end := i + rzdBatchSize
  286. if end > len(indexData) {
  287. end = len(indexData)
  288. }
  289. // 获取当前批次的数据
  290. batchData := indexData[i:end]
  291. // 转换成json
  292. marshal, err := json.Marshal(batchData)
  293. if err != nil {
  294. utils.FileLog.Error("json.Marshal err: %v", err)
  295. return err
  296. }
  297. // 发送 HTTP POST 请求
  298. _, err = utils.HttpPostRequest(utils.EDB_LIB_URL+utils.ADD_BATCH_RZD_DATA, string(marshal), "application/json")
  299. if err != nil {
  300. utils.FileLog.Error("postEdbLib err: %v", err)
  301. return err
  302. }
  303. }
  304. }
  305. // 新增指标库数据
  306. edbDataList := []models.EdbDataRzd{}
  307. for _, index := range indexData {
  308. // 补充 判断是否存在于指标库
  309. paramsLib := make(map[string]interface{})
  310. paramsLib["IndexCode"] = index.IndexCode
  311. paramsLib["Source"] = utils.DATA_SOURCE_RZD
  312. postEdbLib, err := httpRequestFill(paramsLib, utils.GET_RZD_EDB_INFO_BY_INDEX_CODE)
  313. if err != nil {
  314. // 有错误就不继续执行
  315. utils.FileLog.Error("postEdbLib err: %v", err)
  316. continue
  317. }
  318. var requestResponse models.RequestResponse[models.EdbInfo]
  319. err = json.Unmarshal(postEdbLib, &requestResponse)
  320. if err != nil {
  321. utils.FileLog.Error("postEdbLib err: %v", err)
  322. continue
  323. }
  324. if requestResponse.Data.EdbInfoId != 0 {
  325. edbDataRzd := models.EdbDataRzd{
  326. CreateTime: utils.GetCurrentTime(),
  327. ModifyTime: utils.GetCurrentTime(),
  328. EdbInfoId: index.BaseFromRzdIndexId,
  329. EdbCode: index.IndexCode,
  330. DataTime: index.DataTime,
  331. Value: index.Value,
  332. DataTimestamp: uint64(utils.StringToTimeFormat(index.DataTime, utils.FormatDate).UnixMilli()),
  333. }
  334. edbDataList = append(edbDataList, edbDataRzd)
  335. }
  336. }
  337. if len(edbDataList) > 0 {
  338. // 转换成json
  339. marshal, err := json.Marshal(edbDataList)
  340. if err != nil {
  341. utils.FileLog.Error("postEdbLib err: %v", err)
  342. return err
  343. }
  344. _, err = utils.HttpPostRequest(utils.EDB_LIB_URL+utils.ADD_BATCH_RZD_EDB_DATA, string(marshal), "application/json")
  345. if err != nil {
  346. // 有错误就不继续执行
  347. utils.FileLog.Error("postEdbLib err: %v", err)
  348. return err
  349. }
  350. }
  351. }
  352. }
  353. return nil
  354. }
  355. func login(ctx context.Context) error {
  356. if rzdLoginPath == "" {
  357. return errors.New("睿咨得登录页面地址未配置")
  358. }
  359. return chromedp.Run(ctx,
  360. chromedp.Navigate(rzdLoginPath),
  361. chromedp.WaitVisible(`body`, chromedp.ByQuery),
  362. chromedp.SetValue(`input[id="Username"]`, utils.RZD_USERNAME, chromedp.ByQuery),
  363. chromedp.SetValue(`input[id="Password"]`, utils.RZD_PASSWORD, chromedp.ByQuery),
  364. chromedp.WaitEnabled(`//button[text()='Login']`, chromedp.BySearch),
  365. chromedp.Sleep(5*time.Second),
  366. chromedp.Click(`//button[text()='Login']`, chromedp.BySearch),
  367. // 等待并点击登录后页面的链接
  368. /*chromedp.WaitVisible(`a[href="/home"]`, chromedp.ByQuery), // 等待 Analytics Library 链接可见
  369. chromedp.Sleep(5*time.Second), */ // 等待页面加载完成
  370. )
  371. }
  372. func httpRequestFill(data interface{}, urlMethod string) (postEdbLib []byte, err error) {
  373. // 转换成json
  374. marshal, err := json.Marshal(data)
  375. if err != nil {
  376. return nil, err
  377. }
  378. // json 转 interface
  379. var result map[string]interface{}
  380. err = json.Unmarshal(marshal, &result)
  381. if err != nil {
  382. return nil, err
  383. }
  384. postEdbLib, err = utils.PostEdbLibRequest(result, urlMethod)
  385. if err != nil {
  386. // 有错误就不继续执行
  387. log.Printf("postEdbLib err: %v", err)
  388. return nil, err
  389. }
  390. return postEdbLib, nil
  391. }
  392. // 解析本地文件
  393. func FileResolverV2() error {
  394. //获取rzd数据库分类数据
  395. rzdClassifyList, err := getRzdClassifyList()
  396. if err != nil {
  397. fmt.Printf("获取睿咨得分类数据失败: %v", err)
  398. utils.FileLog.Error("获取睿咨得分类数据失败:", err.Error())
  399. return err
  400. }
  401. RzdClassifyMap = make(map[string]*models.BaseFromRzdClassify)
  402. //更新睿咨得分类Map
  403. for _, classify := range rzdClassifyList {
  404. RzdClassifyMap[classify.ClassifyName] = classify
  405. }
  406. for _, tableName := range tableNameList {
  407. rzdProcessor, processorErr := GetRZDProcessor(tableName)
  408. if processorErr != nil {
  409. utils.FileLog.Error("获取睿咨得数据处理器:", processorErr.Error())
  410. continue
  411. }
  412. err = rzdProcessor.Process(tableName)
  413. if err != nil {
  414. utils.FileLog.Error(fmt.Sprintf("%s处理数据失败:", tableName), err.Error())
  415. continue
  416. }
  417. }
  418. return nil
  419. }