data_processor.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. package main
  2. import (
  3. "context"
  4. "eta/eta_data_analysis/utils"
  5. "fmt"
  6. "os"
  7. "path/filepath"
  8. "time"
  9. "github.com/chromedp/chromedp"
  10. )
  11. const downloadDir = "./downloads"
  12. // 定义选择器
  13. var (
  14. rzdLoginPath = "https://clients.rystadenergy.com/clients/"
  15. clientSearchLink = `a[href="/clients/search/"]`
  16. supplyRevisionAnalysisSelector = `div.ais-Hits li[contains(., 'Supply Revision Analysis')]`
  17. oilDemandAnalysisSelector = `div.ais-Hits li[contains(., 'Oil Demand Analysis')]`
  18. oilSupplyAnalysisSelector = `div.ais-Hits li[contains(., 'Oil Supply Analysis')]`
  19. dateSlicerInputSelector = `div.visual.customPadding.allow-deferred-rendering.visual-slicer input.date-slicer-input.enable-hover:first-of-type`
  20. downloadButtonSelector = `div.btn.btn-link.btn-sm.dashboard-action.dashboard-action--download-data`
  21. )
  22. // 函数用于设置查询时间范围
  23. func setQueryTime(ctx context.Context, year string) error {
  24. return chromedp.Run(ctx,
  25. chromedp.WaitVisible(dateSlicerInputSelector, chromedp.ByQuery),
  26. chromedp.SetValue(dateSlicerInputSelector, year, chromedp.ByQuery),
  27. chromedp.SendKeys(dateSlicerInputSelector, "\n"), // 回车查询
  28. )
  29. }
  30. // 函数用于点击下载按钮
  31. func clickDownload(ctx context.Context) error {
  32. return chromedp.Run(ctx, chromedp.Click(downloadButtonSelector, chromedp.ByQuery))
  33. }
  34. // 处理数据下载的步骤
  35. func downloadData(ctx context.Context) error {
  36. // Analytics Library
  37. if err := chromedp.Run(ctx,
  38. chromedp.WaitVisible(clientSearchLink, chromedp.ByQuery),
  39. chromedp.Click(clientSearchLink, chromedp.ByQuery),
  40. chromedp.SetValue(`input[class="ais-SearchBox-input rounded border py-2 px-3 shadow-sm font-size-14 w-100"]`, "oil demand signals weekly report", chromedp.ByQuery),
  41. chromedp.Click(`div.ais-InfiniteHits li:first-child img[src="/Static/img/icons/xls.png"]`, chromedp.ByQuery),
  42. ); err != nil {
  43. return err
  44. }
  45. // Cube Dashboards: Supply Revision Analysis
  46. if err := chromedp.Run(ctx,
  47. chromedp.WaitVisible(`a[href="/clients/subscription/"]`, chromedp.ByQuery),
  48. chromedp.Click(`a[href="/clients/subscription/"]`, chromedp.ByQuery),
  49. chromedp.Click(supplyRevisionAnalysisSelector, chromedp.ByQuery),
  50. ); err != nil {
  51. return err
  52. }
  53. if err := setQueryTime(ctx, "2020"); err != nil {
  54. return err
  55. }
  56. if err := clickDownload(ctx); err != nil {
  57. return err
  58. }
  59. if err := waitAndRenameDownloadedFile("Supply_Revision_Analysis_2020.xlsx"); err != nil {
  60. return err
  61. }
  62. // Oil Demand Analysis
  63. if err := chromedp.Run(ctx,
  64. chromedp.Click(`a[href="/clients/subscription/"]`, chromedp.ByQuery),
  65. chromedp.Click(oilDemandAnalysisSelector, chromedp.ByQuery),
  66. ); err != nil {
  67. return err
  68. }
  69. if err := setQueryTime(ctx, "2015"); err != nil {
  70. return err
  71. }
  72. if err := clickDownload(ctx); err != nil {
  73. return err
  74. }
  75. if err := waitAndRenameDownloadedFile("Oil_Demand_Analysis_2015.xlsx"); err != nil {
  76. return err
  77. }
  78. // Oil Supply Analysis
  79. if err := chromedp.Run(ctx,
  80. chromedp.Click(`a[href="/clients/subscription/"]`, chromedp.ByQuery),
  81. chromedp.Click(oilSupplyAnalysisSelector, chromedp.ByQuery),
  82. ); err != nil {
  83. return err
  84. }
  85. if err := setQueryTime(ctx, "2010"); err != nil {
  86. return err
  87. }
  88. if err := clickDownload(ctx); err != nil {
  89. return err
  90. }
  91. if err := waitAndRenameDownloadedFile("Oil_Supply_Analysis_2010.xlsx"); err != nil {
  92. return err
  93. }
  94. return nil
  95. }
  96. // 等待下载文件并重命名
  97. func waitAndRenameDownloadedFile(newFileName string) error {
  98. // 等待一段时间以确保文件下载完成
  99. time.Sleep(5 * time.Second) // 可能需要根据实际情况调整
  100. // 查找下载目录中的文件
  101. files, err := filepath.Glob(filepath.Join(downloadDir, "*.xlsx"))
  102. if err != nil {
  103. return fmt.Errorf("查找文件时出错: %v", err)
  104. }
  105. // 重命名最新的文件
  106. for _, file := range files {
  107. if err := os.Rename(file, filepath.Join(downloadDir, newFileName)); err != nil {
  108. return fmt.Errorf("重命名文件时出错: %v", err)
  109. }
  110. // 打印重命名后的文件名
  111. fmt.Printf("文件重命名为: %s\n", newFileName)
  112. break // 只重命名第一个找到的文件
  113. }
  114. return nil
  115. }
  116. func main() {
  117. // 创建下载目录
  118. if err := os.MkdirAll(downloadDir, os.ModePerm); err != nil {
  119. fmt.Printf("创建下载目录时出错: %v\n", err)
  120. return
  121. }
  122. // 创建 chromedp 执行上下文
  123. options := []chromedp.ExecAllocatorOption{
  124. chromedp.Flag("headless", false),
  125. chromedp.Flag("disable-blink-features", "AutomationControlled"),
  126. chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36`),
  127. chromedp.Flag("download.default_directory", downloadDir),
  128. chromedp.Flag("download.prompt_for_download", false), // 不弹出下载对话框
  129. chromedp.Flag("safebrowsing.enabled", true), // 启用安全浏览
  130. }
  131. allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), options...)
  132. defer cancel()
  133. ctx, cancel := chromedp.NewContext(allocCtx)
  134. defer cancel()
  135. // 启动 Chrome 实例
  136. if err := chromedp.Run(ctx); err != nil {
  137. fmt.Printf("启动 Chrome 实例时出错: %v\n", err)
  138. return
  139. }
  140. // 登录操作
  141. if err := login(ctx); err != nil {
  142. fmt.Printf("登录错误: %v\n", err)
  143. return
  144. }
  145. // 下载数据
  146. if err := downloadData(ctx); err != nil {
  147. fmt.Printf("数据下载错误: %v\n", err)
  148. return
  149. }
  150. fmt.Println("数据下载完成")
  151. }
  152. func login(ctx context.Context) error {
  153. return chromedp.Run(ctx,
  154. chromedp.Navigate(rzdLoginPath),
  155. chromedp.SetValue(`input[id="Username"]`, utils.LY_USERNAME, chromedp.ByQuery),
  156. chromedp.SetValue(`input[id="Username"]`, utils.LY_USERNAME, chromedp.ByQuery),
  157. chromedp.SetValue(`input[id="Password"]`, utils.LY_PASSWORD, chromedp.ByQuery),
  158. chromedp.WaitEnabled(`//button[text()='Login']`, chromedp.BySearch),
  159. chromedp.Click(`//button[text()='Login']`, chromedp.BySearch),
  160. chromedp.Sleep(5*time.Second),
  161. // 等待并点击登录后页面的链接
  162. chromedp.WaitVisible(`a[href="/clients/"]`, chromedp.ByQuery), // 等待 Analytics Library 链接可见
  163. chromedp.Sleep(5*time.Second), // 等待页面加载完成
  164. )
  165. }