123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344 |
- package main
- import (
- "context"
- "eta/eta_data_analysis/utils"
- "fmt"
- "log"
- "os"
- "path/filepath"
- "strings"
- "time"
- "github.com/chromedp/chromedp"
- )
// downloadDir is the directory that main creates on start-up and in which
// waitAndRenameDownloadedFile looks for downloaded .xlsx files.
const downloadDir = "./downloads"

// Page URL and element selectors used while driving the Rystad Energy client
// portal.
//
// NOTE(review): several selectors below use `:contains(...)` or
// `[contains(., ...)]`. Those are jQuery / XPath constructs, not standard CSS.
// The call sites in this file pass them with chromedp.ByQuery, which resolves
// selectors through DOM.querySelector, so they presumably never match. The
// values are left unchanged here because fixing them requires switching the
// call sites to an XPath query (chromedp.BySearch) at the same time.
var (
	// rzdLoginPath is the portal entry page; it is used both for logging in
	// and as the starting point of the download flow.
	rzdLoginPath = "https://clients.rystadenergy.com/clients/"

	// clientSearchLink is the navigation link to the search page.
	clientSearchLink = `div.d-none.d-lg-flex.flex-grow-1 a[href="/clients/search/"]`

	// clientsCubeDashboardsLink is the navigation link to the Cube Dashboards page.
	clientsCubeDashboardsLink = `div.d-none.d-lg-flex.flex-grow-1 a[href="/clients/cube-dashboards/"]`

	// supplyRevisionAnalysisSelector targets the "Supply Revision Analysis"
	// hit. NOTE(review): `:contains` is not CSS; no use is visible in this file.
	supplyRevisionAnalysisSelector = `div.ais-Hits li h5:contains("Supply Revision Analysis")`

	// oilDemandAnalysisSelector targets the "Oil Demand Analysis" hit.
	// NOTE(review): `[contains(., ...)]` is XPath, not CSS; no use is visible
	// in this file.
	oilDemandAnalysisSelector = `div.ais-Hits li[contains(., 'Oil Demand Analysis')]`

	// oilSupplyAnalysisSelector targets the "Oil Supply Analysis" hit.
	// NOTE(review): `[contains(., ...)]` is XPath, not CSS.
	oilSupplyAnalysisSelector = `div.ais-Hits li[contains(., 'Oil Supply Analysis')]`

	// dateSlicerInputSelector is the date-slicer input of the report; it is
	// only referenced from the disabled code at the end of setQueryTime.
	dateSlicerInputSelector = `div.visualContainer.unselectable.readMode.hideBorder.visualHeaderBelow.droppableElement.ui-droppable div.date-slicer-control input.date-slicer-input.enable-hover`

	// downloadButtonSelector is the dashboard's "download data" button.
	downloadButtonSelector = `div.btn.btn-link.btn-sm.dashboard-action.dashboard-action--download-data`

	// oilDemandIframeSelector is the report iframe; update it to match the
	// actual iframe on the page.
	oilDemandIframeSelector = `div#WithPollingInFrame iframe`

	// tabSelectorBase is the common selector for every tab heading (h3).
	tabSelectorBase = `h3.preTextWithEllipsis`

	// Per-tab selectors of the Oil Demand Analysis dashboard.
	// NOTE(review): `:contains` is not CSS — see the note above this block.
	continentTabSelector       = tabSelectorBase + `:contains("Continent")`
	regionTabSelector          = tabSelectorBase + `:contains("Region")`
	countryTabSelector         = tabSelectorBase + `:contains("Country")`
	productCategoryTabSelector = tabSelectorBase + `:contains("Product category")`
	productDetailTabSelector   = tabSelectorBase + `:contains("Product detail")`
	sectorCategoryTabSelector  = tabSelectorBase + `:contains("Sector category")`
	sectorDetailTabSelector    = tabSelectorBase + `:contains("Sector detail")`
	scenarioTabSelector        = tabSelectorBase + `:contains("Scenario")`
)
- // 函数用于设置查询时间范围
- func setQueryTime(ctx context.Context, year string) error {
- // 在这里可以直接操作 iframe 中的元素
- var inputCount int
- if err := chromedp.Run(ctx,
- chromedp.WaitVisible(`#reportContainer`, chromedp.ByQuery),
- // 获取 reportContainer 下的第一个 iframe 的内容文档
- chromedp.ActionFunc(func(ctx context.Context) error {
- // 获取 iframe 的内容文档
- var iframeSrc string
- // 获取 iframe 的 src
- err := chromedp.Evaluate(`document.querySelector('#reportContainer iframe').src`, &iframeSrc).Do(ctx)
- if err != nil {
- return fmt.Errorf("获取 iframe ID 或 src 失败: %v", err)
- }
- // 在 iframe 的上下文中操作
- return chromedp.Run(ctx,
- // 等待 iframe 可见
- chromedp.WaitVisible(`iframe[src="`+iframeSrc+`"]`, chromedp.ByQuery),
- chromedp.Sleep(5*time.Second),
- // 在 iframe 中执行操作
- chromedp.ActionFunc(func(ctx context.Context) error {
- // 选择器
- selector := `div.landingContainer`
- // 获取元素数量
- if err := chromedp.Evaluate(`document.querySelectorAll("`+selector+`").length`, &inputCount).Do(ctx); err != nil {
- return fmt.Errorf("检查输入框失败: %v", err)
- }
- if inputCount == 0 {
- return fmt.Errorf("没有找到匹配的 div.landingContainer 标签")
- }
- return nil
- }),
- )
- }),
- ); err != nil {
- log.Fatal(err)
- }
- /*return chromedp.Run(ctx,
- chromedp.Sleep(3*time.Second),
- chromedp.WaitVisible(dateSlicerInputSelector, chromedp.ByQuery),
- chromedp.SetValue(dateSlicerInputSelector, year, chromedp.ByQuery),
- chromedp.SendKeys(dateSlicerInputSelector, "\n"), // 回车查询
- )*/
- return nil
- }
- // 函数用于点击下载按钮
- func clickDownload(ctx context.Context) error {
- return chromedp.Run(ctx, chromedp.Click(downloadButtonSelector, chromedp.ByQuery))
- }
- // 处理数据下载的步骤
- func downloadData(ctx context.Context) error {
- // Analytics Library
- if err := chromedp.Run(ctx,
- chromedp.Sleep(5*time.Second), // 考虑移除这一行,如果不必要的话
- chromedp.Navigate(rzdLoginPath),
- chromedp.WaitVisible(`div.d-none.d-lg-flex.flex-grow-1`, chromedp.ByQuery),
- chromedp.Click(clientSearchLink, chromedp.ByQuery),
- chromedp.WaitVisible(`input[class="ais-SearchBox-input rounded border py-2 px-3 shadow-sm font-size-14 w-100"]`, chromedp.ByQuery),
- chromedp.SetValue(`input[class="ais-SearchBox-input rounded border py-2 px-3 shadow-sm font-size-14 w-100"]`, "oil demand signals weekly report", chromedp.ByQuery),
- chromedp.Click(`div.ais-InfiniteHits img[src="/Static/img/icons/xls.png"]`, chromedp.ByQuery),
- ); err != nil {
- return fmt.Errorf("下载 Analytics Library 数据错误: %v", err)
- }
- // Cube Dashboards: Supply Revision Analysis
- if err := chromedp.Run(ctx,
- chromedp.WaitVisible(`div.d-none.d-lg-flex.flex-grow-1`, chromedp.ByQuery),
- chromedp.Click(clientsCubeDashboardsLink, chromedp.ByQuery),
- chromedp.Sleep(5*time.Second),
- chromedp.WaitVisible(`div.ais-Hits`, chromedp.ByQuery),
- chromedp.ActionFunc(func(ctx context.Context) error {
- var elements []string
- // 获取所有 h5 标签的文本内容
- if err := chromedp.Evaluate(`Array.from(document.querySelectorAll('div.ais-Hits li h5.text-body.overflow-hidden.mb-1.mr-3.font-weight-bold.line-height-1.dashboards-hit__name')).map(h => h.textContent)`, &elements).Do(ctx); err != nil {
- return err
- }
- // 遍历文本,查找完全匹配的元素并点击
- for i, text := range elements {
- if strings.Contains(text, "Supply Revision Analysis") {
- // 构造选择器,点击找到的匹配元素
- selector := fmt.Sprintf(`div.ais-Hits ol li:nth-child(%d) h5.text-body.overflow-hidden.mb-1.mr-3.font-weight-bold.line-height-1.dashboards-hit__name`, i+2)
- if err := chromedp.Click(selector, chromedp.ByQuery).Do(ctx); err != nil {
- return fmt.Errorf("点击 'Supply Revision Analysis' 失败: %v", err)
- }
- break // 找到后跳出循环
- }
- }
- return nil
- }),
- ); err != nil {
- return err
- }
- if err := setQueryTime(ctx, "2020"); err != nil {
- return err
- }
- if err := clickDownload(ctx); err != nil {
- return err
- }
- if err := waitAndRenameDownloadedFile("Supply_Revision_Analysis_2020.xlsx"); err != nil {
- return err
- }
- // Oil Demand Analysis
- if err := downloadOilDemandAnalysis(ctx); err != nil {
- return fmt.Errorf("下载 Oil Demand Analysis 错误: %v", err)
- }
- // Oil Supply Analysis
- if err := chromedp.Run(ctx,
- chromedp.Click(`a[href="/clients/subscription/"]`, chromedp.ByQuery),
- chromedp.Click(oilSupplyAnalysisSelector, chromedp.ByQuery),
- ); err != nil {
- return err
- }
- if err := setQueryTime(ctx, "2010"); err != nil {
- return err
- }
- if err := clickDownload(ctx); err != nil {
- return err
- }
- if err := waitAndRenameDownloadedFile("Oil_Supply_Analysis_2010.xlsx"); err != nil {
- return err
- }
- return nil
- }
- // 下载 Oil Demand Analysis 的所有标签数据
- func downloadOilDemandAnalysis(ctx context.Context) error {
- // 下载 "Continent" 标签的数据
- if err := downloadOilDemandByTab(ctx, continentTabSelector, "2015", "Oil_Demand_Continent_2015.xlsx"); err != nil {
- return err
- }
- // 下载 "Region" 标签的数据
- if err := downloadOilDemandByTab(ctx, regionTabSelector, "2015", "Oil_Demand_Region_2015.xlsx"); err != nil {
- return err
- }
- // 下载 "Country" 标签的数据
- if err := downloadOilDemandByTab(ctx, countryTabSelector, "2015", "Oil_Demand_Country_2015.xlsx"); err != nil {
- return err
- }
- // 下载 "Product_Category" 标签的数据
- if err := downloadOilDemandByTab(ctx, productCategoryTabSelector, "2015", "Oil_Demand_Product_Category_2015.xlsx"); err != nil {
- return err
- }
- // 下载 "Product_Detail" 标签的数据
- if err := downloadOilDemandByTab(ctx, productDetailTabSelector, "2015", "Oil_Demand_Product_Detail_2015.xlsx"); err != nil {
- return err
- }
- // 下载 "Sector_Category" 标签的数据
- if err := downloadOilDemandByTab(ctx, sectorCategoryTabSelector, "2015", "Oil_Demand_Sector_Category_2015.xlsx"); err != nil {
- return err
- }
- // 下载 "Sector_Detail" 标签的数据
- if err := downloadOilDemandByTab(ctx, sectorDetailTabSelector, "2015", "Oil_Demand_Sector_Detail_2015.xlsx"); err != nil {
- return err
- }
- // 下载 "Scenario" 标签的数据
- if err := downloadOilDemandByTab(ctx, scenarioTabSelector, "2015", "Oil_Demand_Scenario_2015.xlsx"); err != nil {
- return err
- }
- return nil
- }
- // 函数用于处理不同标签的下载
- func downloadOilDemandByTab(ctx context.Context, tabSelector string, year string, fileName string) error {
- // 切换到 iframe 并在 iframe 内进行操作
- if err := chromedp.Run(ctx,
- chromedp.WaitVisible(oilDemandIframeSelector, chromedp.ByQuery), // 等待 iframe 可见
- chromedp.ActionFunc(func(ctx context.Context) error {
- // 点击指定的标签
- if err := chromedp.Click(tabSelector, chromedp.ByQuery).Do(ctx); err != nil {
- return fmt.Errorf("点击标签失败: %v", err)
- }
- // 等待页面加载完成
- if err := chromedp.Sleep(2 * time.Second).Do(ctx); err != nil {
- return fmt.Errorf("等待页面加载失败: %v", err)
- }
- // 设置时间范围
- if err := setQueryTime(ctx, year); err != nil {
- return fmt.Errorf("设置查询时间失败: %v", err)
- }
- // 点击下载按钮
- if err := clickDownload(ctx); err != nil {
- return fmt.Errorf("点击下载按钮失败: %v", err)
- }
- return nil
- }),
- ); err != nil {
- return fmt.Errorf("操作失败: %v", err)
- }
- // 下载完成后,重命名文件
- if err := waitAndRenameDownloadedFile(fileName); err != nil {
- return fmt.Errorf("重命名文件失败: %v", err)
- }
- return nil
- }
- // 等待下载文件并重命名
- func waitAndRenameDownloadedFile(newFileName string) error {
- // 等待一段时间以确保文件下载完成
- time.Sleep(5 * time.Second) // 可能需要根据实际情况调整
- // 查找下载目录中的文件
- files, err := filepath.Glob(filepath.Join(downloadDir, "*.xlsx"))
- if err != nil {
- return fmt.Errorf("查找文件时出错: %v", err)
- }
- // 重命名最新的文件
- for _, file := range files {
- if err := os.Rename(file, filepath.Join(downloadDir, newFileName)); err != nil {
- return fmt.Errorf("重命名文件时出错: %v", err)
- }
- // 打印重命名后的文件名
- fmt.Printf("文件重命名为: %s\n", newFileName)
- break // 只重命名第一个找到的文件
- }
- return nil
- }
- func main() {
- // 创建下载目录
- if err := os.MkdirAll(downloadDir, os.ModePerm); err != nil {
- fmt.Printf("创建下载目录时出错: %v\n", err)
- return
- }
- // 创建 chromedp 执行上下文
- options := []chromedp.ExecAllocatorOption{
- chromedp.Flag("headless", false),
- chromedp.Flag("disable-blink-features", "AutomationControlled"),
- chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36`),
- chromedp.Flag("download.default_directory", downloadDir),
- chromedp.Flag("download.prompt_for_download", false), // 不弹出下载对话框
- chromedp.Flag("safebrowsing.enabled", true), // 启用安全浏览
- }
- allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), options...)
- defer cancel()
- ctx, cancel := chromedp.NewContext(allocCtx)
- defer cancel()
- // 启动 Chrome 实例
- if err := chromedp.Run(ctx); err != nil {
- fmt.Printf("启动 Chrome 实例时出错: %v\n", err)
- return
- }
- // 登录操作
- if err := login(ctx); err != nil {
- fmt.Printf("登录错误: %v\n", err)
- return
- }
- // 下载数据
- if err := downloadData(ctx); err != nil {
- fmt.Printf("数据下载错误: %v\n", err)
- return
- }
- fmt.Println("数据下载完成")
- }
- func login(ctx context.Context) error {
- return chromedp.Run(ctx,
- chromedp.Navigate(rzdLoginPath),
- chromedp.SetValue(`input[id="Username"]`, utils.RZD_USERNAME, chromedp.ByQuery),
- chromedp.SetValue(`input[id="Password"]`, utils.RZD_PASSWORD, chromedp.ByQuery),
- chromedp.WaitEnabled(`//button[text()='Login']`, chromedp.BySearch),
- chromedp.Click(`//button[text()='Login']`, chromedp.BySearch),
- chromedp.Sleep(5*time.Second),
- // 等待并点击登录后页面的链接
- chromedp.WaitVisible(`a[href="/clients/"]`, chromedp.ByQuery), // 等待 Analytics Library 链接可见
- chromedp.Sleep(5*time.Second), // 等待页面加载完成
- )
- }
|