|
@@ -0,0 +1,98 @@
|
|
|
+package services
|
|
|
+
|
|
|
+import (
|
|
|
+ "context"
|
|
|
+ "fmt"
|
|
|
+ "github.com/chromedp/cdproto/browser"
|
|
|
+ "github.com/chromedp/chromedp"
|
|
|
+ "log"
|
|
|
+ "os"
|
|
|
+ "path/filepath"
|
|
|
+ "time"
|
|
|
+)
|
|
|
+
|
|
|
+func GetSourceChangesVisitorsCovid(dirPath string) (filePathStr string, err error) {
|
|
|
+ options := []chromedp.ExecAllocatorOption{
|
|
|
+ chromedp.WindowSize(1920,1080),
|
|
|
+ chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36`),
|
|
|
+ }
|
|
|
+ options = append(chromedp.DefaultExecAllocatorOptions[:], options...)
|
|
|
+ //创建chrome窗口
|
|
|
+ allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), options...)
|
|
|
+
|
|
|
+ // create chrome instance
|
|
|
+ ctx, cancel := chromedp.NewContext(
|
|
|
+ allocCtx,
|
|
|
+ chromedp.WithLogf(log.Printf),
|
|
|
+ )
|
|
|
+ defer cancel()
|
|
|
+
|
|
|
+ // create a timeout
|
|
|
+ ctx, cancel = context.WithTimeout(ctx, 200*time.Second)
|
|
|
+ defer cancel()
|
|
|
+
|
|
|
+
|
|
|
+ // set up a channel so we can block later while we monitor the download
|
|
|
+ // progress
|
|
|
+ done := make(chan string, 1)
|
|
|
+ // set up a listener to watch the download events and close the channel
|
|
|
+ // when complete this could be expanded to handle multiple downloads
|
|
|
+ // through creating a guid map, monitor download urls via
|
|
|
+ // EventDownloadWillBegin, etc
|
|
|
+ chromedp.ListenTarget(ctx, func(v interface{}) {
|
|
|
+ if ev, ok := v.(*browser.EventDownloadProgress); ok {
|
|
|
+ completed := "(unknown)"
|
|
|
+ if ev.TotalBytes != 0 {
|
|
|
+ completed = fmt.Sprintf("%0.2f%%", ev.ReceivedBytes/ev.TotalBytes*100.0)
|
|
|
+ }
|
|
|
+ log.Printf("state: %s, completed: %s\n", ev.State.String(), completed)
|
|
|
+ if ev.State == browser.DownloadProgressStateCompleted {
|
|
|
+ done <- ev.GUID
|
|
|
+ close(done)
|
|
|
+ log.Print("download finished")
|
|
|
+ }
|
|
|
+ }
|
|
|
+ })
|
|
|
+
|
|
|
+ if dirPath == "" {
|
|
|
+ dirPath, err = os.Getwd()
|
|
|
+ if err != nil {
|
|
|
+ log.Fatal(err)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //log.Print("start time"+time.Now().Format("2006-01-02_15:04:05.999"))
|
|
|
+
|
|
|
+ // navigate to a page, wait for an element, click
|
|
|
+
|
|
|
+ err = chromedp.Run(ctx,
|
|
|
+ chromedp.Navigate(`https://ourworldindata.org/grapher/changes-visitors-covid`),
|
|
|
+ // wait for footer element is visible (ie, page is loaded)
|
|
|
+ chromedp.WaitVisible(`.GrapherComponent`),
|
|
|
+ chromedp.ScrollIntoView(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart-click-download")]`),
|
|
|
+
|
|
|
+ chromedp.Click(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart-click-download")]`, chromedp.NodeReady),
|
|
|
+ // configure headless browser downloads. note that
|
|
|
+ // SetDownloadBehaviorBehaviorAllowAndName is preferred here over
|
|
|
+ // SetDownloadBehaviorBehaviorAllow so that the file will be named as
|
|
|
+ // the GUID. please note that it only works with 92.0.4498.0 or later
|
|
|
+ // due to issue 1204880, see https://bugs.chromium.org/p/chromium/issues/detail?id=1204880
|
|
|
+ browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).
|
|
|
+ WithDownloadPath(dirPath).
|
|
|
+ WithEventsEnabled(true),
|
|
|
+ chromedp.Click(`//div//button[contains(@data-track-note, "chart-download-csv")]`, chromedp.NodeVisible),
|
|
|
+ )
|
|
|
+ if err != nil {
|
|
|
+ log.Fatal(err)
|
|
|
+ }
|
|
|
+ //log.Print("end time"+time.Now().Format("2006-01-02_15:04:05.999"))
|
|
|
+
|
|
|
+ // This will block until the chromedp listener closes the channel
|
|
|
+ guid := <-done
|
|
|
+
|
|
|
+ // We can predict the exact file location and name here because of how we
|
|
|
+ // configured SetDownloadBehavior and WithDownloadPath
|
|
|
+ filePathStr = filepath.Join(dirPath, guid)
|
|
|
+ log.Printf("wrote %s", filePathStr)
|
|
|
+ return
|
|
|
+}
|