xiexiaoyuan 2 жил өмнө
parent
commit
7308bcafc6

+ 98 - 0
services/source_changes_visitors_covid.go

@@ -0,0 +1,98 @@
+package services
+
+import (
+	"context"
+	"fmt"
+	"github.com/chromedp/cdproto/browser"
+	"github.com/chromedp/chromedp"
+	"log"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+func GetSourceChangesVisitorsCovid(dirPath string) (filePathStr string, err error) {
+	options := []chromedp.ExecAllocatorOption{
+		chromedp.WindowSize(1920,1080),
+		chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36`),
+	}
+	options = append(chromedp.DefaultExecAllocatorOptions[:], options...)
+	//创建chrome窗口
+	allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), options...)
+
+	// create chrome instance
+	ctx, cancel := chromedp.NewContext(
+		allocCtx,
+		chromedp.WithLogf(log.Printf),
+	)
+	defer cancel()
+
+	// create a timeout
+	ctx, cancel = context.WithTimeout(ctx, 200*time.Second)
+	defer cancel()
+
+
+	// set up a channel so we can block later while we monitor the download
+	// progress
+	done := make(chan string, 1)
+	// set up a listener to watch the download events and close the channel
+	// when complete this could be expanded to handle multiple downloads
+	// through creating a guid map, monitor download urls via
+	// EventDownloadWillBegin, etc
+	chromedp.ListenTarget(ctx, func(v interface{}) {
+		if ev, ok := v.(*browser.EventDownloadProgress); ok {
+			completed := "(unknown)"
+			if ev.TotalBytes != 0 {
+				completed = fmt.Sprintf("%0.2f%%", ev.ReceivedBytes/ev.TotalBytes*100.0)
+			}
+			log.Printf("state: %s, completed: %s\n", ev.State.String(), completed)
+			if ev.State == browser.DownloadProgressStateCompleted {
+				done <- ev.GUID
+				close(done)
+				log.Print("download finished")
+			}
+		}
+	})
+
+	if dirPath == "" {
+		dirPath, err = os.Getwd()
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	//log.Print("start time"+time.Now().Format("2006-01-02_15:04:05.999"))
+
+	// navigate to a page, wait for an element, click
+
+	err = chromedp.Run(ctx,
+		chromedp.Navigate(`https://ourworldindata.org/grapher/changes-visitors-covid`),
+		// wait for footer element is visible (ie, page is loaded)
+		chromedp.WaitVisible(`.GrapherComponent`),
+		chromedp.ScrollIntoView(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart-click-download")]`),
+
+		chromedp.Click(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart-click-download")]`, chromedp.NodeReady),
+		// configure headless browser downloads. note that
+		// SetDownloadBehaviorBehaviorAllowAndName is preferred here over
+		// SetDownloadBehaviorBehaviorAllow so that the file will be named as
+		// the GUID. please note that it only works with 92.0.4498.0 or later
+		// due to issue 1204880, see https://bugs.chromium.org/p/chromium/issues/detail?id=1204880
+		browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).
+			WithDownloadPath(dirPath).
+			WithEventsEnabled(true),
+		chromedp.Click(`//div//button[contains(@data-track-note, "chart-download-csv")]`, chromedp.NodeVisible),
+	)
+	if err != nil {
+		log.Fatal(err)
+	}
+	//log.Print("end time"+time.Now().Format("2006-01-02_15:04:05.999"))
+
+	// This will block until the chromedp listener closes the channel
+	guid := <-done
+
+	// We can predict the exact file location and name here because of how we
+	// configured SetDownloadBehavior and WithDownloadPath
+	filePathStr = filepath.Join(dirPath, guid)
+	log.Printf("wrote %s", filePathStr)
+	return
+}