Explorar o código

fix: 谷歌出行a标签属性被修改导致的超时问题

hsun hai 1 ano
pai
achega
d8e4a08b16
Modificouse 1 ficheiro con 37 adicións e 22 borrados
  1. 37 22
      services/source_changes_visitors_covid.go

+ 37 - 22
services/source_changes_visitors_covid.go

@@ -17,19 +17,18 @@ import (
 	"time"
 )
 
-
 func AddSourceChangesVisitorsCovid() (err error) {
 	utils.FileLog.Info("爬取谷歌出行记录 开始爬取:")
 	defer func() {
 		if err != nil {
 			utils.FileLog.Info("爬取谷歌出行记录失败 Err:" + err.Error())
-			msg := "失败提醒"+"AddSourceChangesVisitorsCovid ErrMsg:"+err.Error()
+			msg := "失败提醒" + "AddSourceChangesVisitorsCovid ErrMsg:" + err.Error()
 			go alarm_msg.SendAlarmMsg(msg, 3)
 		}
 	}()
-	fileName, err :=GetSourceChangesVisitorsCovid()
+	fileName, err := GetSourceChangesVisitorsCovid()
 	if err != nil {
-		err = errors.New("爬取谷歌出行记录失败"+err.Error())
+		err = errors.New("爬取谷歌出行记录失败" + err.Error())
 		return
 	}
 	if fileName == "" {
@@ -38,7 +37,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
 	}
 	fs, err := os.Open(fileName)
 	if err != nil {
-		err = errors.New("打开文件失败"+err.Error())
+		err = errors.New("打开文件失败" + err.Error())
 		return
 	}
 
@@ -53,9 +52,9 @@ func AddSourceChangesVisitorsCovid() (err error) {
 	lastItem, err := models.GetLatestBaseFromChangesVisitorsCovid()
 	if err != nil {
 		if err.Error() != utils.ErrNoRow() {
-			err = errors.New("查询最新的记录失败"+err.Error())
+			err = errors.New("查询最新的记录失败" + err.Error())
 			return
-		}else{
+		} else {
 			err = nil
 		}
 	}
@@ -71,7 +70,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
 		row, tErr := r.Read()
 
 		if tErr != nil && tErr != io.EOF {
-			err = errors.New("读取内容失败 "+ tErr.Error())
+			err = errors.New("读取内容失败 " + tErr.Error())
 			return
 		}
 
@@ -83,16 +82,16 @@ func AddSourceChangesVisitorsCovid() (err error) {
 			count = 0
 			tErr = models.AddBaseFromChangesVisitorsCovidMulti(list)
 			if tErr != nil {
-				err = errors.New("批量新增失败 "+ tErr.Error())
+				err = errors.New("批量新增失败 " + tErr.Error())
 				return
 
 			}
 			list = make([]*models.BaseFromChangesVisitorsCovid, 0)
 		}
-		if len(row) >= 9{
+		if len(row) >= 9 {
 			tmp := new(models.BaseFromChangesVisitorsCovid)
 			tmp.Entity = row[0]
-			if !filterCountry(tmp.Entity) {  //只需要五个国家的数据
+			if !filterCountry(tmp.Entity) { //只需要五个国家的数据
 				continue
 			}
 			tmp.Code = row[1]
@@ -124,7 +123,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
 			tmp.CreateTime = now
 			tmp.ModifyTime = now
 
-			if day.Format(utils.FormatDate) <= lastDay.Format(utils.FormatDate) && lastItem != nil {  //如果是10天内的数据判断数据库中是否已存在
+			if day.Format(utils.FormatDate) <= lastDay.Format(utils.FormatDate) && lastItem != nil { //如果是10天内的数据判断数据库中是否已存在
 				_, tErr = models.GetBaseFromChangesVisitorsCovidByEntityDay(tmp.Entity, row[2])
 				if tErr == nil {
 					//已存在记录,则跳过
@@ -132,7 +131,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
 				}
 			}
 			list = append(list, tmp)
-			count ++
+			count++
 		}
 	}
 
@@ -140,7 +139,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
 		//批量新增
 		tErr := models.AddBaseFromChangesVisitorsCovidMulti(list)
 		if tErr != nil {
-			err = errors.New("批量新增失败 "+ tErr.Error())
+			err = errors.New("批量新增失败 " + tErr.Error())
 			return
 		}
 	}
@@ -148,7 +147,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
 	//处理文件后删除下载的内容
 	err = os.Remove(fileName)
 	if err != nil {
-		err = errors.New("删除文件失败 "+ err.Error())
+		err = errors.New("删除文件失败 " + err.Error())
 		return
 	}
 	utils.FileLog.Info("爬取谷歌出行记录 入库成功")
@@ -158,7 +157,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
 // GetSourceChangesVisitorsCovid 爬取谷歌出行记录
 func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
 	options := []chromedp.ExecAllocatorOption{
-		chromedp.WindowSize(1920,1080),
+		chromedp.WindowSize(1920, 1080),
 		chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36`),
 	}
 	options = append(chromedp.DefaultExecAllocatorOptions[:], options...)
@@ -176,10 +175,10 @@ func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
 	ctx, cancel = context.WithTimeout(ctx, 200*time.Second)
 	defer cancel()
 
-
 	// set up a channel so we can block later while we monitor the download
 	// progress
 	done := make(chan string, 1)
+	canceled := make(chan bool, 1)
 	// set up a listener to watch the download events and close the channel
 	// when complete; this could be expanded to handle multiple downloads
 	// through creating a guid map, monitor download urls via
@@ -195,6 +194,14 @@ func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
 				done <- ev.GUID
 				close(done)
 				utils.FileLog.Info("爬取谷歌出行记录 download finished")
+				return
+			}
+
+			if ev.State == browser.DownloadProgressStateCanceled {
+				canceled <- true
+				close(canceled)
+				utils.FileLog.Info("爬取谷歌出行记录 download canceled")
+				return
 			}
 		}
 	})
@@ -214,9 +221,9 @@ func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
 		chromedp.Navigate(`https://ourworldindata.org/grapher/changes-visitors-covid`),
 		// wait until the .GrapherComponent element is visible (i.e., the page is loaded)
 		chromedp.WaitVisible(`.GrapherComponent`),
-		chromedp.ScrollIntoView(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart-click-download")]`),
+		chromedp.ScrollIntoView(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart_click_download")]`),
 
-		chromedp.Click(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart-click-download")]`, chromedp.NodeReady),
+		chromedp.Click(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart_click_download")]`, chromedp.NodeReady),
 		// configure headless browser downloads. note that
 		// SetDownloadBehaviorBehaviorAllowAndName is preferred here over
 		// SetDownloadBehaviorBehaviorAllow so that the file will be named as
@@ -225,7 +232,7 @@ func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
 		browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).
 			WithDownloadPath(dirPath).
 			WithEventsEnabled(true),
-		chromedp.Click(`//div//button[contains(@data-track-note, "chart-download-csv")]`, chromedp.NodeVisible),
+		chromedp.Click(`//div//button[contains(@data-track-note, "chart_download_csv")]`, chromedp.NodeVisible),
 	)
 	if err != nil {
 		//log.Fatal(err)
@@ -234,7 +241,15 @@ func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
 	//log.Print("end time"+time.Now().Format("2006-01-02_15:04:05.999"))
 
 	// This will block until the chromedp listener signals that the download
 	// either completed (done) or was canceled (canceled)
-	guid := <-done
+	//guid := <-done
+	var guid string
+	select {
+	case <-canceled:
+		err = fmt.Errorf("download canceled")
+		return
+	case p := <-done:
+		guid = p
+	}
 
 	// We can predict the exact file location and name here because of how we
 	// configured SetDownloadBehavior and WithDownloadPath
@@ -258,4 +273,4 @@ func filterCountry(entity string) bool {
 	default:
 		return false
 	}
-}
+}