|
@@ -17,19 +17,18 @@ import (
|
|
|
"time"
|
|
|
)
|
|
|
|
|
|
-
|
|
|
func AddSourceChangesVisitorsCovid() (err error) {
|
|
|
utils.FileLog.Info("爬取谷歌出行记录 开始爬取:")
|
|
|
defer func() {
|
|
|
if err != nil {
|
|
|
utils.FileLog.Info("爬取谷歌出行记录失败 Err:" + err.Error())
|
|
|
- msg := "失败提醒"+"AddSourceChangesVisitorsCovid ErrMsg:"+err.Error()
|
|
|
+ msg := "失败提醒" + "AddSourceChangesVisitorsCovid ErrMsg:" + err.Error()
|
|
|
go alarm_msg.SendAlarmMsg(msg, 3)
|
|
|
}
|
|
|
}()
|
|
|
- fileName, err :=GetSourceChangesVisitorsCovid()
|
|
|
+ fileName, err := GetSourceChangesVisitorsCovid()
|
|
|
if err != nil {
|
|
|
- err = errors.New("爬取谷歌出行记录失败"+err.Error())
|
|
|
+ err = errors.New("爬取谷歌出行记录失败" + err.Error())
|
|
|
return
|
|
|
}
|
|
|
if fileName == "" {
|
|
@@ -38,7 +37,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
|
|
|
}
|
|
|
fs, err := os.Open(fileName)
|
|
|
if err != nil {
|
|
|
- err = errors.New("打开文件失败"+err.Error())
|
|
|
+ err = errors.New("打开文件失败" + err.Error())
|
|
|
return
|
|
|
}
|
|
|
|
|
@@ -53,9 +52,9 @@ func AddSourceChangesVisitorsCovid() (err error) {
|
|
|
lastItem, err := models.GetLatestBaseFromChangesVisitorsCovid()
|
|
|
if err != nil {
|
|
|
if err.Error() != utils.ErrNoRow() {
|
|
|
- err = errors.New("查询最新的记录失败"+err.Error())
|
|
|
+ err = errors.New("查询最新的记录失败" + err.Error())
|
|
|
return
|
|
|
- }else{
|
|
|
+ } else {
|
|
|
err = nil
|
|
|
}
|
|
|
}
|
|
@@ -71,7 +70,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
|
|
|
row, tErr := r.Read()
|
|
|
|
|
|
if tErr != nil && tErr != io.EOF {
|
|
|
- err = errors.New("读取内容失败 "+ tErr.Error())
|
|
|
+ err = errors.New("读取内容失败 " + tErr.Error())
|
|
|
return
|
|
|
}
|
|
|
|
|
@@ -83,16 +82,16 @@ func AddSourceChangesVisitorsCovid() (err error) {
|
|
|
count = 0
|
|
|
tErr = models.AddBaseFromChangesVisitorsCovidMulti(list)
|
|
|
if tErr != nil {
|
|
|
- err = errors.New("批量新增失败 "+ tErr.Error())
|
|
|
+ err = errors.New("批量新增失败 " + tErr.Error())
|
|
|
return
|
|
|
|
|
|
}
|
|
|
list = make([]*models.BaseFromChangesVisitorsCovid, 0)
|
|
|
}
|
|
|
- if len(row) >= 9{
|
|
|
+ if len(row) >= 9 {
|
|
|
tmp := new(models.BaseFromChangesVisitorsCovid)
|
|
|
tmp.Entity = row[0]
|
|
|
- if !filterCountry(tmp.Entity) { //只需要五个国家的数据
|
|
|
+ if !filterCountry(tmp.Entity) { //只需要五个国家的数据
|
|
|
continue
|
|
|
}
|
|
|
tmp.Code = row[1]
|
|
@@ -124,7 +123,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
|
|
|
tmp.CreateTime = now
|
|
|
tmp.ModifyTime = now
|
|
|
|
|
|
- if day.Format(utils.FormatDate) <= lastDay.Format(utils.FormatDate) && lastItem != nil { //如果是10天内的数据判断数据库中是否已存在
|
|
|
+ if day.Format(utils.FormatDate) <= lastDay.Format(utils.FormatDate) && lastItem != nil { //如果是10天内的数据判断数据库中是否已存在
|
|
|
_, tErr = models.GetBaseFromChangesVisitorsCovidByEntityDay(tmp.Entity, row[2])
|
|
|
if tErr == nil {
|
|
|
//已存在记录,则跳过
|
|
@@ -132,7 +131,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
|
|
|
}
|
|
|
}
|
|
|
list = append(list, tmp)
|
|
|
- count ++
|
|
|
+ count++
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -140,7 +139,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
|
|
|
//批量新增
|
|
|
tErr := models.AddBaseFromChangesVisitorsCovidMulti(list)
|
|
|
if tErr != nil {
|
|
|
- err = errors.New("批量新增失败 "+ tErr.Error())
|
|
|
+ err = errors.New("批量新增失败 " + tErr.Error())
|
|
|
return
|
|
|
}
|
|
|
}
|
|
@@ -148,7 +147,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
|
|
|
//处理文件后删除下载的内容
|
|
|
err = os.Remove(fileName)
|
|
|
if err != nil {
|
|
|
- err = errors.New("删除文件失败 "+ err.Error())
|
|
|
+ err = errors.New("删除文件失败 " + err.Error())
|
|
|
return
|
|
|
}
|
|
|
utils.FileLog.Info("爬取谷歌出行记录 入库成功")
|
|
@@ -158,7 +157,7 @@ func AddSourceChangesVisitorsCovid() (err error) {
|
|
|
// GetSourceChangesVisitorsCovid 爬取谷歌出行记录
|
|
|
func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
|
|
|
options := []chromedp.ExecAllocatorOption{
|
|
|
- chromedp.WindowSize(1920,1080),
|
|
|
+ chromedp.WindowSize(1920, 1080),
|
|
|
chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36`),
|
|
|
}
|
|
|
options = append(chromedp.DefaultExecAllocatorOptions[:], options...)
|
|
@@ -176,10 +175,10 @@ func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
|
|
|
ctx, cancel = context.WithTimeout(ctx, 200*time.Second)
|
|
|
defer cancel()
|
|
|
|
|
|
-
|
|
|
// set up a channel so we can block later while we monitor the download
|
|
|
// progress
|
|
|
done := make(chan string, 1)
|
|
|
+ canceled := make(chan bool, 1)
|
|
|
// set up a listener to watch the download events and close the channel
|
|
|
// when complete this could be expanded to handle multiple downloads
|
|
|
// through creating a guid map, monitor download urls via
|
|
@@ -195,6 +194,14 @@ func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
|
|
|
done <- ev.GUID
|
|
|
close(done)
|
|
|
utils.FileLog.Info("爬取谷歌出行记录 download finished")
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if ev.State == browser.DownloadProgressStateCanceled {
|
|
|
+ canceled <- true
|
|
|
+ close(canceled)
|
|
|
+ utils.FileLog.Info("爬取谷歌出行记录 download canceled")
|
|
|
+ return
|
|
|
}
|
|
|
}
|
|
|
})
|
|
@@ -214,9 +221,9 @@ func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
|
|
|
chromedp.Navigate(`https://ourworldindata.org/grapher/changes-visitors-covid`),
|
|
|
// wait for footer element is visible (ie, page is loaded)
|
|
|
chromedp.WaitVisible(`.GrapherComponent`),
|
|
|
- chromedp.ScrollIntoView(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart-click-download")]`),
|
|
|
+ chromedp.ScrollIntoView(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart_click_download")]`),
|
|
|
|
|
|
- chromedp.Click(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart-click-download")]`, chromedp.NodeReady),
|
|
|
+ chromedp.Click(`//figure[contains(@data-grapher-src,"changes-visitors-covid")]//ul//li//a[contains(@data-track-note, "chart_click_download")]`, chromedp.NodeReady),
|
|
|
// configure headless browser downloads. note that
|
|
|
// SetDownloadBehaviorBehaviorAllowAndName is preferred here over
|
|
|
// SetDownloadBehaviorBehaviorAllow so that the file will be named as
|
|
@@ -225,7 +232,7 @@ func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
|
|
|
browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).
|
|
|
WithDownloadPath(dirPath).
|
|
|
WithEventsEnabled(true),
|
|
|
- chromedp.Click(`//div//button[contains(@data-track-note, "chart-download-csv")]`, chromedp.NodeVisible),
|
|
|
+ chromedp.Click(`//div//button[contains(@data-track-note, "chart_download_csv")]`, chromedp.NodeVisible),
|
|
|
)
|
|
|
if err != nil {
|
|
|
//log.Fatal(err)
|
|
@@ -234,7 +241,15 @@ func GetSourceChangesVisitorsCovid() (filePathStr string, err error) {
|
|
|
//log.Print("end time"+time.Now().Format("2006-01-02_15:04:05.999"))
|
|
|
|
|
|
// This will block until the chromedp listener closes the channel
|
|
|
- guid := <-done
|
|
|
+ //guid := <-done
|
|
|
+ var guid string
|
|
|
+ select {
|
|
|
+ case <-canceled:
|
|
|
+ err = fmt.Errorf("download canceled")
|
|
|
+ return
|
|
|
+ case p := <-done:
|
|
|
+ guid = p
|
|
|
+ }
|
|
|
|
|
|
// We can predict the exact file location and name here because of how we
|
|
|
// configured SetDownloadBehavior and WithDownloadPath
|
|
@@ -258,4 +273,4 @@ func filterCountry(entity string) bool {
|
|
|
default:
|
|
|
return false
|
|
|
}
|
|
|
-}
|
|
|
+}
|