|
@@ -4,6 +4,7 @@ import (
|
|
|
"context"
|
|
|
"encoding/json"
|
|
|
models "eta/eta_crawler/models"
|
|
|
+ "eta/eta_crawler/services/alarm_msg"
|
|
|
"eta/eta_crawler/utils"
|
|
|
"fmt"
|
|
|
"github.com/beego/beego/v2/core/logs"
|
|
@@ -17,7 +18,7 @@ import (
|
|
|
"github.com/chromedp/chromedp"
|
|
|
)
|
|
|
|
|
|
-const (
|
|
|
+var (
|
|
|
lyLoginPath = "https://www.fao.com.cn/"
|
|
|
)
|
|
|
|
|
@@ -69,6 +70,10 @@ func LyDataDeal(cont context.Context) (err error) {
|
|
|
err = fetchReportData(ctx, product, category, report, keywords)
|
|
|
if err != nil {
|
|
|
fmt.Printf("获取数据错误: %s -> %s -> %s: %v\n", product, category, report, err)
|
|
|
+ // 您看文章的速度太快了,歇一会再看吧
|
|
|
+ if strings.Contains(err.Error(), "您看文章的速度太快了,歇一会再看吧") {
|
|
|
+ return
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -204,6 +209,12 @@ func fetchReportData(ctx context.Context, product, category, report string, keyw
|
|
|
err = processReport(ctx, product, category, key, keywords)
|
|
|
if err != nil {
|
|
|
logs.Error("处理报告错误: %s: %s: %s: %s: %v", product, category, report, key, err)
|
|
|
+ if strings.Contains(err.Error(), "您看文章的速度太快了,歇一会再看吧") {
|
|
|
+ // 如果报告内容包含 “您看文章的速度太快了,歇一会再看吧” 则停止处理,发短信通知
|
|
|
+ // 发送短信通知
|
|
|
+ alarm_msg.SendAlarmMsg(fmt.Sprintf("粮油商务网-爬取指标数据被限制,请稍后重试, ErrMsg: %s", err.Error()), 1)
|
|
|
+ return nil
|
|
|
+ }
|
|
|
continue
|
|
|
}
|
|
|
|
|
@@ -217,6 +228,8 @@ func fetchReportData(ctx context.Context, product, category, report string, keyw
|
|
|
recordId, err := models.AddLyIndexRecord(&models.BaseFromLyIndexRecord{
|
|
|
CreateTime: utils.GetCurrentTime(),
|
|
|
ModifyTime: utils.GetCurrentTime(),
|
|
|
+ Product: product,
|
|
|
+ Category: category,
|
|
|
Url: key,
|
|
|
DataTime: format,
|
|
|
})
|
|
@@ -385,18 +398,6 @@ func extractReportURLs(htmlContent, keyword string) map[string]string {
|
|
|
func processReport(ctx context.Context, product string, category string, reportURL string, keywords []string) error {
|
|
|
// Navigate to the report page
|
|
|
var reportContent string
|
|
|
- /*// 使用 XPath 构造选择器
|
|
|
- clickSelector := fmt.Sprintf(`//a[@href='%s']`, reportURL)
|
|
|
-
|
|
|
- err := chromedp.Run(ctx,
|
|
|
- chromedp.WaitReady(clickSelector, chromedp.BySearch), // 等待选择器准备好
|
|
|
- chromedp.Click(clickSelector, chromedp.BySearch), // 点击目标链接
|
|
|
- chromedp.Sleep(5*time.Second), // 等待页面加载
|
|
|
- chromedp.OuterHTML("html", &reportContent), // 获取页面 HTML 内容
|
|
|
- )
|
|
|
- if err != nil {
|
|
|
- return err
|
|
|
- }*/
|
|
|
err := chromedp.Run(ctx,
|
|
|
chromedp.Navigate(lyLoginPath+reportURL),
|
|
|
chromedp.WaitVisible("body", chromedp.ByQuery), // 等待 body 元素可见,确保页面已加载
|
|
@@ -407,6 +408,11 @@ func processReport(ctx context.Context, product string, category string, reportU
|
|
|
return err
|
|
|
}
|
|
|
|
|
|
+ // 如果文章内容包含 “您看文章的速度太快了,歇一会再看吧” 则返回指定错误
|
|
|
+ if strings.Contains(reportContent, "您看文章的速度太快了,歇一会再看吧") {
|
|
|
+ return fmt.Errorf("您看文章的速度太快了,歇一会再看吧")
|
|
|
+ }
|
|
|
+
|
|
|
var lyIndexDataList []models.BaseFromLyData
|
|
|
// Process the data based on keywords
|
|
|
for _, keyword := range keywords {
|