瀏覽代碼

Merge branch 'refs/heads/eta_2.0.8_ly_0804@guomengyuan' into debug

gmy 7 月之前
父節點
當前提交
d6a5cefcc2

+ 2 - 0
models/base_from_ly_index_record.go

@@ -11,6 +11,8 @@ type BaseFromLyIndexRecord struct {
 	BaseFromLyIndexRecordId int    `orm:"column(base_from_ly_index_record_id);pk"` // 指标记录ID
 	CreateTime              string `orm:"column(create_time)"`                     // 创建时间
 	ModifyTime              string `orm:"column(modify_time)"`                     // 修改时间
+	Product                 string `orm:"column(product)"`                         // 产品
+	Category                string `orm:"column(category)"`                        // 分类
 	Url                     string `orm:"column(url)"`                             // 指标页面地址
 	DataTime                string `orm:"column(data_time)"`                       // 数据日期
 }

+ 19 - 13
services/liangyou/commodity_liangyou.go

@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	models "eta/eta_crawler/models"
+	"eta/eta_crawler/services/alarm_msg"
 	"eta/eta_crawler/utils"
 	"fmt"
 	"github.com/beego/beego/v2/core/logs"
@@ -17,7 +18,7 @@ import (
 	"github.com/chromedp/chromedp"
 )
 
-const (
+var (
 	lyLoginPath = "https://www.fao.com.cn/"
 )
 
@@ -69,6 +70,10 @@ func LyDataDeal(cont context.Context) (err error) {
 				err = fetchReportData(ctx, product, category, report, keywords)
 				if err != nil {
 					fmt.Printf("获取数据错误: %s -> %s -> %s: %v\n", product, category, report, err)
+					// 您看文章的速度太快了,歇一会再看吧
+					if strings.Contains(err.Error(), "您看文章的速度太快了,歇一会再看吧") {
+						return
+					}
 				}
 			}
 		}
@@ -204,6 +209,12 @@ func fetchReportData(ctx context.Context, product, category, report string, keyw
 		err = processReport(ctx, product, category, key, keywords)
 		if err != nil {
 			logs.Error("处理报告错误: %s: %s: %s: %s: %v", product, category, report, key, err)
+			if strings.Contains(err.Error(), "您看文章的速度太快了,歇一会再看吧") {
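+				// The error text carries the site's rate-limit notice (“您看文章的速度太快了,歇一会再看吧”): send an alarm and stop processing the remaining reports.
+				// NOTE(review): returning nil below hides this error from the caller, so the caller's own check for the same message appears unable to fire — confirm intent.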
+				alarm_msg.SendAlarmMsg(fmt.Sprintf("粮油商务网-爬取指标数据被限制,请稍后重试, ErrMsg: %s", err.Error()), 1)
+				return nil
+			}
 			continue
 		}
 
@@ -217,6 +228,8 @@ func fetchReportData(ctx context.Context, product, category, report string, keyw
 		recordId, err := models.AddLyIndexRecord(&models.BaseFromLyIndexRecord{
 			CreateTime: utils.GetCurrentTime(),
 			ModifyTime: utils.GetCurrentTime(),
+			Product:    product,
+			Category:   category,
 			Url:        key,
 			DataTime:   format,
 		})
@@ -385,18 +398,6 @@ func extractReportURLs(htmlContent, keyword string) map[string]string {
 func processReport(ctx context.Context, product string, category string, reportURL string, keywords []string) error {
 	// Navigate to the report page
 	var reportContent string
-	/*// 使用 XPath 构造选择器
-	clickSelector := fmt.Sprintf(`//a[@href='%s']`, reportURL)
-
-	err := chromedp.Run(ctx,
-		chromedp.WaitReady(clickSelector, chromedp.BySearch), // 等待选择器准备好
-		chromedp.Click(clickSelector, chromedp.BySearch),     // 点击目标链接
-		chromedp.Sleep(5*time.Second),                        // 等待页面加载
-		chromedp.OuterHTML("html", &reportContent),           // 获取页面 HTML 内容
-	)
-	if err != nil {
-		return err
-	}*/
 	err := chromedp.Run(ctx,
 		chromedp.Navigate(lyLoginPath+reportURL),
 		chromedp.WaitVisible("body", chromedp.ByQuery), // 等待 body 元素可见,确保页面已加载
@@ -407,6 +408,11 @@ func processReport(ctx context.Context, product string, category string, reportU
 		return err
 	}
 
+	// 如果文章内容包含 “您看文章的速度太快了,歇一会再看吧” 则返回指定错误
+	if strings.Contains(reportContent, "您看文章的速度太快了,歇一会再看吧") {
+		return fmt.Errorf("您看文章的速度太快了,歇一会再看吧")
+	}
+
 	var lyIndexDataList []models.BaseFromLyData
 	// Process the data based on keywords
 	for _, keyword := range keywords {

+ 11 - 3
services/liangyou/processor_business_logic.go

@@ -17,7 +17,7 @@ import (
 	"unicode"
 )
 
-const (
+var (
 	lySourceName = "lysww" // 粮油商务网
 )
 
@@ -1081,10 +1081,18 @@ func (p *DailyTransactionProcessor) Process(ctx context.Context, product string,
 	}
 
 	// 获取第一个表格
-	areaTableData := getNoHeadTableData(reportContent)[0]
+	areaTableDataList := getNoHeadTableData(reportContent)
+	if len(areaTableDataList) == 0 {
+		return []models.BaseFromLyData{}, fmt.Errorf("DailyTransactionProcessor Process() : No table data found")
+	}
+	areaTableData := areaTableDataList[0]
 	// 获取第二个表格
 	blocTableData := getTableData(reportContent, false)
-	logs.Info("DailyTransactionProcessor Process() : areaTableData data: %v, blocTableData data: %v", areaTableData, blocTableData)
+	if blocTableData.Headers == nil {
+		return []models.BaseFromLyData{}, fmt.Errorf("DailyTransactionProcessor Process() : No table data found")
+
+	}
+	logs.Info("SupplyDemandBalanceProcessor Process() : areaTableData data: %v, blocTableData data: %v", areaTableData, blocTableData)
 	// 提取日期信息
 	dateText, err := getDateInfo(ctx)
 	if err != nil {

+ 4 - 4
utils/date_util.go

@@ -66,17 +66,17 @@ func ParseDateAndMonthColzaOil(dateText string) ([]string, error) {
 	months := make([]string, 3)
 	monthMap := map[string]string{
 		"01": "1月F",
-		"02": "2月G",
+		"02": "2月H",
 		"03": "3月H",
-		"04": "4月J",
+		"04": "4月K",
 		"05": "5月K",
-		"06": "6月M",
+		"06": "6月N",
 		"07": "7月N",
 		"08": "8月X",
 		"09": "9月X",
 		"10": "10月X",
 		"11": "11月X",
-		"12": "12月Z",
+		"12": "12月F",
 	}
 
 	for i := 0; i < 3; i++ {