|
@@ -4,33 +4,40 @@ package main
|
|
|
import (
|
|
|
"context"
|
|
|
"encoding/json"
|
|
|
+ "eta/eta_crawler/models"
|
|
|
"eta/eta_crawler/utils"
|
|
|
"fmt"
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
"github.com/beego/beego/v2/core/logs"
|
|
|
"github.com/chromedp/chromedp"
|
|
|
"log"
|
|
|
+ "regexp"
|
|
|
+ "strconv"
|
|
|
"strings"
|
|
|
)
|
|
|
|
|
|
// sourceName is the identifier of the crawled data source
// (the "Grain and Oil Business Network" site).
const sourceName = "lysww"
|
|
|
+
|
|
|
// ImportCostProcessor
|
|
|
// @Description: 进口成本处理器
|
|
|
type ImportCostProcessor struct{}
|
|
|
|
|
|
-func (p *ImportCostProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string) error {
|
|
|
+func (p *ImportCostProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string, indexId int) (models.BaseFromLyData, error) {
|
|
|
fmt.Println("Processing import cost...")
|
|
|
// 实现具体的处理逻辑
|
|
|
- return nil
|
|
|
+ return models.BaseFromLyData{}, nil
|
|
|
}
|
|
|
|
|
|
// ProcessingProfitProcessor
|
|
|
// @Description: 加工利润处理器
|
|
|
type ProcessingProfitProcessor struct{}
|
|
|
|
|
|
-func (p *ProcessingProfitProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string) error {
|
|
|
+func (p *ProcessingProfitProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string, indexId int) (models.BaseFromLyData, error) {
|
|
|
fmt.Println("Processing processing profit...")
|
|
|
// 实现具体的处理逻辑
|
|
|
- return nil
|
|
|
+ return models.BaseFromLyData{}, nil
|
|
|
}
|
|
|
|
|
|
// ProcessingReportProcessor
|
|
@@ -44,11 +51,11 @@ type TableData struct {
|
|
|
Rows [][]string `json:"rows"`
|
|
|
}
|
|
|
|
|
|
-func (p *ProcessingReportProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string) error {
|
|
|
+func (p *ProcessingReportProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string, indexId int) (models.BaseFromLyData, error) {
|
|
|
logs.Info("Processing processing report...")
|
|
|
// 解析关键字
|
|
|
if len(keywords) < 3 {
|
|
|
- return fmt.Errorf("ProcessingReportProcessor Process() : keywords must contain at least 3 elements")
|
|
|
+ return models.BaseFromLyData{}, fmt.Errorf("ProcessingReportProcessor Process() : keywords must contain at least 3 elements")
|
|
|
}
|
|
|
|
|
|
// 拿到 行关键字和列关键字
|
|
@@ -102,15 +109,29 @@ func (p *ProcessingReportProcessor) Process(ctx context.Context, product string,
|
|
|
chromedp.Evaluate(`document.querySelector('div.a_date span').innerText`, &dateText),
|
|
|
)
|
|
|
if err != nil {
|
|
|
- return err
|
|
|
+ return models.BaseFromLyData{}, fmt.Errorf("ProcessingReportProcessor Process() : Failed to extract date: %v", err)
|
|
|
}
|
|
|
|
|
|
logs.Info("ProcessingReportProcessor Process() : Extracted Date: %s", dateText)
|
|
|
|
|
|
+ // 校验指标数据是否存在 根据指标id和日期 存在则跳过,不存在正常往下走
|
|
|
+ format, err := utils.ConvertTimeFormat(dateText)
|
|
|
+ if err != nil {
|
|
|
+ return models.BaseFromLyData{}, err
|
|
|
+ }
|
|
|
+ indexData, err := models.GetLyDataByIndexIdAndDataTime(indexId, format)
|
|
|
+ if err != nil {
|
|
|
+ return models.BaseFromLyData{}, fmt.Errorf("ProcessingReportProcessor Process() : Failed to get data by index id and date: %v", err)
|
|
|
+ }
|
|
|
+ if len(indexData) > 0 {
|
|
|
+ logs.Info("ProcessingReportProcessor Process() : Data already exists for index %d and date %s", indexId, dateText)
|
|
|
+ return models.BaseFromLyData{}, nil
|
|
|
+ }
|
|
|
+
|
|
|
// 解析日期并计算当前周数
|
|
|
targetWeek, err := utils.ParseDateAndWeek(dateText)
|
|
|
if err != nil {
|
|
|
- return err
|
|
|
+ return models.BaseFromLyData{}, fmt.Errorf("ProcessingReportProcessor Process() : Failed to parse date: %v", err)
|
|
|
}
|
|
|
|
|
|
fmt.Printf("Target Week: %s\n", targetWeek)
|
|
@@ -123,7 +144,8 @@ func (p *ProcessingReportProcessor) Process(ctx context.Context, product string,
|
|
|
// 查找目标列
|
|
|
columnIdx := -1
|
|
|
for i, header := range tableHeaders {
|
|
|
- if strings.Contains(columnName, header) {
|
|
|
+ headerString := extractChinese(header)
|
|
|
+ if strings.Contains(columnName, headerString) {
|
|
|
columnIdx = i
|
|
|
break
|
|
|
}
|
|
@@ -151,16 +173,41 @@ func (p *ProcessingReportProcessor) Process(ctx context.Context, product string,
|
|
|
// 查找目标行
|
|
|
for _, row := range tableRows {
|
|
|
if len(row) > 0 && strings.Contains(row[0], rowName) {
|
|
|
- fmt.Printf("Row matching '%s':\n", rowName)
|
|
|
if weekIdx < len(row) {
|
|
|
logs.Info("Value in column '%s' - '%s': %s", columnName, rowName, row[columnIdx])
|
|
|
+ numFlag := isNumeric(row[columnIdx])
|
|
|
+ if numFlag {
|
|
|
+ value, err := strconv.ParseFloat(row[columnIdx], 64)
|
|
|
+ if err != nil {
|
|
|
+ logs.Error("ProcessingReportProcessor Process() : Error converting value to float64: %v", err)
|
|
|
+ return models.BaseFromLyData{}, err
|
|
|
+ }
|
|
|
+ // 返回BaseFromLyData对象的数据
|
|
|
+ baseFromLyData := models.BaseFromLyData{
|
|
|
+ DataTime: dateText,
|
|
|
+ Value: value,
|
|
|
+ }
|
|
|
+ return baseFromLyData, nil
|
|
|
+ }
|
|
|
} else {
|
|
|
logs.Error("ProcessingReportProcessor Process() : Column index out of range")
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
}
|
|
|
+ // TODO 后面把这个日志打印,不做返回错误处理,一个指标找不到会导致后续指标无法处理
|
|
|
+ return models.BaseFromLyData{}, fmt.Errorf("ProcessingReportProcessor Process() : No matching row found for '%s'", rowName)
|
|
|
+}
|
|
|
+
|
|
|
// numericPattern matches an optionally signed decimal integer or
// floating-point literal such as "12", "-3.5", "7." or ".25". It is compiled
// once at package scope so callers that test many table cells do not pay the
// compilation cost on every call.
var numericPattern = regexp.MustCompile(`^[+-]?(\d+(\.\d*)?|\.\d+)$`)

// isNumeric reports whether value consists solely of a plain decimal number:
// an optional leading sign followed by digits with an optional fractional
// part. Surrounding whitespace, thousands separators and exponent notation are
// not accepted, and the empty string is not numeric.
func isNumeric(value string) bool {
	return numericPattern.MatchString(value)
}
|
|
|
|
|
|
- return nil
|
|
|
// nonHanPattern matches any single character that is not in the Unicode Han
// script. It is compiled once at package scope instead of on every call.
var nonHanPattern = regexp.MustCompile(`[^\p{Han}]`)

// extractChinese returns text with every non-Han character removed, keeping
// only the Chinese (Han script) characters in their original order. Input
// without any Han characters yields the empty string.
func extractChinese(text string) string {
	return nonHanPattern.ReplaceAllString(text, "")
}
|