|
@@ -4,33 +4,40 @@ package main
|
|
|
import (
|
|
|
"context"
|
|
|
"encoding/json"
|
|
|
+ "eta/eta_crawler/models"
|
|
|
"eta/eta_crawler/utils"
|
|
|
"fmt"
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
"github.com/beego/beego/v2/core/logs"
|
|
|
"github.com/chromedp/chromedp"
|
|
|
"log"
|
|
|
+ "regexp"
|
|
|
+ "strconv"
|
|
|
"strings"
|
|
|
)
|
|
|
|
|
|
// sourceName holds the literal "lysww".
// NOTE(review): it is not referenced in the visible part of this file;
// presumably it identifies the crawled data source — confirm against callers.
const sourceName = "lysww"
|
|
|
+
|
|
|
|
|
|
|
|
|
type ImportCostProcessor struct{}
|
|
|
|
|
|
-func (p *ImportCostProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string) error {
|
|
|
+func (p *ImportCostProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string, indexId int) (models.BaseFromLyData, error) {
|
|
|
fmt.Println("Processing import cost...")
|
|
|
|
|
|
- return nil
|
|
|
+ return models.BaseFromLyData{}, nil
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
type ProcessingProfitProcessor struct{}
|
|
|
|
|
|
-func (p *ProcessingProfitProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string) error {
|
|
|
+func (p *ProcessingProfitProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string, indexId int) (models.BaseFromLyData, error) {
|
|
|
fmt.Println("Processing processing profit...")
|
|
|
|
|
|
- return nil
|
|
|
+ return models.BaseFromLyData{}, nil
|
|
|
}
|
|
|
|
|
|
|
|
@@ -44,11 +51,11 @@ type TableData struct {
|
|
|
Rows [][]string `json:"rows"`
|
|
|
}
|
|
|
|
|
|
-func (p *ProcessingReportProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string) error {
|
|
|
+func (p *ProcessingReportProcessor) Process(ctx context.Context, product string, reportContent string, keywords []string, indexId int) (models.BaseFromLyData, error) {
|
|
|
logs.Info("Processing processing report...")
|
|
|
|
|
|
if len(keywords) < 3 {
|
|
|
- return fmt.Errorf("ProcessingReportProcessor Process() : keywords must contain at least 3 elements")
|
|
|
+ return models.BaseFromLyData{}, fmt.Errorf("ProcessingReportProcessor Process() : keywords must contain at least 3 elements")
|
|
|
}
|
|
|
|
|
|
|
|
@@ -102,15 +109,29 @@ func (p *ProcessingReportProcessor) Process(ctx context.Context, product string,
|
|
|
chromedp.Evaluate(`document.querySelector('div.a_date span').innerText`, &dateText),
|
|
|
)
|
|
|
if err != nil {
|
|
|
- return err
|
|
|
+ return models.BaseFromLyData{}, fmt.Errorf("ProcessingReportProcessor Process() : Failed to extract date: %v", err)
|
|
|
}
|
|
|
|
|
|
logs.Info("ProcessingReportProcessor Process() : Extracted Date: %s", dateText)
|
|
|
|
|
|
+
|
|
|
+ format, err := utils.ConvertTimeFormat(dateText)
|
|
|
+ if err != nil {
|
|
|
+ return models.BaseFromLyData{}, err
|
|
|
+ }
|
|
|
+ indexData, err := models.GetLyDataByIndexIdAndDataTime(indexId, format)
|
|
|
+ if err != nil {
|
|
|
+ return models.BaseFromLyData{}, fmt.Errorf("ProcessingReportProcessor Process() : Failed to get data by index id and date: %v", err)
|
|
|
+ }
|
|
|
+ if len(indexData) > 0 {
|
|
|
+ logs.Info("ProcessingReportProcessor Process() : Data already exists for index %d and date %s", indexId, dateText)
|
|
|
+ return models.BaseFromLyData{}, nil
|
|
|
+ }
|
|
|
+
|
|
|
|
|
|
targetWeek, err := utils.ParseDateAndWeek(dateText)
|
|
|
if err != nil {
|
|
|
- return err
|
|
|
+ return models.BaseFromLyData{}, fmt.Errorf("ProcessingReportProcessor Process() : Failed to parse date: %v", err)
|
|
|
}
|
|
|
|
|
|
fmt.Printf("Target Week: %s\n", targetWeek)
|
|
@@ -123,7 +144,8 @@ func (p *ProcessingReportProcessor) Process(ctx context.Context, product string,
|
|
|
|
|
|
columnIdx := -1
|
|
|
for i, header := range tableHeaders {
|
|
|
- if strings.Contains(columnName, header) {
|
|
|
+ headerString := extractChinese(header)
|
|
|
+ if strings.Contains(columnName, headerString) {
|
|
|
columnIdx = i
|
|
|
break
|
|
|
}
|
|
@@ -151,16 +173,41 @@ func (p *ProcessingReportProcessor) Process(ctx context.Context, product string,
|
|
|
|
|
|
for _, row := range tableRows {
|
|
|
if len(row) > 0 && strings.Contains(row[0], rowName) {
|
|
|
- fmt.Printf("Row matching '%s':\n", rowName)
|
|
|
if weekIdx < len(row) {
|
|
|
logs.Info("Value in column '%s' - '%s': %s", columnName, rowName, row[columnIdx])
|
|
|
+ numFlag := isNumeric(row[columnIdx])
|
|
|
+ if numFlag {
|
|
|
+ value, err := strconv.ParseFloat(row[columnIdx], 64)
|
|
|
+ if err != nil {
|
|
|
+ logs.Error("ProcessingReportProcessor Process() : Error converting value to float64: %v", err)
|
|
|
+ return models.BaseFromLyData{}, err
|
|
|
+ }
|
|
|
+
|
|
|
+ baseFromLyData := models.BaseFromLyData{
|
|
|
+ DataTime: dateText,
|
|
|
+ Value: value,
|
|
|
+ }
|
|
|
+ return baseFromLyData, nil
|
|
|
+ }
|
|
|
} else {
|
|
|
logs.Error("ProcessingReportProcessor Process() : Column index out of range")
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
}
|
|
|
+
|
|
|
+ return models.BaseFromLyData{}, fmt.Errorf("ProcessingReportProcessor Process() : No matching row found for '%s'", rowName)
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
// numericValueRe matches a whole string that is an optionally signed decimal
// number: digits with an optional fractional part ("12", "12.", "12.5") or a
// bare fraction (".5"). It is compiled once at package scope so that isNumeric
// does not recompile the pattern on every call.
var numericValueRe = regexp.MustCompile(`^[+-]?(\d+(\.\d*)?|\.\d+)$`)

// isNumeric reports whether value is, in its entirety, an optionally signed
// decimal number. Surrounding whitespace, thousands separators, exponents and
// the empty string are all rejected.
func isNumeric(value string) bool {
	return numericValueRe.MatchString(value)
}
|
|
|
|
|
|
- return nil
|
|
|
+
|
|
|
// nonHanRe matches any single character that is not in the Unicode Han script.
// It is compiled once at package scope so that extractChinese does not
// recompile the pattern on every call.
var nonHanRe = regexp.MustCompile(`[^\p{Han}]`)

// extractChinese returns text with every non-Han character removed, keeping
// the remaining Han characters in their original order. Input that contains no
// Han characters yields the empty string.
func extractChinese(text string) string {
	return nonHanRe.ReplaceAllString(text, "")
}
|