Przeglądaj źródła

睿咨得数据爬取

gmy 6 miesięcy temu
rodzic
commit
247b4a1f68

+ 4 - 27
services/ruizide/data_processor.go

@@ -1,4 +1,4 @@
-package ruizide
+package main
 
 import (
 	"context"
@@ -332,8 +332,8 @@ func main() {
 			// 补充 判断是否存在于指标库
 			paramsLib := make(map[string]interface{})
 			paramsLib["IndexCode"] = index.IndexCode
-			paramsLib["DataTime"] = index.DataTime
-			postEdbLib, err := httpRequestFill(paramsLib, utils.GET_EDB_RZD_DATA_BY_CODE_AND_TIME)
+			paramsLib["Source"] = utils.DATA_SOURCE_RZD
+			postEdbLib, err := httpRequestFill(paramsLib, utils.GET_RZD_EDB_INFO_BY_INDEX_CODE)
 			if err != nil {
 				// 有错误就不继续执行
 				log.Printf("postEdbLib err: %v", err)
@@ -346,7 +346,7 @@ func main() {
 				continue
 			}
 
-			if requestResponse.Data.EdbInfoId == 0 {
+			if requestResponse.Data.EdbInfoId != 0 {
 				edbDataRzd := models.EdbDataRzd{
 					CreateTime:    utils.GetCurrentTime(),
 					ModifyTime:    utils.GetCurrentTime(),
@@ -379,29 +379,6 @@ func main() {
 	}
 }
 
-// setDownloadBehavior runs a chromedp action that evaluates JavaScript calling chrome.page.setDownloadBehavior with behavior 'allow' and downloadPath set to downloadDir; an evaluation failure is returned as an error.
-func setDownloadBehavior(ctx context.Context) error {
-	return chromedp.Run(ctx,
-		chromedp.ActionFunc(func(ctx context.Context) error {
-			// Submit the download behavior by evaluating the script with chromedp.Evaluate; result receives the value the script resolves to.
-			var result interface{}
-			if err := chromedp.Evaluate(`(function() {
-				return new Promise((resolve, reject) => {
-					chrome.page.setDownloadBehavior({
-						behavior: 'allow',
-						downloadPath: '`+downloadDir+`'
-					}, function() {
-						resolve();
-					});
-				});
-			})()`, &result).Do(ctx); err != nil {
-				return fmt.Errorf("设置下载行为失败: %v", err)
-			}
-			return nil
-		}),
-	)
-}
-
 func login(ctx context.Context) error {
 
 	return chromedp.Run(ctx,

+ 37 - 17
services/ruizide/processor_business_logic.go

@@ -1,6 +1,6 @@
 // Package ruizide
 // @Author gmy 2024/10/21 10:50:00
-package ruizide
+package main
 
 import (
 	"encoding/json"
@@ -8,20 +8,22 @@ import (
 	"eta/eta_data_analysis/utils"
 	"fmt"
 	"github.com/beego/beego/v2/core/logs"
+	"math"
 	"strconv"
 	"strings"
 	"unicode"
 )
 
 var classifyMap = map[string]string{
-	"Road Index":               "analytics library",
-	"Road Active Fleet":        "analytics library",
-	"Aviation Index":           "analytics library",
-	"Aviation Active Fleet":    "analytics library",
-	"Demand-Gasoline":          "analytics library",
-	"Demand - Diesel":          "analytics library",
-	"Demand - Jet Fuel":        "analytics library",
-	"Demand - Maritime Bunker": "analytics library",
+	"Road Index":                       "analytics library",
+	"Road Active Fleet":                "analytics library",
+	"Aviation Index":                   "analytics library",
+	"Aviation Active Fleet":            "analytics library",
+	"Demand-Gasoline":                  "analytics library",
+	"Demand - Diesel":                  "analytics library",
+	"Demand - Jet Fuel":                "analytics library",
+	"Demand - Maritime Bunker":         "analytics library",
+	"Oil_Demand_Signals_Weekly_Report": "analytics library",
 }
 
 // RoadIndexProcessor
@@ -51,6 +53,10 @@ func (p *RoadIndexProcessor) Process(tableName string, sheetName string, rowData
 
 	// step_3: 指标数据
 	dataList, err := dealData(indexOneId, indexTwoId, indexCodeOne, indexCodeTwo, rowData)
+	if err != nil {
+		return nil, err
+	}
+	logs.Info("dataList: %v", dataList)
 
 	return dataList, err
 }
@@ -62,11 +68,18 @@ func dealData(indexOneId, indexTwoId int, indexCodeOne, indexCodeTwo string, row
 	if err != nil {
 		return nil, err
 	}
+	valueOne = math.Round(valueOne*10000) / 10000
+
 	dataTimeOne := rowData[1]
+	formatOne, err := utils.ConvertDateFormat(dataTimeOne)
+	if err != nil {
+		return nil, err
+	}
+	dataTimeOne = formatOne
 
 	paramsLib := make(map[string]interface{})
-	paramsLib["indexCode"] = indexCodeOne
-	paramsLib["dataTime"] = dataTimeOne
+	paramsLib["IndexCode"] = indexCodeOne
+	paramsLib["DataTime"] = dataTimeOne
 	postEdbLib, err := httpRequestFill(paramsLib, utils.GET_RZD_INDEX_DATA_BY_CODE_AND_TIME)
 	if err != nil {
 		return nil, err
@@ -76,7 +89,7 @@ func dealData(indexOneId, indexTwoId int, indexCodeOne, indexCodeTwo string, row
 	if err != nil {
 		return nil, err
 	}
-	if requestResponse.Data.BaseFromRzdIndexId != 0 {
+	if requestResponse.Data.BaseFromRzdIndexId == 0 {
 		dataOne := models.BaseFromRzdData{
 			BaseFromRzdIndexId: indexOneId,
 			CreateTime:         utils.GetCurrentTime(),
@@ -92,7 +105,14 @@ func dealData(indexOneId, indexTwoId int, indexCodeOne, indexCodeTwo string, row
 	if err != nil {
 		return nil, err
 	}
+	valueTwo = math.Round(valueTwo*10000) / 10000
+
 	dataTimeTwo := rowData[1]
+	formatTwo, err := utils.ConvertDateFormat(dataTimeTwo)
+	if err != nil {
+		return nil, err
+	}
+	dataTimeTwo = formatTwo
 
 	paramsLib = make(map[string]interface{})
 	paramsLib["indexCode"] = indexCodeTwo
@@ -106,7 +126,7 @@ func dealData(indexOneId, indexTwoId int, indexCodeOne, indexCodeTwo string, row
 	if err != nil {
 		return nil, err
 	}
-	if requestResponseTwo.Data.BaseFromRzdIndexId != 0 {
+	if requestResponseTwo.Data.BaseFromRzdIndexId == 0 {
 		dataTwo := models.BaseFromRzdData{
 			BaseFromRzdIndexId: indexTwoId,
 			CreateTime:         utils.GetCurrentTime(),
@@ -126,8 +146,8 @@ func dealIndex(sheetName string, rowData []string, indexNameColOne string, index
 	indexNameOne := sheetName + "/" + rowData[len(rowData)-3] + "/" + indexNameColOne
 	indexNameTwo := sheetName + "/" + rowData[len(rowData)-3] + "/" + indexNameColTwo
 	// 生成指标编码
-	indexCodeOne, err = getIndexId(sheetName, rowData[len(rowData)-3], indexNameColOne)
-	indexCodeTwo, err = getIndexId(sheetName, rowData[len(rowData)-3], indexNameColTwo)
+	indexCodeOne, err = getIndexId(sheetName, strings.ToLower(rowData[len(rowData)-3]), indexNameColOne)
+	indexCodeTwo, err = getIndexId(sheetName, strings.ToLower(rowData[len(rowData)-3]), indexNameColTwo)
 
 	// 处理第一个指标
 	paramsLib := make(map[string]interface{})
@@ -236,7 +256,7 @@ func getIndexId(prefix string, area string, suffix string) (string, error) {
 func dealClassify(tableName, sheetName string) (int, error) {
 	// 查询一级分类是否存在
 	paramsLib := make(map[string]interface{})
-	paramsLib["classifyName"] = tableName
+	paramsLib["classifyName"] = classifyMap[tableName]
 	postEdbLib, err := httpRequestFill(paramsLib, utils.GET_RZD_CLASSIFY_BY_NAME)
 	if err != nil {
 		return 0, fmt.Errorf("AnalyticsLibraryProcessor Process() : failed to get classify: %v", err)
@@ -253,7 +273,7 @@ func dealClassify(tableName, sheetName string) (int, error) {
 		// 一级分类不存在,新增一级分类
 		paramsLib = make(map[string]interface{})
 		paramsLib["parentId"] = 0
-		paramsLib["classifyName"] = tableName
+		paramsLib["classifyName"] = classifyMap[tableName]
 		postEdbLib, err = httpRequestFill(paramsLib, utils.ADD_RZD_CLASSIFY)
 		if err != nil {
 			return 0, fmt.Errorf("AnalyticsLibraryProcessor Process() : failed to add classify: %v", err)

+ 1 - 1
services/ruizide/processor_factory.go

@@ -1,4 +1,4 @@
-package ruizide
+package main
 
 import (
 	"eta/eta_data_analysis/models"

+ 1 - 0
utils/constants.go

@@ -274,6 +274,7 @@ const (
 	GET_RZD_CLASSIFY_BY_NAME            = "/rzd/get/rzd/classify/by/name"            // 根据分类名称查询分类
 	ADD_RZD_CLASSIFY                    = "/rzd/add/rzd/classify"                    // 新增睿咨得分类
 	ADD_BATCH_RZD_EDB_DATA              = "/rzd/add/batch/rzd/edb/data"              // 批量增加指标库指标数据
+	GET_RZD_EDB_INFO_BY_INDEX_CODE      = "/rzd/get/rzd/edb/info/by/code"            // 根据指标code获取指标信息
 
 )
 

+ 14 - 0
utils/date_util.go

@@ -487,6 +487,7 @@ func GetCurrentMonth(dateText string) (string, error) {
 	return fmt.Sprintf("%d月", month), nil
 }
 
+// GetCurrentYearMonth 获取当前年月 yyyyMM
 func GetCurrentYearMonth() string {
 	// 获取当前时间
 	now := time.Now()
@@ -496,3 +497,16 @@ func GetCurrentYearMonth() string {
 
 	return yearMonth
 }
+
+// ConvertDateFormat parses dataText as a dd-MM-yy date and returns it reformatted as yyyy-MM-dd; it returns an empty string and an error when dataText does not match that layout.
+func ConvertDateFormat(dataText string) (string, error) {
+	// Parse the input using the layout "02-01-06" (two-digit day, month, and year).
+	parsedTime, err := time.Parse("02-01-06", dataText)
+	if err != nil {
+		return "", fmt.Errorf("failed to parse date: %v", err)
+	}
+
+	// Format the parsed time as yyyy-MM-dd.
+	formattedTime := parsedTime.Format("2006-01-02")
+	return formattedTime, nil
+}