Przeglądaj źródła

睿姿得数据爬取

gmy 4 miesięcy temu
rodzic
commit
a30e1fde35

+ 3 - 6
services/ruizide/data_processor.go

@@ -272,12 +272,12 @@ func resolverNet() {
 // func fileResolver() {
 func main() {
 	var tableNameList = []string{
-		"Oil_Demand_Signals_Weekly_Report",
+		//"Oil_Demand_Signals_Weekly_Report",
 		"RE_Dashboard_Export",
 	}
 	for _, tableName := range tableNameList {
 		var fileName string
-		// 解析Oil_Demand_Signals_Weekly_Report_表格
+		// 解析表格
 		fileName = tableName + "_" + utils.GetCurrentYearMonth() + ".xlsx"
 		filePath := filepath.Join(downloadDir, fileName)
 
@@ -289,11 +289,8 @@ func main() {
 
 		// 获取所有工作表
 		sheetNames := f.GetSheetList()
-		for sheetIndex, sheetName := range sheetNames {
+		for _, sheetName := range sheetNames {
 			fmt.Printf("读取工作表: %s\n", sheetName)
-			if sheetIndex == 0 {
-				continue
-			}
 
 			// 获取工作表的最大行数
 			maxRow, err := f.GetRows(sheetName) // 直接获取所有行数据

+ 64 - 112
services/ruizide/processor_business_logic.go

@@ -24,6 +24,7 @@ var classifyMap = map[string]string{
 	"Demand - Jet Fuel":                "analytics library",
 	"Demand - Maritime Bunker":         "analytics library",
 	"Oil_Demand_Signals_Weekly_Report": "analytics library",
+	"cube dashboards":                  "cube dashboards",
 }
 
 // RoadIndexProcessor
@@ -32,6 +33,9 @@ type RoadIndexProcessor struct{}
 
 func (p *RoadIndexProcessor) Process(tableName string, sheetName string, rowIndex int, rowData []string) ([]models.BaseFromRzdData, error) {
 	logs.Info("Processing AnalyticsLibrary...")
+	if tableName == "Content" {
+		return nil, nil
+	}
 	if rowIndex < 4 {
 		return nil, nil
 	}
@@ -54,10 +58,10 @@ func (p *RoadIndexProcessor) Process(tableName string, sheetName string, rowInde
 	indexNameTwo := sheetName + "/" + rowData[len(rowData)-3] + "/" + indexNameColTwo
 
 	// 生成指标编码
-	indexCodeOne, err := getIndexId(sheetName, strings.ToLower(rowData[len(rowData)-3]), indexNameColOne)
-	indexCodeTwo, err := getIndexId(sheetName, strings.ToLower(rowData[len(rowData)-3]), indexNameColTwo)
+	indexCodeOne, err := getIndexId(sheetName, strings.ReplaceAll(strings.ToLower(rowData[len(rowData)-3]), " ", ""), indexNameColOne)
+	indexCodeTwo, err := getIndexId(sheetName, strings.ReplaceAll(strings.ToLower(rowData[len(rowData)-3]), " ", ""), indexNameColTwo)
 
-	var indexInfoMap map[string]string
+	indexInfoMap := make(map[string]string)
 	indexInfoMap[indexCodeOne] = indexNameOne
 	indexInfoMap[indexCodeTwo] = indexNameTwo
 
@@ -110,42 +114,69 @@ func (p *RoadIndexProcessor) Process(tableName string, sheetName string, rowInde
 // @Description: ReDashboardExportOneProcessor处理器
 type ReDashboardExportOneProcessor struct{}
 
-/*
-	func (p *ReDashboardExportOneProcessor) Process(tableName string, sheetName string, rowIndex int, rowData []string) ([]models.BaseFromRzdData, error) {
-		logs.Info("Processing ReDashboardExportOne...")
-		if rowIndex < 4 {
-			return nil, nil
-		}
+func (p *ReDashboardExportOneProcessor) Process(tableName string, sheetName string, rowIndex int, rowData []string) ([]models.BaseFromRzdData, error) {
+	logs.Info("Processing ReDashboardExportOne...")
+	if rowIndex < 1 {
+		return nil, nil
+	}
 
-		frequency := "季度"
-		unit := "千桶每天"
-		indexNameColOne := "Index"
-		indexNameColTwo := "Index 7DMA"
+	frequency := "季度"
+	unit := "千桶每天"
+	indexNameColSuffix := "YearQuarter"
+	indexNameColPrefix := "CountryRevisionGroup"
 
-		// step_1: 分类
-		classifyId, err := dealClassify("cube dashboards", "Supply Revision Analysis")
-		if err != nil {
-			return nil, err
-		}
-		logs.Info("classifyId: %v", classifyId)
+	// step_1: 分类
+	classifyId, err := dealClassify("cube dashboards", "Supply Revision Analysis")
+	if err != nil {
+		return nil, err
+	}
+	logs.Info("classifyId: %v", classifyId)
 
-		// step_2: 指标
-		indexOneId, indexTwoId, indexCodeOne, indexCodeTwo, err := dealIndex(sheetName, rowData, indexNameColOne, indexNameColTwo, frequency, unit, classifyId)
-		if err != nil {
-			return nil, err
-		}
-		logs.Info("indexOneId: %v, indexTwoId: %v, indexCodeOne: %v, indexCodeTwo: %v", indexOneId, indexTwoId, indexCodeOne, indexCodeTwo)
+	// step_2: 指标
+	// 指标名称
+	indexName := "Supply Revision Analysis" + "/" + indexNameColPrefix + "/" + indexNameColSuffix + "/" + rowData[len(rowData)-1]
 
-		// step_3: 指标数据
-		dataList, err := dealData(indexOneId, indexTwoId, indexCodeOne, indexCodeTwo, rowData)
-		if err != nil {
-			return nil, err
-		}
-		logs.Info("dataList: %v", dataList)
+	// 生成指标编码
+	indexCode, err := getIndexId("Supply Revision Analysis"+" Country Revision Group"+" Year Quarter", strings.ReplaceAll(strings.ToLower(rowData[len(rowData)-1]), " ", ""), "")
+
+	indexInfoMap := make(map[string]string)
+	indexInfoMap[indexCode] = indexName
+
+	var indexInfoList []*models.IndexInfo
+	value, err := strconv.ParseFloat(rowData[len(rowData)-2], 64)
+	if err != nil {
+		return nil, err
+	}
+
+	dataTime := rowData[0]
+	format, err := utils.ConvertDateFormat2(dataTime)
+	if err != nil {
+		return nil, err
+	}
+
+	indexInfoList = append(indexInfoList, &models.IndexInfo{
+		IndexName: indexName,
+		IndexCode: indexCode,
+		Value:     value,
+		DataTime:  format,
+	})
 
-		return dataList, err
+	indexInfoList, err = dealIndex(indexInfoList, frequency, unit, classifyId)
+	if err != nil {
+		return nil, err
 	}
-*/
+	logs.Info("ReDashboardExportOne indexInfoList: %v", indexInfoList)
+
+	// step_3: 指标数据
+	dataList, err := dealData(indexInfoList)
+	if err != nil {
+		return nil, err
+	}
+	logs.Info("ReDashboardExportOne dataList: %v", dataList)
+
+	return dataList, err
+}
+
 func dealData(indexInfoList []*models.IndexInfo) ([]models.BaseFromRzdData, error) {
 	var dataList []models.BaseFromRzdData
 	for _, indexInfo := range indexInfoList {
@@ -220,84 +251,6 @@ func dealIndex(indexInfoList []*models.IndexInfo, frequency string, unit string,
 		}
 	}
 	return indexInfoList, nil
-	/*// 处理第一个指标
-	paramsLib := make(map[string]interface{})
-	paramsLib["indexCode"] = indexCodeOne
-	postEdbLib, err := httpRequestFill(paramsLib, utils.GET_RZD_INDEX_BY_CODE)
-	if err != nil {
-		return 0, 0, fmt.Errorf("getIndexId() : Failed to get rzd index by code: %v", err)
-	}
-	var requestResponse models.RequestResponse[models.BaseFromRzdIndex]
-	err = json.Unmarshal(postEdbLib, &requestResponse)
-	if err != nil {
-		return 0, 0, err
-	}
-	if requestResponse.Data.BaseFromRzdIndexId == 0 {
-		indexOne := models.BaseFromRzdIndex{
-			CreateTime:           utils.GetCurrentTime(),
-			ModifyTime:           utils.GetCurrentTime(),
-			BaseFromLyClassifyId: classifyId,
-			IndexCode:            indexCodeOne,
-			IndexName:            indexNameOne,
-			Frequency:            frequency,
-			Unit:                 unit,
-		}
-		// 这里避免服务器宕机 出现唯一索引异常,进行分开保存
-		postEdbLib, err = httpRequestFill(indexOne, utils.ADD_RZD_INDEX)
-		if err != nil {
-			return 0, 0, fmt.Errorf("getIndexId() : Failed to get rzd index by code: %v", err)
-		}
-		var requestResponse models.RequestResponse[int]
-		err = json.Unmarshal(postEdbLib, &requestResponse)
-		if err != nil {
-			return 0, 0, err
-		}
-		indexOneId = requestResponse.Data
-		logs.Info("indexOneId: %v", indexOneId)
-
-	} else {
-		indexOneId = requestResponse.Data.BaseFromRzdIndexId
-	}
-
-	// 处理第二个指标
-	paramsLib = make(map[string]interface{})
-	paramsLib["indexCode"] = indexCodeTwo
-	postEdbLib, err = httpRequestFill(paramsLib, utils.GET_RZD_INDEX_BY_CODE)
-	if err != nil {
-		return 0, 0, fmt.Errorf("getIndexId() : Failed to get rzd index by code: %v", err)
-	}
-	var requestResponseTwo models.RequestResponse[models.BaseFromRzdIndex]
-	err = json.Unmarshal(postEdbLib, &requestResponseTwo)
-	if err != nil {
-		return 0, 0, err
-	}
-	if requestResponseTwo.Data.BaseFromRzdIndexId == 0 {
-		indexTwo := models.BaseFromRzdIndex{
-			CreateTime:           utils.GetCurrentTime(),
-			ModifyTime:           utils.GetCurrentTime(),
-			BaseFromLyClassifyId: classifyId,
-			IndexCode:            indexCodeTwo,
-			IndexName:            indexNameTwo,
-			Frequency:            frequency,
-			Unit:                 unit,
-		}
-		// 这里避免服务器宕机 出现唯一索引异常,进行分开保存
-		var requestResponse models.RequestResponse[int]
-		postEdbLib, err = httpRequestFill(indexTwo, utils.ADD_RZD_INDEX)
-		if err != nil {
-			return 0, 0, fmt.Errorf("getIndexId() : Failed to get rzd index by code: %v", err)
-		}
-		err = json.Unmarshal(postEdbLib, &requestResponse)
-		if err != nil {
-			return 0, 0, err
-		}
-		indexTwoId = requestResponse.Data
-		logs.Info("indexTwoId: %v", indexTwoId)
-	} else {
-		indexTwoId = requestResponseTwo.Data.BaseFromRzdIndexId
-	}*/
-
-	//return indexOneId, indexTwoId, nil
 }
 
 func getIndexId(prefix string, area string, suffix string) (string, error) {
@@ -359,7 +312,6 @@ func dealClassify(tableName, sheetName string) (int, error) {
 		// 一级分类已存在,使用其 ID
 		parentId = requestResponse.Data.BaseFromRzdClassifyId
 	}
-
 	// 查询二级分类是否存在
 	paramsSubLib := make(map[string]interface{})
 	paramsSubLib["classifyName"] = sheetName // 这里替换成实际的二级分类名称

+ 1 - 1
services/ruizide/processor_factory.go

@@ -20,7 +20,7 @@ func GetProcessor(tableName string, sheetName string) (ReportProcessor, error) {
 	} else if tableName == "RE_Dashboard_Export" {
 		switch sheetName {
 		case "Chart1":
-			return &RoadIndexProcessor{}, nil
+			return &ReDashboardExportOneProcessor{}, nil
 		default:
 			return nil, fmt.Errorf("unknown sheetName: %s", sheetName)
 		}

+ 47 - 7
utils/date_util.go

@@ -498,15 +498,55 @@ func GetCurrentYearMonth() string {
 	return yearMonth
 }
 
-// ConvertDateFormat 转换时间格式 dd-MM-yy --> yyyy-MM-dd
+// ConvertDateFormat 转换时间格式 MM-dd-yy --> yyyy-MM-dd
 func ConvertDateFormat(dataText string) (string, error) {
-	// 解析原始日期格式
-	parsedTime, err := time.Parse("02-01-06", dataText)
+	// 定义输入和输出的时间格式
+	inputFormat := "01-02-06"    // MM-dd-yy
+	outputFormat := "2006-01-02" // yyyy-MM-dd
+
+	// 解析输入的日期
+	date, err := time.Parse(inputFormat, dataText)
 	if err != nil {
-		return "", fmt.Errorf("failed to parse date: %v", err)
+		return "", err
+	}
+
+	// 格式化为输出格式
+	return date.Format(outputFormat), nil
+}
+
+// ConvertDateFormat2 转换时间格式 yyyy-MMQ --> 取每一季度最后一天,例如 2024-01Q --> 2024-03-31
+func ConvertDateFormat2(dataText string) (string, error) {
+
+	// 提取年份和季度信息
+	yearStr := dataText[:4]
+	quarterStr := dataText[6:7]
+
+	// 转换年份
+	year, err := strconv.Atoi(yearStr)
+	if err != nil {
+		return "", fmt.Errorf("无效的年份: %v", err)
+	}
+
+	// 根据季度选择最后一天
+	var endOfQuarter string
+	switch quarterStr {
+	case "1":
+		endOfQuarter = fmt.Sprintf("%d-03-31", year)
+	case "2":
+		endOfQuarter = fmt.Sprintf("%d-06-30", year)
+	case "3":
+		endOfQuarter = fmt.Sprintf("%d-09-30", year)
+	case "4":
+		endOfQuarter = fmt.Sprintf("%d-12-31", year)
+	default:
+		return "", fmt.Errorf("无效的季度: %s", quarterStr)
+	}
+
+	// 确保日期格式正确
+	_, err = time.Parse("2006-01-02", endOfQuarter)
+	if err != nil {
+		return "", fmt.Errorf("日期解析失败: %v", err)
 	}
 
-	// 格式化为 yyyy-MM-dd
-	formattedTime := parsedTime.Format("2006-01-02")
-	return formattedTime, nil
+	return endOfQuarter, nil
 }