gmy 8 сар өмнө
parent
commit
252f3fff5b

+ 87 - 102
cmd/processor_business_logic.go

@@ -397,10 +397,8 @@ func (p *SupplyDemandBalanceProcessor) Process(ctx context.Context, product stri
 	}
 
 	// 拿到 行关键字和列关键字
-	/*columnName := keywords[1]
-	rowVariety := keywords[0]
-	indexNamePrefix := keywords[:1]
-	indexNameSuffix := keywords[1:]*/
+	var columnName string
+	rowVariety := keywords[1]
 
 	// 提取所有表格数据
 	tableData := getTableData(reportContent)
@@ -433,15 +431,15 @@ func (p *SupplyDemandBalanceProcessor) Process(ctx context.Context, product stri
 	// 处理提取的表格数据
 	var result []models.BaseFromLyData
 
-	/*for _, data := range tableData {
-		//tableHeaders := data.Headers
-		tableRows := data.Rows
+	headers := tableData.Headers
+	rows := tableData.Rows
 
-		tableHeaders := tableRows[0]
+	for _, year := range currentYearAndNextYear {
+		columnName = year + monthSuffix
 
 		// 查找目标列
 		columnIdx := -1
-		for i, header := range tableHeaders {
+		for i, header := range headers {
 			if strings.Contains(columnName, header) {
 				columnIdx = i
 				break
@@ -451,65 +449,51 @@ func (p *SupplyDemandBalanceProcessor) Process(ctx context.Context, product stri
 			log.Printf("SupplyDemandBalanceProcessor Process() : Column '%s' not found in table", columnName)
 			continue
 		}
-
 		// 处理表格中的每一行
-		var previousRowVariety string
-		for rowIndex, row := range tableRows {
-
-			if len(row) == len(tableHeaders) {
-				previousRowVariety = row[0]
-			} else if len(row) == len(tableHeaders)-1 {
-				row = append([]string{previousRowVariety}, row...)
-				tableRows[rowIndex] = row
-			}
-
-			for _, targetMonth := range yearMonths {
-				if len(row) >= len(tableHeaders) && row[0] == rowVariety && row[1] == targetMonth {
-					if columnIdx < len(row) {
-						// 指标名称
-						indexNameList := append(indexNamePrefix, append([]string{targetMonth}, indexNameSuffix...)...)
-						indexName := strings.Join(indexNameList[:len(keywords)-2], ":")
-						// 指标编码
-						indexCode := utils.GenerateIndexCode(sourceName, indexName)
-						// 指标id获取
-						indexId, err := getIndexId(indexCode, indexName, classifyId, sourceName, keywords[len(keywords)-2], keywords[len(keywords)-1])
-						if err != nil {
-							logs.Error("SupplyDemandBalanceProcessor Process() : Failed to get index id: %v", err)
-							continue
-						}
+		for _, row := range rows {
+			if len(row) >= len(headers) && row[0] == rowVariety {
+				if columnIdx < len(row) {
+					// 指标名称
+					indexName := strings.Join(keywords[:len(keywords)-2], ":")
+					// 指标编码
+					indexCode := utils.GenerateIndexCode(sourceName, indexName)
+					// 指标id获取
+					indexId, err := getIndexId(indexCode, indexName, classifyId, sourceName, keywords[len(keywords)-2], keywords[len(keywords)-1])
+					if err != nil {
+						logs.Error("SupplyDemandBalanceProcessor Process() : Failed to get index id: %v", err)
+						continue
+					}
 
-						indexData, err := models.GetLyDataByIndexIdAndDataTime(indexId, format)
-						if err != nil {
-							logs.Error("SupplyDemandBalanceProcessor Process() : Failed to get data by index id and date: %v", err)
-							continue
-						}
-						if len(indexData) > 0 {
-							logs.Info("SupplyDemandBalanceProcessor Process() : Data already exists for index %d and date %s", indexId, dateText)
-							continue
-						}
+					indexData, err := models.GetLyDataByIndexIdAndDataTime(indexId, format)
+					if err != nil {
+						logs.Error("SupplyDemandBalanceProcessor Process() : Failed to get data by index id and date: %v", err)
+						continue
+					}
+					if len(indexData) > 0 {
+						logs.Info("SupplyDemandBalanceProcessor Process() : Data already exists for index %d and date %s", indexId, dateText)
+						continue
+					}
 
-						valueStr := row[columnIdx]
-						value, err := strconv.ParseFloat(valueStr, 64)
-						if err != nil {
-							return []models.BaseFromLyData{}, fmt.Errorf("SupplyDemandBalanceProcessor Process() : failed to parse value '%s': %v", valueStr, err)
-						}
-						// 创建并添加到结果列表
-						baseFromLyData := models.BaseFromLyData{
-							DataTime:          format,
-							Value:             value,
-							BaseFromLyIndexId: indexId,
-							IndexCode:         indexCode,
-						}
-						result = append(result, baseFromLyData)
-					} else {
-						log.Printf("SupplyDemandBalanceProcessor Process() : Column index out of range for row '%s', '%s'", rowVariety, columnName)
+					valueStr := row[columnIdx]
+					value, err := strconv.ParseFloat(valueStr, 64)
+					if err != nil {
+						return []models.BaseFromLyData{}, fmt.Errorf("SupplyDemandBalanceProcessor Process() : failed to parse value '%s': %v", valueStr, err)
 					}
-					break
+					// 创建并添加到结果列表
+					baseFromLyData := models.BaseFromLyData{
+						DataTime:          format,
+						Value:             value,
+						BaseFromLyIndexId: indexId,
+						IndexCode:         indexCode,
+					}
+					result = append(result, baseFromLyData)
+				} else {
+					log.Printf("SupplyDemandBalanceProcessor Process() : Column index out of range for row '%s', '%s'", rowVariety, columnName)
 				}
+				break
 			}
-
 		}
-	}*/
+	}
 	return result, nil
 }
 
@@ -764,58 +748,59 @@ func getNoHeadTableData(reportContent string) []TableData {
 	return tableData
 }
 
-// 获取表格数据 有tr td标签的数据 列转行存储==>Rows, 行转头存储==>Headers
+// 获取表格数据 获取id 为 a_content 的 div 中的第一个表格 会拼上列上的合并单元格
 func getTableData(reportContent string) TableData {
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(reportContent))
 	if err != nil {
-		fmt.Println("Error:", err)
-	}
-
-	var tableData TableData
-
-	// 查找 id 为 a_content 的 div
-	doc.Find("#a_content").Each(func(index int, divHtml *goquery.Selection) {
-		divHtml.Find("table").Each(func(index int, tableHtml *goquery.Selection) {
-			// 提取 Headers(列信息)
-			tableHtml.Find("tr").Each(func(rowIndex int, rowHtml *goquery.Selection) {
-				if rowIndex == 0 { // 处理第一行(包含年度信息)
-					var headerRow []string
-					rowHtml.Find("td").Each(func(colIndex int, colHtml *goquery.Selection) {
-						text := colHtml.Text()
-						if colIndex > 0 { // 忽略第一列“年度(10/9月)”
-							headerRow = append(headerRow, strings.TrimSpace(text))
+		log.Fatal(err)
+	}
+
+	tableData := &TableData{}
+
+	// 只提取 id 为 a_content 的 div 中的第一个表格
+	firstTable := doc.Find("div#a_content table").First()
+
+	var combinedHeaders []string
+
+	// 提取表头
+	firstTable.Find("tr").Each(func(i int, row *goquery.Selection) {
+		if i == 0 {
+			// 第一行处理合并单元格,保存到 combinedHeaders
+			row.Find("td").Each(func(j int, cell *goquery.Selection) {
+				if j > 0 { // 跳过左上角的“年度(10/9月)”
+					colspan, exists := cell.Attr("colspan")
+					if exists {
+						spanCount := 0
+						fmt.Sscanf(colspan, "%d", &spanCount)
+						for k := 0; k < spanCount; k++ {
+							combinedHeaders = append(combinedHeaders, strings.TrimSpace(cell.Text()))
 						}
-					})
-					if len(headerRow) > 0 {
-						tableData.Headers = append(tableData.Headers, headerRow...)
+					} else {
+						combinedHeaders = append(combinedHeaders, strings.TrimSpace(cell.Text()))
 					}
-				} else if rowIndex == 1 { // 处理第二行(列标题)
-					rowHtml.Find("td").Each(func(colIndex int, colHtml *goquery.Selection) {
-						text := colHtml.Text()
-						if colIndex > 0 { // 忽略第一列“年度(10/9月)”
-							tableData.Headers = append(tableData.Headers, strings.TrimSpace(text))
-						}
-					})
 				}
 			})
-
-			// 提取数据行
-			tableHtml.Find("tr").Each(func(rowIndex int, rowHtml *goquery.Selection) {
-				if rowIndex > 1 { // 从第三行开始
-					var row []string
-					rowHtml.Find("td").Each(func(colIndex int, colHtml *goquery.Selection) {
-						text := colHtml.Text()
-						row = append(row, strings.TrimSpace(text))
-					})
-					if len(row) > 0 {
-						tableData.Rows = append(tableData.Rows, row)
-					}
+		} else if i == 1 {
+			// 第二行处理具体标题,组合后保存到 Headers
+			row.Find("td").Each(func(j int, cell *goquery.Selection) {
+				if j < len(combinedHeaders) {
+					fullHeader := combinedHeaders[j] + strings.TrimSpace(cell.Text())
+					tableData.Headers = append(tableData.Headers, fullHeader)
 				}
 			})
-		})
+		} else {
+			// 处理数据行
+			var rowData []string
+			row.Find("td").Each(func(j int, cell *goquery.Selection) {
+				rowData = append(rowData, strings.TrimSpace(cell.Text()))
+			})
+			if len(rowData) > 0 {
+				tableData.Rows = append(tableData.Rows, rowData)
+			}
+		}
 	})
 
-	return tableData
+	return *tableData
 }
 
 // 判断字符串是否是数字