|
@@ -397,10 +397,8 @@ func (p *SupplyDemandBalanceProcessor) Process(ctx context.Context, product stri
|
|
}
|
|
}
|
|
|
|
|
|
// 拿到 行关键字和列关键字
|
|
// 拿到 行关键字和列关键字
|
|
- /*columnName := keywords[1]
|
|
|
|
- rowVariety := keywords[0]
|
|
|
|
- indexNamePrefix := keywords[:1]
|
|
|
|
- indexNameSuffix := keywords[1:]*/
|
|
|
|
|
|
+ var columnName string
|
|
|
|
+ rowVariety := keywords[1]
|
|
|
|
|
|
// 提取所有表格数据
|
|
// 提取所有表格数据
|
|
tableData := getTableData(reportContent)
|
|
tableData := getTableData(reportContent)
|
|
@@ -433,15 +431,15 @@ func (p *SupplyDemandBalanceProcessor) Process(ctx context.Context, product stri
|
|
// 处理提取的表格数据
|
|
// 处理提取的表格数据
|
|
var result []models.BaseFromLyData
|
|
var result []models.BaseFromLyData
|
|
|
|
|
|
- /*for _, data := range tableData {
|
|
|
|
- //tableHeaders := data.Headers
|
|
|
|
- tableRows := data.Rows
|
|
|
|
|
|
+ headers := tableData.Headers
|
|
|
|
+ rows := tableData.Rows
|
|
|
|
|
|
- tableHeaders := tableRows[0]
|
|
|
|
|
|
+ for _, year := range currentYearAndNextYear {
|
|
|
|
+ columnName = year + monthSuffix
|
|
|
|
|
|
// 查找目标列
|
|
// 查找目标列
|
|
columnIdx := -1
|
|
columnIdx := -1
|
|
- for i, header := range tableHeaders {
|
|
|
|
|
|
+ for i, header := range headers {
|
|
if strings.Contains(columnName, header) {
|
|
if strings.Contains(columnName, header) {
|
|
columnIdx = i
|
|
columnIdx = i
|
|
break
|
|
break
|
|
@@ -451,65 +449,51 @@ func (p *SupplyDemandBalanceProcessor) Process(ctx context.Context, product stri
|
|
log.Printf("SupplyDemandBalanceProcessor Process() : Column '%s' not found in table", columnName)
|
|
log.Printf("SupplyDemandBalanceProcessor Process() : Column '%s' not found in table", columnName)
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
-
|
|
|
|
// 处理表格中的每一行
|
|
// 处理表格中的每一行
|
|
- var previousRowVariety string
|
|
|
|
- for rowIndex, row := range tableRows {
|
|
|
|
-
|
|
|
|
- if len(row) == len(tableHeaders) {
|
|
|
|
- previousRowVariety = row[0]
|
|
|
|
- } else if len(row) == len(tableHeaders)-1 {
|
|
|
|
- row = append([]string{previousRowVariety}, row...)
|
|
|
|
- tableRows[rowIndex] = row
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- for _, targetMonth := range yearMonths {
|
|
|
|
- if len(row) >= len(tableHeaders) && row[0] == rowVariety && row[1] == targetMonth {
|
|
|
|
- if columnIdx < len(row) {
|
|
|
|
- // 指标名称
|
|
|
|
- indexNameList := append(indexNamePrefix, append([]string{targetMonth}, indexNameSuffix...)...)
|
|
|
|
- indexName := strings.Join(indexNameList[:len(keywords)-2], ":")
|
|
|
|
- // 指标编码
|
|
|
|
- indexCode := utils.GenerateIndexCode(sourceName, indexName)
|
|
|
|
- // 指标id获取
|
|
|
|
- indexId, err := getIndexId(indexCode, indexName, classifyId, sourceName, keywords[len(keywords)-2], keywords[len(keywords)-1])
|
|
|
|
- if err != nil {
|
|
|
|
- logs.Error("SupplyDemandBalanceProcessor Process() : Failed to get index id: %v", err)
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
|
|
+ for _, row := range rows {
|
|
|
|
+ if len(row) >= len(headers) && row[0] == rowVariety {
|
|
|
|
+ if columnIdx < len(row) {
|
|
|
|
+ // 指标名称
|
|
|
|
+ indexName := strings.Join(keywords[:len(keywords)-2], ":")
|
|
|
|
+ // 指标编码
|
|
|
|
+ indexCode := utils.GenerateIndexCode(sourceName, indexName)
|
|
|
|
+ // 指标id获取
|
|
|
|
+ indexId, err := getIndexId(indexCode, indexName, classifyId, sourceName, keywords[len(keywords)-2], keywords[len(keywords)-1])
|
|
|
|
+ if err != nil {
|
|
|
|
+ logs.Error("SupplyDemandBalanceProcessor Process() : Failed to get index id: %v", err)
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
|
|
- indexData, err := models.GetLyDataByIndexIdAndDataTime(indexId, format)
|
|
|
|
- if err != nil {
|
|
|
|
- logs.Error("SupplyDemandBalanceProcessor Process() : Failed to get data by index id and date: %v", err)
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
- if len(indexData) > 0 {
|
|
|
|
- logs.Info("SupplyDemandBalanceProcessor Process() : Data already exists for index %d and date %s", indexId, dateText)
|
|
|
|
- continue
|
|
|
|
- }
|
|
|
|
|
|
+ indexData, err := models.GetLyDataByIndexIdAndDataTime(indexId, format)
|
|
|
|
+ if err != nil {
|
|
|
|
+ logs.Error("SupplyDemandBalanceProcessor Process() : Failed to get data by index id and date: %v", err)
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
+ if len(indexData) > 0 {
|
|
|
|
+ logs.Info("SupplyDemandBalanceProcessor Process() : Data already exists for index %d and date %s", indexId, dateText)
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
|
|
|
|
- valueStr := row[columnIdx]
|
|
|
|
- value, err := strconv.ParseFloat(valueStr, 64)
|
|
|
|
- if err != nil {
|
|
|
|
- return []models.BaseFromLyData{}, fmt.Errorf("SupplyDemandBalanceProcessor Process() : failed to parse value '%s': %v", valueStr, err)
|
|
|
|
- }
|
|
|
|
- // 创建并添加到结果列表
|
|
|
|
- baseFromLyData := models.BaseFromLyData{
|
|
|
|
- DataTime: format,
|
|
|
|
- Value: value,
|
|
|
|
- BaseFromLyIndexId: indexId,
|
|
|
|
- IndexCode: indexCode,
|
|
|
|
- }
|
|
|
|
- result = append(result, baseFromLyData)
|
|
|
|
- } else {
|
|
|
|
- log.Printf("SupplyDemandBalanceProcessor Process() : Column index out of range for row '%s', '%s'", rowVariety, columnName)
|
|
|
|
|
|
+ valueStr := row[columnIdx]
|
|
|
|
+ value, err := strconv.ParseFloat(valueStr, 64)
|
|
|
|
+ if err != nil {
|
|
|
|
+ return []models.BaseFromLyData{}, fmt.Errorf("SupplyDemandBalanceProcessor Process() : failed to parse value '%s': %v", valueStr, err)
|
|
}
|
|
}
|
|
- break
|
|
|
|
|
|
+ // 创建并添加到结果列表
|
|
|
|
+ baseFromLyData := models.BaseFromLyData{
|
|
|
|
+ DataTime: format,
|
|
|
|
+ Value: value,
|
|
|
|
+ BaseFromLyIndexId: indexId,
|
|
|
|
+ IndexCode: indexCode,
|
|
|
|
+ }
|
|
|
|
+ result = append(result, baseFromLyData)
|
|
|
|
+ } else {
|
|
|
|
+ log.Printf("SupplyDemandBalanceProcessor Process() : Column index out of range for row '%s', '%s'", rowVariety, columnName)
|
|
}
|
|
}
|
|
|
|
+ break
|
|
}
|
|
}
|
|
-
|
|
|
|
}
|
|
}
|
|
- }*/
|
|
|
|
|
|
+ }
|
|
return result, nil
|
|
return result, nil
|
|
}
|
|
}
|
|
|
|
|
|
@@ -764,58 +748,59 @@ func getNoHeadTableData(reportContent string) []TableData {
|
|
return tableData
|
|
return tableData
|
|
}
|
|
}
|
|
|
|
|
|
-// 获取表格数据 有tr td标签的数据 列转行存储==>Rows, 行转头存储==>Headers
|
|
|
|
|
|
+// 获取表格数据 获取id 为 a_content 的 div 中的第一个表格 会拼上列上的合并单元格
|
|
func getTableData(reportContent string) TableData {
|
|
func getTableData(reportContent string) TableData {
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(reportContent))
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(reportContent))
|
|
if err != nil {
|
|
if err != nil {
|
|
- fmt.Println("Error:", err)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- var tableData TableData
|
|
|
|
-
|
|
|
|
- // 查找 id 为 a_content 的 div
|
|
|
|
- doc.Find("#a_content").Each(func(index int, divHtml *goquery.Selection) {
|
|
|
|
- divHtml.Find("table").Each(func(index int, tableHtml *goquery.Selection) {
|
|
|
|
- // 提取 Headers(列信息)
|
|
|
|
- tableHtml.Find("tr").Each(func(rowIndex int, rowHtml *goquery.Selection) {
|
|
|
|
- if rowIndex == 0 { // 处理第一行(包含年度信息)
|
|
|
|
- var headerRow []string
|
|
|
|
- rowHtml.Find("td").Each(func(colIndex int, colHtml *goquery.Selection) {
|
|
|
|
- text := colHtml.Text()
|
|
|
|
- if colIndex > 0 { // 忽略第一列“年度(10/9月)”
|
|
|
|
- headerRow = append(headerRow, strings.TrimSpace(text))
|
|
|
|
|
|
+ log.Fatal(err)
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ tableData := &TableData{}
|
|
|
|
+
|
|
|
|
+ // 只提取 id 为 a_content 的 div 中的第一个表格
|
|
|
|
+ firstTable := doc.Find("div#a_content table").First()
|
|
|
|
+
|
|
|
|
+ var combinedHeaders []string
|
|
|
|
+
|
|
|
|
+ // 提取表头
|
|
|
|
+ firstTable.Find("tr").Each(func(i int, row *goquery.Selection) {
|
|
|
|
+ if i == 0 {
|
|
|
|
+ // 第一行处理合并单元格,保存到 combinedHeaders
|
|
|
|
+ row.Find("td").Each(func(j int, cell *goquery.Selection) {
|
|
|
|
+ if j > 0 { // 跳过左上角的“年度(10/9月)”
|
|
|
|
+ colspan, exists := cell.Attr("colspan")
|
|
|
|
+ if exists {
|
|
|
|
+ spanCount := 0
|
|
|
|
+ fmt.Sscanf(colspan, "%d", &spanCount)
|
|
|
|
+ for k := 0; k < spanCount; k++ {
|
|
|
|
+ combinedHeaders = append(combinedHeaders, strings.TrimSpace(cell.Text()))
|
|
}
|
|
}
|
|
- })
|
|
|
|
- if len(headerRow) > 0 {
|
|
|
|
- tableData.Headers = append(tableData.Headers, headerRow...)
|
|
|
|
|
|
+ } else {
|
|
|
|
+ combinedHeaders = append(combinedHeaders, strings.TrimSpace(cell.Text()))
|
|
}
|
|
}
|
|
- } else if rowIndex == 1 { // 处理第二行(列标题)
|
|
|
|
- rowHtml.Find("td").Each(func(colIndex int, colHtml *goquery.Selection) {
|
|
|
|
- text := colHtml.Text()
|
|
|
|
- if colIndex > 0 { // 忽略第一列“年度(10/9月)”
|
|
|
|
- tableData.Headers = append(tableData.Headers, strings.TrimSpace(text))
|
|
|
|
- }
|
|
|
|
- })
|
|
|
|
}
|
|
}
|
|
})
|
|
})
|
|
-
|
|
|
|
- // 提取数据行
|
|
|
|
- tableHtml.Find("tr").Each(func(rowIndex int, rowHtml *goquery.Selection) {
|
|
|
|
- if rowIndex > 1 { // 从第三行开始
|
|
|
|
- var row []string
|
|
|
|
- rowHtml.Find("td").Each(func(colIndex int, colHtml *goquery.Selection) {
|
|
|
|
- text := colHtml.Text()
|
|
|
|
- row = append(row, strings.TrimSpace(text))
|
|
|
|
- })
|
|
|
|
- if len(row) > 0 {
|
|
|
|
- tableData.Rows = append(tableData.Rows, row)
|
|
|
|
- }
|
|
|
|
|
|
+ } else if i == 1 {
|
|
|
|
+ // 第二行处理具体标题,组合后保存到 Headers
|
|
|
|
+ row.Find("td").Each(func(j int, cell *goquery.Selection) {
|
|
|
|
+ if j < len(combinedHeaders) {
|
|
|
|
+ fullHeader := combinedHeaders[j] + strings.TrimSpace(cell.Text())
|
|
|
|
+ tableData.Headers = append(tableData.Headers, fullHeader)
|
|
}
|
|
}
|
|
})
|
|
})
|
|
- })
|
|
|
|
|
|
+ } else {
|
|
|
|
+ // 处理数据行
|
|
|
|
+ var rowData []string
|
|
|
|
+ row.Find("td").Each(func(j int, cell *goquery.Selection) {
|
|
|
|
+ rowData = append(rowData, strings.TrimSpace(cell.Text()))
|
|
|
|
+ })
|
|
|
|
+ if len(rowData) > 0 {
|
|
|
|
+ tableData.Rows = append(tableData.Rows, rowData)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
})
|
|
})
|
|
|
|
|
|
- return tableData
|
|
|
|
|
|
+ return *tableData
|
|
}
|
|
}
|
|
|
|
|
|
// 判断字符串是否是数字
|
|
// 判断字符串是否是数字
|