Explorar o código

解析excel数据

xyxie hai 2 semanas
pai
achega
91468a1ac6
Modificouse 1 ficheiro con 83 adicións e 52 borrados
  1. 83 52
      services/kpler/excel.go

+ 83 - 52
services/kpler/excel.go

@@ -7,7 +7,6 @@ import (
 	"fmt"
 	"net/url"
 	"regexp"
-	"strconv"
 	"strings"
 
 	"github.com/xuri/excelize/v2"
@@ -144,9 +143,12 @@ func ProcessKplerData(filePath string) (indexData []models.KplerExcelIndexData,
 		return nil, fmt.Errorf("error scanning for formulas: %v", err)
 	}
 
+	fmt.Println("Found formulas:", formulas)
+
 	// Initialize maps to store column information
 	indexMap := make(map[int]*models.KplerExcelIndexData)    // Maps column to index data
-	dateMap := make(map[int]int)                             // Maps data column to its end date column
+	dateColMap := make(map[int][]int)                        // Maps date column to its data columns
+	dateValues := make(map[int][]string)                     // Maps date column to its values
 
 	// First pass: identify data columns and their corresponding date columns
 	// Headers are in the second row (index 1)
@@ -155,25 +157,44 @@ func ProcessKplerData(filePath string) (indexData []models.KplerExcelIndexData,
 	}
 
 	headers := data.Rows[1] // Get headers from the second row
+	fmt.Println("Headers:", headers)
+
+	// First pass: find all date columns
+	var dateCols []int
 	for j, header := range headers {
-		// Skip empty headers
-		if header == "" {
+		if header == "Period End Date" {
+			dateCols = append(dateCols, j)
+			dateValues[j] = make([]string, 0)
+			dateColMap[j] = make([]int, 0)
+		}
+	}
+	fmt.Println("Date columns:", dateCols)
+
+	// Second pass: associate data columns with their nearest date column
+	for j, header := range headers {
+		if header == "" || header == "Date" || header == "Period End Date" {
 			continue
 		}
 
-		// Check if this is a Period End Date column
-		if header == "Period End Date" {
-			// The data column is typically one column before the date
-			if j > 0 && headers[j-1] != "Date" {
-				dateMap[j-1] = j // Map the previous column (data) to this date column
+		// Find the nearest date column after this data column
+		nearestDateCol := -1
+		for _, dateCol := range dateCols {
+			if dateCol > j {
+				nearestDateCol = dateCol
+				break
 			}
-		} else if header != "Date" {
+		}
+
+		if nearestDateCol != -1 {
 			// This is a data column
 			indexMap[j] = &models.KplerExcelIndexData{
 				Name:       header,
 				DataPoints: make([]models.KplerDataPoint, 0),
 			}
 
+			// Associate this data column with its date column
+			dateColMap[nearestDateCol] = append(dateColMap[nearestDateCol], j)
+
 			// Process formula for this column if it exists
 			if formula, ok := formulas[j]; ok {
 				indexMap[j].Request = formula
@@ -189,10 +210,16 @@ func ProcessKplerData(filePath string) (indexData []models.KplerExcelIndexData,
 		}
 	}
 
-	
-	pendingData := make(map[int][]models.DataPoint) // Maps column index to pending data points
+	fmt.Println("Data columns mapping:", dateColMap)
+
+	// Create a map to store data values for each column
+	dataValues := make(map[int][]string)
+	for j := range indexMap {
+		dataValues[j] = make([]string, 0)
+	}
 
-	for i := 2; i < len(data.Rows); i++ { // Start from row 3 (index 2) for data
+	// First pass: collect all values
+	for i := 2; i < len(data.Rows); i++ {
 		row := data.Rows[i]
 		if len(row) == 0 {
 			continue
@@ -203,50 +230,54 @@ func ProcessKplerData(filePath string) (indexData []models.KplerExcelIndexData,
 				continue
 			}
 
-			// Check if this is a date column
-			if _, exists := indexMap[j]; !exists {
-				// This might be a date column, check if it's used as a date column
-				isDateCol := false
-				for dataCol, dateCol := range dateMap {
-					if dateCol == j {
-						isDateCol = true
-						// This is a date column, update all pending data points for the corresponding data column
-						if pending, hasPending := pendingData[dataCol]; hasPending {
-							for _, dp := range pending {
-								if idx, exists := indexMap[dataCol]; exists {
-									idx.DataPoints = append(idx.DataPoints, models.KplerDataPoint{
-										EndDate: cell,
-										Value:   dp.Value,
-									})
-								}
-							}
-							// Clear pending data for this column
-							delete(pendingData, dataCol)
-						}
-						break
-					}
-				}
-				if isDateCol {
-					continue
-				}
-			}
-
-			// Try to convert cell value to float64
-			_, err := strconv.ParseFloat(cell, 64)
-			if err != nil {
-				fmt.Printf("Warning: Error parsing value at row %d, col %d: %v\n", i+1, j+1, err)
+			// If this is a date column, store its values
+			if _, exists := dateValues[j]; exists {
+				dateValues[j] = append(dateValues[j], cell)
 				continue
 			}
 
-			// Store the data point for later processing when we find the date
+			// If this is a data column, store its values
 			if _, exists := indexMap[j]; exists {
-				if _, hasPending := pendingData[j]; !hasPending {
-					pendingData[j] = make([]models.DataPoint, 0)
+				dataValues[j] = append(dataValues[j], cell)
+			}
+		}
+	}
+
+	fmt.Println("Date values:", dateValues)
+	fmt.Println("Data values:", dataValues)
+
+	// Second pass: combine data and dates
+	for dateCol, dataCols := range dateColMap {
+		dates := dateValues[dateCol]
+		if len(dates) == 0 {
+			fmt.Printf("No dates found for date column %d\n", dateCol)
+			continue
+		}
+
+		fmt.Printf("Processing date column %d with data columns %v\n", dateCol, dataCols)
+
+		// Process each data column associated with this date column
+		for _, dataCol := range dataCols {
+			if idx, exists := indexMap[dataCol]; exists {
+				values := dataValues[dataCol]
+				
+				fmt.Printf("Column %d (%s): %d dates, %d values\n", dataCol, idx.Name, len(dates), len(values))
+
+				// Use the shorter length to avoid index out of range
+				length := len(dates)
+				if len(values) < length {
+					length = len(values)
+				}
+
+				// Combine data and dates
+				for i := 0; i < length; i++ {
+					idx.DataPoints = append(idx.DataPoints, models.KplerDataPoint{
+						EndDate: dates[i],
+						Value:   values[i],
+					})
 				}
-				pendingData[j] = append(pendingData[j], models.DataPoint{
-					Value: cell,
-					Row:   i,
-				})
+
+				fmt.Printf("Added %d data points for column %s\n", length, idx.Name)
 			}
 		}
 	}