package kpler import ( "encoding/json" "eta/eta_data_analysis/models" "eta/eta_data_analysis/utils" "fmt" "net/url" "regexp" "strings" "time" "github.com/xuri/excelize/v2" ) // ExcelData represents structured data extracted from an Excel file type ExcelData struct { Headers []string Rows [][]string SheetName string } // ParseExcel reads and parses data from an Excel file func parseExcel(filePath string) (*ExcelData, error) { // Open the Excel file f, err := excelize.OpenFile(filePath) if err != nil { return nil, fmt.Errorf("error opening Excel file: %w", err) } defer f.Close() // Get the first sheet by default sheetName := f.GetSheetList()[0] // Get all rows from the sheet rows, err := f.GetRows(sheetName) if err != nil { return nil, fmt.Errorf("error reading rows from sheet %s: %w", sheetName, err) } // Check if there's data if len(rows) == 0 { return nil, fmt.Errorf("no data found in sheet %s", sheetName) } // Create structured data excelData := &ExcelData{ SheetName: sheetName, Headers: rows[0], Rows: rows[1:], } return excelData, nil } // ScanSheetForFormulas scans an entire sheet for formulas func scanSheetForFormulas(filePath, sheetName string) (map[int]string, error) { // Open the Excel file f, err := excelize.OpenFile(filePath) if err != nil { return nil, fmt.Errorf("error opening Excel file: %w", err) } defer f.Close() formulas := make(map[int]string) // Get sheet dimensions dimension, err := f.GetSheetDimension(sheetName) if err != nil { return nil, fmt.Errorf("error getting sheet dimension: %w", err) } // Parse dimension to get the range (e.g., "A1:K42") parts := strings.Split(dimension, ":") if len(parts) != 2 { // Use a default range if dimension is not in expected format parts = []string{"A1", "Z100"} } // Extract the column letters and row numbers startCol, startRow, err := excelize.CellNameToCoordinates(parts[0]) if err != nil { startCol, startRow = 1, 1 } endCol, endRow, err := excelize.CellNameToCoordinates(parts[1]) if err != nil { endCol, endRow = 26, 100 // Default to Z100 } // Scan cells for formulas for row := startRow; row <= endRow; row++ { for col := startCol; col <= endCol; col++ { colName, err := excelize.ColumnNumberToName(col) if err != nil { continue } cellCoord := fmt.Sprintf("%s%d", colName, row) formula, err := f.GetCellFormula(sheetName, cellCoord) if err == nil && formula != "" { // Store all formulas or only Kpler-related ones (adjust as needed) //if strings.Contains(formula, "kpler") || strings.Contains(formula, "GetFlows") { if strings.Contains(formula, "GETFLOWS") { // fmt.Println("row: ", row) // fmt.Println("col: ", col) // fmt.Println("GetCellFormula: ", formula) if _, ok := formulas[col-1]; !ok { formulas[col-1] = formula } } } } } return formulas, nil } // ProcessKplerData 解析excel获取指标对应的公式和数据 func ProcessKplerData(filePath string) (indexData []models.KplerExcelIndexData, err error) { defer func() { if err != nil { utils.FileLog.Info(fmt.Sprintf("ProcessKplerData error: %v\n", err)) } }() // Open the Excel file f, err := excelize.OpenFile(filePath) if err != nil { return nil, fmt.Errorf("error opening Excel file: %w", err) } defer f.Close() // Get the first sheet by default data, err := parseExcel(filePath) if err != nil { return nil, fmt.Errorf("error parsing Excel file: %w", err) } // Look for Kpler formulas formulas, err := scanSheetForFormulas(filePath, data.SheetName) if err != nil { return nil, fmt.Errorf("error scanning for formulas: %v", err) } fmt.Println("Found formulas:", formulas) // Initialize maps to store column information indexMap := make(map[int]*models.KplerExcelIndexData) // Maps column to index data dateColMap := make(map[int][]int) // Maps date column to its data columns dateValues := make(map[int][]string) // Maps date column to its values // First pass: identify data columns and their corresponding date columns // Headers are in the third row (index 2) if len(data.Rows) < 3 { return nil, fmt.Errorf("Excel file does not have enough rows") } titles := data.Headers titleMap := make(map[int]string) for j, title := range titles { titleMap[j] = title } headers := data.Rows[1] // Get headers from the second row fmt.Println("Headers:", headers) // First pass: find all date columns var dateCols []int for j, header := range headers { if header == "Period End Date" { dateCols = append(dateCols, j) dateValues[j] = make([]string, 0) dateColMap[j] = make([]int, 0) } } fmt.Println("Date columns:", dateCols) // Second pass: associate data columns with their nearest date column for j, header := range headers { if header == "" || header == "Date" || header == "Period End Date" { continue } // Find the nearest date column after this data column nearestDateCol := -1 for _, dateCol := range dateCols { if dateCol > j { nearestDateCol = dateCol break } } if nearestDateCol != -1 { // This is a data column indexMap[j] = &models.KplerExcelIndexData{ Name: header, DataPoints: make([]models.KplerDataPoint, 0), } // Associate this data column with its date column dateColMap[nearestDateCol] = append(dateColMap[nearestDateCol], j) // Process formula for this column if it exists if formula, ok := formulas[j]; ok { indexMap[j].Request = formula } else { // Look backwards for the formula for k := j; k >= 0; k-- { if formula, ok := formulas[k]; ok { indexMap[j].Request = formula break } } } // 获取标题 title, ok := titleMap[j] if ok && title != "" { indexMap[j].Title = title }else{ // Look backwards for the formula for k := j; k >= 0; k-- { title, ok := titleMap[k] if ok && title != "" { indexMap[j].Title = title break } } } } } fmt.Println("Data columns mapping:", dateColMap) // Create a map to store data values for each column dataValues := make(map[int][]string) for j := range indexMap { dataValues[j] = make([]string, 0) } // First pass: collect all values for i := 2; i < len(data.Rows); i++ { row := data.Rows[i] if len(row) == 0 { continue } for j, cell := range row { if cell == "" { continue } // If this is a date column, store its values if _, exists := dateValues[j]; exists { // 对日期进行格式化 dateFormat := "01-02-06" date, err := time.Parse(dateFormat, cell) if err != nil { fmt.Println("Error parsing date:", err) continue } dateValues[j] = append(dateValues[j], date.Format(utils.FormatDate)) continue } // If this is a data column, store its values if _, exists := indexMap[j]; exists { dataValues[j] = append(dataValues[j], cell) } } } fmt.Println("Date values:", dateValues) fmt.Println("Data values:", dataValues) // Second pass: combine data and dates for dateCol, dataCols := range dateColMap { dates := dateValues[dateCol] if len(dates) == 0 { fmt.Printf("No dates found for date column %d\n", dateCol) continue } fmt.Printf("Processing date column %d with data columns %v\n", dateCol, dataCols) // Process each data column associated with this date column for _, dataCol := range dataCols { if idx, exists := indexMap[dataCol]; exists { values := dataValues[dataCol] fmt.Printf("Column %d (%s): %d dates, %d values\n", dataCol, idx.Name, len(dates), len(values)) // Use the shorter length to avoid index out of range length := len(dates) if len(values) < length { length = len(values) } // Combine data and dates for i := 0; i < length; i++ { idx.DataPoints = append(idx.DataPoints, models.KplerDataPoint{ EndDate: dates[i], Value: values[i], }) } fmt.Printf("Added %d data points for column %s\n", length, idx.Name) } } } // Convert map to slice for _, index := range indexMap { if len(index.DataPoints) > 0 { indexData = append(indexData, *index) } } return indexData, nil } func ParseSpecificKplerFormulaV2(specificFormula string) (reqObj models.KplerFlowsRequest, err error) { // Remove Excel string concatenation specificFormula = strings.ReplaceAll(specificFormula, `" & "`, "") specificFormula = strings.ReplaceAll(specificFormula, `"&"`, "") specificFormula = strings.ReplaceAll(specificFormula, `&`, "") specificFormula = strings.ReplaceAll(specificFormula, `\"`, `"`) // Get content inside parentheses re := regexp.MustCompile(`_xldudf_KPLER_GETFLOWS\((.*)\)`) matches := re.FindStringSubmatch(specificFormula) if len(matches) < 2 { // Try the old format re = regexp.MustCompile(`\((.*)\)`) matches = re.FindStringSubmatch(specificFormula) if len(matches) < 2 { err = fmt.Errorf("没有找到括号里的内容") return } } // Get the parameter string encodedParam := matches[1] // Remove surrounding quotes if present encodedParam = strings.Trim(encodedParam, `"`) // Try direct JSON parsing first var jsonObj models.KplerFlowsRequest if err = json.Unmarshal([]byte(encodedParam), &jsonObj); err == nil { return jsonObj, nil } // If direct parsing fails, try URL decoding decodedStr, err := url.QueryUnescape(encodedParam) if err != nil { // If URL decoding fails, try removing escapes and parse again cleanStr := strings.ReplaceAll(encodedParam, `\`, "") if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil { // Try one more time with manual concatenation cleanup cleanStr = strings.ReplaceAll(cleanStr, `" "`, "") if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil { return reqObj, fmt.Errorf("error parsing formula: %v", err) } } return jsonObj, nil } // Remove surrounding quotes if present in decoded string decodedStr = strings.Trim(decodedStr, `"`) // Try parsing the decoded string if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil { // Try one more time with manual cleanup decodedStr = strings.ReplaceAll(decodedStr, `" "`, "") if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil { return reqObj, fmt.Errorf("error parsing decoded JSON: %v", err) } } return jsonObj, nil }