123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324 |
- package kpler
- import (
- "encoding/json"
- "eta/eta_data_analysis/models"
- "eta/eta_data_analysis/utils"
- "fmt"
- "net/url"
- "regexp"
- "strconv"
- "strings"
- "github.com/xuri/excelize/v2"
- )
- // ExcelData represents structured data extracted from an Excel file
- type ExcelData struct {
- Headers []string
- Rows [][]string
- SheetName string
- }
- // ParseExcel reads and parses data from an Excel file
- func parseExcel(filePath string) (*ExcelData, error) {
- // Open the Excel file
- f, err := excelize.OpenFile(filePath)
- if err != nil {
- return nil, fmt.Errorf("error opening Excel file: %w", err)
- }
- defer f.Close()
- // Get the first sheet by default
- sheetName := f.GetSheetList()[0]
-
- // Get all rows from the sheet
- rows, err := f.GetRows(sheetName)
- if err != nil {
- return nil, fmt.Errorf("error reading rows from sheet %s: %w", sheetName, err)
- }
- // Check if there's data
- if len(rows) == 0 {
- return nil, fmt.Errorf("no data found in sheet %s", sheetName)
- }
- // Create structured data
- excelData := &ExcelData{
- SheetName: sheetName,
- Headers: rows[0],
- Rows: rows[1:],
- }
- return excelData, nil
- }
- // ScanSheetForFormulas scans an entire sheet for formulas
- func scanSheetForFormulas(filePath, sheetName string) (map[int]string, error) {
- // Open the Excel file
- f, err := excelize.OpenFile(filePath)
- if err != nil {
- return nil, fmt.Errorf("error opening Excel file: %w", err)
- }
- defer f.Close()
- formulas := make(map[int]string)
-
- // Get sheet dimensions
- dimension, err := f.GetSheetDimension(sheetName)
- if err != nil {
- return nil, fmt.Errorf("error getting sheet dimension: %w", err)
- }
-
- // Parse dimension to get the range (e.g., "A1:K42")
- parts := strings.Split(dimension, ":")
- if len(parts) != 2 {
- // Use a default range if dimension is not in expected format
- parts = []string{"A1", "Z100"}
- }
-
- // Extract the column letters and row numbers
- startCol, startRow, err := excelize.CellNameToCoordinates(parts[0])
- if err != nil {
- startCol, startRow = 1, 1
- }
-
- endCol, endRow, err := excelize.CellNameToCoordinates(parts[1])
- if err != nil {
- endCol, endRow = 26, 100 // Default to Z100
- }
-
- // Scan cells for formulas
- for row := startRow; row <= endRow; row++ {
- for col := startCol; col <= endCol; col++ {
- colName, err := excelize.ColumnNumberToName(col)
- if err != nil {
- continue
- }
-
- cellCoord := fmt.Sprintf("%s%d", colName, row)
- formula, err := f.GetCellFormula(sheetName, cellCoord)
-
- if err == nil && formula != "" {
- // Store all formulas or only Kpler-related ones (adjust as needed)
- //if strings.Contains(formula, "kpler") || strings.Contains(formula, "GetFlows") {
- if strings.Contains(formula, "GETFLOWS") {
- // fmt.Println("row: ", row)
- // fmt.Println("col: ", col)
- // fmt.Println("GetCellFormula: ", formula)
- if _, ok := formulas[col-1]; !ok {
- formulas[col-1] = formula
- }
- }
- }
- }
- }
-
- return formulas, nil
- }
- // ProcessKplerData 解析excel获取指标对应的公式和数据
- func ProcessKplerData(filePath string) (indexData []models.KplerExcelIndexData, err error) {
- defer func() {
- if err != nil {
- utils.FileLog.Info(fmt.Sprintf("ProcessKplerData error: %v\n", err))
- }
- }()
- // Open the Excel file
- f, err := excelize.OpenFile(filePath)
- if err != nil {
- return nil, fmt.Errorf("error opening Excel file: %w", err)
- }
- defer f.Close()
- // Get the first sheet by default
- data, err := parseExcel(filePath)
- if err != nil {
- return nil, fmt.Errorf("error parsing Excel file: %w", err)
- }
- // Look for Kpler formulas
- formulas, err := scanSheetForFormulas(filePath, data.SheetName)
- if err != nil {
- return nil, fmt.Errorf("error scanning for formulas: %v", err)
- }
- // Initialize maps to store column information
- indexMap := make(map[int]*models.KplerExcelIndexData) // Maps column to index data
- dateMap := make(map[int]int) // Maps data column to its end date column
- // First pass: identify data columns and their corresponding date columns
- // Headers are in the third row (index 2)
- if len(data.Rows) < 3 {
- return nil, fmt.Errorf("Excel file does not have enough rows")
- }
- headers := data.Rows[1] // Get headers from the second row
- for j, header := range headers {
- // Skip empty headers
- if header == "" {
- continue
- }
- // Check if this is a Period End Date column
- if header == "Period End Date" {
- // The data column is typically one column before the date
- if j > 0 && headers[j-1] != "Date" {
- dateMap[j-1] = j // Map the previous column (data) to this date column
- }
- } else if header != "Date" {
- // This is a data column
- indexMap[j] = &models.KplerExcelIndexData{
- Name: header,
- DataPoints: make([]models.KplerDataPoint, 0),
- }
- // Process formula for this column if it exists
- if formula, ok := formulas[j]; ok {
- indexMap[j].Request = formula
- } else {
- // Look backwards for the formula
- for k := j; k >= 0; k-- {
- if formula, ok := formulas[k]; ok {
- indexMap[j].Request = formula
- break
- }
- }
- }
- }
- }
-
- pendingData := make(map[int][]models.DataPoint) // Maps column index to pending data points
- for i := 2; i < len(data.Rows); i++ { // Start from row 3 (index 2) for data
- row := data.Rows[i]
- if len(row) == 0 {
- continue
- }
- for j, cell := range row {
- if cell == "" {
- continue
- }
- // Check if this is a date column
- if _, exists := indexMap[j]; !exists {
- // This might be a date column, check if it's used as a date column
- isDateCol := false
- for dataCol, dateCol := range dateMap {
- if dateCol == j {
- isDateCol = true
- // This is a date column, update all pending data points for the corresponding data column
- if pending, hasPending := pendingData[dataCol]; hasPending {
- for _, dp := range pending {
- if idx, exists := indexMap[dataCol]; exists {
- idx.DataPoints = append(idx.DataPoints, models.KplerDataPoint{
- EndDate: cell,
- Value: dp.Value,
- })
- }
- }
- // Clear pending data for this column
- delete(pendingData, dataCol)
- }
- break
- }
- }
- if isDateCol {
- continue
- }
- }
- // Try to convert cell value to float64
- _, err := strconv.ParseFloat(cell, 64)
- if err != nil {
- fmt.Printf("Warning: Error parsing value at row %d, col %d: %v\n", i+1, j+1, err)
- continue
- }
- // Store the data point for later processing when we find the date
- if _, exists := indexMap[j]; exists {
- if _, hasPending := pendingData[j]; !hasPending {
- pendingData[j] = make([]models.DataPoint, 0)
- }
- pendingData[j] = append(pendingData[j], models.DataPoint{
- Value: cell,
- Row: i,
- })
- }
- }
- }
- // Convert map to slice
- for _, index := range indexMap {
- if len(index.DataPoints) > 0 {
- indexData = append(indexData, *index)
- }
- }
- return indexData, nil
- }
- func ParseSpecificKplerFormulaV2(specificFormula string) (reqObj models.KplerFlowsRequest, err error) {
- // Remove Excel string concatenation
- specificFormula = strings.ReplaceAll(specificFormula, `" & "`, "")
- specificFormula = strings.ReplaceAll(specificFormula, `"&"`, "")
- specificFormula = strings.ReplaceAll(specificFormula, `&`, "")
- specificFormula = strings.ReplaceAll(specificFormula, `\"`, `"`)
- // Get content inside parentheses
- re := regexp.MustCompile(`_xldudf_KPLER_GETFLOWS\((.*)\)`)
- matches := re.FindStringSubmatch(specificFormula)
- if len(matches) < 2 {
- // Try the old format
- re = regexp.MustCompile(`\((.*)\)`)
- matches = re.FindStringSubmatch(specificFormula)
- if len(matches) < 2 {
- err = fmt.Errorf("没有找到括号里的内容")
- return
- }
- }
- // Get the parameter string
- encodedParam := matches[1]
- // Remove surrounding quotes if present
- encodedParam = strings.Trim(encodedParam, `"`)
-
- // Try direct JSON parsing first
- var jsonObj models.KplerFlowsRequest
- if err = json.Unmarshal([]byte(encodedParam), &jsonObj); err == nil {
- return jsonObj, nil
- }
- // If direct parsing fails, try URL decoding
- decodedStr, err := url.QueryUnescape(encodedParam)
- if err != nil {
- // If URL decoding fails, try removing escapes and parse again
- cleanStr := strings.ReplaceAll(encodedParam, `\`, "")
- if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil {
- // Try one more time with manual concatenation cleanup
- cleanStr = strings.ReplaceAll(cleanStr, `" "`, "")
- if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil {
- return reqObj, fmt.Errorf("error parsing formula: %v", err)
- }
- }
- return jsonObj, nil
- }
- // Remove surrounding quotes if present in decoded string
- decodedStr = strings.Trim(decodedStr, `"`)
-
- // Try parsing the decoded string
- if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil {
- // Try one more time with manual cleanup
- decodedStr = strings.ReplaceAll(decodedStr, `" "`, "")
- if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil {
- return reqObj, fmt.Errorf("error parsing decoded JSON: %v", err)
- }
- }
- return jsonObj, nil
- }
|