123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383 |
- package kpler
- import (
- "encoding/json"
- "eta/eta_data_analysis/models"
- "eta/eta_data_analysis/utils"
- "fmt"
- "net/url"
- "regexp"
- "strings"
- "time"
- "github.com/xuri/excelize/v2"
- )
- // ExcelData represents structured data extracted from an Excel file
- type ExcelData struct {
- Headers []string
- Rows [][]string
- SheetName string
- }
- // ParseExcel reads and parses data from an Excel file
- func parseExcel(filePath string) (*ExcelData, error) {
- // Open the Excel file
- f, err := excelize.OpenFile(filePath)
- if err != nil {
- return nil, fmt.Errorf("error opening Excel file: %w", err)
- }
- defer f.Close()
- // Get the first sheet by default
- sheetName := f.GetSheetList()[0]
-
- // Get all rows from the sheet
- rows, err := f.GetRows(sheetName)
- if err != nil {
- return nil, fmt.Errorf("error reading rows from sheet %s: %w", sheetName, err)
- }
- // Check if there's data
- if len(rows) == 0 {
- return nil, fmt.Errorf("no data found in sheet %s", sheetName)
- }
- // Create structured data
- excelData := &ExcelData{
- SheetName: sheetName,
- Headers: rows[0],
- Rows: rows[1:],
- }
- return excelData, nil
- }
- // ScanSheetForFormulas scans an entire sheet for formulas
- func scanSheetForFormulas(filePath, sheetName string) (map[int]string, error) {
- // Open the Excel file
- f, err := excelize.OpenFile(filePath)
- if err != nil {
- return nil, fmt.Errorf("error opening Excel file: %w", err)
- }
- defer f.Close()
- formulas := make(map[int]string)
-
- // Get sheet dimensions
- dimension, err := f.GetSheetDimension(sheetName)
- if err != nil {
- return nil, fmt.Errorf("error getting sheet dimension: %w", err)
- }
-
- // Parse dimension to get the range (e.g., "A1:K42")
- parts := strings.Split(dimension, ":")
- if len(parts) != 2 {
- // Use a default range if dimension is not in expected format
- parts = []string{"A1", "Z100"}
- }
-
- // Extract the column letters and row numbers
- startCol, startRow, err := excelize.CellNameToCoordinates(parts[0])
- if err != nil {
- startCol, startRow = 1, 1
- }
-
- endCol, endRow, err := excelize.CellNameToCoordinates(parts[1])
- if err != nil {
- endCol, endRow = 26, 100 // Default to Z100
- }
-
- // Scan cells for formulas
- for row := startRow; row <= endRow; row++ {
- for col := startCol; col <= endCol; col++ {
- colName, err := excelize.ColumnNumberToName(col)
- if err != nil {
- continue
- }
-
- cellCoord := fmt.Sprintf("%s%d", colName, row)
- formula, err := f.GetCellFormula(sheetName, cellCoord)
-
- if err == nil && formula != "" {
- // Store all formulas or only Kpler-related ones (adjust as needed)
- //if strings.Contains(formula, "kpler") || strings.Contains(formula, "GetFlows") {
- if strings.Contains(formula, "GETFLOWS") {
- // fmt.Println("row: ", row)
- // fmt.Println("col: ", col)
- // fmt.Println("GetCellFormula: ", formula)
- if _, ok := formulas[col-1]; !ok {
- formulas[col-1] = formula
- }
- }
- }
- }
- }
-
- return formulas, nil
- }
- // ProcessKplerData 解析excel获取指标对应的公式和数据
- func ProcessKplerData(filePath string) (indexData []models.KplerExcelIndexData, err error) {
- defer func() {
- if err != nil {
- utils.FileLog.Info(fmt.Sprintf("ProcessKplerData error: %v\n", err))
- }
- }()
- // Open the Excel file
- f, err := excelize.OpenFile(filePath)
- if err != nil {
- return nil, fmt.Errorf("error opening Excel file: %w", err)
- }
- defer f.Close()
- // Get the first sheet by default
- data, err := parseExcel(filePath)
- if err != nil {
- return nil, fmt.Errorf("error parsing Excel file: %w", err)
- }
- // Look for Kpler formulas
- formulas, err := scanSheetForFormulas(filePath, data.SheetName)
- if err != nil {
- return nil, fmt.Errorf("error scanning for formulas: %v", err)
- }
- fmt.Println("Found formulas:", formulas)
- // Initialize maps to store column information
- indexMap := make(map[int]*models.KplerExcelIndexData) // Maps column to index data
- dateColMap := make(map[int][]int) // Maps date column to its data columns
- dateValues := make(map[int][]string) // Maps date column to its values
- // First pass: identify data columns and their corresponding date columns
- // Headers are in the third row (index 2)
- if len(data.Rows) < 3 {
- return nil, fmt.Errorf("Excel file does not have enough rows")
- }
- titles := data.Headers
- titleMap := make(map[int]string)
- for j, title := range titles {
- titleMap[j] = title
- }
- headers := data.Rows[1] // Get headers from the second row
- fmt.Println("Headers:", headers)
- // First pass: find all date columns
- var dateCols []int
- for j, header := range headers {
- if header == "Period End Date" {
- dateCols = append(dateCols, j)
- dateValues[j] = make([]string, 0)
- dateColMap[j] = make([]int, 0)
- }
- }
- fmt.Println("Date columns:", dateCols)
- // Second pass: associate data columns with their nearest date column
- for j, header := range headers {
- if header == "" || header == "Date" || header == "Period End Date" {
- continue
- }
- // Find the nearest date column after this data column
- nearestDateCol := -1
- for _, dateCol := range dateCols {
- if dateCol > j {
- nearestDateCol = dateCol
- break
- }
- }
- if nearestDateCol != -1 {
- // This is a data column
- indexMap[j] = &models.KplerExcelIndexData{
- Name: header,
- DataPoints: make([]models.KplerDataPoint, 0),
- }
- // Associate this data column with its date column
- dateColMap[nearestDateCol] = append(dateColMap[nearestDateCol], j)
- // Process formula for this column if it exists
- if formula, ok := formulas[j]; ok {
- indexMap[j].Request = formula
- } else {
- // Look backwards for the formula
- for k := j; k >= 0; k-- {
- if formula, ok := formulas[k]; ok {
- indexMap[j].Request = formula
- break
- }
- }
- }
- // 获取标题
-
- title, ok := titleMap[j]
- if ok && title != "" {
- indexMap[j].Title = title
- }else{
- // Look backwards for the formula
- for k := j; k >= 0; k-- {
- title, ok := titleMap[k]
- if ok && title != "" {
- indexMap[j].Title = title
- break
- }
- }
- }
- }
- }
- fmt.Println("Data columns mapping:", dateColMap)
- // Create a map to store data values for each column
- dataValues := make(map[int][]string)
- for j := range indexMap {
- dataValues[j] = make([]string, 0)
- }
- // First pass: collect all values
- for i := 2; i < len(data.Rows); i++ {
- row := data.Rows[i]
- if len(row) == 0 {
- continue
- }
- for j, cell := range row {
- if cell == "" {
- continue
- }
- // If this is a date column, store its values
- if _, exists := dateValues[j]; exists {
- // 对日期进行格式化
- dateFormat := "01-02-06"
- date, err := time.Parse(dateFormat, cell)
- if err != nil {
- fmt.Println("Error parsing date:", err)
- continue
- }
- dateValues[j] = append(dateValues[j], date.Format(utils.FormatDate))
- continue
- }
- // If this is a data column, store its values
- if _, exists := indexMap[j]; exists {
- dataValues[j] = append(dataValues[j], cell)
- }
- }
- }
- fmt.Println("Date values:", dateValues)
- fmt.Println("Data values:", dataValues)
- // Second pass: combine data and dates
- for dateCol, dataCols := range dateColMap {
- dates := dateValues[dateCol]
- if len(dates) == 0 {
- fmt.Printf("No dates found for date column %d\n", dateCol)
- continue
- }
- fmt.Printf("Processing date column %d with data columns %v\n", dateCol, dataCols)
- // Process each data column associated with this date column
- for _, dataCol := range dataCols {
- if idx, exists := indexMap[dataCol]; exists {
- values := dataValues[dataCol]
-
- fmt.Printf("Column %d (%s): %d dates, %d values\n", dataCol, idx.Name, len(dates), len(values))
- // Use the shorter length to avoid index out of range
- length := len(dates)
- if len(values) < length {
- length = len(values)
- }
- // Combine data and dates
- for i := 0; i < length; i++ {
- idx.DataPoints = append(idx.DataPoints, models.KplerDataPoint{
- EndDate: dates[i],
- Value: values[i],
- })
- }
- fmt.Printf("Added %d data points for column %s\n", length, idx.Name)
- }
- }
- }
- // Convert map to slice
- for _, index := range indexMap {
- if len(index.DataPoints) > 0 {
- indexData = append(indexData, *index)
- }
- }
- return indexData, nil
- }
- func ParseSpecificKplerFormulaV2(specificFormula string) (reqObj models.KplerFlowsRequest, err error) {
- // Remove Excel string concatenation
- specificFormula = strings.ReplaceAll(specificFormula, `" & "`, "")
- specificFormula = strings.ReplaceAll(specificFormula, `"&"`, "")
- specificFormula = strings.ReplaceAll(specificFormula, `&`, "")
- specificFormula = strings.ReplaceAll(specificFormula, `\"`, `"`)
- // Get content inside parentheses
- re := regexp.MustCompile(`_xldudf_KPLER_GETFLOWS\((.*)\)`)
- matches := re.FindStringSubmatch(specificFormula)
- if len(matches) < 2 {
- // Try the old format
- re = regexp.MustCompile(`\((.*)\)`)
- matches = re.FindStringSubmatch(specificFormula)
- if len(matches) < 2 {
- err = fmt.Errorf("没有找到括号里的内容")
- return
- }
- }
- // Get the parameter string
- encodedParam := matches[1]
- // Remove surrounding quotes if present
- encodedParam = strings.Trim(encodedParam, `"`)
-
- // Try direct JSON parsing first
- var jsonObj models.KplerFlowsRequest
- if err = json.Unmarshal([]byte(encodedParam), &jsonObj); err == nil {
- return jsonObj, nil
- }
- // If direct parsing fails, try URL decoding
- decodedStr, err := url.QueryUnescape(encodedParam)
- if err != nil {
- // If URL decoding fails, try removing escapes and parse again
- cleanStr := strings.ReplaceAll(encodedParam, `\`, "")
- if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil {
- // Try one more time with manual concatenation cleanup
- cleanStr = strings.ReplaceAll(cleanStr, `" "`, "")
- if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil {
- return reqObj, fmt.Errorf("error parsing formula: %v", err)
- }
- }
- return jsonObj, nil
- }
- // Remove surrounding quotes if present in decoded string
- decodedStr = strings.Trim(decodedStr, `"`)
-
- // Try parsing the decoded string
- if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil {
- // Try one more time with manual cleanup
- decodedStr = strings.ReplaceAll(decodedStr, `" "`, "")
- if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil {
- return reqObj, fmt.Errorf("error parsing decoded JSON: %v", err)
- }
- }
- return jsonObj, nil
- }
|