excel.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. package kpler
  2. import (
  3. "encoding/json"
  4. "eta/eta_data_analysis/models"
  5. "eta/eta_data_analysis/utils"
  6. "fmt"
  7. "net/url"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "github.com/xuri/excelize/v2"
  12. )
  13. // ExcelData represents structured data extracted from an Excel file
  14. type ExcelData struct {
  15. Headers []string
  16. Rows [][]string
  17. SheetName string
  18. }
  19. // ParseExcel reads and parses data from an Excel file
  20. func parseExcel(filePath string) (*ExcelData, error) {
  21. // Open the Excel file
  22. f, err := excelize.OpenFile(filePath)
  23. if err != nil {
  24. return nil, fmt.Errorf("error opening Excel file: %w", err)
  25. }
  26. defer f.Close()
  27. // Get the first sheet by default
  28. sheetName := f.GetSheetList()[0]
  29. // Get all rows from the sheet
  30. rows, err := f.GetRows(sheetName)
  31. if err != nil {
  32. return nil, fmt.Errorf("error reading rows from sheet %s: %w", sheetName, err)
  33. }
  34. // Check if there's data
  35. if len(rows) == 0 {
  36. return nil, fmt.Errorf("no data found in sheet %s", sheetName)
  37. }
  38. // Create structured data
  39. excelData := &ExcelData{
  40. SheetName: sheetName,
  41. Headers: rows[0],
  42. Rows: rows[1:],
  43. }
  44. return excelData, nil
  45. }
  46. // ScanSheetForFormulas scans an entire sheet for formulas
  47. func scanSheetForFormulas(filePath, sheetName string) (map[int]string, error) {
  48. // Open the Excel file
  49. f, err := excelize.OpenFile(filePath)
  50. if err != nil {
  51. return nil, fmt.Errorf("error opening Excel file: %w", err)
  52. }
  53. defer f.Close()
  54. formulas := make(map[int]string)
  55. // Get sheet dimensions
  56. dimension, err := f.GetSheetDimension(sheetName)
  57. if err != nil {
  58. return nil, fmt.Errorf("error getting sheet dimension: %w", err)
  59. }
  60. // Parse dimension to get the range (e.g., "A1:K42")
  61. parts := strings.Split(dimension, ":")
  62. if len(parts) != 2 {
  63. // Use a default range if dimension is not in expected format
  64. parts = []string{"A1", "Z100"}
  65. }
  66. // Extract the column letters and row numbers
  67. startCol, startRow, err := excelize.CellNameToCoordinates(parts[0])
  68. if err != nil {
  69. startCol, startRow = 1, 1
  70. }
  71. endCol, endRow, err := excelize.CellNameToCoordinates(parts[1])
  72. if err != nil {
  73. endCol, endRow = 26, 100 // Default to Z100
  74. }
  75. // Scan cells for formulas
  76. for row := startRow; row <= endRow; row++ {
  77. for col := startCol; col <= endCol; col++ {
  78. colName, err := excelize.ColumnNumberToName(col)
  79. if err != nil {
  80. continue
  81. }
  82. cellCoord := fmt.Sprintf("%s%d", colName, row)
  83. formula, err := f.GetCellFormula(sheetName, cellCoord)
  84. if err == nil && formula != "" {
  85. // Store all formulas or only Kpler-related ones (adjust as needed)
  86. //if strings.Contains(formula, "kpler") || strings.Contains(formula, "GetFlows") {
  87. if strings.Contains(formula, "GETFLOWS") {
  88. // fmt.Println("row: ", row)
  89. // fmt.Println("col: ", col)
  90. // fmt.Println("GetCellFormula: ", formula)
  91. if _, ok := formulas[col-1]; !ok {
  92. formulas[col-1] = formula
  93. }
  94. }
  95. }
  96. }
  97. }
  98. return formulas, nil
  99. }
  100. // ProcessKplerData 解析excel获取指标对应的公式和数据
  101. func ProcessKplerData(filePath string) (indexData []models.KplerExcelIndexData, err error) {
  102. defer func() {
  103. if err != nil {
  104. utils.FileLog.Info(fmt.Sprintf("ProcessKplerData error: %v\n", err))
  105. }
  106. }()
  107. // Open the Excel file
  108. f, err := excelize.OpenFile(filePath)
  109. if err != nil {
  110. return nil, fmt.Errorf("error opening Excel file: %w", err)
  111. }
  112. defer f.Close()
  113. // Get the first sheet by default
  114. data, err := parseExcel(filePath)
  115. if err != nil {
  116. return nil, fmt.Errorf("error parsing Excel file: %w", err)
  117. }
  118. // Look for Kpler formulas
  119. formulas, err := scanSheetForFormulas(filePath, data.SheetName)
  120. if err != nil {
  121. return nil, fmt.Errorf("error scanning for formulas: %v", err)
  122. }
  123. // Initialize maps to store column information
  124. indexMap := make(map[int]*models.KplerExcelIndexData) // Maps column to index data
  125. dateMap := make(map[int]int) // Maps data column to its end date column
  126. // First pass: identify data columns and their corresponding date columns
  127. // Headers are in the third row (index 2)
  128. if len(data.Rows) < 3 {
  129. return nil, fmt.Errorf("Excel file does not have enough rows")
  130. }
  131. headers := data.Rows[1] // Get headers from the second row
  132. for j, header := range headers {
  133. // Skip empty headers
  134. if header == "" {
  135. continue
  136. }
  137. // Check if this is a Period End Date column
  138. if header == "Period End Date" {
  139. // The data column is typically one column before the date
  140. if j > 0 && headers[j-1] != "Date" {
  141. dateMap[j-1] = j // Map the previous column (data) to this date column
  142. }
  143. } else if header != "Date" {
  144. // This is a data column
  145. indexMap[j] = &models.KplerExcelIndexData{
  146. Name: header,
  147. DataPoints: make([]models.KplerDataPoint, 0),
  148. }
  149. // Process formula for this column if it exists
  150. if formula, ok := formulas[j]; ok {
  151. indexMap[j].Request = formula
  152. } else {
  153. // Look backwards for the formula
  154. for k := j; k >= 0; k-- {
  155. if formula, ok := formulas[k]; ok {
  156. indexMap[j].Request = formula
  157. break
  158. }
  159. }
  160. }
  161. }
  162. }
  163. pendingData := make(map[int][]models.DataPoint) // Maps column index to pending data points
  164. for i := 2; i < len(data.Rows); i++ { // Start from row 3 (index 2) for data
  165. row := data.Rows[i]
  166. if len(row) == 0 {
  167. continue
  168. }
  169. for j, cell := range row {
  170. if cell == "" {
  171. continue
  172. }
  173. // Check if this is a date column
  174. if _, exists := indexMap[j]; !exists {
  175. // This might be a date column, check if it's used as a date column
  176. isDateCol := false
  177. for dataCol, dateCol := range dateMap {
  178. if dateCol == j {
  179. isDateCol = true
  180. // This is a date column, update all pending data points for the corresponding data column
  181. if pending, hasPending := pendingData[dataCol]; hasPending {
  182. for _, dp := range pending {
  183. if idx, exists := indexMap[dataCol]; exists {
  184. idx.DataPoints = append(idx.DataPoints, models.KplerDataPoint{
  185. EndDate: cell,
  186. Value: dp.Value,
  187. })
  188. }
  189. }
  190. // Clear pending data for this column
  191. delete(pendingData, dataCol)
  192. }
  193. break
  194. }
  195. }
  196. if isDateCol {
  197. continue
  198. }
  199. }
  200. // Try to convert cell value to float64
  201. _, err := strconv.ParseFloat(cell, 64)
  202. if err != nil {
  203. fmt.Printf("Warning: Error parsing value at row %d, col %d: %v\n", i+1, j+1, err)
  204. continue
  205. }
  206. // Store the data point for later processing when we find the date
  207. if _, exists := indexMap[j]; exists {
  208. if _, hasPending := pendingData[j]; !hasPending {
  209. pendingData[j] = make([]models.DataPoint, 0)
  210. }
  211. pendingData[j] = append(pendingData[j], models.DataPoint{
  212. Value: cell,
  213. Row: i,
  214. })
  215. }
  216. }
  217. }
  218. // Convert map to slice
  219. for _, index := range indexMap {
  220. if len(index.DataPoints) > 0 {
  221. indexData = append(indexData, *index)
  222. }
  223. }
  224. return indexData, nil
  225. }
  226. func ParseSpecificKplerFormulaV2(specificFormula string) (reqObj models.KplerFlowsRequest, err error) {
  227. // Remove Excel string concatenation
  228. specificFormula = strings.ReplaceAll(specificFormula, `" & "`, "")
  229. specificFormula = strings.ReplaceAll(specificFormula, `"&"`, "")
  230. specificFormula = strings.ReplaceAll(specificFormula, `&amp;`, "")
  231. specificFormula = strings.ReplaceAll(specificFormula, `\"`, `"`)
  232. // Get content inside parentheses
  233. re := regexp.MustCompile(`_xldudf_KPLER_GETFLOWS\((.*)\)`)
  234. matches := re.FindStringSubmatch(specificFormula)
  235. if len(matches) < 2 {
  236. // Try the old format
  237. re = regexp.MustCompile(`\((.*)\)`)
  238. matches = re.FindStringSubmatch(specificFormula)
  239. if len(matches) < 2 {
  240. err = fmt.Errorf("没有找到括号里的内容")
  241. return
  242. }
  243. }
  244. // Get the parameter string
  245. encodedParam := matches[1]
  246. // Remove surrounding quotes if present
  247. encodedParam = strings.Trim(encodedParam, `"`)
  248. // Try direct JSON parsing first
  249. var jsonObj models.KplerFlowsRequest
  250. if err = json.Unmarshal([]byte(encodedParam), &jsonObj); err == nil {
  251. return jsonObj, nil
  252. }
  253. // If direct parsing fails, try URL decoding
  254. decodedStr, err := url.QueryUnescape(encodedParam)
  255. if err != nil {
  256. // If URL decoding fails, try removing escapes and parse again
  257. cleanStr := strings.ReplaceAll(encodedParam, `\`, "")
  258. if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil {
  259. // Try one more time with manual concatenation cleanup
  260. cleanStr = strings.ReplaceAll(cleanStr, `" "`, "")
  261. if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil {
  262. return reqObj, fmt.Errorf("error parsing formula: %v", err)
  263. }
  264. }
  265. return jsonObj, nil
  266. }
  267. // Remove surrounding quotes if present in decoded string
  268. decodedStr = strings.Trim(decodedStr, `"`)
  269. // Try parsing the decoded string
  270. if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil {
  271. // Try one more time with manual cleanup
  272. decodedStr = strings.ReplaceAll(decodedStr, `" "`, "")
  273. if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil {
  274. return reqObj, fmt.Errorf("error parsing decoded JSON: %v", err)
  275. }
  276. }
  277. return jsonObj, nil
  278. }