excel.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. package kpler
  2. import (
  3. "encoding/json"
  4. "eta/eta_data_analysis/models"
  5. "eta/eta_data_analysis/utils"
  6. "fmt"
  7. "net/url"
  8. "regexp"
  9. "strings"
  10. "github.com/xuri/excelize/v2"
  11. )
  12. // ExcelData represents structured data extracted from an Excel file
  13. type ExcelData struct {
  14. Headers []string
  15. Rows [][]string
  16. SheetName string
  17. }
  18. // ParseExcel reads and parses data from an Excel file
  19. func parseExcel(filePath string) (*ExcelData, error) {
  20. // Open the Excel file
  21. f, err := excelize.OpenFile(filePath)
  22. if err != nil {
  23. return nil, fmt.Errorf("error opening Excel file: %w", err)
  24. }
  25. defer f.Close()
  26. // Get the first sheet by default
  27. sheetName := f.GetSheetList()[0]
  28. // Get all rows from the sheet
  29. rows, err := f.GetRows(sheetName)
  30. if err != nil {
  31. return nil, fmt.Errorf("error reading rows from sheet %s: %w", sheetName, err)
  32. }
  33. // Check if there's data
  34. if len(rows) == 0 {
  35. return nil, fmt.Errorf("no data found in sheet %s", sheetName)
  36. }
  37. // Create structured data
  38. excelData := &ExcelData{
  39. SheetName: sheetName,
  40. Headers: rows[0],
  41. Rows: rows[1:],
  42. }
  43. return excelData, nil
  44. }
  45. // ScanSheetForFormulas scans an entire sheet for formulas
  46. func scanSheetForFormulas(filePath, sheetName string) (map[int]string, error) {
  47. // Open the Excel file
  48. f, err := excelize.OpenFile(filePath)
  49. if err != nil {
  50. return nil, fmt.Errorf("error opening Excel file: %w", err)
  51. }
  52. defer f.Close()
  53. formulas := make(map[int]string)
  54. // Get sheet dimensions
  55. dimension, err := f.GetSheetDimension(sheetName)
  56. if err != nil {
  57. return nil, fmt.Errorf("error getting sheet dimension: %w", err)
  58. }
  59. // Parse dimension to get the range (e.g., "A1:K42")
  60. parts := strings.Split(dimension, ":")
  61. if len(parts) != 2 {
  62. // Use a default range if dimension is not in expected format
  63. parts = []string{"A1", "Z100"}
  64. }
  65. // Extract the column letters and row numbers
  66. startCol, startRow, err := excelize.CellNameToCoordinates(parts[0])
  67. if err != nil {
  68. startCol, startRow = 1, 1
  69. }
  70. endCol, endRow, err := excelize.CellNameToCoordinates(parts[1])
  71. if err != nil {
  72. endCol, endRow = 26, 100 // Default to Z100
  73. }
  74. // Scan cells for formulas
  75. for row := startRow; row <= endRow; row++ {
  76. for col := startCol; col <= endCol; col++ {
  77. colName, err := excelize.ColumnNumberToName(col)
  78. if err != nil {
  79. continue
  80. }
  81. cellCoord := fmt.Sprintf("%s%d", colName, row)
  82. formula, err := f.GetCellFormula(sheetName, cellCoord)
  83. if err == nil && formula != "" {
  84. // Store all formulas or only Kpler-related ones (adjust as needed)
  85. //if strings.Contains(formula, "kpler") || strings.Contains(formula, "GetFlows") {
  86. if strings.Contains(formula, "GETFLOWS") {
  87. // fmt.Println("row: ", row)
  88. // fmt.Println("col: ", col)
  89. // fmt.Println("GetCellFormula: ", formula)
  90. if _, ok := formulas[col-1]; !ok {
  91. formulas[col-1] = formula
  92. }
  93. }
  94. }
  95. }
  96. }
  97. return formulas, nil
  98. }
  99. // ProcessKplerData 解析excel获取指标对应的公式和数据
  100. func ProcessKplerData(filePath string) (indexData []models.KplerExcelIndexData, err error) {
  101. defer func() {
  102. if err != nil {
  103. utils.FileLog.Info(fmt.Sprintf("ProcessKplerData error: %v\n", err))
  104. }
  105. }()
  106. // Open the Excel file
  107. f, err := excelize.OpenFile(filePath)
  108. if err != nil {
  109. return nil, fmt.Errorf("error opening Excel file: %w", err)
  110. }
  111. defer f.Close()
  112. // Get the first sheet by default
  113. data, err := parseExcel(filePath)
  114. if err != nil {
  115. return nil, fmt.Errorf("error parsing Excel file: %w", err)
  116. }
  117. // Look for Kpler formulas
  118. formulas, err := scanSheetForFormulas(filePath, data.SheetName)
  119. if err != nil {
  120. return nil, fmt.Errorf("error scanning for formulas: %v", err)
  121. }
  122. fmt.Println("Found formulas:", formulas)
  123. // Initialize maps to store column information
  124. indexMap := make(map[int]*models.KplerExcelIndexData) // Maps column to index data
  125. dateColMap := make(map[int][]int) // Maps date column to its data columns
  126. dateValues := make(map[int][]string) // Maps date column to its values
  127. // First pass: identify data columns and their corresponding date columns
  128. // Headers are in the third row (index 2)
  129. if len(data.Rows) < 3 {
  130. return nil, fmt.Errorf("Excel file does not have enough rows")
  131. }
  132. headers := data.Rows[1] // Get headers from the second row
  133. fmt.Println("Headers:", headers)
  134. // First pass: find all date columns
  135. var dateCols []int
  136. for j, header := range headers {
  137. if header == "Period End Date" {
  138. dateCols = append(dateCols, j)
  139. dateValues[j] = make([]string, 0)
  140. dateColMap[j] = make([]int, 0)
  141. }
  142. }
  143. fmt.Println("Date columns:", dateCols)
  144. // Second pass: associate data columns with their nearest date column
  145. for j, header := range headers {
  146. if header == "" || header == "Date" || header == "Period End Date" {
  147. continue
  148. }
  149. // Find the nearest date column after this data column
  150. nearestDateCol := -1
  151. for _, dateCol := range dateCols {
  152. if dateCol > j {
  153. nearestDateCol = dateCol
  154. break
  155. }
  156. }
  157. if nearestDateCol != -1 {
  158. // This is a data column
  159. indexMap[j] = &models.KplerExcelIndexData{
  160. Name: header,
  161. DataPoints: make([]models.KplerDataPoint, 0),
  162. }
  163. // Associate this data column with its date column
  164. dateColMap[nearestDateCol] = append(dateColMap[nearestDateCol], j)
  165. // Process formula for this column if it exists
  166. if formula, ok := formulas[j]; ok {
  167. indexMap[j].Request = formula
  168. } else {
  169. // Look backwards for the formula
  170. for k := j; k >= 0; k-- {
  171. if formula, ok := formulas[k]; ok {
  172. indexMap[j].Request = formula
  173. break
  174. }
  175. }
  176. }
  177. }
  178. }
  179. fmt.Println("Data columns mapping:", dateColMap)
  180. // Create a map to store data values for each column
  181. dataValues := make(map[int][]string)
  182. for j := range indexMap {
  183. dataValues[j] = make([]string, 0)
  184. }
  185. // First pass: collect all values
  186. for i := 2; i < len(data.Rows); i++ {
  187. row := data.Rows[i]
  188. if len(row) == 0 {
  189. continue
  190. }
  191. for j, cell := range row {
  192. if cell == "" {
  193. continue
  194. }
  195. // If this is a date column, store its values
  196. if _, exists := dateValues[j]; exists {
  197. dateValues[j] = append(dateValues[j], cell)
  198. continue
  199. }
  200. // If this is a data column, store its values
  201. if _, exists := indexMap[j]; exists {
  202. dataValues[j] = append(dataValues[j], cell)
  203. }
  204. }
  205. }
  206. fmt.Println("Date values:", dateValues)
  207. fmt.Println("Data values:", dataValues)
  208. // Second pass: combine data and dates
  209. for dateCol, dataCols := range dateColMap {
  210. dates := dateValues[dateCol]
  211. if len(dates) == 0 {
  212. fmt.Printf("No dates found for date column %d\n", dateCol)
  213. continue
  214. }
  215. fmt.Printf("Processing date column %d with data columns %v\n", dateCol, dataCols)
  216. // Process each data column associated with this date column
  217. for _, dataCol := range dataCols {
  218. if idx, exists := indexMap[dataCol]; exists {
  219. values := dataValues[dataCol]
  220. fmt.Printf("Column %d (%s): %d dates, %d values\n", dataCol, idx.Name, len(dates), len(values))
  221. // Use the shorter length to avoid index out of range
  222. length := len(dates)
  223. if len(values) < length {
  224. length = len(values)
  225. }
  226. // Combine data and dates
  227. for i := 0; i < length; i++ {
  228. idx.DataPoints = append(idx.DataPoints, models.KplerDataPoint{
  229. EndDate: dates[i],
  230. Value: values[i],
  231. })
  232. }
  233. fmt.Printf("Added %d data points for column %s\n", length, idx.Name)
  234. }
  235. }
  236. }
  237. // Convert map to slice
  238. for _, index := range indexMap {
  239. if len(index.DataPoints) > 0 {
  240. indexData = append(indexData, *index)
  241. }
  242. }
  243. return indexData, nil
  244. }
  245. func ParseSpecificKplerFormulaV2(specificFormula string) (reqObj models.KplerFlowsRequest, err error) {
  246. // Remove Excel string concatenation
  247. specificFormula = strings.ReplaceAll(specificFormula, `" & "`, "")
  248. specificFormula = strings.ReplaceAll(specificFormula, `"&"`, "")
  249. specificFormula = strings.ReplaceAll(specificFormula, `&amp;`, "")
  250. specificFormula = strings.ReplaceAll(specificFormula, `\"`, `"`)
  251. // Get content inside parentheses
  252. re := regexp.MustCompile(`_xldudf_KPLER_GETFLOWS\((.*)\)`)
  253. matches := re.FindStringSubmatch(specificFormula)
  254. if len(matches) < 2 {
  255. // Try the old format
  256. re = regexp.MustCompile(`\((.*)\)`)
  257. matches = re.FindStringSubmatch(specificFormula)
  258. if len(matches) < 2 {
  259. err = fmt.Errorf("没有找到括号里的内容")
  260. return
  261. }
  262. }
  263. // Get the parameter string
  264. encodedParam := matches[1]
  265. // Remove surrounding quotes if present
  266. encodedParam = strings.Trim(encodedParam, `"`)
  267. // Try direct JSON parsing first
  268. var jsonObj models.KplerFlowsRequest
  269. if err = json.Unmarshal([]byte(encodedParam), &jsonObj); err == nil {
  270. return jsonObj, nil
  271. }
  272. // If direct parsing fails, try URL decoding
  273. decodedStr, err := url.QueryUnescape(encodedParam)
  274. if err != nil {
  275. // If URL decoding fails, try removing escapes and parse again
  276. cleanStr := strings.ReplaceAll(encodedParam, `\`, "")
  277. if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil {
  278. // Try one more time with manual concatenation cleanup
  279. cleanStr = strings.ReplaceAll(cleanStr, `" "`, "")
  280. if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil {
  281. return reqObj, fmt.Errorf("error parsing formula: %v", err)
  282. }
  283. }
  284. return jsonObj, nil
  285. }
  286. // Remove surrounding quotes if present in decoded string
  287. decodedStr = strings.Trim(decodedStr, `"`)
  288. // Try parsing the decoded string
  289. if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil {
  290. // Try one more time with manual cleanup
  291. decodedStr = strings.ReplaceAll(decodedStr, `" "`, "")
  292. if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil {
  293. return reqObj, fmt.Errorf("error parsing decoded JSON: %v", err)
  294. }
  295. }
  296. return jsonObj, nil
  297. }