excel.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. package kpler
  2. import (
  3. "encoding/json"
  4. "eta/eta_data_analysis/models"
  5. "eta/eta_data_analysis/utils"
  6. "fmt"
  7. "net/url"
  8. "regexp"
  9. "strings"
  10. "time"
  11. "github.com/xuri/excelize/v2"
  12. )
  13. // ExcelData represents structured data extracted from an Excel file
  14. type ExcelData struct {
  15. Headers []string
  16. Rows [][]string
  17. SheetName string
  18. }
  19. // ParseExcel reads and parses data from an Excel file
  20. func parseExcel(filePath string) (*ExcelData, error) {
  21. // Open the Excel file
  22. f, err := excelize.OpenFile(filePath)
  23. if err != nil {
  24. return nil, fmt.Errorf("error opening Excel file: %w", err)
  25. }
  26. defer f.Close()
  27. // Get the first sheet by default
  28. sheetName := f.GetSheetList()[0]
  29. // Get all rows from the sheet
  30. rows, err := f.GetRows(sheetName)
  31. if err != nil {
  32. return nil, fmt.Errorf("error reading rows from sheet %s: %w", sheetName, err)
  33. }
  34. // Check if there's data
  35. if len(rows) == 0 {
  36. return nil, fmt.Errorf("no data found in sheet %s", sheetName)
  37. }
  38. // Create structured data
  39. excelData := &ExcelData{
  40. SheetName: sheetName,
  41. Headers: rows[0],
  42. Rows: rows[1:],
  43. }
  44. return excelData, nil
  45. }
  46. // ScanSheetForFormulas scans an entire sheet for formulas
  47. func scanSheetForFormulas(filePath, sheetName string) (map[int]string, error) {
  48. // Open the Excel file
  49. f, err := excelize.OpenFile(filePath)
  50. if err != nil {
  51. return nil, fmt.Errorf("error opening Excel file: %w", err)
  52. }
  53. defer f.Close()
  54. formulas := make(map[int]string)
  55. // Get sheet dimensions
  56. dimension, err := f.GetSheetDimension(sheetName)
  57. if err != nil {
  58. return nil, fmt.Errorf("error getting sheet dimension: %w", err)
  59. }
  60. // Parse dimension to get the range (e.g., "A1:K42")
  61. parts := strings.Split(dimension, ":")
  62. if len(parts) != 2 {
  63. // Use a default range if dimension is not in expected format
  64. parts = []string{"A1", "Z100"}
  65. }
  66. // Extract the column letters and row numbers
  67. startCol, startRow, err := excelize.CellNameToCoordinates(parts[0])
  68. if err != nil {
  69. startCol, startRow = 1, 1
  70. }
  71. endCol, endRow, err := excelize.CellNameToCoordinates(parts[1])
  72. if err != nil {
  73. endCol, endRow = 26, 100 // Default to Z100
  74. }
  75. // Scan cells for formulas
  76. for row := startRow; row <= endRow; row++ {
  77. for col := startCol; col <= endCol; col++ {
  78. colName, err := excelize.ColumnNumberToName(col)
  79. if err != nil {
  80. continue
  81. }
  82. cellCoord := fmt.Sprintf("%s%d", colName, row)
  83. formula, err := f.GetCellFormula(sheetName, cellCoord)
  84. if err == nil && formula != "" {
  85. // Store all formulas or only Kpler-related ones (adjust as needed)
  86. //if strings.Contains(formula, "kpler") || strings.Contains(formula, "GetFlows") {
  87. if strings.Contains(formula, "GETFLOWS") {
  88. // fmt.Println("row: ", row)
  89. // fmt.Println("col: ", col)
  90. // fmt.Println("GetCellFormula: ", formula)
  91. if _, ok := formulas[col-1]; !ok {
  92. formulas[col-1] = formula
  93. }
  94. }
  95. }
  96. }
  97. }
  98. return formulas, nil
  99. }
  100. // ProcessKplerData 解析excel获取指标对应的公式和数据
  101. func ProcessKplerData(filePath string) (indexData []models.KplerExcelIndexData, err error) {
  102. defer func() {
  103. if err != nil {
  104. utils.FileLog.Info(fmt.Sprintf("ProcessKplerData error: %v\n", err))
  105. }
  106. }()
  107. // Open the Excel file
  108. f, err := excelize.OpenFile(filePath)
  109. if err != nil {
  110. return nil, fmt.Errorf("error opening Excel file: %w", err)
  111. }
  112. defer f.Close()
  113. // Get the first sheet by default
  114. data, err := parseExcel(filePath)
  115. if err != nil {
  116. return nil, fmt.Errorf("error parsing Excel file: %w", err)
  117. }
  118. // Look for Kpler formulas
  119. formulas, err := scanSheetForFormulas(filePath, data.SheetName)
  120. if err != nil {
  121. return nil, fmt.Errorf("error scanning for formulas: %v", err)
  122. }
  123. fmt.Println("Found formulas:", formulas)
  124. // Initialize maps to store column information
  125. indexMap := make(map[int]*models.KplerExcelIndexData) // Maps column to index data
  126. dateColMap := make(map[int][]int) // Maps date column to its data columns
  127. dateValues := make(map[int][]string) // Maps date column to its values
  128. // First pass: identify data columns and their corresponding date columns
  129. // Headers are in the third row (index 2)
  130. if len(data.Rows) < 3 {
  131. return nil, fmt.Errorf("Excel file does not have enough rows")
  132. }
  133. titles := data.Headers
  134. titleMap := make(map[int]string)
  135. for j, title := range titles {
  136. titleMap[j] = title
  137. }
  138. headers := data.Rows[1] // Get headers from the second row
  139. fmt.Println("Headers:", headers)
  140. // First pass: find all date columns
  141. var dateCols []int
  142. for j, header := range headers {
  143. if header == "Period End Date" {
  144. dateCols = append(dateCols, j)
  145. dateValues[j] = make([]string, 0)
  146. dateColMap[j] = make([]int, 0)
  147. }
  148. }
  149. fmt.Println("Date columns:", dateCols)
  150. // Second pass: associate data columns with their nearest date column
  151. for j, header := range headers {
  152. if header == "" || header == "Date" || header == "Period End Date" {
  153. continue
  154. }
  155. // Find the nearest date column after this data column
  156. nearestDateCol := -1
  157. for _, dateCol := range dateCols {
  158. if dateCol > j {
  159. nearestDateCol = dateCol
  160. break
  161. }
  162. }
  163. if nearestDateCol != -1 {
  164. // This is a data column
  165. indexMap[j] = &models.KplerExcelIndexData{
  166. Name: header,
  167. DataPoints: make([]models.KplerDataPoint, 0),
  168. }
  169. // Associate this data column with its date column
  170. dateColMap[nearestDateCol] = append(dateColMap[nearestDateCol], j)
  171. // Process formula for this column if it exists
  172. if formula, ok := formulas[j]; ok {
  173. indexMap[j].Request = formula
  174. } else {
  175. // Look backwards for the formula
  176. for k := j; k >= 0; k-- {
  177. if formula, ok := formulas[k]; ok {
  178. indexMap[j].Request = formula
  179. break
  180. }
  181. }
  182. }
  183. // 获取标题
  184. title, ok := titleMap[j]
  185. if ok && title != "" {
  186. indexMap[j].Title = title
  187. }else{
  188. // Look backwards for the formula
  189. for k := j; k >= 0; k-- {
  190. title, ok := titleMap[k]
  191. if ok && title != "" {
  192. indexMap[j].Title = title
  193. break
  194. }
  195. }
  196. }
  197. }
  198. }
  199. fmt.Println("Data columns mapping:", dateColMap)
  200. // Create a map to store data values for each column
  201. dataValues := make(map[int][]string)
  202. for j := range indexMap {
  203. dataValues[j] = make([]string, 0)
  204. }
  205. // First pass: collect all values
  206. for i := 2; i < len(data.Rows); i++ {
  207. row := data.Rows[i]
  208. if len(row) == 0 {
  209. continue
  210. }
  211. for j, cell := range row {
  212. if cell == "" {
  213. continue
  214. }
  215. // If this is a date column, store its values
  216. if _, exists := dateValues[j]; exists {
  217. // 对日期进行格式化
  218. dateFormat := "01-02-06"
  219. date, err := time.Parse(dateFormat, cell)
  220. if err != nil {
  221. fmt.Println("Error parsing date:", err)
  222. continue
  223. }
  224. dateValues[j] = append(dateValues[j], date.Format(utils.FormatDate))
  225. continue
  226. }
  227. // If this is a data column, store its values
  228. if _, exists := indexMap[j]; exists {
  229. dataValues[j] = append(dataValues[j], cell)
  230. }
  231. }
  232. }
  233. fmt.Println("Date values:", dateValues)
  234. fmt.Println("Data values:", dataValues)
  235. // Second pass: combine data and dates
  236. for dateCol, dataCols := range dateColMap {
  237. dates := dateValues[dateCol]
  238. if len(dates) == 0 {
  239. fmt.Printf("No dates found for date column %d\n", dateCol)
  240. continue
  241. }
  242. fmt.Printf("Processing date column %d with data columns %v\n", dateCol, dataCols)
  243. // Process each data column associated with this date column
  244. for _, dataCol := range dataCols {
  245. if idx, exists := indexMap[dataCol]; exists {
  246. values := dataValues[dataCol]
  247. fmt.Printf("Column %d (%s): %d dates, %d values\n", dataCol, idx.Name, len(dates), len(values))
  248. // Use the shorter length to avoid index out of range
  249. length := len(dates)
  250. if len(values) < length {
  251. length = len(values)
  252. }
  253. // Combine data and dates
  254. for i := 0; i < length; i++ {
  255. idx.DataPoints = append(idx.DataPoints, models.KplerDataPoint{
  256. EndDate: dates[i],
  257. Value: values[i],
  258. })
  259. }
  260. fmt.Printf("Added %d data points for column %s\n", length, idx.Name)
  261. }
  262. }
  263. }
  264. // Convert map to slice
  265. for _, index := range indexMap {
  266. if len(index.DataPoints) > 0 {
  267. indexData = append(indexData, *index)
  268. }
  269. }
  270. return indexData, nil
  271. }
  272. func ParseSpecificKplerFormulaV2(specificFormula string) (reqObj models.KplerFlowsRequest, err error) {
  273. // Remove Excel string concatenation
  274. specificFormula = strings.ReplaceAll(specificFormula, `" & "`, "")
  275. specificFormula = strings.ReplaceAll(specificFormula, `"&"`, "")
  276. specificFormula = strings.ReplaceAll(specificFormula, `&amp;`, "")
  277. specificFormula = strings.ReplaceAll(specificFormula, `\"`, `"`)
  278. // Get content inside parentheses
  279. re := regexp.MustCompile(`_xldudf_KPLER_GETFLOWS\((.*)\)`)
  280. matches := re.FindStringSubmatch(specificFormula)
  281. if len(matches) < 2 {
  282. // Try the old format
  283. re = regexp.MustCompile(`\((.*)\)`)
  284. matches = re.FindStringSubmatch(specificFormula)
  285. if len(matches) < 2 {
  286. err = fmt.Errorf("没有找到括号里的内容")
  287. return
  288. }
  289. }
  290. // Get the parameter string
  291. encodedParam := matches[1]
  292. // Remove surrounding quotes if present
  293. encodedParam = strings.Trim(encodedParam, `"`)
  294. // Try direct JSON parsing first
  295. var jsonObj models.KplerFlowsRequest
  296. if err = json.Unmarshal([]byte(encodedParam), &jsonObj); err == nil {
  297. return jsonObj, nil
  298. }
  299. // If direct parsing fails, try URL decoding
  300. decodedStr, err := url.QueryUnescape(encodedParam)
  301. if err != nil {
  302. // If URL decoding fails, try removing escapes and parse again
  303. cleanStr := strings.ReplaceAll(encodedParam, `\`, "")
  304. if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil {
  305. // Try one more time with manual concatenation cleanup
  306. cleanStr = strings.ReplaceAll(cleanStr, `" "`, "")
  307. if err = json.Unmarshal([]byte(cleanStr), &jsonObj); err != nil {
  308. return reqObj, fmt.Errorf("error parsing formula: %v", err)
  309. }
  310. }
  311. return jsonObj, nil
  312. }
  313. // Remove surrounding quotes if present in decoded string
  314. decodedStr = strings.Trim(decodedStr, `"`)
  315. // Try parsing the decoded string
  316. if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil {
  317. // Try one more time with manual cleanup
  318. decodedStr = strings.ReplaceAll(decodedStr, `" "`, "")
  319. if err = json.Unmarshal([]byte(decodedStr), &jsonObj); err != nil {
  320. return reqObj, fmt.Errorf("error parsing decoded JSON: %v", err)
  321. }
  322. }
  323. return jsonObj, nil
  324. }