data_processor.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. package ruizide
  2. import (
  3. "context"
  4. "encoding/json"
  5. "errors"
  6. "eta/eta_data_analysis/models"
  7. "eta/eta_data_analysis/utils"
  8. "fmt"
  9. "github.com/xuri/excelize/v2"
  10. "io"
  11. "log"
  12. "os"
  13. "path/filepath"
  14. "time"
  15. "github.com/chromedp/chromedp"
  16. )
  17. // 定义选择器
  18. var (
  19. excelDir = utils.RZD_EXCEL_PATH
  20. downloadDir = utils.RZD_DOWNLOAD_PATH
  21. rzdLoginPath = utils.RZD_LOGIN_PATH
  22. rzdBatchSize = 500
  23. )
  24. // 处理数据下载的步骤
  25. func downloadData(ctx context.Context) error {
  26. // Analytics Library
  27. if err := chromedp.Run(ctx,
  28. chromedp.Sleep(5*time.Second), // 考虑移除这一行,如果不必要的话
  29. chromedp.Navigate(rzdLoginPath),
  30. chromedp.WaitVisible(`a.mat-tooltip-trigger.analytics.home__link-card-icon[href="/analytics"]`, chromedp.ByQuery), // 等待元素可见
  31. chromedp.Click(`a.mat-tooltip-trigger.analytics.home__link-card-icon[href="/analytics"]`, chromedp.ByQuery), // 点击链接
  32. chromedp.Sleep(5*time.Second),
  33. ); err != nil {
  34. return fmt.Errorf("下载 Analytics Library 数据错误: %v", err)
  35. }
  36. if err := chromedp.Run(ctx,
  37. chromedp.WaitVisible(`#intro-home-page-step1`, chromedp.ByID), // 等待 input 元素可见
  38. chromedp.SetValue(`#intro-home-page-step1`, "oil demand signals weekly report", chromedp.ByID), // 设置值
  39. chromedp.SendKeys(`#intro-home-page-step1`, "\u000D", chromedp.ByID), // 模拟按下回车键 (\u000D 是回车的 Unicode)
  40. chromedp.Sleep(5*time.Second),
  41. ); err != nil {
  42. return fmt.Errorf("设置 input 标签值时发生错误: %v", err)
  43. }
  44. if err := chromedp.Run(ctx,
  45. // 等待第一个 download-btns div 和下载按钮可见
  46. chromedp.WaitVisible(`//div[contains(@class, 'download-btns')][1]//ul//li//button[contains(@class, 'mat-tooltip-trigger') and contains(@class, 'download__excel') and normalize-space(text())='Data']`, chromedp.BySearch),
  47. // 点击第一个 div 中的 Excel 下载按钮
  48. chromedp.Click(`//div[contains(@class, 'download-btns')][1]//ul//li//button[contains(@class, 'mat-tooltip-trigger') and contains(@class, 'download__excel') and normalize-space(text())='Data']`, chromedp.BySearch),
  49. // 可选:等待下载完成,可以调整等待时间
  50. chromedp.Sleep(5*time.Second), // 根据下载时间设置
  51. ); err != nil {
  52. return fmt.Errorf("点击第一个下载按钮时发生错误: %v", err)
  53. }
  54. // 解析文件移动到目标目录
  55. if err := waitAndRenameDownloadedFile("Oil_Demand_Signals_Weekly_Report_"+utils.GetCurrentYearMonth()+".xlsx", excelDir); err != nil {
  56. return err
  57. }
  58. return nil
  59. }
  60. // 等待下载文件并重命名
  61. func waitAndRenameDownloadedFile(newFileName, targetDir string) error {
  62. // 等待一段时间以确保文件下载完成
  63. time.Sleep(100 * time.Second) // 可能需要根据实际情况调整
  64. // 查找下载目录中的文件
  65. files, err := filepath.Glob(filepath.Join(downloadDir, "*.xlsx"))
  66. if err != nil {
  67. return fmt.Errorf("查找文件时出错: %v", err)
  68. }
  69. // 如果没有找到文件,返回错误
  70. if len(files) == 0 {
  71. return fmt.Errorf("未找到任何下载的文件")
  72. }
  73. // 找到最新的文件
  74. var latestFile string
  75. var latestTime time.Time
  76. for _, file := range files {
  77. info, err := os.Stat(file)
  78. if err != nil {
  79. return fmt.Errorf("获取文件信息时出错: %v", err)
  80. }
  81. if info.ModTime().After(latestTime) {
  82. latestTime = info.ModTime()
  83. latestFile = file
  84. }
  85. }
  86. // 目标文件的完整路径
  87. targetFilePath := filepath.Join(targetDir, newFileName)
  88. // 重命名并移动到目标目录
  89. if latestFile != "" {
  90. if err := moveFile(latestFile, targetFilePath); err != nil {
  91. return fmt.Errorf("重命名文件时出错: %v", err)
  92. }
  93. // 打印重命名后的文件名
  94. utils.FileLog.Info("文件重命名并移动到: ", targetFilePath)
  95. fmt.Printf("文件重命名并移动到: %s\n", targetFilePath)
  96. }
  97. return nil
  98. }
  99. func moveFile(source, destination string) error {
  100. // 复制文件
  101. srcFile, err := os.Open(source)
  102. if err != nil {
  103. return fmt.Errorf("打开源文件时出错: %v", err)
  104. }
  105. defer srcFile.Close()
  106. dstFile, err := os.Create(destination)
  107. if err != nil {
  108. return fmt.Errorf("创建目标文件时出错: %v", err)
  109. }
  110. defer dstFile.Close()
  111. if _, err := io.Copy(dstFile, srcFile); err != nil {
  112. return fmt.Errorf("复制文件时出错: %v", err)
  113. }
  114. time.Sleep(60 * time.Second)
  115. // 删除源文件
  116. /*if err := os.Remove(source); err != nil {
  117. return fmt.Errorf("删除源文件时出错: %v", err)
  118. }*/
  119. return nil
  120. }
  121. func createPath(paths []string) (err error) {
  122. for _, path := range paths {
  123. if path == "" {
  124. continue
  125. }
  126. _, err = os.Stat(path)
  127. if err != nil {
  128. if os.IsNotExist(err) {
  129. if err = os.MkdirAll(path, os.ModePerm); err != nil {
  130. fmt.Printf("睿咨得创建目录时出错: %v\n", err)
  131. utils.FileLog.Error("睿咨得创建目录时出错: %v", err)
  132. return
  133. }
  134. } else {
  135. return
  136. }
  137. }
  138. }
  139. return
  140. }
  141. // ResolverNet 解析网页数据,下载文件
  142. func ResolverNet(_ context.Context) (err error) {
  143. if err = createPath([]string{excelDir}); err != nil {
  144. return
  145. }
  146. // 创建 chromedp 执行上下文
  147. options := append(chromedp.DefaultExecAllocatorOptions[:3],
  148. //关闭无头模式
  149. chromedp.Flag("headless", false),
  150. chromedp.Flag("disable-blink-features", "AutomationControlled"),
  151. chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36`),
  152. chromedp.Flag("safebrowsing.enabled", true), // 启用安全浏览
  153. )
  154. parentCtx, cancel := chromedp.NewExecAllocator(context.Background(), options...)
  155. ctx, ctxCancel := chromedp.NewContext(parentCtx)
  156. defer func() {
  157. cancel()
  158. ctxCancel()
  159. }()
  160. // 启动 Chrome 实例
  161. if err = chromedp.Run(ctx); err != nil {
  162. utils.FileLog.Error("睿咨得启动 Chrome 实例时出错:", err.Error())
  163. fmt.Printf("睿咨得启动 Chrome 实例时出错: %v\n", err)
  164. return
  165. }
  166. // 登录操作
  167. if err = login(ctx); err != nil {
  168. utils.FileLog.Error("睿咨得登录错误:", err.Error())
  169. fmt.Printf("睿咨得登录错误: %v\n", err)
  170. return
  171. }
  172. fmt.Printf("登录成功")
  173. // 下载数据
  174. if err = downloadData(ctx); err != nil {
  175. utils.FileLog.Error("睿咨得数据下载错误:", err.Error())
  176. fmt.Printf("睿咨得数据下载错误: %v\n", err)
  177. return
  178. }
  179. utils.FileLog.Info("睿咨得数据下载完成")
  180. fmt.Println("数据下载完成")
  181. // 解析表格 读取数据
  182. fileResolver()
  183. return
  184. }
  185. // func setFolderPermissions(path string) error {
  186. // // 获取当前用户的信息
  187. // currentUser, err := user.Current()
  188. // if err != nil {
  189. // return err
  190. // }
  191. //
  192. // // 设置权限
  193. // err = os.Chmod(path, 0755)
  194. // if err != nil {
  195. // return err
  196. // }
  197. //
  198. // // 获取文件夹的安全描述符
  199. // handle, err := syscall.Open(path, syscall.GENERIC_READ|syscall.GENERIC_WRITE, 0)
  200. // if err != nil {
  201. // return err
  202. // }
  203. // defer syscall.CloseHandle(handle)
  204. //
  205. // var securityDescriptor *syscall.SecurityDescriptor
  206. // err = syscall.GetSecurityInfo(
  207. // handle,
  208. // syscall.SE_FILE_OBJECT,
  209. // syscall.DACL_SECURITY_INFORMATION,
  210. // nil,
  211. // nil,
  212. // &securityDescriptor,
  213. // nil,
  214. // )
  215. // if err != nil {
  216. // return err
  217. // }
  218. //
  219. // // 添加当前用户的写权限
  220. // userSID, err := syscall.StringToSid(currentUser.Uid)
  221. // if err != nil {
  222. // return err
  223. // }
  224. //
  225. // ace := &syscall.ACE{
  226. // Mask: syscall.GENERIC_ALL,
  227. // Sid: userSID,
  228. // Flags: syscall.INHERITED_ACE | syscall.OBJECT_INHERIT_ACE | syscall.CONTAINER_INHERIT_ACE,
  229. // Type: syscall.ACCESS_ALLOWED_ACE_TYPE,
  230. // Inherits: true,
  231. // }
  232. //
  233. // dacl, err := securityDescriptor.Dacl()
  234. // if err != nil {
  235. // return err
  236. // }
  237. //
  238. // dacl.AddAce(ace)
  239. // err = syscall.SetSecurityInfo(
  240. // handle,
  241. // syscall.SE_FILE_OBJECT,
  242. // syscall.DACL_SECURITY_INFORMATION,
  243. // nil,
  244. // nil,
  245. // dacl,
  246. // nil,
  247. // )
  248. // if err != nil {
  249. // return err
  250. // }
  251. //
  252. // return nil
  253. // }
  254. //
  255. // 解析本地文件
  256. func fileResolver() {
  257. //func main() {
  258. var tableNameList = []string{
  259. "Oil_Demand_Signals_Weekly_Report",
  260. "Oil_Supply_Analysis",
  261. "Supply_Revision_Analysis",
  262. "Oil_Market_Cube_Upstream_Supply_Oil_Quality_API",
  263. "Oil_Market_Cube_Upstream_Supply_Oil_Quality_Sulphur",
  264. "Oil_Market_Cube_Upstream_Supply_Capacity_Capacity",
  265. "Oil_Market_Cube_Upstream_Supply_Production",
  266. "Oil_Market_Cube_Upstream_Supply_Production_Wo_Seasonality",
  267. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Reference_Production",
  268. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Target_Production",
  269. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Target_Cut",
  270. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Actual_Cut",
  271. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Compliance",
  272. "Oil_Market_Cube_Upstream_Supply_OPEC_Policy_Production_Subject_To_Cut",
  273. "Oil_Market_Cube_Products_Demand_Products_Demand_Mean",
  274. "Oil_Market_Cube_Products_Demand_Products_Demand+Sigma",
  275. "Oil_Market_Cube_Products_Demand_Products_Demand-Sigma",
  276. "Oil_Market_Cube_Balances_Total_Liquids_Balances",
  277. "Oil_Market_Cube_Geography_Latitude",
  278. "Oil_Market_Cube_Geography_Longitude",
  279. "Oil_Demand_Analysis_Product_Detail",
  280. "Oil_Demand_Analysis_Region",
  281. "Oil_Demand_Analysis_Scenario",
  282. "Oil_Demand_Analysis_Continent",
  283. "Oil_Demand_Analysis_Country",
  284. "Oil_Demand_Analysis_Product_Category",
  285. "Oil_Demand_Analysis_Sector_Category",
  286. "Oil_Demand_Analysis_Sector_Detail",
  287. }
  288. for _, tableName := range tableNameList {
  289. var fileName string
  290. // 解析表格
  291. fileName = tableName + "_" + utils.GetCurrentYearMonth() + ".xlsx"
  292. filePath := filepath.Join(excelDir, fileName)
  293. // 打开 Excel 文件
  294. f, err := excelize.OpenFile(filePath)
  295. if err != nil {
  296. utils.FileLog.Error("无法打开 Excel 文件: %v", err)
  297. return
  298. }
  299. // 获取所有工作表
  300. sheetNames := f.GetSheetList()
  301. for _, sheetName := range sheetNames {
  302. fmt.Printf("读取工作表: %s\n", sheetName)
  303. // 获取工作表的最大行数
  304. maxRow, err := f.GetRows(sheetName) // 直接获取所有行数据
  305. if err != nil {
  306. utils.FileLog.Error("获取工作表数据时出错: %v", err)
  307. continue
  308. }
  309. // 遍历行并打印内容
  310. indexData := []models.BaseFromRzdData{}
  311. for rowIndex, rowData := range maxRow {
  312. // 因为excel文件中的sheet表格不固定 对于 Supply_Revision_Analysis, Oil_Supply_Analysis 文件 手动调整sheet表格顺序
  313. if tableName == "Supply_Revision_Analysis" && rowIndex == 0 {
  314. if rowData[0] == "YearQuarter" && rowData[1] == "Revision" && rowData[2] == "CountryRevisionGroup" {
  315. sheetName = "Chart1"
  316. }
  317. if rowData[0] == "YearQuarter" && rowData[1] == "Current" && rowData[2] == "Previous" {
  318. sheetName = "Chart2"
  319. }
  320. if rowData[0] == "Year" && rowData[1] == "Revision" && rowData[2] == "CountryRevisionGroup" {
  321. sheetName = "Chart3"
  322. }
  323. if rowData[0] == "Year" && rowData[1] == "Current" && rowData[2] == "Previous" {
  324. sheetName = "Chart4"
  325. }
  326. if rowData[0] == "Previous" && rowData[1] == "Current" && rowData[2] == "YearMonth" {
  327. sheetName = "Chart5"
  328. }
  329. if rowData[0] == "YearMonth" && rowData[1] == "CountryRevisionGroup" && rowData[2] == "Revision" {
  330. sheetName = "Chart6"
  331. }
  332. } else if tableName == "Oil_Supply_Analysis" && rowIndex == 0 {
  333. if rowData[0] == "Viz Date" && rowData[1] == "OilAndGasCategory" && rowData[2] == "supply_kbbld" {
  334. sheetName = "Chart1"
  335. }
  336. if rowData[0] == "Viz Date" && rowData[1] == "supply_kbbld" && rowData[2] == "Region" {
  337. sheetName = "Chart2"
  338. }
  339. if rowData[0] == "Viz Date" && rowData[1] == "CapacityDetail" && rowData[2] == "Capacity_kbbld" {
  340. sheetName = "Chart3"
  341. }
  342. if rowData[0] == "Viz Date" && rowData[1] == "Oil Classification Group" && rowData[2] == "supply_kbbld" {
  343. sheetName = "Chart4"
  344. }
  345. }
  346. processor, err := GetProcessor(tableName, sheetName)
  347. if err != nil {
  348. utils.FileLog.Error("无法获取处理器: %v", err)
  349. continue
  350. }
  351. baseFromRzdDataList, err := processor.Process(tableName, sheetName, rowIndex, rowData)
  352. if err != nil {
  353. utils.FileLog.Error("处理数据失败: %v", err)
  354. return
  355. }
  356. indexData = append(indexData, baseFromRzdDataList...)
  357. }
  358. // 新增数据源指标数据
  359. if len(indexData) > 0 {
  360. for i := 0; i < len(indexData); i += rzdBatchSize {
  361. // 计算当前批次的结束索引
  362. end := i + rzdBatchSize
  363. if end > len(indexData) {
  364. end = len(indexData)
  365. }
  366. // 获取当前批次的数据
  367. batchData := indexData[i:end]
  368. // 转换成json
  369. marshal, err := json.Marshal(batchData)
  370. if err != nil {
  371. utils.FileLog.Error("json.Marshal err: %v", err)
  372. return
  373. }
  374. // 发送 HTTP POST 请求
  375. _, err = utils.HttpPostRequest(utils.EDB_LIB_URL+utils.ADD_BATCH_RZD_DATA, string(marshal), "application/json")
  376. if err != nil {
  377. utils.FileLog.Error("postEdbLib err: %v", err)
  378. return
  379. }
  380. }
  381. }
  382. // 新增指标库数据
  383. edbDataList := []models.EdbDataRzd{}
  384. for _, index := range indexData {
  385. // 补充 判断是否存在于指标库
  386. paramsLib := make(map[string]interface{})
  387. paramsLib["IndexCode"] = index.IndexCode
  388. paramsLib["Source"] = utils.DATA_SOURCE_RZD
  389. postEdbLib, err := httpRequestFill(paramsLib, utils.GET_RZD_EDB_INFO_BY_INDEX_CODE)
  390. if err != nil {
  391. // 有错误就不继续执行
  392. utils.FileLog.Error("postEdbLib err: %v", err)
  393. continue
  394. }
  395. var requestResponse models.RequestResponse[models.EdbInfo]
  396. err = json.Unmarshal(postEdbLib, &requestResponse)
  397. if err != nil {
  398. utils.FileLog.Error("postEdbLib err: %v", err)
  399. continue
  400. }
  401. if requestResponse.Data.EdbInfoId != 0 {
  402. edbDataRzd := models.EdbDataRzd{
  403. CreateTime: utils.GetCurrentTime(),
  404. ModifyTime: utils.GetCurrentTime(),
  405. EdbInfoId: index.BaseFromRzdIndexId,
  406. EdbCode: index.IndexCode,
  407. DataTime: index.DataTime,
  408. Value: index.Value,
  409. DataTimestamp: uint64(utils.StringToTimeFormat(index.DataTime, utils.FormatDate).UnixMilli()),
  410. }
  411. edbDataList = append(edbDataList, edbDataRzd)
  412. }
  413. }
  414. if len(edbDataList) > 0 {
  415. // 转换成json
  416. marshal, err := json.Marshal(edbDataList)
  417. if err != nil {
  418. utils.FileLog.Error("postEdbLib err: %v", err)
  419. return
  420. }
  421. _, err = utils.HttpPostRequest(utils.EDB_LIB_URL+utils.ADD_BATCH_RZD_EDB_DATA, string(marshal), "application/json")
  422. if err != nil {
  423. // 有错误就不继续执行
  424. utils.FileLog.Error("postEdbLib err: %v", err)
  425. return
  426. }
  427. }
  428. }
  429. }
  430. }
  431. func login(ctx context.Context) error {
  432. if rzdLoginPath == "" {
  433. return errors.New("睿咨得登录页面地址未配置")
  434. }
  435. return chromedp.Run(ctx,
  436. chromedp.Navigate(rzdLoginPath),
  437. chromedp.WaitVisible(`body`, chromedp.ByQuery),
  438. chromedp.SetValue(`input[id="Username"]`, utils.RZD_USERNAME, chromedp.ByQuery),
  439. chromedp.SetValue(`input[id="Password"]`, utils.RZD_PASSWORD, chromedp.ByQuery),
  440. chromedp.WaitEnabled(`//button[text()='Login']`, chromedp.BySearch),
  441. chromedp.Sleep(5*time.Second),
  442. chromedp.Click(`//button[text()='Login']`, chromedp.BySearch),
  443. // 等待并点击登录后页面的链接
  444. /*chromedp.WaitVisible(`a[href="/home"]`, chromedp.ByQuery), // 等待 Analytics Library 链接可见
  445. chromedp.Sleep(5*time.Second), */ // 等待页面加载完成
  446. )
  447. }
  448. func httpRequestFill(data interface{}, urlMethod string) (postEdbLib []byte, err error) {
  449. // 转换成json
  450. marshal, err := json.Marshal(data)
  451. if err != nil {
  452. return nil, err
  453. }
  454. // json 转 interface
  455. var result map[string]interface{}
  456. err = json.Unmarshal(marshal, &result)
  457. if err != nil {
  458. return nil, err
  459. }
  460. postEdbLib, err = utils.PostEdbLibRequest(result, urlMethod)
  461. if err != nil {
  462. // 有错误就不继续执行
  463. log.Printf("postEdbLib err: %v", err)
  464. return nil, err
  465. }
  466. return postEdbLib, nil
  467. }