chart.go 11 KB


  1. package base_from_ccf
  2. import (
  3. "context"
  4. "encoding/json"
  5. "eta/eta_data_analysis/models"
  6. "eta/eta_data_analysis/utils"
  7. "fmt"
  8. "os"
  9. "strconv"
  10. "strings"
  11. "time"
  12. "github.com/PuerkitoBio/goquery"
  13. )
  // CCFChartRule is one entry of the chart rule file: it describes a CCF chart
  // page to crawl and how the indexes found on it are classified. Rules may
  // nest through Child.
  type CCFChartRule struct {
  	// Name is the chart name; the crawler compares it with the active title
  	// of the fetched page and, for child rules, with the product name of a
  	// table.
  	Name string `json:"Name"`
  	// ClassifyId is copied onto every index produced from this rule.
  	ClassifyId int `json:"ClassifyId"`
  	// CustNo is sent as the cust_no query parameter when fetching the page.
  	CustNo int `json:"CustNo"`
  	// Frequency is copied onto the produced indexes; a matching child rule
  	// overrides it with its own Frequency.
  	Frequency string `json:"Frequency"`
  	// IndexType selects the value column: "周均" reads the 4th table cell,
  	// anything else reads the 3rd.
  	IndexType string `json:"IndexType"`
  	// Child holds optional sub-page rules; it is a slice of pointers and is
  	// omitted from JSON when empty.
  	Child []*CCFChartRule `json:"Child,omitempty"`
  }
  23. func loadCCFChartRule() (rules []*CCFChartRule, err error) {
  24. if utils.CCFChartRuleFile == "" {
  25. err = fmt.Errorf("rule文件不存在")
  26. return
  27. }
  28. b, e := os.ReadFile(utils.CCFChartRuleFile)
  29. if e != nil {
  30. err = fmt.Errorf("读取rule文件失败, err: %v", e)
  31. return
  32. }
  33. rules = make([]*CCFChartRule, 0)
  34. if e = json.Unmarshal(b, &rules); e != nil {
  35. err = fmt.Errorf("解析rule文件失败, err: %v", e)
  36. return
  37. }
  38. return
  39. }
  // CCFChartAdditionRule is one entry of the additional chart rule file. These
  // charts are requested with a form POST carrying a product list and a date
  // range instead of a cust_no page link.
  type CCFChartAdditionRule struct {
  	// Name becomes the index name when a table's product name is contained
  	// in it; the index code is derived from it as well.
  	Name string `json:"Name"`
  	// ClassifyId is copied onto every index produced from this rule.
  	ClassifyId int `json:"ClassifyId"`
  	// Frequency is copied onto every index produced from this rule.
  	Frequency string `json:"Frequency"`
  	// ProdNames is sent verbatim as the "prodNames" form field.
  	ProdNames string `json:"prodNames"`
  	// LastNYear controls the start of the requested date range (1 January of
  	// the year now+1-LastNYear); the crawler replaces 0 with 5.
  	LastNYear int `json:"LastNYear"`
  	// IndexType selects the value column: "周均" reads the 4th table cell,
  	// anything else reads the 3rd.
  	IndexType string `json:"IndexType"`
  }
  48. func LoadCCFChartAdditionRule() (rules []*CCFChartAdditionRule, err error) {
  49. if utils.CCFChartAdditionRuleFile == "" {
  50. err = fmt.Errorf("rule文件不存在")
  51. return
  52. }
  53. b, e := os.ReadFile(utils.CCFChartAdditionRuleFile)
  54. if e != nil {
  55. err = fmt.Errorf("读取rule文件失败, err: %v", e)
  56. return
  57. }
  58. rules = make([]*CCFChartAdditionRule, 0)
  59. if e = json.Unmarshal(b, &rules); e != nil {
  60. err = fmt.Errorf("解析rule文件失败, err: %v", e)
  61. return
  62. }
  63. return
  64. }
  65. func TaskGetCCFChartEdb(context.Context) (err error) {
  66. _ = GetCCFChartEdb()
  67. return
  68. }
  69. func GetCCFChartEdb() (err error) {
  70. defer func() {
  71. if err != nil {
  72. tips := fmt.Sprintf("GetCCFChartEdb ErrMsg: %s", err.Error())
  73. utils.FileLog.Info(tips)
  74. fmt.Println(tips)
  75. }
  76. }()
  77. rules, err := loadCCFChartRule()
  78. if err != nil {
  79. return
  80. }
  81. indexes := make([]*HandleIndexData, 0)
  82. for _, v := range rules {
  83. // 首页报告链接
  84. pageHtml := fmt.Sprintf("%s?cust_no=%d", CCFCHARTDATAURL, v.CustNo)
  85. fmt.Println(pageHtml)
  86. fileContent, e := fetchPageHtml(pageHtml, 0)
  87. if e != nil {
  88. err = fmt.Errorf("获取首页报告失败, err: %v", e)
  89. return
  90. }
  91. /*fName := v.Name
  92. if strings.Contains(v.Name, "/") {
  93. fName = strings.ReplaceAll(fName, "/", "")
  94. }
  95. filePath := fmt.Sprintf("/Users/xiexiaoyuan/工作/数据源ccf/ccf图表/%s/index.html", fName)
  96. fmt.Println(filePath)
  97. // 打开文件
  98. file, e := os.Open(filePath)
  99. if e != nil {
  100. err = fmt.Errorf("无法打开文件: %v", err)
  101. return
  102. }
  103. defer file.Close()
  104. // 读取文件内容
  105. fileContent, e := io.ReadAll(file)
  106. if e != nil {
  107. err = fmt.Errorf("读取文件内容失败: %v", e)
  108. fmt.Printf("无法读取文件内容: %v", e)
  109. return
  110. }*/
  111. // 转换编码
  112. // 转换编码
  113. /*utf8Reader, e := charset.NewReaderLabel("gb2312", bytes.NewReader(fileContent))
  114. if e != nil {
  115. err = fmt.Errorf("utf8 reader err: %s", e.Error())
  116. return
  117. }
  118. utf8Body, e := io.ReadAll(utf8Reader)
  119. if e != nil {
  120. err = fmt.Errorf("读取utf8 body err: %s", e.Error())
  121. return
  122. }*/
  123. //firstHtml := string(utf8Body)
  124. //fmt.Println(firstHtml)
  125. isStop, indexList, e := AnalysisChartInventoryWeeklyEdb(fileContent, v)
  126. if e != nil {
  127. err = fmt.Errorf("解析图表失败, err: %v", e)
  128. return
  129. }
  130. if isStop {
  131. err = fmt.Errorf("图表名称不存在,停止爬取")
  132. break
  133. }
  134. if len(indexList) > 0 {
  135. indexes = append(indexes, indexList...)
  136. }
  137. }
  138. additionRules, err := LoadCCFChartAdditionRule()
  139. if err != nil {
  140. err = fmt.Errorf("加载额外图表规则失败 err: %v", err)
  141. return
  142. }
  143. now := time.Now()
  144. for _, v := range additionRules {
  145. param := make(map[string]string)
  146. if v.LastNYear == 0 {
  147. v.LastNYear = 5
  148. }
  149. param["startdate"] = time.Date(now.Year()+1-v.LastNYear, 1, 1, 0, 0, 0, 0, time.Local).Format(utils.FormatDate2)
  150. param["enddate"] = now.Format(utils.FormatDate2)
  151. param["type"] = "1"
  152. param["prodNames"] = v.ProdNames
  153. param["skin"] = "infographic"
  154. param["page"] = "index.php"
  155. fmt.Println(param)
  156. htmlContent, er := postPageHtml(CCFCHARTDATAURL, param, 0)
  157. if er != nil {
  158. err = fmt.Errorf("获取首页报告失败, err: %v", er)
  159. return
  160. }
  161. isStop, indexList, e := AnalysisAdditionChartInventoryWeeklyEdb(htmlContent, v)
  162. if e != nil {
  163. err = fmt.Errorf("解析图表失败, err: %v", e)
  164. return
  165. }
  166. if isStop {
  167. err = fmt.Errorf("图表名称不存在,停止爬取")
  168. break
  169. }
  170. if len(indexList) > 0 {
  171. indexes = append(indexes, indexList...)
  172. }
  173. }
  174. if len(indexes) == 0 {
  175. return
  176. }
  177. // 写入数据库
  178. params := make(map[string]interface{})
  179. params["List"] = indexes
  180. params["TerminalCode"] = utils.TerminalCode
  181. result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_EDB_HANDLE)
  182. if e != nil {
  183. b, _ := json.Marshal(params)
  184. err = fmt.Errorf("postEdbLib err: %v, params: %s", e, string(b))
  185. return
  186. }
  187. resp := new(models.BaseEdbLibResponse)
  188. if e = json.Unmarshal(result, &resp); e != nil {
  189. err = fmt.Errorf(" postEdbLib resp json.Unmarshal err: %v", e)
  190. return
  191. }
  192. if resp.Ret != 200 {
  193. err = fmt.Errorf("postEdbLib resp Msg: %s, ErrMsg: %s", resp.Msg, resp.ErrMsg)
  194. return
  195. }
  196. return
  197. }
  198. // AnalysisChartInventoryWeeklyEdb 解析周度库存中的日均值
  199. func AnalysisChartInventoryWeeklyEdb(htm []byte, rule *CCFChartRule) (isStop bool, indexes []*HandleIndexData, err error) {
  200. if len(htm) == 0 {
  201. utils.FileLog.Info("htm empty")
  202. return
  203. }
  204. doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
  205. if e != nil {
  206. err = fmt.Errorf("NewDocumentFromReader err: %v", e)
  207. return
  208. }
  209. // 判断图表名称是否相符,如果不符合放弃爬取
  210. title := doc.Find("p a.activated.now").Text()
  211. fmt.Println(title)
  212. if title != rule.Name {
  213. utils.FileLog.Info("图表名称不存在,停止爬取")
  214. isStop = true
  215. return
  216. }
  217. doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
  218. // 提取单位(这里假设单位总是位于 .tips 类的 div 中)
  219. unit := item.Find(".tips").Text()
  220. unit = strings.TrimSpace(unit)
  221. unit = strings.TrimPrefix(unit, "编制说明:单位(")
  222. unit = strings.TrimSuffix(unit, ")")
  223. fmt.Println("单位: ", unit)
  224. indexCode := ""
  225. indexName := ""
  226. // 获取频度和分类ID
  227. classifyId := rule.ClassifyId
  228. frequency := rule.Frequency
  229. childRule := make(map[string]*CCFChartRule)
  230. // 判断是否存在子页面
  231. if len(rule.Child) > 0 {
  232. for _, v := range rule.Child {
  233. childRule[v.Name] = v
  234. }
  235. }
  236. dataMap := make(map[string]string)
  237. // 遍历表格中的每一行(跳过表头)
  238. item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
  239. if k == 0 {
  240. return
  241. }
  242. // 提取产品名称
  243. if indexCode == "" {
  244. productName := row.Find("td:nth-child(1)").Text()
  245. productName = strings.TrimSpace(productName)
  246. //判断子页面的频度
  247. if newRule, ok := childRule[productName]; ok {
  248. frequency = newRule.Frequency
  249. // 在存在子类的情况下,判断产品是否属于子类,不属于则跳过
  250. } else if len(childRule) > 0 {
  251. return
  252. }
  253. indexCode = fmt.Sprintf("ccf%s", utils.GetFirstPingYin(productName))
  254. indexName = fmt.Sprintf("CCF%s", productName)
  255. }
  256. // 提取日期
  257. date := row.Find("td:nth-child(2)").Text()
  258. date = strings.TrimSpace(date)
  259. // 提取日均值
  260. var dailyAvg string
  261. if rule.IndexType == "周均" {
  262. dailyAvg = row.Find("td:nth-child(4)").Text()
  263. dailyAvg = strings.TrimSpace(dailyAvg)
  264. } else {
  265. dailyAvg = row.Find("td:nth-child(3)").Text()
  266. dailyAvg = strings.TrimSpace(dailyAvg)
  267. }
  268. // 打印提取的信息
  269. fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
  270. _, e = strconv.ParseFloat(dailyAvg, 64)
  271. if e != nil {
  272. utils.FileLog.Info("数据转换失败 err:%s", e.Error())
  273. return
  274. }
  275. dataMap[date] = dailyAvg
  276. })
  277. if indexName == "" {
  278. return
  279. }
  280. edb := new(HandleIndexData)
  281. edb.IndexCode = strings.ToLower(indexCode)
  282. edb.IndexName = indexName
  283. edb.ClassifyId = classifyId
  284. edb.Frequency = frequency
  285. edb.Unit = unit
  286. edb.DateData = dataMap
  287. edb.TerminalCode = utils.TerminalCode
  288. indexes = append(indexes, edb)
  289. })
  290. return
  291. }
  292. func AnalysisAdditionChartInventoryWeeklyEdb(htm []byte, rule *CCFChartAdditionRule) (isStop bool, indexes []*HandleIndexData, err error) {
  293. if len(htm) == 0 {
  294. utils.FileLog.Info("htm empty")
  295. return
  296. }
  297. doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
  298. if e != nil {
  299. err = fmt.Errorf("NewDocumentFromReader err: %v", e)
  300. return
  301. }
  302. // 判断图表名称是否相符,如果不符合放弃爬取
  303. doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
  304. // 提取单位(这里假设单位总是位于 .tips 类的 div 中)
  305. unit := item.Find(".tips").Text()
  306. unit = strings.TrimSpace(unit)
  307. unit = strings.TrimPrefix(unit, "编制说明:单位(")
  308. unit = strings.TrimSuffix(unit, ")")
  309. fmt.Println("单位: ", unit)
  310. indexCode := ""
  311. indexName := ""
  312. // 获取频度和分类ID
  313. classifyId := rule.ClassifyId
  314. frequency := rule.Frequency
  315. dataMap := make(map[string]string)
  316. // 遍历表格中的每一行(跳过表头)
  317. item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
  318. if k == 0 {
  319. return
  320. }
  321. // 提取产品名称
  322. if indexCode == "" {
  323. productName := row.Find("td:nth-child(1)").Text()
  324. productName = strings.TrimSpace(productName)
  325. if strings.Contains(rule.Name, productName) {
  326. indexName = rule.Name
  327. code := strings.ToLower(utils.GetFirstPingYin(indexName))
  328. code = strings.ReplaceAll(code, "/", "")
  329. code = strings.ReplaceAll(code, " ", "")
  330. indexCode = strings.ToLower(code)
  331. }
  332. }
  333. // 提取日期
  334. date := row.Find("td:nth-child(2)").Text()
  335. date = strings.TrimSpace(date)
  336. // 提取周(日)均值
  337. var dailyAvg string
  338. if rule.IndexType == "周均" {
  339. dailyAvg = row.Find("td:nth-child(4)").Text()
  340. dailyAvg = strings.TrimSpace(dailyAvg)
  341. } else {
  342. dailyAvg = row.Find("td:nth-child(3)").Text()
  343. dailyAvg = strings.TrimSpace(dailyAvg)
  344. }
  345. // 打印提取的信息
  346. fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
  347. _, e = strconv.ParseFloat(dailyAvg, 64)
  348. if e != nil {
  349. utils.FileLog.Info("数据转换失败 err:%s", e.Error())
  350. return
  351. }
  352. dataMap[date] = dailyAvg
  353. })
  354. if indexName == "" {
  355. return
  356. }
  357. edb := new(HandleIndexData)
  358. edb.IndexCode = strings.ToLower(indexCode)
  359. edb.IndexName = indexName
  360. edb.ClassifyId = classifyId
  361. edb.Frequency = frequency
  362. edb.Unit = unit
  363. edb.DateData = dataMap
  364. edb.TerminalCode = utils.TerminalCode
  365. indexes = append(indexes, edb)
  366. })
  367. return
  368. }