chart.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. package base_from_ccf
  2. import (
  3. "context"
  4. "encoding/json"
  5. "eta/eta_data_analysis/models"
  6. "eta/eta_data_analysis/utils"
  7. "fmt"
  8. "github.com/PuerkitoBio/goquery"
  9. "os"
  10. "strconv"
  11. "strings"
  12. )
  // CCFChartRule is one entry of the CCF chart rule file: it identifies a chart
  // page to crawl and carries the metadata copied onto the indexes parsed from it.
  // Rules may nest through Child.
  type CCFChartRule struct {
  	// Name is the chart name; the crawler compares it with the active tab
  	// title of the fetched page.
  	Name string `json:"Name"`
  	// ClassifyId is the classification ID assigned to every index parsed
  	// from this chart.
  	ClassifyId int `json:"ClassifyId"`
  	// CustNo is sent as the cust_no query parameter when requesting the page.
  	CustNo int `json:"CustNo"`
  	// Frequency is the default frequency assigned to indexes from this chart.
  	Frequency string `json:"Frequency"`
  	// Child holds optional nested rules (intended for sub-pages that carry
  	// their own frequency). Pointers are used because the key may be absent.
  	Child []*CCFChartRule `json:"Child,omitempty"`
  }
  21. func loadCCFChartRule() (rules []*CCFChartRule, err error) {
  22. if utils.CCFChartRuleFile == "" {
  23. err = fmt.Errorf("rule文件不存在")
  24. return
  25. }
  26. b, e := os.ReadFile(utils.CCFChartRuleFile)
  27. if e != nil {
  28. err = fmt.Errorf("读取rule文件失败, err: %v", e)
  29. return
  30. }
  31. rules = make([]*CCFChartRule, 0)
  32. if e = json.Unmarshal(b, &rules); e != nil {
  33. err = fmt.Errorf("解析rule文件失败, err: %v", e)
  34. return
  35. }
  36. return
  37. }
  38. func TaskGetCCFChartEdb(context.Context) (err error) {
  39. _ = GetCCFChartEdb()
  40. return
  41. }
  42. func GetCCFChartEdb() (err error) {
  43. defer func() {
  44. if err != nil {
  45. tips := fmt.Sprintf("GetCCFChartEdb ErrMsg: %s", err.Error())
  46. utils.FileLog.Info(tips)
  47. fmt.Println(tips)
  48. }
  49. }()
  50. rules, err := loadCCFChartRule()
  51. if err != nil {
  52. return
  53. }
  54. indexes := make([]*HandleIndexData, 0)
  55. for _, v := range rules {
  56. // 首页报告链接
  57. pageHtml := fmt.Sprintf("%s?cust_no=%d", CCFCHARTDATAURL, v.CustNo)
  58. fmt.Println(pageHtml)
  59. fileContent, e := fetchPageHtml(pageHtml, 0)
  60. /*fName := v.Name
  61. if strings.Contains(v.Name, "/") {
  62. fName = strings.ReplaceAll(fName, "/", "")
  63. }
  64. filePath := fmt.Sprintf("/Users/xiexiaoyuan/工作/数据源ccf/ccf图表/%s/index.html", fName)
  65. fmt.Println(filePath)
  66. // 打开文件
  67. file, e := os.Open(filePath)
  68. if e != nil {
  69. err = fmt.Errorf("无法打开文件: %v", err)
  70. return
  71. }
  72. defer file.Close()
  73. // 读取文件内容
  74. fileContent, e := io.ReadAll(file)
  75. if e != nil {
  76. err = fmt.Errorf("读取文件内容失败: %v", e)
  77. fmt.Printf("无法读取文件内容: %v", e)
  78. return
  79. }*/
  80. // 转换编码
  81. // 转换编码
  82. /*utf8Reader, e := charset.NewReaderLabel("gb2312", bytes.NewReader(fileContent))
  83. if e != nil {
  84. err = fmt.Errorf("utf8 reader err: %s", e.Error())
  85. return
  86. }
  87. utf8Body, e := io.ReadAll(utf8Reader)
  88. if e != nil {
  89. err = fmt.Errorf("读取utf8 body err: %s", e.Error())
  90. return
  91. }*/
  92. //firstHtml := string(utf8Body)
  93. //fmt.Println(firstHtml)
  94. isStop, indexList, e := AnalysisChartInventoryWeeklyEdb(fileContent, v)
  95. if e != nil {
  96. err = fmt.Errorf("解析图表失败, err: %v", e)
  97. return
  98. }
  99. if isStop {
  100. err = fmt.Errorf("图表名称不存在,停止爬取")
  101. break
  102. }
  103. if len(indexList) > 0 {
  104. indexes = append(indexes, indexList...)
  105. }
  106. }
  107. if len(indexes) == 0 {
  108. return
  109. }
  110. // 写入数据库
  111. params := make(map[string]interface{})
  112. params["List"] = indexes
  113. params["TerminalCode"] = utils.TerminalCode
  114. result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_EDB_HANDLE)
  115. if e != nil {
  116. b, _ := json.Marshal(params)
  117. err = fmt.Errorf("postEdbLib err: %v, params: %s", e, string(b))
  118. return
  119. }
  120. resp := new(models.BaseEdbLibResponse)
  121. if e = json.Unmarshal(result, &resp); e != nil {
  122. err = fmt.Errorf(" postEdbLib resp json.Unmarshal err: %v", e)
  123. return
  124. }
  125. if resp.Ret != 200 {
  126. err = fmt.Errorf("postEdbLib resp Msg: %s, ErrMsg: %s", resp.Msg, resp.ErrMsg)
  127. return
  128. }
  129. return
  130. }
  131. // AnalysisChartInventoryWeeklyEdb 解析周度库存中的日均值
  132. func AnalysisChartInventoryWeeklyEdb(htm []byte, rule *CCFChartRule) (isStop bool, indexes []*HandleIndexData, err error) {
  133. if len(htm) == 0 {
  134. utils.FileLog.Info("htm empty")
  135. return
  136. }
  137. doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
  138. if e != nil {
  139. err = fmt.Errorf("NewDocumentFromReader err: %v", e)
  140. return
  141. }
  142. // 判断图表名称是否相符,如果不符合放弃爬取
  143. title := doc.Find("p a.activated.now").Text()
  144. fmt.Println(title)
  145. if title != rule.Name {
  146. utils.FileLog.Info("图表名称不存在,停止爬取")
  147. isStop = true
  148. return
  149. }
  150. doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
  151. // 提取单位(这里假设单位总是位于 .tips 类的 div 中)
  152. unit := item.Find(".tips").Text()
  153. unit = strings.TrimSpace(unit)
  154. unit = strings.TrimPrefix(unit, "编制说明:单位(")
  155. unit = strings.TrimSuffix(unit, ")")
  156. fmt.Println("单位: ", unit)
  157. indexCode := ""
  158. indexName := ""
  159. // 获取频度和分类ID
  160. classifyId := rule.ClassifyId
  161. frequency := rule.Frequency
  162. childRule := make(map[string]*CCFChartRule)
  163. // 判断是否存在子页面
  164. if len(rule.Child) > 0 {
  165. for _, v := range childRule {
  166. childRule[v.Name] = v
  167. }
  168. }
  169. dataMap := make(map[string]string)
  170. // 遍历表格中的每一行(跳过表头)
  171. item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
  172. if k == 0 {
  173. return
  174. }
  175. // 提取产品名称
  176. if indexCode == "" {
  177. productName := row.Find("td:nth-child(1)").Text()
  178. productName = strings.TrimSpace(productName)
  179. indexCode = fmt.Sprintf("ccf%s", utils.GetFirstPingYin(productName))
  180. indexName = fmt.Sprintf("CCF%s", productName)
  181. //判断子页面的频度
  182. if newRule, ok := childRule[productName]; ok {
  183. frequency = newRule.Frequency
  184. }
  185. }
  186. // 提取日期
  187. date := row.Find("td:nth-child(2)").Text()
  188. date = strings.TrimSpace(date)
  189. // 提取日均值
  190. dailyAvg := row.Find("td:nth-child(3)").Text()
  191. dailyAvg = strings.TrimSpace(dailyAvg)
  192. // 打印提取的信息
  193. fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
  194. _, e = strconv.ParseFloat(dailyAvg, 64)
  195. if e != nil {
  196. utils.FileLog.Info("数据转换失败 err:%s", e.Error())
  197. return
  198. }
  199. dataMap[date] = dailyAvg
  200. })
  201. if indexName == "" {
  202. return
  203. }
  204. edb := new(HandleIndexData)
  205. edb.IndexCode = strings.ToLower(indexCode)
  206. edb.IndexName = indexName
  207. edb.ClassifyId = classifyId
  208. edb.Frequency = frequency
  209. edb.Unit = unit
  210. edb.DateData = dataMap
  211. edb.TerminalCode = utils.TerminalCode
  212. indexes = append(indexes, edb)
  213. })
  214. return
  215. }