chart.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. package base_from_ccf
  2. import (
  3. "context"
  4. "encoding/json"
  5. "eta/eta_data_analysis/models"
  6. "eta/eta_data_analysis/utils"
  7. "fmt"
  8. "os"
  9. "strconv"
  10. "strings"
  11. "time"
  12. "github.com/PuerkitoBio/goquery"
  13. )
  // CCFChartRule is one entry of the chart rule file decoded by
  // loadCCFChartRule. It describes a CCF chart page to crawl and how the
  // indexes extracted from that page are labelled.
  type CCFChartRule struct {
  	// Name is compared with the highlighted page title before a page is parsed.
  	Name string `json:"Name"`
  	// ClassifyId is copied onto every index produced from this rule.
  	ClassifyId int `json:"ClassifyId"`
  	// CustNo is sent as the cust_no query parameter when the page is fetched.
  	CustNo int `json:"CustNo"`
  	// Frequency is the default frequency label of the produced indexes.
  	Frequency string `json:"Frequency"`
  	// IndexType selects the value column: "周均" reads the 4th column,
  	// anything else reads the 3rd.
  	IndexType string `json:"IndexType"`
  	// Child holds optional per-product rules, looked up by product name.
  	// Pointers are used so that absent children are simply omitted.
  	Child []*CCFChartRule `json:"Child,omitempty"`
  }
  23. func loadCCFChartRule() (rules []*CCFChartRule, err error) {
  24. if utils.CCFChartRuleFile == "" {
  25. err = fmt.Errorf("rule文件不存在")
  26. return
  27. }
  28. b, e := os.ReadFile(utils.CCFChartRuleFile)
  29. if e != nil {
  30. err = fmt.Errorf("读取rule文件失败, err: %v", e)
  31. return
  32. }
  33. rules = make([]*CCFChartRule, 0)
  34. if e = json.Unmarshal(b, &rules); e != nil {
  35. err = fmt.Errorf("解析rule文件失败, err: %v", e)
  36. return
  37. }
  38. return
  39. }
  // CCFChartAdditionRule is one entry of the additional chart rule file decoded
  // by LoadCCFChartAdditionRule. Each entry describes a chart that is queried
  // by product name over a date range.
  type CCFChartAdditionRule struct {
  	// Name becomes the index name; the index code is derived from it as well.
  	Name string `json:"Name"`
  	// ClassifyId is copied onto the produced index.
  	ClassifyId int `json:"ClassifyId"`
  	// Frequency is copied onto the produced index.
  	Frequency string `json:"Frequency"`
  	// ProdNames is sent as the prodNames form parameter (note the lower-case
  	// JSON key).
  	ProdNames string `json:"prodNames"`
  	// LastNYear is the number of calendar years, counting the current one,
  	// covered by the query.
  	LastNYear int `json:"LastNYear"`
  }
  47. func LoadCCFChartAdditionRule() (rules []*CCFChartAdditionRule, err error) {
  48. if utils.CCFChartAdditionRuleFile == "" {
  49. err = fmt.Errorf("rule文件不存在")
  50. return
  51. }
  52. b, e := os.ReadFile(utils.CCFChartAdditionRuleFile)
  53. if e != nil {
  54. err = fmt.Errorf("读取rule文件失败, err: %v", e)
  55. return
  56. }
  57. rules = make([]*CCFChartAdditionRule, 0)
  58. if e = json.Unmarshal(b, &rules); e != nil {
  59. err = fmt.Errorf("解析rule文件失败, err: %v", e)
  60. return
  61. }
  62. return
  63. }
  64. func TaskGetCCFChartEdb(context.Context) (err error) {
  65. _ = GetCCFChartEdb()
  66. return
  67. }
  68. func GetCCFChartEdb() (err error) {
  69. defer func() {
  70. if err != nil {
  71. tips := fmt.Sprintf("GetCCFChartEdb ErrMsg: %s", err.Error())
  72. utils.FileLog.Info(tips)
  73. fmt.Println(tips)
  74. }
  75. }()
  76. rules, err := loadCCFChartRule()
  77. if err != nil {
  78. return
  79. }
  80. indexes := make([]*HandleIndexData, 0)
  81. for _, v := range rules {
  82. // 首页报告链接
  83. pageHtml := fmt.Sprintf("%s?cust_no=%d", CCFCHARTDATAURL, v.CustNo)
  84. fmt.Println(pageHtml)
  85. fileContent, e := fetchPageHtml(pageHtml, 0)
  86. if e != nil {
  87. err = fmt.Errorf("获取首页报告失败, err: %v", e)
  88. return
  89. }
  90. /*fName := v.Name
  91. if strings.Contains(v.Name, "/") {
  92. fName = strings.ReplaceAll(fName, "/", "")
  93. }
  94. filePath := fmt.Sprintf("/Users/xiexiaoyuan/工作/数据源ccf/ccf图表/%s/index.html", fName)
  95. fmt.Println(filePath)
  96. // 打开文件
  97. file, e := os.Open(filePath)
  98. if e != nil {
  99. err = fmt.Errorf("无法打开文件: %v", err)
  100. return
  101. }
  102. defer file.Close()
  103. // 读取文件内容
  104. fileContent, e := io.ReadAll(file)
  105. if e != nil {
  106. err = fmt.Errorf("读取文件内容失败: %v", e)
  107. fmt.Printf("无法读取文件内容: %v", e)
  108. return
  109. }*/
  110. // 转换编码
  111. // 转换编码
  112. /*utf8Reader, e := charset.NewReaderLabel("gb2312", bytes.NewReader(fileContent))
  113. if e != nil {
  114. err = fmt.Errorf("utf8 reader err: %s", e.Error())
  115. return
  116. }
  117. utf8Body, e := io.ReadAll(utf8Reader)
  118. if e != nil {
  119. err = fmt.Errorf("读取utf8 body err: %s", e.Error())
  120. return
  121. }*/
  122. //firstHtml := string(utf8Body)
  123. //fmt.Println(firstHtml)
  124. isStop, indexList, e := AnalysisChartInventoryWeeklyEdb(fileContent, v)
  125. if e != nil {
  126. err = fmt.Errorf("解析图表失败, err: %v", e)
  127. return
  128. }
  129. if isStop {
  130. err = fmt.Errorf("图表名称不存在,停止爬取")
  131. break
  132. }
  133. if len(indexList) > 0 {
  134. indexes = append(indexes, indexList...)
  135. }
  136. }
  137. additionRules, err := LoadCCFChartAdditionRule()
  138. if err != nil {
  139. err = fmt.Errorf("加载额外图表规则失败 err: %v", err)
  140. return
  141. }
  142. now := time.Now()
  143. for _, v := range additionRules {
  144. param := make(map[string]string)
  145. if v.LastNYear == 0 {
  146. v.LastNYear = 5
  147. }
  148. param["startdate"] = time.Date(now.Year()+1-v.LastNYear, 1, 1, 0, 0, 0, 0, time.Local).Format(utils.FormatDate2)
  149. param["enddate"] = now.Format(utils.FormatDate2)
  150. param["type"] = "1"
  151. param["prodNames"] = v.ProdNames
  152. param["skin"] = "infographic"
  153. param["page"] = "index.php"
  154. fmt.Println(param)
  155. htmlContent, er := postPageHtml(CCFCHARTDATAURL, param, 0)
  156. if er != nil {
  157. err = fmt.Errorf("获取首页报告失败, err: %v", er)
  158. return
  159. }
  160. isStop, indexList, e := AnalysisAdditionChartInventoryWeeklyEdb(htmlContent, v)
  161. if e != nil {
  162. err = fmt.Errorf("解析图表失败, err: %v", e)
  163. return
  164. }
  165. if isStop {
  166. err = fmt.Errorf("图表名称不存在,停止爬取")
  167. break
  168. }
  169. if len(indexList) > 0 {
  170. indexes = append(indexes, indexList...)
  171. }
  172. }
  173. if len(indexes) == 0 {
  174. return
  175. }
  176. // 写入数据库
  177. params := make(map[string]interface{})
  178. params["List"] = indexes
  179. params["TerminalCode"] = utils.TerminalCode
  180. result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_EDB_HANDLE)
  181. if e != nil {
  182. b, _ := json.Marshal(params)
  183. err = fmt.Errorf("postEdbLib err: %v, params: %s", e, string(b))
  184. return
  185. }
  186. resp := new(models.BaseEdbLibResponse)
  187. if e = json.Unmarshal(result, &resp); e != nil {
  188. err = fmt.Errorf(" postEdbLib resp json.Unmarshal err: %v", e)
  189. return
  190. }
  191. if resp.Ret != 200 {
  192. err = fmt.Errorf("postEdbLib resp Msg: %s, ErrMsg: %s", resp.Msg, resp.ErrMsg)
  193. return
  194. }
  195. return
  196. }
  197. // AnalysisChartInventoryWeeklyEdb 解析周度库存中的日均值
  198. func AnalysisChartInventoryWeeklyEdb(htm []byte, rule *CCFChartRule) (isStop bool, indexes []*HandleIndexData, err error) {
  199. if len(htm) == 0 {
  200. utils.FileLog.Info("htm empty")
  201. return
  202. }
  203. doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
  204. if e != nil {
  205. err = fmt.Errorf("NewDocumentFromReader err: %v", e)
  206. return
  207. }
  208. // 判断图表名称是否相符,如果不符合放弃爬取
  209. title := doc.Find("p a.activated.now").Text()
  210. fmt.Println(title)
  211. if title != rule.Name {
  212. utils.FileLog.Info("图表名称不存在,停止爬取")
  213. isStop = true
  214. return
  215. }
  216. doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
  217. // 提取单位(这里假设单位总是位于 .tips 类的 div 中)
  218. unit := item.Find(".tips").Text()
  219. unit = strings.TrimSpace(unit)
  220. unit = strings.TrimPrefix(unit, "编制说明:单位(")
  221. unit = strings.TrimSuffix(unit, ")")
  222. fmt.Println("单位: ", unit)
  223. indexCode := ""
  224. indexName := ""
  225. // 获取频度和分类ID
  226. classifyId := rule.ClassifyId
  227. frequency := rule.Frequency
  228. childRule := make(map[string]*CCFChartRule)
  229. // 判断是否存在子页面
  230. if len(rule.Child) > 0 {
  231. for _, v := range rule.Child {
  232. childRule[v.Name] = v
  233. }
  234. }
  235. dataMap := make(map[string]string)
  236. // 遍历表格中的每一行(跳过表头)
  237. item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
  238. if k == 0 {
  239. return
  240. }
  241. // 提取产品名称
  242. if indexCode == "" {
  243. productName := row.Find("td:nth-child(1)").Text()
  244. productName = strings.TrimSpace(productName)
  245. //判断子页面的频度
  246. if newRule, ok := childRule[productName]; ok {
  247. frequency = newRule.Frequency
  248. // 在存在子类的情况下,判断产品是否属于子类,不属于则跳过
  249. } else if len(childRule) > 0 {
  250. return
  251. }
  252. indexCode = fmt.Sprintf("ccf%s", utils.GetFirstPingYin(productName))
  253. indexName = fmt.Sprintf("CCF%s", productName)
  254. }
  255. // 提取日期
  256. date := row.Find("td:nth-child(2)").Text()
  257. date = strings.TrimSpace(date)
  258. // 提取日均值
  259. var dailyAvg string
  260. if rule.IndexType == "周均" {
  261. dailyAvg = row.Find("td:nth-child(4)").Text()
  262. dailyAvg = strings.TrimSpace(dailyAvg)
  263. } else {
  264. dailyAvg = row.Find("td:nth-child(3)").Text()
  265. dailyAvg = strings.TrimSpace(dailyAvg)
  266. }
  267. // 打印提取的信息
  268. fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
  269. _, e = strconv.ParseFloat(dailyAvg, 64)
  270. if e != nil {
  271. utils.FileLog.Info("数据转换失败 err:%s", e.Error())
  272. return
  273. }
  274. dataMap[date] = dailyAvg
  275. })
  276. if indexName == "" {
  277. return
  278. }
  279. edb := new(HandleIndexData)
  280. edb.IndexCode = strings.ToLower(indexCode)
  281. edb.IndexName = indexName
  282. edb.ClassifyId = classifyId
  283. edb.Frequency = frequency
  284. edb.Unit = unit
  285. edb.DateData = dataMap
  286. edb.TerminalCode = utils.TerminalCode
  287. indexes = append(indexes, edb)
  288. })
  289. return
  290. }
  291. func AnalysisAdditionChartInventoryWeeklyEdb(htm []byte, rule *CCFChartAdditionRule) (isStop bool, indexes []*HandleIndexData, err error) {
  292. if len(htm) == 0 {
  293. utils.FileLog.Info("htm empty")
  294. return
  295. }
  296. doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
  297. if e != nil {
  298. err = fmt.Errorf("NewDocumentFromReader err: %v", e)
  299. return
  300. }
  301. // 判断图表名称是否相符,如果不符合放弃爬取
  302. doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
  303. // 提取单位(这里假设单位总是位于 .tips 类的 div 中)
  304. unit := item.Find(".tips").Text()
  305. unit = strings.TrimSpace(unit)
  306. unit = strings.TrimPrefix(unit, "编制说明:单位(")
  307. unit = strings.TrimSuffix(unit, ")")
  308. fmt.Println("单位: ", unit)
  309. indexCode := ""
  310. indexName := ""
  311. // 获取频度和分类ID
  312. classifyId := rule.ClassifyId
  313. frequency := rule.Frequency
  314. dataMap := make(map[string]string)
  315. // 遍历表格中的每一行(跳过表头)
  316. item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
  317. if k == 0 {
  318. return
  319. }
  320. // 提取产品名称
  321. if indexCode == "" {
  322. productName := row.Find("td:nth-child(1)").Text()
  323. productName = strings.TrimSpace(productName)
  324. if strings.Contains(rule.Name, productName) {
  325. indexName = rule.Name
  326. code := strings.ToLower(utils.GetFirstPingYin(indexName))
  327. code = strings.ReplaceAll(code, "/", "")
  328. code = strings.ReplaceAll(code, " ", "")
  329. indexCode = strings.ToLower(code)
  330. }
  331. }
  332. // 提取日期
  333. date := row.Find("td:nth-child(2)").Text()
  334. date = strings.TrimSpace(date)
  335. // 提取周均值
  336. dailyAvg := row.Find("td:nth-child(4)").Text()
  337. dailyAvg = strings.TrimSpace(dailyAvg)
  338. // 打印提取的信息
  339. fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
  340. _, e = strconv.ParseFloat(dailyAvg, 64)
  341. if e != nil {
  342. utils.FileLog.Info("数据转换失败 err:%s", e.Error())
  343. return
  344. }
  345. dataMap[date] = dailyAvg
  346. })
  347. if indexName == "" {
  348. return
  349. }
  350. edb := new(HandleIndexData)
  351. edb.IndexCode = strings.ToLower(indexCode)
  352. edb.IndexName = indexName
  353. edb.ClassifyId = classifyId
  354. edb.Frequency = frequency
  355. edb.Unit = unit
  356. edb.DateData = dataMap
  357. edb.TerminalCode = utils.TerminalCode
  358. indexes = append(indexes, edb)
  359. })
  360. return
  361. }