stock.go 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. package base_from_ccf
  2. import (
  3. "context"
  4. "encoding/json"
  5. "eta/eta_data_analysis/models"
  6. "eta/eta_data_analysis/utils"
  7. "fmt"
  8. "github.com/PuerkitoBio/goquery"
  9. "io/ioutil"
  10. "strings"
  11. "time"
  12. )
  // HandleTableData carries one table extracted from a saved report page.
  // Within this file it is sent to the data library as the "Table" request
  // parameter.
  type HandleTableData struct {
  	// ClassifyId is the classification ID taken from the fetch rule.
  	ClassifyId int `description:"分类ID"`
  	// FromPage is the path of the saved page the table was read from.
  	FromPage string `description:"表格来源"`
  	// TableDate is the publish date parsed from the report page.
  	TableDate time.Time `description:"表格日期"`
  	// TableContent is the table's HTML markup.
  	TableContent string `description:"表格HTML"`
  }
  20. // TaskStockTable 获取装置表格
  21. func TaskStockTable(context.Context) (err error) {
  22. defer func() {
  23. if err != nil {
  24. tips := fmt.Sprintf("TaskStockTable ErrMsg: %s", err.Error())
  25. utils.FileLog.Info(tips)
  26. fmt.Println(tips)
  27. }
  28. }()
  29. taskNames := []string{"PTA装置", "MEG装置", "PX装置"}
  30. readLimit := utils.CCFStockFetchNum
  31. for _, nameKey := range taskNames {
  32. fetchRule, e := loadDataRule(nameKey)
  33. if e != nil {
  34. utils.FileLog.Info(fmt.Sprintf("%s无解析规则, err: %v\n", nameKey, e))
  35. continue
  36. }
  37. // 解析前N篇报告
  38. files, e := savePageHtml(nameKey, fetchRule.PageDir, false, readLimit)
  39. if e != nil {
  40. utils.FileLog.Info(fmt.Sprintf("%s保存首页失败, err: %v\n", nameKey, e))
  41. continue
  42. }
  43. readCount := 0
  44. for _, v := range files {
  45. readCount += 1
  46. if readCount > readLimit {
  47. break
  48. }
  49. htm, e := ioutil.ReadFile(v)
  50. if e != nil {
  51. fmt.Printf("file: %s, ReadFile err: %v\n", v, e)
  52. utils.FileLog.Info(fmt.Sprintf("file: %s, ReadFile err: %v", v, e))
  53. continue
  54. }
  55. tableContent, tableDate, e := AnalysisStockTable(htm)
  56. if e != nil {
  57. fmt.Printf("file: %s, AnalysisStockTable err: %v\n", v, e)
  58. utils.FileLog.Info(fmt.Sprintf("file: %s, AnalysisStockTable err: %v", v, e))
  59. continue
  60. }
  61. tableItem := new(HandleTableData)
  62. tableItem.ClassifyId = fetchRule.StockTable.ClassifyId
  63. tableItem.FromPage = v
  64. tableItem.TableDate = tableDate
  65. tableItem.TableContent = tableContent
  66. // 写入数据库
  67. params := make(map[string]interface{})
  68. params["Table"] = tableItem
  69. params["TerminalCode"] = utils.TerminalCode
  70. result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_TABLE_HANDLE)
  71. if e != nil {
  72. b, _ := json.Marshal(params)
  73. fmt.Printf("file: %s, postEdbLib err: %v, params: %s\n", v, e, string(b))
  74. utils.FileLog.Info(fmt.Sprintf("file: %s, postEdbLib err: %v, params: %s", v, e, string(b)))
  75. continue
  76. }
  77. resp := new(models.BaseEdbLibResponse)
  78. if e = json.Unmarshal(result, &resp); e != nil {
  79. fmt.Printf("file: %s, json.Unmarshal err: %v\n", v, e)
  80. utils.FileLog.Info(fmt.Sprintf("file: %s, json.Unmarshal err: %v", v, e))
  81. continue
  82. }
  83. if resp.Ret != 200 {
  84. fmt.Printf("file: %s, Msg: %s, ErrMsg: %s\n", v, resp.Msg, resp.ErrMsg)
  85. utils.FileLog.Info(fmt.Sprintf("file: %s, Msg: %s, ErrMsg: %s", v, resp.Msg, resp.ErrMsg))
  86. continue
  87. }
  88. }
  89. }
  90. return
  91. }
  92. // AnalysisStockTable 解析装置表格
  93. func AnalysisStockTable(htm []byte) (tableContent string, tableTime time.Time, err error) {
  94. doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
  95. if e != nil {
  96. err = fmt.Errorf("NewDocumentFromReader err: %v", e)
  97. return
  98. }
  99. // 从收藏按钮往上找table, 取出报告发布日期
  100. collectEle := doc.Find("#savenews")
  101. publishTimeTab := collectEle.ParentsFiltered("table").First()
  102. publishTxt := publishTimeTab.Find("td:first-child").Text()
  103. //fmt.Println("publishTxt: ", publishTxt)
  104. publishTime, e := extractReportPublishTime(publishTxt)
  105. if e != nil {
  106. err = fmt.Errorf("extractReportPublishTime err: %v", e)
  107. return
  108. }
  109. if publishTime.IsZero() {
  110. err = fmt.Errorf("发布日期有误")
  111. return
  112. }
  113. //fmt.Println(publishTime)
  114. tableTime = publishTime
  115. //publishYear := publishTime.Year()
  116. //fmt.Println(publishYear)
  117. // 查找包含关键词的标签
  118. keyElement := doc.Find("#newscontent")
  119. table := keyElement.ChildrenFiltered("table").First()
  120. if table.Length() <= 0 {
  121. err = fmt.Errorf("表格未找到")
  122. return
  123. }
  124. h, e := table.Html()
  125. if e != nil {
  126. err = fmt.Errorf("表格HTML有误, err: %v", e)
  127. return
  128. }
  129. tableContent = fmt.Sprintf("<table>%s</table>", h)
  130. //tableContent = regexp.MustCompile(`\n`).ReplaceAllString(tableContent, "")
  131. return
  132. }
  133. // ReadStockHistoryFiles 读取历史文件
  134. func ReadStockHistoryFiles(context.Context) {
  135. var err error
  136. defer func() {
  137. if err != nil {
  138. tips := fmt.Sprintf("ReadStockHistoryFiles ErrMsg: %s", err.Error())
  139. utils.FileLog.Info(tips)
  140. fmt.Println(tips)
  141. }
  142. }()
  143. taskNames := []string{"PTA装置", "MEG装置", "PX装置"}
  144. for _, nameKey := range taskNames {
  145. fetchRule, e := loadDataRule(nameKey)
  146. if e != nil {
  147. utils.FileLog.Info(fmt.Sprintf("%s无解析规则, err: %v\n", nameKey, e))
  148. continue
  149. }
  150. filePaths, e := listFiles(fetchRule.PageDir)
  151. if e != nil {
  152. utils.FileLog.Info(fmt.Sprintf("%s读取文件目录失败, err: %v\n", nameKey, e))
  153. continue
  154. }
  155. for _, v := range filePaths {
  156. v = fmt.Sprintf("%s/%s", fetchRule.PageDir, v)
  157. fmt.Printf("开始解析: %s", v)
  158. //htm, e := ioutil.ReadFile("static/ccf/oil_daily/原油石化早报(4.18).html")
  159. htm, e := ioutil.ReadFile(v)
  160. if e != nil {
  161. fmt.Printf("file: %s, ReadFile err: %v\n", v, e)
  162. utils.FileLog.Info(fmt.Sprintf("file: %s, ReadFile err: %v", v, e))
  163. continue
  164. }
  165. tableContent, tableDate, e := AnalysisStockTable(htm)
  166. if e != nil {
  167. fmt.Printf("file: %s, AnalysisStockTable err: %v\n", v, e)
  168. utils.FileLog.Info(fmt.Sprintf("file: %s, AnalysisStockTable err: %v", v, e))
  169. continue
  170. }
  171. tableItem := new(HandleTableData)
  172. tableItem.ClassifyId = fetchRule.StockTable.ClassifyId
  173. tableItem.FromPage = v
  174. tableItem.TableDate = tableDate
  175. tableItem.TableContent = tableContent
  176. // 写入数据库
  177. params := make(map[string]interface{})
  178. params["Table"] = tableItem
  179. params["TerminalCode"] = utils.TerminalCode
  180. result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_TABLE_HANDLE)
  181. if e != nil {
  182. b, _ := json.Marshal(params)
  183. fmt.Printf("file: %s, postEdbLib err: %v, params: %s\n", v, e, string(b))
  184. utils.FileLog.Info(fmt.Sprintf("file: %s, postEdbLib err: %v, params: %s", v, e, string(b)))
  185. continue
  186. }
  187. resp := new(models.BaseEdbLibResponse)
  188. if e = json.Unmarshal(result, &resp); e != nil {
  189. fmt.Printf("file: %s, json.Unmarshal err: %v\n", v, e)
  190. utils.FileLog.Info(fmt.Sprintf("file: %s, json.Unmarshal err: %v", v, e))
  191. continue
  192. }
  193. if resp.Ret != 200 {
  194. fmt.Printf("file: %s, Msg: %s, ErrMsg: %s\n", v, resp.Msg, resp.ErrMsg)
  195. utils.FileLog.Info(fmt.Sprintf("file: %s, Msg: %s, ErrMsg: %s", v, resp.Msg, resp.ErrMsg))
  196. continue
  197. }
  198. }
  199. }
  200. return
  201. }