data_import.go 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. package services
  2. import (
  3. "fmt"
  4. "github.com/PuerkitoBio/goquery"
  5. "github.com/tealeg/xlsx"
  6. "hongze/hongze_cygx/models"
  7. "hongze/hongze_cygx/utils"
  8. "html"
  9. "strconv"
  10. "strings"
  11. "time"
  12. )
  13. //上海数据同步
  14. func ImportData() {
  15. fileName := "./temp/权益报告导入0319.xlsx"
  16. xlFile, err := xlsx.OpenFile(fileName)
  17. if err != nil {
  18. fmt.Printf("open failed: %s\n", err)
  19. }
  20. for _, sheet := range xlFile.Sheet {
  21. row := sheet.Rows
  22. for k, v := range row {
  23. if k > 0 {
  24. //导入数据
  25. item := new(models.CygxArticle)
  26. cells := v.Cells
  27. articleIdStr := cells[0].Value
  28. articleIdInt, _ := strconv.Atoi(articleIdStr)
  29. //判断文章是否已经存在
  30. existItem, err := models.GetArticleDetailById(articleIdInt)
  31. if err != nil && err.Error() != utils.ErrNoRow() {
  32. fmt.Println("GetArticleDetailById Err:" + err.Error())
  33. return
  34. }
  35. if existItem != nil && existItem.ArticleId > 0 {
  36. fmt.Println("exist", articleIdInt)
  37. continue
  38. }
  39. title := cells[1].String()
  40. titleEn := cells[2].String()
  41. updateFrequency := cells[3].String()
  42. createDate := cells[4].String()
  43. createDateParse, err := time.Parse(utils.FormatDateTime, createDate)
  44. if err != nil {
  45. fmt.Println("createDateParse err:", err.Error())
  46. return
  47. }
  48. fmt.Println("createDateParse ", createDateParse)
  49. publishDate := cells[6].String()
  50. publishDate=strings.Replace(publishDate,`\`,"",-1)
  51. fmt.Println("publishDate: ", publishDate)
  52. publishDateParse, err := time.Parse(utils.FormatDateTime, publishDate)
  53. if err != nil {
  54. fmt.Println("publishDateParse err:", err.Error())
  55. return
  56. }
  57. fmt.Println("publishDateParse ", publishDateParse)
  58. body := cells[11].String()
  59. abstract := cells[12].String()
  60. categoryName := cells[15].String()
  61. subCategoryName := cells[16].String()
  62. categoryId, _ := cells[18].Int()
  63. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  64. if err != nil {
  65. fmt.Println("create doc err:", err.Error())
  66. return
  67. }
  68. var expertNumArr []string
  69. var expertContentArr []string
  70. var interviewDateArr []string
  71. doc.Find("p").Each(func(i int, s *goquery.Selection) {
  72. contentTxt := s.Text()
  73. if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
  74. interviewDate := s.Next().Text()
  75. interviewDateArr = append(interviewDateArr, interviewDate)
  76. }
  77. if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
  78. expertContent := s.Next().Text()
  79. if expertContent == "" {
  80. expertContent = contentTxt
  81. }
  82. if expertContent != "" {
  83. rightIndex := strings.Index(expertContent, ")")
  84. if rightIndex == 0 {
  85. rightIndex = strings.Index(expertContent, ")")
  86. }
  87. expertNum := expertContent[:rightIndex]
  88. expertNum = strings.Replace(expertNum, "(", "", -1)
  89. expertNum = strings.Replace(expertNum, "(", "", -1)
  90. expertNum = strings.Replace(expertNum, "专家评价", "", -1)
  91. if expertNum != "" {
  92. expertNumArr = append(expertNumArr, expertNum)
  93. rightIndex = rightIndex
  94. expertContentStr := expertContent[rightIndex:]
  95. expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
  96. expertContentStr = strings.TrimLeft(expertContentStr, ":")
  97. expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
  98. expertContentArr = append(expertContentArr, expertContentStr)
  99. }
  100. }
  101. }
  102. })
  103. var expertNumStr, expertContentStr, interviewDateStr string
  104. if len(expertNumArr) > 0 {
  105. expertNumStr = expertNumArr[0]
  106. }
  107. if len(expertContentArr) > 0 {
  108. expertContentStr = expertContentArr[0]
  109. }
  110. if len(interviewDateArr) > 0 {
  111. interviewDateStr = interviewDateArr[0]
  112. }
  113. item.ArticleId = articleIdInt
  114. item.Title = title
  115. item.TitleEn = titleEn
  116. item.UpdateFrequency = updateFrequency
  117. item.CreateDate = createDateParse.Format(utils.FormatDateTime)
  118. item.PublishDate = publishDateParse.Format(utils.FormatDateTime)
  119. item.Body = html.EscapeString(body)
  120. item.Abstract = html.EscapeString(abstract)
  121. item.CategoryName = categoryName
  122. item.SubCategoryName = subCategoryName
  123. item.CategoryId = categoryId
  124. item.PublishStatus = 1
  125. item.ExpertBackground = expertContentStr
  126. item.ExpertNumber = expertNumStr
  127. item.InterviewDate = interviewDateStr
  128. item.Department = "策略组"
  129. item.ArticleIdMd5 = utils.MD5(articleIdStr)
  130. _, err = models.AddCygxArticle(item)
  131. if err != nil {
  132. fmt.Println("AddCygxArticle Err:", err.Error())
  133. return
  134. }
  135. }
  136. }
  137. }
  138. }