wechat_platform.go 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. package llm
  2. import (
  3. "eta/eta_api/models/rag"
  4. "eta/eta_api/utils"
  5. "fmt"
  6. "html"
  7. "strconv"
  8. "time"
  9. )
  10. // TODO 改成走队列,避免并发
  11. func AddWechatPlatform(item *rag.WechatPlatform) {
  12. var err error
  13. defer func() {
  14. if err != nil {
  15. utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
  16. }
  17. }()
  18. if item.FakeId != `` {
  19. return
  20. }
  21. if item.ArticleLink == `` {
  22. return
  23. }
  24. articleLink := item.ArticleLink
  25. articleDetail, err := SearchByWechatArticle(item.ArticleLink)
  26. if err != nil {
  27. return
  28. }
  29. if articleDetail.Appuin == `` {
  30. err = fmt.Errorf("文章内未匹配到公众号唯一标识")
  31. return
  32. }
  33. wechatPlatform := new(rag.WechatPlatform)
  34. // 查找是否存在这个公众号id的
  35. wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
  36. if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
  37. err = tmpErr
  38. return
  39. }
  40. if tmpErr == nil {
  41. // 如果找到了,那么需要将当前的给移除掉
  42. err = item.Del()
  43. if err != nil {
  44. return
  45. }
  46. // 并将查出来的微信公众号摘出来的数据重新赋值
  47. item = wechatPlatformInfo
  48. } else if utils.IsErrNoRow(tmpErr) {
  49. // 如果没找到,那么就变更当前的信息
  50. item.FakeId = articleDetail.Appuin
  51. item.Nickname = articleDetail.Nickname
  52. //item.Alias = req.Alias
  53. item.RoundHeadImg = articleDetail.RoundHeadImg
  54. //item.ServiceType = req.ServiceType
  55. item.Signature = articleDetail.ProfileSignature
  56. //item.Verified = verified
  57. item.ModifyTime = time.Now()
  58. err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
  59. if err != nil {
  60. return
  61. }
  62. }
  63. // 把刚搜索的文章加入到指标库
  64. AddWechatArticle(item, articleLink, articleDetail, nil)
  65. BeachAddWechatPlatform(item)
  66. fmt.Println("公众号入库完成")
  67. return
  68. }
  69. // AddWechatArticle
  70. // @Description: 添加公众号文章入库
  71. // @author: Roc
  72. // @datetime 2025-03-05 13:24:14
  73. // @param item *rag.WechatPlatform
  74. // @param link string
  75. // @param articleDetail WechatArticleDataResp
  76. func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail WechatArticleDataResp, articleMenu *ArticleMenu) {
  77. var err error
  78. defer func() {
  79. if err != nil {
  80. utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
  81. }
  82. }()
  83. obj := new(rag.WechatArticle)
  84. _, err = obj.GetByLink(articleLink)
  85. if err == nil {
  86. // 文章已经入库了,不需要重复入库
  87. return
  88. }
  89. // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
  90. if !utils.IsErrNoRow(err) {
  91. return
  92. }
  93. // 这个时候,说明数据库中没有这个文章,那么需要文章入库
  94. err = nil
  95. var publishAt time.Time
  96. if articleDetail.CreateAt != `` {
  97. createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
  98. if tmpErr == nil {
  99. publishAt = time.Unix(int64(createAtInt), 1000)
  100. }
  101. } else if articleMenu != nil {
  102. publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
  103. }
  104. obj = &rag.WechatArticle{
  105. WechatArticleId: 0,
  106. WechatPlatformId: item.WechatPlatformId,
  107. FakeId: item.FakeId,
  108. Title: articleDetail.Title,
  109. Link: articleLink,
  110. CoverUrl: articleDetail.CoverUrl,
  111. Description: articleDetail.Desc,
  112. Content: html.EscapeString(articleDetail.HtmlContent),
  113. TextContent: articleDetail.TextContent,
  114. Country: articleDetail.CountryName,
  115. Province: articleDetail.ProvinceName,
  116. City: articleDetail.CityName,
  117. //Abstract: "",
  118. //ArticleCreateTime: createAt,
  119. ModifyTime: time.Now(),
  120. CreateTime: time.Now(),
  121. }
  122. if !publishAt.IsZero() {
  123. obj.ArticleCreateTime = publishAt
  124. }
  125. if articleMenu != nil {
  126. obj.Title = articleMenu.Title
  127. //obj.Link = articleMenu.Link
  128. obj.CoverUrl = articleMenu.Cover
  129. obj.Abstract = articleMenu.Digest
  130. }
  131. err = obj.Create()
  132. }
  133. // BeachAddWechatPlatform
  134. // @Description: 批量添加公众号文章
  135. // @author: Roc
  136. // @datetime 2025-03-05 15:05:07
  137. // @param item *rag.WechatPlatform
  138. // @return err error
  139. func BeachAddWechatPlatform(item *rag.WechatPlatform) (err error) {
  140. defer func() {
  141. fmt.Println("公众号文章批量入库完成")
  142. if err != nil {
  143. utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
  144. fmt.Println("公众号文章批量入库失败,err:", err)
  145. }
  146. }()
  147. if item.FakeId == `` {
  148. return
  149. }
  150. num := 10
  151. wechatArticleObj := new(rag.WechatArticle)
  152. // 获取公众号的文章列表
  153. articleListResp, err := SearchByWechatArticleList(item.FakeId, num)
  154. if err != nil {
  155. return
  156. }
  157. for _, articleMenu := range articleListResp.List {
  158. // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
  159. _, err = wechatArticleObj.GetByLink(articleMenu.Link)
  160. if err == nil {
  161. // 文章已经入库了,不需要重复入库
  162. continue
  163. }
  164. articleDetail, tmpErr := SearchByWechatArticle(articleMenu.Link)
  165. if tmpErr != nil {
  166. err = tmpErr
  167. return
  168. }
  169. // 把刚搜索的文章加入到指标库
  170. AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
  171. time.Sleep(10 * time.Second)
  172. }
  173. return
  174. }