// wechat_platform.go (5.4 KB)

  1. package llm
  2. import (
  3. "eta/eta_api/models/rag"
  4. "eta/eta_api/utils"
  5. "fmt"
  6. "html"
  7. "strconv"
  8. "time"
  9. )
  10. // TODO 改成走队列,避免并发
  11. type WechatArticleOp struct {
  12. Source string
  13. WechatPlatformId int
  14. }
  15. // AddWechatPlatform
  16. // @Description: 添加新的公众号
  17. // @param item
  18. func AddWechatPlatform(item *rag.WechatPlatform) {
  19. var err error
  20. defer func() {
  21. if err != nil {
  22. utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
  23. }
  24. }()
  25. if item.FakeId != `` {
  26. return
  27. }
  28. if item.ArticleLink == `` {
  29. return
  30. }
  31. articleLink := item.ArticleLink
  32. articleDetail, err := SearchByWechatArticle(item.ArticleLink)
  33. if err != nil {
  34. return
  35. }
  36. if articleDetail.Appuin == `` {
  37. err = fmt.Errorf("文章内未匹配到公众号唯一标识")
  38. return
  39. }
  40. wechatPlatform := new(rag.WechatPlatform)
  41. // 查找是否存在这个公众号id的
  42. wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
  43. if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
  44. err = tmpErr
  45. return
  46. }
  47. if tmpErr == nil {
  48. // 如果找到了,那么需要将当前的给移除掉
  49. err = item.Del()
  50. if err != nil {
  51. return
  52. }
  53. // 并将查出来的微信公众号摘出来的数据重新赋值
  54. item = wechatPlatformInfo
  55. } else if utils.IsErrNoRow(tmpErr) {
  56. // 如果没找到,那么就变更当前的信息
  57. item.FakeId = articleDetail.Appuin
  58. item.Nickname = articleDetail.Nickname
  59. //item.Alias = req.Alias
  60. item.RoundHeadImg = articleDetail.RoundHeadImg
  61. //item.ServiceType = req.ServiceType
  62. item.Signature = articleDetail.ProfileSignature
  63. //item.Verified = verified
  64. item.ModifyTime = time.Now()
  65. err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
  66. if err != nil {
  67. return
  68. }
  69. }
  70. // 把刚搜索的文章加入到指标库
  71. AddWechatArticle(item, articleLink, articleDetail, nil)
  72. BeachAddWechatArticle(item, 10)
  73. fmt.Println("公众号入库完成")
  74. return
  75. }
  76. // AddWechatArticle
  77. // @Description: 添加公众号文章入库
  78. // @author: Roc
  79. // @datetime 2025-03-05 13:24:14
  80. // @param item *rag.WechatPlatform
  81. // @param link string
  82. // @param articleDetail WechatArticleDataResp
  83. func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail WechatArticleDataResp, articleMenu *ArticleMenu) {
  84. var err error
  85. defer func() {
  86. if err != nil {
  87. utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
  88. }
  89. }()
  90. obj := new(rag.WechatArticle)
  91. _, err = obj.GetByLink(articleLink)
  92. if err == nil {
  93. // 文章已经入库了,不需要重复入库
  94. return
  95. }
  96. // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
  97. if !utils.IsErrNoRow(err) {
  98. return
  99. }
  100. // 这个时候,说明数据库中没有这个文章,那么需要文章入库
  101. err = nil
  102. var publishAt time.Time
  103. if articleDetail.CreateAt != `` {
  104. createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
  105. if tmpErr == nil {
  106. publishAt = time.Unix(int64(createAtInt), 1000)
  107. }
  108. } else if articleMenu != nil {
  109. publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
  110. }
  111. obj = &rag.WechatArticle{
  112. WechatArticleId: 0,
  113. WechatPlatformId: item.WechatPlatformId,
  114. FakeId: item.FakeId,
  115. Title: articleDetail.Title,
  116. Link: articleLink,
  117. CoverUrl: articleDetail.CoverUrl,
  118. Description: articleDetail.Desc,
  119. Content: html.EscapeString(articleDetail.HtmlContent),
  120. TextContent: articleDetail.TextContent,
  121. Country: articleDetail.CountryName,
  122. Province: articleDetail.ProvinceName,
  123. City: articleDetail.CityName,
  124. //Abstract: "",
  125. //ArticleCreateTime: createAt,
  126. ModifyTime: time.Now(),
  127. CreateTime: time.Now(),
  128. }
  129. if !publishAt.IsZero() {
  130. obj.ArticleCreateTime = publishAt
  131. }
  132. if articleMenu != nil {
  133. obj.Title = articleMenu.Title
  134. //obj.Link = articleMenu.Link
  135. obj.CoverUrl = articleMenu.Cover
  136. obj.Description = articleMenu.Digest
  137. }
  138. err = obj.Create()
  139. }
  140. // BeachAddWechatArticle
  141. // @Description: 批量添加公众号文章
  142. // @param item
  143. // @param num
  144. // @return err
  145. func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
  146. var err error
  147. defer func() {
  148. fmt.Println("公众号文章批量入库完成")
  149. if err != nil {
  150. utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
  151. fmt.Println("公众号文章批量入库失败,err:", err)
  152. }
  153. }()
  154. if item.FakeId == `` {
  155. return
  156. }
  157. wechatArticleObj := new(rag.WechatArticle)
  158. // 获取公众号的文章列表
  159. articleListResp, err := SearchByWechatArticleList(item.FakeId, num)
  160. if err != nil {
  161. return
  162. }
  163. for _, articleMenu := range articleListResp.List {
  164. // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
  165. _, err = wechatArticleObj.GetByLink(articleMenu.Link)
  166. if err == nil {
  167. // 文章已经入库了,不需要重复入库
  168. continue
  169. }
  170. if !utils.IsErrNoRow(err) {
  171. return
  172. }
  173. err = nil
  174. articleDetail, tmpErr := SearchByWechatArticle(articleMenu.Link)
  175. if tmpErr != nil {
  176. err = tmpErr
  177. return
  178. }
  179. // 把刚搜索的文章加入到指标库
  180. AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
  181. time.Sleep(10 * time.Second)
  182. }
  183. return
  184. }