123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202 |
- package llm
- import (
- "eta/eta_api/models/rag"
- "eta/eta_api/utils"
- "fmt"
- "html"
- "strconv"
- "time"
- )
- // TODO 改成走队列,避免并发
- func AddWechatPlatform(item *rag.WechatPlatform) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
- }
- }()
- if item.FakeId != `` {
- return
- }
- if item.ArticleLink == `` {
- return
- }
- articleLink := item.ArticleLink
- articleDetail, err := SearchByWechatArticle(item.ArticleLink)
- if err != nil {
- return
- }
- if articleDetail.Appuin == `` {
- err = fmt.Errorf("文章内未匹配到公众号唯一标识")
- return
- }
- wechatPlatform := new(rag.WechatPlatform)
- // 查找是否存在这个公众号id的
- wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
- if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
- err = tmpErr
- return
- }
- if tmpErr == nil {
- // 如果找到了,那么需要将当前的给移除掉
- err = item.Del()
- if err != nil {
- return
- }
- // 并将查出来的微信公众号摘出来的数据重新赋值
- item = wechatPlatformInfo
- } else if utils.IsErrNoRow(tmpErr) {
- // 如果没找到,那么就变更当前的信息
- item.FakeId = articleDetail.Appuin
- item.Nickname = articleDetail.Nickname
- //item.Alias = req.Alias
- item.RoundHeadImg = articleDetail.RoundHeadImg
- //item.ServiceType = req.ServiceType
- item.Signature = articleDetail.ProfileSignature
- //item.Verified = verified
- item.ModifyTime = time.Now()
- err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
- if err != nil {
- return
- }
- }
- // 把刚搜索的文章加入到指标库
- AddWechatArticle(item, articleLink, articleDetail, nil)
- BeachAddWechatPlatform(item)
- fmt.Println("公众号入库完成")
- return
- }
- // AddWechatArticle
- // @Description: 添加公众号文章入库
- // @author: Roc
- // @datetime 2025-03-05 13:24:14
- // @param item *rag.WechatPlatform
- // @param link string
- // @param articleDetail WechatArticleDataResp
- func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail WechatArticleDataResp, articleMenu *ArticleMenu) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
- }
- }()
- obj := new(rag.WechatArticle)
- _, err = obj.GetByLink(articleLink)
- if err == nil {
- // 文章已经入库了,不需要重复入库
- return
- }
- // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
- if !utils.IsErrNoRow(err) {
- return
- }
- // 这个时候,说明数据库中没有这个文章,那么需要文章入库
- err = nil
- var publishAt time.Time
- if articleDetail.CreateAt != `` {
- createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
- if tmpErr == nil {
- publishAt = time.Unix(int64(createAtInt), 1000)
- }
- } else if articleMenu != nil {
- publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
- }
- obj = &rag.WechatArticle{
- WechatArticleId: 0,
- WechatPlatformId: item.WechatPlatformId,
- FakeId: item.FakeId,
- Title: articleDetail.Title,
- Link: articleLink,
- CoverUrl: articleDetail.CoverUrl,
- Description: articleDetail.Desc,
- Content: html.EscapeString(articleDetail.HtmlContent),
- TextContent: articleDetail.TextContent,
- Country: articleDetail.CountryName,
- Province: articleDetail.ProvinceName,
- City: articleDetail.CityName,
- //Abstract: "",
- //ArticleCreateTime: createAt,
- ModifyTime: time.Now(),
- CreateTime: time.Now(),
- }
- if !publishAt.IsZero() {
- obj.ArticleCreateTime = publishAt
- }
- if articleMenu != nil {
- obj.Title = articleMenu.Title
- //obj.Link = articleMenu.Link
- obj.CoverUrl = articleMenu.Cover
- obj.Abstract = articleMenu.Digest
- }
- err = obj.Create()
- }
- // BeachAddWechatPlatform
- // @Description: 批量添加公众号文章
- // @author: Roc
- // @datetime 2025-03-05 15:05:07
- // @param item *rag.WechatPlatform
- // @return err error
- func BeachAddWechatPlatform(item *rag.WechatPlatform) (err error) {
- defer func() {
- fmt.Println("公众号文章批量入库完成")
- if err != nil {
- utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
- fmt.Println("公众号文章批量入库失败,err:", err)
- }
- }()
- if item.FakeId == `` {
- return
- }
- num := 10
- wechatArticleObj := new(rag.WechatArticle)
- // 获取公众号的文章列表
- articleListResp, err := SearchByWechatArticleList(item.FakeId, num)
- if err != nil {
- return
- }
- for _, articleMenu := range articleListResp.List {
- // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
- _, err = wechatArticleObj.GetByLink(articleMenu.Link)
- if err == nil {
- // 文章已经入库了,不需要重复入库
- continue
- }
- articleDetail, tmpErr := SearchByWechatArticle(articleMenu.Link)
- if tmpErr != nil {
- err = tmpErr
- return
- }
- // 把刚搜索的文章加入到指标库
- AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
- time.Sleep(10 * time.Second)
- }
- return
- }
|