wechat_platform.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. package services
  2. import (
  3. "eta/eta_api/cache"
  4. "eta/eta_api/models"
  5. "eta/eta_api/models/rag"
  6. "eta/eta_api/services/llm"
  7. "eta/eta_api/utils"
  8. "eta/eta_api/utils/llm/eta_llm/eta_llm_http"
  9. "fmt"
  10. "html"
  11. "os"
  12. "path"
  13. "strconv"
  14. "strings"
  15. "time"
  16. )
  17. // AddWechatPlatform
  18. // @Description: 添加新的公众号
  19. // @param item
  20. func AddWechatPlatform(item *rag.WechatPlatform) {
  21. var err error
  22. defer func() {
  23. if err != nil {
  24. utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
  25. }
  26. }()
  27. if item.FakeId != `` {
  28. return
  29. }
  30. if item.ArticleLink == `` {
  31. return
  32. }
  33. articleLink := item.ArticleLink
  34. articleDetail, err := llm.SearchByWechatArticle(item.ArticleLink)
  35. if err != nil {
  36. return
  37. }
  38. if articleDetail.Appuin == `` {
  39. err = fmt.Errorf("文章内未匹配到公众号唯一标识")
  40. return
  41. }
  42. wechatPlatform := new(rag.WechatPlatform)
  43. // 查找是否存在这个公众号id的
  44. wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
  45. if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
  46. err = tmpErr
  47. return
  48. }
  49. if tmpErr == nil {
  50. // 如果找到了,那么需要将当前的给移除掉
  51. err = item.Del()
  52. if err != nil {
  53. return
  54. }
  55. // 并将查出来的微信公众号摘出来的数据重新赋值
  56. item = wechatPlatformInfo
  57. } else if utils.IsErrNoRow(tmpErr) {
  58. // 如果没找到,那么就变更当前的信息
  59. item.FakeId = articleDetail.Appuin
  60. item.Nickname = articleDetail.Nickname
  61. //item.Alias = req.Alias
  62. item.RoundHeadImg = articleDetail.RoundHeadImg
  63. //item.ServiceType = req.ServiceType
  64. item.Signature = articleDetail.ProfileSignature
  65. //item.Verified = verified
  66. item.ModifyTime = time.Now()
  67. err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
  68. if err != nil {
  69. return
  70. }
  71. // 修改公众号头像
  72. go replaceWechatPlatformPic(item)
  73. }
  74. // 把刚搜索的文章加入到文章库中
  75. AddWechatArticle(item, articleLink, articleDetail, nil)
  76. BeachAddWechatArticle(item, 10)
  77. fmt.Println("公众号入库完成")
  78. return
  79. }
  80. // AddWechatArticle
  81. // @Description: 添加公众号文章入库
  82. // @author: Roc
  83. // @datetime 2025-03-05 13:24:14
  84. // @param item *rag.WechatPlatform
  85. // @param link string
  86. // @param articleDetail WechatArticleDataResp
  87. func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail llm.WechatArticleDataResp, articleMenu *llm.ArticleMenu) {
  88. var err error
  89. defer func() {
  90. if err != nil {
  91. utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
  92. }
  93. }()
  94. obj := new(rag.WechatArticle)
  95. _, err = obj.GetByLink(articleLink)
  96. if err == nil {
  97. // 文章已经入库了,不需要重复入库
  98. return
  99. }
  100. // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
  101. if !utils.IsErrNoRow(err) {
  102. return
  103. }
  104. // 这个时候,说明数据库中没有这个文章,那么需要文章入库
  105. err = nil
  106. var publishAt time.Time
  107. if articleDetail.CreateAt != `` {
  108. createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
  109. if tmpErr == nil {
  110. publishAt = time.Unix(int64(createAtInt), 1000)
  111. }
  112. } else if articleMenu != nil {
  113. publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
  114. }
  115. obj = &rag.WechatArticle{
  116. WechatArticleId: 0,
  117. WechatPlatformId: item.WechatPlatformId,
  118. FakeId: item.FakeId,
  119. Title: articleDetail.Title,
  120. Link: articleLink,
  121. CoverUrl: articleDetail.CoverUrl,
  122. Description: articleDetail.Desc,
  123. Content: html.EscapeString(articleDetail.HtmlContent),
  124. TextContent: articleDetail.TextContent,
  125. Country: articleDetail.CountryName,
  126. Province: articleDetail.ProvinceName,
  127. City: articleDetail.CityName,
  128. //Abstract: "",
  129. //ArticleCreateTime: createAt,
  130. ModifyTime: time.Now(),
  131. CreateTime: time.Now(),
  132. }
  133. if !publishAt.IsZero() {
  134. obj.ArticleCreateTime = publishAt
  135. }
  136. if articleMenu != nil {
  137. obj.Title = articleMenu.Title
  138. //obj.Link = articleMenu.Link
  139. obj.CoverUrl = articleMenu.Cover
  140. obj.Description = articleMenu.Digest
  141. }
  142. err = obj.Create()
  143. // 修改文章封面图
  144. go replaceWechatArticlePic(obj)
  145. // 文章入库成功后,需要将相关信息入摘要库
  146. go cache.AddWechatArticleLlmOpToCache(item.WechatPlatformId, ``)
  147. }
  148. // BeachAddWechatArticle
  149. // @Description: 批量添加公众号文章
  150. // @param item
  151. // @param num
  152. // @return err
  153. func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
  154. var err error
  155. defer func() {
  156. //fmt.Println("公众号文章批量入库完成")
  157. if err != nil {
  158. utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
  159. fmt.Println("公众号文章批量入库失败,err:", err)
  160. }
  161. }()
  162. if item.FakeId == `` {
  163. return
  164. }
  165. wechatArticleObj := new(rag.WechatArticle)
  166. // 获取公众号的文章列表
  167. articleListResp, err := llm.SearchByWechatArticleList(item.FakeId, num)
  168. if err != nil {
  169. return
  170. }
  171. for _, articleMenu := range articleListResp.List {
  172. // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
  173. _, err = wechatArticleObj.GetByLink(articleMenu.Link)
  174. if err == nil {
  175. // 文章已经入库了,不需要重复入库
  176. continue
  177. }
  178. if !utils.IsErrNoRow(err) {
  179. return
  180. }
  181. err = nil
  182. articleDetail, tmpErr := llm.SearchByWechatArticle(articleMenu.Link)
  183. if tmpErr != nil {
  184. err = tmpErr
  185. return
  186. }
  187. // 把刚搜索的文章加入到指标库
  188. AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
  189. time.Sleep(10 * time.Second)
  190. }
  191. return
  192. }
  193. // GenerateArticleAbstract
  194. // @Description: 文章摘要生成
  195. // @author: Roc
  196. // @datetime 2025-03-10 16:17:53
  197. // @param item *rag.WechatArticle
  198. func GenerateArticleAbstract(item *rag.WechatArticle) {
  199. var err error
  200. defer func() {
  201. if err != nil {
  202. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  203. fmt.Println("文章转临时文件失败,err:", err)
  204. }
  205. }()
  206. abstractObj := rag.WechatArticleAbstract{}
  207. _, err = abstractObj.GetByWechatArticleId(item.WechatArticleId)
  208. if err == nil {
  209. // 摘要已经生成,不需要重复生成
  210. return
  211. }
  212. if !utils.IsErrNoRow(err) {
  213. return
  214. }
  215. // 生成临时文件
  216. dateDir := time.Now().Format("20060102")
  217. uploadDir := utils.STATIC_DIR + "ai/" + dateDir
  218. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  219. if err != nil {
  220. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  221. return
  222. }
  223. randStr := utils.GetRandStringNoSpecialChar(28)
  224. fileName := randStr + `.md`
  225. tmpFilePath := uploadDir + "/" + fileName
  226. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  227. if err != nil {
  228. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  229. return
  230. }
  231. defer func() {
  232. os.Remove(tmpFilePath)
  233. }()
  234. // 上传临时文件到LLM
  235. tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
  236. if err != nil {
  237. err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
  238. return
  239. }
  240. if tmpFileResp.Data.Id == `` {
  241. err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
  242. return
  243. }
  244. tmpDocId := tmpFileResp.Data.Id
  245. //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
  246. //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
  247. //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
  248. historyList := make([]eta_llm_http.HistoryContent, 0)
  249. questionObj := rag.Question{}
  250. questionList, err := questionObj.GetListByCondition(``, []interface{}{}, 0, 100)
  251. if err != nil {
  252. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  253. return
  254. }
  255. addArticleChatRecordList := make([]*rag.WechatArticleChatRecord, 0)
  256. var abstract string
  257. //开始对话
  258. for _, question := range questionList {
  259. originalAnswer, tmpAnswer, tmpErr := getAnswerByContent(tmpDocId, question.QuestionContent, historyList)
  260. if tmpErr != nil {
  261. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  262. return
  263. }
  264. abstract = tmpAnswer
  265. historyList = append(historyList, eta_llm_http.HistoryContent{
  266. Role: `user`,
  267. Content: question.QuestionContent,
  268. }, eta_llm_http.HistoryContent{
  269. Role: `assistant`,
  270. Content: tmpAnswer,
  271. })
  272. // 待入库的数据
  273. addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  274. WechatArticleChatRecordId: 0,
  275. WechatArticleId: item.WechatArticleId,
  276. ChatUserType: "user",
  277. Content: question.QuestionContent,
  278. SendTime: time.Now(),
  279. CreatedTime: time.Now(),
  280. UpdateTime: time.Now(),
  281. }, &rag.WechatArticleChatRecord{
  282. WechatArticleChatRecordId: 0,
  283. WechatArticleId: item.WechatArticleId,
  284. ChatUserType: "assistant",
  285. Content: originalAnswer,
  286. SendTime: time.Now(),
  287. CreatedTime: time.Now(),
  288. UpdateTime: time.Now(),
  289. })
  290. }
  291. // 添加问答记录
  292. if len(addArticleChatRecordList) > 0 {
  293. recordObj := rag.WechatArticleChatRecord{}
  294. err = recordObj.CreateInBatches(addArticleChatRecordList)
  295. if err != nil {
  296. return
  297. }
  298. }
  299. if abstract != `` {
  300. abstractItem := &rag.WechatArticleAbstract{
  301. WechatArticleAbstractId: 0,
  302. WechatArticleId: item.WechatArticleId,
  303. Content: abstract,
  304. Version: 0,
  305. VectorKey: "",
  306. ModifyTime: time.Now(),
  307. CreateTime: time.Now(),
  308. }
  309. err = abstractItem.Create()
  310. if err != nil {
  311. return
  312. }
  313. AbstractToKnowledge(item, abstractItem)
  314. }
  315. }
  316. func getAnswerByContent(docId, question string, historyList []eta_llm_http.HistoryContent) (originalAnswer, answer string, err error) {
  317. originalAnswer, result, err := llm.ChatByFile(docId, question, historyList)
  318. fmt.Println(result)
  319. if err != nil {
  320. err = fmt.Errorf("LLM对话失败,Err:" + err.Error())
  321. return
  322. }
  323. // 提取 </think> 后面的内容
  324. thinkEndIndex := strings.Index(result.Answer, "</think>")
  325. if thinkEndIndex != -1 {
  326. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  327. } else {
  328. answer = result.Answer
  329. }
  330. answer = strings.TrimSpace(answer)
  331. return
  332. }
  333. // ArticleToKnowledge
  334. // @Description: 原文入向量库
  335. // @author: Roc
  336. // @datetime 2025-03-10 16:13:16
  337. // @param item *rag.WechatArticle
  338. func ArticleToKnowledge(item *rag.WechatArticle) {
  339. if item.TextContent == `` {
  340. return
  341. }
  342. var err error
  343. defer func() {
  344. if err != nil {
  345. utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err)
  346. fmt.Println("上传文章原文到知识库失败,err:", err)
  347. }
  348. }()
  349. // 生成临时文件
  350. //dateDir := time.Now().Format("20060102")
  351. //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir
  352. uploadDir := utils.STATIC_DIR + "ai/article"
  353. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  354. if err != nil {
  355. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  356. return
  357. }
  358. fileName := utils.RemoveSpecialChars(item.Title) + `.md`
  359. tmpFilePath := uploadDir + "/" + fileName
  360. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  361. if err != nil {
  362. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  363. return
  364. }
  365. defer func() {
  366. os.Remove(tmpFilePath)
  367. }()
  368. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName]
  369. // 上传临时文件到LLM
  370. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  371. if err != nil {
  372. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  373. return
  374. }
  375. if len(uploadFileResp.FailedFiles) > 0 {
  376. for _, v := range uploadFileResp.FailedFiles {
  377. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  378. }
  379. }
  380. item.VectorKey = tmpFilePath
  381. item.ModifyTime = time.Now()
  382. err = item.Update([]string{"vector_key", "modify_time"})
  383. }
  384. // AbstractToKnowledge
  385. // @Description: 摘要入向量库
  386. // @author: Roc
  387. // @datetime 2025-03-10 16:14:59
  388. // @param wechatArticleItem *rag.WechatArticle
  389. // @param item *rag.WechatArticleAbstract
  390. func AbstractToKnowledge(wechatArticleItem *rag.WechatArticle, item *rag.WechatArticleAbstract) {
  391. if item.Content == `` {
  392. return
  393. }
  394. // 已经生成了,那就不处理了
  395. if item.VectorKey != `` {
  396. return
  397. }
  398. var err error
  399. defer func() {
  400. if err != nil {
  401. utils.FileLog.Error("摘要入向量库失败,err:%v", err)
  402. fmt.Println("摘要入向量库失败,err:", err)
  403. }
  404. }()
  405. // 生成临时文件
  406. //dateDir := time.Now().Format("20060102")
  407. //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir
  408. uploadDir := utils.STATIC_DIR + "ai/abstract"
  409. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  410. if err != nil {
  411. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  412. return
  413. }
  414. fileName := utils.RemoveSpecialChars(wechatArticleItem.Title) + `.md`
  415. tmpFilePath := uploadDir + "/" + fileName
  416. err = utils.SaveToFile(item.Content, tmpFilePath)
  417. if err != nil {
  418. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  419. return
  420. }
  421. defer func() {
  422. os.Remove(tmpFilePath)
  423. }()
  424. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
  425. // 上传临时文件到LLM
  426. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  427. if err != nil {
  428. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  429. return
  430. }
  431. if len(uploadFileResp.FailedFiles) > 0 {
  432. for _, v := range uploadFileResp.FailedFiles {
  433. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  434. }
  435. }
  436. item.VectorKey = tmpFilePath
  437. item.ModifyTime = time.Now()
  438. err = item.Update([]string{"vector_key", "modify_time"})
  439. }
  440. // replaceWechatPlatformPic
  441. // @Description: 替换公众号头像
  442. // @author: Roc
  443. // @datetime 2025-03-11 09:38:24
  444. // @param item *rag.WechatPlatform
  445. func replaceWechatPlatformPic(item *rag.WechatPlatform) {
  446. var err error
  447. defer func() {
  448. if err != nil {
  449. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  450. fmt.Println("替换公众号头像失败,err:", err)
  451. }
  452. }()
  453. if item.RoundHeadImg == `` {
  454. return
  455. }
  456. resourceUrl, err := downloadWxPicAndUploadToOss(item.RoundHeadImg, `head_img`)
  457. if err != nil {
  458. return
  459. }
  460. item.RoundHeadImg = resourceUrl
  461. err = item.Update([]string{"round_head_img"})
  462. }
  463. // replaceWechatArticlePic
  464. // @Description: 替换文章封面图
  465. // @author: Roc
  466. // @datetime 2025-03-11 09:38:35
  467. // @param item *rag.WechatArticle
  468. func replaceWechatArticlePic(item *rag.WechatArticle) {
  469. var err error
  470. defer func() {
  471. if err != nil {
  472. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  473. fmt.Println("替换公众号头像失败,err:", err)
  474. }
  475. }()
  476. if item.CoverUrl == `` {
  477. return
  478. }
  479. resourceUrl, err := downloadWxPicAndUploadToOss(item.CoverUrl, `cover_url`)
  480. if err != nil {
  481. return
  482. }
  483. item.CoverUrl = resourceUrl
  484. err = item.Update([]string{"cover_url"})
  485. }
  486. // downloadWxPicAndUploadToOss
  487. // @Description: 下载微信图片并上传到OSS
  488. // @author: Roc
  489. // @datetime 2025-03-11 09:28:49
  490. // @param wxPicUrl string
  491. // @return resourceUrl string
  492. // @return err error
  493. func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, err error) {
  494. localFilePath, err := utils.DownloadWxImage(wxPicUrl)
  495. if err != nil {
  496. return
  497. }
  498. defer func() {
  499. os.Remove(localFilePath)
  500. }()
  501. ossClient := NewOssClient()
  502. if ossClient == nil {
  503. err = fmt.Errorf(`初始化OSS服务失败`)
  504. return
  505. }
  506. ext := path.Ext(localFilePath)
  507. randStr := utils.GetRandStringNoSpecialChar(28)
  508. fileName := randStr + ext
  509. //savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName
  510. savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName)
  511. resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath)
  512. if err != nil {
  513. err = fmt.Errorf("文件上传失败,Err:" + err.Error())
  514. return
  515. }
  516. return
  517. }