wechat_platform.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. package llm
  2. import (
  3. "eta/eta_api/models"
  4. "eta/eta_api/models/rag"
  5. "eta/eta_api/utils"
  6. "eta/eta_api/utils/llm/eta_llm/eta_llm_http"
  7. "fmt"
  8. "html"
  9. "os"
  10. "regexp"
  11. "strconv"
  12. "strings"
  13. "time"
  14. )
  15. type WechatArticleOp struct {
  16. Source string
  17. WechatPlatformId int
  18. }
  19. // AddWechatPlatform
  20. // @Description: 添加新的公众号
  21. // @param item
  22. func AddWechatPlatform(item *rag.WechatPlatform) {
  23. var err error
  24. defer func() {
  25. if err != nil {
  26. utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
  27. }
  28. }()
  29. if item.FakeId != `` {
  30. return
  31. }
  32. if item.ArticleLink == `` {
  33. return
  34. }
  35. articleLink := item.ArticleLink
  36. articleDetail, err := SearchByWechatArticle(item.ArticleLink)
  37. if err != nil {
  38. return
  39. }
  40. if articleDetail.Appuin == `` {
  41. err = fmt.Errorf("文章内未匹配到公众号唯一标识")
  42. return
  43. }
  44. wechatPlatform := new(rag.WechatPlatform)
  45. // 查找是否存在这个公众号id的
  46. wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
  47. if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
  48. err = tmpErr
  49. return
  50. }
  51. if tmpErr == nil {
  52. // 如果找到了,那么需要将当前的给移除掉
  53. err = item.Del()
  54. if err != nil {
  55. return
  56. }
  57. // 并将查出来的微信公众号摘出来的数据重新赋值
  58. item = wechatPlatformInfo
  59. } else if utils.IsErrNoRow(tmpErr) {
  60. // 如果没找到,那么就变更当前的信息
  61. item.FakeId = articleDetail.Appuin
  62. item.Nickname = articleDetail.Nickname
  63. //item.Alias = req.Alias
  64. item.RoundHeadImg = articleDetail.RoundHeadImg
  65. //item.ServiceType = req.ServiceType
  66. item.Signature = articleDetail.ProfileSignature
  67. //item.Verified = verified
  68. item.ModifyTime = time.Now()
  69. err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
  70. if err != nil {
  71. return
  72. }
  73. }
  74. // 把刚搜索的文章加入到指标库
  75. AddWechatArticle(item, articleLink, articleDetail, nil)
  76. BeachAddWechatArticle(item, 10)
  77. fmt.Println("公众号入库完成")
  78. return
  79. }
  80. // AddWechatArticle
  81. // @Description: 添加公众号文章入库
  82. // @author: Roc
  83. // @datetime 2025-03-05 13:24:14
  84. // @param item *rag.WechatPlatform
  85. // @param link string
  86. // @param articleDetail WechatArticleDataResp
  87. func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail WechatArticleDataResp, articleMenu *ArticleMenu) {
  88. var err error
  89. defer func() {
  90. if err != nil {
  91. utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
  92. }
  93. }()
  94. obj := new(rag.WechatArticle)
  95. _, err = obj.GetByLink(articleLink)
  96. if err == nil {
  97. // 文章已经入库了,不需要重复入库
  98. return
  99. }
  100. // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
  101. if !utils.IsErrNoRow(err) {
  102. return
  103. }
  104. // 这个时候,说明数据库中没有这个文章,那么需要文章入库
  105. err = nil
  106. var publishAt time.Time
  107. if articleDetail.CreateAt != `` {
  108. createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
  109. if tmpErr == nil {
  110. publishAt = time.Unix(int64(createAtInt), 1000)
  111. }
  112. } else if articleMenu != nil {
  113. publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
  114. }
  115. obj = &rag.WechatArticle{
  116. WechatArticleId: 0,
  117. WechatPlatformId: item.WechatPlatformId,
  118. FakeId: item.FakeId,
  119. Title: articleDetail.Title,
  120. Link: articleLink,
  121. CoverUrl: articleDetail.CoverUrl,
  122. Description: articleDetail.Desc,
  123. Content: html.EscapeString(articleDetail.HtmlContent),
  124. TextContent: articleDetail.TextContent,
  125. Country: articleDetail.CountryName,
  126. Province: articleDetail.ProvinceName,
  127. City: articleDetail.CityName,
  128. //Abstract: "",
  129. //ArticleCreateTime: createAt,
  130. ModifyTime: time.Now(),
  131. CreateTime: time.Now(),
  132. }
  133. if !publishAt.IsZero() {
  134. obj.ArticleCreateTime = publishAt
  135. }
  136. if articleMenu != nil {
  137. obj.Title = articleMenu.Title
  138. //obj.Link = articleMenu.Link
  139. obj.CoverUrl = articleMenu.Cover
  140. obj.Description = articleMenu.Digest
  141. }
  142. err = obj.Create()
  143. // 文章入库成功后,需要将相关信息入摘要库
  144. go AddWechatArticleLlmOpToCache(item.WechatPlatformId, ``)
  145. }
  146. // BeachAddWechatArticle
  147. // @Description: 批量添加公众号文章
  148. // @param item
  149. // @param num
  150. // @return err
  151. func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
  152. var err error
  153. defer func() {
  154. //fmt.Println("公众号文章批量入库完成")
  155. if err != nil {
  156. utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
  157. fmt.Println("公众号文章批量入库失败,err:", err)
  158. }
  159. }()
  160. if item.FakeId == `` {
  161. return
  162. }
  163. wechatArticleObj := new(rag.WechatArticle)
  164. // 获取公众号的文章列表
  165. articleListResp, err := SearchByWechatArticleList(item.FakeId, num)
  166. if err != nil {
  167. return
  168. }
  169. for _, articleMenu := range articleListResp.List {
  170. // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
  171. _, err = wechatArticleObj.GetByLink(articleMenu.Link)
  172. if err == nil {
  173. // 文章已经入库了,不需要重复入库
  174. continue
  175. }
  176. if !utils.IsErrNoRow(err) {
  177. return
  178. }
  179. err = nil
  180. articleDetail, tmpErr := SearchByWechatArticle(articleMenu.Link)
  181. if tmpErr != nil {
  182. err = tmpErr
  183. return
  184. }
  185. // 把刚搜索的文章加入到指标库
  186. AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
  187. time.Sleep(10 * time.Second)
  188. }
  189. return
  190. }
  191. // GenerateArticleAbstract
  192. // @Description: 文章摘要生成
  193. // @author: Roc
  194. // @datetime 2025-03-10 16:17:53
  195. // @param item *rag.WechatArticle
  196. func GenerateArticleAbstract(item *rag.WechatArticle) {
  197. var err error
  198. defer func() {
  199. if err != nil {
  200. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  201. fmt.Println("文章转临时文件失败,err:", err)
  202. }
  203. }()
  204. abstractObj := rag.WechatArticleAbstract{}
  205. _, err = abstractObj.GetByWechatArticleId(item.WechatArticleId)
  206. if err == nil {
  207. // 摘要已经生成,不需要重复生成
  208. return
  209. }
  210. if !utils.IsErrNoRow(err) {
  211. return
  212. }
  213. // 生成临时文件
  214. dateDir := time.Now().Format("20060102")
  215. uploadDir := utils.STATIC_DIR + "ai/" + dateDir
  216. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  217. if err != nil {
  218. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  219. return
  220. }
  221. randStr := utils.GetRandStringNoSpecialChar(28)
  222. fileName := randStr + `.md`
  223. tmpFilePath := uploadDir + "/" + fileName
  224. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  225. if err != nil {
  226. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  227. return
  228. }
  229. defer func() {
  230. os.Remove(tmpFilePath)
  231. }()
  232. // 上传临时文件到LLM
  233. tmpFileResp, err := UploadTempDocs(tmpFilePath)
  234. if err != nil {
  235. err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
  236. return
  237. }
  238. if tmpFileResp.Data.Id == `` {
  239. err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
  240. return
  241. }
  242. tmpDocId := tmpFileResp.Data.Id
  243. //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
  244. //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
  245. //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
  246. historyList := make([]eta_llm_http.HistoryContent, 0)
  247. questionObj := rag.Question{}
  248. questionList, err := questionObj.GetListByCondition(``, []interface{}{}, 0, 100)
  249. if err != nil {
  250. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  251. return
  252. }
  253. addArticleChatRecordList := make([]*rag.WechatArticleChatRecord, 0)
  254. var abstract string
  255. //开始对话
  256. for _, question := range questionList {
  257. originalAnswer, tmpAnswer, tmpErr := getAnswerByContent(tmpDocId, question.QuestionContent, historyList)
  258. if tmpErr != nil {
  259. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  260. return
  261. }
  262. abstract = tmpAnswer
  263. historyList = append(historyList, eta_llm_http.HistoryContent{
  264. Role: `user`,
  265. Content: question.QuestionContent,
  266. }, eta_llm_http.HistoryContent{
  267. Role: `assistant`,
  268. Content: tmpAnswer,
  269. })
  270. // 待入库的数据
  271. addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  272. WechatArticleChatRecordId: 0,
  273. WechatArticleId: item.WechatArticleId,
  274. ChatUserType: "user",
  275. Content: question.QuestionContent,
  276. SendTime: time.Now(),
  277. CreatedTime: time.Now(),
  278. UpdateTime: time.Now(),
  279. }, &rag.WechatArticleChatRecord{
  280. WechatArticleChatRecordId: 0,
  281. WechatArticleId: item.WechatArticleId,
  282. ChatUserType: "assistant",
  283. Content: originalAnswer,
  284. SendTime: time.Now(),
  285. CreatedTime: time.Now(),
  286. UpdateTime: time.Now(),
  287. })
  288. }
  289. // 添加问答记录
  290. if len(addArticleChatRecordList) > 0 {
  291. recordObj := rag.WechatArticleChatRecord{}
  292. err = recordObj.CreateInBatches(addArticleChatRecordList)
  293. if err != nil {
  294. return
  295. }
  296. }
  297. if abstract != `` {
  298. abstractItem := &rag.WechatArticleAbstract{
  299. WechatArticleAbstractId: 0,
  300. WechatArticleId: item.WechatArticleId,
  301. Content: abstract,
  302. Version: 0,
  303. VectorKey: "",
  304. ModifyTime: time.Now(),
  305. CreateTime: time.Now(),
  306. }
  307. err = abstractItem.Create()
  308. if err != nil {
  309. return
  310. }
  311. AbstractToKnowledge(item, abstractItem)
  312. }
  313. }
  314. func getAnswerByContent(docId, question string, historyList []eta_llm_http.HistoryContent) (originalAnswer, answer string, err error) {
  315. originalAnswer, result, err := ChatByFile(docId, question, historyList)
  316. fmt.Println(result)
  317. if err != nil {
  318. err = fmt.Errorf("LLM对话失败,Err:" + err.Error())
  319. return
  320. }
  321. // 提取 </think> 后面的内容
  322. thinkEndIndex := strings.Index(result.Answer, "</think>")
  323. if thinkEndIndex != -1 {
  324. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  325. } else {
  326. answer = result.Answer
  327. }
  328. answer = strings.TrimSpace(answer)
  329. return
  330. }
  331. // ArticleToKnowledge
  332. // @Description: 原文入向量库
  333. // @author: Roc
  334. // @datetime 2025-03-10 16:13:16
  335. // @param item *rag.WechatArticle
  336. func ArticleToKnowledge(item *rag.WechatArticle) {
  337. if item.TextContent == `` {
  338. return
  339. }
  340. var err error
  341. defer func() {
  342. if err != nil {
  343. utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err)
  344. fmt.Println("上传文章原文到知识库失败,err:", err)
  345. }
  346. }()
  347. // 生成临时文件
  348. //dateDir := time.Now().Format("20060102")
  349. //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir
  350. uploadDir := utils.STATIC_DIR + "ai/article"
  351. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  352. if err != nil {
  353. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  354. return
  355. }
  356. fileName := RemoveSpecialChars(item.Title) + `.md`
  357. tmpFilePath := uploadDir + "/" + fileName
  358. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  359. if err != nil {
  360. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  361. return
  362. }
  363. defer func() {
  364. os.Remove(tmpFilePath)
  365. }()
  366. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName]
  367. // 上传临时文件到LLM
  368. uploadFileResp, err := UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  369. if err != nil {
  370. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  371. return
  372. }
  373. if len(uploadFileResp.FailedFiles) > 0 {
  374. for _, v := range uploadFileResp.FailedFiles {
  375. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  376. }
  377. }
  378. item.VectorKey = tmpFilePath
  379. item.ModifyTime = time.Now()
  380. err = item.Update([]string{"vector_key", "modify_time"})
  381. }
  382. // AbstractToKnowledge
  383. // @Description: 摘要入向量库
  384. // @author: Roc
  385. // @datetime 2025-03-10 16:14:59
  386. // @param wechatArticleItem *rag.WechatArticle
  387. // @param item *rag.WechatArticleAbstract
  388. func AbstractToKnowledge(wechatArticleItem *rag.WechatArticle, item *rag.WechatArticleAbstract) {
  389. if item.Content == `` {
  390. return
  391. }
  392. // 已经生成了,那就不处理了
  393. if item.VectorKey != `` {
  394. return
  395. }
  396. var err error
  397. defer func() {
  398. if err != nil {
  399. utils.FileLog.Error("摘要入向量库失败,err:%v", err)
  400. fmt.Println("摘要入向量库失败,err:", err)
  401. }
  402. }()
  403. // 生成临时文件
  404. //dateDir := time.Now().Format("20060102")
  405. //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir
  406. uploadDir := utils.STATIC_DIR + "ai/abstract"
  407. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  408. if err != nil {
  409. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  410. return
  411. }
  412. fileName := RemoveSpecialChars(wechatArticleItem.Title) + `.md`
  413. tmpFilePath := uploadDir + "/" + fileName
  414. err = utils.SaveToFile(item.Content, tmpFilePath)
  415. if err != nil {
  416. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  417. return
  418. }
  419. defer func() {
  420. os.Remove(tmpFilePath)
  421. }()
  422. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
  423. // 上传临时文件到LLM
  424. uploadFileResp, err := UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  425. if err != nil {
  426. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  427. return
  428. }
  429. if len(uploadFileResp.FailedFiles) > 0 {
  430. for _, v := range uploadFileResp.FailedFiles {
  431. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  432. }
  433. }
  434. item.VectorKey = tmpFilePath
  435. item.ModifyTime = time.Now()
  436. err = item.Update([]string{"vector_key", "modify_time"})
  437. }
  438. func RemoveSpecialChars(text string) string {
  439. // 匹配非中文、非字母、非数字、非中文标点的字符
  440. reg := regexp.MustCompile(`[^\p{Han}\p{L}\p{N}\x{3000}-\x{303F}]`)
  441. return reg.ReplaceAllString(text, "")
  442. }