wechat_platform.go 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980
  1. package services
  2. import (
  3. "bytes"
  4. "eta/eta_api/cache"
  5. "eta/eta_api/models"
  6. "eta/eta_api/models/rag"
  7. "eta/eta_api/services/elastic"
  8. "eta/eta_api/services/llm"
  9. "eta/eta_api/utils"
  10. "eta/eta_api/utils/llm/eta_llm/eta_llm_http"
  11. "fmt"
  12. html2 "golang.org/x/net/html"
  13. "html"
  14. "os"
  15. "path"
  16. "strconv"
  17. "strings"
  18. "time"
  19. )
  20. // AddWechatPlatform
  21. // @Description: 添加新的公众号
  22. // @param item
  23. func AddWechatPlatform(item *rag.WechatPlatform) {
  24. var err error
  25. defer func() {
  26. if err != nil {
  27. utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
  28. }
  29. }()
  30. if item.FakeId != `` {
  31. return
  32. }
  33. if item.ArticleLink == `` {
  34. return
  35. }
  36. articleLink := item.ArticleLink
  37. articleDetail, err := llm.SearchByWechatArticle(item.ArticleLink)
  38. if err != nil {
  39. return
  40. }
  41. if articleDetail.Appuin == `` {
  42. err = fmt.Errorf("文章内未匹配到公众号唯一标识")
  43. return
  44. }
  45. wechatPlatform := new(rag.WechatPlatform)
  46. // 查找是否存在这个公众号id的
  47. wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
  48. if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
  49. err = tmpErr
  50. return
  51. }
  52. if tmpErr == nil {
  53. // 如果找到了,那么需要将当前的给移除掉
  54. err = item.Del()
  55. if err != nil {
  56. return
  57. }
  58. // 并将查出来的微信公众号摘出来的数据重新赋值
  59. item = wechatPlatformInfo
  60. } else if utils.IsErrNoRow(tmpErr) {
  61. // 如果没找到,那么就变更当前的信息
  62. item.FakeId = articleDetail.Appuin
  63. item.Nickname = articleDetail.Nickname
  64. //item.Alias = req.Alias
  65. item.RoundHeadImg = articleDetail.RoundHeadImg
  66. //item.ServiceType = req.ServiceType
  67. item.Signature = articleDetail.ProfileSignature
  68. //item.Verified = verified
  69. item.ModifyTime = time.Now()
  70. err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
  71. if err != nil {
  72. return
  73. }
  74. // 修改公众号头像
  75. go replaceWechatPlatformPic(item)
  76. }
  77. // 把刚搜索的文章加入到文章库中
  78. AddWechatArticle(item, articleLink, articleDetail, nil)
  79. BeachAddWechatArticle(item, 10)
  80. fmt.Println("公众号入库完成")
  81. return
  82. }
  83. // AddWechatArticle
  84. // @Description: 添加公众号文章入库
  85. // @author: Roc
  86. // @datetime 2025-03-05 13:24:14
  87. // @param item *rag.WechatPlatform
  88. // @param link string
  89. // @param articleDetail WechatArticleDataResp
  90. func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail llm.WechatArticleDataResp, articleMenu *llm.ArticleMenu) {
  91. var err error
  92. defer func() {
  93. if err != nil {
  94. utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
  95. }
  96. }()
  97. obj := new(rag.WechatArticle)
  98. _, err = obj.GetByLink(articleLink)
  99. if err == nil {
  100. // 文章已经入库了,不需要重复入库
  101. return
  102. }
  103. // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
  104. if !utils.IsErrNoRow(err) {
  105. return
  106. }
  107. // 这个时候,说明数据库中没有这个文章,那么需要文章入库
  108. err = nil
  109. var publishAt time.Time
  110. if articleDetail.CreateAt != `` {
  111. createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
  112. if tmpErr == nil {
  113. publishAt = time.Unix(int64(createAtInt), 1000)
  114. }
  115. } else if articleMenu != nil {
  116. publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
  117. }
  118. content := articleDetail.HtmlContent
  119. // 图片下载下来到本地,如果成功了,那么就用新的
  120. tmpContent, err := ReplaceHtmlImg(content)
  121. if tmpContent != `` {
  122. content = tmpContent
  123. }
  124. obj = &rag.WechatArticle{
  125. WechatArticleId: 0,
  126. WechatPlatformId: item.WechatPlatformId,
  127. FakeId: item.FakeId,
  128. Title: articleDetail.Title,
  129. Link: articleLink,
  130. CoverUrl: articleDetail.CoverUrl,
  131. Description: articleDetail.Desc,
  132. Content: html.EscapeString(content),
  133. TextContent: articleDetail.TextContent,
  134. Country: articleDetail.CountryName,
  135. Province: articleDetail.ProvinceName,
  136. City: articleDetail.CityName,
  137. //Abstract: "",
  138. //ArticleCreateTime: createAt,
  139. ModifyTime: time.Now(),
  140. CreateTime: time.Now(),
  141. }
  142. if !publishAt.IsZero() {
  143. obj.ArticleCreateTime = publishAt
  144. }
  145. if articleMenu != nil {
  146. obj.Title = articleMenu.Title
  147. //obj.Link = articleMenu.Link
  148. obj.CoverUrl = articleMenu.Cover
  149. obj.Description = articleMenu.Digest
  150. }
  151. err = obj.Create()
  152. // 修改文章封面图
  153. go replaceWechatArticleCoverPic(obj)
  154. // 文章入库成功后,需要将相关信息入摘要库
  155. go cache.AddWechatArticleLlmOpToCache(obj.WechatArticleId, ``)
  156. }
  157. // BeachAddWechatArticle
  158. // @Description: 批量添加公众号文章
  159. // @param item
  160. // @param num
  161. // @return err
  162. func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
  163. var err error
  164. defer func() {
  165. //fmt.Println("公众号文章批量入库完成")
  166. if err != nil {
  167. utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
  168. fmt.Println("公众号文章批量入库失败,err:", err)
  169. }
  170. }()
  171. if item.FakeId == `` {
  172. return
  173. }
  174. wechatArticleObj := new(rag.WechatArticle)
  175. // 获取公众号的文章列表
  176. articleListResp, err := llm.SearchByWechatArticleList(item.FakeId, num)
  177. if err != nil {
  178. return
  179. }
  180. for _, articleMenu := range articleListResp.List {
  181. // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
  182. _, err = wechatArticleObj.GetByLink(articleMenu.Link)
  183. if err == nil {
  184. // 文章已经入库了,不需要重复入库
  185. continue
  186. }
  187. if !utils.IsErrNoRow(err) {
  188. return
  189. }
  190. err = nil
  191. articleDetail, tmpErr := llm.SearchByWechatArticle(articleMenu.Link)
  192. if tmpErr != nil {
  193. err = tmpErr
  194. return
  195. }
  196. // 把刚搜索的文章加入到指标库
  197. AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
  198. time.Sleep(10 * time.Second)
  199. }
  200. return
  201. }
  202. // GenerateArticleAbstract
  203. // @Description: 文章摘要生成
  204. // @author: Roc
  205. // @datetime 2025-03-10 16:17:53
  206. // @param item *rag.WechatArticle
  207. func GenerateArticleAbstract(item *rag.WechatArticle) {
  208. var err error
  209. defer func() {
  210. if err != nil {
  211. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  212. fmt.Println("文章转临时文件失败,err:", err)
  213. }
  214. }()
  215. // 内容为空,那就不需要生成摘要
  216. if item.TextContent == `` {
  217. return
  218. }
  219. abstractObj := rag.WechatArticleAbstract{}
  220. _, err = abstractObj.GetByWechatArticleId(item.WechatArticleId)
  221. if err == nil {
  222. // 摘要已经生成,不需要重复生成
  223. return
  224. }
  225. if !utils.IsErrNoRow(err) {
  226. return
  227. }
  228. // 生成临时文件
  229. dateDir := time.Now().Format("20060102")
  230. uploadDir := utils.STATIC_DIR + "ai/" + dateDir
  231. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  232. if err != nil {
  233. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  234. return
  235. }
  236. randStr := utils.GetRandStringNoSpecialChar(28)
  237. fileName := randStr + `.md`
  238. tmpFilePath := uploadDir + "/" + fileName
  239. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  240. if err != nil {
  241. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  242. return
  243. }
  244. defer func() {
  245. os.Remove(tmpFilePath)
  246. }()
  247. // 上传临时文件到LLM
  248. tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
  249. if err != nil {
  250. err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
  251. return
  252. }
  253. if tmpFileResp.Data.Id == `` {
  254. err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
  255. return
  256. }
  257. tmpDocId := tmpFileResp.Data.Id
  258. //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
  259. //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
  260. //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
  261. //开始对话
  262. abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, tmpDocId)
  263. if tmpErr != nil {
  264. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  265. return
  266. }
  267. // 添加问答记录
  268. if len(addArticleChatRecordList) > 0 {
  269. recordObj := rag.WechatArticleChatRecord{}
  270. err = recordObj.CreateInBatches(addArticleChatRecordList)
  271. if err != nil {
  272. return
  273. }
  274. }
  275. if abstract != `` {
  276. if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  277. item.AbstractStatus = 2
  278. item.ModifyTime = time.Now()
  279. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  280. return
  281. }
  282. item.AbstractStatus = 1
  283. item.ModifyTime = time.Now()
  284. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  285. abstractItem := &rag.WechatArticleAbstract{
  286. WechatArticleAbstractId: 0,
  287. WechatArticleId: item.WechatArticleId,
  288. Content: abstract,
  289. Version: 0,
  290. VectorKey: "",
  291. ModifyTime: time.Now(),
  292. CreateTime: time.Now(),
  293. }
  294. err = abstractItem.Create()
  295. if err != nil {
  296. return
  297. }
  298. // 数据入ES库
  299. go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  300. AbstractToKnowledge(item, abstractItem, false)
  301. }
  302. }
  303. // ReGenerateArticleAbstract
  304. // @Description: 文章摘要重新生成
  305. // @author: Roc
  306. // @datetime 2025-03-10 16:17:53
  307. // @param item *rag.WechatArticle
  308. func ReGenerateArticleAbstract(item *rag.WechatArticle) {
  309. var err error
  310. defer func() {
  311. if err != nil {
  312. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  313. fmt.Println("文章转临时文件失败,err:", err)
  314. }
  315. }()
  316. abstractObj := rag.WechatArticleAbstract{}
  317. abstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
  318. if err != nil {
  319. if utils.IsErrNoRow(err) {
  320. // 直接生成
  321. GenerateArticleAbstract(item)
  322. return
  323. }
  324. // 异常了
  325. return
  326. }
  327. // 生成临时文件
  328. dateDir := time.Now().Format("20060102")
  329. uploadDir := utils.STATIC_DIR + "ai/" + dateDir
  330. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  331. if err != nil {
  332. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  333. return
  334. }
  335. randStr := utils.GetRandStringNoSpecialChar(28)
  336. fileName := randStr + `.md`
  337. tmpFilePath := uploadDir + "/" + fileName
  338. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  339. if err != nil {
  340. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  341. return
  342. }
  343. defer func() {
  344. os.Remove(tmpFilePath)
  345. }()
  346. // 上传临时文件到LLM
  347. tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
  348. if err != nil {
  349. err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
  350. return
  351. }
  352. if tmpFileResp.Data.Id == `` {
  353. err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
  354. return
  355. }
  356. tmpDocId := tmpFileResp.Data.Id
  357. //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
  358. //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
  359. //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
  360. //开始对话
  361. abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, tmpDocId)
  362. if tmpErr != nil {
  363. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  364. return
  365. }
  366. // 添加问答记录
  367. if len(addArticleChatRecordList) > 0 {
  368. recordObj := rag.WechatArticleChatRecord{}
  369. err = recordObj.CreateInBatches(addArticleChatRecordList)
  370. if err != nil {
  371. return
  372. }
  373. }
  374. if abstract != `` {
  375. if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  376. item.AbstractStatus = 2
  377. item.ModifyTime = time.Now()
  378. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  379. return
  380. }
  381. item.AbstractStatus = 1
  382. item.ModifyTime = time.Now()
  383. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  384. abstractItem.Content = abstract
  385. abstractItem.Version = abstractObj.Version + 1
  386. abstractItem.ModifyTime = time.Now()
  387. err = abstractItem.Update([]string{"content", "version", "modify_time"})
  388. if err != nil {
  389. return
  390. }
  391. AbstractToKnowledge(item, abstractItem, true)
  392. }
  393. }
  394. // DelDoc
  395. // @Description: 删除摘要向量库
  396. // @author: Roc
  397. // @datetime 2025-03-12 16:55:05
  398. // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
  399. // @return err error
  400. func DelDoc(wechatArticleAbstractList []*rag.WechatArticleAbstract) (err error) {
  401. defer func() {
  402. if err != nil {
  403. utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
  404. fmt.Println("删除摘要向量库文件失败,err:", err)
  405. }
  406. }()
  407. vectorKeyList := make([]string, 0)
  408. wechatArticleAbstractIdList := make([]int, 0)
  409. for _, v := range wechatArticleAbstractList {
  410. vectorKeyList = append(vectorKeyList, v.VectorKey)
  411. wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.WechatArticleAbstractId)
  412. }
  413. _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
  414. if err != nil {
  415. err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
  416. return
  417. }
  418. //fmt.Println(resp)
  419. obj := rag.WechatArticleAbstract{}
  420. err = obj.DelVectorKey(wechatArticleAbstractIdList)
  421. return
  422. }
  423. func getAnswerByContent(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
  424. historyList := make([]eta_llm_http.HistoryContent, 0)
  425. addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
  426. questionObj := rag.Question{}
  427. questionList, err := questionObj.GetListByCondition(``, []interface{}{}, 0, 100)
  428. if err != nil {
  429. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  430. return
  431. }
  432. //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
  433. questionStrList := []string{`你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`}
  434. for _, v := range questionList {
  435. questionStrList = append(questionStrList, v.QuestionContent)
  436. }
  437. questionStr := strings.Join(questionStrList, "\n")
  438. originalAnswer, result, err := llm.ChatByFile(docId, questionStr, historyList)
  439. fmt.Println(result)
  440. if err != nil {
  441. err = fmt.Errorf("LLM对话失败,Err:" + err.Error())
  442. return
  443. }
  444. // 提取 </think> 后面的内容
  445. thinkEndIndex := strings.Index(result.Answer, "</think>")
  446. if thinkEndIndex != -1 {
  447. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  448. } else {
  449. answer = result.Answer
  450. }
  451. answer = strings.TrimSpace(answer)
  452. // 待入库的数据
  453. addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  454. WechatArticleChatRecordId: 0,
  455. WechatArticleId: wechatArticleId,
  456. ChatUserType: "user",
  457. Content: questionStr,
  458. SendTime: time.Now(),
  459. CreatedTime: time.Now(),
  460. UpdateTime: time.Now(),
  461. }, &rag.WechatArticleChatRecord{
  462. WechatArticleChatRecordId: 0,
  463. WechatArticleId: wechatArticleId,
  464. ChatUserType: "assistant",
  465. Content: originalAnswer,
  466. SendTime: time.Now(),
  467. CreatedTime: time.Now(),
  468. UpdateTime: time.Now(),
  469. })
  470. return
  471. }
  472. // ArticleToKnowledge
  473. // @Description: 原文入向量库
  474. // @author: Roc
  475. // @datetime 2025-03-10 16:13:16
  476. // @param item *rag.WechatArticle
  477. func ArticleToKnowledge(item *rag.WechatArticle) {
  478. if item.TextContent == `` {
  479. return
  480. }
  481. var err error
  482. defer func() {
  483. if err != nil {
  484. utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err)
  485. fmt.Println("上传文章原文到知识库失败,err:", err)
  486. }
  487. }()
  488. // 生成临时文件
  489. //dateDir := time.Now().Format("20060102")
  490. //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir
  491. uploadDir := utils.STATIC_DIR + "ai/article"
  492. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  493. if err != nil {
  494. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  495. return
  496. }
  497. fileName := utils.RemoveSpecialChars(item.Title) + `.md`
  498. tmpFilePath := uploadDir + "/" + fileName
  499. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  500. if err != nil {
  501. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  502. return
  503. }
  504. defer func() {
  505. os.Remove(tmpFilePath)
  506. }()
  507. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName]
  508. // 上传临时文件到LLM
  509. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  510. if err != nil {
  511. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  512. return
  513. }
  514. if len(uploadFileResp.FailedFiles) > 0 {
  515. for _, v := range uploadFileResp.FailedFiles {
  516. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  517. }
  518. }
  519. item.VectorKey = tmpFilePath
  520. item.ModifyTime = time.Now()
  521. err = item.Update([]string{"vector_key", "modify_time"})
  522. }
  523. // AbstractToKnowledge
  524. // @Description: 摘要入向量库
  525. // @author: Roc
  526. // @datetime 2025-03-10 16:14:59
  527. // @param wechatArticleItem *rag.WechatArticle
  528. // @param item *rag.WechatArticleAbstract
  529. func AbstractToKnowledge(wechatArticleItem *rag.WechatArticle, item *rag.WechatArticleAbstract, isReUpload bool) {
  530. if item.Content == `` {
  531. return
  532. }
  533. // 已经生成了,那就不处理了
  534. if item.VectorKey != `` && !isReUpload {
  535. return
  536. }
  537. var err error
  538. defer func() {
  539. if err != nil {
  540. utils.FileLog.Error("摘要入向量库失败,err:%v", err)
  541. fmt.Println("摘要入向量库失败,err:", err)
  542. }
  543. }()
  544. // 生成临时文件
  545. //dateDir := time.Now().Format("20060102")
  546. //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir
  547. uploadDir := utils.STATIC_DIR + "ai/abstract"
  548. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  549. if err != nil {
  550. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  551. return
  552. }
  553. fileName := utils.RemoveSpecialChars(wechatArticleItem.Title) + `.md`
  554. tmpFilePath := uploadDir + "/" + fileName
  555. err = utils.SaveToFile(item.Content, tmpFilePath)
  556. if err != nil {
  557. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  558. return
  559. }
  560. defer func() {
  561. os.Remove(tmpFilePath)
  562. }()
  563. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
  564. // 上传临时文件到LLM
  565. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  566. if err != nil {
  567. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  568. return
  569. }
  570. if len(uploadFileResp.FailedFiles) > 0 {
  571. for _, v := range uploadFileResp.FailedFiles {
  572. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  573. }
  574. }
  575. item.VectorKey = tmpFilePath
  576. item.ModifyTime = time.Now()
  577. err = item.Update([]string{"vector_key", "modify_time"})
  578. }
  579. // replaceWechatPlatformPic
  580. // @Description: 替换公众号头像
  581. // @author: Roc
  582. // @datetime 2025-03-11 09:38:24
  583. // @param item *rag.WechatPlatform
  584. func replaceWechatPlatformPic(item *rag.WechatPlatform) {
  585. var err error
  586. defer func() {
  587. if err != nil {
  588. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  589. fmt.Println("替换公众号头像失败,err:", err)
  590. }
  591. }()
  592. if item.RoundHeadImg == `` {
  593. return
  594. }
  595. resourceUrl, err := downloadWxPicAndUploadToOss(item.RoundHeadImg, `head_img`)
  596. if err != nil {
  597. return
  598. }
  599. item.RoundHeadImg = resourceUrl
  600. err = item.Update([]string{"round_head_img"})
  601. }
  602. // replaceWechatArticleCoverPic
  603. // @Description: 替换文章封面图
  604. // @author: Roc
  605. // @datetime 2025-03-11 09:38:35
  606. // @param item *rag.WechatArticle
  607. func replaceWechatArticleCoverPic(item *rag.WechatArticle) {
  608. var err error
  609. defer func() {
  610. if err != nil {
  611. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  612. fmt.Println("替换公众号头像失败,err:", err)
  613. }
  614. // 数据入ES库
  615. AddOrEditEsWechatArticle(item.WechatArticleId)
  616. }()
  617. if item.CoverUrl == `` {
  618. return
  619. }
  620. resourceUrl, err := downloadWxPicAndUploadToOss(item.CoverUrl, `cover_url`)
  621. if err != nil {
  622. return
  623. }
  624. item.CoverUrl = resourceUrl
  625. err = item.Update([]string{"cover_url"})
  626. }
  627. // replaceWechatArticlePic
  628. // @Description: 替换文章内容图
  629. // @author: Roc
  630. // @datetime 2025-03-11 09:38:35
  631. // @param item *rag.WechatArticle
  632. func ReplaceWechatArticlePic(item *rag.WechatArticle) {
  633. var err error
  634. defer func() {
  635. if err != nil {
  636. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  637. fmt.Println("替换公众号头像失败,err:", err)
  638. }
  639. }()
  640. if item.Content == `` {
  641. return
  642. }
  643. content, err := ReplaceHtmlImg(html.UnescapeString(item.Content))
  644. if err != nil {
  645. return
  646. }
  647. item.Content = html.EscapeString(content)
  648. err = item.Update([]string{"content"})
  649. return
  650. }
  651. // downloadWxPicAndUploadToOss
  652. // @Description: 下载微信图片并上传到OSS
  653. // @author: Roc
  654. // @datetime 2025-03-11 09:28:49
  655. // @param wxPicUrl string
  656. // @return resourceUrl string
  657. // @return err error
  658. func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, err error) {
  659. localFilePath, err := utils.DownloadWxImage(wxPicUrl)
  660. if err != nil {
  661. return
  662. }
  663. defer func() {
  664. os.Remove(localFilePath)
  665. }()
  666. ossClient := NewOssClient()
  667. if ossClient == nil {
  668. err = fmt.Errorf(`初始化OSS服务失败`)
  669. return
  670. }
  671. ext := path.Ext(localFilePath)
  672. fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), utils.GetRandStringNoSpecialChar(16), ext)
  673. //savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName
  674. savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName)
  675. resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath)
  676. if err != nil {
  677. err = fmt.Errorf("文件上传失败,Err:" + err.Error())
  678. return
  679. }
  680. return
  681. }
  682. // ReplaceHtmlImg
  683. // @Description: 将html中的图片替换成自己的
  684. // @author: Roc
  685. // @datetime 2025-03-11 14:32:00
  686. // @param htmlStr string
  687. // @return newHtml string
  688. // @return err error
  689. func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) {
  690. doc, err := html2.Parse(strings.NewReader(htmlStr))
  691. if err != nil {
  692. return
  693. }
  694. if err != nil {
  695. return
  696. }
  697. handleNode(doc)
  698. // 将处理后的HTML节点重新渲染为HTML字符串
  699. var buf bytes.Buffer
  700. if err = html2.Render(&buf, doc); err != nil {
  701. fmt.Println(err)
  702. return
  703. }
  704. newHtml = buf.String()
  705. return
  706. }
  707. // handleNode
  708. // @Description: html节点处理
  709. // @author: Roc
  710. // @datetime 2025-03-11 14:32:45
  711. // @param n *html2.Node
  712. func handleNode(n *html2.Node) {
  713. if n.Type == html2.ElementNode {
  714. if n.Data == "img" {
  715. for k, attr := range n.Attr {
  716. // 新增代码:如果标签是img且存在data-src属性,则将data-src的值赋给src
  717. if n.Data == "img" && attr.Key == "src" {
  718. resourceUrl, tmpErr := downloadWxPicAndUploadToOss(attr.Val, `article`)
  719. if tmpErr != nil {
  720. continue
  721. }
  722. attr.Val = resourceUrl
  723. }
  724. n.Attr[k] = attr
  725. }
  726. }
  727. }
  728. for c := n.FirstChild; c != nil; c = c.NextSibling {
  729. handleNode(c)
  730. }
  731. }
  732. // AddOrEditEsWechatPlatformId
  733. // @Description: 批量处理某个公众号下的文章到ES
  734. // @author: Roc
  735. // @datetime 2025-03-13 11:01:28
  736. // @param articleId int
  737. func AddOrEditEsWechatPlatformId(wechatPlatformId int) {
  738. if utils.EsWechatArticleName == `` {
  739. return
  740. }
  741. obj := rag.WechatArticle{}
  742. list, _ := obj.GetListByCondition(` wechat_article_id `, ` AND wechat_platform_id = ? `, []interface{}{wechatPlatformId}, 0, 1000000)
  743. for _, item := range list {
  744. AddOrEditEsWechatArticle(item.WechatArticleId)
  745. }
  746. }
  747. // AddOrEditEsWechatArticle
  748. // @Description: 新增/编辑微信文章入ES
  749. // @author: Roc
  750. // @datetime 2025-03-13 11:01:28
  751. // @param articleId int
  752. func AddOrEditEsWechatArticle(articleId int) {
  753. if utils.EsWechatArticleName == `` {
  754. return
  755. }
  756. var err error
  757. defer func() {
  758. if err != nil {
  759. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  760. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  761. }
  762. }()
  763. obj := rag.WechatArticle{}
  764. articleInfo, err := obj.GetById(articleId)
  765. if err != nil {
  766. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  767. return
  768. }
  769. platformObj := rag.WechatPlatform{}
  770. platformInfo, err := platformObj.GetById(articleInfo.WechatPlatformId)
  771. if err != nil {
  772. err = fmt.Errorf("获取公众号平台信息失败,Err:" + err.Error())
  773. return
  774. }
  775. esItem := elastic.WechatArticleAndPlatform{
  776. WechatArticleId: articleInfo.WechatArticleId,
  777. WechatPlatformId: articleInfo.WechatPlatformId,
  778. FakeId: articleInfo.FakeId,
  779. Title: articleInfo.Title,
  780. Link: articleInfo.Link,
  781. CoverUrl: articleInfo.CoverUrl,
  782. Description: articleInfo.Description,
  783. //Content: articleInfo.Content,
  784. //TextContent: articleInfo.TextContent,
  785. //AbstractStatus: articleInfo.AbstractStatus,
  786. Country: articleInfo.Country,
  787. Province: articleInfo.Province,
  788. City: articleInfo.City,
  789. ArticleCreateTime: articleInfo.ArticleCreateTime,
  790. IsDeleted: articleInfo.IsDeleted,
  791. ModifyTime: articleInfo.ModifyTime,
  792. CreateTime: articleInfo.CreateTime,
  793. Nickname: platformInfo.Nickname,
  794. Alias: platformInfo.Alias,
  795. RoundHeadImg: platformInfo.RoundHeadImg,
  796. }
  797. err = elastic.WechatArticleEsAddOrEdit(strconv.Itoa(articleInfo.WechatArticleId), esItem)
  798. }
  799. // AddOrEditEsWechatArticleAbstract
  800. // @Description: 新增/编辑微信文章摘要入ES
  801. // @author: Roc
  802. // @datetime 2025-03-13 14:13:47
  803. // @param articleAbstractId int
  804. func AddOrEditEsWechatArticleAbstract(articleAbstractId int) {
  805. if utils.EsWechatArticleAbstractName == `` {
  806. return
  807. }
  808. var err error
  809. defer func() {
  810. if err != nil {
  811. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  812. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  813. }
  814. }()
  815. obj := rag.WechatArticleAbstract{}
  816. abstractInfo, err := obj.GetById(articleAbstractId)
  817. if err != nil {
  818. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  819. return
  820. }
  821. articleObj := rag.WechatArticle{}
  822. articleInfo, err := articleObj.GetById(abstractInfo.WechatArticleId)
  823. if err != nil {
  824. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  825. return
  826. }
  827. // 公众号平台关联的标签品种
  828. tagObj := rag.WechatPlatformTagMapping{}
  829. tagMappingList, err := tagObj.GetListByCondition(` AND wechat_platform_id = ? `, []interface{}{articleInfo.WechatPlatformId}, 0, 10000)
  830. if err != nil {
  831. err = fmt.Errorf("获取公众号平台关联的品种信息失败,Err:" + err.Error())
  832. return
  833. }
  834. tagIdList := make([]int, 0)
  835. for _, v := range tagMappingList {
  836. tagIdList = append(tagIdList, v.TagId)
  837. }
  838. esItem := elastic.WechatArticleAbstractItem{
  839. WechatArticleAbstractId: abstractInfo.WechatArticleAbstractId,
  840. WechatArticleId: abstractInfo.WechatArticleId,
  841. Abstract: abstractInfo.Content,
  842. Version: abstractInfo.Version,
  843. VectorKey: abstractInfo.VectorKey,
  844. ModifyTime: articleInfo.ModifyTime,
  845. CreateTime: articleInfo.CreateTime,
  846. Title: articleInfo.Title,
  847. Link: articleInfo.Link,
  848. TagIdList: tagIdList,
  849. }
  850. err = elastic.WechatArticleAbstractEsAddOrEdit(strconv.Itoa(articleAbstractId), esItem)
  851. }
  852. // AddOrEditEsWechatArticleAbstract
  853. // @Description: 新增/编辑微信文章摘要入ES
  854. // @author: Roc
  855. // @datetime 2025-03-13 14:13:47
  856. // @param articleAbstractId int
  857. func DelEsWechatArticleAbstract(articleAbstractId int) {
  858. if utils.EsWechatArticleAbstractName == `` {
  859. return
  860. }
  861. var err error
  862. defer func() {
  863. if err != nil {
  864. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  865. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  866. }
  867. }()
  868. err = elastic.WechatArticleAbstractEsDel(strconv.Itoa(articleAbstractId))
  869. }