wechat_platform.go 30 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096
  1. package services
  2. import (
  3. "bytes"
  4. "eta/eta_api/cache"
  5. "eta/eta_api/models"
  6. "eta/eta_api/models/rag"
  7. "eta/eta_api/services/elastic"
  8. "eta/eta_api/services/llm"
  9. "eta/eta_api/utils"
  10. "eta/eta_api/utils/llm/eta_llm/eta_llm_http"
  11. "fmt"
  12. html2 "golang.org/x/net/html"
  13. "html"
  14. "os"
  15. "path"
  16. "strconv"
  17. "strings"
  18. "time"
  19. )
  20. // AddWechatPlatform
  21. // @Description: 添加新的公众号
  22. // @param item
  23. func AddWechatPlatform(item *rag.WechatPlatform) {
  24. var err error
  25. defer func() {
  26. if err != nil {
  27. utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
  28. }
  29. }()
  30. if item.FakeId != `` {
  31. return
  32. }
  33. if item.ArticleLink == `` {
  34. return
  35. }
  36. articleLink := item.ArticleLink
  37. articleDetail, err := llm.SearchByWechatArticle(item.ArticleLink)
  38. if err != nil {
  39. return
  40. }
  41. if articleDetail.Appuin == `` {
  42. err = fmt.Errorf("文章内未匹配到公众号唯一标识")
  43. return
  44. }
  45. wechatPlatform := new(rag.WechatPlatform)
  46. // 查找是否存在这个公众号id的
  47. wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
  48. if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
  49. err = tmpErr
  50. return
  51. }
  52. if tmpErr == nil {
  53. // 如果找到了,那么需要将当前的给移除掉
  54. err = item.Del()
  55. if err != nil {
  56. return
  57. }
  58. // 并将查出来的微信公众号摘出来的数据重新赋值
  59. item = wechatPlatformInfo
  60. } else if utils.IsErrNoRow(tmpErr) {
  61. // 如果没找到,那么就变更当前的信息
  62. item.FakeId = articleDetail.Appuin
  63. item.Nickname = articleDetail.Nickname
  64. //item.Alias = req.Alias
  65. item.RoundHeadImg = articleDetail.RoundHeadImg
  66. //item.ServiceType = req.ServiceType
  67. item.Signature = articleDetail.ProfileSignature
  68. //item.Verified = verified
  69. item.ModifyTime = time.Now()
  70. err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
  71. if err != nil {
  72. return
  73. }
  74. // 修改公众号头像
  75. go replaceWechatPlatformPic(item)
  76. }
  77. // 把刚搜索的文章加入到文章库中
  78. AddWechatArticle(item, articleLink, articleDetail, nil)
  79. BeachAddWechatArticle(item, 10)
  80. fmt.Println("公众号入库完成")
  81. return
  82. }
  83. // AddWechatArticle
  84. // @Description: 添加公众号文章入库
  85. // @author: Roc
  86. // @datetime 2025-03-05 13:24:14
  87. // @param item *rag.WechatPlatform
  88. // @param link string
  89. // @param articleDetail WechatArticleDataResp
  90. func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail llm.WechatArticleDataResp, articleMenu *llm.ArticleMenu) {
  91. var err error
  92. defer func() {
  93. if err != nil {
  94. utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
  95. }
  96. }()
  97. obj := new(rag.WechatArticle)
  98. _, err = obj.GetByLink(articleLink)
  99. if err == nil {
  100. // 文章已经入库了,不需要重复入库
  101. return
  102. }
  103. // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
  104. if !utils.IsErrNoRow(err) {
  105. return
  106. }
  107. // 这个时候,说明数据库中没有这个文章,那么需要文章入库
  108. err = nil
  109. var publishAt time.Time
  110. if articleDetail.CreateAt != `` {
  111. createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
  112. if tmpErr == nil {
  113. publishAt = time.Unix(int64(createAtInt), 1000)
  114. }
  115. } else if articleMenu != nil {
  116. publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
  117. }
  118. content := articleDetail.HtmlContent
  119. // 图片下载下来到本地,如果成功了,那么就用新的
  120. tmpContent, err := ReplaceHtmlImg(content)
  121. if tmpContent != `` {
  122. content = tmpContent
  123. }
  124. obj = &rag.WechatArticle{
  125. WechatArticleId: 0,
  126. WechatPlatformId: item.WechatPlatformId,
  127. FakeId: item.FakeId,
  128. Title: articleDetail.Title,
  129. Link: articleLink,
  130. CoverUrl: articleDetail.CoverUrl,
  131. Description: articleDetail.Desc,
  132. Content: html.EscapeString(content),
  133. TextContent: articleDetail.TextContent,
  134. Country: articleDetail.CountryName,
  135. Province: articleDetail.ProvinceName,
  136. City: articleDetail.CityName,
  137. //Abstract: "",
  138. //ArticleCreateTime: createAt,
  139. ModifyTime: time.Now(),
  140. CreateTime: time.Now(),
  141. }
  142. if !publishAt.IsZero() {
  143. obj.ArticleCreateTime = publishAt
  144. }
  145. if articleMenu != nil {
  146. obj.Title = articleMenu.Title
  147. //obj.Link = articleMenu.Link
  148. obj.CoverUrl = articleMenu.Cover
  149. obj.Description = articleMenu.Digest
  150. }
  151. err = obj.Create()
  152. // 修改文章封面图
  153. go replaceWechatArticleCoverPic(obj)
  154. // 文章入库成功后,需要将相关信息入摘要库
  155. go cache.AddWechatArticleLlmOpToCache(obj.WechatArticleId, ``)
  156. }
  157. // BeachAddWechatArticle
  158. // @Description: 批量添加公众号文章
  159. // @param item
  160. // @param num
  161. // @return err
  162. func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
  163. var err error
  164. defer func() {
  165. //fmt.Println("公众号文章批量入库完成")
  166. if err != nil {
  167. utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
  168. fmt.Println("公众号文章批量入库失败,err:", err)
  169. }
  170. }()
  171. if item.FakeId == `` {
  172. return
  173. }
  174. wechatArticleObj := new(rag.WechatArticle)
  175. // 获取公众号的文章列表
  176. articleListResp, err := llm.SearchByWechatArticleList(item.FakeId, num)
  177. if err != nil {
  178. return
  179. }
  180. for _, articleMenu := range articleListResp.List {
  181. // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
  182. _, err = wechatArticleObj.GetByLink(articleMenu.Link)
  183. if err == nil {
  184. // 文章已经入库了,不需要重复入库
  185. continue
  186. }
  187. if !utils.IsErrNoRow(err) {
  188. return
  189. }
  190. err = nil
  191. articleDetail, tmpErr := llm.SearchByWechatArticle(articleMenu.Link)
  192. if tmpErr != nil {
  193. err = tmpErr
  194. return
  195. }
  196. // 把刚搜索的文章加入到指标库
  197. AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
  198. //time.Sleep(10 * time.Second)
  199. // 随机休眠,至少大于10s
  200. sleepTimeInt := utils.GetRandInt(10, 20)
  201. if sleepTimeInt < 10 {
  202. sleepTimeInt = 10
  203. }
  204. time.Sleep(time.Duration(sleepTimeInt) * time.Second)
  205. }
  206. return
  207. }
  208. // GenerateArticleAbstract
  209. // @Description: 文章摘要生成
  210. // @author: Roc
  211. // @datetime 2025-03-10 16:17:53
  212. // @param item *rag.WechatArticle
  213. func GenerateArticleAbstract(item *rag.WechatArticle) {
  214. var err error
  215. defer func() {
  216. if err != nil {
  217. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  218. fmt.Println("文章转临时文件失败,err:", err)
  219. }
  220. }()
  221. // 内容为空,那就不需要生成摘要
  222. if item.TextContent == `` {
  223. return
  224. }
  225. abstractObj := rag.WechatArticleAbstract{}
  226. tmpAbstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
  227. if err == nil {
  228. // 摘要已经生成,不需要重复生成
  229. AbstractToKnowledge(item, tmpAbstractItem, false)
  230. return
  231. }
  232. if !utils.IsErrNoRow(err) {
  233. return
  234. }
  235. // 生成临时文件
  236. dateDir := time.Now().Format("20060102")
  237. uploadDir := "./static/ai/" + dateDir
  238. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  239. if err != nil {
  240. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  241. return
  242. }
  243. randStr := utils.GetRandStringNoSpecialChar(28)
  244. fileName := randStr + `.md`
  245. tmpFilePath := uploadDir + "/" + fileName
  246. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  247. if err != nil {
  248. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  249. return
  250. }
  251. defer func() {
  252. os.Remove(tmpFilePath)
  253. }()
  254. // 上传临时文件到LLM
  255. tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
  256. if err != nil {
  257. err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
  258. return
  259. }
  260. if tmpFileResp.Data.Id == `` {
  261. err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
  262. return
  263. }
  264. tmpDocId := tmpFileResp.Data.Id
  265. //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
  266. //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
  267. //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
  268. //开始对话
  269. abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, tmpDocId)
  270. if tmpErr != nil {
  271. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  272. return
  273. }
  274. // 添加问答记录
  275. if len(addArticleChatRecordList) > 0 {
  276. recordObj := rag.WechatArticleChatRecord{}
  277. err = recordObj.CreateInBatches(addArticleChatRecordList)
  278. if err != nil {
  279. return
  280. }
  281. }
  282. if abstract != `` {
  283. if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  284. item.AbstractStatus = 2
  285. item.ModifyTime = time.Now()
  286. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  287. return
  288. }
  289. item.AbstractStatus = 1
  290. item.ModifyTime = time.Now()
  291. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  292. abstractItem := &rag.WechatArticleAbstract{
  293. WechatArticleAbstractId: 0,
  294. WechatArticleId: item.WechatArticleId,
  295. Content: abstract,
  296. Version: 0,
  297. VectorKey: "",
  298. ModifyTime: time.Now(),
  299. CreateTime: time.Now(),
  300. }
  301. err = abstractItem.Create()
  302. if err != nil {
  303. return
  304. }
  305. // 数据入ES库
  306. go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  307. AbstractToKnowledge(item, abstractItem, false)
  308. }
  309. }
  310. // ReGenerateArticleAbstract
  311. // @Description: 文章摘要重新生成
  312. // @author: Roc
  313. // @datetime 2025-03-10 16:17:53
  314. // @param item *rag.WechatArticle
  315. func ReGenerateArticleAbstract(item *rag.WechatArticle) {
  316. var err error
  317. defer func() {
  318. if err != nil {
  319. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  320. fmt.Println("文章转临时文件失败,err:", err)
  321. }
  322. }()
  323. abstractObj := rag.WechatArticleAbstract{}
  324. abstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
  325. if err != nil {
  326. if utils.IsErrNoRow(err) {
  327. // 直接生成
  328. GenerateArticleAbstract(item)
  329. return
  330. }
  331. // 异常了
  332. return
  333. }
  334. // 生成临时文件
  335. dateDir := time.Now().Format("20060102")
  336. uploadDir := "./static/ai/" + dateDir
  337. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  338. if err != nil {
  339. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  340. return
  341. }
  342. randStr := utils.GetRandStringNoSpecialChar(28)
  343. fileName := randStr + `.md`
  344. tmpFilePath := uploadDir + "/" + fileName
  345. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  346. if err != nil {
  347. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  348. return
  349. }
  350. defer func() {
  351. os.Remove(tmpFilePath)
  352. }()
  353. // 上传临时文件到LLM
  354. tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
  355. if err != nil {
  356. err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
  357. return
  358. }
  359. if tmpFileResp.Data.Id == `` {
  360. err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
  361. return
  362. }
  363. tmpDocId := tmpFileResp.Data.Id
  364. //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
  365. //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
  366. //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
  367. //开始对话
  368. abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, tmpDocId)
  369. if tmpErr != nil {
  370. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  371. return
  372. }
  373. // 添加问答记录
  374. if len(addArticleChatRecordList) > 0 {
  375. recordObj := rag.WechatArticleChatRecord{}
  376. err = recordObj.CreateInBatches(addArticleChatRecordList)
  377. if err != nil {
  378. return
  379. }
  380. }
  381. if abstract != `` {
  382. if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  383. item.AbstractStatus = 2
  384. item.ModifyTime = time.Now()
  385. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  386. return
  387. }
  388. item.AbstractStatus = 1
  389. item.ModifyTime = time.Now()
  390. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  391. abstractItem.Content = abstract
  392. abstractItem.Version = abstractObj.Version + 1
  393. abstractItem.ModifyTime = time.Now()
  394. err = abstractItem.Update([]string{"content", "version", "modify_time"})
  395. if err != nil {
  396. return
  397. }
  398. AbstractToKnowledge(item, abstractItem, true)
  399. }
  400. }
  401. // DelDoc
  402. // @Description: 删除摘要向量库
  403. // @author: Roc
  404. // @datetime 2025-03-12 16:55:05
  405. // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
  406. // @return err error
  407. func DelDoc(wechatArticleAbstractList []*rag.WechatArticleAbstract) (err error) {
  408. defer func() {
  409. if err != nil {
  410. utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
  411. fmt.Println("删除摘要向量库文件失败,err:", err)
  412. }
  413. }()
  414. vectorKeyList := make([]string, 0)
  415. wechatArticleAbstractIdList := make([]int, 0)
  416. for _, v := range wechatArticleAbstractList {
  417. if v.VectorKey == `` {
  418. continue
  419. }
  420. vectorKeyList = append(vectorKeyList, v.VectorKey)
  421. wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.WechatArticleAbstractId)
  422. }
  423. // 没有就不删除
  424. if len(vectorKeyList) <= 0 {
  425. return
  426. }
  427. _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
  428. if err != nil {
  429. err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
  430. return
  431. }
  432. //fmt.Println(resp)
  433. obj := rag.WechatArticleAbstract{}
  434. err = obj.DelVectorKey(wechatArticleAbstractIdList)
  435. return
  436. }
  437. // DelLlmDoc
  438. // @Description: 删除摘要向量库
  439. // @author: Roc
  440. // @datetime 2025-03-12 16:55:05
  441. // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
  442. // @return err error
  443. func DelLlmDoc(vectorKeyList []string, wechatArticleAbstractIdList []int) (err error) {
  444. defer func() {
  445. if err != nil {
  446. utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
  447. fmt.Println("删除摘要向量库文件失败,err:", err)
  448. }
  449. }()
  450. // 没有就不删除
  451. if len(vectorKeyList) <= 0 {
  452. return
  453. }
  454. _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
  455. if err != nil {
  456. err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
  457. return
  458. }
  459. //fmt.Println(resp)
  460. obj := rag.WechatArticleAbstract{}
  461. err = obj.DelVectorKey(wechatArticleAbstractIdList)
  462. return
  463. }
  464. func getAnswerByContent(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
  465. historyList := make([]eta_llm_http.HistoryContent, 0)
  466. addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
  467. questionObj := rag.Question{}
  468. questionList, err := questionObj.GetListByCondition(``, ``, []interface{}{}, 0, 100)
  469. if err != nil {
  470. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  471. return
  472. }
  473. // 没问题就不生成了
  474. if len(questionList) <= 0 {
  475. return
  476. }
  477. //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
  478. questionStrList := []string{`你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`}
  479. for _, v := range questionList {
  480. questionStrList = append(questionStrList, v.QuestionContent)
  481. }
  482. questionStr := strings.Join(questionStrList, "\n")
  483. originalAnswer, result, err := llm.ChatByFile(docId, questionStr, historyList)
  484. fmt.Println(result)
  485. if err != nil {
  486. err = fmt.Errorf("LLM对话失败,Err:" + err.Error())
  487. return
  488. }
  489. // 提取 </think> 后面的内容
  490. thinkEndIndex := strings.Index(result.Answer, "</think>")
  491. if thinkEndIndex != -1 {
  492. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  493. } else {
  494. answer = result.Answer
  495. }
  496. answer = strings.TrimSpace(answer)
  497. // 待入库的数据
  498. addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  499. WechatArticleChatRecordId: 0,
  500. WechatArticleId: wechatArticleId,
  501. ChatUserType: "user",
  502. Content: questionStr,
  503. SendTime: time.Now(),
  504. CreatedTime: time.Now(),
  505. UpdateTime: time.Now(),
  506. }, &rag.WechatArticleChatRecord{
  507. WechatArticleChatRecordId: 0,
  508. WechatArticleId: wechatArticleId,
  509. ChatUserType: "assistant",
  510. Content: originalAnswer,
  511. SendTime: time.Now(),
  512. CreatedTime: time.Now(),
  513. UpdateTime: time.Now(),
  514. })
  515. return
  516. }
  517. // ArticleToKnowledge
  518. // @Description: 原文入向量库
  519. // @author: Roc
  520. // @datetime 2025-03-10 16:13:16
  521. // @param item *rag.WechatArticle
  522. func ArticleToKnowledge(item *rag.WechatArticle) {
  523. if item.TextContent == `` {
  524. return
  525. }
  526. var err error
  527. defer func() {
  528. if err != nil {
  529. utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err)
  530. fmt.Println("上传文章原文到知识库失败,err:", err)
  531. }
  532. }()
  533. // 生成临时文件
  534. //dateDir := time.Now().Format("20060102")
  535. //uploadDir := "./static/ai/article/" + dateDir
  536. uploadDir := "./static/ai/article"
  537. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  538. if err != nil {
  539. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  540. return
  541. }
  542. //fileName := utils.RemoveSpecialChars(item.Title) + `.md`
  543. fileName := utils.MD5(item.Title) + `.md`
  544. tmpFilePath := uploadDir + "/" + fileName
  545. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  546. if err != nil {
  547. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  548. return
  549. }
  550. defer func() {
  551. os.Remove(tmpFilePath)
  552. }()
  553. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName]
  554. // 上传临时文件到LLM
  555. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  556. if err != nil {
  557. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  558. return
  559. }
  560. if len(uploadFileResp.FailedFiles) > 0 {
  561. for _, v := range uploadFileResp.FailedFiles {
  562. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  563. }
  564. }
  565. item.VectorKey = tmpFilePath
  566. item.ModifyTime = time.Now()
  567. err = item.Update([]string{"vector_key", "modify_time"})
  568. }
  569. // AbstractToKnowledge
  570. // @Description: 摘要入向量库
  571. // @author: Roc
  572. // @datetime 2025-03-10 16:14:59
  573. // @param wechatArticleItem *rag.WechatArticle
  574. // @param abstractItem *rag.WechatArticleAbstract
  575. func AbstractToKnowledge(wechatArticleItem *rag.WechatArticle, abstractItem *rag.WechatArticleAbstract, isReUpload bool) {
  576. if abstractItem.Content == `` {
  577. return
  578. }
  579. // 已经生成了,那就不处理了
  580. if abstractItem.VectorKey != `` && !isReUpload {
  581. return
  582. }
  583. var err error
  584. defer func() {
  585. if err != nil {
  586. utils.FileLog.Error("摘要入向量库失败,err:%v", err)
  587. fmt.Println("摘要入向量库失败,err:", err)
  588. }
  589. // 数据入ES库
  590. go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  591. }()
  592. // 生成临时文件
  593. //dateDir := time.Now().Format("20060102")
  594. //uploadDir := + "./static/ai/article/" + dateDir
  595. uploadDir := "./static/ai/abstract"
  596. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  597. if err != nil {
  598. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  599. return
  600. }
  601. fileName := utils.RemoveSpecialChars(wechatArticleItem.Title) + `.md`
  602. tmpFilePath := uploadDir + "/" + fileName
  603. err = utils.SaveToFile(abstractItem.Content, tmpFilePath)
  604. if err != nil {
  605. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  606. return
  607. }
  608. defer func() {
  609. os.Remove(tmpFilePath)
  610. }()
  611. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
  612. // 上传临时文件到LLM
  613. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  614. if err != nil {
  615. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  616. return
  617. }
  618. if len(uploadFileResp.FailedFiles) > 0 {
  619. for _, v := range uploadFileResp.FailedFiles {
  620. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  621. }
  622. }
  623. abstractItem.VectorKey = tmpFilePath
  624. abstractItem.ModifyTime = time.Now()
  625. err = abstractItem.Update([]string{"vector_key", "modify_time"})
  626. }
  627. // replaceWechatPlatformPic
  628. // @Description: 替换公众号头像
  629. // @author: Roc
  630. // @datetime 2025-03-11 09:38:24
  631. // @param item *rag.WechatPlatform
  632. func replaceWechatPlatformPic(item *rag.WechatPlatform) {
  633. var err error
  634. defer func() {
  635. if err != nil {
  636. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  637. fmt.Println("替换公众号头像失败,err:", err)
  638. }
  639. }()
  640. if item.RoundHeadImg == `` {
  641. return
  642. }
  643. resourceUrl, err := downloadWxPicAndUploadToOss(item.RoundHeadImg, `head_img`)
  644. if err != nil {
  645. return
  646. }
  647. item.RoundHeadImg = resourceUrl
  648. err = item.Update([]string{"round_head_img"})
  649. }
  650. // replaceWechatArticleCoverPic
  651. // @Description: 替换文章封面图
  652. // @author: Roc
  653. // @datetime 2025-03-11 09:38:35
  654. // @param item *rag.WechatArticle
  655. func replaceWechatArticleCoverPic(item *rag.WechatArticle) {
  656. var err error
  657. defer func() {
  658. if err != nil {
  659. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  660. fmt.Println("替换公众号头像失败,err:", err)
  661. }
  662. // 数据入ES库
  663. AddOrEditEsWechatArticle(item.WechatArticleId)
  664. }()
  665. if item.CoverUrl == `` {
  666. return
  667. }
  668. resourceUrl, err := downloadWxPicAndUploadToOss(item.CoverUrl, `cover_url`)
  669. if err != nil {
  670. return
  671. }
  672. item.CoverUrl = resourceUrl
  673. err = item.Update([]string{"cover_url"})
  674. }
  675. // replaceWechatArticlePic
  676. // @Description: 替换文章内容图
  677. // @author: Roc
  678. // @datetime 2025-03-11 09:38:35
  679. // @param item *rag.WechatArticle
  680. func ReplaceWechatArticlePic(item *rag.WechatArticle) {
  681. var err error
  682. defer func() {
  683. if err != nil {
  684. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  685. fmt.Println("替换公众号头像失败,err:", err)
  686. }
  687. }()
  688. if item.Content == `` {
  689. return
  690. }
  691. content, err := ReplaceHtmlImg(html.UnescapeString(item.Content))
  692. if err != nil {
  693. return
  694. }
  695. item.Content = html.EscapeString(content)
  696. err = item.Update([]string{"content"})
  697. return
  698. }
  699. // downloadWxPicAndUploadToOss
  700. // @Description: 下载微信图片并上传到OSS
  701. // @author: Roc
  702. // @datetime 2025-03-11 09:28:49
  703. // @param wxPicUrl string
  704. // @return resourceUrl string
  705. // @return err error
  706. func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, err error) {
  707. localFilePath, err := utils.DownloadWxImage(wxPicUrl)
  708. if err != nil {
  709. return
  710. }
  711. defer func() {
  712. os.Remove(localFilePath)
  713. }()
  714. ossClient := NewOssClient()
  715. if ossClient == nil {
  716. err = fmt.Errorf(`初始化OSS服务失败`)
  717. return
  718. }
  719. ext := path.Ext(localFilePath)
  720. fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), utils.GetRandStringNoSpecialChar(16), ext)
  721. //savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName
  722. savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName)
  723. resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath)
  724. if err != nil {
  725. err = fmt.Errorf("文件上传失败,Err:" + err.Error())
  726. return
  727. }
  728. return
  729. }
  730. // ReplaceHtmlImg
  731. // @Description: 将html中的图片替换成自己的
  732. // @author: Roc
  733. // @datetime 2025-03-11 14:32:00
  734. // @param htmlStr string
  735. // @return newHtml string
  736. // @return err error
  737. func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) {
  738. doc, err := html2.Parse(strings.NewReader(htmlStr))
  739. if err != nil {
  740. return
  741. }
  742. if err != nil {
  743. return
  744. }
  745. handleNode(doc)
  746. // 将处理后的HTML节点重新渲染为HTML字符串
  747. var buf bytes.Buffer
  748. if err = html2.Render(&buf, doc); err != nil {
  749. fmt.Println(err)
  750. return
  751. }
  752. newHtml = buf.String()
  753. return
  754. }
  755. // handleNode
  756. // @Description: html节点处理
  757. // @author: Roc
  758. // @datetime 2025-03-11 14:32:45
  759. // @param n *html2.Node
  760. func handleNode(n *html2.Node) {
  761. if n.Type == html2.ElementNode {
  762. if n.Data == "img" {
  763. for k, attr := range n.Attr {
  764. // 新增代码:如果标签是img且存在data-src属性,则将data-src的值赋给src
  765. if n.Data == "img" && attr.Key == "src" {
  766. resourceUrl, tmpErr := downloadWxPicAndUploadToOss(attr.Val, `article`)
  767. if tmpErr != nil {
  768. continue
  769. }
  770. attr.Val = resourceUrl
  771. }
  772. n.Attr[k] = attr
  773. }
  774. }
  775. }
  776. for c := n.FirstChild; c != nil; c = c.NextSibling {
  777. handleNode(c)
  778. }
  779. }
  780. // AddOrEditEsWechatPlatformId
  781. // @Description: 批量处理某个公众号下的文章到ES
  782. // @author: Roc
  783. // @datetime 2025-03-13 11:01:28
  784. // @param articleId int
  785. func AddOrEditEsWechatPlatformId(wechatPlatformId int) {
  786. if utils.EsWechatArticleName == `` {
  787. return
  788. }
  789. obj := rag.WechatArticle{}
  790. list, _ := obj.GetListByCondition(` wechat_article_id `, ` AND wechat_platform_id = ? `, []interface{}{wechatPlatformId}, 0, 1000000)
  791. for _, item := range list {
  792. AddOrEditEsWechatArticle(item.WechatArticleId)
  793. }
  794. }
  795. // AddOrEditEsWechatArticle
  796. // @Description: 新增/编辑微信文章入ES
  797. // @author: Roc
  798. // @datetime 2025-03-13 11:01:28
  799. // @param articleId int
  800. func AddOrEditEsWechatArticle(articleId int) {
  801. if utils.EsWechatArticleName == `` {
  802. return
  803. }
  804. var err error
  805. defer func() {
  806. if err != nil {
  807. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  808. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  809. }
  810. }()
  811. obj := rag.WechatArticle{}
  812. articleInfo, err := obj.GetById(articleId)
  813. if err != nil {
  814. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  815. return
  816. }
  817. platformObj := rag.WechatPlatform{}
  818. platformInfo, err := platformObj.GetById(articleInfo.WechatPlatformId)
  819. if err != nil {
  820. err = fmt.Errorf("获取公众号平台信息失败,Err:" + err.Error())
  821. return
  822. }
  823. esItem := elastic.WechatArticleAndPlatform{
  824. WechatArticleId: articleInfo.WechatArticleId,
  825. WechatPlatformId: articleInfo.WechatPlatformId,
  826. FakeId: articleInfo.FakeId,
  827. Title: articleInfo.Title,
  828. Link: articleInfo.Link,
  829. CoverUrl: articleInfo.CoverUrl,
  830. Description: articleInfo.Description,
  831. //Content: articleInfo.Content,
  832. //TextContent: articleInfo.TextContent,
  833. //AbstractStatus: articleInfo.AbstractStatus,
  834. Country: articleInfo.Country,
  835. Province: articleInfo.Province,
  836. City: articleInfo.City,
  837. ArticleCreateTime: articleInfo.ArticleCreateTime,
  838. IsDeleted: articleInfo.IsDeleted,
  839. ModifyTime: articleInfo.ModifyTime,
  840. CreateTime: articleInfo.CreateTime,
  841. Nickname: platformInfo.Nickname,
  842. Alias: platformInfo.Alias,
  843. RoundHeadImg: platformInfo.RoundHeadImg,
  844. }
  845. err = elastic.WechatArticleEsAddOrEdit(strconv.Itoa(articleInfo.WechatArticleId), esItem)
  846. }
  847. // AddOrEditEsWechatArticleAbstract
  848. // @Description: 新增/编辑微信文章摘要入ES
  849. // @author: Roc
  850. // @datetime 2025-03-13 14:13:47
  851. // @param articleAbstractId int
  852. func AddOrEditEsWechatArticleAbstract(articleAbstractId int) {
  853. if utils.EsWechatArticleAbstractName == `` {
  854. return
  855. }
  856. var err error
  857. defer func() {
  858. if err != nil {
  859. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  860. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  861. }
  862. }()
  863. obj := rag.WechatArticleAbstract{}
  864. abstractInfo, err := obj.GetById(articleAbstractId)
  865. if err != nil {
  866. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  867. return
  868. }
  869. articleObj := rag.WechatArticle{}
  870. articleInfo, err := articleObj.GetById(abstractInfo.WechatArticleId)
  871. if err != nil {
  872. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  873. return
  874. }
  875. // 公众号平台关联的标签品种
  876. tagObj := rag.WechatPlatformTagMapping{}
  877. tagMappingList, err := tagObj.GetListByCondition(` AND wechat_platform_id = ? `, []interface{}{articleInfo.WechatPlatformId}, 0, 10000)
  878. if err != nil {
  879. err = fmt.Errorf("获取公众号平台关联的品种信息失败,Err:" + err.Error())
  880. return
  881. }
  882. tagIdList := make([]int, 0)
  883. for _, v := range tagMappingList {
  884. tagIdList = append(tagIdList, v.TagId)
  885. }
  886. esItem := elastic.WechatArticleAbstractItem{
  887. WechatArticleAbstractId: abstractInfo.WechatArticleAbstractId,
  888. WechatArticleId: abstractInfo.WechatArticleId,
  889. WechatPlatformId: articleInfo.WechatPlatformId,
  890. Abstract: abstractInfo.Content,
  891. Version: abstractInfo.Version,
  892. VectorKey: abstractInfo.VectorKey,
  893. ModifyTime: articleInfo.ModifyTime,
  894. CreateTime: articleInfo.CreateTime,
  895. Title: articleInfo.Title,
  896. Link: articleInfo.Link,
  897. TagIdList: tagIdList,
  898. }
  899. err = elastic.WechatArticleAbstractEsAddOrEdit(strconv.Itoa(articleAbstractId), esItem)
  900. }
  901. // DelEsWechatArticleAbstract
  902. // @Description: 删除ES中的微信文章摘要
  903. // @author: Roc
  904. // @datetime 2025-03-13 14:13:47
  905. // @param articleAbstractId int
  906. func DelEsWechatArticleAbstract(articleAbstractId int) {
  907. if utils.EsWechatArticleAbstractName == `` {
  908. return
  909. }
  910. var err error
  911. defer func() {
  912. if err != nil {
  913. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  914. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  915. }
  916. }()
  917. err = elastic.WechatArticleAbstractEsDel(strconv.Itoa(articleAbstractId))
  918. }
  919. // AddOrEditEsRagQuestion
  920. // @Description: 新增/编辑知识库问题入ES
  921. // @author: Roc
  922. // @datetime 2025-03-28 11:25:50
  923. // @param questionId int
  924. func AddOrEditEsRagQuestion(questionId int) {
  925. if utils.EsWechatArticleName == `` {
  926. return
  927. }
  928. var err error
  929. defer func() {
  930. if err != nil {
  931. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  932. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  933. }
  934. }()
  935. obj := rag.Question{}
  936. questionInfo, err := obj.GetByID(questionId)
  937. if err != nil {
  938. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  939. return
  940. }
  941. esItem := elastic.RagQuestionItem{
  942. QuestionId: questionInfo.QuestionId,
  943. QuestionTitle: questionInfo.QuestionTitle,
  944. QuestionContent: questionInfo.QuestionContent,
  945. Sort: questionInfo.Sort,
  946. SysUserId: questionInfo.SysUserId,
  947. SysUserRealName: questionInfo.SysUserRealName,
  948. ModifyTime: questionInfo.ModifyTime,
  949. CreateTime: questionInfo.CreateTime,
  950. }
  951. err = elastic.RagQuestionEsAddOrEdit(strconv.Itoa(questionInfo.QuestionId), esItem)
  952. }
  953. // DelEsRagQuestion
  954. // @Description: 删除ES中的知识库问题
  955. // @author: Roc
  956. // @datetime 2025-03-28 11:26:40
  957. // @param questionId int
  958. func DelEsRagQuestion(questionId int) {
  959. if utils.EsWechatArticleAbstractName == `` {
  960. return
  961. }
  962. var err error
  963. defer func() {
  964. if err != nil {
  965. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  966. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  967. }
  968. }()
  969. err = elastic.RagQuestionEsDel(strconv.Itoa(questionId))
  970. }