wechat_platform.go 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146
  1. package services
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "eta/eta_api/cache"
  6. "eta/eta_api/models"
  7. "eta/eta_api/models/rag"
  8. "eta/eta_api/services/elastic"
  9. "eta/eta_api/services/llm"
  10. "eta/eta_api/services/llm/facade"
  11. "eta/eta_api/utils"
  12. "eta/eta_api/utils/llm/eta_llm/eta_llm_http"
  13. "fmt"
  14. html2 "golang.org/x/net/html"
  15. "html"
  16. "os"
  17. "path"
  18. "strconv"
  19. "strings"
  20. "time"
  21. )
  22. // AddWechatPlatform
  23. // @Description: 添加新的公众号
  24. // @param item
  25. func AddWechatPlatform(item *rag.WechatPlatform) {
  26. var err error
  27. defer func() {
  28. if err != nil {
  29. utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
  30. }
  31. }()
  32. if item.FakeId != `` {
  33. return
  34. }
  35. if item.ArticleLink == `` {
  36. return
  37. }
  38. articleLink := item.ArticleLink
  39. articleDetail, err := llm.SearchByWechatArticle(item.ArticleLink)
  40. if err != nil {
  41. return
  42. }
  43. if articleDetail.Appuin == `` {
  44. err = fmt.Errorf("文章内未匹配到公众号唯一标识")
  45. return
  46. }
  47. wechatPlatform := new(rag.WechatPlatform)
  48. // 查找是否存在这个公众号id的
  49. wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
  50. if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
  51. err = tmpErr
  52. return
  53. }
  54. if tmpErr == nil {
  55. // 如果找到了,那么需要将当前的给移除掉
  56. err = item.Del()
  57. if err != nil {
  58. return
  59. }
  60. // 并将查出来的微信公众号摘出来的数据重新赋值
  61. item = wechatPlatformInfo
  62. } else if utils.IsErrNoRow(tmpErr) {
  63. // 如果没找到,那么就变更当前的信息
  64. item.FakeId = articleDetail.Appuin
  65. item.Nickname = articleDetail.Nickname
  66. //item.Alias = req.Alias
  67. item.RoundHeadImg = articleDetail.RoundHeadImg
  68. //item.ServiceType = req.ServiceType
  69. item.Signature = articleDetail.ProfileSignature
  70. //item.Verified = verified
  71. item.ModifyTime = time.Now()
  72. err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
  73. if err != nil {
  74. return
  75. }
  76. // 修改公众号头像
  77. go replaceWechatPlatformPic(item)
  78. }
  79. // 把刚搜索的文章加入到文章库中
  80. AddWechatArticle(item, articleLink, articleDetail, nil)
  81. BeachAddWechatArticle(item, 10)
  82. fmt.Println("公众号入库完成")
  83. return
  84. }
  85. // AddWechatArticle
  86. // @Description: 添加公众号文章入库
  87. // @author: Roc
  88. // @datetime 2025-03-05 13:24:14
  89. // @param item *rag.WechatPlatform
  90. // @param link string
  91. // @param articleDetail WechatArticleDataResp
  92. func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail llm.WechatArticleDataResp, articleMenu *llm.ArticleMenu) {
  93. var err error
  94. defer func() {
  95. if err != nil {
  96. utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
  97. }
  98. }()
  99. obj := new(rag.WechatArticle)
  100. _, err = obj.GetByLink(articleLink)
  101. if err == nil {
  102. // 文章已经入库了,不需要重复入库
  103. return
  104. }
  105. // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
  106. if !utils.IsErrNoRow(err) {
  107. return
  108. }
  109. // 这个时候,说明数据库中没有这个文章,那么需要文章入库
  110. err = nil
  111. var publishAt time.Time
  112. if articleDetail.CreateAt != `` {
  113. createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
  114. if tmpErr == nil {
  115. publishAt = time.Unix(int64(createAtInt), 1000)
  116. }
  117. } else if articleMenu != nil {
  118. publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
  119. }
  120. content := articleDetail.HtmlContent
  121. // 图片下载下来到本地,如果成功了,那么就用新的
  122. tmpContent, err := ReplaceHtmlImg(content)
  123. if tmpContent != `` {
  124. content = tmpContent
  125. }
  126. obj = &rag.WechatArticle{
  127. WechatArticleId: 0,
  128. WechatPlatformId: item.WechatPlatformId,
  129. FakeId: item.FakeId,
  130. Title: articleDetail.Title,
  131. Link: articleLink,
  132. CoverUrl: articleDetail.CoverUrl,
  133. Description: articleDetail.Desc,
  134. Content: html.EscapeString(content),
  135. TextContent: articleDetail.TextContent,
  136. Country: articleDetail.CountryName,
  137. Province: articleDetail.ProvinceName,
  138. City: articleDetail.CityName,
  139. //Abstract: "",
  140. //ArticleCreateTime: createAt,
  141. ModifyTime: time.Now(),
  142. CreateTime: time.Now(),
  143. }
  144. if !publishAt.IsZero() {
  145. obj.ArticleCreateTime = publishAt
  146. }
  147. if articleMenu != nil {
  148. obj.Title = articleMenu.Title
  149. //obj.Link = articleMenu.Link
  150. obj.CoverUrl = articleMenu.Cover
  151. obj.Description = articleMenu.Digest
  152. }
  153. err = obj.Create()
  154. // 修改文章封面图
  155. go replaceWechatArticleCoverPic(obj)
  156. // 文章入库成功后,需要将相关信息入摘要库
  157. go cache.AddWechatArticleLlmOpToCache(obj.WechatArticleId, ``)
  158. }
  159. // BeachAddWechatArticle
  160. // @Description: 批量添加公众号文章
  161. // @param item
  162. // @param num
  163. // @return err
  164. func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
  165. var err error
  166. defer func() {
  167. //fmt.Println("公众号文章批量入库完成")
  168. if err != nil {
  169. utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
  170. fmt.Println("公众号文章批量入库失败,err:", err)
  171. }
  172. }()
  173. if item.FakeId == `` {
  174. return
  175. }
  176. wechatArticleObj := new(rag.WechatArticle)
  177. // 获取公众号的文章列表
  178. articleListResp, err := llm.SearchByWechatArticleList(item.FakeId, num)
  179. if err != nil {
  180. return
  181. }
  182. for _, articleMenu := range articleListResp.List {
  183. // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
  184. _, err = wechatArticleObj.GetByLink(articleMenu.Link)
  185. if err == nil {
  186. // 文章已经入库了,不需要重复入库
  187. continue
  188. }
  189. if !utils.IsErrNoRow(err) {
  190. return
  191. }
  192. err = nil
  193. articleDetail, tmpErr := llm.SearchByWechatArticle(articleMenu.Link)
  194. if tmpErr != nil {
  195. err = tmpErr
  196. return
  197. }
  198. // 把刚搜索的文章加入到指标库
  199. AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
  200. //time.Sleep(10 * time.Second)
  201. // 随机休眠,至少大于10s
  202. sleepTimeInt := utils.GetRandInt(10, 20)
  203. if sleepTimeInt < 10 {
  204. sleepTimeInt = 10
  205. }
  206. time.Sleep(time.Duration(sleepTimeInt) * time.Second)
  207. }
  208. return
  209. }
  210. //
  211. //// GenerateArticleAbstract
  212. //// @Description: 文章摘要生成
  213. //// @author: Roc
  214. //// @datetime 2025-03-10 16:17:53
  215. //// @param item *rag.WechatArticle
  216. //func GenerateArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
  217. // var err error
  218. // defer func() {
  219. // if err != nil {
  220. // utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  221. // fmt.Println("文章转临时文件失败,err:", err)
  222. // }
  223. // }()
  224. //
  225. // // 内容为空,那就不需要生成摘要
  226. // if item.TextContent == `` {
  227. // return
  228. // }
  229. //
  230. // abstractObj := rag.WechatArticleAbstract{}
  231. // tmpAbstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
  232. // // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
  233. // if err == nil && !forceGenerate {
  234. // // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
  235. // WechatArticleAbstractToKnowledge(item, tmpAbstractItem, false)
  236. //
  237. // return
  238. // }
  239. // if !utils.IsErrNoRow(err) {
  240. // return
  241. // }
  242. //
  243. // //开始对话
  244. // abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_ETA_REPORT)
  245. // if tmpErr != nil {
  246. // err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  247. // return
  248. // }
  249. //
  250. // // 添加问答记录
  251. // if len(addArticleChatRecordList) > 0 {
  252. // recordObj := rag.WechatArticleChatRecord{}
  253. // err = recordObj.CreateInBatches(addArticleChatRecordList)
  254. // if err != nil {
  255. // return
  256. // }
  257. // }
  258. //
  259. // if abstract != `` {
  260. // if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  261. // item.AbstractStatus = 2
  262. // item.ModifyTime = time.Now()
  263. // err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  264. // return
  265. // }
  266. // item.AbstractStatus = 1
  267. // item.ModifyTime = time.Now()
  268. // err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  269. //
  270. // abstractItem := &rag.WechatArticleAbstract{
  271. // WechatArticleAbstractId: 0,
  272. // WechatArticleId: item.WechatArticleId,
  273. // Content: abstract,
  274. // Version: 0,
  275. // VectorKey: "",
  276. // ModifyTime: time.Now(),
  277. // CreateTime: time.Now(),
  278. // }
  279. // err = abstractItem.Create()
  280. // if err != nil {
  281. // return
  282. // }
  283. //
  284. // // 数据入ES库
  285. // go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  286. //
  287. // WechatArticleAbstractToKnowledge(item, abstractItem, false)
  288. // }
  289. //}
  290. // GenerateArticleAbstract
  291. // @Description: 文章摘要生成(默认提示词批量生成)
  292. // @author: Roc
  293. // @datetime 2025-03-10 16:17:53
  294. // @param item *rag.WechatArticle
  295. func GenerateWechatArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
  296. var err error
  297. defer func() {
  298. if err != nil {
  299. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  300. fmt.Println("文章转临时文件失败,err:", err)
  301. }
  302. }()
  303. // 内容为空,那就不需要生成摘要
  304. if item.TextContent == `` {
  305. return
  306. }
  307. questionObj := rag.Question{}
  308. questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100)
  309. if err != nil {
  310. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  311. return
  312. }
  313. // 没问题就不生成了
  314. if len(questionList) <= 0 {
  315. return
  316. }
  317. for _, question := range questionList {
  318. GenerateWechatArticleAbstractByQuestion(item, question, forceGenerate)
  319. }
  320. return
  321. }
  322. // GenerateArticleAbstractByQuestion
  323. // @Description: 文章摘要生成(根据提示词生成)
  324. // @author: Roc
  325. // @datetime 2025-03-10 16:17:53
  326. // @param item *rag.WechatArticle
  327. func GenerateWechatArticleAbstractByQuestion(item *rag.WechatArticle, question *rag.Question, forceGenerate bool) {
  328. var err error
  329. defer func() {
  330. if err != nil {
  331. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  332. fmt.Println("文章转临时文件失败,err:", err)
  333. }
  334. }()
  335. // 内容为空,那就不需要生成摘要
  336. if item.TextContent == `` {
  337. return
  338. }
  339. abstractObj := rag.WechatArticleAbstract{}
  340. abstractItem, err := abstractObj.GetByWechatArticleIdAndQuestionId(item.WechatArticleId, question.QuestionId)
  341. // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
  342. if err == nil && !forceGenerate {
  343. // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
  344. WechatArticleAbstractToKnowledge(item, abstractItem, false)
  345. return
  346. }
  347. if !utils.IsErrNoRow(err) {
  348. return
  349. }
  350. //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
  351. questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)
  352. //开始对话
  353. abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_ETA_REPORT, questionStr)
  354. if tmpErr != nil {
  355. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  356. return
  357. }
  358. // 添加问答记录
  359. if len(addArticleChatRecordList) > 0 {
  360. recordObj := rag.WechatArticleChatRecord{}
  361. err = recordObj.CreateInBatches(addArticleChatRecordList)
  362. if err != nil {
  363. return
  364. }
  365. }
  366. if abstract != `` {
  367. if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  368. item.AbstractStatus = 2
  369. item.ModifyTime = time.Now()
  370. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  371. return
  372. }
  373. item.AbstractStatus = 1
  374. item.ModifyTime = time.Now()
  375. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  376. if abstractItem == nil || abstractItem.WechatArticleAbstractId <= 0 {
  377. abstractItem = &rag.WechatArticleAbstract{
  378. WechatArticleAbstractId: 0,
  379. WechatArticleId: item.WechatArticleId,
  380. Content: abstract,
  381. Version: 1,
  382. VectorKey: "",
  383. ModifyTime: time.Now(),
  384. CreateTime: time.Now(),
  385. QuestionId: question.QuestionId,
  386. Tags: "",
  387. QuestionContent: question.QuestionContent,
  388. }
  389. err = abstractItem.Create()
  390. } else {
  391. abstractItem.Content = abstract
  392. abstractItem.Version++
  393. abstractItem.ModifyTime = time.Now()
  394. abstractItem.Tags = ""
  395. abstractItem.QuestionContent = question.QuestionContent
  396. err = abstractItem.Update([]string{"content", "version", "modify_time", "tags", "question_content"})
  397. }
  398. if err != nil {
  399. return
  400. }
  401. // 数据入ES库
  402. go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  403. WechatArticleAbstractToKnowledge(item, abstractItem, false)
  404. }
  405. }
  406. // DelDoc
  407. // @Description: 删除摘要向量库
  408. // @author: Roc
  409. // @datetime 2025-03-12 16:55:05
  410. // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
  411. // @return err error
  412. func DelDoc(wechatArticleAbstractList []*rag.WechatArticleAbstract) (err error) {
  413. defer func() {
  414. if err != nil {
  415. utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
  416. fmt.Println("删除摘要向量库文件失败,err:", err)
  417. }
  418. }()
  419. vectorKeyList := make([]string, 0)
  420. wechatArticleAbstractIdList := make([]int, 0)
  421. for _, v := range wechatArticleAbstractList {
  422. if v.VectorKey == `` {
  423. continue
  424. }
  425. vectorKeyList = append(vectorKeyList, v.VectorKey)
  426. wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.WechatArticleAbstractId)
  427. }
  428. // 没有就不删除
  429. if len(vectorKeyList) <= 0 {
  430. return
  431. }
  432. _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
  433. if err != nil {
  434. err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
  435. return
  436. }
  437. //fmt.Println(resp)
  438. obj := rag.WechatArticleAbstract{}
  439. err = obj.DelVectorKey(wechatArticleAbstractIdList)
  440. return
  441. }
  442. // DelLlmDoc
  443. // @Description: 删除摘要向量库
  444. // @author: Roc
  445. // @datetime 2025-03-12 16:55:05
  446. // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
  447. // @return err error
  448. func DelLlmDoc(vectorKeyList []string, wechatArticleAbstractIdList []int) (err error) {
  449. defer func() {
  450. if err != nil {
  451. utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
  452. fmt.Println("删除摘要向量库文件失败,err:", err)
  453. }
  454. }()
  455. // 没有就不删除
  456. if len(vectorKeyList) <= 0 {
  457. return
  458. }
  459. _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
  460. if err != nil {
  461. err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
  462. return
  463. }
  464. //fmt.Println(resp)
  465. obj := rag.WechatArticleAbstract{}
  466. err = obj.DelVectorKey(wechatArticleAbstractIdList)
  467. return
  468. }
  469. func getAnswerByContent(articleId int, source int, questionStr string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
  470. addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
  471. result, err := facade.AIGCBaseOnPromote(facade.AIGC{
  472. Promote: questionStr,
  473. Source: source,
  474. ArticleId: articleId,
  475. LLMModel: `deepseek-r1:32b`,
  476. })
  477. if err != nil {
  478. return
  479. }
  480. // JSON字符串转字节
  481. answerByte, err := json.Marshal(result)
  482. if err != nil {
  483. return
  484. }
  485. originalAnswer := string(answerByte)
  486. // 提取 </think> 后面的内容
  487. thinkEndIndex := strings.Index(result.Answer, "</think>")
  488. if thinkEndIndex != -1 {
  489. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  490. } else {
  491. answer = result.Answer
  492. }
  493. answer = strings.TrimSpace(answer)
  494. // 待入库的数据
  495. addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  496. WechatArticleChatRecordId: 0,
  497. WechatArticleId: articleId,
  498. ChatUserType: "user",
  499. Content: questionStr,
  500. SendTime: time.Now(),
  501. CreatedTime: time.Now(),
  502. UpdateTime: time.Now(),
  503. }, &rag.WechatArticleChatRecord{
  504. WechatArticleChatRecordId: 0,
  505. WechatArticleId: articleId,
  506. ChatUserType: "assistant",
  507. Content: originalAnswer,
  508. SendTime: time.Now(),
  509. CreatedTime: time.Now(),
  510. UpdateTime: time.Now(),
  511. })
  512. return
  513. }
  514. func getAnswerByContentBak(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
  515. historyList := make([]eta_llm_http.HistoryContent, 0)
  516. addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
  517. questionObj := rag.Question{}
  518. questionList, err := questionObj.GetListByCondition(``, ``, []interface{}{}, 0, 100)
  519. if err != nil {
  520. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  521. return
  522. }
  523. // 没问题就不生成了
  524. if len(questionList) <= 0 {
  525. return
  526. }
  527. //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
  528. questionStrList := []string{`你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`}
  529. for _, v := range questionList {
  530. questionStrList = append(questionStrList, v.QuestionContent)
  531. }
  532. questionStr := strings.Join(questionStrList, "\n")
  533. originalAnswer, result, err := llm.ChatByFile(docId, questionStr, historyList)
  534. fmt.Println(result)
  535. if err != nil {
  536. err = fmt.Errorf("LLM对话失败,Err:" + err.Error())
  537. return
  538. }
  539. // 提取 </think> 后面的内容
  540. thinkEndIndex := strings.Index(result.Answer, "</think>")
  541. if thinkEndIndex != -1 {
  542. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  543. } else {
  544. answer = result.Answer
  545. }
  546. answer = strings.TrimSpace(answer)
  547. // 待入库的数据
  548. addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  549. WechatArticleChatRecordId: 0,
  550. WechatArticleId: wechatArticleId,
  551. ChatUserType: "user",
  552. Content: questionStr,
  553. SendTime: time.Now(),
  554. CreatedTime: time.Now(),
  555. UpdateTime: time.Now(),
  556. }, &rag.WechatArticleChatRecord{
  557. WechatArticleChatRecordId: 0,
  558. WechatArticleId: wechatArticleId,
  559. ChatUserType: "assistant",
  560. Content: originalAnswer,
  561. SendTime: time.Now(),
  562. CreatedTime: time.Now(),
  563. UpdateTime: time.Now(),
  564. })
  565. return
  566. }
  567. // ArticleToKnowledge
  568. // @Description: 原文入向量库
  569. // @author: Roc
  570. // @datetime 2025-03-10 16:13:16
  571. // @param item *rag.WechatArticle
  572. func ArticleToKnowledge(item *rag.WechatArticle) {
  573. if item.TextContent == `` {
  574. return
  575. }
  576. var err error
  577. defer func() {
  578. if err != nil {
  579. utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err)
  580. fmt.Println("上传文章原文到知识库失败,err:", err)
  581. }
  582. }()
  583. // 生成临时文件
  584. //dateDir := time.Now().Format("20060102")
  585. //uploadDir := "./static/ai/article/" + dateDir
  586. uploadDir := "./static/ai/article"
  587. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  588. if err != nil {
  589. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  590. return
  591. }
  592. //fileName := utils.RemoveSpecialChars(item.Title) + `.md`
  593. fileName := utils.MD5(item.Title) + `.md`
  594. tmpFilePath := uploadDir + "/" + fileName
  595. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  596. if err != nil {
  597. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  598. return
  599. }
  600. defer func() {
  601. os.Remove(tmpFilePath)
  602. }()
  603. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName]
  604. // 上传临时文件到LLM
  605. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  606. if err != nil {
  607. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  608. return
  609. }
  610. if len(uploadFileResp.FailedFiles) > 0 {
  611. for _, v := range uploadFileResp.FailedFiles {
  612. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  613. }
  614. }
  615. item.VectorKey = tmpFilePath
  616. item.ModifyTime = time.Now()
  617. err = item.Update([]string{"vector_key", "modify_time"})
  618. }
  619. // WechatArticleAbstractToKnowledge
  620. // @Description: 摘要入向量库
  621. // @author: Roc
  622. // @datetime 2025-03-10 16:14:59
  623. // @param wechatArticleItem *rag.WechatArticle
  624. // @param abstractItem *rag.WechatArticleAbstract
  625. func WechatArticleAbstractToKnowledge(wechatArticleItem *rag.WechatArticle, abstractItem *rag.WechatArticleAbstract, isReUpload bool) {
  626. if abstractItem.Content == `` {
  627. return
  628. }
  629. // 已经生成了,那就不处理了
  630. if abstractItem.VectorKey != `` && !isReUpload {
  631. return
  632. }
  633. var err error
  634. defer func() {
  635. if err != nil {
  636. utils.FileLog.Error("摘要入向量库失败,err:%v", err)
  637. fmt.Println("摘要入向量库失败,err:", err)
  638. }
  639. // 数据入ES库
  640. go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  641. }()
  642. // 生成临时文件
  643. //dateDir := time.Now().Format("20060102")
  644. //uploadDir := + "./static/ai/article/" + dateDir
  645. uploadDir := "./static/ai/abstract"
  646. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  647. if err != nil {
  648. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  649. return
  650. }
  651. fileName := utils.MD5(fmt.Sprintf("%d_%d", utils.AI_ARTICLE_SOURCE_WECHAT, wechatArticleItem.WechatArticleId)) + `.md`
  652. tmpFilePath := uploadDir + "/" + fileName
  653. err = utils.SaveToFile(abstractItem.Content, tmpFilePath)
  654. if err != nil {
  655. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  656. return
  657. }
  658. defer func() {
  659. os.Remove(tmpFilePath)
  660. }()
  661. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
  662. // 上传临时文件到LLM
  663. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  664. if err != nil {
  665. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  666. return
  667. }
  668. if len(uploadFileResp.FailedFiles) > 0 {
  669. for _, v := range uploadFileResp.FailedFiles {
  670. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  671. }
  672. }
  673. abstractItem.VectorKey = tmpFilePath
  674. abstractItem.ModifyTime = time.Now()
  675. err = abstractItem.Update([]string{"vector_key", "modify_time"})
  676. }
  677. // replaceWechatPlatformPic
  678. // @Description: 替换公众号头像
  679. // @author: Roc
  680. // @datetime 2025-03-11 09:38:24
  681. // @param item *rag.WechatPlatform
  682. func replaceWechatPlatformPic(item *rag.WechatPlatform) {
  683. var err error
  684. defer func() {
  685. if err != nil {
  686. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  687. fmt.Println("替换公众号头像失败,err:", err)
  688. }
  689. }()
  690. if item.RoundHeadImg == `` {
  691. return
  692. }
  693. resourceUrl, err := downloadWxPicAndUploadToOss(item.RoundHeadImg, `head_img`)
  694. if err != nil {
  695. return
  696. }
  697. item.RoundHeadImg = resourceUrl
  698. err = item.Update([]string{"round_head_img"})
  699. }
  700. // replaceWechatArticleCoverPic
  701. // @Description: 替换文章封面图
  702. // @author: Roc
  703. // @datetime 2025-03-11 09:38:35
  704. // @param item *rag.WechatArticle
  705. func replaceWechatArticleCoverPic(item *rag.WechatArticle) {
  706. var err error
  707. defer func() {
  708. if err != nil {
  709. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  710. fmt.Println("替换公众号头像失败,err:", err)
  711. }
  712. // 数据入ES库
  713. AddOrEditEsWechatArticle(item.WechatArticleId)
  714. }()
  715. if item.CoverUrl == `` {
  716. return
  717. }
  718. resourceUrl, err := downloadWxPicAndUploadToOss(item.CoverUrl, `cover_url`)
  719. if err != nil {
  720. return
  721. }
  722. item.CoverUrl = resourceUrl
  723. err = item.Update([]string{"cover_url"})
  724. }
  725. // replaceWechatArticlePic
  726. // @Description: 替换文章内容图
  727. // @author: Roc
  728. // @datetime 2025-03-11 09:38:35
  729. // @param item *rag.WechatArticle
  730. func ReplaceWechatArticlePic(item *rag.WechatArticle) {
  731. var err error
  732. defer func() {
  733. if err != nil {
  734. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  735. fmt.Println("替换公众号头像失败,err:", err)
  736. }
  737. }()
  738. if item.Content == `` {
  739. return
  740. }
  741. content, err := ReplaceHtmlImg(html.UnescapeString(item.Content))
  742. if err != nil {
  743. return
  744. }
  745. item.Content = html.EscapeString(content)
  746. err = item.Update([]string{"content"})
  747. return
  748. }
  749. // downloadWxPicAndUploadToOss
  750. // @Description: 下载微信图片并上传到OSS
  751. // @author: Roc
  752. // @datetime 2025-03-11 09:28:49
  753. // @param wxPicUrl string
  754. // @return resourceUrl string
  755. // @return err error
  756. func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, err error) {
  757. localFilePath, err := utils.DownloadWxImage(wxPicUrl)
  758. if err != nil {
  759. return
  760. }
  761. defer func() {
  762. os.Remove(localFilePath)
  763. }()
  764. ossClient := NewOssClient()
  765. if ossClient == nil {
  766. err = fmt.Errorf(`初始化OSS服务失败`)
  767. return
  768. }
  769. ext := path.Ext(localFilePath)
  770. fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), utils.GetRandStringNoSpecialChar(16), ext)
  771. //savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName
  772. savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName)
  773. resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath)
  774. if err != nil {
  775. err = fmt.Errorf("文件上传失败,Err:" + err.Error())
  776. return
  777. }
  778. return
  779. }
  780. // ReplaceHtmlImg
  781. // @Description: 将html中的图片替换成自己的
  782. // @author: Roc
  783. // @datetime 2025-03-11 14:32:00
  784. // @param htmlStr string
  785. // @return newHtml string
  786. // @return err error
  787. func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) {
  788. doc, err := html2.Parse(strings.NewReader(htmlStr))
  789. if err != nil {
  790. return
  791. }
  792. if err != nil {
  793. return
  794. }
  795. handleNode(doc)
  796. // 将处理后的HTML节点重新渲染为HTML字符串
  797. var buf bytes.Buffer
  798. if err = html2.Render(&buf, doc); err != nil {
  799. fmt.Println(err)
  800. return
  801. }
  802. newHtml = buf.String()
  803. return
  804. }
  805. // handleNode
  806. // @Description: html节点处理
  807. // @author: Roc
  808. // @datetime 2025-03-11 14:32:45
  809. // @param n *html2.Node
  810. func handleNode(n *html2.Node) {
  811. if n.Type == html2.ElementNode {
  812. if n.Data == "img" {
  813. for k, attr := range n.Attr {
  814. // 新增代码:如果标签是img且存在data-src属性,则将data-src的值赋给src
  815. if n.Data == "img" && attr.Key == "src" {
  816. resourceUrl, tmpErr := downloadWxPicAndUploadToOss(attr.Val, `article`)
  817. if tmpErr != nil {
  818. continue
  819. }
  820. attr.Val = resourceUrl
  821. }
  822. n.Attr[k] = attr
  823. }
  824. }
  825. }
  826. for c := n.FirstChild; c != nil; c = c.NextSibling {
  827. handleNode(c)
  828. }
  829. }
  830. // AddOrEditEsWechatPlatformId
  831. // @Description: 批量处理某个公众号下的文章到ES
  832. // @author: Roc
  833. // @datetime 2025-03-13 11:01:28
  834. // @param articleId int
  835. func AddOrEditEsWechatPlatformId(wechatPlatformId int) {
  836. if utils.EsWechatArticleName == `` {
  837. return
  838. }
  839. obj := rag.WechatArticle{}
  840. list, _ := obj.GetListByCondition(` wechat_article_id `, ` AND wechat_platform_id = ? `, []interface{}{wechatPlatformId}, 0, 1000000)
  841. for _, item := range list {
  842. AddOrEditEsWechatArticle(item.WechatArticleId)
  843. }
  844. }
  845. // AddOrEditEsWechatArticle
  846. // @Description: 新增/编辑微信文章入ES
  847. // @author: Roc
  848. // @datetime 2025-03-13 11:01:28
  849. // @param articleId int
  850. func AddOrEditEsWechatArticle(articleId int) {
  851. if utils.EsWechatArticleName == `` {
  852. return
  853. }
  854. var err error
  855. defer func() {
  856. if err != nil {
  857. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  858. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  859. }
  860. }()
  861. obj := rag.WechatArticle{}
  862. articleInfo, err := obj.GetById(articleId)
  863. if err != nil {
  864. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  865. return
  866. }
  867. platformObj := rag.WechatPlatform{}
  868. platformInfo, err := platformObj.GetById(articleInfo.WechatPlatformId)
  869. if err != nil {
  870. err = fmt.Errorf("获取公众号平台信息失败,Err:" + err.Error())
  871. return
  872. }
  873. esItem := elastic.WechatArticleAndPlatform{
  874. WechatArticleId: articleInfo.WechatArticleId,
  875. WechatPlatformId: articleInfo.WechatPlatformId,
  876. FakeId: articleInfo.FakeId,
  877. Title: articleInfo.Title,
  878. Link: articleInfo.Link,
  879. CoverUrl: articleInfo.CoverUrl,
  880. Description: articleInfo.Description,
  881. //Content: articleInfo.Content,
  882. //TextContent: articleInfo.TextContent,
  883. //AbstractStatus: articleInfo.AbstractStatus,
  884. Country: articleInfo.Country,
  885. Province: articleInfo.Province,
  886. City: articleInfo.City,
  887. ArticleCreateTime: articleInfo.ArticleCreateTime,
  888. IsDeleted: articleInfo.IsDeleted,
  889. ModifyTime: articleInfo.ModifyTime,
  890. CreateTime: articleInfo.CreateTime,
  891. Nickname: platformInfo.Nickname,
  892. Alias: platformInfo.Alias,
  893. RoundHeadImg: platformInfo.RoundHeadImg,
  894. }
  895. err = elastic.WechatArticleEsAddOrEdit(strconv.Itoa(articleInfo.WechatArticleId), esItem)
  896. }
  897. // AddOrEditEsWechatArticleAbstract
  898. // @Description: 新增/编辑微信文章摘要入ES
  899. // @author: Roc
  900. // @datetime 2025-03-13 14:13:47
  901. // @param articleAbstractId int
  902. func AddOrEditEsWechatArticleAbstract(articleAbstractId int) {
  903. if utils.EsWechatArticleAbstractName == `` {
  904. return
  905. }
  906. var err error
  907. defer func() {
  908. if err != nil {
  909. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  910. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  911. }
  912. }()
  913. obj := rag.WechatArticleAbstract{}
  914. abstractInfo, err := obj.GetById(articleAbstractId)
  915. if err != nil {
  916. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  917. return
  918. }
  919. articleObj := rag.WechatArticle{}
  920. articleInfo, err := articleObj.GetById(abstractInfo.WechatArticleId)
  921. if err != nil {
  922. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  923. return
  924. }
  925. // 公众号平台关联的标签品种
  926. tagObj := rag.WechatPlatformTagMapping{}
  927. tagMappingList, err := tagObj.GetListByCondition(` AND wechat_platform_id = ? `, []interface{}{articleInfo.WechatPlatformId}, 0, 10000)
  928. if err != nil {
  929. err = fmt.Errorf("获取公众号平台关联的品种信息失败,Err:" + err.Error())
  930. return
  931. }
  932. tagIdList := make([]int, 0)
  933. for _, v := range tagMappingList {
  934. tagIdList = append(tagIdList, v.TagId)
  935. }
  936. esItem := elastic.WechatArticleAbstractItem{
  937. WechatArticleAbstractId: abstractInfo.WechatArticleAbstractId,
  938. WechatArticleId: abstractInfo.WechatArticleId,
  939. WechatPlatformId: articleInfo.WechatPlatformId,
  940. Abstract: abstractInfo.Content,
  941. Version: abstractInfo.Version,
  942. VectorKey: abstractInfo.VectorKey,
  943. ModifyTime: articleInfo.ModifyTime,
  944. CreateTime: articleInfo.CreateTime,
  945. Title: articleInfo.Title,
  946. Link: articleInfo.Link,
  947. TagIdList: tagIdList,
  948. }
  949. err = elastic.WechatArticleAbstractEsAddOrEdit(strconv.Itoa(articleAbstractId), esItem)
  950. }
  951. // DelEsWechatArticleAbstract
  952. // @Description: 删除ES中的微信文章摘要
  953. // @author: Roc
  954. // @datetime 2025-03-13 14:13:47
  955. // @param articleAbstractId int
  956. func DelEsWechatArticleAbstract(articleAbstractId int) {
  957. if utils.EsWechatArticleAbstractName == `` {
  958. return
  959. }
  960. var err error
  961. defer func() {
  962. if err != nil {
  963. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  964. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  965. }
  966. }()
  967. err = elastic.WechatArticleAbstractEsDel(strconv.Itoa(articleAbstractId))
  968. }
  969. // AddOrEditEsRagQuestion
  970. // @Description: 新增/编辑知识库问题入ES
  971. // @author: Roc
  972. // @datetime 2025-03-28 11:25:50
  973. // @param questionId int
  974. func AddOrEditEsRagQuestion(questionId int) {
  975. if utils.EsWechatArticleName == `` {
  976. return
  977. }
  978. var err error
  979. defer func() {
  980. if err != nil {
  981. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  982. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  983. }
  984. }()
  985. obj := rag.Question{}
  986. questionInfo, err := obj.GetByID(questionId)
  987. if err != nil {
  988. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  989. return
  990. }
  991. esItem := elastic.RagQuestionItem{
  992. QuestionId: questionInfo.QuestionId,
  993. QuestionTitle: questionInfo.QuestionTitle,
  994. QuestionContent: questionInfo.QuestionContent,
  995. Sort: questionInfo.Sort,
  996. SysUserId: questionInfo.SysUserId,
  997. SysUserRealName: questionInfo.SysUserRealName,
  998. ModifyTime: questionInfo.ModifyTime,
  999. CreateTime: questionInfo.CreateTime,
  1000. }
  1001. err = elastic.RagQuestionEsAddOrEdit(strconv.Itoa(questionInfo.QuestionId), esItem)
  1002. }
  1003. // DelEsRagQuestion
  1004. // @Description: 删除ES中的知识库问题
  1005. // @author: Roc
  1006. // @datetime 2025-03-28 11:26:40
  1007. // @param questionId int
  1008. func DelEsRagQuestion(questionId int) {
  1009. if utils.EsWechatArticleAbstractName == `` {
  1010. return
  1011. }
  1012. var err error
  1013. defer func() {
  1014. if err != nil {
  1015. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  1016. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  1017. }
  1018. }()
  1019. err = elastic.RagQuestionEsDel(strconv.Itoa(questionId))
  1020. }