wechat_platform.go 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430
  1. package services
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "errors"
  6. "eta/eta_api/cache"
  7. "eta/eta_api/models"
  8. "eta/eta_api/models/rag"
  9. "eta/eta_api/services/elastic"
  10. "eta/eta_api/services/llm"
  11. "eta/eta_api/services/llm/facade"
  12. "eta/eta_api/utils"
  13. "eta/eta_api/utils/llm/eta_llm/eta_llm_http"
  14. "fmt"
  15. html2 "golang.org/x/net/html"
  16. "html"
  17. "os"
  18. "path"
  19. "regexp"
  20. "strconv"
  21. "strings"
  22. "time"
  23. )
  24. // AddWechatPlatform
  25. // @Description: 添加新的公众号
  26. // @param item
  27. func AddWechatPlatform(item *rag.WechatPlatform) {
  28. var err error
  29. defer func() {
  30. if err != nil {
  31. utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
  32. }
  33. }()
  34. if item.FakeId != `` {
  35. return
  36. }
  37. if item.ArticleLink == `` {
  38. return
  39. }
  40. articleLink := item.ArticleLink
  41. articleDetail, err := llm.SearchByWechatArticle(item.ArticleLink)
  42. if err != nil {
  43. return
  44. }
  45. if articleDetail.Appuin == `` {
  46. err = fmt.Errorf("文章内未匹配到公众号唯一标识")
  47. return
  48. }
  49. wechatPlatform := new(rag.WechatPlatform)
  50. // 查找是否存在这个公众号id的
  51. wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
  52. if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
  53. err = tmpErr
  54. return
  55. }
  56. if tmpErr == nil {
  57. // 如果找到了,那么需要将当前的给移除掉
  58. err = item.Del()
  59. if err != nil {
  60. return
  61. }
  62. // 并将查出来的微信公众号摘出来的数据重新赋值
  63. item = wechatPlatformInfo
  64. } else if utils.IsErrNoRow(tmpErr) {
  65. // 如果没找到,那么就变更当前的信息
  66. item.FakeId = articleDetail.Appuin
  67. item.Nickname = articleDetail.Nickname
  68. //item.Alias = req.Alias
  69. item.RoundHeadImg = articleDetail.RoundHeadImg
  70. //item.ServiceType = req.ServiceType
  71. item.Signature = articleDetail.ProfileSignature
  72. //item.Verified = verified
  73. item.ModifyTime = time.Now()
  74. err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
  75. if err != nil {
  76. return
  77. }
  78. // 修改公众号头像
  79. go replaceWechatPlatformPic(item)
  80. }
  81. // 把刚搜索的文章加入到文章库中
  82. AddWechatArticle(item, articleLink, articleDetail, nil)
  83. BeachAddWechatArticle(item, 10)
  84. fmt.Println("公众号入库完成")
  85. return
  86. }
  87. // AddWechatArticle
  88. // @Description: 添加公众号文章入库
  89. // @author: Roc
  90. // @datetime 2025-03-05 13:24:14
  91. // @param item *rag.WechatPlatform
  92. // @param link string
  93. // @param articleDetail WechatArticleDataResp
  94. func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail llm.WechatArticleDataResp, articleMenu *llm.ArticleMenu) {
  95. var err error
  96. defer func() {
  97. if err != nil {
  98. utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
  99. }
  100. }()
  101. obj := new(rag.WechatArticle)
  102. _, err = obj.GetByLink(articleLink)
  103. if err == nil {
  104. // 文章已经入库了,不需要重复入库
  105. return
  106. }
  107. // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
  108. if !utils.IsErrNoRow(err) {
  109. return
  110. }
  111. // 这个时候,说明数据库中没有这个文章,那么需要文章入库
  112. err = nil
  113. var publishAt time.Time
  114. if articleDetail.CreateAt != `` {
  115. createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
  116. if tmpErr == nil {
  117. publishAt = time.Unix(int64(createAtInt), 1000)
  118. }
  119. } else if articleMenu != nil {
  120. publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
  121. }
  122. content := articleDetail.HtmlContent
  123. // 图片下载下来到本地,如果成功了,那么就用新的
  124. tmpContent, err := ReplaceHtmlImg(content)
  125. if tmpContent != `` {
  126. content = tmpContent
  127. }
  128. obj = &rag.WechatArticle{
  129. WechatArticleId: 0,
  130. WechatPlatformId: item.WechatPlatformId,
  131. FakeId: item.FakeId,
  132. Title: articleDetail.Title,
  133. Link: articleLink,
  134. CoverUrl: articleDetail.CoverUrl,
  135. Description: articleDetail.Desc,
  136. Content: html.EscapeString(content),
  137. TextContent: articleDetail.TextContent,
  138. Country: articleDetail.CountryName,
  139. Province: articleDetail.ProvinceName,
  140. City: articleDetail.CityName,
  141. //Abstract: "",
  142. //ArticleCreateTime: createAt,
  143. ModifyTime: time.Now(),
  144. CreateTime: time.Now(),
  145. }
  146. if !publishAt.IsZero() {
  147. obj.ArticleCreateTime = publishAt
  148. }
  149. if articleMenu != nil {
  150. obj.Title = articleMenu.Title
  151. //obj.Link = articleMenu.Link
  152. obj.CoverUrl = articleMenu.Cover
  153. obj.Description = articleMenu.Digest
  154. }
  155. //插入的时候去增加一个自动生成dfa算法的标签
  156. err = obj.Create()
  157. llm.GenerateDfaTags(obj)
  158. // 修改文章封面图
  159. go replaceWechatArticleCoverPic(obj)
  160. // 文章入库成功后,需要将相关信息入摘要库
  161. go cache.AddWechatArticleLlmOpToCache(obj.WechatArticleId, 0, ``)
  162. }
  163. // BeachAddWechatArticle
  164. // @Description: 批量添加公众号文章
  165. // @param item
  166. // @param num
  167. // @return err
  168. func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
  169. var err error
  170. defer func() {
  171. //fmt.Println("公众号文章批量入库完成")
  172. if err != nil {
  173. utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
  174. fmt.Println("公众号文章批量入库失败,err:", err)
  175. }
  176. }()
  177. if item.FakeId == `` {
  178. return
  179. }
  180. wechatArticleObj := new(rag.WechatArticle)
  181. // 获取公众号的文章列表
  182. articleListResp, err := llm.SearchByWechatArticleList(item.FakeId, num)
  183. if err != nil {
  184. return
  185. }
  186. for _, articleMenu := range articleListResp.List {
  187. // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
  188. _, err = wechatArticleObj.GetByLink(articleMenu.Link)
  189. if err == nil {
  190. // 文章已经入库了,不需要重复入库
  191. continue
  192. }
  193. if !utils.IsErrNoRow(err) {
  194. return
  195. }
  196. err = nil
  197. articleDetail, tmpErr := llm.SearchByWechatArticle(articleMenu.Link)
  198. if tmpErr != nil {
  199. err = tmpErr
  200. return
  201. }
  202. // 把刚搜索的文章加入到指标库
  203. AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
  204. //time.Sleep(10 * time.Second)
  205. // 随机休眠,至少大于10s
  206. sleepTimeInt := utils.GetRandInt(10, 20)
  207. if sleepTimeInt < 10 {
  208. sleepTimeInt = 10
  209. }
  210. time.Sleep(time.Duration(sleepTimeInt) * time.Second)
  211. }
  212. return
  213. }
  214. //
  215. //// GenerateArticleAbstract
  216. //// @Description: 文章摘要生成
  217. //// @author: Roc
  218. //// @datetime 2025-03-10 16:17:53
  219. //// @param item *rag.WechatArticle
  220. //func GenerateArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
  221. // var err error
  222. // defer func() {
  223. // if err != nil {
  224. // utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  225. // fmt.Println("文章转临时文件失败,err:", err)
  226. // }
  227. // }()
  228. //
  229. // // 内容为空,那就不需要生成摘要
  230. // if item.TextContent == `` {
  231. // return
  232. // }
  233. //
  234. // abstractObj := rag.WechatArticleAbstract{}
  235. // tmpAbstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
  236. // // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
  237. // if err == nil && !forceGenerate {
  238. // // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
  239. // WechatArticleAbstractToKnowledge(item, tmpAbstractItem, false)
  240. //
  241. // return
  242. // }
  243. // if !utils.IsErrNoRow(err) {
  244. // return
  245. // }
  246. //
  247. // //开始对话
  248. // abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_ETA_REPORT)
  249. // if tmpErr != nil {
  250. // err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  251. // return
  252. // }
  253. //
  254. // // 添加问答记录
  255. // if len(addArticleChatRecordList) > 0 {
  256. // recordObj := rag.WechatArticleChatRecord{}
  257. // err = recordObj.CreateInBatches(addArticleChatRecordList)
  258. // if err != nil {
  259. // return
  260. // }
  261. // }
  262. //
  263. // if abstract != `` {
  264. // if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  265. // item.AbstractStatus = 2
  266. // item.ModifyTime = time.Now()
  267. // err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  268. // return
  269. // }
  270. // item.AbstractStatus = 1
  271. // item.ModifyTime = time.Now()
  272. // err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  273. //
  274. // abstractItem := &rag.WechatArticleAbstract{
  275. // WechatArticleAbstractId: 0,
  276. // WechatArticleId: item.WechatArticleId,
  277. // Content: abstract,
  278. // Version: 0,
  279. // VectorKey: "",
  280. // ModifyTime: time.Now(),
  281. // CreateTime: time.Now(),
  282. // }
  283. // err = abstractItem.Create()
  284. // if err != nil {
  285. // return
  286. // }
  287. //
  288. // // 数据入ES库
  289. // go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  290. //
  291. // WechatArticleAbstractToKnowledge(item, abstractItem, false)
  292. // }
  293. //}
  294. // GenerateArticleAbstract
  295. // @Description: 文章摘要生成(默认提示词批量生成)
  296. // @author: Roc
  297. // @datetime 2025-03-10 16:17:53
  298. // @param item *rag.WechatArticle
  299. func GenerateWechatArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
  300. var err error
  301. defer func() {
  302. if err != nil {
  303. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  304. fmt.Println("文章转临时文件失败,err:", err)
  305. }
  306. }()
  307. // 内容为空,那就不需要生成摘要
  308. if item.TextContent == `` {
  309. return
  310. }
  311. questionObj := rag.Question{}
  312. questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100)
  313. if err != nil {
  314. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  315. return
  316. }
  317. // 没问题就不生成了
  318. if len(questionList) <= 0 {
  319. return
  320. }
  321. for _, question := range questionList {
  322. GenerateWechatArticleAbstractByQuestion(item, question, forceGenerate)
  323. }
  324. return
  325. }
  326. // GenerateWechatArticleAbstractByQuestion
  327. // @Description: 文章摘要生成(根据提示词生成)
  328. // @author: Roc
  329. // @datetime 2025-04-24 11:23:27
  330. // @param item *rag.WechatArticle
  331. // @param question *rag.Question
  332. // @param forceGenerate bool
  333. // @return err error
  334. func GenerateWechatArticleAbstractByQuestion(item *rag.WechatArticle, question *rag.Question, forceGenerate bool) (err error) {
  335. defer func() {
  336. if err != nil {
  337. utils.FileLog.Error("文章摘要生成(根据提示词生成)失败,err:%v", err)
  338. }
  339. }()
  340. // 内容为空,那就不需要生成摘要
  341. if item.TextContent == `` {
  342. return
  343. }
  344. abstractObj := rag.WechatArticleAbstract{}
  345. abstractItem, err := abstractObj.GetByWechatArticleIdAndQuestionId(item.WechatArticleId, question.QuestionId)
  346. // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
  347. if err == nil && !forceGenerate {
  348. // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
  349. WechatArticleAbstractToKnowledge(item, abstractItem, false)
  350. return
  351. }
  352. // 如果是没找到数据,那么就将报错置空
  353. if err != nil && utils.IsErrNoRow(err) {
  354. err = nil
  355. }
  356. //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
  357. questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)
  358. //开始对话
  359. abstract, industryTags, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_WECHAT, questionStr)
  360. if tmpErr != nil {
  361. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  362. return
  363. }
  364. if abstract == `` {
  365. return
  366. }
  367. if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  368. item.AbstractStatus = 2
  369. item.ModifyTime = time.Now()
  370. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  371. return
  372. }
  373. var tagIdJsonStr string
  374. var tagNameJsonStr string
  375. // 标签ID
  376. {
  377. tagIdList := make([]int, 0)
  378. tagNameList := make([]string, 0)
  379. tagIdMap := make(map[int]bool)
  380. if abstractItem != nil && abstractItem.Tags != `` {
  381. tmpErr = json.Unmarshal([]byte(abstractItem.Tags), &tagIdList)
  382. if tmpErr != nil {
  383. utils.FileLog.Info(fmt.Sprintf("json.Unmarshal Tags 失败,标签数据:%s,Err:%s", abstractItem.Tags, tmpErr.Error()))
  384. } else {
  385. for _, tagId := range tagIdList {
  386. tagIdMap[tagId] = true
  387. }
  388. }
  389. }
  390. if abstractItem.TagsName != `` {
  391. tagNameList = strings.Split(abstractItem.TagsName, ",")
  392. }
  393. for _, tagName := range industryTags {
  394. tagId, tmpErr := GetTagIdByName(tagName)
  395. if tmpErr != nil {
  396. utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error()))
  397. }
  398. if _, ok := tagIdMap[tagId]; !ok {
  399. tagIdList = append(tagIdList, tagId)
  400. tagNameList = append(tagNameList, tagName)
  401. tagIdMap[tagId] = true
  402. }
  403. }
  404. //for _, tagName := range varietyTags {
  405. // tagId, tmpErr := GetTagIdByName(tagName)
  406. // if tmpErr != nil {
  407. // utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error()))
  408. // }
  409. // if _, ok := tagIdMap[tagId]; !ok {
  410. // tagIdList = append(tagIdList, tagId)
  411. // tagIdMap[tagId] = true
  412. // }
  413. //}
  414. tagIdJsonByte, tmpErr := json.Marshal(tagIdList)
  415. if tmpErr != nil {
  416. utils.FileLog.Info(fmt.Sprintf("标签ID序列化失败,Err:%s", tmpErr.Error()))
  417. } else {
  418. tagIdJsonStr = string(tagIdJsonByte)
  419. }
  420. tagNameJsonStr = strings.Join(tagNameList, `,`)
  421. }
  422. item.AbstractStatus = 1
  423. item.ModifyTime = time.Now()
  424. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  425. if abstractItem == nil || abstractItem.WechatArticleAbstractId <= 0 {
  426. abstractItem = &rag.WechatArticleAbstract{
  427. WechatArticleAbstractId: 0,
  428. WechatArticleId: item.WechatArticleId,
  429. Content: abstract,
  430. Version: 1,
  431. VectorKey: "",
  432. ModifyTime: time.Now(),
  433. CreateTime: time.Now(),
  434. QuestionId: question.QuestionId,
  435. Tags: tagIdJsonStr,
  436. TagsName: tagNameJsonStr,
  437. QuestionContent: question.QuestionContent,
  438. }
  439. err = abstractItem.Create()
  440. } else {
  441. // 添加历史记录
  442. rag.AddArticleAbstractHistoryByWechatArticleAbstract(abstractItem)
  443. abstractItem.Content = abstract
  444. abstractItem.Version++
  445. abstractItem.ModifyTime = time.Now()
  446. abstractItem.Tags = tagIdJsonStr
  447. abstractItem.TagsName = tagNameJsonStr
  448. abstractItem.QuestionContent = question.QuestionContent
  449. err = abstractItem.Update([]string{"content", "version", "modify_time", "tags", "tags_name", "question_content"})
  450. }
  451. if err != nil {
  452. return
  453. }
  454. // 数据入ES库
  455. go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  456. WechatArticleAbstractToKnowledge(item, abstractItem, false)
  457. return
  458. }
  459. // DelDoc
  460. // @Description: 删除摘要向量库
  461. // @author: Roc
  462. // @datetime 2025-03-12 16:55:05
  463. // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
  464. // @return err error
  465. func DelDoc(wechatArticleAbstractList []*rag.WechatArticleAbstract) (err error) {
  466. defer func() {
  467. if err != nil {
  468. utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
  469. fmt.Println("删除摘要向量库文件失败,err:", err)
  470. }
  471. }()
  472. vectorKeyList := make([]string, 0)
  473. wechatArticleAbstractIdList := make([]int, 0)
  474. for _, v := range wechatArticleAbstractList {
  475. if v.VectorKey == `` {
  476. continue
  477. }
  478. vectorKeyList = append(vectorKeyList, v.VectorKey)
  479. wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.WechatArticleAbstractId)
  480. }
  481. // 没有就不删除
  482. if len(vectorKeyList) <= 0 {
  483. return
  484. }
  485. _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
  486. if err != nil {
  487. err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
  488. return
  489. }
  490. //fmt.Println(resp)
  491. obj := rag.WechatArticleAbstract{}
  492. err = obj.DelVectorKey(wechatArticleAbstractIdList)
  493. return
  494. }
  495. // DelLlmDoc
  496. // @Description: 删除摘要向量库
  497. // @author: Roc
  498. // @datetime 2025-03-12 16:55:05
  499. // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
  500. // @return err error
  501. func DelLlmDoc(vectorKeyList []string, wechatArticleAbstractIdList []int) (err error) {
  502. defer func() {
  503. if err != nil {
  504. utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
  505. fmt.Println("删除摘要向量库文件失败,err:", err)
  506. }
  507. }()
  508. // 没有就不删除
  509. if len(vectorKeyList) <= 0 {
  510. return
  511. }
  512. _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
  513. if err != nil {
  514. err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
  515. return
  516. }
  517. //fmt.Println(resp)
  518. obj := rag.WechatArticleAbstract{}
  519. err = obj.DelVectorKey(wechatArticleAbstractIdList)
  520. return
  521. }
  522. func getAnswerByContent(articleId int, source int, questionStr string) (answer string, tagNameList []string, err error) {
  523. //addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
  524. result, err := facade.AIGCBaseOnPromote(facade.AIGC{
  525. Promote: questionStr,
  526. Source: source,
  527. ArticleId: articleId,
  528. LLMModel: `deepseek-r1:32b`,
  529. })
  530. if err != nil {
  531. return
  532. }
  533. // JSON字符串转字节
  534. //answerByte, err := json.Marshal(result)
  535. //if err != nil {
  536. // return
  537. //}
  538. //originalAnswer := string(answerByte)
  539. // 提取 </think> 后面的内容
  540. thinkEndIndex := strings.Index(result.Answer, "</think>")
  541. if thinkEndIndex != -1 {
  542. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  543. } else {
  544. answer = result.Answer
  545. }
  546. answer = strings.TrimSpace(answer)
  547. // 提取标签
  548. tagNameList = extractLabels(answer)
  549. //// 待入库的数据
  550. //addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  551. // WechatArticleChatRecordId: 0,
  552. // WechatArticleId: articleId,
  553. // ChatUserType: "user",
  554. // Content: questionStr,
  555. // SendTime: time.Now(),
  556. // CreatedTime: time.Now(),
  557. // UpdateTime: time.Now(),
  558. //}, &rag.WechatArticleChatRecord{
  559. // WechatArticleChatRecordId: 0,
  560. // WechatArticleId: articleId,
  561. // ChatUserType: "assistant",
  562. // Content: originalAnswer,
  563. // SendTime: time.Now(),
  564. // CreatedTime: time.Now(),
  565. // UpdateTime: time.Now(),
  566. //})
  567. return
  568. }
  569. func getAnswerByContentBak(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
  570. historyList := make([]eta_llm_http.HistoryContent, 0)
  571. addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
  572. questionObj := rag.Question{}
  573. questionList, err := questionObj.GetListByCondition(``, ``, []interface{}{}, 0, 100)
  574. if err != nil {
  575. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  576. return
  577. }
  578. // 没问题就不生成了
  579. if len(questionList) <= 0 {
  580. return
  581. }
  582. //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
  583. questionStrList := []string{`你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`}
  584. for _, v := range questionList {
  585. questionStrList = append(questionStrList, v.QuestionContent)
  586. }
  587. questionStr := strings.Join(questionStrList, "\n")
  588. originalAnswer, result, err := llm.ChatByFile(docId, questionStr, historyList)
  589. fmt.Println(result)
  590. if err != nil {
  591. err = fmt.Errorf("LLM对话失败,Err:" + err.Error())
  592. return
  593. }
  594. // 提取 </think> 后面的内容
  595. thinkEndIndex := strings.Index(result.Answer, "</think>")
  596. if thinkEndIndex != -1 {
  597. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  598. } else {
  599. answer = result.Answer
  600. }
  601. answer = strings.TrimSpace(answer)
  602. // 待入库的数据
  603. addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  604. WechatArticleChatRecordId: 0,
  605. WechatArticleId: wechatArticleId,
  606. ChatUserType: "user",
  607. Content: questionStr,
  608. SendTime: time.Now(),
  609. CreatedTime: time.Now(),
  610. UpdateTime: time.Now(),
  611. }, &rag.WechatArticleChatRecord{
  612. WechatArticleChatRecordId: 0,
  613. WechatArticleId: wechatArticleId,
  614. ChatUserType: "assistant",
  615. Content: originalAnswer,
  616. SendTime: time.Now(),
  617. CreatedTime: time.Now(),
  618. UpdateTime: time.Now(),
  619. })
  620. return
  621. }
  622. // ArticleToKnowledge
  623. // @Description: 原文入向量库
  624. // @author: Roc
  625. // @datetime 2025-03-10 16:13:16
  626. // @param item *rag.WechatArticle
  627. func ArticleToKnowledge(item *rag.WechatArticle) {
  628. if item.TextContent == `` {
  629. return
  630. }
  631. var err error
  632. defer func() {
  633. if err != nil {
  634. utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err)
  635. fmt.Println("上传文章原文到知识库失败,err:", err)
  636. }
  637. }()
  638. // 生成临时文件
  639. //dateDir := time.Now().Format("20060102")
  640. //uploadDir := "./static/ai/article/" + dateDir
  641. uploadDir := "./static/ai/article"
  642. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  643. if err != nil {
  644. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  645. return
  646. }
  647. //fileName := utils.RemoveSpecialChars(item.Title) + `.md`
  648. fileName := utils.MD5(item.Title) + `.md`
  649. tmpFilePath := uploadDir + "/" + fileName
  650. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  651. if err != nil {
  652. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  653. return
  654. }
  655. defer func() {
  656. os.Remove(tmpFilePath)
  657. }()
  658. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName]
  659. // 上传临时文件到LLM
  660. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  661. if err != nil {
  662. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  663. return
  664. }
  665. if len(uploadFileResp.FailedFiles) > 0 {
  666. for _, v := range uploadFileResp.FailedFiles {
  667. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  668. }
  669. }
  670. item.VectorKey = tmpFilePath
  671. item.ModifyTime = time.Now()
  672. err = item.Update([]string{"vector_key", "modify_time"})
  673. }
  674. // WechatArticleAbstractToKnowledge
  675. // @Description: 摘要入向量库
  676. // @author: Roc
  677. // @datetime 2025-03-10 16:14:59
  678. // @param wechatArticleItem *rag.WechatArticle
  679. // @param abstractItem *rag.WechatArticleAbstract
  680. func WechatArticleAbstractToKnowledge(wechatArticleItem *rag.WechatArticle, abstractItem *rag.WechatArticleAbstract, isReUpload bool) {
  681. if abstractItem.Content == `` {
  682. return
  683. }
  684. // 已经生成了,那就不处理了
  685. if abstractItem.VectorKey != `` && !isReUpload {
  686. return
  687. }
  688. var err error
  689. defer func() {
  690. if err != nil {
  691. utils.FileLog.Error("摘要入向量库失败,err:%v", err)
  692. fmt.Println("摘要入向量库失败,err:", err)
  693. }
  694. // 数据入ES库
  695. go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  696. }()
  697. // 生成临时文件
  698. //dateDir := time.Now().Format("20060102")
  699. //uploadDir := + "./static/ai/article/" + dateDir
  700. uploadDir := "./static/ai/abstract"
  701. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  702. if err != nil {
  703. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  704. return
  705. }
  706. fileName := utils.MD5(fmt.Sprintf("%d_%d", utils.AI_ARTICLE_SOURCE_WECHAT, wechatArticleItem.WechatArticleId)) + `.md`
  707. tmpFilePath := uploadDir + "/" + fileName
  708. err = utils.SaveToFile(abstractItem.Content, tmpFilePath)
  709. if err != nil {
  710. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  711. return
  712. }
  713. defer func() {
  714. os.Remove(tmpFilePath)
  715. }()
  716. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
  717. // 上传临时文件到LLM
  718. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  719. if err != nil {
  720. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  721. return
  722. }
  723. if len(uploadFileResp.FailedFiles) > 0 {
  724. for _, v := range uploadFileResp.FailedFiles {
  725. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  726. }
  727. }
  728. abstractItem.VectorKey = tmpFilePath
  729. abstractItem.ModifyTime = time.Now()
  730. err = abstractItem.Update([]string{"vector_key", "modify_time"})
  731. }
  732. // replaceWechatPlatformPic
  733. // @Description: 替换公众号头像
  734. // @author: Roc
  735. // @datetime 2025-03-11 09:38:24
  736. // @param item *rag.WechatPlatform
  737. func replaceWechatPlatformPic(item *rag.WechatPlatform) {
  738. var err error
  739. defer func() {
  740. if err != nil {
  741. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  742. fmt.Println("替换公众号头像失败,err:", err)
  743. }
  744. }()
  745. if item.RoundHeadImg == `` {
  746. return
  747. }
  748. resourceUrl, err := downloadWxPicAndUploadToOss(item.RoundHeadImg, `head_img`)
  749. if err != nil {
  750. return
  751. }
  752. item.RoundHeadImg = resourceUrl
  753. err = item.Update([]string{"round_head_img"})
  754. }
  755. // replaceWechatArticleCoverPic
  756. // @Description: 替换文章封面图
  757. // @author: Roc
  758. // @datetime 2025-03-11 09:38:35
  759. // @param item *rag.WechatArticle
  760. func replaceWechatArticleCoverPic(item *rag.WechatArticle) {
  761. var err error
  762. defer func() {
  763. if err != nil {
  764. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  765. fmt.Println("替换公众号头像失败,err:", err)
  766. }
  767. // 数据入ES库
  768. AddOrEditEsWechatArticle(item.WechatArticleId)
  769. }()
  770. if item.CoverUrl == `` {
  771. return
  772. }
  773. resourceUrl, err := downloadWxPicAndUploadToOss(item.CoverUrl, `cover_url`)
  774. if err != nil {
  775. return
  776. }
  777. item.CoverUrl = resourceUrl
  778. err = item.Update([]string{"cover_url"})
  779. }
  780. // replaceWechatArticlePic
  781. // @Description: 替换文章内容图
  782. // @author: Roc
  783. // @datetime 2025-03-11 09:38:35
  784. // @param item *rag.WechatArticle
  785. func ReplaceWechatArticlePic(item *rag.WechatArticle) {
  786. var err error
  787. defer func() {
  788. if err != nil {
  789. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  790. fmt.Println("替换公众号头像失败,err:", err)
  791. }
  792. }()
  793. if item.Content == `` {
  794. return
  795. }
  796. content, err := ReplaceHtmlImg(html.UnescapeString(item.Content))
  797. if err != nil {
  798. return
  799. }
  800. item.Content = html.EscapeString(content)
  801. err = item.Update([]string{"content"})
  802. return
  803. }
  804. // downloadWxPicAndUploadToOss
  805. // @Description: 下载微信图片并上传到OSS
  806. // @author: Roc
  807. // @datetime 2025-03-11 09:28:49
  808. // @param wxPicUrl string
  809. // @return resourceUrl string
  810. // @return err error
  811. func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, err error) {
  812. localFilePath, err := utils.DownloadWxImage(wxPicUrl)
  813. if err != nil {
  814. return
  815. }
  816. defer func() {
  817. os.Remove(localFilePath)
  818. }()
  819. ossClient := NewOssClient()
  820. if ossClient == nil {
  821. err = fmt.Errorf(`初始化OSS服务失败`)
  822. return
  823. }
  824. ext := path.Ext(localFilePath)
  825. fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), utils.GetRandStringNoSpecialChar(16), ext)
  826. //savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName
  827. savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName)
  828. resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath)
  829. if err != nil {
  830. err = fmt.Errorf("文件上传失败,Err:" + err.Error())
  831. return
  832. }
  833. return
  834. }
  835. // ReplaceHtmlImg
  836. // @Description: 将html中的图片替换成自己的
  837. // @author: Roc
  838. // @datetime 2025-03-11 14:32:00
  839. // @param htmlStr string
  840. // @return newHtml string
  841. // @return err error
  842. func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) {
  843. doc, err := html2.Parse(strings.NewReader(htmlStr))
  844. if err != nil {
  845. return
  846. }
  847. if err != nil {
  848. return
  849. }
  850. handleNode(doc)
  851. // 将处理后的HTML节点重新渲染为HTML字符串
  852. var buf bytes.Buffer
  853. if err = html2.Render(&buf, doc); err != nil {
  854. fmt.Println(err)
  855. return
  856. }
  857. newHtml = buf.String()
  858. return
  859. }
  860. // handleNode
  861. // @Description: html节点处理
  862. // @author: Roc
  863. // @datetime 2025-03-11 14:32:45
  864. // @param n *html2.Node
  865. func handleNode(n *html2.Node) {
  866. if n.Type == html2.ElementNode {
  867. if n.Data == "img" {
  868. for k, attr := range n.Attr {
  869. // 新增代码:如果标签是img且存在data-src属性,则将data-src的值赋给src
  870. if n.Data == "img" && attr.Key == "src" {
  871. resourceUrl, tmpErr := downloadWxPicAndUploadToOss(attr.Val, `article`)
  872. if tmpErr != nil {
  873. continue
  874. }
  875. attr.Val = resourceUrl
  876. }
  877. n.Attr[k] = attr
  878. }
  879. }
  880. }
  881. for c := n.FirstChild; c != nil; c = c.NextSibling {
  882. handleNode(c)
  883. }
  884. }
  885. // AddOrEditEsWechatPlatformId
  886. // @Description: 批量处理某个公众号下的文章到ES
  887. // @author: Roc
  888. // @datetime 2025-03-13 11:01:28
  889. // @param articleId int
  890. func AddOrEditEsWechatPlatformId(wechatPlatformId int) {
  891. if utils.EsWechatArticleName == `` {
  892. return
  893. }
  894. obj := rag.WechatArticle{}
  895. list, _ := obj.GetListByCondition(` wechat_article_id `, ` AND wechat_platform_id = ? `, []interface{}{wechatPlatformId}, 0, 1000000)
  896. for _, item := range list {
  897. AddOrEditEsWechatArticle(item.WechatArticleId)
  898. }
  899. }
  900. // AddOrEditEsWechatArticle
  901. // @Description: 新增/编辑微信文章入ES
  902. // @author: Roc
  903. // @datetime 2025-03-13 11:01:28
  904. // @param articleId int
  905. func AddOrEditEsWechatArticle(articleId int) {
  906. if utils.EsWechatArticleName == `` {
  907. return
  908. }
  909. var err error
  910. defer func() {
  911. if err != nil {
  912. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  913. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  914. }
  915. }()
  916. obj := rag.WechatArticle{}
  917. articleInfo, err := obj.GetById(articleId)
  918. if err != nil {
  919. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  920. return
  921. }
  922. platformObj := rag.WechatPlatform{}
  923. platformInfo, err := platformObj.GetById(articleInfo.WechatPlatformId)
  924. if err != nil {
  925. err = fmt.Errorf("获取公众号平台信息失败,Err:" + err.Error())
  926. return
  927. }
  928. esItem := elastic.WechatArticleAndPlatform{
  929. WechatArticleId: articleInfo.WechatArticleId,
  930. WechatPlatformId: articleInfo.WechatPlatformId,
  931. FakeId: articleInfo.FakeId,
  932. Title: articleInfo.Title,
  933. Link: articleInfo.Link,
  934. CoverUrl: articleInfo.CoverUrl,
  935. Description: articleInfo.Description,
  936. //Content: articleInfo.Content,
  937. //TextContent: articleInfo.TextContent,
  938. //AbstractStatus: articleInfo.AbstractStatus,
  939. Country: articleInfo.Country,
  940. Province: articleInfo.Province,
  941. City: articleInfo.City,
  942. ArticleCreateTime: articleInfo.ArticleCreateTime,
  943. IsDeleted: articleInfo.IsDeleted,
  944. ModifyTime: articleInfo.ModifyTime,
  945. CreateTime: articleInfo.CreateTime,
  946. Nickname: platformInfo.Nickname,
  947. Alias: platformInfo.Alias,
  948. RoundHeadImg: platformInfo.RoundHeadImg,
  949. }
  950. err = elastic.WechatArticleEsAddOrEdit(strconv.Itoa(articleInfo.WechatArticleId), esItem)
  951. }
  952. // AddOrEditEsWechatArticleAbstract
  953. // @Description: 新增/编辑微信文章摘要入ES
  954. // @author: Roc
  955. // @datetime 2025-03-13 14:13:47
  956. // @param articleAbstractId int
  957. func AddOrEditEsWechatArticleAbstract(articleAbstractId int) {
  958. if utils.EsWechatArticleAbstractName == `` {
  959. return
  960. }
  961. var err error
  962. defer func() {
  963. if err != nil {
  964. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  965. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  966. }
  967. }()
  968. obj := rag.WechatArticleAbstract{}
  969. abstractInfo, err := obj.GetById(articleAbstractId)
  970. if err != nil {
  971. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  972. return
  973. }
  974. articleObj := rag.WechatArticle{}
  975. articleInfo, err := articleObj.GetById(abstractInfo.WechatArticleId)
  976. if err != nil {
  977. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  978. return
  979. }
  980. // 标签ID
  981. tagIdList := make([]int, 0)
  982. if abstractInfo.Tags != `` {
  983. err = json.Unmarshal([]byte(abstractInfo.Tags), &tagIdList)
  984. if err != nil {
  985. err = fmt.Errorf("报告标签ID转int失败,Err:" + err.Error())
  986. utils.FileLog.Info(fmt.Sprintf("json.Unmarshal 报告标签ID转int失败,标签数据:%s,Err:%s", abstractInfo.Tags, err.Error()))
  987. }
  988. }
  989. tagNameList := make([]string, 0)
  990. if abstractInfo.TagsName != `` {
  991. tagNameList = strings.Split(abstractInfo.TagsName, ",")
  992. }
  993. esItem := elastic.WechatArticleAbstractItem{
  994. WechatArticleAbstractId: abstractInfo.WechatArticleAbstractId,
  995. WechatArticleId: abstractInfo.WechatArticleId,
  996. WechatPlatformId: articleInfo.WechatPlatformId,
  997. Abstract: abstractInfo.Content,
  998. QuestionId: abstractInfo.QuestionId,
  999. Version: abstractInfo.Version,
  1000. VectorKey: abstractInfo.VectorKey,
  1001. ModifyTime: abstractInfo.ModifyTime,
  1002. CreateTime: abstractInfo.CreateTime,
  1003. Title: articleInfo.Title,
  1004. Link: articleInfo.Link,
  1005. TagIdList: tagIdList,
  1006. TagNameList: tagNameList,
  1007. }
  1008. err = elastic.WechatArticleAbstractEsAddOrEdit(strconv.Itoa(articleAbstractId), esItem)
  1009. }
  1010. // DelWechatArticleAbstract
  1011. // @Description: 删除微信文章摘要
  1012. // @author: Roc
  1013. // @datetime 2025-04-23 17:36:22
  1014. // @param abstractIdList []int
  1015. // @return err error
  1016. func DelWechatArticleAbstract(abstractIdList []int) (err error) {
  1017. obj := rag.WechatArticleAbstract{}
  1018. list, err := obj.GetByIdList(abstractIdList)
  1019. if err != nil {
  1020. if !utils.IsErrNoRow(err) {
  1021. err = errors.New("删除向量库失败,Err:" + err.Error())
  1022. } else {
  1023. err = nil
  1024. }
  1025. return
  1026. }
  1027. err = delWechatArticleAbstract(list)
  1028. return
  1029. }
  1030. // DelWechatArticleAbstract
  1031. // @Description: 删除微信文章摘要
  1032. // @author: Roc
  1033. // @datetime 2025-04-23 17:36:22
  1034. // @param abstractIdList []int
  1035. // @return err error
  1036. func DelWechatArticleAbstractByQuestionId(questionId int) (err error) {
  1037. obj := rag.WechatArticleAbstract{}
  1038. list, err := obj.GetListByQuestionId(questionId)
  1039. if err != nil {
  1040. if !utils.IsErrNoRow(err) {
  1041. err = errors.New("删除向量库失败,Err:" + err.Error())
  1042. } else {
  1043. err = nil
  1044. }
  1045. return
  1046. }
  1047. err = delWechatArticleAbstract(list)
  1048. return
  1049. }
  1050. // delRagEtaReportAbstract
  1051. // @Description: 删除摘要
  1052. // @author: Roc
  1053. // @datetime 2025-04-24 15:19:19
  1054. // @param list []*rag.RagEtaReportAbstract
  1055. // @return err error
  1056. func delWechatArticleAbstract(list []*rag.WechatArticleAbstract) (err error) {
  1057. obj := rag.RagEtaReportAbstract{}
  1058. vectorKeyList := make([]string, 0)
  1059. newAbstractIdList := make([]int, 0)
  1060. if len(list) > 0 {
  1061. for _, v := range list {
  1062. // 有加入到向量库,那么就加入到待删除的向量库list中
  1063. if v.VectorKey != `` {
  1064. vectorKeyList = append(vectorKeyList, v.VectorKey)
  1065. }
  1066. newAbstractIdList = append(newAbstractIdList, v.WechatArticleAbstractId)
  1067. }
  1068. }
  1069. //if !req.IsSelectAll {
  1070. // list, err := obj.GetByIdList(req.RagEtaReportAbstractIdList)
  1071. // if err != nil {
  1072. // br.Msg = "修改失败"
  1073. // br.ErrMsg = "修改失败,查找问题失败,Err:" + err.Error()
  1074. // if utils.IsErrNoRow(err) {
  1075. // br.Msg = "问题不存在"
  1076. // br.IsSendEmail = false
  1077. // }
  1078. // return
  1079. // }
  1080. // if len(list) > 0 {
  1081. // for _, v := range list {
  1082. // // 有加入到向量库,那么就加入到待删除的向量库list中
  1083. // if v.VectorKey != `` {
  1084. // vectorKeyList = append(vectorKeyList, v.VectorKey)
  1085. // }
  1086. // wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.RagEtaReportAbstractId)
  1087. // }
  1088. // }
  1089. //} else {
  1090. // notIdMap := make(map[int]bool)
  1091. // for _, v := range req.NotRagEtaReportAbstractIdList {
  1092. // notIdMap[v] = true
  1093. // }
  1094. //
  1095. // _, list, err := getRagEtaReportAbstractList(req.KeyWord, req.TagId, 0, 100000)
  1096. // if err != nil {
  1097. // br.Msg = "修改失败"
  1098. // br.ErrMsg = "修改失败,查找问题失败,Err:" + err.Error()
  1099. // if utils.IsErrNoRow(err) {
  1100. // br.Msg = "问题不存在"
  1101. // br.IsSendEmail = false
  1102. // }
  1103. // return
  1104. // }
  1105. // if len(list) > 0 {
  1106. // for _, v := range list {
  1107. // if notIdMap[v.RagEtaReportAbstractId] {
  1108. // continue
  1109. // }
  1110. // // 有加入到向量库,那么就加入到待删除的向量库list中
  1111. // if v.VectorKey != `` {
  1112. // vectorKeyList = append(vectorKeyList, v.VectorKey)
  1113. // }
  1114. // wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.RagEtaReportAbstractId)
  1115. // }
  1116. // }
  1117. //}
  1118. // 删除向量库
  1119. err = DelLlmDoc(vectorKeyList, newAbstractIdList)
  1120. if err != nil {
  1121. err = errors.New("删除向量库失败,Err:" + err.Error())
  1122. return
  1123. }
  1124. // 删除摘要
  1125. err = obj.DelByIdList(newAbstractIdList)
  1126. if err != nil {
  1127. err = errors.New("删除失败,Err:" + err.Error())
  1128. return
  1129. }
  1130. // 删除es数据
  1131. for _, wechatArticleAbstractId := range newAbstractIdList {
  1132. DelEsWechatArticleAbstract(wechatArticleAbstractId)
  1133. }
  1134. return
  1135. }
  1136. // DelEsWechatArticleAbstract
  1137. // @Description: 删除ES中的微信文章摘要
  1138. // @author: Roc
  1139. // @datetime 2025-03-13 14:13:47
  1140. // @param articleAbstractId int
  1141. func DelEsWechatArticleAbstract(articleAbstractId int) {
  1142. if utils.EsWechatArticleAbstractName == `` {
  1143. return
  1144. }
  1145. var err error
  1146. defer func() {
  1147. if err != nil {
  1148. utils.FileLog.Error("删除公众号微信信息到ES失败,err:%v", err)
  1149. fmt.Println("删除公众号微信信息到ES失败,err:", err)
  1150. }
  1151. }()
  1152. err = elastic.WechatArticleAbstractEsDel(strconv.Itoa(articleAbstractId))
  1153. }
  1154. // AddOrEditEsRagQuestion
  1155. // @Description: 新增/编辑知识库问题入ES
  1156. // @author: Roc
  1157. // @datetime 2025-03-28 11:25:50
  1158. // @param questionId int
  1159. func AddOrEditEsRagQuestion(questionId int) {
  1160. if utils.EsWechatArticleName == `` {
  1161. return
  1162. }
  1163. var err error
  1164. defer func() {
  1165. if err != nil {
  1166. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  1167. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  1168. }
  1169. }()
  1170. obj := rag.Question{}
  1171. questionInfo, err := obj.GetByID(questionId)
  1172. if err != nil {
  1173. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  1174. return
  1175. }
  1176. esItem := elastic.RagQuestionItem{
  1177. QuestionId: questionInfo.QuestionId,
  1178. QuestionTitle: questionInfo.QuestionTitle,
  1179. QuestionContent: questionInfo.QuestionContent,
  1180. Sort: questionInfo.Sort,
  1181. IsDefault: questionInfo.IsDefault,
  1182. SysUserId: questionInfo.SysUserId,
  1183. SysUserRealName: questionInfo.SysUserRealName,
  1184. ModifyTime: questionInfo.ModifyTime,
  1185. CreateTime: questionInfo.CreateTime,
  1186. }
  1187. err = elastic.RagQuestionEsAddOrEdit(strconv.Itoa(questionInfo.QuestionId), esItem)
  1188. }
  1189. // DelEsRagQuestion
  1190. // @Description: 删除ES中的知识库问题
  1191. // @author: Roc
  1192. // @datetime 2025-03-28 11:26:40
  1193. // @param questionId int
  1194. func DelEsRagQuestion(questionId int) {
  1195. if utils.EsWechatArticleAbstractName == `` {
  1196. return
  1197. }
  1198. var err error
  1199. defer func() {
  1200. if err != nil {
  1201. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  1202. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  1203. }
  1204. }()
  1205. err = elastic.RagQuestionEsDel(strconv.Itoa(questionId))
  1206. }
  1207. // extractLabels
  1208. // @Description: 提取摘要中的标签并去重
  1209. // @author: Roc
  1210. // @datetime 2025-04-18 17:16:05
  1211. // @param text string
  1212. // @return industryTags []string
  1213. // @return varietyTags []string
  1214. func extractLabels(text string) (tags []string) {
  1215. reTag := regexp.MustCompile(`【([^】]*)】`)
  1216. // 提取所有标签
  1217. tagMatches := reTag.FindAllStringSubmatch(text, -1)
  1218. tagSet := make(map[string]bool)
  1219. for _, match := range tagMatches {
  1220. if len(match) > 1 {
  1221. tagSet[match[1]] = true
  1222. }
  1223. }
  1224. // 将去重后的标签转换为切片
  1225. for tag := range tagSet {
  1226. // 为空串就不处理
  1227. if tag == `` {
  1228. continue
  1229. }
  1230. tags = append(tags, tag)
  1231. }
  1232. return
  1233. }
  1234. var aiAbstractTagMap = map[string]int{}
  1235. // GetTagIdByName
  1236. // @Description: 获取标签ID
  1237. // @author: Roc
  1238. // @datetime 2025-04-18 17:25:46
  1239. // @param tagName string
  1240. // @return tagId int
  1241. // @return err error
  1242. func GetTagIdByName(tagName string) (tagId int, err error) {
  1243. tagName = strings.TrimSpace(tagName)
  1244. tagId, ok := aiAbstractTagMap[tagName]
  1245. if ok {
  1246. return
  1247. }
  1248. obj := rag.Tag{}
  1249. item, err := obj.GetByCondition(fmt.Sprintf(` AND %s = ? `, rag.TagColumns.TagName), []interface{}{tagName})
  1250. if err != nil {
  1251. if !utils.IsErrNoRow(err) {
  1252. err = fmt.Errorf("获取标签失败,Err:" + err.Error())
  1253. return
  1254. }
  1255. item = &rag.Tag{
  1256. TagId: 0,
  1257. TagName: tagName,
  1258. Sort: 0,
  1259. ModifyTime: time.Now(),
  1260. CreateTime: time.Now(),
  1261. }
  1262. err = item.Create()
  1263. if err != nil {
  1264. err = fmt.Errorf("添加标签失败,Err:" + err.Error())
  1265. return
  1266. }
  1267. }
  1268. tagId = item.TagId
  1269. aiAbstractTagMap[tagName] = tagId
  1270. return
  1271. }