wechat_platform.go 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435
  1. package services
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "errors"
  6. "eta/eta_api/cache"
  7. "eta/eta_api/models"
  8. "eta/eta_api/models/rag"
  9. "eta/eta_api/services/elastic"
  10. "eta/eta_api/services/llm"
  11. "eta/eta_api/services/llm/facade"
  12. "eta/eta_api/utils"
  13. "eta/eta_api/utils/llm/eta_llm/eta_llm_http"
  14. "fmt"
  15. html2 "golang.org/x/net/html"
  16. "html"
  17. "os"
  18. "path"
  19. "regexp"
  20. "strconv"
  21. "strings"
  22. "time"
  23. )
  24. // AddWechatPlatform
  25. // @Description: 添加新的公众号
  26. // @param item
  27. func AddWechatPlatform(item *rag.WechatPlatform) {
  28. var err error
  29. defer func() {
  30. if err != nil {
  31. utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
  32. }
  33. }()
  34. if item.FakeId != `` {
  35. return
  36. }
  37. if item.ArticleLink == `` {
  38. return
  39. }
  40. articleLink := item.ArticleLink
  41. articleDetail, err := llm.SearchByWechatArticle(item.ArticleLink)
  42. if err != nil {
  43. return
  44. }
  45. if articleDetail.Appuin == `` {
  46. err = fmt.Errorf("文章内未匹配到公众号唯一标识")
  47. return
  48. }
  49. wechatPlatform := new(rag.WechatPlatform)
  50. // 查找是否存在这个公众号id的
  51. wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
  52. if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
  53. err = tmpErr
  54. return
  55. }
  56. if tmpErr == nil {
  57. // 如果找到了,那么需要将当前的给移除掉
  58. err = item.Del()
  59. if err != nil {
  60. return
  61. }
  62. // 并将查出来的微信公众号摘出来的数据重新赋值
  63. item = wechatPlatformInfo
  64. } else if utils.IsErrNoRow(tmpErr) {
  65. // 如果没找到,那么就变更当前的信息
  66. item.FakeId = articleDetail.Appuin
  67. item.Nickname = articleDetail.Nickname
  68. //item.Alias = req.Alias
  69. item.RoundHeadImg = articleDetail.RoundHeadImg
  70. //item.ServiceType = req.ServiceType
  71. item.Signature = articleDetail.ProfileSignature
  72. //item.Verified = verified
  73. item.ModifyTime = time.Now()
  74. err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
  75. if err != nil {
  76. return
  77. }
  78. // 修改公众号头像
  79. go replaceWechatPlatformPic(item)
  80. }
  81. // 把刚搜索的文章加入到文章库中
  82. AddWechatArticle(item, articleLink, articleDetail, nil)
  83. BeachAddWechatArticle(item, 10)
  84. fmt.Println("公众号入库完成")
  85. return
  86. }
  87. // AddWechatArticle
  88. // @Description: 添加公众号文章入库
  89. // @author: Roc
  90. // @datetime 2025-03-05 13:24:14
  91. // @param item *rag.WechatPlatform
  92. // @param link string
  93. // @param articleDetail WechatArticleDataResp
  94. func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail llm.WechatArticleDataResp, articleMenu *llm.ArticleMenu) {
  95. var err error
  96. defer func() {
  97. if err != nil {
  98. utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
  99. }
  100. }()
  101. obj := new(rag.WechatArticle)
  102. _, err = obj.GetByLink(articleLink)
  103. if err == nil {
  104. // 文章已经入库了,不需要重复入库
  105. return
  106. }
  107. // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
  108. if !utils.IsErrNoRow(err) {
  109. return
  110. }
  111. // 这个时候,说明数据库中没有这个文章,那么需要文章入库
  112. err = nil
  113. var publishAt time.Time
  114. if articleDetail.CreateAt != `` {
  115. createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
  116. if tmpErr == nil {
  117. publishAt = time.Unix(int64(createAtInt), 1000)
  118. }
  119. } else if articleMenu != nil {
  120. publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
  121. }
  122. content := articleDetail.HtmlContent
  123. // 图片下载下来到本地,如果成功了,那么就用新的
  124. tmpContent, err := ReplaceHtmlImg(content)
  125. if tmpContent != `` {
  126. content = tmpContent
  127. }
  128. obj = &rag.WechatArticle{
  129. WechatArticleId: 0,
  130. WechatPlatformId: item.WechatPlatformId,
  131. FakeId: item.FakeId,
  132. Title: articleDetail.Title,
  133. Link: articleLink,
  134. CoverUrl: articleDetail.CoverUrl,
  135. Description: articleDetail.Desc,
  136. Content: html.EscapeString(content),
  137. TextContent: articleDetail.TextContent,
  138. Country: articleDetail.CountryName,
  139. Province: articleDetail.ProvinceName,
  140. City: articleDetail.CityName,
  141. //Abstract: "",
  142. //ArticleCreateTime: createAt,
  143. ModifyTime: time.Now(),
  144. CreateTime: time.Now(),
  145. }
  146. if !publishAt.IsZero() {
  147. obj.ArticleCreateTime = publishAt
  148. }
  149. if articleMenu != nil {
  150. obj.Title = articleMenu.Title
  151. //obj.Link = articleMenu.Link
  152. obj.CoverUrl = articleMenu.Cover
  153. obj.Description = articleMenu.Digest
  154. }
  155. err = obj.Create()
  156. // 修改文章封面图
  157. go replaceWechatArticleCoverPic(obj)
  158. // 文章入库成功后,需要将相关信息入摘要库
  159. go cache.AddWechatArticleLlmOpToCache(obj.WechatArticleId, 0, ``)
  160. }
  161. // BeachAddWechatArticle
  162. // @Description: 批量添加公众号文章
  163. // @param item
  164. // @param num
  165. // @return err
  166. func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
  167. var err error
  168. defer func() {
  169. //fmt.Println("公众号文章批量入库完成")
  170. if err != nil {
  171. utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
  172. fmt.Println("公众号文章批量入库失败,err:", err)
  173. }
  174. }()
  175. if item.FakeId == `` {
  176. return
  177. }
  178. wechatArticleObj := new(rag.WechatArticle)
  179. // 获取公众号的文章列表
  180. articleListResp, err := llm.SearchByWechatArticleList(item.FakeId, num)
  181. if err != nil {
  182. return
  183. }
  184. for _, articleMenu := range articleListResp.List {
  185. // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
  186. _, err = wechatArticleObj.GetByLink(articleMenu.Link)
  187. if err == nil {
  188. // 文章已经入库了,不需要重复入库
  189. continue
  190. }
  191. if !utils.IsErrNoRow(err) {
  192. return
  193. }
  194. err = nil
  195. articleDetail, tmpErr := llm.SearchByWechatArticle(articleMenu.Link)
  196. if tmpErr != nil {
  197. err = tmpErr
  198. return
  199. }
  200. // 把刚搜索的文章加入到指标库
  201. AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
  202. //time.Sleep(10 * time.Second)
  203. // 随机休眠,至少大于10s
  204. sleepTimeInt := utils.GetRandInt(10, 20)
  205. if sleepTimeInt < 10 {
  206. sleepTimeInt = 10
  207. }
  208. time.Sleep(time.Duration(sleepTimeInt) * time.Second)
  209. }
  210. return
  211. }
  212. //
  213. //// GenerateArticleAbstract
  214. //// @Description: 文章摘要生成
  215. //// @author: Roc
  216. //// @datetime 2025-03-10 16:17:53
  217. //// @param item *rag.WechatArticle
  218. //func GenerateArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
  219. // var err error
  220. // defer func() {
  221. // if err != nil {
  222. // utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  223. // fmt.Println("文章转临时文件失败,err:", err)
  224. // }
  225. // }()
  226. //
  227. // // 内容为空,那就不需要生成摘要
  228. // if item.TextContent == `` {
  229. // return
  230. // }
  231. //
  232. // abstractObj := rag.WechatArticleAbstract{}
  233. // tmpAbstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
  234. // // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
  235. // if err == nil && !forceGenerate {
  236. // // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
  237. // WechatArticleAbstractToKnowledge(item, tmpAbstractItem, false)
  238. //
  239. // return
  240. // }
  241. // if !utils.IsErrNoRow(err) {
  242. // return
  243. // }
  244. //
  245. // //开始对话
  246. // abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_ETA_REPORT)
  247. // if tmpErr != nil {
  248. // err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  249. // return
  250. // }
  251. //
  252. // // 添加问答记录
  253. // if len(addArticleChatRecordList) > 0 {
  254. // recordObj := rag.WechatArticleChatRecord{}
  255. // err = recordObj.CreateInBatches(addArticleChatRecordList)
  256. // if err != nil {
  257. // return
  258. // }
  259. // }
  260. //
  261. // if abstract != `` {
  262. // if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  263. // item.AbstractStatus = 2
  264. // item.ModifyTime = time.Now()
  265. // err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  266. // return
  267. // }
  268. // item.AbstractStatus = 1
  269. // item.ModifyTime = time.Now()
  270. // err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  271. //
  272. // abstractItem := &rag.WechatArticleAbstract{
  273. // WechatArticleAbstractId: 0,
  274. // WechatArticleId: item.WechatArticleId,
  275. // Content: abstract,
  276. // Version: 0,
  277. // VectorKey: "",
  278. // ModifyTime: time.Now(),
  279. // CreateTime: time.Now(),
  280. // }
  281. // err = abstractItem.Create()
  282. // if err != nil {
  283. // return
  284. // }
  285. //
  286. // // 数据入ES库
  287. // go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  288. //
  289. // WechatArticleAbstractToKnowledge(item, abstractItem, false)
  290. // }
  291. //}
  292. // GenerateArticleAbstract
  293. // @Description: 文章摘要生成(默认提示词批量生成)
  294. // @author: Roc
  295. // @datetime 2025-03-10 16:17:53
  296. // @param item *rag.WechatArticle
  297. func GenerateWechatArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
  298. var err error
  299. defer func() {
  300. if err != nil {
  301. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  302. fmt.Println("文章转临时文件失败,err:", err)
  303. }
  304. }()
  305. // 内容为空,那就不需要生成摘要
  306. if item.TextContent == `` {
  307. return
  308. }
  309. questionObj := rag.Question{}
  310. questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100)
  311. if err != nil {
  312. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  313. return
  314. }
  315. // 没问题就不生成了
  316. if len(questionList) <= 0 {
  317. return
  318. }
  319. for _, question := range questionList {
  320. GenerateWechatArticleAbstractByQuestion(item, question, forceGenerate)
  321. }
  322. return
  323. }
  324. // GenerateWechatArticleAbstractByQuestion
  325. // @Description: 文章摘要生成(根据提示词生成)
  326. // @author: Roc
  327. // @datetime 2025-04-24 11:23:27
  328. // @param item *rag.WechatArticle
  329. // @param question *rag.Question
  330. // @param forceGenerate bool
  331. // @return err error
  332. func GenerateWechatArticleAbstractByQuestion(item *rag.WechatArticle, question *rag.Question, forceGenerate bool) (err error) {
  333. defer func() {
  334. if err != nil {
  335. utils.FileLog.Error("文章摘要生成(根据提示词生成)失败,err:%v", err)
  336. }
  337. }()
  338. // 内容为空,那就不需要生成摘要
  339. if item.TextContent == `` {
  340. return
  341. }
  342. abstractObj := rag.WechatArticleAbstract{}
  343. abstractItem, err := abstractObj.GetByWechatArticleIdAndQuestionId(item.WechatArticleId, question.QuestionId)
  344. // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
  345. if err == nil && !forceGenerate {
  346. // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
  347. WechatArticleAbstractToKnowledge(item, abstractItem, false)
  348. return
  349. }
  350. // 如果是没找到数据,那么就将报错置空
  351. if err != nil && utils.IsErrNoRow(err) {
  352. err = nil
  353. }
  354. //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
  355. questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)
  356. //开始对话
  357. abstract, industryTags, _, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_WECHAT, questionStr)
  358. if tmpErr != nil {
  359. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  360. return
  361. }
  362. if abstract == `` {
  363. return
  364. }
  365. if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  366. item.AbstractStatus = 2
  367. item.ModifyTime = time.Now()
  368. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  369. return
  370. }
  371. var tagIdJsonStr string
  372. var tagNameJsonStr string
  373. // 标签ID
  374. {
  375. tagIdList := make([]int, 0)
  376. tagNameList := make([]string, 0)
  377. tagIdMap := make(map[int]bool)
  378. if abstractItem != nil && abstractItem.Tags != `` {
  379. tmpErr = json.Unmarshal([]byte(abstractItem.Tags), &tagIdList)
  380. if tmpErr != nil {
  381. utils.FileLog.Info(fmt.Sprintf("json.Unmarshal Tags 失败,标签数据:%s,Err:%s", abstractItem.Tags, tmpErr.Error()))
  382. } else {
  383. for _, tagId := range tagIdList {
  384. tagIdMap[tagId] = true
  385. }
  386. }
  387. }
  388. if abstractItem.TagsName != `` {
  389. tagNameList = strings.Split(abstractItem.TagsName, ",")
  390. }
  391. for _, tagName := range industryTags {
  392. tagId, tmpErr := GetTagIdByName(tagName)
  393. if tmpErr != nil {
  394. utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error()))
  395. }
  396. if _, ok := tagIdMap[tagId]; !ok {
  397. tagIdList = append(tagIdList, tagId)
  398. tagNameList = append(tagNameList, tagName)
  399. tagIdMap[tagId] = true
  400. }
  401. }
  402. //for _, tagName := range varietyTags {
  403. // tagId, tmpErr := GetTagIdByName(tagName)
  404. // if tmpErr != nil {
  405. // utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error()))
  406. // }
  407. // if _, ok := tagIdMap[tagId]; !ok {
  408. // tagIdList = append(tagIdList, tagId)
  409. // tagIdMap[tagId] = true
  410. // }
  411. //}
  412. tagIdJsonByte, err := json.Marshal(tagIdList)
  413. if err != nil {
  414. utils.FileLog.Info(fmt.Sprintf("标签ID序列化失败,Err:%s", tmpErr.Error()))
  415. } else {
  416. tagIdJsonStr = string(tagIdJsonByte)
  417. }
  418. tagNameJsonStr = strings.Join(tagNameList, `,`)
  419. }
  420. item.AbstractStatus = 1
  421. item.ModifyTime = time.Now()
  422. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  423. if abstractItem == nil || abstractItem.WechatArticleAbstractId <= 0 {
  424. abstractItem = &rag.WechatArticleAbstract{
  425. WechatArticleAbstractId: 0,
  426. WechatArticleId: item.WechatArticleId,
  427. Content: abstract,
  428. Version: 1,
  429. VectorKey: "",
  430. ModifyTime: time.Now(),
  431. CreateTime: time.Now(),
  432. QuestionId: question.QuestionId,
  433. Tags: tagIdJsonStr,
  434. TagsName: tagNameJsonStr,
  435. QuestionContent: question.QuestionContent,
  436. }
  437. err = abstractItem.Create()
  438. } else {
  439. // 添加历史记录
  440. rag.AddArticleAbstractHistoryByWechatArticleAbstract(abstractItem)
  441. abstractItem.Content = abstract
  442. abstractItem.Version++
  443. abstractItem.ModifyTime = time.Now()
  444. abstractItem.Tags = tagIdJsonStr
  445. abstractItem.TagsName = tagNameJsonStr
  446. abstractItem.QuestionContent = question.QuestionContent
  447. err = abstractItem.Update([]string{"content", "version", "modify_time", "tags", "tags_name", "question_content"})
  448. }
  449. if err != nil {
  450. return
  451. }
  452. // 数据入ES库
  453. go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  454. WechatArticleAbstractToKnowledge(item, abstractItem, false)
  455. return
  456. }
  457. // DelDoc
  458. // @Description: 删除摘要向量库
  459. // @author: Roc
  460. // @datetime 2025-03-12 16:55:05
  461. // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
  462. // @return err error
  463. func DelDoc(wechatArticleAbstractList []*rag.WechatArticleAbstract) (err error) {
  464. defer func() {
  465. if err != nil {
  466. utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
  467. fmt.Println("删除摘要向量库文件失败,err:", err)
  468. }
  469. }()
  470. vectorKeyList := make([]string, 0)
  471. wechatArticleAbstractIdList := make([]int, 0)
  472. for _, v := range wechatArticleAbstractList {
  473. if v.VectorKey == `` {
  474. continue
  475. }
  476. vectorKeyList = append(vectorKeyList, v.VectorKey)
  477. wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.WechatArticleAbstractId)
  478. }
  479. // 没有就不删除
  480. if len(vectorKeyList) <= 0 {
  481. return
  482. }
  483. _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
  484. if err != nil {
  485. err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
  486. return
  487. }
  488. //fmt.Println(resp)
  489. obj := rag.WechatArticleAbstract{}
  490. err = obj.DelVectorKey(wechatArticleAbstractIdList)
  491. return
  492. }
  493. // DelLlmDoc
  494. // @Description: 删除摘要向量库
  495. // @author: Roc
  496. // @datetime 2025-03-12 16:55:05
  497. // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
  498. // @return err error
  499. func DelLlmDoc(vectorKeyList []string, wechatArticleAbstractIdList []int) (err error) {
  500. defer func() {
  501. if err != nil {
  502. utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
  503. fmt.Println("删除摘要向量库文件失败,err:", err)
  504. }
  505. }()
  506. // 没有就不删除
  507. if len(vectorKeyList) <= 0 {
  508. return
  509. }
  510. _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
  511. if err != nil {
  512. err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
  513. return
  514. }
  515. //fmt.Println(resp)
  516. obj := rag.WechatArticleAbstract{}
  517. err = obj.DelVectorKey(wechatArticleAbstractIdList)
  518. return
  519. }
  520. func getAnswerByContent(articleId int, source int, questionStr string) (answer string, industryTags, varietyTags []string, err error) {
  521. //addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
  522. result, err := facade.AIGCBaseOnPromote(facade.AIGC{
  523. Promote: questionStr,
  524. Source: source,
  525. ArticleId: articleId,
  526. LLMModel: `deepseek-r1:32b`,
  527. })
  528. if err != nil {
  529. return
  530. }
  531. // JSON字符串转字节
  532. //answerByte, err := json.Marshal(result)
  533. //if err != nil {
  534. // return
  535. //}
  536. //originalAnswer := string(answerByte)
  537. // 提取 </think> 后面的内容
  538. thinkEndIndex := strings.Index(result.Answer, "</think>")
  539. if thinkEndIndex != -1 {
  540. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  541. } else {
  542. answer = result.Answer
  543. }
  544. answer = strings.TrimSpace(answer)
  545. // 提取标签
  546. industryTags, varietyTags = extractLabels(answer)
  547. //// 待入库的数据
  548. //addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  549. // WechatArticleChatRecordId: 0,
  550. // WechatArticleId: articleId,
  551. // ChatUserType: "user",
  552. // Content: questionStr,
  553. // SendTime: time.Now(),
  554. // CreatedTime: time.Now(),
  555. // UpdateTime: time.Now(),
  556. //}, &rag.WechatArticleChatRecord{
  557. // WechatArticleChatRecordId: 0,
  558. // WechatArticleId: articleId,
  559. // ChatUserType: "assistant",
  560. // Content: originalAnswer,
  561. // SendTime: time.Now(),
  562. // CreatedTime: time.Now(),
  563. // UpdateTime: time.Now(),
  564. //})
  565. return
  566. }
  567. func getAnswerByContentBak(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
  568. historyList := make([]eta_llm_http.HistoryContent, 0)
  569. addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
  570. questionObj := rag.Question{}
  571. questionList, err := questionObj.GetListByCondition(``, ``, []interface{}{}, 0, 100)
  572. if err != nil {
  573. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  574. return
  575. }
  576. // 没问题就不生成了
  577. if len(questionList) <= 0 {
  578. return
  579. }
  580. //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
  581. questionStrList := []string{`你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`}
  582. for _, v := range questionList {
  583. questionStrList = append(questionStrList, v.QuestionContent)
  584. }
  585. questionStr := strings.Join(questionStrList, "\n")
  586. originalAnswer, result, err := llm.ChatByFile(docId, questionStr, historyList)
  587. fmt.Println(result)
  588. if err != nil {
  589. err = fmt.Errorf("LLM对话失败,Err:" + err.Error())
  590. return
  591. }
  592. // 提取 </think> 后面的内容
  593. thinkEndIndex := strings.Index(result.Answer, "</think>")
  594. if thinkEndIndex != -1 {
  595. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  596. } else {
  597. answer = result.Answer
  598. }
  599. answer = strings.TrimSpace(answer)
  600. // 待入库的数据
  601. addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  602. WechatArticleChatRecordId: 0,
  603. WechatArticleId: wechatArticleId,
  604. ChatUserType: "user",
  605. Content: questionStr,
  606. SendTime: time.Now(),
  607. CreatedTime: time.Now(),
  608. UpdateTime: time.Now(),
  609. }, &rag.WechatArticleChatRecord{
  610. WechatArticleChatRecordId: 0,
  611. WechatArticleId: wechatArticleId,
  612. ChatUserType: "assistant",
  613. Content: originalAnswer,
  614. SendTime: time.Now(),
  615. CreatedTime: time.Now(),
  616. UpdateTime: time.Now(),
  617. })
  618. return
  619. }
  620. // ArticleToKnowledge
  621. // @Description: 原文入向量库
  622. // @author: Roc
  623. // @datetime 2025-03-10 16:13:16
  624. // @param item *rag.WechatArticle
  625. func ArticleToKnowledge(item *rag.WechatArticle) {
  626. if item.TextContent == `` {
  627. return
  628. }
  629. var err error
  630. defer func() {
  631. if err != nil {
  632. utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err)
  633. fmt.Println("上传文章原文到知识库失败,err:", err)
  634. }
  635. }()
  636. // 生成临时文件
  637. //dateDir := time.Now().Format("20060102")
  638. //uploadDir := "./static/ai/article/" + dateDir
  639. uploadDir := "./static/ai/article"
  640. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  641. if err != nil {
  642. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  643. return
  644. }
  645. //fileName := utils.RemoveSpecialChars(item.Title) + `.md`
  646. fileName := utils.MD5(item.Title) + `.md`
  647. tmpFilePath := uploadDir + "/" + fileName
  648. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  649. if err != nil {
  650. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  651. return
  652. }
  653. defer func() {
  654. os.Remove(tmpFilePath)
  655. }()
  656. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName]
  657. // 上传临时文件到LLM
  658. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  659. if err != nil {
  660. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  661. return
  662. }
  663. if len(uploadFileResp.FailedFiles) > 0 {
  664. for _, v := range uploadFileResp.FailedFiles {
  665. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  666. }
  667. }
  668. item.VectorKey = tmpFilePath
  669. item.ModifyTime = time.Now()
  670. err = item.Update([]string{"vector_key", "modify_time"})
  671. }
  672. // WechatArticleAbstractToKnowledge
  673. // @Description: 摘要入向量库
  674. // @author: Roc
  675. // @datetime 2025-03-10 16:14:59
  676. // @param wechatArticleItem *rag.WechatArticle
  677. // @param abstractItem *rag.WechatArticleAbstract
  678. func WechatArticleAbstractToKnowledge(wechatArticleItem *rag.WechatArticle, abstractItem *rag.WechatArticleAbstract, isReUpload bool) {
  679. if abstractItem.Content == `` {
  680. return
  681. }
  682. // 已经生成了,那就不处理了
  683. if abstractItem.VectorKey != `` && !isReUpload {
  684. return
  685. }
  686. var err error
  687. defer func() {
  688. if err != nil {
  689. utils.FileLog.Error("摘要入向量库失败,err:%v", err)
  690. fmt.Println("摘要入向量库失败,err:", err)
  691. }
  692. // 数据入ES库
  693. go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
  694. }()
  695. // 生成临时文件
  696. //dateDir := time.Now().Format("20060102")
  697. //uploadDir := + "./static/ai/article/" + dateDir
  698. uploadDir := "./static/ai/abstract"
  699. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  700. if err != nil {
  701. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  702. return
  703. }
  704. fileName := utils.MD5(fmt.Sprintf("%d_%d", utils.AI_ARTICLE_SOURCE_WECHAT, wechatArticleItem.WechatArticleId)) + `.md`
  705. tmpFilePath := uploadDir + "/" + fileName
  706. err = utils.SaveToFile(abstractItem.Content, tmpFilePath)
  707. if err != nil {
  708. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  709. return
  710. }
  711. defer func() {
  712. os.Remove(tmpFilePath)
  713. }()
  714. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
  715. // 上传临时文件到LLM
  716. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  717. if err != nil {
  718. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  719. return
  720. }
  721. if len(uploadFileResp.FailedFiles) > 0 {
  722. for _, v := range uploadFileResp.FailedFiles {
  723. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  724. }
  725. }
  726. abstractItem.VectorKey = tmpFilePath
  727. abstractItem.ModifyTime = time.Now()
  728. err = abstractItem.Update([]string{"vector_key", "modify_time"})
  729. }
  730. // replaceWechatPlatformPic
  731. // @Description: 替换公众号头像
  732. // @author: Roc
  733. // @datetime 2025-03-11 09:38:24
  734. // @param item *rag.WechatPlatform
  735. func replaceWechatPlatformPic(item *rag.WechatPlatform) {
  736. var err error
  737. defer func() {
  738. if err != nil {
  739. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  740. fmt.Println("替换公众号头像失败,err:", err)
  741. }
  742. }()
  743. if item.RoundHeadImg == `` {
  744. return
  745. }
  746. resourceUrl, err := downloadWxPicAndUploadToOss(item.RoundHeadImg, `head_img`)
  747. if err != nil {
  748. return
  749. }
  750. item.RoundHeadImg = resourceUrl
  751. err = item.Update([]string{"round_head_img"})
  752. }
  753. // replaceWechatArticleCoverPic
  754. // @Description: 替换文章封面图
  755. // @author: Roc
  756. // @datetime 2025-03-11 09:38:35
  757. // @param item *rag.WechatArticle
  758. func replaceWechatArticleCoverPic(item *rag.WechatArticle) {
  759. var err error
  760. defer func() {
  761. if err != nil {
  762. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  763. fmt.Println("替换公众号头像失败,err:", err)
  764. }
  765. // 数据入ES库
  766. AddOrEditEsWechatArticle(item.WechatArticleId)
  767. }()
  768. if item.CoverUrl == `` {
  769. return
  770. }
  771. resourceUrl, err := downloadWxPicAndUploadToOss(item.CoverUrl, `cover_url`)
  772. if err != nil {
  773. return
  774. }
  775. item.CoverUrl = resourceUrl
  776. err = item.Update([]string{"cover_url"})
  777. }
  778. // replaceWechatArticlePic
  779. // @Description: 替换文章内容图
  780. // @author: Roc
  781. // @datetime 2025-03-11 09:38:35
  782. // @param item *rag.WechatArticle
  783. func ReplaceWechatArticlePic(item *rag.WechatArticle) {
  784. var err error
  785. defer func() {
  786. if err != nil {
  787. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  788. fmt.Println("替换公众号头像失败,err:", err)
  789. }
  790. }()
  791. if item.Content == `` {
  792. return
  793. }
  794. content, err := ReplaceHtmlImg(html.UnescapeString(item.Content))
  795. if err != nil {
  796. return
  797. }
  798. item.Content = html.EscapeString(content)
  799. err = item.Update([]string{"content"})
  800. return
  801. }
  802. // downloadWxPicAndUploadToOss
  803. // @Description: 下载微信图片并上传到OSS
  804. // @author: Roc
  805. // @datetime 2025-03-11 09:28:49
  806. // @param wxPicUrl string
  807. // @return resourceUrl string
  808. // @return err error
  809. func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, err error) {
  810. localFilePath, err := utils.DownloadWxImage(wxPicUrl)
  811. if err != nil {
  812. return
  813. }
  814. defer func() {
  815. os.Remove(localFilePath)
  816. }()
  817. ossClient := NewOssClient()
  818. if ossClient == nil {
  819. err = fmt.Errorf(`初始化OSS服务失败`)
  820. return
  821. }
  822. ext := path.Ext(localFilePath)
  823. fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), utils.GetRandStringNoSpecialChar(16), ext)
  824. //savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName
  825. savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName)
  826. resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath)
  827. if err != nil {
  828. err = fmt.Errorf("文件上传失败,Err:" + err.Error())
  829. return
  830. }
  831. return
  832. }
  833. // ReplaceHtmlImg
  834. // @Description: 将html中的图片替换成自己的
  835. // @author: Roc
  836. // @datetime 2025-03-11 14:32:00
  837. // @param htmlStr string
  838. // @return newHtml string
  839. // @return err error
  840. func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) {
  841. doc, err := html2.Parse(strings.NewReader(htmlStr))
  842. if err != nil {
  843. return
  844. }
  845. if err != nil {
  846. return
  847. }
  848. handleNode(doc)
  849. // 将处理后的HTML节点重新渲染为HTML字符串
  850. var buf bytes.Buffer
  851. if err = html2.Render(&buf, doc); err != nil {
  852. fmt.Println(err)
  853. return
  854. }
  855. newHtml = buf.String()
  856. return
  857. }
  858. // handleNode
  859. // @Description: html节点处理
  860. // @author: Roc
  861. // @datetime 2025-03-11 14:32:45
  862. // @param n *html2.Node
  863. func handleNode(n *html2.Node) {
  864. if n.Type == html2.ElementNode {
  865. if n.Data == "img" {
  866. for k, attr := range n.Attr {
  867. // 新增代码:如果标签是img且存在data-src属性,则将data-src的值赋给src
  868. if n.Data == "img" && attr.Key == "src" {
  869. resourceUrl, tmpErr := downloadWxPicAndUploadToOss(attr.Val, `article`)
  870. if tmpErr != nil {
  871. continue
  872. }
  873. attr.Val = resourceUrl
  874. }
  875. n.Attr[k] = attr
  876. }
  877. }
  878. }
  879. for c := n.FirstChild; c != nil; c = c.NextSibling {
  880. handleNode(c)
  881. }
  882. }
  883. // AddOrEditEsWechatPlatformId
  884. // @Description: 批量处理某个公众号下的文章到ES
  885. // @author: Roc
  886. // @datetime 2025-03-13 11:01:28
  887. // @param articleId int
  888. func AddOrEditEsWechatPlatformId(wechatPlatformId int) {
  889. if utils.EsWechatArticleName == `` {
  890. return
  891. }
  892. obj := rag.WechatArticle{}
  893. list, _ := obj.GetListByCondition(` wechat_article_id `, ` AND wechat_platform_id = ? `, []interface{}{wechatPlatformId}, 0, 1000000)
  894. for _, item := range list {
  895. AddOrEditEsWechatArticle(item.WechatArticleId)
  896. }
  897. }
  898. // AddOrEditEsWechatArticle
  899. // @Description: 新增/编辑微信文章入ES
  900. // @author: Roc
  901. // @datetime 2025-03-13 11:01:28
  902. // @param articleId int
  903. func AddOrEditEsWechatArticle(articleId int) {
  904. if utils.EsWechatArticleName == `` {
  905. return
  906. }
  907. var err error
  908. defer func() {
  909. if err != nil {
  910. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  911. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  912. }
  913. }()
  914. obj := rag.WechatArticle{}
  915. articleInfo, err := obj.GetById(articleId)
  916. if err != nil {
  917. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  918. return
  919. }
  920. platformObj := rag.WechatPlatform{}
  921. platformInfo, err := platformObj.GetById(articleInfo.WechatPlatformId)
  922. if err != nil {
  923. err = fmt.Errorf("获取公众号平台信息失败,Err:" + err.Error())
  924. return
  925. }
  926. esItem := elastic.WechatArticleAndPlatform{
  927. WechatArticleId: articleInfo.WechatArticleId,
  928. WechatPlatformId: articleInfo.WechatPlatformId,
  929. FakeId: articleInfo.FakeId,
  930. Title: articleInfo.Title,
  931. Link: articleInfo.Link,
  932. CoverUrl: articleInfo.CoverUrl,
  933. Description: articleInfo.Description,
  934. //Content: articleInfo.Content,
  935. //TextContent: articleInfo.TextContent,
  936. //AbstractStatus: articleInfo.AbstractStatus,
  937. Country: articleInfo.Country,
  938. Province: articleInfo.Province,
  939. City: articleInfo.City,
  940. ArticleCreateTime: articleInfo.ArticleCreateTime,
  941. IsDeleted: articleInfo.IsDeleted,
  942. ModifyTime: articleInfo.ModifyTime,
  943. CreateTime: articleInfo.CreateTime,
  944. Nickname: platformInfo.Nickname,
  945. Alias: platformInfo.Alias,
  946. RoundHeadImg: platformInfo.RoundHeadImg,
  947. }
  948. err = elastic.WechatArticleEsAddOrEdit(strconv.Itoa(articleInfo.WechatArticleId), esItem)
  949. }
  950. // AddOrEditEsWechatArticleAbstract
  951. // @Description: 新增/编辑微信文章摘要入ES
  952. // @author: Roc
  953. // @datetime 2025-03-13 14:13:47
  954. // @param articleAbstractId int
  955. func AddOrEditEsWechatArticleAbstract(articleAbstractId int) {
  956. if utils.EsWechatArticleAbstractName == `` {
  957. return
  958. }
  959. var err error
  960. defer func() {
  961. if err != nil {
  962. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  963. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  964. }
  965. }()
  966. obj := rag.WechatArticleAbstract{}
  967. abstractInfo, err := obj.GetById(articleAbstractId)
  968. if err != nil {
  969. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  970. return
  971. }
  972. articleObj := rag.WechatArticle{}
  973. articleInfo, err := articleObj.GetById(abstractInfo.WechatArticleId)
  974. if err != nil {
  975. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  976. return
  977. }
  978. // 标签ID
  979. tagIdList := make([]int, 0)
  980. if abstractInfo.Tags != `` {
  981. err = json.Unmarshal([]byte(abstractInfo.Tags), &tagIdList)
  982. if err != nil {
  983. err = fmt.Errorf("报告标签ID转int失败,Err:" + err.Error())
  984. utils.FileLog.Info(fmt.Sprintf("json.Unmarshal 报告标签ID转int失败,标签数据:%s,Err:%s", abstractInfo.Tags, err.Error()))
  985. }
  986. }
  987. tagNameList := make([]string, 0)
  988. if abstractInfo.TagsName != `` {
  989. tagNameList = strings.Split(abstractInfo.TagsName, ",")
  990. }
  991. esItem := elastic.WechatArticleAbstractItem{
  992. WechatArticleAbstractId: abstractInfo.WechatArticleAbstractId,
  993. WechatArticleId: abstractInfo.WechatArticleId,
  994. WechatPlatformId: articleInfo.WechatPlatformId,
  995. Abstract: abstractInfo.Content,
  996. QuestionId: abstractInfo.QuestionId,
  997. Version: abstractInfo.Version,
  998. VectorKey: abstractInfo.VectorKey,
  999. ModifyTime: articleInfo.ModifyTime,
  1000. CreateTime: articleInfo.CreateTime,
  1001. Title: articleInfo.Title,
  1002. Link: articleInfo.Link,
  1003. TagIdList: tagIdList,
  1004. TagNameList: tagNameList,
  1005. }
  1006. err = elastic.WechatArticleAbstractEsAddOrEdit(strconv.Itoa(articleAbstractId), esItem)
  1007. }
  1008. // DelWechatArticleAbstract
  1009. // @Description: 删除微信文章摘要
  1010. // @author: Roc
  1011. // @datetime 2025-04-23 17:36:22
  1012. // @param abstractIdList []int
  1013. // @return err error
  1014. func DelWechatArticleAbstract(abstractIdList []int) (err error) {
  1015. obj := rag.WechatArticleAbstract{}
  1016. list, err := obj.GetByIdList(abstractIdList)
  1017. if err != nil {
  1018. if !utils.IsErrNoRow(err) {
  1019. err = errors.New("删除向量库失败,Err:" + err.Error())
  1020. } else {
  1021. err = nil
  1022. }
  1023. return
  1024. }
  1025. err = delWechatArticleAbstract(list)
  1026. return
  1027. }
  1028. // DelWechatArticleAbstract
  1029. // @Description: 删除微信文章摘要
  1030. // @author: Roc
  1031. // @datetime 2025-04-23 17:36:22
  1032. // @param abstractIdList []int
  1033. // @return err error
  1034. func DelWechatArticleAbstractByQuestionId(questionId int) (err error) {
  1035. obj := rag.WechatArticleAbstract{}
  1036. list, err := obj.GetListByQuestionId(questionId)
  1037. if err != nil {
  1038. if !utils.IsErrNoRow(err) {
  1039. err = errors.New("删除向量库失败,Err:" + err.Error())
  1040. } else {
  1041. err = nil
  1042. }
  1043. return
  1044. }
  1045. err = delWechatArticleAbstract(list)
  1046. return
  1047. }
  1048. // delRagEtaReportAbstract
  1049. // @Description: 删除摘要
  1050. // @author: Roc
  1051. // @datetime 2025-04-24 15:19:19
  1052. // @param list []*rag.RagEtaReportAbstract
  1053. // @return err error
  1054. func delWechatArticleAbstract(list []*rag.WechatArticleAbstract) (err error) {
  1055. obj := rag.RagEtaReportAbstract{}
  1056. vectorKeyList := make([]string, 0)
  1057. newAbstractIdList := make([]int, 0)
  1058. if len(list) > 0 {
  1059. for _, v := range list {
  1060. // 有加入到向量库,那么就加入到待删除的向量库list中
  1061. if v.VectorKey != `` {
  1062. vectorKeyList = append(vectorKeyList, v.VectorKey)
  1063. }
  1064. newAbstractIdList = append(newAbstractIdList, v.WechatArticleAbstractId)
  1065. }
  1066. }
  1067. //if !req.IsSelectAll {
  1068. // list, err := obj.GetByIdList(req.RagEtaReportAbstractIdList)
  1069. // if err != nil {
  1070. // br.Msg = "修改失败"
  1071. // br.ErrMsg = "修改失败,查找问题失败,Err:" + err.Error()
  1072. // if utils.IsErrNoRow(err) {
  1073. // br.Msg = "问题不存在"
  1074. // br.IsSendEmail = false
  1075. // }
  1076. // return
  1077. // }
  1078. // if len(list) > 0 {
  1079. // for _, v := range list {
  1080. // // 有加入到向量库,那么就加入到待删除的向量库list中
  1081. // if v.VectorKey != `` {
  1082. // vectorKeyList = append(vectorKeyList, v.VectorKey)
  1083. // }
  1084. // wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.RagEtaReportAbstractId)
  1085. // }
  1086. // }
  1087. //} else {
  1088. // notIdMap := make(map[int]bool)
  1089. // for _, v := range req.NotRagEtaReportAbstractIdList {
  1090. // notIdMap[v] = true
  1091. // }
  1092. //
  1093. // _, list, err := getRagEtaReportAbstractList(req.KeyWord, req.TagId, 0, 100000)
  1094. // if err != nil {
  1095. // br.Msg = "修改失败"
  1096. // br.ErrMsg = "修改失败,查找问题失败,Err:" + err.Error()
  1097. // if utils.IsErrNoRow(err) {
  1098. // br.Msg = "问题不存在"
  1099. // br.IsSendEmail = false
  1100. // }
  1101. // return
  1102. // }
  1103. // if len(list) > 0 {
  1104. // for _, v := range list {
  1105. // if notIdMap[v.RagEtaReportAbstractId] {
  1106. // continue
  1107. // }
  1108. // // 有加入到向量库,那么就加入到待删除的向量库list中
  1109. // if v.VectorKey != `` {
  1110. // vectorKeyList = append(vectorKeyList, v.VectorKey)
  1111. // }
  1112. // wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.RagEtaReportAbstractId)
  1113. // }
  1114. // }
  1115. //}
  1116. // 删除向量库
  1117. err = DelLlmDoc(vectorKeyList, newAbstractIdList)
  1118. if err != nil {
  1119. err = errors.New("删除向量库失败,Err:" + err.Error())
  1120. return
  1121. }
  1122. // 删除摘要
  1123. err = obj.DelByIdList(newAbstractIdList)
  1124. if err != nil {
  1125. err = errors.New("删除失败,Err:" + err.Error())
  1126. return
  1127. }
  1128. // 删除es数据
  1129. for _, wechatArticleAbstractId := range newAbstractIdList {
  1130. go DelEsWechatArticleAbstract(wechatArticleAbstractId)
  1131. }
  1132. return
  1133. }
  1134. // DelEsWechatArticleAbstract
  1135. // @Description: 删除ES中的微信文章摘要
  1136. // @author: Roc
  1137. // @datetime 2025-03-13 14:13:47
  1138. // @param articleAbstractId int
  1139. func DelEsWechatArticleAbstract(articleAbstractId int) {
  1140. if utils.EsWechatArticleAbstractName == `` {
  1141. return
  1142. }
  1143. var err error
  1144. defer func() {
  1145. if err != nil {
  1146. utils.FileLog.Error("删除公众号微信信息到ES失败,err:%v", err)
  1147. fmt.Println("删除公众号微信信息到ES失败,err:", err)
  1148. }
  1149. }()
  1150. err = elastic.WechatArticleAbstractEsDel(strconv.Itoa(articleAbstractId))
  1151. }
  1152. // AddOrEditEsRagQuestion
  1153. // @Description: 新增/编辑知识库问题入ES
  1154. // @author: Roc
  1155. // @datetime 2025-03-28 11:25:50
  1156. // @param questionId int
  1157. func AddOrEditEsRagQuestion(questionId int) {
  1158. if utils.EsWechatArticleName == `` {
  1159. return
  1160. }
  1161. var err error
  1162. defer func() {
  1163. if err != nil {
  1164. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  1165. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  1166. }
  1167. }()
  1168. obj := rag.Question{}
  1169. questionInfo, err := obj.GetByID(questionId)
  1170. if err != nil {
  1171. err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
  1172. return
  1173. }
  1174. esItem := elastic.RagQuestionItem{
  1175. QuestionId: questionInfo.QuestionId,
  1176. QuestionTitle: questionInfo.QuestionTitle,
  1177. QuestionContent: questionInfo.QuestionContent,
  1178. Sort: questionInfo.Sort,
  1179. IsDefault: questionInfo.IsDefault,
  1180. SysUserId: questionInfo.SysUserId,
  1181. SysUserRealName: questionInfo.SysUserRealName,
  1182. ModifyTime: questionInfo.ModifyTime,
  1183. CreateTime: questionInfo.CreateTime,
  1184. }
  1185. err = elastic.RagQuestionEsAddOrEdit(strconv.Itoa(questionInfo.QuestionId), esItem)
  1186. }
  1187. // DelEsRagQuestion
  1188. // @Description: 删除ES中的知识库问题
  1189. // @author: Roc
  1190. // @datetime 2025-03-28 11:26:40
  1191. // @param questionId int
  1192. func DelEsRagQuestion(questionId int) {
  1193. if utils.EsWechatArticleAbstractName == `` {
  1194. return
  1195. }
  1196. var err error
  1197. defer func() {
  1198. if err != nil {
  1199. utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
  1200. fmt.Println("添加公众号微信信息到ES失败,err:", err)
  1201. }
  1202. }()
  1203. err = elastic.RagQuestionEsDel(strconv.Itoa(questionId))
  1204. }
  // Patterns used by extractLabels, compiled once at package initialisation instead of per call.
var (
	// industryLabelGroupRe captures the run of 【…】 items directly following "行业标签".
	industryLabelGroupRe = regexp.MustCompile(`行业标签((?:【[^】]*】)+)`)
	// varietyLabelGroupRe captures the run of 【…】 items directly following "品种标签".
	varietyLabelGroupRe = regexp.MustCompile(`品种标签((?:【[^】]*】)+)`)
	// labelItemRe captures the text inside a single 【…】 pair.
	labelItemRe = regexp.MustCompile(`【([^】]*)】`)
)

// extractLabels
// @Description: Extracts the industry and variety tags from an abstract. Tags are the 【…】 items
// that immediately follow the markers "行业标签" and "品种标签"; only the first occurrence of each
// marker is used. A result is nil when its marker (followed by at least one 【…】) is not found.
// @author: Roc
// @datetime 2025-04-18 17:16:05
// @param text string
// @return industryTags []string
// @return varietyTags []string
func extractLabels(text string) (industryTags []string, varietyTags []string) {
	industryTags = extractLabelGroup(industryLabelGroupRe, text)
	varietyTags = extractLabelGroup(varietyLabelGroupRe, text)
	return
}

// extractLabelGroup returns the contents of every 【…】 item in the first match of groupRe in
// text, or nil when groupRe does not match.
func extractLabelGroup(groupRe *regexp.Regexp, text string) []string {
	groupMatch := groupRe.FindStringSubmatch(text)
	if len(groupMatch) < 2 {
		return nil
	}

	items := labelItemRe.FindAllStringSubmatch(groupMatch[1], -1)
	tags := make([]string, 0, len(items))
	for _, m := range items {
		if len(m) > 1 {
			tags = append(tags, m[1])
		}
	}
	return tags
}
  1239. var aiAbstractTagMap = map[string]int{}
  1240. // GetTagIdByName
  1241. // @Description: 获取标签ID
  1242. // @author: Roc
  1243. // @datetime 2025-04-18 17:25:46
  1244. // @param tagName string
  1245. // @return tagId int
  1246. // @return err error
  1247. func GetTagIdByName(tagName string) (tagId int, err error) {
  1248. tagName = strings.TrimSpace(tagName)
  1249. tagId, ok := aiAbstractTagMap[tagName]
  1250. if ok {
  1251. return
  1252. }
  1253. obj := rag.Tag{}
  1254. item, err := obj.GetByCondition(fmt.Sprintf(` AND %s = ? `, rag.TagColumns.TagName), []interface{}{tagName})
  1255. if err != nil {
  1256. if !utils.IsErrNoRow(err) {
  1257. err = fmt.Errorf("获取标签失败,Err:" + err.Error())
  1258. return
  1259. }
  1260. item = &rag.Tag{
  1261. TagId: 0,
  1262. TagName: tagName,
  1263. Sort: 0,
  1264. ModifyTime: time.Now(),
  1265. CreateTime: time.Now(),
  1266. }
  1267. err = item.Create()
  1268. if err != nil {
  1269. err = fmt.Errorf("添加标签失败,Err:" + err.Error())
  1270. return
  1271. }
  1272. }
  1273. tagId = item.TagId
  1274. aiAbstractTagMap[tagName] = tagId
  1275. return
  1276. }