wechat_platform.go 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821
  1. package services
  2. import (
  3. "bytes"
  4. "eta/eta_api/cache"
  5. "eta/eta_api/models"
  6. "eta/eta_api/models/rag"
  7. "eta/eta_api/services/llm"
  8. "eta/eta_api/utils"
  9. "eta/eta_api/utils/llm/eta_llm/eta_llm_http"
  10. "fmt"
  11. html2 "golang.org/x/net/html"
  12. "html"
  13. "os"
  14. "path"
  15. "strconv"
  16. "strings"
  17. "time"
  18. )
  19. // AddWechatPlatform
  20. // @Description: 添加新的公众号
  21. // @param item
  22. func AddWechatPlatform(item *rag.WechatPlatform) {
  23. var err error
  24. defer func() {
  25. if err != nil {
  26. utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
  27. }
  28. }()
  29. if item.FakeId != `` {
  30. return
  31. }
  32. if item.ArticleLink == `` {
  33. return
  34. }
  35. articleLink := item.ArticleLink
  36. articleDetail, err := llm.SearchByWechatArticle(item.ArticleLink)
  37. if err != nil {
  38. return
  39. }
  40. if articleDetail.Appuin == `` {
  41. err = fmt.Errorf("文章内未匹配到公众号唯一标识")
  42. return
  43. }
  44. wechatPlatform := new(rag.WechatPlatform)
  45. // 查找是否存在这个公众号id的
  46. wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
  47. if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
  48. err = tmpErr
  49. return
  50. }
  51. if tmpErr == nil {
  52. // 如果找到了,那么需要将当前的给移除掉
  53. err = item.Del()
  54. if err != nil {
  55. return
  56. }
  57. // 并将查出来的微信公众号摘出来的数据重新赋值
  58. item = wechatPlatformInfo
  59. } else if utils.IsErrNoRow(tmpErr) {
  60. // 如果没找到,那么就变更当前的信息
  61. item.FakeId = articleDetail.Appuin
  62. item.Nickname = articleDetail.Nickname
  63. //item.Alias = req.Alias
  64. item.RoundHeadImg = articleDetail.RoundHeadImg
  65. //item.ServiceType = req.ServiceType
  66. item.Signature = articleDetail.ProfileSignature
  67. //item.Verified = verified
  68. item.ModifyTime = time.Now()
  69. err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
  70. if err != nil {
  71. return
  72. }
  73. // 修改公众号头像
  74. go replaceWechatPlatformPic(item)
  75. }
  76. // 把刚搜索的文章加入到文章库中
  77. AddWechatArticle(item, articleLink, articleDetail, nil)
  78. BeachAddWechatArticle(item, 10)
  79. fmt.Println("公众号入库完成")
  80. return
  81. }
  82. // AddWechatArticle
  83. // @Description: 添加公众号文章入库
  84. // @author: Roc
  85. // @datetime 2025-03-05 13:24:14
  86. // @param item *rag.WechatPlatform
  87. // @param link string
  88. // @param articleDetail WechatArticleDataResp
  89. func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail llm.WechatArticleDataResp, articleMenu *llm.ArticleMenu) {
  90. var err error
  91. defer func() {
  92. if err != nil {
  93. utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
  94. }
  95. }()
  96. obj := new(rag.WechatArticle)
  97. _, err = obj.GetByLink(articleLink)
  98. if err == nil {
  99. // 文章已经入库了,不需要重复入库
  100. return
  101. }
  102. // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
  103. if !utils.IsErrNoRow(err) {
  104. return
  105. }
  106. // 这个时候,说明数据库中没有这个文章,那么需要文章入库
  107. err = nil
  108. var publishAt time.Time
  109. if articleDetail.CreateAt != `` {
  110. createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
  111. if tmpErr == nil {
  112. publishAt = time.Unix(int64(createAtInt), 1000)
  113. }
  114. } else if articleMenu != nil {
  115. publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
  116. }
  117. content := articleDetail.HtmlContent
  118. // 图片下载下来到本地,如果成功了,那么就用新的
  119. tmpContent, err := ReplaceHtmlImg(content)
  120. if tmpContent != `` {
  121. content = tmpContent
  122. }
  123. obj = &rag.WechatArticle{
  124. WechatArticleId: 0,
  125. WechatPlatformId: item.WechatPlatformId,
  126. FakeId: item.FakeId,
  127. Title: articleDetail.Title,
  128. Link: articleLink,
  129. CoverUrl: articleDetail.CoverUrl,
  130. Description: articleDetail.Desc,
  131. Content: html.EscapeString(content),
  132. TextContent: articleDetail.TextContent,
  133. Country: articleDetail.CountryName,
  134. Province: articleDetail.ProvinceName,
  135. City: articleDetail.CityName,
  136. //Abstract: "",
  137. //ArticleCreateTime: createAt,
  138. ModifyTime: time.Now(),
  139. CreateTime: time.Now(),
  140. }
  141. if !publishAt.IsZero() {
  142. obj.ArticleCreateTime = publishAt
  143. }
  144. if articleMenu != nil {
  145. obj.Title = articleMenu.Title
  146. //obj.Link = articleMenu.Link
  147. obj.CoverUrl = articleMenu.Cover
  148. obj.Description = articleMenu.Digest
  149. }
  150. err = obj.Create()
  151. // 修改文章封面图
  152. go replaceWechatArticleCoverPic(obj)
  153. // 文章入库成功后,需要将相关信息入摘要库
  154. go cache.AddWechatArticleLlmOpToCache(obj.WechatArticleId, ``)
  155. }
  156. // BeachAddWechatArticle
  157. // @Description: 批量添加公众号文章
  158. // @param item
  159. // @param num
  160. // @return err
  161. func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
  162. var err error
  163. defer func() {
  164. //fmt.Println("公众号文章批量入库完成")
  165. if err != nil {
  166. utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
  167. fmt.Println("公众号文章批量入库失败,err:", err)
  168. }
  169. }()
  170. if item.FakeId == `` {
  171. return
  172. }
  173. wechatArticleObj := new(rag.WechatArticle)
  174. // 获取公众号的文章列表
  175. articleListResp, err := llm.SearchByWechatArticleList(item.FakeId, num)
  176. if err != nil {
  177. return
  178. }
  179. for _, articleMenu := range articleListResp.List {
  180. // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
  181. _, err = wechatArticleObj.GetByLink(articleMenu.Link)
  182. if err == nil {
  183. // 文章已经入库了,不需要重复入库
  184. continue
  185. }
  186. if !utils.IsErrNoRow(err) {
  187. return
  188. }
  189. err = nil
  190. articleDetail, tmpErr := llm.SearchByWechatArticle(articleMenu.Link)
  191. if tmpErr != nil {
  192. err = tmpErr
  193. return
  194. }
  195. // 把刚搜索的文章加入到指标库
  196. AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
  197. time.Sleep(10 * time.Second)
  198. }
  199. return
  200. }
  201. // GenerateArticleAbstract
  202. // @Description: 文章摘要生成
  203. // @author: Roc
  204. // @datetime 2025-03-10 16:17:53
  205. // @param item *rag.WechatArticle
  206. func GenerateArticleAbstract(item *rag.WechatArticle) {
  207. var err error
  208. defer func() {
  209. if err != nil {
  210. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  211. fmt.Println("文章转临时文件失败,err:", err)
  212. }
  213. }()
  214. // 内容为空,那就不需要生成摘要
  215. if item.TextContent == `` {
  216. return
  217. }
  218. abstractObj := rag.WechatArticleAbstract{}
  219. _, err = abstractObj.GetByWechatArticleId(item.WechatArticleId)
  220. if err == nil {
  221. // 摘要已经生成,不需要重复生成
  222. return
  223. }
  224. if !utils.IsErrNoRow(err) {
  225. return
  226. }
  227. // 生成临时文件
  228. dateDir := time.Now().Format("20060102")
  229. uploadDir := utils.STATIC_DIR + "ai/" + dateDir
  230. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  231. if err != nil {
  232. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  233. return
  234. }
  235. randStr := utils.GetRandStringNoSpecialChar(28)
  236. fileName := randStr + `.md`
  237. tmpFilePath := uploadDir + "/" + fileName
  238. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  239. if err != nil {
  240. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  241. return
  242. }
  243. defer func() {
  244. os.Remove(tmpFilePath)
  245. }()
  246. // 上传临时文件到LLM
  247. tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
  248. if err != nil {
  249. err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
  250. return
  251. }
  252. if tmpFileResp.Data.Id == `` {
  253. err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
  254. return
  255. }
  256. tmpDocId := tmpFileResp.Data.Id
  257. //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
  258. //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
  259. //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
  260. //开始对话
  261. abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, tmpDocId)
  262. if tmpErr != nil {
  263. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  264. return
  265. }
  266. // 添加问答记录
  267. if len(addArticleChatRecordList) > 0 {
  268. recordObj := rag.WechatArticleChatRecord{}
  269. err = recordObj.CreateInBatches(addArticleChatRecordList)
  270. if err != nil {
  271. return
  272. }
  273. }
  274. if abstract != `` {
  275. if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  276. item.AbstractStatus = 2
  277. item.ModifyTime = time.Now()
  278. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  279. return
  280. }
  281. item.AbstractStatus = 1
  282. item.ModifyTime = time.Now()
  283. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  284. abstractItem := &rag.WechatArticleAbstract{
  285. WechatArticleAbstractId: 0,
  286. WechatArticleId: item.WechatArticleId,
  287. Content: abstract,
  288. Version: 0,
  289. VectorKey: "",
  290. ModifyTime: time.Now(),
  291. CreateTime: time.Now(),
  292. }
  293. err = abstractItem.Create()
  294. if err != nil {
  295. return
  296. }
  297. AbstractToKnowledge(item, abstractItem, false)
  298. }
  299. }
  300. // ReGenerateArticleAbstract
  301. // @Description: 文章摘要重新生成
  302. // @author: Roc
  303. // @datetime 2025-03-10 16:17:53
  304. // @param item *rag.WechatArticle
  305. func ReGenerateArticleAbstract(item *rag.WechatArticle) {
  306. var err error
  307. defer func() {
  308. if err != nil {
  309. utils.FileLog.Error("文章转临时文件失败,err:%v", err)
  310. fmt.Println("文章转临时文件失败,err:", err)
  311. }
  312. }()
  313. abstractObj := rag.WechatArticleAbstract{}
  314. abstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
  315. if err != nil {
  316. if utils.IsErrNoRow(err) {
  317. // 直接生成
  318. GenerateArticleAbstract(item)
  319. return
  320. }
  321. // 异常了
  322. return
  323. }
  324. // 生成临时文件
  325. dateDir := time.Now().Format("20060102")
  326. uploadDir := utils.STATIC_DIR + "ai/" + dateDir
  327. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  328. if err != nil {
  329. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  330. return
  331. }
  332. randStr := utils.GetRandStringNoSpecialChar(28)
  333. fileName := randStr + `.md`
  334. tmpFilePath := uploadDir + "/" + fileName
  335. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  336. if err != nil {
  337. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  338. return
  339. }
  340. defer func() {
  341. os.Remove(tmpFilePath)
  342. }()
  343. // 上传临时文件到LLM
  344. tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
  345. if err != nil {
  346. err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
  347. return
  348. }
  349. if tmpFileResp.Data.Id == `` {
  350. err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
  351. return
  352. }
  353. tmpDocId := tmpFileResp.Data.Id
  354. //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
  355. //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
  356. //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
  357. //开始对话
  358. abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, tmpDocId)
  359. if tmpErr != nil {
  360. err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
  361. return
  362. }
  363. // 添加问答记录
  364. if len(addArticleChatRecordList) > 0 {
  365. recordObj := rag.WechatArticleChatRecord{}
  366. err = recordObj.CreateInBatches(addArticleChatRecordList)
  367. if err != nil {
  368. return
  369. }
  370. }
  371. if abstract != `` {
  372. if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
  373. item.AbstractStatus = 2
  374. item.ModifyTime = time.Now()
  375. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  376. return
  377. }
  378. item.AbstractStatus = 1
  379. item.ModifyTime = time.Now()
  380. err = item.Update([]string{"AbstractStatus", "ModifyTime"})
  381. abstractItem.Content = abstract
  382. abstractItem.Version = abstractObj.Version + 1
  383. abstractItem.ModifyTime = time.Now()
  384. err = abstractItem.Update([]string{"content", "version", "modify_time"})
  385. if err != nil {
  386. return
  387. }
  388. AbstractToKnowledge(item, abstractItem, true)
  389. }
  390. }
  391. // DelDoc
  392. // @Description: 删除摘要向量库
  393. // @author: Roc
  394. // @datetime 2025-03-12 16:55:05
  395. // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
  396. // @return err error
  397. func DelDoc(wechatArticleAbstractList []*rag.WechatArticleAbstract) (err error) {
  398. defer func() {
  399. if err != nil {
  400. utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
  401. fmt.Println("删除摘要向量库文件失败,err:", err)
  402. }
  403. }()
  404. vectorKeyList := make([]string, 0)
  405. wechatArticleAbstractIdList := make([]int, 0)
  406. for _, v := range wechatArticleAbstractList {
  407. vectorKeyList = append(vectorKeyList, v.VectorKey)
  408. wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.WechatArticleAbstractId)
  409. }
  410. _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
  411. if err != nil {
  412. err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
  413. return
  414. }
  415. //fmt.Println(resp)
  416. obj := rag.WechatArticleAbstract{}
  417. err = obj.DelVectorKey(wechatArticleAbstractIdList)
  418. return
  419. }
  420. func getAnswerByContent(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
  421. historyList := make([]eta_llm_http.HistoryContent, 0)
  422. addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
  423. questionObj := rag.Question{}
  424. questionList, err := questionObj.GetListByCondition(``, []interface{}{}, 0, 100)
  425. if err != nil {
  426. err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
  427. return
  428. }
  429. //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
  430. questionStrList := []string{`你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`}
  431. for _, v := range questionList {
  432. questionStrList = append(questionStrList, v.QuestionContent)
  433. }
  434. questionStr := strings.Join(questionStrList, "\n")
  435. originalAnswer, result, err := llm.ChatByFile(docId, questionStr, historyList)
  436. fmt.Println(result)
  437. if err != nil {
  438. err = fmt.Errorf("LLM对话失败,Err:" + err.Error())
  439. return
  440. }
  441. // 提取 </think> 后面的内容
  442. thinkEndIndex := strings.Index(result.Answer, "</think>")
  443. if thinkEndIndex != -1 {
  444. answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
  445. } else {
  446. answer = result.Answer
  447. }
  448. answer = strings.TrimSpace(answer)
  449. // 待入库的数据
  450. addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
  451. WechatArticleChatRecordId: 0,
  452. WechatArticleId: wechatArticleId,
  453. ChatUserType: "user",
  454. Content: questionStr,
  455. SendTime: time.Now(),
  456. CreatedTime: time.Now(),
  457. UpdateTime: time.Now(),
  458. }, &rag.WechatArticleChatRecord{
  459. WechatArticleChatRecordId: 0,
  460. WechatArticleId: wechatArticleId,
  461. ChatUserType: "assistant",
  462. Content: originalAnswer,
  463. SendTime: time.Now(),
  464. CreatedTime: time.Now(),
  465. UpdateTime: time.Now(),
  466. })
  467. return
  468. }
  469. // ArticleToKnowledge
  470. // @Description: 原文入向量库
  471. // @author: Roc
  472. // @datetime 2025-03-10 16:13:16
  473. // @param item *rag.WechatArticle
  474. func ArticleToKnowledge(item *rag.WechatArticle) {
  475. if item.TextContent == `` {
  476. return
  477. }
  478. var err error
  479. defer func() {
  480. if err != nil {
  481. utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err)
  482. fmt.Println("上传文章原文到知识库失败,err:", err)
  483. }
  484. }()
  485. // 生成临时文件
  486. //dateDir := time.Now().Format("20060102")
  487. //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir
  488. uploadDir := utils.STATIC_DIR + "ai/article"
  489. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  490. if err != nil {
  491. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  492. return
  493. }
  494. fileName := utils.RemoveSpecialChars(item.Title) + `.md`
  495. tmpFilePath := uploadDir + "/" + fileName
  496. err = utils.SaveToFile(item.TextContent, tmpFilePath)
  497. if err != nil {
  498. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  499. return
  500. }
  501. defer func() {
  502. os.Remove(tmpFilePath)
  503. }()
  504. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName]
  505. // 上传临时文件到LLM
  506. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  507. if err != nil {
  508. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  509. return
  510. }
  511. if len(uploadFileResp.FailedFiles) > 0 {
  512. for _, v := range uploadFileResp.FailedFiles {
  513. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  514. }
  515. }
  516. item.VectorKey = tmpFilePath
  517. item.ModifyTime = time.Now()
  518. err = item.Update([]string{"vector_key", "modify_time"})
  519. }
  520. // AbstractToKnowledge
  521. // @Description: 摘要入向量库
  522. // @author: Roc
  523. // @datetime 2025-03-10 16:14:59
  524. // @param wechatArticleItem *rag.WechatArticle
  525. // @param item *rag.WechatArticleAbstract
  526. func AbstractToKnowledge(wechatArticleItem *rag.WechatArticle, item *rag.WechatArticleAbstract, isReUpload bool) {
  527. if item.Content == `` {
  528. return
  529. }
  530. // 已经生成了,那就不处理了
  531. if item.VectorKey != `` && !isReUpload {
  532. return
  533. }
  534. var err error
  535. defer func() {
  536. if err != nil {
  537. utils.FileLog.Error("摘要入向量库失败,err:%v", err)
  538. fmt.Println("摘要入向量库失败,err:", err)
  539. }
  540. }()
  541. // 生成临时文件
  542. //dateDir := time.Now().Format("20060102")
  543. //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir
  544. uploadDir := utils.STATIC_DIR + "ai/abstract"
  545. err = os.MkdirAll(uploadDir, utils.DIR_MOD)
  546. if err != nil {
  547. err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
  548. return
  549. }
  550. fileName := utils.RemoveSpecialChars(wechatArticleItem.Title) + `.md`
  551. tmpFilePath := uploadDir + "/" + fileName
  552. err = utils.SaveToFile(item.Content, tmpFilePath)
  553. if err != nil {
  554. err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
  555. return
  556. }
  557. defer func() {
  558. os.Remove(tmpFilePath)
  559. }()
  560. knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
  561. // 上传临时文件到LLM
  562. uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
  563. if err != nil {
  564. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
  565. return
  566. }
  567. if len(uploadFileResp.FailedFiles) > 0 {
  568. for _, v := range uploadFileResp.FailedFiles {
  569. err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
  570. }
  571. }
  572. item.VectorKey = tmpFilePath
  573. item.ModifyTime = time.Now()
  574. err = item.Update([]string{"vector_key", "modify_time"})
  575. }
  576. // replaceWechatPlatformPic
  577. // @Description: 替换公众号头像
  578. // @author: Roc
  579. // @datetime 2025-03-11 09:38:24
  580. // @param item *rag.WechatPlatform
  581. func replaceWechatPlatformPic(item *rag.WechatPlatform) {
  582. var err error
  583. defer func() {
  584. if err != nil {
  585. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  586. fmt.Println("替换公众号头像失败,err:", err)
  587. }
  588. }()
  589. if item.RoundHeadImg == `` {
  590. return
  591. }
  592. resourceUrl, err := downloadWxPicAndUploadToOss(item.RoundHeadImg, `head_img`)
  593. if err != nil {
  594. return
  595. }
  596. item.RoundHeadImg = resourceUrl
  597. err = item.Update([]string{"round_head_img"})
  598. }
  599. // replaceWechatArticleCoverPic
  600. // @Description: 替换文章封面图
  601. // @author: Roc
  602. // @datetime 2025-03-11 09:38:35
  603. // @param item *rag.WechatArticle
  604. func replaceWechatArticleCoverPic(item *rag.WechatArticle) {
  605. var err error
  606. defer func() {
  607. if err != nil {
  608. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  609. fmt.Println("替换公众号头像失败,err:", err)
  610. }
  611. }()
  612. if item.CoverUrl == `` {
  613. return
  614. }
  615. resourceUrl, err := downloadWxPicAndUploadToOss(item.CoverUrl, `cover_url`)
  616. if err != nil {
  617. return
  618. }
  619. item.CoverUrl = resourceUrl
  620. err = item.Update([]string{"cover_url"})
  621. }
  622. // replaceWechatArticlePic
  623. // @Description: 替换文章内容图
  624. // @author: Roc
  625. // @datetime 2025-03-11 09:38:35
  626. // @param item *rag.WechatArticle
  627. func ReplaceWechatArticlePic(item *rag.WechatArticle) {
  628. var err error
  629. defer func() {
  630. if err != nil {
  631. utils.FileLog.Error("替换公众号头像失败,err:%v", err)
  632. fmt.Println("替换公众号头像失败,err:", err)
  633. }
  634. }()
  635. if item.Content == `` {
  636. return
  637. }
  638. content, err := ReplaceHtmlImg(html.UnescapeString(item.Content))
  639. if err != nil {
  640. return
  641. }
  642. item.Content = html.EscapeString(content)
  643. err = item.Update([]string{"content"})
  644. return
  645. }
  646. // downloadWxPicAndUploadToOss
  647. // @Description: 下载微信图片并上传到OSS
  648. // @author: Roc
  649. // @datetime 2025-03-11 09:28:49
  650. // @param wxPicUrl string
  651. // @return resourceUrl string
  652. // @return err error
  653. func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, err error) {
  654. localFilePath, err := utils.DownloadWxImage(wxPicUrl)
  655. if err != nil {
  656. return
  657. }
  658. defer func() {
  659. os.Remove(localFilePath)
  660. }()
  661. ossClient := NewOssClient()
  662. if ossClient == nil {
  663. err = fmt.Errorf(`初始化OSS服务失败`)
  664. return
  665. }
  666. ext := path.Ext(localFilePath)
  667. fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), utils.GetRandStringNoSpecialChar(16), ext)
  668. //savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName
  669. savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName)
  670. resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath)
  671. if err != nil {
  672. err = fmt.Errorf("文件上传失败,Err:" + err.Error())
  673. return
  674. }
  675. return
  676. }
  677. // ReplaceHtmlImg
  678. // @Description: 将html中的图片替换成自己的
  679. // @author: Roc
  680. // @datetime 2025-03-11 14:32:00
  681. // @param htmlStr string
  682. // @return newHtml string
  683. // @return err error
  684. func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) {
  685. doc, err := html2.Parse(strings.NewReader(htmlStr))
  686. if err != nil {
  687. return
  688. }
  689. if err != nil {
  690. return
  691. }
  692. handleNode(doc)
  693. // 将处理后的HTML节点重新渲染为HTML字符串
  694. var buf bytes.Buffer
  695. if err = html2.Render(&buf, doc); err != nil {
  696. fmt.Println(err)
  697. return
  698. }
  699. newHtml = buf.String()
  700. return
  701. }
  702. // handleNode
  703. // @Description: html节点处理
  704. // @author: Roc
  705. // @datetime 2025-03-11 14:32:45
  706. // @param n *html2.Node
  707. func handleNode(n *html2.Node) {
  708. if n.Type == html2.ElementNode {
  709. if n.Data == "img" {
  710. for k, attr := range n.Attr {
  711. // 新增代码:如果标签是img且存在data-src属性,则将data-src的值赋给src
  712. if n.Data == "img" && attr.Key == "src" {
  713. resourceUrl, tmpErr := downloadWxPicAndUploadToOss(attr.Val, `article`)
  714. if tmpErr != nil {
  715. continue
  716. }
  717. attr.Val = resourceUrl
  718. }
  719. n.Attr[k] = attr
  720. }
  721. }
  722. }
  723. for c := n.FirstChild; c != nil; c = c.NextSibling {
  724. handleNode(c)
  725. }
  726. }