12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148 |
- package services
- import (
- "bytes"
- "encoding/json"
- "eta/eta_api/cache"
- "eta/eta_api/models"
- "eta/eta_api/models/rag"
- "eta/eta_api/services/elastic"
- "eta/eta_api/services/llm"
- "eta/eta_api/services/llm/facade"
- "eta/eta_api/utils"
- "eta/eta_api/utils/llm/eta_llm/eta_llm_http"
- "fmt"
- html2 "golang.org/x/net/html"
- "html"
- "os"
- "path"
- "strconv"
- "strings"
- "time"
- )
- // AddWechatPlatform
- // @Description: 添加新的公众号
- // @param item
- func AddWechatPlatform(item *rag.WechatPlatform) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("公众号入库后查找最新记录失败,err:%v", err)
- }
- }()
- if item.FakeId != `` {
- return
- }
- if item.ArticleLink == `` {
- return
- }
- articleLink := item.ArticleLink
- articleDetail, err := llm.SearchByWechatArticle(item.ArticleLink)
- if err != nil {
- return
- }
- if articleDetail.Appuin == `` {
- err = fmt.Errorf("文章内未匹配到公众号唯一标识")
- return
- }
- wechatPlatform := new(rag.WechatPlatform)
- // 查找是否存在这个公众号id的
- wechatPlatformInfo, tmpErr := wechatPlatform.GetByFakeID(articleDetail.Appuin)
- if tmpErr != nil && !utils.IsErrNoRow(tmpErr) {
- err = tmpErr
- return
- }
- if tmpErr == nil {
- // 如果找到了,那么需要将当前的给移除掉
- err = item.Del()
- if err != nil {
- return
- }
- // 并将查出来的微信公众号摘出来的数据重新赋值
- item = wechatPlatformInfo
- } else if utils.IsErrNoRow(tmpErr) {
- // 如果没找到,那么就变更当前的信息
- item.FakeId = articleDetail.Appuin
- item.Nickname = articleDetail.Nickname
- //item.Alias = req.Alias
- item.RoundHeadImg = articleDetail.RoundHeadImg
- //item.ServiceType = req.ServiceType
- item.Signature = articleDetail.ProfileSignature
- //item.Verified = verified
- item.ModifyTime = time.Now()
- err = item.Update([]string{rag.WechatPlatformColumns.FakeID, rag.WechatPlatformColumns.Nickname, rag.WechatPlatformColumns.RoundHeadImg, rag.WechatPlatformColumns.Signature, rag.WechatPlatformColumns.ModifyTime})
- if err != nil {
- return
- }
- // 修改公众号头像
- go replaceWechatPlatformPic(item)
- }
- // 把刚搜索的文章加入到文章库中
- AddWechatArticle(item, articleLink, articleDetail, nil)
- BeachAddWechatArticle(item, 10)
- fmt.Println("公众号入库完成")
- return
- }
- // AddWechatArticle
- // @Description: 添加公众号文章入库
- // @author: Roc
- // @datetime 2025-03-05 13:24:14
- // @param item *rag.WechatPlatform
- // @param link string
- // @param articleDetail WechatArticleDataResp
- func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetail llm.WechatArticleDataResp, articleMenu *llm.ArticleMenu) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("公众号文章入库失败,文章链接:%s ,err:%v", articleLink, err)
- }
- }()
- obj := new(rag.WechatArticle)
- _, err = obj.GetByLink(articleLink)
- if err == nil {
- // 文章已经入库了,不需要重复入库
- return
- }
- // 如果不是 ErrNoRow 的时候,那么就是查询数据库出问题了,需要直接返回
- if !utils.IsErrNoRow(err) {
- return
- }
- // 这个时候,说明数据库中没有这个文章,那么需要文章入库
- err = nil
- var publishAt time.Time
- if articleDetail.CreateAt != `` {
- createAtInt, tmpErr := strconv.Atoi(articleDetail.CreateAt)
- if tmpErr == nil {
- publishAt = time.Unix(int64(createAtInt), 1000)
- }
- } else if articleMenu != nil {
- publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
- }
- content := articleDetail.HtmlContent
- // 图片下载下来到本地,如果成功了,那么就用新的
- tmpContent, err := ReplaceHtmlImg(content)
- if tmpContent != `` {
- content = tmpContent
- }
- obj = &rag.WechatArticle{
- WechatArticleId: 0,
- WechatPlatformId: item.WechatPlatformId,
- FakeId: item.FakeId,
- Title: articleDetail.Title,
- Link: articleLink,
- CoverUrl: articleDetail.CoverUrl,
- Description: articleDetail.Desc,
- Content: html.EscapeString(content),
- TextContent: articleDetail.TextContent,
- Country: articleDetail.CountryName,
- Province: articleDetail.ProvinceName,
- City: articleDetail.CityName,
- //Abstract: "",
- //ArticleCreateTime: createAt,
- ModifyTime: time.Now(),
- CreateTime: time.Now(),
- }
- if !publishAt.IsZero() {
- obj.ArticleCreateTime = publishAt
- }
- if articleMenu != nil {
- obj.Title = articleMenu.Title
- //obj.Link = articleMenu.Link
- obj.CoverUrl = articleMenu.Cover
- obj.Description = articleMenu.Digest
- }
- err = obj.Create()
- // 修改文章封面图
- go replaceWechatArticleCoverPic(obj)
- // 文章入库成功后,需要将相关信息入摘要库
- go cache.AddWechatArticleLlmOpToCache(obj.WechatArticleId, ``)
- }
- // BeachAddWechatArticle
- // @Description: 批量添加公众号文章
- // @param item
- // @param num
- // @return err
- func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
- var err error
- defer func() {
- //fmt.Println("公众号文章批量入库完成")
- if err != nil {
- utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
- fmt.Println("公众号文章批量入库失败,err:", err)
- }
- }()
- if item.FakeId == `` {
- return
- }
- wechatArticleObj := new(rag.WechatArticle)
- // 获取公众号的文章列表
- articleListResp, err := llm.SearchByWechatArticleList(item.FakeId, num)
- if err != nil {
- return
- }
- for _, articleMenu := range articleListResp.List {
- // 判断文章是否已经入库,如果已经入库了,那么就过滤,不去重复查询微信了
- _, err = wechatArticleObj.GetByLink(articleMenu.Link)
- if err == nil {
- // 文章已经入库了,不需要重复入库
- continue
- }
- if !utils.IsErrNoRow(err) {
- return
- }
- err = nil
- articleDetail, tmpErr := llm.SearchByWechatArticle(articleMenu.Link)
- if tmpErr != nil {
- err = tmpErr
- return
- }
- // 把刚搜索的文章加入到指标库
- AddWechatArticle(item, articleMenu.Link, articleDetail, &articleMenu)
- //time.Sleep(10 * time.Second)
- // 随机休眠,至少大于10s
- sleepTimeInt := utils.GetRandInt(10, 20)
- if sleepTimeInt < 10 {
- sleepTimeInt = 10
- }
- time.Sleep(time.Duration(sleepTimeInt) * time.Second)
- }
- return
- }
- //
- //// GenerateArticleAbstract
- //// @Description: 文章摘要生成
- //// @author: Roc
- //// @datetime 2025-03-10 16:17:53
- //// @param item *rag.WechatArticle
- //func GenerateArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
- // var err error
- // defer func() {
- // if err != nil {
- // utils.FileLog.Error("文章转临时文件失败,err:%v", err)
- // fmt.Println("文章转临时文件失败,err:", err)
- // }
- // }()
- //
- // // 内容为空,那就不需要生成摘要
- // if item.TextContent == `` {
- // return
- // }
- //
- // abstractObj := rag.WechatArticleAbstract{}
- // tmpAbstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
- // // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
- // if err == nil && !forceGenerate {
- // // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
- // WechatArticleAbstractToKnowledge(item, tmpAbstractItem, false)
- //
- // return
- // }
- // if !utils.IsErrNoRow(err) {
- // return
- // }
- //
- // //开始对话
- // abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_ETA_REPORT)
- // if tmpErr != nil {
- // err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
- // return
- // }
- //
- // // 添加问答记录
- // if len(addArticleChatRecordList) > 0 {
- // recordObj := rag.WechatArticleChatRecord{}
- // err = recordObj.CreateInBatches(addArticleChatRecordList)
- // if err != nil {
- // return
- // }
- // }
- //
- // if abstract != `` {
- // if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
- // item.AbstractStatus = 2
- // item.ModifyTime = time.Now()
- // err = item.Update([]string{"AbstractStatus", "ModifyTime"})
- // return
- // }
- // item.AbstractStatus = 1
- // item.ModifyTime = time.Now()
- // err = item.Update([]string{"AbstractStatus", "ModifyTime"})
- //
- // abstractItem := &rag.WechatArticleAbstract{
- // WechatArticleAbstractId: 0,
- // WechatArticleId: item.WechatArticleId,
- // Content: abstract,
- // Version: 0,
- // VectorKey: "",
- // ModifyTime: time.Now(),
- // CreateTime: time.Now(),
- // }
- // err = abstractItem.Create()
- // if err != nil {
- // return
- // }
- //
- // // 数据入ES库
- // go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
- //
- // WechatArticleAbstractToKnowledge(item, abstractItem, false)
- // }
- //}
- // GenerateArticleAbstract
- // @Description: 文章摘要生成(默认提示词批量生成)
- // @author: Roc
- // @datetime 2025-03-10 16:17:53
- // @param item *rag.WechatArticle
- func GenerateWechatArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("文章转临时文件失败,err:%v", err)
- fmt.Println("文章转临时文件失败,err:", err)
- }
- }()
- // 内容为空,那就不需要生成摘要
- if item.TextContent == `` {
- return
- }
- questionObj := rag.Question{}
- questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100)
- if err != nil {
- err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
- return
- }
- // 没问题就不生成了
- if len(questionList) <= 0 {
- return
- }
- for _, question := range questionList {
- GenerateWechatArticleAbstractByQuestion(item, question, forceGenerate)
- }
- return
- }
- // GenerateArticleAbstractByQuestion
- // @Description: 文章摘要生成(根据提示词生成)
- // @author: Roc
- // @datetime 2025-03-10 16:17:53
- // @param item *rag.WechatArticle
- func GenerateWechatArticleAbstractByQuestion(item *rag.WechatArticle, question *rag.Question, forceGenerate bool) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("文章转临时文件失败,err:%v", err)
- fmt.Println("文章转临时文件失败,err:", err)
- }
- }()
- // 内容为空,那就不需要生成摘要
- if item.TextContent == `` {
- return
- }
- abstractObj := rag.WechatArticleAbstract{}
- abstractItem, err := abstractObj.GetByWechatArticleIdAndQuestionId(item.WechatArticleId, question.QuestionId)
- // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
- if err == nil && !forceGenerate {
- // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
- WechatArticleAbstractToKnowledge(item, abstractItem, false)
- return
- }
- if !utils.IsErrNoRow(err) {
- return
- }
- //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
- questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)
- //开始对话
- abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_ETA_REPORT, questionStr)
- if tmpErr != nil {
- err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
- return
- }
- // 添加问答记录
- if len(addArticleChatRecordList) > 0 {
- recordObj := rag.WechatArticleChatRecord{}
- err = recordObj.CreateInBatches(addArticleChatRecordList)
- if err != nil {
- return
- }
- }
- if abstract != `` {
- if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
- item.AbstractStatus = 2
- item.ModifyTime = time.Now()
- err = item.Update([]string{"AbstractStatus", "ModifyTime"})
- return
- }
- item.AbstractStatus = 1
- item.ModifyTime = time.Now()
- err = item.Update([]string{"AbstractStatus", "ModifyTime"})
- if abstractItem == nil || abstractItem.WechatArticleAbstractId <= 0 {
- abstractItem = &rag.WechatArticleAbstract{
- WechatArticleAbstractId: 0,
- WechatArticleId: item.WechatArticleId,
- Content: abstract,
- Version: 1,
- VectorKey: "",
- ModifyTime: time.Now(),
- CreateTime: time.Now(),
- QuestionId: question.QuestionId,
- Tags: "",
- QuestionContent: question.QuestionContent,
- }
- err = abstractItem.Create()
- } else {
- abstractItem.Content = abstract
- abstractItem.Version++
- abstractItem.ModifyTime = time.Now()
- abstractItem.Tags = ""
- abstractItem.QuestionContent = question.QuestionContent
- err = abstractItem.Update([]string{"content", "version", "modify_time", "tags", "question_content"})
- }
- if err != nil {
- return
- }
- // 数据入ES库
- go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
- WechatArticleAbstractToKnowledge(item, abstractItem, false)
- }
- }
- // DelDoc
- // @Description: 删除摘要向量库
- // @author: Roc
- // @datetime 2025-03-12 16:55:05
- // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
- // @return err error
- func DelDoc(wechatArticleAbstractList []*rag.WechatArticleAbstract) (err error) {
- defer func() {
- if err != nil {
- utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
- fmt.Println("删除摘要向量库文件失败,err:", err)
- }
- }()
- vectorKeyList := make([]string, 0)
- wechatArticleAbstractIdList := make([]int, 0)
- for _, v := range wechatArticleAbstractList {
- if v.VectorKey == `` {
- continue
- }
- vectorKeyList = append(vectorKeyList, v.VectorKey)
- wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.WechatArticleAbstractId)
- }
- // 没有就不删除
- if len(vectorKeyList) <= 0 {
- return
- }
- _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
- if err != nil {
- err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
- return
- }
- //fmt.Println(resp)
- obj := rag.WechatArticleAbstract{}
- err = obj.DelVectorKey(wechatArticleAbstractIdList)
- return
- }
- // DelLlmDoc
- // @Description: 删除摘要向量库
- // @author: Roc
- // @datetime 2025-03-12 16:55:05
- // @param wechatArticleAbstractList []*rag.WechatArticleAbstract
- // @return err error
- func DelLlmDoc(vectorKeyList []string, wechatArticleAbstractIdList []int) (err error) {
- defer func() {
- if err != nil {
- utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err)
- fmt.Println("删除摘要向量库文件失败,err:", err)
- }
- }()
- // 没有就不删除
- if len(vectorKeyList) <= 0 {
- return
- }
- _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.KnowledgeBaseName], vectorKeyList)
- if err != nil {
- err = fmt.Errorf("删除LLM摘要向量库文件失败,Err:" + err.Error())
- return
- }
- //fmt.Println(resp)
- obj := rag.WechatArticleAbstract{}
- err = obj.DelVectorKey(wechatArticleAbstractIdList)
- return
- }
- func getAnswerByContent(articleId int, source int, questionStr string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
- addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
- result, err := facade.AIGCBaseOnPromote(facade.AIGC{
- Promote: questionStr,
- Source: source,
- ArticleId: articleId,
- LLMModel: `deepseek-r1:32b`,
- })
- if err != nil {
- return
- }
- // JSON字符串转字节
- answerByte, err := json.Marshal(result)
- if err != nil {
- return
- }
- originalAnswer := string(answerByte)
- // 提取 </think> 后面的内容
- thinkEndIndex := strings.Index(result.Answer, "</think>")
- if thinkEndIndex != -1 {
- answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
- } else {
- answer = result.Answer
- }
- answer = strings.TrimSpace(answer)
- // 待入库的数据
- addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
- WechatArticleChatRecordId: 0,
- WechatArticleId: articleId,
- ChatUserType: "user",
- Content: questionStr,
- SendTime: time.Now(),
- CreatedTime: time.Now(),
- UpdateTime: time.Now(),
- }, &rag.WechatArticleChatRecord{
- WechatArticleChatRecordId: 0,
- WechatArticleId: articleId,
- ChatUserType: "assistant",
- Content: originalAnswer,
- SendTime: time.Now(),
- CreatedTime: time.Now(),
- UpdateTime: time.Now(),
- })
- return
- }
- func getAnswerByContentBak(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
- historyList := make([]eta_llm_http.HistoryContent, 0)
- addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
- questionObj := rag.Question{}
- questionList, err := questionObj.GetListByCondition(``, ``, []interface{}{}, 0, 100)
- if err != nil {
- err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
- return
- }
- // 没问题就不生成了
- if len(questionList) <= 0 {
- return
- }
- //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
- questionStrList := []string{`你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`}
- for _, v := range questionList {
- questionStrList = append(questionStrList, v.QuestionContent)
- }
- questionStr := strings.Join(questionStrList, "\n")
- originalAnswer, result, err := llm.ChatByFile(docId, questionStr, historyList)
- fmt.Println(result)
- if err != nil {
- err = fmt.Errorf("LLM对话失败,Err:" + err.Error())
- return
- }
- // 提取 </think> 后面的内容
- thinkEndIndex := strings.Index(result.Answer, "</think>")
- if thinkEndIndex != -1 {
- answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
- } else {
- answer = result.Answer
- }
- answer = strings.TrimSpace(answer)
- // 待入库的数据
- addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
- WechatArticleChatRecordId: 0,
- WechatArticleId: wechatArticleId,
- ChatUserType: "user",
- Content: questionStr,
- SendTime: time.Now(),
- CreatedTime: time.Now(),
- UpdateTime: time.Now(),
- }, &rag.WechatArticleChatRecord{
- WechatArticleChatRecordId: 0,
- WechatArticleId: wechatArticleId,
- ChatUserType: "assistant",
- Content: originalAnswer,
- SendTime: time.Now(),
- CreatedTime: time.Now(),
- UpdateTime: time.Now(),
- })
- return
- }
- // ArticleToKnowledge
- // @Description: 原文入向量库
- // @author: Roc
- // @datetime 2025-03-10 16:13:16
- // @param item *rag.WechatArticle
- func ArticleToKnowledge(item *rag.WechatArticle) {
- if item.TextContent == `` {
- return
- }
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err)
- fmt.Println("上传文章原文到知识库失败,err:", err)
- }
- }()
- // 生成临时文件
- //dateDir := time.Now().Format("20060102")
- //uploadDir := "./static/ai/article/" + dateDir
- uploadDir := "./static/ai/article"
- err = os.MkdirAll(uploadDir, utils.DIR_MOD)
- if err != nil {
- err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
- return
- }
- //fileName := utils.RemoveSpecialChars(item.Title) + `.md`
- fileName := utils.MD5(item.Title) + `.md`
- tmpFilePath := uploadDir + "/" + fileName
- err = utils.SaveToFile(item.TextContent, tmpFilePath)
- if err != nil {
- err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
- return
- }
- defer func() {
- os.Remove(tmpFilePath)
- }()
- knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName]
- // 上传临时文件到LLM
- uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
- if err != nil {
- err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
- return
- }
- if len(uploadFileResp.FailedFiles) > 0 {
- for _, v := range uploadFileResp.FailedFiles {
- err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
- }
- }
- item.VectorKey = tmpFilePath
- item.ModifyTime = time.Now()
- err = item.Update([]string{"vector_key", "modify_time"})
- }
- // WechatArticleAbstractToKnowledge
- // @Description: 摘要入向量库
- // @author: Roc
- // @datetime 2025-03-10 16:14:59
- // @param wechatArticleItem *rag.WechatArticle
- // @param abstractItem *rag.WechatArticleAbstract
- func WechatArticleAbstractToKnowledge(wechatArticleItem *rag.WechatArticle, abstractItem *rag.WechatArticleAbstract, isReUpload bool) {
- if abstractItem.Content == `` {
- return
- }
- // 已经生成了,那就不处理了
- if abstractItem.VectorKey != `` && !isReUpload {
- return
- }
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("摘要入向量库失败,err:%v", err)
- fmt.Println("摘要入向量库失败,err:", err)
- }
- // 数据入ES库
- go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
- }()
- // 生成临时文件
- //dateDir := time.Now().Format("20060102")
- //uploadDir := + "./static/ai/article/" + dateDir
- uploadDir := "./static/ai/abstract"
- err = os.MkdirAll(uploadDir, utils.DIR_MOD)
- if err != nil {
- err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
- return
- }
- fileName := utils.MD5(fmt.Sprintf("%d_%d", utils.AI_ARTICLE_SOURCE_WECHAT, wechatArticleItem.WechatArticleId)) + `.md`
- tmpFilePath := uploadDir + "/" + fileName
- err = utils.SaveToFile(abstractItem.Content, tmpFilePath)
- if err != nil {
- err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
- return
- }
- defer func() {
- os.Remove(tmpFilePath)
- }()
- knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
- // 上传临时文件到LLM
- uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
- if err != nil {
- err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
- return
- }
- if len(uploadFileResp.FailedFiles) > 0 {
- for _, v := range uploadFileResp.FailedFiles {
- err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
- }
- }
- abstractItem.VectorKey = tmpFilePath
- abstractItem.ModifyTime = time.Now()
- err = abstractItem.Update([]string{"vector_key", "modify_time"})
- }
- // replaceWechatPlatformPic
- // @Description: 替换公众号头像
- // @author: Roc
- // @datetime 2025-03-11 09:38:24
- // @param item *rag.WechatPlatform
- func replaceWechatPlatformPic(item *rag.WechatPlatform) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("替换公众号头像失败,err:%v", err)
- fmt.Println("替换公众号头像失败,err:", err)
- }
- }()
- if item.RoundHeadImg == `` {
- return
- }
- resourceUrl, err := downloadWxPicAndUploadToOss(item.RoundHeadImg, `head_img`)
- if err != nil {
- return
- }
- item.RoundHeadImg = resourceUrl
- err = item.Update([]string{"round_head_img"})
- }
- // replaceWechatArticleCoverPic
- // @Description: 替换文章封面图
- // @author: Roc
- // @datetime 2025-03-11 09:38:35
- // @param item *rag.WechatArticle
- func replaceWechatArticleCoverPic(item *rag.WechatArticle) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("替换公众号头像失败,err:%v", err)
- fmt.Println("替换公众号头像失败,err:", err)
- }
- // 数据入ES库
- AddOrEditEsWechatArticle(item.WechatArticleId)
- }()
- if item.CoverUrl == `` {
- return
- }
- resourceUrl, err := downloadWxPicAndUploadToOss(item.CoverUrl, `cover_url`)
- if err != nil {
- return
- }
- item.CoverUrl = resourceUrl
- err = item.Update([]string{"cover_url"})
- }
- // replaceWechatArticlePic
- // @Description: 替换文章内容图
- // @author: Roc
- // @datetime 2025-03-11 09:38:35
- // @param item *rag.WechatArticle
- func ReplaceWechatArticlePic(item *rag.WechatArticle) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("替换公众号头像失败,err:%v", err)
- fmt.Println("替换公众号头像失败,err:", err)
- }
- }()
- if item.Content == `` {
- return
- }
- content, err := ReplaceHtmlImg(html.UnescapeString(item.Content))
- if err != nil {
- return
- }
- item.Content = html.EscapeString(content)
- err = item.Update([]string{"content"})
- return
- }
- // downloadWxPicAndUploadToOss
- // @Description: 下载微信图片并上传到OSS
- // @author: Roc
- // @datetime 2025-03-11 09:28:49
- // @param wxPicUrl string
- // @return resourceUrl string
- // @return err error
- func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, err error) {
- localFilePath, err := utils.DownloadWxImage(wxPicUrl)
- if err != nil {
- return
- }
- defer func() {
- os.Remove(localFilePath)
- }()
- ossClient := NewOssClient()
- if ossClient == nil {
- err = fmt.Errorf(`初始化OSS服务失败`)
- return
- }
- ext := path.Ext(localFilePath)
- fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), utils.GetRandStringNoSpecialChar(16), ext)
- //savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName
- savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName)
- resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath)
- if err != nil {
- err = fmt.Errorf("文件上传失败,Err:" + err.Error())
- return
- }
- return
- }
- // ReplaceHtmlImg
- // @Description: 将html中的图片替换成自己的
- // @author: Roc
- // @datetime 2025-03-11 14:32:00
- // @param htmlStr string
- // @return newHtml string
- // @return err error
- func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) {
- doc, err := html2.Parse(strings.NewReader(htmlStr))
- if err != nil {
- return
- }
- if err != nil {
- return
- }
- handleNode(doc)
- // 将处理后的HTML节点重新渲染为HTML字符串
- var buf bytes.Buffer
- if err = html2.Render(&buf, doc); err != nil {
- fmt.Println(err)
- return
- }
- newHtml = buf.String()
- return
- }
- // handleNode
- // @Description: html节点处理
- // @author: Roc
- // @datetime 2025-03-11 14:32:45
- // @param n *html2.Node
- func handleNode(n *html2.Node) {
- if n.Type == html2.ElementNode {
- if n.Data == "img" {
- for k, attr := range n.Attr {
- // 新增代码:如果标签是img且存在data-src属性,则将data-src的值赋给src
- if n.Data == "img" && attr.Key == "src" {
- resourceUrl, tmpErr := downloadWxPicAndUploadToOss(attr.Val, `article`)
- if tmpErr != nil {
- continue
- }
- attr.Val = resourceUrl
- }
- n.Attr[k] = attr
- }
- }
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- handleNode(c)
- }
- }
- // AddOrEditEsWechatPlatformId
- // @Description: 批量处理某个公众号下的文章到ES
- // @author: Roc
- // @datetime 2025-03-13 11:01:28
- // @param articleId int
- func AddOrEditEsWechatPlatformId(wechatPlatformId int) {
- if utils.EsWechatArticleName == `` {
- return
- }
- obj := rag.WechatArticle{}
- list, _ := obj.GetListByCondition(` wechat_article_id `, ` AND wechat_platform_id = ? `, []interface{}{wechatPlatformId}, 0, 1000000)
- for _, item := range list {
- AddOrEditEsWechatArticle(item.WechatArticleId)
- }
- }
- // AddOrEditEsWechatArticle
- // @Description: 新增/编辑微信文章入ES
- // @author: Roc
- // @datetime 2025-03-13 11:01:28
- // @param articleId int
- func AddOrEditEsWechatArticle(articleId int) {
- if utils.EsWechatArticleName == `` {
- return
- }
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
- fmt.Println("添加公众号微信信息到ES失败,err:", err)
- }
- }()
- obj := rag.WechatArticle{}
- articleInfo, err := obj.GetById(articleId)
- if err != nil {
- err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
- return
- }
- platformObj := rag.WechatPlatform{}
- platformInfo, err := platformObj.GetById(articleInfo.WechatPlatformId)
- if err != nil {
- err = fmt.Errorf("获取公众号平台信息失败,Err:" + err.Error())
- return
- }
- esItem := elastic.WechatArticleAndPlatform{
- WechatArticleId: articleInfo.WechatArticleId,
- WechatPlatformId: articleInfo.WechatPlatformId,
- FakeId: articleInfo.FakeId,
- Title: articleInfo.Title,
- Link: articleInfo.Link,
- CoverUrl: articleInfo.CoverUrl,
- Description: articleInfo.Description,
- //Content: articleInfo.Content,
- //TextContent: articleInfo.TextContent,
- //AbstractStatus: articleInfo.AbstractStatus,
- Country: articleInfo.Country,
- Province: articleInfo.Province,
- City: articleInfo.City,
- ArticleCreateTime: articleInfo.ArticleCreateTime,
- IsDeleted: articleInfo.IsDeleted,
- ModifyTime: articleInfo.ModifyTime,
- CreateTime: articleInfo.CreateTime,
- Nickname: platformInfo.Nickname,
- Alias: platformInfo.Alias,
- RoundHeadImg: platformInfo.RoundHeadImg,
- }
- err = elastic.WechatArticleEsAddOrEdit(strconv.Itoa(articleInfo.WechatArticleId), esItem)
- }
- // AddOrEditEsWechatArticleAbstract
- // @Description: 新增/编辑微信文章摘要入ES
- // @author: Roc
- // @datetime 2025-03-13 14:13:47
- // @param articleAbstractId int
- func AddOrEditEsWechatArticleAbstract(articleAbstractId int) {
- if utils.EsWechatArticleAbstractName == `` {
- return
- }
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
- fmt.Println("添加公众号微信信息到ES失败,err:", err)
- }
- }()
- obj := rag.WechatArticleAbstract{}
- abstractInfo, err := obj.GetById(articleAbstractId)
- if err != nil {
- err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
- return
- }
- articleObj := rag.WechatArticle{}
- articleInfo, err := articleObj.GetById(abstractInfo.WechatArticleId)
- if err != nil {
- err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
- return
- }
- // 标签ID
- tagIdList := make([]int, 0)
- if abstractInfo.Tags != `` {
- tagIdStrList := strings.Split(abstractInfo.Tags, ",")
- for _, tagIdStr := range tagIdStrList {
- tagId, tmpErr := strconv.Atoi(tagIdStr)
- if tmpErr != nil {
- err = fmt.Errorf("报告标签ID转int失败,Err:" + tmpErr.Error())
- return
- }
- tagIdList = append(tagIdList, tagId)
- }
- }
- esItem := elastic.WechatArticleAbstractItem{
- WechatArticleAbstractId: abstractInfo.WechatArticleAbstractId,
- WechatArticleId: abstractInfo.WechatArticleId,
- WechatPlatformId: articleInfo.WechatPlatformId,
- Abstract: abstractInfo.Content,
- QuestionId: abstractInfo.QuestionId,
- Version: abstractInfo.Version,
- VectorKey: abstractInfo.VectorKey,
- ModifyTime: articleInfo.ModifyTime,
- CreateTime: articleInfo.CreateTime,
- Title: articleInfo.Title,
- Link: articleInfo.Link,
- TagIdList: tagIdList,
- }
- err = elastic.WechatArticleAbstractEsAddOrEdit(strconv.Itoa(articleAbstractId), esItem)
- }
- // DelEsWechatArticleAbstract
- // @Description: 删除ES中的微信文章摘要
- // @author: Roc
- // @datetime 2025-03-13 14:13:47
- // @param articleAbstractId int
- func DelEsWechatArticleAbstract(articleAbstractId int) {
- if utils.EsWechatArticleAbstractName == `` {
- return
- }
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
- fmt.Println("添加公众号微信信息到ES失败,err:", err)
- }
- }()
- err = elastic.WechatArticleAbstractEsDel(strconv.Itoa(articleAbstractId))
- }
- // AddOrEditEsRagQuestion
- // @Description: 新增/编辑知识库问题入ES
- // @author: Roc
- // @datetime 2025-03-28 11:25:50
- // @param questionId int
- func AddOrEditEsRagQuestion(questionId int) {
- if utils.EsWechatArticleName == `` {
- return
- }
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
- fmt.Println("添加公众号微信信息到ES失败,err:", err)
- }
- }()
- obj := rag.Question{}
- questionInfo, err := obj.GetByID(questionId)
- if err != nil {
- err = fmt.Errorf("获取公众号文章信息失败,Err:" + err.Error())
- return
- }
- esItem := elastic.RagQuestionItem{
- QuestionId: questionInfo.QuestionId,
- QuestionTitle: questionInfo.QuestionTitle,
- QuestionContent: questionInfo.QuestionContent,
- Sort: questionInfo.Sort,
- SysUserId: questionInfo.SysUserId,
- SysUserRealName: questionInfo.SysUserRealName,
- ModifyTime: questionInfo.ModifyTime,
- CreateTime: questionInfo.CreateTime,
- }
- err = elastic.RagQuestionEsAddOrEdit(strconv.Itoa(questionInfo.QuestionId), esItem)
- }
- // DelEsRagQuestion
- // @Description: 删除ES中的知识库问题
- // @author: Roc
- // @datetime 2025-03-28 11:26:40
- // @param questionId int
- func DelEsRagQuestion(questionId int) {
- if utils.EsWechatArticleAbstractName == `` {
- return
- }
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
- fmt.Println("添加公众号微信信息到ES失败,err:", err)
- }
- }()
- err = elastic.RagQuestionEsDel(strconv.Itoa(questionId))
- }
|