123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598 |
- package services
- import (
- "encoding/json"
- "eta/eta_api/models"
- "eta/eta_api/models/rag"
- "eta/eta_api/services/elastic"
- "eta/eta_api/services/llm"
- "eta/eta_api/utils"
- "fmt"
- "golang.org/x/net/html"
- "golang.org/x/net/html/atom"
- "os"
- "regexp"
- "strconv"
- "strings"
- "time"
- )
- // ReportAddOrModifyKnowledge
- // @Description: ETA报告加入/修改到知识库
- // @author: Roc
- // @datetime 2025-04-07 14:41:45
- // @param reportId int
- // @param reportChapterId int
- func ReportAddOrModifyKnowledge(reportId, reportChapterId int) {
- if reportId <= 0 {
- return
- }
- var err error
- defer func() {
- if err != nil {
- //fmt.Println("ReportAddOrModifyKnowledge error:", err)
- utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
- }
- }()
- var title, author, htmlContent string
- var publishTime time.Time
- if reportChapterId > 0 {
- chapterInfo, tmpErr := models.GetReportChapterInfoById(reportChapterId)
- if tmpErr != nil {
- return
- }
- title = chapterInfo.Title
- author = chapterInfo.Author
- publishTime = chapterInfo.PublishTime
- htmlContent = chapterInfo.Content
- } else {
- reportInfo, tmpErr := models.GetReportByReportId(reportId)
- if tmpErr != nil {
- return
- }
- title = reportInfo.Title
- author = reportInfo.Author
- publishTime = reportInfo.PublishTime
- htmlContent = reportInfo.Content
- }
- err = handleReportAddOrModifyKnowledge(reportId, reportChapterId, title, author, htmlContent, publishTime)
- return
- }
- // ReportAddOrModifyKnowledgeByReportId
- // @Description: ETA报告加入/修改到知识库(只传id的情况)
- // @author: Roc
- // @datetime 2025-04-07 15:41:15
- // @param reportId int
- func ReportAddOrModifyKnowledgeByReportId(reportId int) {
- if reportId <= 0 {
- return
- }
- errList := make([]string, 0)
- defer func() {
- if len(errList) > 0 {
- utils.FileLog.Error("ReportAddOrModifyKnowledge error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
- }
- }()
- reportInfo, err := models.GetReportByReportId(reportId)
- if err != nil {
- errList = append(errList, err.Error())
- return
- }
- // 如果是单篇报告,那么直接处理
- if reportInfo.HasChapter == 0 {
- err = handleReportAddOrModifyKnowledge(reportId, 0, reportInfo.Title, reportInfo.Author, reportInfo.Content, reportInfo.PublishTime)
- if err != nil {
- errList = append(errList, err.Error())
- }
- return
- }
- // 章节类型的报告,需要查询出来后再处理
- chapterInfoList, err := models.GetPublishedChapterListByReportId(reportId)
- if err != nil {
- errList = append(errList, err.Error())
- return
- }
- for _, v := range chapterInfoList {
- err = handleReportAddOrModifyKnowledge(reportId, v.ReportChapterId, v.Title, reportInfo.Author, v.Content, v.PublishTime)
- if err != nil {
- errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", v.ReportChapterId, v.Title, err.Error()))
- continue
- }
- }
- return
- }
- // handleReportAddOrModifyKnowledge
- // @Description: 处理ETA报告加入/修改到知识库
- // @author: Roc
- // @datetime 2025-04-07 15:33:38
- // @param reportId int
- // @param reportChapterId int
- // @param title string
- // @param author string
- // @param htmlContent string
- // @param publishTime time.Time
- // @return err error
- func handleReportAddOrModifyKnowledge(reportId, reportChapterId int, title, author, htmlContent string, publishTime time.Time) (err error) {
- htmlContent = html.UnescapeString(htmlContent)
- doc, err := html.Parse(strings.NewReader(htmlContent))
- if err != nil {
- return
- }
- // 只获取文本内容
- content := &strings.Builder{}
- getArticleContent(content, doc)
- textContent := content.String()
- textContent = regexp.MustCompile(`\n+`).ReplaceAllString(textContent, "\n")
- textContent = strings.Trim(textContent, "\n")
- publishTimeStr := `未知`
- if !publishTime.IsZero() {
- title = fmt.Sprintf("%s(%s)", title, publishTime.Format(utils.FormatMonthDayUnSpace))
- publishTimeStr = publishTime.Format(utils.FormatDateTime)
- }
- textContent = fmt.Sprintf("标题:%s\n发布时间:%s\n%s", title, publishTimeStr, textContent)
- obj := rag.RagEtaReport{}
- item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
- if err != nil && !utils.IsErrNoRow(err) {
- // 查询异常,且不是没找到数据的报错
- return
- }
- if err == nil {
- // 标记删除了的话,那就不处理了
- if item.IsDeleted == 1 {
- return
- }
- item.Title = title
- item.Author = author
- item.TextContent = textContent
- item.IsPublished = 1
- //item.PublishTime = publishTime
- item.ModifyTime = time.Now()
- //err = item.Update([]string{"title", "author", "text_content", "is_published", "publish_time", "modify_time"})
- err = item.Update([]string{"title", "author", "text_content", "is_published", "modify_time"})
- } else {
- // 无数据的时候,需要新增
- err = nil
- item = &rag.RagEtaReport{
- RagEtaReportId: 0,
- ReportId: reportId,
- ReportChapterId: reportChapterId,
- Title: title,
- Author: author,
- TextContent: textContent,
- VectorKey: "",
- IsPublished: 1,
- IsDeleted: 0,
- PublishTime: publishTime,
- ModifyTime: time.Now(),
- CreateTime: time.Now(),
- }
- err = item.Create()
- }
- return
- }
- // ReportUnPublishedKnowledge
- // @Description: 知识库取消发布
- // @author: Roc
- // @datetime 2025-04-07 14:58:25
- // @param reportId int
- // @param reportChapterId int
- func ReportUnPublishedKnowledge(reportId, reportChapterId int) {
- if reportId <= 0 && reportChapterId <= 0 {
- return
- }
- var err error
- defer func() {
- if err != nil {
- //fmt.Println("ReportAddOrModifyKnowledge error:", err)
- utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
- }
- }()
- obj := rag.RagEtaReport{}
- item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
- if err != nil && !utils.IsErrNoRow(err) {
- // 查询异常,且不是没找到数据的报错
- return
- }
- if item.RagEtaReportId > 0 {
- item.IsPublished = 0
- item.ModifyTime = time.Now()
- err = item.Update([]string{"is_published", "modify_time"})
- }
- return
- }
- // ReportUnPublishedKnowledgeByReportId
- // @Description: ETA报告取消发布同步到知识库(只传报告id的情况)
- // @author: Roc
- // @datetime 2025-04-07 15:41:15
- // @param reportId int
- func ReportUnPublishedKnowledgeByReportId(reportId int) {
- errList := make([]string, 0)
- defer func() {
- if len(errList) > 0 {
- utils.FileLog.Error("ReportUnPublishedKnowledgeByReportId error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
- }
- }()
- obj := rag.RagEtaReport{}
- list, err := obj.GetListByCondition(``, ` AND report_id = ? `, []interface{}{reportId}, 0, 1000)
- if err != nil && !utils.IsErrNoRow(err) {
- // 查询异常,且不是没找到数据的报错
- return
- }
- for _, item := range list {
- item.IsPublished = 0
- item.ModifyTime = time.Now()
- err = item.Update([]string{"is_published", "modify_time"})
- if err != nil {
- errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", item.ReportChapterId, item.Title, err.Error()))
- continue
- }
- }
- return
- }
- func getArticleContent(content *strings.Builder, htmlContentNode *html.Node) {
- if htmlContentNode.Type == html.TextNode {
- cleanData := strings.TrimSpace(htmlContentNode.Data)
- if cleanData != `` && cleanData != "</p>" {
- content.WriteString(cleanData)
- }
- } else if htmlContentNode.Type == html.ElementNode {
- switch htmlContentNode.DataAtom {
- case atom.Ul:
- content.WriteString("\n")
- case atom.Br:
- // 遇到 <br> 标签时添加换行符
- content.WriteString("\n")
- case atom.P:
- content.WriteString("\n")
- }
- }
- for c := htmlContentNode.FirstChild; c != nil; c = c.NextSibling {
- getArticleContent(content, c)
- }
- }
- // GenerateArticleAbstract
- // @Description: 文章摘要生成(默认提示词批量生成)
- // @author: Roc
- // @datetime 2025-03-10 16:17:53
- // @param item *rag.RagEtaReport
- func GenerateRagEtaReportAbstract(item *rag.RagEtaReport, forceGenerate bool) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("文章转临时文件失败,err:%v", err)
- fmt.Println("文章转临时文件失败,err:", err)
- }
- }()
- // 内容为空,那就不需要生成摘要
- if item.TextContent == `` {
- return
- }
- questionObj := rag.Question{}
- questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100)
- if err != nil {
- err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
- return
- }
- // 没问题就不生成了
- if len(questionList) <= 0 {
- return
- }
- for _, question := range questionList {
- GenerateRagEtaReportAbstractByQuestion(item, question, forceGenerate)
- }
- return
- }
- // GenerateRagEtaReportAbstractByQuestion
- // @Description: 文章摘要生成(根据提示词生成)
- // @author: Roc
- // @datetime 2025-03-10 16:17:53
- // @param item *rag.RagEtaReport
- func GenerateRagEtaReportAbstractByQuestion(item *rag.RagEtaReport, question *rag.Question, forceGenerate bool) {
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("文章转临时文件失败,err:%v", err)
- fmt.Println("文章转临时文件失败,err:", err)
- }
- }()
- // 内容为空,那就不需要生成摘要
- if item.TextContent == `` {
- return
- }
- abstractObj := rag.RagEtaReportAbstract{}
- abstractItem, err := abstractObj.GetByRagEtaReportIdAndQuestionId(item.RagEtaReportId, question.QuestionId)
- // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
- if err == nil && !forceGenerate {
- // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
- ReportAbstractToKnowledge(item, abstractItem, false)
- return
- }
- //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
- questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)
- //开始对话
- abstract, industryTags, _, tmpErr := getAnswerByContent(item.RagEtaReportId, utils.AI_ARTICLE_SOURCE_ETA_REPORT, questionStr)
- if tmpErr != nil {
- err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
- return
- }
- // 添加问答记录
- //if len(addArticleChatRecordList) > 0 {
- // recordObj := rag.RagEtaReportChatRecord{}
- // err = recordObj.CreateInBatches(addArticleChatRecordList)
- // if err != nil {
- // return
- // }
- //}
- if abstract == `` {
- return
- }
- //if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
- // item.AbstractStatus = 2
- // item.ModifyTime = time.Now()
- // err = item.Update([]string{"AbstractStatus", "ModifyTime"})
- // return
- //}
- //item.AbstractStatus = 1
- //item.ModifyTime = time.Now()
- //err = item.Update([]string{"AbstractStatus", "ModifyTime"})
- var tagIdJsonStr string
- // 标签ID
- {
- tagIdList := make([]int, 0)
- tagIdMap := make(map[int]bool)
- if abstractItem != nil && abstractItem.Tags != `` {
- tmpErr = json.Unmarshal([]byte(abstractItem.Tags), &tagIdList)
- if tmpErr != nil {
- utils.FileLog.Info(fmt.Sprintf("json.Unmarshal 失败,标签数据:%s,Err:%s", abstractItem.Tags, tmpErr.Error()))
- } else {
- for _, tagId := range tagIdList {
- tagIdMap[tagId] = true
- }
- }
- }
- for _, tagName := range industryTags {
- tagId, tmpErr := GetTagIdByName(tagName)
- if tmpErr != nil {
- utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error()))
- }
- if _, ok := tagIdMap[tagId]; !ok {
- tagIdList = append(tagIdList, tagId)
- tagIdMap[tagId] = true
- }
- }
- //for _, tagName := range varietyTags {
- // tagId, tmpErr := GetTagIdByName(tagName)
- // if tmpErr != nil {
- // utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error()))
- // }
- // if _, ok := tagIdMap[tagId]; !ok {
- // tagIdList = append(tagIdList, tagId)
- // tagIdMap[tagId] = true
- // }
- //}
- tagIdJsonByte, err := json.Marshal(tagIdList)
- if err != nil {
- utils.FileLog.Info(fmt.Sprintf("标签ID序列化失败,Err:%s", tmpErr.Error()))
- } else {
- tagIdJsonStr = string(tagIdJsonByte)
- }
- }
- if abstractItem == nil || abstractItem.RagEtaReportAbstractId <= 0 {
- abstractItem = &rag.RagEtaReportAbstract{
- RagEtaReportAbstractId: 0,
- RagEtaReportId: item.RagEtaReportId,
- Content: abstract,
- QuestionId: question.QuestionId,
- QuestionContent: question.QuestionContent,
- Version: 1,
- Tags: tagIdJsonStr,
- VectorKey: "",
- ModifyTime: time.Now(),
- CreateTime: time.Now(),
- }
- err = abstractItem.Create()
- } else {
- // 添加历史记录
- rag.AddArticleAbstractHistoryByRagEtaReportAbstract(abstractItem)
- abstractItem.Content = abstract
- abstractItem.Version++
- abstractItem.ModifyTime = time.Now()
- abstractItem.Tags = ""
- abstractItem.QuestionContent = question.QuestionContent
- err = abstractItem.Update([]string{"content", "version", "modify_time", "tags", "question_content"})
- }
- if err != nil {
- return
- }
- // 数据入ES库
- go AddOrEditEsRagEtaReportAbstract(abstractItem.RagEtaReportAbstractId)
- ReportAbstractToKnowledge(item, abstractItem, false)
- }
- // AddOrEditEsRagEtaReportAbstract
- // @Description: 新增/编辑微信文章摘要入ES
- // @author: Roc
- // @datetime 2025-03-13 14:13:47
- // @param articleAbstractId int
- func AddOrEditEsRagEtaReportAbstract(ragEtaReportAbstractId int) {
- if utils.EsRagEtaReportAbstractName == `` {
- return
- }
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("添加ETA报告微信信息到ES失败,err:%v", err)
- fmt.Println("添加ETA报告微信信息到ES失败,err:", err)
- }
- }()
- obj := rag.RagEtaReportAbstract{}
- abstractInfo, err := obj.GetById(ragEtaReportAbstractId)
- if err != nil {
- err = fmt.Errorf("获取ETA报告文章信息失败,Err:" + err.Error())
- return
- }
- ragEtaReportObj := rag.RagEtaReport{}
- articleInfo, err := ragEtaReportObj.GetById(abstractInfo.RagEtaReportAbstractId)
- if err != nil {
- err = fmt.Errorf("获取ETA报告文章信息失败,Err:" + err.Error())
- return
- }
- tagIdList := make([]int, 0)
- if abstractInfo.Tags != `` {
- err = json.Unmarshal([]byte(abstractInfo.Tags), &tagIdList)
- if err != nil {
- err = fmt.Errorf("报告标签ID转int失败,Err:" + err.Error())
- utils.FileLog.Info(fmt.Sprintf("json.Unmarshal 报告标签ID转int失败,标签数据:%s,Err:%s", abstractInfo.Tags, err.Error()))
- }
- }
- esItem := elastic.RagEtaReportAbstractItem{
- RagEtaReportAbstractId: abstractInfo.RagEtaReportAbstractId,
- RagEtaReportId: abstractInfo.RagEtaReportId,
- Abstract: abstractInfo.Content,
- QuestionId: abstractInfo.QuestionId,
- Version: abstractInfo.Version,
- VectorKey: abstractInfo.VectorKey,
- ModifyTime: abstractInfo.ModifyTime,
- CreateTime: abstractInfo.CreateTime,
- Title: articleInfo.Title,
- TagIdList: tagIdList,
- }
- err = elastic.RagEtaReportAbstractEsAddOrEdit(strconv.Itoa(abstractInfo.RagEtaReportAbstractId), esItem)
- }
- // DelEsRagEtaReportAbstract
- // @Description: 删除ES中的ETA报告
- // @author: Roc
- // @datetime 2025-04-21 11:08:09
- // @param articleAbstractId int
- func DelEsRagEtaReportAbstract(articleAbstractId int) {
- if utils.EsRagEtaReportAbstractName == `` {
- return
- }
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("删除ES中的ETA报告失败,err:%v", err)
- fmt.Println("删除ES中的ETA报告失败,err:", err)
- }
- }()
- err = elastic.RagEtaReportAbstractEsDel(strconv.Itoa(articleAbstractId))
- }
- // WechatArticleAbstractToKnowledge
- // @Description: 摘要入向量库
- // @author: Roc
- // @datetime 2025-03-10 16:14:59
- // @param wechatArticleItem *rag.RagEtaReport
- // @param abstractItem *rag.RagEtaReportAbstract
- func ReportAbstractToKnowledge(ragEtaReport *rag.RagEtaReport, abstractItem *rag.RagEtaReportAbstract, isReUpload bool) {
- if abstractItem.Content == `` {
- return
- }
- // 已经生成了,那就不处理了
- if abstractItem.VectorKey != `` && !isReUpload {
- return
- }
- var err error
- defer func() {
- if err != nil {
- utils.FileLog.Error("摘要入向量库失败,err:%v", err)
- fmt.Println("摘要入向量库失败,err:", err)
- }
- // 数据入ES库
- go AddOrEditEsRagEtaReportAbstract(abstractItem.RagEtaReportAbstractId)
- }()
- // 生成临时文件
- //dateDir := time.Now().Format("20060102")
- //uploadDir := + "./static/ai/article/" + dateDir
- uploadDir := "./static/ai/abstract"
- err = os.MkdirAll(uploadDir, utils.DIR_MOD)
- if err != nil {
- err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
- return
- }
- fileName := utils.MD5(fmt.Sprintf("%d_%d", utils.AI_ARTICLE_SOURCE_ETA_REPORT, ragEtaReport.RagEtaReportId)) + `.md`
- tmpFilePath := uploadDir + "/" + fileName
- err = utils.SaveToFile(abstractItem.Content, tmpFilePath)
- if err != nil {
- err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
- return
- }
- defer func() {
- os.Remove(tmpFilePath)
- }()
- knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
- // 上传临时文件到LLM
- uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
- if err != nil {
- err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
- return
- }
- if len(uploadFileResp.FailedFiles) > 0 {
- for _, v := range uploadFileResp.FailedFiles {
- err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
- }
- }
- abstractItem.VectorKey = tmpFilePath
- abstractItem.ModifyTime = time.Now()
- err = abstractItem.Update([]string{"vector_key", "modify_time"})
- }
|