package services import ( "encoding/json" "errors" "eta/eta_api/cache" "eta/eta_api/models" "eta/eta_api/models/rag" "eta/eta_api/services/elastic" "eta/eta_api/services/llm" "eta/eta_api/utils" "fmt" "golang.org/x/net/html" "golang.org/x/net/html/atom" "os" "regexp" "strconv" "strings" "time" ) // ReportAddOrModifyKnowledge // @Description: ETA报告加入/修改到知识库 // @author: Roc // @datetime 2025-04-07 14:41:45 // @param reportId int // @param reportChapterId int func ReportAddOrModifyKnowledge(reportId, reportChapterId int) { if reportId <= 0 { return } var err error defer func() { if err != nil { //fmt.Println("ReportAddOrModifyKnowledge error:", err) utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err) } }() var title, author, htmlContent string var publishTime time.Time if reportChapterId > 0 { chapterInfo, tmpErr := models.GetReportChapterInfoById(reportChapterId) if tmpErr != nil { return } title = chapterInfo.Title author = chapterInfo.Author publishTime = chapterInfo.PublishTime htmlContent = chapterInfo.Content } else { reportInfo, tmpErr := models.GetReportByReportId(reportId) if tmpErr != nil { return } title = reportInfo.Title author = reportInfo.Author publishTime = reportInfo.PublishTime htmlContent = reportInfo.Content } err = handleReportAddOrModifyKnowledge(reportId, reportChapterId, title, author, htmlContent, publishTime) return } // ReportAddOrModifyKnowledgeByReportId // @Description: ETA报告加入/修改到知识库(只传id的情况) // @author: Roc // @datetime 2025-04-07 15:41:15 // @param reportId int func ReportAddOrModifyKnowledgeByReportId(reportId int) { if reportId <= 0 { return } errList := make([]string, 0) defer func() { if len(errList) > 0 { utils.FileLog.Error("ReportAddOrModifyKnowledge error,报告ID:%d:%s", reportId, strings.Join(errList, "\n")) } }() reportInfo, err := models.GetReportByReportId(reportId) if err != nil { errList = append(errList, err.Error()) return } // 如果是单篇报告,那么直接处理 if reportInfo.HasChapter == 0 { err = handleReportAddOrModifyKnowledge(reportId, 0, reportInfo.Title, reportInfo.Author, reportInfo.Content, reportInfo.PublishTime) if err != nil { errList = append(errList, err.Error()) } return } // 章节类型的报告,需要查询出来后再处理 chapterInfoList, err := models.GetPublishedChapterListByReportId(reportId) if err != nil { errList = append(errList, err.Error()) return } for _, v := range chapterInfoList { err = handleReportAddOrModifyKnowledge(reportId, v.ReportChapterId, v.Title, reportInfo.Author, v.Content, v.PublishTime) if err != nil { errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", v.ReportChapterId, v.Title, err.Error())) continue } } return } // handleReportAddOrModifyKnowledge // @Description: 处理ETA报告加入/修改到知识库 // @author: Roc // @datetime 2025-04-07 15:33:38 // @param reportId int // @param reportChapterId int // @param title string // @param author string // @param htmlContent string // @param publishTime time.Time // @return err error func handleReportAddOrModifyKnowledge(reportId, reportChapterId int, title, author, htmlContent string, publishTime time.Time) (err error) { htmlContent = html.UnescapeString(htmlContent) doc, err := html.Parse(strings.NewReader(htmlContent)) if err != nil { return } // 只获取文本内容 content := &strings.Builder{} getArticleContent(content, doc) textContent := content.String() textContent = regexp.MustCompile(`\n+`).ReplaceAllString(textContent, "\n") textContent = strings.Trim(textContent, "\n") publishTimeStr := `未知` if !publishTime.IsZero() { title = fmt.Sprintf("%s(%s)", title, publishTime.Format(utils.FormatMonthDayUnSpace)) publishTimeStr = publishTime.Format(utils.FormatDateTime) } textContent = fmt.Sprintf("标题:%s\n发布时间:%s\n%s", title, publishTimeStr, textContent) obj := rag.RagEtaReport{} item, err := obj.GetByReportAndChapterId(reportId, reportChapterId) if err != nil && !utils.IsErrNoRow(err) { // 查询异常,且不是没找到数据的报错 return } if err == nil { // 标记删除了的话,那就不处理了 if item.IsDeleted == 1 { return } item.Title = title item.Author = author item.TextContent = textContent item.IsPublished = 1 //item.PublishTime = publishTime item.ModifyTime = time.Now() //err = item.Update([]string{"title", "author", "text_content", "is_published", "publish_time", "modify_time"}) err = item.Update([]string{"title", "author", "text_content", "is_published", "modify_time"}) } else { // 无数据的时候,需要新增 err = nil item = &rag.RagEtaReport{ RagEtaReportId: 0, ReportId: reportId, ReportChapterId: reportChapterId, Title: title, Author: author, TextContent: textContent, VectorKey: "", IsPublished: 1, IsDeleted: 0, PublishTime: publishTime, ModifyTime: time.Now(), CreateTime: time.Now(), } err = item.Create() } cache.AddRagEtaReportLlmOpToCache(item.RagEtaReportId, 0, true) return } // ReportUnPublishedKnowledge // @Description: 知识库取消发布 // @author: Roc // @datetime 2025-04-07 14:58:25 // @param reportId int // @param reportChapterId int func ReportUnPublishedKnowledge(reportId, reportChapterId int) { if reportId <= 0 && reportChapterId <= 0 { return } var err error defer func() { if err != nil { //fmt.Println("ReportAddOrModifyKnowledge error:", err) utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err) } }() obj := rag.RagEtaReport{} item, err := obj.GetByReportAndChapterId(reportId, reportChapterId) if err != nil && !utils.IsErrNoRow(err) { // 查询异常,且不是没找到数据的报错 return } if item.RagEtaReportId > 0 { item.IsPublished = 0 item.ModifyTime = time.Now() err = item.Update([]string{"is_published", "modify_time"}) } return } // ReportUnPublishedKnowledgeByReportId // @Description: ETA报告取消发布同步到知识库(只传报告id的情况) // @author: Roc // @datetime 2025-04-07 15:41:15 // @param reportId int func ReportUnPublishedKnowledgeByReportId(reportId int) { errList := make([]string, 0) defer func() { if len(errList) > 0 { utils.FileLog.Error("ReportUnPublishedKnowledgeByReportId error,报告ID:%d:%s", reportId, strings.Join(errList, "\n")) } }() obj := rag.RagEtaReport{} list, err := obj.GetListByCondition(``, ` AND report_id = ? `, []interface{}{reportId}, 0, 1000) if err != nil && !utils.IsErrNoRow(err) { // 查询异常,且不是没找到数据的报错 return } for _, item := range list { item.IsPublished = 0 item.ModifyTime = time.Now() err = item.Update([]string{"is_published", "modify_time"}) if err != nil { errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", item.ReportChapterId, item.Title, err.Error())) continue } // 删除摘要 err = DelRagEtaReportAbstract([]int{item.RagEtaReportId}) } return } func getArticleContent(content *strings.Builder, htmlContentNode *html.Node) { if htmlContentNode.Type == html.TextNode { cleanData := strings.TrimSpace(htmlContentNode.Data) if cleanData != `` && cleanData != "

" { content.WriteString(cleanData) } } else if htmlContentNode.Type == html.ElementNode { switch htmlContentNode.DataAtom { case atom.Ul: content.WriteString("\n") case atom.Br: // 遇到
标签时添加换行符 content.WriteString("\n") case atom.P: content.WriteString("\n") } } for c := htmlContentNode.FirstChild; c != nil; c = c.NextSibling { getArticleContent(content, c) } } // GenerateRagEtaReportAbstract // @Description: 文章摘要生成(默认提示词批量生成) // @author: Roc // @datetime 2025-04-24 11:24:53 // @param item *rag.RagEtaReport // @param forceGenerate bool func GenerateRagEtaReportAbstract(item *rag.RagEtaReport, forceGenerate bool) { var err error defer func() { if err != nil { utils.FileLog.Error("文章摘要生成(默认提示词批量生成)失败,err:%v", err) } }() // 内容为空,那就不需要生成摘要 if item.TextContent == `` { return } questionObj := rag.Question{} questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100) if err != nil { err = fmt.Errorf("获取问题列表失败,Err:" + err.Error()) return } // 没问题就不生成了 if len(questionList) <= 0 { return } for _, question := range questionList { GenerateRagEtaReportAbstractByQuestion(item, question, forceGenerate) } return } // GenerateRagEtaReportAbstractByQuestion // @Description: ETA报告摘要生成(根据提示词生成) // @author: Roc // @datetime 2025-04-24 11:23:49 // @param item *rag.RagEtaReport // @param question *rag.Question // @param forceGenerate bool // @return err error func GenerateRagEtaReportAbstractByQuestion(item *rag.RagEtaReport, question *rag.Question, forceGenerate bool) (err error) { defer func() { if err != nil { utils.FileLog.Error("文章摘要生成(根据提示词生成)失败,err:%v", err) } }() // 内容为空,那就不需要生成摘要 if item.TextContent == `` { return } abstractObj := rag.RagEtaReportAbstract{} abstractItem, err := abstractObj.GetByRagEtaReportIdAndQuestionId(item.RagEtaReportId, question.QuestionId) // 如果找到了,同时不是强制生成,那么就直接处理到知识库中 if err == nil && !forceGenerate { // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中 ReportAbstractToKnowledge(item, abstractItem, false) return } // 如果是没找到数据,那么就将报错置空 if err != nil && utils.IsErrNoRow(err) { err = nil } //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent) //开始对话 abstract, industryTags, tmpErr := getAnswerByContent(item.RagEtaReportId, utils.AI_ARTICLE_SOURCE_ETA_REPORT, questionStr) if tmpErr != nil { err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error()) return } // 添加问答记录 //if len(addArticleChatRecordList) > 0 { // recordObj := rag.RagEtaReportChatRecord{} // err = recordObj.CreateInBatches(addArticleChatRecordList) // if err != nil { // return // } //} if abstract == `` { return } if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 { return } //if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 { // item.AbstractStatus = 2 // item.ModifyTime = time.Now() // err = item.Update([]string{"AbstractStatus", "ModifyTime"}) // return //} //item.AbstractStatus = 1 //item.ModifyTime = time.Now() //err = item.Update([]string{"AbstractStatus", "ModifyTime"}) var tagIdJsonStr string var tagNameJsonStr string // 标签ID { tagIdList := make([]int, 0) tagNameList := make([]string, 0) tagIdMap := make(map[int]bool) if abstractItem != nil && abstractItem.Tags != `` { tmpErr = json.Unmarshal([]byte(abstractItem.Tags), &tagIdList) if tmpErr != nil { utils.FileLog.Info(fmt.Sprintf("json.Unmarshal 失败,标签数据:%s,Err:%s", abstractItem.Tags, tmpErr.Error())) } else { for _, tagId := range tagIdList { tagIdMap[tagId] = true } } if abstractItem.TagsName != `` { tagNameList = strings.Split(abstractItem.TagsName, ",") } } for _, tagName := range industryTags { tagId, tmpErr := GetTagIdByName(tagName) if tmpErr != nil { utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error())) } if _, ok := tagIdMap[tagId]; !ok { tagIdList = append(tagIdList, tagId) tagNameList = append(tagNameList, tagName) tagIdMap[tagId] = true } } //for _, tagName := range varietyTags { // tagId, tmpErr := GetTagIdByName(tagName) // if tmpErr != nil { // utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error())) // } // if _, ok := tagIdMap[tagId]; !ok { // tagIdList = append(tagIdList, tagId) // tagIdMap[tagId] = true // } //} tagIdJsonByte, err := json.Marshal(tagIdList) if err != nil { utils.FileLog.Info(fmt.Sprintf("标签ID序列化失败,Err:%s", tmpErr.Error())) } else { tagIdJsonStr = string(tagIdJsonByte) } tagNameJsonStr = strings.Join(tagNameList, `,`) } if abstractItem == nil || abstractItem.RagEtaReportAbstractId <= 0 { abstractItem = &rag.RagEtaReportAbstract{ RagEtaReportAbstractId: 0, RagEtaReportId: item.RagEtaReportId, Content: abstract, QuestionId: question.QuestionId, QuestionContent: question.QuestionContent, Version: 1, Tags: tagIdJsonStr, TagsName: tagNameJsonStr, VectorKey: "", ModifyTime: time.Now(), CreateTime: time.Now(), } err = abstractItem.Create() } else { // 添加历史记录 rag.AddArticleAbstractHistoryByRagEtaReportAbstract(abstractItem) abstractItem.Content = abstract abstractItem.Version++ abstractItem.ModifyTime = time.Now() abstractItem.Tags = tagIdJsonStr abstractItem.TagsName = tagNameJsonStr abstractItem.QuestionContent = question.QuestionContent err = abstractItem.Update([]string{"content", "version", "modify_time", "tags", "tags_name", "question_content"}) } if err != nil { return } // 数据入ES库 go AddOrEditEsRagEtaReportAbstract(abstractItem.RagEtaReportAbstractId) ReportAbstractToKnowledge(item, abstractItem, false) return } // AddOrEditEsRagEtaReportAbstract // @Description: 新增/编辑微信文章摘要入ES // @author: Roc // @datetime 2025-03-13 14:13:47 // @param articleAbstractId int func AddOrEditEsRagEtaReportAbstract(ragEtaReportAbstractId int) { if utils.EsRagEtaReportAbstractName == `` { return } var err error defer func() { if err != nil { utils.FileLog.Error("添加ETA报告微信信息到ES失败,err:%v", err) fmt.Println("添加ETA报告微信信息到ES失败,err:", err) } }() obj := rag.RagEtaReportAbstract{} abstractInfo, err := obj.GetById(ragEtaReportAbstractId) if err != nil { err = fmt.Errorf("获取ETA报告文章信息失败,Err:" + err.Error()) return } ragEtaReportObj := rag.RagEtaReport{} articleInfo, err := ragEtaReportObj.GetById(abstractInfo.RagEtaReportAbstractId) if err != nil { err = fmt.Errorf("获取ETA报告文章信息失败,Err:" + err.Error()) return } tagIdList := make([]int, 0) if abstractInfo.Tags != `` { err = json.Unmarshal([]byte(abstractInfo.Tags), &tagIdList) if err != nil { err = fmt.Errorf("报告标签ID转int失败,Err:" + err.Error()) utils.FileLog.Info(fmt.Sprintf("json.Unmarshal 报告标签ID转int失败,标签数据:%s,Err:%s", abstractInfo.Tags, err.Error())) } } tagNameList := make([]string, 0) if abstractInfo.TagsName != `` { tagNameList = strings.Split(abstractInfo.TagsName, ",") } esItem := elastic.RagEtaReportAbstractItem{ RagEtaReportAbstractId: abstractInfo.RagEtaReportAbstractId, RagEtaReportId: abstractInfo.RagEtaReportId, Abstract: abstractInfo.Content, QuestionId: abstractInfo.QuestionId, Version: abstractInfo.Version, VectorKey: abstractInfo.VectorKey, ModifyTime: abstractInfo.ModifyTime, CreateTime: abstractInfo.CreateTime, Title: articleInfo.Title, TagIdList: tagIdList, TagNameList: tagNameList, } err = elastic.RagEtaReportAbstractEsAddOrEdit(strconv.Itoa(abstractInfo.RagEtaReportAbstractId), esItem) } // DelEsRagEtaReportAbstract // @Description: 删除ES中的ETA报告 // @author: Roc // @datetime 2025-04-21 11:08:09 // @param articleAbstractId int func DelEsRagEtaReportAbstract(articleAbstractId int) { if utils.EsRagEtaReportAbstractName == `` { return } var err error defer func() { if err != nil { utils.FileLog.Error("删除ES中的ETA报告失败,err:%v", err) fmt.Println("删除ES中的ETA报告失败,err:", err) } }() err = elastic.RagEtaReportAbstractEsDel(strconv.Itoa(articleAbstractId)) } // WechatArticleAbstractToKnowledge // @Description: 摘要入向量库 // @author: Roc // @datetime 2025-03-10 16:14:59 // @param wechatArticleItem *rag.RagEtaReport // @param abstractItem *rag.RagEtaReportAbstract func ReportAbstractToKnowledge(ragEtaReport *rag.RagEtaReport, abstractItem *rag.RagEtaReportAbstract, isReUpload bool) { if abstractItem.Content == `` { return } // 已经生成了,那就不处理了 if abstractItem.VectorKey != `` && !isReUpload { return } var err error defer func() { if err != nil { utils.FileLog.Error("摘要入向量库失败,err:%v", err) fmt.Println("摘要入向量库失败,err:", err) } // 数据入ES库 go AddOrEditEsRagEtaReportAbstract(abstractItem.RagEtaReportAbstractId) }() // 生成临时文件 //dateDir := time.Now().Format("20060102") //uploadDir := + "./static/ai/article/" + dateDir uploadDir := "./static/ai/abstract" err = os.MkdirAll(uploadDir, utils.DIR_MOD) if err != nil { err = fmt.Errorf("存储目录创建失败,Err:" + err.Error()) return } fileName := utils.MD5(fmt.Sprintf("%d_%d", utils.AI_ARTICLE_SOURCE_ETA_REPORT, ragEtaReport.RagEtaReportId)) + `.md` tmpFilePath := uploadDir + "/" + fileName err = utils.SaveToFile(abstractItem.Content, tmpFilePath) if err != nil { err = fmt.Errorf("生成临时文件失败,Err:" + err.Error()) return } defer func() { os.Remove(tmpFilePath) }() knowledgeArticleName := models.BusinessConfMap[models.PrivateKnowledgeBaseName] // 上传临时文件到LLM uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName) if err != nil { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error()) return } if len(uploadFileResp.FailedFiles) > 0 { for _, v := range uploadFileResp.FailedFiles { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v) } } abstractItem.VectorKey = tmpFilePath abstractItem.ModifyTime = time.Now() err = abstractItem.Update([]string{"vector_key", "modify_time"}) } // DelRagReportLlmDoc // @Description: 删除ETA报告的摘要向量库 // @author: Roc // @datetime 2025-04-23 13:24:51 // @param vectorKeyList []string // @param abstractIdList []int // @return err error func DelRagReportLlmDoc(vectorKeyList []string, abstractIdList []int) (err error) { defer func() { if err != nil { utils.FileLog.Error("删除摘要向量库文件失败,err:%v", err) fmt.Println("删除摘要向量库文件失败,err:", err) } }() // 没有就不删除 if len(vectorKeyList) <= 0 { return } _, err = llm.DelDocsToKnowledge(models.BusinessConfMap[models.PrivateKnowledgeBaseName], vectorKeyList) obj := rag.RagEtaReportAbstract{} err = obj.DelVectorKey(abstractIdList) return } // DelRagEtaReportAbstract // @Description: 删除ETA报告摘要 // @author: Roc // @datetime 2025-04-23 17:36:22 // @param abstractIdList []int // @return err error func DelRagEtaReportAbstract(abstractIdList []int) (err error) { obj := rag.RagEtaReportAbstract{} list, err := obj.GetByIdList(abstractIdList) if err != nil { if !utils.IsErrNoRow(err) { err = errors.New("删除向量库失败,Err:" + err.Error()) } else { err = nil } return } err = delRagEtaReportAbstract(list) return } // DelRagEtaReportAbstractByQuestionId // @Description: 根据提示词ID删除ETA报告摘要 // @author: Roc // @datetime 2025-04-23 17:36:22 // @param abstractIdList []int // @return err error func DelRagEtaReportAbstractByQuestionId(questionId int) (err error) { obj := rag.RagEtaReportAbstract{} list, err := obj.GetListByQuestionId(questionId) if err != nil { if !utils.IsErrNoRow(err) { err = errors.New("删除向量库失败,Err:" + err.Error()) } else { err = nil } return } err = delRagEtaReportAbstract(list) return } // delRagEtaReportAbstract // @Description: 删除摘要 // @author: Roc // @datetime 2025-04-24 15:19:19 // @param list []*rag.RagEtaReportAbstract // @return err error func delRagEtaReportAbstract(list []*rag.RagEtaReportAbstract) (err error) { obj := rag.RagEtaReportAbstract{} vectorKeyList := make([]string, 0) newAbstractIdList := make([]int, 0) if len(list) > 0 { for _, v := range list { // 有加入到向量库,那么就加入到待删除的向量库list中 if v.VectorKey != `` { vectorKeyList = append(vectorKeyList, v.VectorKey) } newAbstractIdList = append(newAbstractIdList, v.RagEtaReportAbstractId) } } // 删除向量库 err = DelRagReportLlmDoc(vectorKeyList, newAbstractIdList) if err != nil { err = errors.New("删除向量库失败,Err:" + err.Error()) return } // 删除摘要 err = obj.DelByIdList(newAbstractIdList) if err != nil { err = errors.New("删除失败,Err:" + err.Error()) return } // 删除es数据 for _, wechatArticleAbstractId := range newAbstractIdList { go DelEsRagEtaReportAbstract(wechatArticleAbstractId) } return } // GetDelAbstractByQuestionIdCacheKey // @Description: 获取删除微信文章/ETA报告摘要的缓存key // @author: Roc // @datetime 2025-04-24 15:44:41 // @param questionId int // @return string func GetDelAbstractByQuestionIdCacheKey(questionId int) string { return fmt.Sprintf("%s%d", utils.CACHE_AI_ARTICLE_ABSTRACT_DEL, questionId) } // DelAbstractByQuestionId // @Description: 根据提示词ID删除微信文章/报告摘要 // @author: Roc // @datetime 2025-04-24 15:37:28 // @param questionId int func DelAbstractByQuestionId(questionId int) { cacheKey := GetDelAbstractByQuestionIdCacheKey(questionId) if !utils.Rc.SetNX(cacheKey, 1, 30*time.Minute) { utils.FileLog.Error("根据提示词删除摘要失败,提示词ID:%d,系统处理中,请稍后重试!", questionId) return } defer func() { utils.Rc.Delete(cacheKey) }() // 删除微信文章摘要 err := DelWechatArticleAbstractByQuestionId(questionId) if err != nil { utils.FileLog.Error("根据提示词摘要删除微信文章摘要失败,提示词ID:%d;原因:%s", questionId, err.Error()) } // 删除ETA报告摘要 err = DelRagEtaReportAbstractByQuestionId(questionId) if err != nil { utils.FileLog.Error("根据提示词删除ETA报告摘要失败,提示词ID:%d;原因:%s", questionId, err.Error()) } return }