package services import ( "eta/eta_api/models" "eta/eta_api/models/rag" "eta/eta_api/services/elastic" "eta/eta_api/services/llm" "eta/eta_api/utils" "fmt" "golang.org/x/net/html" "golang.org/x/net/html/atom" "os" "regexp" "strconv" "strings" "time" ) // ReportAddOrModifyKnowledge // @Description: ETA报告加入/修改到知识库 // @author: Roc // @datetime 2025-04-07 14:41:45 // @param reportId int // @param reportChapterId int func ReportAddOrModifyKnowledge(reportId, reportChapterId int) { if reportId <= 0 { return } var err error defer func() { if err != nil { //fmt.Println("ReportAddOrModifyKnowledge error:", err) utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err) } }() var title, author, htmlContent string var publishTime time.Time if reportChapterId > 0 { chapterInfo, tmpErr := models.GetReportChapterInfoById(reportChapterId) if tmpErr != nil { return } title = chapterInfo.Title author = chapterInfo.Author publishTime = chapterInfo.PublishTime htmlContent = chapterInfo.Content } else { reportInfo, tmpErr := models.GetReportByReportId(reportId) if tmpErr != nil { return } title = reportInfo.Title author = reportInfo.Author publishTime = reportInfo.PublishTime htmlContent = reportInfo.Content } err = handleReportAddOrModifyKnowledge(reportId, reportChapterId, title, author, htmlContent, publishTime) return } // ReportAddOrModifyKnowledgeByReportId // @Description: ETA报告加入/修改到知识库(只传id的情况) // @author: Roc // @datetime 2025-04-07 15:41:15 // @param reportId int func ReportAddOrModifyKnowledgeByReportId(reportId int) { if reportId <= 0 { return } errList := make([]string, 0) defer func() { if len(errList) > 0 { utils.FileLog.Error("ReportAddOrModifyKnowledge error,报告ID:%d:%s", reportId, strings.Join(errList, "\n")) } }() reportInfo, err := models.GetReportByReportId(reportId) if err != nil { errList = append(errList, err.Error()) return } // 如果是单篇报告,那么直接处理 if reportInfo.HasChapter == 0 { err = handleReportAddOrModifyKnowledge(reportId, 0, reportInfo.Title, reportInfo.Author, reportInfo.Content, reportInfo.PublishTime) if err != nil { errList = append(errList, err.Error()) } return } // 章节类型的报告,需要查询出来后再处理 chapterInfoList, err := models.GetPublishedChapterListByReportId(reportId) if err != nil { errList = append(errList, err.Error()) return } for _, v := range chapterInfoList { err = handleReportAddOrModifyKnowledge(reportId, v.ReportChapterId, v.Title, reportInfo.Author, v.Content, v.PublishTime) if err != nil { errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", v.ReportChapterId, v.Title, err.Error())) continue } } return } // handleReportAddOrModifyKnowledge // @Description: 处理ETA报告加入/修改到知识库 // @author: Roc // @datetime 2025-04-07 15:33:38 // @param reportId int // @param reportChapterId int // @param title string // @param author string // @param htmlContent string // @param publishTime time.Time // @return err error func handleReportAddOrModifyKnowledge(reportId, reportChapterId int, title, author, htmlContent string, publishTime time.Time) (err error) { htmlContent = html.UnescapeString(htmlContent) doc, err := html.Parse(strings.NewReader(htmlContent)) if err != nil { return } // 只获取文本内容 content := &strings.Builder{} getArticleContent(content, doc) textContent := content.String() textContent = regexp.MustCompile(`\n+`).ReplaceAllString(textContent, "\n") textContent = strings.Trim(textContent, "\n") publishTimeStr := `未知` if !publishTime.IsZero() { title = fmt.Sprintf("%s(%s)", title, publishTime.Format(utils.FormatMonthDayUnSpace)) publishTimeStr = publishTime.Format(utils.FormatDateTime) } textContent = fmt.Sprintf("标题:%s\n发布时间:%s\n%s", title, publishTimeStr, textContent) obj := rag.RagEtaReport{} item, err := obj.GetByReportAndChapterId(reportId, reportChapterId) if err != nil && !utils.IsErrNoRow(err) { // 查询异常,且不是没找到数据的报错 return } if err == nil { // 标记删除了的话,那就不处理了 if item.IsDeleted == 1 { return } item.Title = title item.Author = author item.TextContent = textContent item.IsPublished = 1 //item.PublishTime = publishTime item.ModifyTime = time.Now() //err = item.Update([]string{"title", "author", "text_content", "is_published", "publish_time", "modify_time"}) err = item.Update([]string{"title", "author", "text_content", "is_published", "modify_time"}) } else { // 无数据的时候,需要新增 err = nil item = &rag.RagEtaReport{ RagEtaReportId: 0, ReportId: reportId, ReportChapterId: reportChapterId, Title: title, Author: author, TextContent: textContent, VectorKey: "", IsPublished: 1, IsDeleted: 0, PublishTime: publishTime, ModifyTime: time.Now(), CreateTime: time.Now(), } err = item.Create() } return } // ReportUnPublishedKnowledge // @Description: 知识库取消发布 // @author: Roc // @datetime 2025-04-07 14:58:25 // @param reportId int // @param reportChapterId int func ReportUnPublishedKnowledge(reportId, reportChapterId int) { if reportId <= 0 && reportChapterId <= 0 { return } var err error defer func() { if err != nil { //fmt.Println("ReportAddOrModifyKnowledge error:", err) utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err) } }() obj := rag.RagEtaReport{} item, err := obj.GetByReportAndChapterId(reportId, reportChapterId) if err != nil && !utils.IsErrNoRow(err) { // 查询异常,且不是没找到数据的报错 return } if item.RagEtaReportId > 0 { item.IsPublished = 0 item.ModifyTime = time.Now() err = item.Update([]string{"is_published", "modify_time"}) } return } // ReportUnPublishedKnowledgeByReportId // @Description: ETA报告取消发布同步到知识库(只传报告id的情况) // @author: Roc // @datetime 2025-04-07 15:41:15 // @param reportId int func ReportUnPublishedKnowledgeByReportId(reportId int) { errList := make([]string, 0) defer func() { if len(errList) > 0 { utils.FileLog.Error("ReportUnPublishedKnowledgeByReportId error,报告ID:%d:%s", reportId, strings.Join(errList, "\n")) } }() obj := rag.RagEtaReport{} list, err := obj.GetListByCondition(``, ` AND report_id = ? `, []interface{}{reportId}, 0, 1000) if err != nil && !utils.IsErrNoRow(err) { // 查询异常,且不是没找到数据的报错 return } for _, item := range list { item.IsPublished = 0 item.ModifyTime = time.Now() err = item.Update([]string{"is_published", "modify_time"}) if err != nil { errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", item.ReportChapterId, item.Title, err.Error())) continue } } return } func getArticleContent(content *strings.Builder, htmlContentNode *html.Node) { if htmlContentNode.Type == html.TextNode { cleanData := strings.TrimSpace(htmlContentNode.Data) if cleanData != `` && cleanData != "

" { content.WriteString(cleanData) } } else if htmlContentNode.Type == html.ElementNode { switch htmlContentNode.DataAtom { case atom.Ul: content.WriteString("\n") case atom.Br: // 遇到
标签时添加换行符 content.WriteString("\n") case atom.P: content.WriteString("\n") } } for c := htmlContentNode.FirstChild; c != nil; c = c.NextSibling { getArticleContent(content, c) } } // GenerateArticleAbstract // @Description: 文章摘要生成(默认提示词批量生成) // @author: Roc // @datetime 2025-03-10 16:17:53 // @param item *rag.RagEtaReport func GenerateArticleAbstract(item *rag.RagEtaReport, forceGenerate bool) { var err error defer func() { if err != nil { utils.FileLog.Error("文章转临时文件失败,err:%v", err) fmt.Println("文章转临时文件失败,err:", err) } }() // 内容为空,那就不需要生成摘要 if item.TextContent == `` { return } questionObj := rag.Question{} questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100) if err != nil { err = fmt.Errorf("获取问题列表失败,Err:" + err.Error()) return } // 没问题就不生成了 if len(questionList) <= 0 { return } for _, question := range questionList { GenerateArticleAbstractByQuestion(item, question, forceGenerate) } return } // GenerateArticleAbstractByQuestion // @Description: 文章摘要生成(根据提示词生成) // @author: Roc // @datetime 2025-03-10 16:17:53 // @param item *rag.RagEtaReport func GenerateArticleAbstractByQuestion(item *rag.RagEtaReport, question *rag.Question, forceGenerate bool) { var err error defer func() { if err != nil { utils.FileLog.Error("文章转临时文件失败,err:%v", err) fmt.Println("文章转临时文件失败,err:", err) } }() // 内容为空,那就不需要生成摘要 if item.TextContent == `` { return } abstractObj := rag.RagEtaReportAbstract{} abstractItem, err := abstractObj.GetByRagEtaReportIdAndQuestionId(item.RagEtaReportId, question.QuestionId) // 如果找到了,同时不是强制生成,那么就直接处理到知识库中 if err == nil && !forceGenerate { // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中 ReportAbstractToKnowledge(item, abstractItem, false) return } if !utils.IsErrNoRow(err) { return } //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent) //开始对话 abstract, _, tmpErr := getAnswerByContent(item.RagEtaReportId, utils.AI_ARTICLE_SOURCE_ETA_REPORT, questionStr) if tmpErr != nil { err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error()) return } // 添加问答记录 //if len(addArticleChatRecordList) > 0 { // recordObj := rag.RagEtaReportChatRecord{} // err = recordObj.CreateInBatches(addArticleChatRecordList) // if err != nil { // return // } //} if abstract == `` { return } //if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 { // item.AbstractStatus = 2 // item.ModifyTime = time.Now() // err = item.Update([]string{"AbstractStatus", "ModifyTime"}) // return //} //item.AbstractStatus = 1 //item.ModifyTime = time.Now() //err = item.Update([]string{"AbstractStatus", "ModifyTime"}) if abstractItem == nil || abstractItem.RagEtaReportAbstractId <= 0 { abstractItem = &rag.RagEtaReportAbstract{ RagEtaReportAbstractId: 0, RagEtaReportId: item.RagEtaReportId, Content: item.TextContent, QuestionId: question.QuestionId, QuestionContent: question.QuestionContent, Version: 1, Tags: "", VectorKey: "", ModifyTime: time.Now(), CreateTime: time.Now(), } err = abstractItem.Create() } else { abstractItem.Content = abstract abstractItem.Version++ abstractItem.ModifyTime = time.Now() abstractItem.Tags = "" abstractItem.QuestionContent = question.QuestionContent err = abstractItem.Update([]string{"content", "version", "modify_time", "tags", "question_content"}) } if err != nil { return } // 数据入ES库 go AddOrEditEsRagEtaReportAbstract(abstractItem.RagEtaReportAbstractId) ReportAbstractToKnowledge(item, abstractItem, false) } // AddOrEditEsWechatArticleAbstract // @Description: 新增/编辑微信文章摘要入ES // @author: Roc // @datetime 2025-03-13 14:13:47 // @param articleAbstractId int func AddOrEditEsRagEtaReportAbstract(ragEtaReportAbstractId int) { if utils.EsRagEtaReportAbstractName == `` { return } var err error defer func() { if err != nil { utils.FileLog.Error("添加ETA报告微信信息到ES失败,err:%v", err) fmt.Println("添加ETA报告微信信息到ES失败,err:", err) } }() obj := rag.RagEtaReportAbstract{} abstractInfo, err := obj.GetById(ragEtaReportAbstractId) if err != nil { err = fmt.Errorf("获取ETA报告文章信息失败,Err:" + err.Error()) return } ragEtaReportObj := rag.RagEtaReport{} articleInfo, err := ragEtaReportObj.GetById(abstractInfo.RagEtaReportAbstractId) if err != nil { err = fmt.Errorf("获取ETA报告文章信息失败,Err:" + err.Error()) return } tagIdList := make([]int, 0) if abstractInfo.Tags != `` { tagIdStrList := strings.Split(abstractInfo.Tags, ",") for _, tagIdStr := range tagIdStrList { tagId, tmpErr := strconv.Atoi(tagIdStr) if tmpErr != nil { err = fmt.Errorf("报告标签ID转int失败,Err:" + tmpErr.Error()) return } tagIdList = append(tagIdList, tagId) } } esItem := elastic.RagEtaReportAbstractItem{ RagEtaReportAbstractId: abstractInfo.RagEtaReportAbstractId, RagEtaReportId: abstractInfo.RagEtaReportId, Abstract: abstractInfo.Content, QuestionId: abstractInfo.QuestionId, Version: abstractInfo.Version, VectorKey: abstractInfo.VectorKey, ModifyTime: abstractInfo.ModifyTime, CreateTime: abstractInfo.CreateTime, Title: articleInfo.Title, TagIdList: tagIdList, } err = elastic.RagEtaReportAbstractEsAddOrEdit(strconv.Itoa(abstractInfo.RagEtaReportAbstractId), esItem) } // WechatArticleAbstractToKnowledge // @Description: 摘要入向量库 // @author: Roc // @datetime 2025-03-10 16:14:59 // @param wechatArticleItem *rag.RagEtaReport // @param abstractItem *rag.RagEtaReportAbstract func ReportAbstractToKnowledge(ragEtaReport *rag.RagEtaReport, abstractItem *rag.RagEtaReportAbstract, isReUpload bool) { if abstractItem.Content == `` { return } // 已经生成了,那就不处理了 if abstractItem.VectorKey != `` && !isReUpload { return } var err error defer func() { if err != nil { utils.FileLog.Error("摘要入向量库失败,err:%v", err) fmt.Println("摘要入向量库失败,err:", err) } // 数据入ES库 go AddOrEditEsRagEtaReportAbstract(abstractItem.RagEtaReportAbstractId) }() // 生成临时文件 //dateDir := time.Now().Format("20060102") //uploadDir := + "./static/ai/article/" + dateDir uploadDir := "./static/ai/abstract" err = os.MkdirAll(uploadDir, utils.DIR_MOD) if err != nil { err = fmt.Errorf("存储目录创建失败,Err:" + err.Error()) return } fileName := utils.MD5(fmt.Sprintf("%d_%d", utils.AI_ARTICLE_SOURCE_ETA_REPORT, ragEtaReport.RagEtaReportId)) + `.md` tmpFilePath := uploadDir + "/" + fileName err = utils.SaveToFile(abstractItem.Content, tmpFilePath) if err != nil { err = fmt.Errorf("生成临时文件失败,Err:" + err.Error()) return } defer func() { os.Remove(tmpFilePath) }() knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName] // 上传临时文件到LLM uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName) if err != nil { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error()) return } if len(uploadFileResp.FailedFiles) > 0 { for _, v := range uploadFileResp.FailedFiles { err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v) } } abstractItem.VectorKey = tmpFilePath abstractItem.ModifyTime = time.Now() err = abstractItem.Update([]string{"vector_key", "modify_time"}) }