package services
import (
"eta/eta_api/models"
"eta/eta_api/models/rag"
"eta/eta_api/services/elastic"
"eta/eta_api/services/llm"
"eta/eta_api/utils"
"fmt"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"os"
"regexp"
"strconv"
"strings"
"time"
)
// ReportAddOrModifyKnowledge adds an ETA report (or one of its chapters) to
// the knowledge base, or refreshes the existing knowledge-base record.
//
// When reportChapterId > 0 the chapter is loaded and synced; otherwise the
// report itself is loaded and synced. Nothing is returned: any failure
// (loading the source record or syncing it) is written to utils.FileLog.
//
// @author: Roc
// @datetime 2025-04-07 14:41:45
// @param reportId int
// @param reportChapterId int
func ReportAddOrModifyKnowledge(reportId, reportChapterId int) {
	if reportId <= 0 {
		return
	}

	var err error
	defer func() {
		if err != nil {
			utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
		}
	}()

	var title, author, htmlContent string
	var publishTime time.Time

	if reportChapterId > 0 {
		chapterInfo, tmpErr := models.GetReportChapterInfoById(reportChapterId)
		if tmpErr != nil {
			// Propagate to the deferred logger; previously this failure was dropped silently.
			err = tmpErr
			return
		}
		title = chapterInfo.Title
		author = chapterInfo.Author
		publishTime = chapterInfo.PublishTime
		htmlContent = chapterInfo.Content
	} else {
		reportInfo, tmpErr := models.GetReportByReportId(reportId)
		if tmpErr != nil {
			// Propagate to the deferred logger; previously this failure was dropped silently.
			err = tmpErr
			return
		}
		title = reportInfo.Title
		author = reportInfo.Author
		publishTime = reportInfo.PublishTime
		htmlContent = reportInfo.Content
	}

	err = handleReportAddOrModifyKnowledge(reportId, reportChapterId, title, author, htmlContent, publishTime)
}
// ReportAddOrModifyKnowledgeByReportId syncs an ETA report to the knowledge
// base when only the report id is known.
//
// A report with HasChapter == 0 is synced as a single document. Otherwise
// every published chapter of the report is synced individually; a failing
// chapter does not stop the remaining chapters. All collected failures are
// written to utils.FileLog in one entry when the function returns.
//
// @author: Roc
// @datetime 2025-04-07 15:41:15
// @param reportId int
func ReportAddOrModifyKnowledgeByReportId(reportId int) {
	if reportId <= 0 {
		return
	}

	var failures []string
	defer func() {
		if len(failures) == 0 {
			return
		}
		utils.FileLog.Error("ReportAddOrModifyKnowledge error,报告ID:%d:%s", reportId, strings.Join(failures, "\n"))
	}()

	report, err := models.GetReportByReportId(reportId)
	if err != nil {
		failures = append(failures, err.Error())
		return
	}

	// Single-document report: sync it directly.
	if report.HasChapter == 0 {
		if syncErr := handleReportAddOrModifyKnowledge(reportId, 0, report.Title, report.Author, report.Content, report.PublishTime); syncErr != nil {
			failures = append(failures, syncErr.Error())
		}
		return
	}

	// Chapter-based report: load the published chapters and sync each one.
	chapters, err := models.GetPublishedChapterListByReportId(reportId)
	if err != nil {
		failures = append(failures, err.Error())
		return
	}

	for _, chapter := range chapters {
		// The report-level author is used for every chapter.
		syncErr := handleReportAddOrModifyKnowledge(reportId, chapter.ReportChapterId, chapter.Title, report.Author, chapter.Content, chapter.PublishTime)
		if syncErr == nil {
			continue
		}
		failures = append(failures, fmt.Sprintf("第%d章:%s,异常:\n%s", chapter.ReportChapterId, chapter.Title, syncErr.Error()))
	}
}
// reportKnowledgeNewlineRunRe matches one or more consecutive newlines. It is
// compiled once at package scope instead of on every sync call.
var reportKnowledgeNewlineRunRe = regexp.MustCompile(`\n+`)

// handleReportAddOrModifyKnowledge converts a report's HTML into plain text
// and creates or updates the matching knowledge-base record.
//
// The HTML is unescaped and parsed, its text is extracted with
// getArticleContent, runs of newlines are collapsed to one, and a
// "标题/发布时间" header is prepended. If a record already exists for
// (reportId, reportChapterId) it is updated, unless it is flagged as deleted,
// in which case nothing happens. If no record exists a new one is created.
//
// @author: Roc
// @datetime 2025-04-07 15:33:38
// @param reportId int
// @param reportChapterId int
// @param title string
// @param author string
// @param htmlContent string
// @param publishTime time.Time
// @return err error
func handleReportAddOrModifyKnowledge(reportId, reportChapterId int, title, author, htmlContent string, publishTime time.Time) (err error) {
	htmlContent = html.UnescapeString(htmlContent)
	doc, err := html.Parse(strings.NewReader(htmlContent))
	if err != nil {
		return err
	}

	// Keep only the textual content of the document.
	content := &strings.Builder{}
	getArticleContent(content, doc)
	textContent := reportKnowledgeNewlineRunRe.ReplaceAllString(content.String(), "\n")
	textContent = strings.Trim(textContent, "\n")

	publishTimeStr := `未知`
	if !publishTime.IsZero() {
		// A known publish time is also appended to the stored title.
		title = fmt.Sprintf("%s(%s)", title, publishTime.Format(utils.FormatMonthDayUnSpace))
		publishTimeStr = publishTime.Format(utils.FormatDateTime)
	}
	textContent = fmt.Sprintf("标题:%s\n发布时间:%s\n%s", title, publishTimeStr, textContent)

	obj := rag.RagEtaReport{}
	item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
	if err != nil {
		if !utils.IsErrNoRow(err) {
			// Genuine query failure (not just "no record found").
			return err
		}

		// No record yet: create one.
		now := time.Now()
		item = &rag.RagEtaReport{
			RagEtaReportId:  0,
			ReportId:        reportId,
			ReportChapterId: reportChapterId,
			Title:           title,
			Author:          author,
			TextContent:     textContent,
			VectorKey:       "",
			IsPublished:     1,
			IsDeleted:       0,
			PublishTime:     publishTime,
			ModifyTime:      now,
			CreateTime:      now,
		}
		return item.Create()
	}

	// A record flagged as deleted is left untouched.
	if item.IsDeleted == 1 {
		return nil
	}

	item.Title = title
	item.Author = author
	item.TextContent = textContent
	item.IsPublished = 1
	item.ModifyTime = time.Now()
	// publish_time is not part of the update; only the columns listed here are persisted.
	return item.Update([]string{"title", "author", "text_content", "is_published", "modify_time"})
}
// ReportUnPublishedKnowledge marks the knowledge-base record of a report (or
// report chapter) as unpublished.
//
// If no record exists for (reportId, reportChapterId) there is nothing to do
// and nothing is logged. Query and update failures are written to
// utils.FileLog.
//
// @author: Roc
// @datetime 2025-04-07 14:58:25
// @param reportId int
// @param reportChapterId int
func ReportUnPublishedKnowledge(reportId, reportChapterId int) {
	if reportId <= 0 && reportChapterId <= 0 {
		return
	}

	var err error
	defer func() {
		if err != nil {
			utils.FileLog.Error("ReportUnPublishedKnowledge error:", err)
		}
	}()

	obj := rag.RagEtaReport{}
	item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
	if err != nil {
		if utils.IsErrNoRow(err) {
			// A missing record is an expected outcome, not a failure worth logging.
			err = nil
		}
		return
	}

	// Guard against a nil or empty result before touching the record.
	if item == nil || item.RagEtaReportId <= 0 {
		return
	}

	item.IsPublished = 0
	item.ModifyTime = time.Now()
	err = item.Update([]string{"is_published", "modify_time"})
}
// ReportUnPublishedKnowledgeByReportId marks every knowledge-base record of
// an ETA report as unpublished when only the report id is known.
//
// Up to 1000 records with the given report_id are loaded and updated one by
// one; a failing update does not stop the remaining ones. The query failure
// or the collected update failures are written to utils.FileLog in a single
// entry when the function returns.
//
// @author: Roc
// @datetime 2025-04-07 15:41:15
// @param reportId int
func ReportUnPublishedKnowledgeByReportId(reportId int) {
	// Same guard as ReportAddOrModifyKnowledgeByReportId: a non-positive id is never synced.
	if reportId <= 0 {
		return
	}

	errList := make([]string, 0)
	defer func() {
		if len(errList) > 0 {
			utils.FileLog.Error("ReportUnPublishedKnowledgeByReportId error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
		}
	}()

	obj := rag.RagEtaReport{}
	list, err := obj.GetListByCondition(``, ` AND report_id = ? `, []interface{}{reportId}, 0, 1000)
	if err != nil && !utils.IsErrNoRow(err) {
		// Genuine query failure: record it so the deferred logger reports it
		// (previously it was dropped silently).
		errList = append(errList, err.Error())
		return
	}

	for _, item := range list {
		item.IsPublished = 0
		item.ModifyTime = time.Now()
		if err = item.Update([]string{"is_published", "modify_time"}); err != nil {
			errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", item.ReportChapterId, item.Title, err.Error()))
		}
	}
}
func getArticleContent(content *strings.Builder, htmlContentNode *html.Node) {
if htmlContentNode.Type == html.TextNode {
cleanData := strings.TrimSpace(htmlContentNode.Data)
if cleanData != `` && cleanData != "
" {
content.WriteString(cleanData)
}
} else if htmlContentNode.Type == html.ElementNode {
switch htmlContentNode.DataAtom {
case atom.Ul:
content.WriteString("\n")
case atom.Br:
// 遇到
标签时添加换行符
content.WriteString("\n")
case atom.P:
content.WriteString("\n")
}
}
for c := htmlContentNode.FirstChild; c != nil; c = c.NextSibling {
getArticleContent(content, c)
}
}
// GenerateArticleAbstract generates abstracts for a knowledge-base report
// using every default question (prompt).
//
// Up to 100 questions with is_default = 1 are loaded and
// GenerateArticleAbstractByQuestion is called for each of them. Nothing is
// done when item is nil, has no text content, or no default question exists.
// A failure to load the questions is written to utils.FileLog and stdout.
//
// @author: Roc
// @datetime 2025-03-10 16:17:53
// @param item *rag.RagEtaReport
// @param forceGenerate bool  passed through to GenerateArticleAbstractByQuestion
func GenerateArticleAbstract(item *rag.RagEtaReport, forceGenerate bool) {
	var err error
	defer func() {
		if err != nil {
			// NOTE(review): this message text ("article to temp file failed") does not
			// describe this function; kept unchanged so existing log searches still match.
			utils.FileLog.Error("文章转临时文件失败,err:%v", err)
			fmt.Println("文章转临时文件失败,err:", err)
		}
	}()

	// Without content there is nothing to summarise.
	if item == nil || item.TextContent == `` {
		return
	}

	questionObj := rag.Question{}
	questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100)
	if err != nil {
		// Constant format string + %w: the rendered text is unchanged, but a '%'
		// inside the underlying error can no longer corrupt the message.
		err = fmt.Errorf("获取问题列表失败,Err:%w", err)
		return
	}

	// No default question: nothing to generate.
	for _, question := range questionList {
		GenerateArticleAbstractByQuestion(item, question, forceGenerate)
	}
}
// GenerateArticleAbstractByQuestion generates the abstract of a
// knowledge-base report for one question (prompt) and stores it.
//
// Flow:
//   - If an abstract already exists and forceGenerate is false, the existing
//     abstract is only pushed to the knowledge base again.
//   - Otherwise the LLM is asked for a new abstract. An empty answer ends the
//     call without storing anything.
//   - A missing abstract record is created; an existing one (forced
//     regeneration) is updated and its version incremented.
//   - The stored abstract is then synced to ES and to the knowledge base.
//
// Failures are written to utils.FileLog and stdout.
//
// @author: Roc
// @datetime 2025-03-10 16:17:53
// @param item *rag.RagEtaReport
// @param question *rag.Question
// @param forceGenerate bool  regenerate even when an abstract already exists
func GenerateArticleAbstractByQuestion(item *rag.RagEtaReport, question *rag.Question, forceGenerate bool) {
	var err error
	defer func() {
		if err != nil {
			// NOTE(review): this message text ("article to temp file failed") does not
			// describe this function; kept unchanged so existing log searches still match.
			utils.FileLog.Error("文章转临时文件失败,err:%v", err)
			fmt.Println("文章转临时文件失败,err:", err)
		}
	}()

	// Without content (or without a question) there is nothing to summarise.
	if item == nil || question == nil || item.TextContent == `` {
		return
	}

	abstractObj := rag.RagEtaReportAbstract{}
	abstractItem, err := abstractObj.GetByRagEtaReportIdAndQuestionId(item.RagEtaReportId, question.QuestionId)
	switch {
	case err == nil:
		if !forceGenerate {
			// Abstract already generated: no regeneration, only (re)add it to the vector store.
			ReportAbstractToKnowledge(item, abstractItem, false)
			return
		}
		// Forced regeneration of an existing abstract: fall through to the LLM call.
		// (Previously this path returned early, so forceGenerate had no effect.)
	case utils.IsErrNoRow(err):
		// No abstract yet is the normal "create" case, not a failure.
		err = nil
	default:
		// Genuine query failure.
		return
	}

	// Interpreted string so that "\n" is a real line break between the instruction
	// and the question (the former raw string sent a literal backslash-n).
	questionStr := fmt.Sprintf("%s\n%s", `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)

	// Ask the LLM.
	abstract, _, tmpErr := getAnswerByContent(item.RagEtaReportId, utils.AI_ARTICLE_SOURCE_ETA_REPORT, questionStr)
	if tmpErr != nil {
		err = fmt.Errorf("LLM对话失败,Err:%w", tmpErr)
		return
	}
	if abstract == `` {
		return
	}

	now := time.Now()
	if abstractItem == nil || abstractItem.RagEtaReportAbstractId <= 0 {
		abstractItem = &rag.RagEtaReportAbstract{
			RagEtaReportAbstractId: 0,
			RagEtaReportId:         item.RagEtaReportId,
			// Store the generated abstract, matching the update branch below
			// (previously the full article text was stored here).
			Content:         abstract,
			QuestionId:      question.QuestionId,
			QuestionContent: question.QuestionContent,
			Version:         1,
			Tags:            "",
			VectorKey:       "",
			ModifyTime:      now,
			CreateTime:      now,
		}
		err = abstractItem.Create()
	} else {
		abstractItem.Content = abstract
		abstractItem.Version++
		abstractItem.ModifyTime = now
		abstractItem.Tags = ""
		abstractItem.QuestionContent = question.QuestionContent
		err = abstractItem.Update([]string{"content", "version", "modify_time", "tags", "question_content"})
	}
	if err != nil {
		return
	}

	// Sync the abstract to ES.
	go AddOrEditEsRagEtaReportAbstract(abstractItem.RagEtaReportAbstractId)

	// NOTE(review): with isReUpload=false an abstract that already has a VectorKey is
	// not uploaded again, so a forced regeneration does not refresh the vector store —
	// confirm whether forceGenerate should be passed here.
	ReportAbstractToKnowledge(item, abstractItem, false)
}
// AddOrEditEsRagEtaReportAbstract adds or updates the ES document of an ETA
// report abstract.
//
// The abstract and its parent report are loaded, the abstract's
// comma-separated tag ids are parsed, and the resulting document is written
// to ES under the abstract id. Nothing is done when
// utils.EsRagEtaReportAbstractName is empty. Failures are written to
// utils.FileLog and stdout.
//
// @author: Roc
// @datetime 2025-03-13 14:13:47
// @param ragEtaReportAbstractId int
func AddOrEditEsRagEtaReportAbstract(ragEtaReportAbstractId int) {
	if utils.EsRagEtaReportAbstractName == `` {
		return
	}

	var err error
	defer func() {
		if err != nil {
			utils.FileLog.Error("添加ETA报告微信信息到ES失败,err:%v", err)
			fmt.Println("添加ETA报告微信信息到ES失败,err:", err)
		}
	}()

	obj := rag.RagEtaReportAbstract{}
	abstractInfo, err := obj.GetById(ragEtaReportAbstractId)
	if err != nil {
		err = fmt.Errorf("获取ETA报告文章信息失败,Err:%w", err)
		return
	}

	// Load the parent report by the report id stored on the abstract
	// (previously the abstract's own id was used for this lookup).
	ragEtaReportObj := rag.RagEtaReport{}
	articleInfo, err := ragEtaReportObj.GetById(abstractInfo.RagEtaReportId)
	if err != nil {
		err = fmt.Errorf("获取ETA报告文章信息失败,Err:%w", err)
		return
	}

	// Kept non-nil even when there are no tags, as in the original.
	tagIdList := make([]int, 0)
	if abstractInfo.Tags != `` {
		tagIdStrList := strings.Split(abstractInfo.Tags, ",")
		tagIdList = make([]int, 0, len(tagIdStrList))
		for _, tagIdStr := range tagIdStrList {
			tagId, tmpErr := strconv.Atoi(tagIdStr)
			if tmpErr != nil {
				err = fmt.Errorf("报告标签ID转int失败,Err:%w", tmpErr)
				return
			}
			tagIdList = append(tagIdList, tagId)
		}
	}

	esItem := elastic.RagEtaReportAbstractItem{
		RagEtaReportAbstractId: abstractInfo.RagEtaReportAbstractId,
		RagEtaReportId:         abstractInfo.RagEtaReportId,
		Abstract:               abstractInfo.Content,
		QuestionId:             abstractInfo.QuestionId,
		Version:                abstractInfo.Version,
		VectorKey:              abstractInfo.VectorKey,
		ModifyTime:             abstractInfo.ModifyTime,
		CreateTime:             abstractInfo.CreateTime,
		Title:                  articleInfo.Title,
		TagIdList:              tagIdList,
	}
	err = elastic.RagEtaReportAbstractEsAddOrEdit(strconv.Itoa(abstractInfo.RagEtaReportAbstractId), esItem)
}
// ReportAbstractToKnowledge uploads a report abstract to the knowledge base
// (vector store) and records the resulting vector key on the abstract.
//
// The abstract content is written to a temporary .md file under
// ./static/ai/abstract, uploaded with llm.UploadDocsToKnowledge, and the
// temporary file is removed afterwards. Nothing is done when the abstract is
// empty, or when it already has a VectorKey and isReUpload is false.
// Once the upload is attempted, an ES sync of the abstract is triggered on
// return regardless of the outcome. Failures are written to utils.FileLog
// and stdout.
//
// @author: Roc
// @datetime 2025-03-10 16:14:59
// @param ragEtaReport *rag.RagEtaReport
// @param abstractItem *rag.RagEtaReportAbstract
// @param isReUpload bool  upload again even if a VectorKey is already set
func ReportAbstractToKnowledge(ragEtaReport *rag.RagEtaReport, abstractItem *rag.RagEtaReportAbstract, isReUpload bool) {
	if ragEtaReport == nil || abstractItem == nil || abstractItem.Content == `` {
		return
	}
	// Already uploaded: nothing to do unless a re-upload is requested.
	if abstractItem.VectorKey != `` && !isReUpload {
		return
	}

	var err error
	defer func() {
		if err != nil {
			utils.FileLog.Error("摘要入向量库失败,err:%v", err)
			fmt.Println("摘要入向量库失败,err:", err)
		}
		// Sync the abstract to ES.
		go AddOrEditEsRagEtaReportAbstract(abstractItem.RagEtaReportAbstractId)
	}()

	// Write the abstract to a temporary file.
	uploadDir := "./static/ai/abstract"
	err = os.MkdirAll(uploadDir, utils.DIR_MOD)
	if err != nil {
		err = fmt.Errorf("存储目录创建失败,Err:%w", err)
		return
	}

	// The file name is derived from the source type and the report id.
	fileName := utils.MD5(fmt.Sprintf("%d_%d", utils.AI_ARTICLE_SOURCE_ETA_REPORT, ragEtaReport.RagEtaReportId)) + `.md`
	tmpFilePath := uploadDir + "/" + fileName
	err = utils.SaveToFile(abstractItem.Content, tmpFilePath)
	if err != nil {
		err = fmt.Errorf("生成临时文件失败,Err:%w", err)
		return
	}
	defer func() {
		os.Remove(tmpFilePath)
	}()

	knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]

	// Upload the temporary file to the LLM knowledge base.
	uploadFileResp, err := llm.UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
	if err != nil {
		err = fmt.Errorf("上传文章原文到知识库失败,Err:%w", err)
		return
	}
	if len(uploadFileResp.FailedFiles) > 0 {
		// Stop here: previously this error was overwritten by the Update below,
		// so a failed upload was still recorded as successful.
		failed := make([]string, 0, len(uploadFileResp.FailedFiles))
		for _, v := range uploadFileResp.FailedFiles {
			failed = append(failed, v)
		}
		err = fmt.Errorf("上传文章原文到知识库失败,Err:%s", strings.Join(failed, ";"))
		return
	}

	abstractItem.VectorKey = tmpFilePath
	abstractItem.ModifyTime = time.Now()
	err = abstractItem.Update([]string{"vector_key", "modify_time"})
}