|
@@ -2,11 +2,14 @@ package services
|
|
|
|
|
|
import (
|
|
|
"bytes"
|
|
|
+ "encoding/json"
|
|
|
+ "errors"
|
|
|
"eta/eta_api/cache"
|
|
|
"eta/eta_api/models"
|
|
|
"eta/eta_api/models/rag"
|
|
|
"eta/eta_api/services/elastic"
|
|
|
"eta/eta_api/services/llm"
|
|
|
+ "eta/eta_api/services/llm/facade"
|
|
|
"eta/eta_api/utils"
|
|
|
"eta/eta_api/utils/llm/eta_llm/eta_llm_http"
|
|
|
"fmt"
|
|
@@ -14,6 +17,7 @@ import (
|
|
|
"html"
|
|
|
"os"
|
|
|
"path"
|
|
|
+ "regexp"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
"time"
|
|
@@ -176,7 +180,7 @@ func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetai
|
|
|
go replaceWechatArticleCoverPic(obj)
|
|
|
|
|
|
// 文章入库成功后,需要将相关信息入摘要库
|
|
|
- go cache.AddWechatArticleLlmOpToCache(obj.WechatArticleId, ``)
|
|
|
+ go cache.AddWechatArticleLlmOpToCache(obj.WechatArticleId, 0, ``)
|
|
|
|
|
|
}
|
|
|
|
|
@@ -238,12 +242,93 @@ func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
+//
|
|
|
+//// GenerateArticleAbstract
|
|
|
+//// @Description: 文章摘要生成
|
|
|
+//// @author: Roc
|
|
|
+//// @datetime 2025-03-10 16:17:53
|
|
|
+//// @param item *rag.WechatArticle
|
|
|
+//func GenerateArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
|
|
|
+// var err error
|
|
|
+// defer func() {
|
|
|
+// if err != nil {
|
|
|
+// utils.FileLog.Error("文章转临时文件失败,err:%v", err)
|
|
|
+// fmt.Println("文章转临时文件失败,err:", err)
|
|
|
+// }
|
|
|
+// }()
|
|
|
+//
|
|
|
+// // 内容为空,那就不需要生成摘要
|
|
|
+// if item.TextContent == `` {
|
|
|
+// return
|
|
|
+// }
|
|
|
+//
|
|
|
+// abstractObj := rag.WechatArticleAbstract{}
|
|
|
+// tmpAbstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
|
|
|
+// // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
|
|
|
+// if err == nil && !forceGenerate {
|
|
|
+// // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
|
|
|
+// WechatArticleAbstractToKnowledge(item, tmpAbstractItem, false)
|
|
|
+//
|
|
|
+// return
|
|
|
+// }
|
|
|
+// if !utils.IsErrNoRow(err) {
|
|
|
+// return
|
|
|
+// }
|
|
|
+//
|
|
|
+// //开始对话
|
|
|
+// abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_ETA_REPORT)
|
|
|
+// if tmpErr != nil {
|
|
|
+// err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
|
|
|
+// return
|
|
|
+// }
|
|
|
+//
|
|
|
+// // 添加问答记录
|
|
|
+// if len(addArticleChatRecordList) > 0 {
|
|
|
+// recordObj := rag.WechatArticleChatRecord{}
|
|
|
+// err = recordObj.CreateInBatches(addArticleChatRecordList)
|
|
|
+// if err != nil {
|
|
|
+// return
|
|
|
+// }
|
|
|
+// }
|
|
|
+//
|
|
|
+// if abstract != `` {
|
|
|
+// if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
|
|
|
+// item.AbstractStatus = 2
|
|
|
+// item.ModifyTime = time.Now()
|
|
|
+// err = item.Update([]string{"AbstractStatus", "ModifyTime"})
|
|
|
+// return
|
|
|
+// }
|
|
|
+// item.AbstractStatus = 1
|
|
|
+// item.ModifyTime = time.Now()
|
|
|
+// err = item.Update([]string{"AbstractStatus", "ModifyTime"})
|
|
|
+//
|
|
|
+// abstractItem := &rag.WechatArticleAbstract{
|
|
|
+// WechatArticleAbstractId: 0,
|
|
|
+// WechatArticleId: item.WechatArticleId,
|
|
|
+// Content: abstract,
|
|
|
+// Version: 0,
|
|
|
+// VectorKey: "",
|
|
|
+// ModifyTime: time.Now(),
|
|
|
+// CreateTime: time.Now(),
|
|
|
+// }
|
|
|
+// err = abstractItem.Create()
|
|
|
+// if err != nil {
|
|
|
+// return
|
|
|
+// }
|
|
|
+//
|
|
|
+// // 数据入ES库
|
|
|
+// go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
|
|
|
+//
|
|
|
+// WechatArticleAbstractToKnowledge(item, abstractItem, false)
|
|
|
+// }
|
|
|
+//}
|
|
|
+
|
|
|
// GenerateArticleAbstract
|
|
|
-// @Description: 文章摘要生成
|
|
|
+// @Description: 文章摘要生成(默认提示词批量生成)
|
|
|
// @author: Roc
|
|
|
// @datetime 2025-03-10 16:17:53
|
|
|
// @param item *rag.WechatArticle
|
|
|
-func GenerateArticleAbstract(item *rag.WechatArticle) {
|
|
|
+func GenerateWechatArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
|
|
|
var err error
|
|
|
defer func() {
|
|
|
if err != nil {
|
|
@@ -257,203 +342,175 @@ func GenerateArticleAbstract(item *rag.WechatArticle) {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
- abstractObj := rag.WechatArticleAbstract{}
|
|
|
- tmpAbstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
|
|
|
- if err == nil {
|
|
|
- // 摘要已经生成,不需要重复生成
|
|
|
- AbstractToKnowledge(item, tmpAbstractItem, false)
|
|
|
-
|
|
|
- return
|
|
|
- }
|
|
|
- if !utils.IsErrNoRow(err) {
|
|
|
- return
|
|
|
- }
|
|
|
-
|
|
|
- // 生成临时文件
|
|
|
- dateDir := time.Now().Format("20060102")
|
|
|
- uploadDir := "./static/ai/" + dateDir
|
|
|
- err = os.MkdirAll(uploadDir, utils.DIR_MOD)
|
|
|
- if err != nil {
|
|
|
- err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
|
|
|
- return
|
|
|
- }
|
|
|
- randStr := utils.GetRandStringNoSpecialChar(28)
|
|
|
- fileName := randStr + `.md`
|
|
|
- tmpFilePath := uploadDir + "/" + fileName
|
|
|
- err = utils.SaveToFile(item.TextContent, tmpFilePath)
|
|
|
- if err != nil {
|
|
|
- err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
|
|
|
- return
|
|
|
- }
|
|
|
- defer func() {
|
|
|
- os.Remove(tmpFilePath)
|
|
|
- }()
|
|
|
-
|
|
|
- // 上传临时文件到LLM
|
|
|
- tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
|
|
|
+ questionObj := rag.Question{}
|
|
|
+ questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100)
|
|
|
if err != nil {
|
|
|
- err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
|
|
|
- return
|
|
|
- }
|
|
|
-
|
|
|
- if tmpFileResp.Data.Id == `` {
|
|
|
- err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
|
|
|
+ err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
|
|
|
return
|
|
|
}
|
|
|
- tmpDocId := tmpFileResp.Data.Id
|
|
|
-
|
|
|
- //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
|
|
|
- //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
|
|
|
- //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
|
|
|
|
|
|
- //开始对话
|
|
|
- abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, tmpDocId)
|
|
|
- if tmpErr != nil {
|
|
|
- err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
|
|
|
+ // 没问题就不生成了
|
|
|
+ if len(questionList) <= 0 {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
- // 添加问答记录
|
|
|
- if len(addArticleChatRecordList) > 0 {
|
|
|
- recordObj := rag.WechatArticleChatRecord{}
|
|
|
- err = recordObj.CreateInBatches(addArticleChatRecordList)
|
|
|
- if err != nil {
|
|
|
- return
|
|
|
- }
|
|
|
+ for _, question := range questionList {
|
|
|
+ GenerateWechatArticleAbstractByQuestion(item, question, forceGenerate)
|
|
|
}
|
|
|
|
|
|
- if abstract != `` {
|
|
|
- if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
|
|
|
- item.AbstractStatus = 2
|
|
|
- item.ModifyTime = time.Now()
|
|
|
- err = item.Update([]string{"AbstractStatus", "ModifyTime"})
|
|
|
- return
|
|
|
- }
|
|
|
- item.AbstractStatus = 1
|
|
|
- item.ModifyTime = time.Now()
|
|
|
- err = item.Update([]string{"AbstractStatus", "ModifyTime"})
|
|
|
-
|
|
|
- abstractItem := &rag.WechatArticleAbstract{
|
|
|
- WechatArticleAbstractId: 0,
|
|
|
- WechatArticleId: item.WechatArticleId,
|
|
|
- Content: abstract,
|
|
|
- Version: 0,
|
|
|
- VectorKey: "",
|
|
|
- ModifyTime: time.Now(),
|
|
|
- CreateTime: time.Now(),
|
|
|
- }
|
|
|
- err = abstractItem.Create()
|
|
|
- if err != nil {
|
|
|
- return
|
|
|
- }
|
|
|
-
|
|
|
- // 数据入ES库
|
|
|
- go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
|
|
|
-
|
|
|
- AbstractToKnowledge(item, abstractItem, false)
|
|
|
- }
|
|
|
+ return
|
|
|
}
|
|
|
|
|
|
-// ReGenerateArticleAbstract
|
|
|
-// @Description: 文章摘要重新生成
|
|
|
+// GenerateWechatArticleAbstractByQuestion
|
|
|
+// @Description: 文章摘要生成(根据提示词生成)
|
|
|
// @author: Roc
|
|
|
-// @datetime 2025-03-10 16:17:53
|
|
|
+// @datetime 2025-04-24 11:23:27
|
|
|
// @param item *rag.WechatArticle
|
|
|
-func ReGenerateArticleAbstract(item *rag.WechatArticle) {
|
|
|
- var err error
|
|
|
+// @param question *rag.Question
|
|
|
+// @param forceGenerate bool
|
|
|
+// @return err error
|
|
|
+func GenerateWechatArticleAbstractByQuestion(item *rag.WechatArticle, question *rag.Question, forceGenerate bool) (err error) {
|
|
|
defer func() {
|
|
|
if err != nil {
|
|
|
- utils.FileLog.Error("文章转临时文件失败,err:%v", err)
|
|
|
- fmt.Println("文章转临时文件失败,err:", err)
|
|
|
+ utils.FileLog.Error("文章摘要生成(根据提示词生成)失败,err:%v", err)
|
|
|
}
|
|
|
}()
|
|
|
|
|
|
- abstractObj := rag.WechatArticleAbstract{}
|
|
|
- abstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
|
|
|
- if err != nil {
|
|
|
- if utils.IsErrNoRow(err) {
|
|
|
- // 直接生成
|
|
|
- GenerateArticleAbstract(item)
|
|
|
- return
|
|
|
- }
|
|
|
- // 异常了
|
|
|
+ // 内容为空,那就不需要生成摘要
|
|
|
+ if item.TextContent == `` {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
- // 生成临时文件
|
|
|
- dateDir := time.Now().Format("20060102")
|
|
|
- uploadDir := "./static/ai/" + dateDir
|
|
|
- err = os.MkdirAll(uploadDir, utils.DIR_MOD)
|
|
|
- if err != nil {
|
|
|
- err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
|
|
|
+ abstractObj := rag.WechatArticleAbstract{}
|
|
|
+ abstractItem, err := abstractObj.GetByWechatArticleIdAndQuestionId(item.WechatArticleId, question.QuestionId)
|
|
|
+ // 如果找到了,同时不是强制生成,那么就直接处理到知识库中
|
|
|
+ if err == nil && !forceGenerate {
|
|
|
+ // 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
|
|
|
+ WechatArticleAbstractToKnowledge(item, abstractItem, false)
|
|
|
+
|
|
|
return
|
|
|
}
|
|
|
- randStr := utils.GetRandStringNoSpecialChar(28)
|
|
|
- fileName := randStr + `.md`
|
|
|
- tmpFilePath := uploadDir + "/" + fileName
|
|
|
- err = utils.SaveToFile(item.TextContent, tmpFilePath)
|
|
|
- if err != nil {
|
|
|
- err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
|
|
|
- return
|
|
|
+
|
|
|
+ // 如果是没找到数据,那么就将报错置空
|
|
|
+ if err != nil && utils.IsErrNoRow(err) {
|
|
|
+ err = nil
|
|
|
}
|
|
|
- defer func() {
|
|
|
- os.Remove(tmpFilePath)
|
|
|
- }()
|
|
|
|
|
|
- // 上传临时文件到LLM
|
|
|
- tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
|
|
|
- if err != nil {
|
|
|
- err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
|
|
|
+ //你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
|
|
|
+ questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)
|
|
|
+ //开始对话
|
|
|
+ abstract, industryTags, _, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_WECHAT, questionStr)
|
|
|
+ if tmpErr != nil {
|
|
|
+ err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
|
|
|
return
|
|
|
}
|
|
|
|
|
|
- if tmpFileResp.Data.Id == `` {
|
|
|
- err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
|
|
|
+ if abstract == `` {
|
|
|
return
|
|
|
}
|
|
|
- tmpDocId := tmpFileResp.Data.Id
|
|
|
-
|
|
|
- //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
|
|
|
- //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
|
|
|
- //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
|
|
|
|
|
|
- //开始对话
|
|
|
- abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, tmpDocId)
|
|
|
- if tmpErr != nil {
|
|
|
- err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
|
|
|
+ if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
|
|
|
+ item.AbstractStatus = 2
|
|
|
+ item.ModifyTime = time.Now()
|
|
|
+ err = item.Update([]string{"AbstractStatus", "ModifyTime"})
|
|
|
return
|
|
|
}
|
|
|
|
|
|
- // 添加问答记录
|
|
|
- if len(addArticleChatRecordList) > 0 {
|
|
|
- recordObj := rag.WechatArticleChatRecord{}
|
|
|
- err = recordObj.CreateInBatches(addArticleChatRecordList)
|
|
|
+ var tagIdJsonStr string
|
|
|
+ var tagNameJsonStr string
|
|
|
+ // 标签ID
|
|
|
+ {
|
|
|
+ tagIdList := make([]int, 0)
|
|
|
+ tagNameList := make([]string, 0)
|
|
|
+ tagIdMap := make(map[int]bool)
|
|
|
+
|
|
|
+ if abstractItem != nil && abstractItem.Tags != `` {
|
|
|
+ tmpErr = json.Unmarshal([]byte(abstractItem.Tags), &tagIdList)
|
|
|
+ if tmpErr != nil {
|
|
|
+ utils.FileLog.Info(fmt.Sprintf("json.Unmarshal Tags 失败,标签数据:%s,Err:%s", abstractItem.Tags, tmpErr.Error()))
|
|
|
+ } else {
|
|
|
+ for _, tagId := range tagIdList {
|
|
|
+ tagIdMap[tagId] = true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if abstractItem.TagsName != `` {
|
|
|
+ tagNameList = strings.Split(abstractItem.TagsName, ",")
|
|
|
+ }
|
|
|
+ for _, tagName := range industryTags {
|
|
|
+ tagId, tmpErr := GetTagIdByName(tagName)
|
|
|
+ if tmpErr != nil {
|
|
|
+ utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error()))
|
|
|
+ }
|
|
|
+ if _, ok := tagIdMap[tagId]; !ok {
|
|
|
+ tagIdList = append(tagIdList, tagId)
|
|
|
+ tagNameList = append(tagNameList, tagName)
|
|
|
+ tagIdMap[tagId] = true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //for _, tagName := range varietyTags {
|
|
|
+ // tagId, tmpErr := GetTagIdByName(tagName)
|
|
|
+ // if tmpErr != nil {
|
|
|
+ // utils.FileLog.Info(fmt.Sprintf("获取标签ID失败,标签名称:%s,Err:%s", tagName, tmpErr.Error()))
|
|
|
+ // }
|
|
|
+ // if _, ok := tagIdMap[tagId]; !ok {
|
|
|
+ // tagIdList = append(tagIdList, tagId)
|
|
|
+ // tagIdMap[tagId] = true
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ tagIdJsonByte, err := json.Marshal(tagIdList)
|
|
|
if err != nil {
|
|
|
- return
|
|
|
+ utils.FileLog.Info(fmt.Sprintf("标签ID序列化失败,Err:%s", tmpErr.Error()))
|
|
|
+ } else {
|
|
|
+ tagIdJsonStr = string(tagIdJsonByte)
|
|
|
}
|
|
|
+
|
|
|
+ tagNameJsonStr = strings.Join(tagNameList, `,`)
|
|
|
}
|
|
|
|
|
|
- if abstract != `` {
|
|
|
- if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
|
|
|
- item.AbstractStatus = 2
|
|
|
- item.ModifyTime = time.Now()
|
|
|
- err = item.Update([]string{"AbstractStatus", "ModifyTime"})
|
|
|
- return
|
|
|
+ item.AbstractStatus = 1
|
|
|
+ item.ModifyTime = time.Now()
|
|
|
+ err = item.Update([]string{"AbstractStatus", "ModifyTime"})
|
|
|
+
|
|
|
+ if abstractItem == nil || abstractItem.WechatArticleAbstractId <= 0 {
|
|
|
+ abstractItem = &rag.WechatArticleAbstract{
|
|
|
+ WechatArticleAbstractId: 0,
|
|
|
+ WechatArticleId: item.WechatArticleId,
|
|
|
+ Content: abstract,
|
|
|
+ Version: 1,
|
|
|
+ VectorKey: "",
|
|
|
+ ModifyTime: time.Now(),
|
|
|
+ CreateTime: time.Now(),
|
|
|
+ QuestionId: question.QuestionId,
|
|
|
+ Tags: tagIdJsonStr,
|
|
|
+ TagsName: tagNameJsonStr,
|
|
|
+ QuestionContent: question.QuestionContent,
|
|
|
}
|
|
|
- item.AbstractStatus = 1
|
|
|
- item.ModifyTime = time.Now()
|
|
|
- err = item.Update([]string{"AbstractStatus", "ModifyTime"})
|
|
|
+ err = abstractItem.Create()
|
|
|
+ } else {
|
|
|
+ // 添加历史记录
|
|
|
+ rag.AddArticleAbstractHistoryByWechatArticleAbstract(abstractItem)
|
|
|
|
|
|
abstractItem.Content = abstract
|
|
|
- abstractItem.Version = abstractObj.Version + 1
|
|
|
+ abstractItem.Version++
|
|
|
abstractItem.ModifyTime = time.Now()
|
|
|
- err = abstractItem.Update([]string{"content", "version", "modify_time"})
|
|
|
- if err != nil {
|
|
|
- return
|
|
|
- }
|
|
|
+ abstractItem.Tags = tagIdJsonStr
|
|
|
+ abstractItem.TagsName = tagNameJsonStr
|
|
|
+ abstractItem.QuestionContent = question.QuestionContent
|
|
|
+ err = abstractItem.Update([]string{"content", "version", "modify_time", "tags", "tags_name", "question_content"})
|
|
|
+ }
|
|
|
|
|
|
- AbstractToKnowledge(item, abstractItem, true)
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
}
|
|
|
+
|
|
|
+ // 数据入ES库
|
|
|
+ go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
|
|
|
+
|
|
|
+ WechatArticleAbstractToKnowledge(item, abstractItem, false)
|
|
|
+
|
|
|
+ return
|
|
|
}
|
|
|
|
|
|
// DelDoc
|
|
@@ -529,7 +586,62 @@ func DelLlmDoc(vectorKeyList []string, wechatArticleAbstractIdList []int) (err e
|
|
|
return
|
|
|
}
|
|
|
|
|
|
-func getAnswerByContent(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
|
|
|
+func getAnswerByContent(articleId int, source int, questionStr string) (answer string, industryTags, varietyTags []string, err error) {
|
|
|
+ //addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
|
|
|
+
|
|
|
+ result, err := facade.AIGCBaseOnPromote(facade.AIGC{
|
|
|
+ Promote: questionStr,
|
|
|
+ Source: source,
|
|
|
+ ArticleId: articleId,
|
|
|
+ LLMModel: `deepseek-r1:32b`,
|
|
|
+ })
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ // JSON字符串转字节
|
|
|
+ //answerByte, err := json.Marshal(result)
|
|
|
+ //if err != nil {
|
|
|
+ // return
|
|
|
+ //}
|
|
|
+ //originalAnswer := string(answerByte)
|
|
|
+
|
|
|
+ // 提取 </think> 后面的内容
|
|
|
+ thinkEndIndex := strings.Index(result.Answer, "</think>")
|
|
|
+ if thinkEndIndex != -1 {
|
|
|
+ answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
|
|
|
+ } else {
|
|
|
+ answer = result.Answer
|
|
|
+ }
|
|
|
+
|
|
|
+ answer = strings.TrimSpace(answer)
|
|
|
+
|
|
|
+ // 提取标签
|
|
|
+ industryTags, varietyTags = extractLabels(answer)
|
|
|
+
|
|
|
+ //// 待入库的数据
|
|
|
+ //addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
|
|
|
+ // WechatArticleChatRecordId: 0,
|
|
|
+ // WechatArticleId: articleId,
|
|
|
+ // ChatUserType: "user",
|
|
|
+ // Content: questionStr,
|
|
|
+ // SendTime: time.Now(),
|
|
|
+ // CreatedTime: time.Now(),
|
|
|
+ // UpdateTime: time.Now(),
|
|
|
+ //}, &rag.WechatArticleChatRecord{
|
|
|
+ // WechatArticleChatRecordId: 0,
|
|
|
+ // WechatArticleId: articleId,
|
|
|
+ // ChatUserType: "assistant",
|
|
|
+ // Content: originalAnswer,
|
|
|
+ // SendTime: time.Now(),
|
|
|
+ // CreatedTime: time.Now(),
|
|
|
+ // UpdateTime: time.Now(),
|
|
|
+ //})
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+func getAnswerByContentBak(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
|
|
|
historyList := make([]eta_llm_http.HistoryContent, 0)
|
|
|
addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
|
|
|
|
|
@@ -649,13 +761,13 @@ func ArticleToKnowledge(item *rag.WechatArticle) {
|
|
|
|
|
|
}
|
|
|
|
|
|
-// AbstractToKnowledge
|
|
|
+// WechatArticleAbstractToKnowledge
|
|
|
// @Description: 摘要入向量库
|
|
|
// @author: Roc
|
|
|
// @datetime 2025-03-10 16:14:59
|
|
|
// @param wechatArticleItem *rag.WechatArticle
|
|
|
// @param abstractItem *rag.WechatArticleAbstract
|
|
|
-func AbstractToKnowledge(wechatArticleItem *rag.WechatArticle, abstractItem *rag.WechatArticleAbstract, isReUpload bool) {
|
|
|
+func WechatArticleAbstractToKnowledge(wechatArticleItem *rag.WechatArticle, abstractItem *rag.WechatArticleAbstract, isReUpload bool) {
|
|
|
if abstractItem.Content == `` {
|
|
|
return
|
|
|
}
|
|
@@ -683,7 +795,7 @@ func AbstractToKnowledge(wechatArticleItem *rag.WechatArticle, abstractItem *rag
|
|
|
err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
|
|
|
return
|
|
|
}
|
|
|
- fileName := utils.RemoveSpecialChars(wechatArticleItem.Title) + `.md`
|
|
|
+ fileName := utils.MD5(fmt.Sprintf("%d_%d", utils.AI_ARTICLE_SOURCE_WECHAT, wechatArticleItem.WechatArticleId)) + `.md`
|
|
|
tmpFilePath := uploadDir + "/" + fileName
|
|
|
err = utils.SaveToFile(abstractItem.Content, tmpFilePath)
|
|
|
if err != nil {
|
|
@@ -985,17 +1097,19 @@ func AddOrEditEsWechatArticleAbstract(articleAbstractId int) {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
- // 公众号平台关联的标签品种
|
|
|
- tagObj := rag.WechatPlatformTagMapping{}
|
|
|
- tagMappingList, err := tagObj.GetListByCondition(` AND wechat_platform_id = ? `, []interface{}{articleInfo.WechatPlatformId}, 0, 10000)
|
|
|
- if err != nil {
|
|
|
- err = fmt.Errorf("获取公众号平台关联的品种信息失败,Err:" + err.Error())
|
|
|
- return
|
|
|
+ // 标签ID
|
|
|
+ tagIdList := make([]int, 0)
|
|
|
+ if abstractInfo.Tags != `` {
|
|
|
+ err = json.Unmarshal([]byte(abstractInfo.Tags), &tagIdList)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("报告标签ID转int失败,Err:" + err.Error())
|
|
|
+ utils.FileLog.Info(fmt.Sprintf("json.Unmarshal 报告标签ID转int失败,标签数据:%s,Err:%s", abstractInfo.Tags, err.Error()))
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
- tagIdList := make([]int, 0)
|
|
|
- for _, v := range tagMappingList {
|
|
|
- tagIdList = append(tagIdList, v.TagId)
|
|
|
+ tagNameList := make([]string, 0)
|
|
|
+ if abstractInfo.TagsName != `` {
|
|
|
+ tagNameList = strings.Split(abstractInfo.TagsName, ",")
|
|
|
}
|
|
|
|
|
|
esItem := elastic.WechatArticleAbstractItem{
|
|
@@ -1003,6 +1117,7 @@ func AddOrEditEsWechatArticleAbstract(articleAbstractId int) {
|
|
|
WechatArticleId: abstractInfo.WechatArticleId,
|
|
|
WechatPlatformId: articleInfo.WechatPlatformId,
|
|
|
Abstract: abstractInfo.Content,
|
|
|
+ QuestionId: abstractInfo.QuestionId,
|
|
|
Version: abstractInfo.Version,
|
|
|
VectorKey: abstractInfo.VectorKey,
|
|
|
ModifyTime: articleInfo.ModifyTime,
|
|
@@ -1010,11 +1125,154 @@ func AddOrEditEsWechatArticleAbstract(articleAbstractId int) {
|
|
|
Title: articleInfo.Title,
|
|
|
Link: articleInfo.Link,
|
|
|
TagIdList: tagIdList,
|
|
|
+ TagNameList: tagNameList,
|
|
|
}
|
|
|
|
|
|
err = elastic.WechatArticleAbstractEsAddOrEdit(strconv.Itoa(articleAbstractId), esItem)
|
|
|
}
|
|
|
|
|
|
+// DelWechatArticleAbstract
|
|
|
+// @Description: 删除微信文章摘要
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-04-23 17:36:22
|
|
|
+// @param abstractIdList []int
|
|
|
+// @return err error
|
|
|
+func DelWechatArticleAbstract(abstractIdList []int) (err error) {
|
|
|
+ obj := rag.WechatArticleAbstract{}
|
|
|
+
|
|
|
+ list, err := obj.GetByIdList(abstractIdList)
|
|
|
+ if err != nil {
|
|
|
+ if !utils.IsErrNoRow(err) {
|
|
|
+ err = errors.New("删除向量库失败,Err:" + err.Error())
|
|
|
+ } else {
|
|
|
+ err = nil
|
|
|
+ }
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ err = delWechatArticleAbstract(list)
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// DelWechatArticleAbstract
|
|
|
+// @Description: 删除微信文章摘要
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-04-23 17:36:22
|
|
|
+// @param abstractIdList []int
|
|
|
+// @return err error
|
|
|
+func DelWechatArticleAbstractByQuestionId(questionId int) (err error) {
|
|
|
+ obj := rag.WechatArticleAbstract{}
|
|
|
+
|
|
|
+ list, err := obj.GetListByQuestionId(questionId)
|
|
|
+ if err != nil {
|
|
|
+ if !utils.IsErrNoRow(err) {
|
|
|
+ err = errors.New("删除向量库失败,Err:" + err.Error())
|
|
|
+ } else {
|
|
|
+ err = nil
|
|
|
+ }
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ err = delWechatArticleAbstract(list)
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// delRagEtaReportAbstract
|
|
|
+// @Description: 删除摘要
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-04-24 15:19:19
|
|
|
+// @param list []*rag.RagEtaReportAbstract
|
|
|
+// @return err error
|
|
|
+func delWechatArticleAbstract(list []*rag.WechatArticleAbstract) (err error) {
|
|
|
+ obj := rag.RagEtaReportAbstract{}
|
|
|
+
|
|
|
+ vectorKeyList := make([]string, 0)
|
|
|
+ newAbstractIdList := make([]int, 0)
|
|
|
+
|
|
|
+ if len(list) > 0 {
|
|
|
+ for _, v := range list {
|
|
|
+ // 有加入到向量库,那么就加入到待删除的向量库list中
|
|
|
+ if v.VectorKey != `` {
|
|
|
+ vectorKeyList = append(vectorKeyList, v.VectorKey)
|
|
|
+ }
|
|
|
+ newAbstractIdList = append(newAbstractIdList, v.WechatArticleAbstractId)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //if !req.IsSelectAll {
|
|
|
+ // list, err := obj.GetByIdList(req.RagEtaReportAbstractIdList)
|
|
|
+ // if err != nil {
|
|
|
+ // br.Msg = "修改失败"
|
|
|
+ // br.ErrMsg = "修改失败,查找问题失败,Err:" + err.Error()
|
|
|
+ // if utils.IsErrNoRow(err) {
|
|
|
+ // br.Msg = "问题不存在"
|
|
|
+ // br.IsSendEmail = false
|
|
|
+ // }
|
|
|
+ // return
|
|
|
+ // }
|
|
|
+ // if len(list) > 0 {
|
|
|
+ // for _, v := range list {
|
|
|
+ // // 有加入到向量库,那么就加入到待删除的向量库list中
|
|
|
+ // if v.VectorKey != `` {
|
|
|
+ // vectorKeyList = append(vectorKeyList, v.VectorKey)
|
|
|
+ // }
|
|
|
+ // wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.RagEtaReportAbstractId)
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //} else {
|
|
|
+ // notIdMap := make(map[int]bool)
|
|
|
+ // for _, v := range req.NotRagEtaReportAbstractIdList {
|
|
|
+ // notIdMap[v] = true
|
|
|
+ // }
|
|
|
+ //
|
|
|
+ // _, list, err := getRagEtaReportAbstractList(req.KeyWord, req.TagId, 0, 100000)
|
|
|
+ // if err != nil {
|
|
|
+ // br.Msg = "修改失败"
|
|
|
+ // br.ErrMsg = "修改失败,查找问题失败,Err:" + err.Error()
|
|
|
+ // if utils.IsErrNoRow(err) {
|
|
|
+ // br.Msg = "问题不存在"
|
|
|
+ // br.IsSendEmail = false
|
|
|
+ // }
|
|
|
+ // return
|
|
|
+ // }
|
|
|
+ // if len(list) > 0 {
|
|
|
+ // for _, v := range list {
|
|
|
+ // if notIdMap[v.RagEtaReportAbstractId] {
|
|
|
+ // continue
|
|
|
+ // }
|
|
|
+ // // 有加入到向量库,那么就加入到待删除的向量库list中
|
|
|
+ // if v.VectorKey != `` {
|
|
|
+ // vectorKeyList = append(vectorKeyList, v.VectorKey)
|
|
|
+ // }
|
|
|
+ // wechatArticleAbstractIdList = append(wechatArticleAbstractIdList, v.RagEtaReportAbstractId)
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ // 删除向量库
|
|
|
+ err = DelLlmDoc(vectorKeyList, newAbstractIdList)
|
|
|
+ if err != nil {
|
|
|
+ err = errors.New("删除向量库失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ // 删除摘要
|
|
|
+ err = obj.DelByIdList(newAbstractIdList)
|
|
|
+ if err != nil {
|
|
|
+ err = errors.New("删除失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ // 删除es数据
|
|
|
+ for _, wechatArticleAbstractId := range newAbstractIdList {
|
|
|
+ go DelEsWechatArticleAbstract(wechatArticleAbstractId)
|
|
|
+ }
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
// DelEsWechatArticleAbstract
|
|
|
// @Description: 删除ES中的微信文章摘要
|
|
|
// @author: Roc
|
|
@@ -1028,8 +1286,8 @@ func DelEsWechatArticleAbstract(articleAbstractId int) {
|
|
|
var err error
|
|
|
defer func() {
|
|
|
if err != nil {
|
|
|
- utils.FileLog.Error("添加公众号微信信息到ES失败,err:%v", err)
|
|
|
- fmt.Println("添加公众号微信信息到ES失败,err:", err)
|
|
|
+ utils.FileLog.Error("删除公众号微信信息到ES失败,err:%v", err)
|
|
|
+ fmt.Println("删除公众号微信信息到ES失败,err:", err)
|
|
|
}
|
|
|
}()
|
|
|
|
|
@@ -1094,3 +1352,83 @@ func DelEsRagQuestion(questionId int) {
|
|
|
|
|
|
err = elastic.RagQuestionEsDel(strconv.Itoa(questionId))
|
|
|
}
|
|
|
+
|
|
|
// Patterns used by extractLabels, compiled once at package initialization
// instead of on every call.
var (
	// industryLabelGroupRe captures the run of 【…】 groups that directly follows "行业标签".
	industryLabelGroupRe = regexp.MustCompile(`行业标签((?:【[^】]*】)+)`)
	// varietyLabelGroupRe captures the run of 【…】 groups that directly follows "品种标签".
	varietyLabelGroupRe = regexp.MustCompile(`品种标签((?:【[^】]*】)+)`)
	// bracketedLabelRe captures the text inside a single 【…】 group.
	bracketedLabelRe = regexp.MustCompile(`【([^】]*)】`)
)

// extractLabels
// @Description: Extract the industry and variety labels embedded in an abstract
// @author: Roc
// @datetime 2025-04-18 17:16:05
// @param text string abstract text, expected to contain e.g. "行业标签【a】【b】" and/or "品种标签【c】"
// @return industryTags []string labels following the first "行业标签"; nil when that marker is absent
// @return varietyTags []string labels following the first "品种标签"; nil when that marker is absent
func extractLabels(text string) (industryTags []string, varietyTags []string) {
	industryTags = collectBracketedLabels(industryLabelGroupRe, text)
	varietyTags = collectBracketedLabels(varietyLabelGroupRe, text)
	return
}

// collectBracketedLabels returns the contents of every 【…】 group captured by the
// first match of groupRe in text, or nil when groupRe does not match.
func collectBracketedLabels(groupRe *regexp.Regexp, text string) []string {
	groupMatch := groupRe.FindStringSubmatch(text)
	if len(groupMatch) < 2 {
		return nil
	}

	labelMatches := bracketedLabelRe.FindAllStringSubmatch(groupMatch[1], -1)
	labels := make([]string, 0, len(labelMatches))
	for _, m := range labelMatches {
		if len(m) > 1 {
			labels = append(labels, m[1])
		}
	}
	return labels
}
|
|
|
+
|
|
|
+var aiAbstractTagMap = map[string]int{}
|
|
|
+
|
|
|
+// GetTagIdByName
|
|
|
+// @Description: 获取标签ID
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-04-18 17:25:46
|
|
|
+// @param tagName string
|
|
|
+// @return tagId int
|
|
|
+// @return err error
|
|
|
+func GetTagIdByName(tagName string) (tagId int, err error) {
|
|
|
+ tagName = strings.TrimSpace(tagName)
|
|
|
+ tagId, ok := aiAbstractTagMap[tagName]
|
|
|
+ if ok {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ obj := rag.Tag{}
|
|
|
+ item, err := obj.GetByCondition(fmt.Sprintf(` AND %s = ? `, rag.TagColumns.TagName), []interface{}{tagName})
|
|
|
+ if err != nil {
|
|
|
+ if !utils.IsErrNoRow(err) {
|
|
|
+ err = fmt.Errorf("获取标签失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ item = &rag.Tag{
|
|
|
+ TagId: 0,
|
|
|
+ TagName: tagName,
|
|
|
+ Sort: 0,
|
|
|
+ ModifyTime: time.Now(),
|
|
|
+ CreateTime: time.Now(),
|
|
|
+ }
|
|
|
+ err = item.Create()
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("添加标签失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ tagId = item.TagId
|
|
|
+ aiAbstractTagMap[tagName] = tagId
|
|
|
+
|
|
|
+ return
|
|
|
+}
|