|
@@ -1,16 +1,18 @@
|
|
|
package llm
|
|
|
|
|
|
import (
|
|
|
+ "eta/eta_api/models"
|
|
|
"eta/eta_api/models/rag"
|
|
|
"eta/eta_api/utils"
|
|
|
"fmt"
|
|
|
"html"
|
|
|
+ "os"
|
|
|
+ "regexp"
|
|
|
"strconv"
|
|
|
+ "strings"
|
|
|
"time"
|
|
|
)
|
|
|
|
|
|
-// TODO 改成走队列,避免并发
|
|
|
-
|
|
|
type WechatArticleOp struct {
|
|
|
Source string
|
|
|
WechatPlatformId int
|
|
@@ -168,7 +170,7 @@ func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetai
|
|
|
func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
|
|
|
var err error
|
|
|
defer func() {
|
|
|
- fmt.Println("公众号文章批量入库完成")
|
|
|
+ //fmt.Println("公众号文章批量入库完成")
|
|
|
if err != nil {
|
|
|
utils.FileLog.Error("公众号文章批量入库失败,err:%v", err)
|
|
|
fmt.Println("公众号文章批量入库失败,err:", err)
|
|
@@ -210,3 +212,283 @@ func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
|
|
|
}
|
|
|
return
|
|
|
}
|
|
|
+
|
|
|
+// GenerateArticleAbstract
|
|
|
+// @Description: 文章摘要生成
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-03-10 16:17:53
|
|
|
+// @param item *rag.WechatArticle
|
|
|
+func GenerateArticleAbstract(item *rag.WechatArticle) {
|
|
|
+ var err error
|
|
|
+ defer func() {
|
|
|
+ if err != nil {
|
|
|
+ utils.FileLog.Error("文章转临时文件失败,err:%v", err)
|
|
|
+ fmt.Println("文章转临时文件失败,err:", err)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+
|
|
|
+ abstractObj := rag.WechatArticleAbstract{}
|
|
|
+ _, err = abstractObj.GetByWechatArticleId(item.WechatArticleId)
|
|
|
+ if err == nil {
|
|
|
+ // 摘要已经生成,不需要重复生成
|
|
|
+ return
|
|
|
+ }
|
|
|
+ if !utils.IsErrNoRow(err) {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ // 生成临时文件
|
|
|
+ dateDir := time.Now().Format("20060102")
|
|
|
+ uploadDir := utils.STATIC_DIR + "ai/" + dateDir
|
|
|
+ err = os.MkdirAll(uploadDir, utils.DIR_MOD)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ randStr := utils.GetRandStringNoSpecialChar(28)
|
|
|
+ fileName := randStr + `.md`
|
|
|
+ tmpFilePath := uploadDir + "/" + fileName
|
|
|
+ err = utils.SaveToFile(item.TextContent, tmpFilePath)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ defer func() {
|
|
|
+ os.Remove(tmpFilePath)
|
|
|
+ }()
|
|
|
+
|
|
|
+ // 上传临时文件到LLM
|
|
|
+ tmpFileResp, err := UploadTempDocs(tmpFilePath)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if tmpFileResp.Data.Id == `` {
|
|
|
+ err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
|
|
|
+ return
|
|
|
+ }
|
|
|
+ tmpDocId := tmpFileResp.Data.Id
|
|
|
+
|
|
|
+ //tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
|
|
|
+ //tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
|
|
|
+ //tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
|
|
|
+
|
|
|
+ historyList := make([]HistoryContent, 0)
|
|
|
+
|
|
|
+ questionObj := rag.Question{}
|
|
|
+ questionList, err := questionObj.GetListByCondition(``, []interface{}{}, 0, 100)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ addArticleChatRecordList := make([]*rag.WechatArticleChatRecord, 0)
|
|
|
+
|
|
|
+ var abstract string
|
|
|
+ //开始对话
|
|
|
+ for _, question := range questionList {
|
|
|
+ originalAnswer, tmpAnswer, tmpErr := getAnswerByContent(tmpDocId, question.QuestionContent, historyList)
|
|
|
+ if tmpErr != nil {
|
|
|
+ err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ abstract = tmpAnswer
|
|
|
+
|
|
|
+ historyList = append(historyList, HistoryContent{
|
|
|
+ Role: `user`,
|
|
|
+ Content: question.QuestionContent,
|
|
|
+ }, HistoryContent{
|
|
|
+ Role: `assistant`,
|
|
|
+ Content: tmpAnswer,
|
|
|
+ })
|
|
|
+
|
|
|
+ // 待入库的数据
|
|
|
+ addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
|
|
|
+ WechatArticleChatRecordId: 0,
|
|
|
+ WechatArticleId: item.WechatArticleId,
|
|
|
+ ChatUserType: "user",
|
|
|
+ Content: question.QuestionContent,
|
|
|
+ SendTime: time.Now(),
|
|
|
+ CreatedTime: time.Now(),
|
|
|
+ UpdateTime: time.Now(),
|
|
|
+ }, &rag.WechatArticleChatRecord{
|
|
|
+ WechatArticleChatRecordId: 0,
|
|
|
+ WechatArticleId: item.WechatArticleId,
|
|
|
+ ChatUserType: "assistant",
|
|
|
+ Content: originalAnswer,
|
|
|
+ SendTime: time.Now(),
|
|
|
+ CreatedTime: time.Now(),
|
|
|
+ UpdateTime: time.Now(),
|
|
|
+ })
|
|
|
+ }
|
|
|
+
|
|
|
+ // 添加问答记录
|
|
|
+ if len(addArticleChatRecordList) > 0 {
|
|
|
+ recordObj := rag.WechatArticleChatRecord{}
|
|
|
+ err = recordObj.CreateInBatches(addArticleChatRecordList)
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if abstract != `` {
|
|
|
+ abstractItem := &rag.WechatArticleAbstract{
|
|
|
+ WechatArticleAbstractId: 0,
|
|
|
+ WechatArticleId: item.WechatArticleId,
|
|
|
+ Content: abstract,
|
|
|
+ Version: 0,
|
|
|
+ VectorKey: "",
|
|
|
+ ModifyTime: time.Now(),
|
|
|
+ CreateTime: time.Now(),
|
|
|
+ }
|
|
|
+ err = abstractItem.Create()
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ AbstractToKnowledge(item, abstractItem)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func getAnswerByContent(docId, question string, historyList []HistoryContent) (originalAnswer, answer string, err error) {
|
|
|
+ originalAnswer, result, err := ChatByFile(docId, question, historyList)
|
|
|
+ fmt.Println(result)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("LLM对话失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ // 提取 </think> 后面的内容
|
|
|
+ thinkEndIndex := strings.Index(result.Answer, "</think>")
|
|
|
+ if thinkEndIndex != -1 {
|
|
|
+ answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
|
|
|
+ } else {
|
|
|
+ answer = result.Answer
|
|
|
+ }
|
|
|
+
|
|
|
+ answer = strings.TrimSpace(answer)
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// ArticleToKnowledge
|
|
|
+// @Description: 原文入向量库
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-03-10 16:13:16
|
|
|
+// @param item *rag.WechatArticle
|
|
|
+func ArticleToKnowledge(item *rag.WechatArticle) {
|
|
|
+ if item.TextContent == `` {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ var err error
|
|
|
+ defer func() {
|
|
|
+ if err != nil {
|
|
|
+ utils.FileLog.Error("上传文章原文到知识库失败,err:%v", err)
|
|
|
+ fmt.Println("上传文章原文到知识库失败,err:", err)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+
|
|
|
+ // 生成临时文件
|
|
|
+ //dateDir := time.Now().Format("20060102")
|
|
|
+ //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir
|
|
|
+ uploadDir := utils.STATIC_DIR + "ai/article"
|
|
|
+ err = os.MkdirAll(uploadDir, utils.DIR_MOD)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ fileName := RemoveSpecialChars(item.Title) + `.md`
|
|
|
+ tmpFilePath := uploadDir + "/" + fileName
|
|
|
+ err = utils.SaveToFile(item.TextContent, tmpFilePath)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ defer func() {
|
|
|
+ os.Remove(tmpFilePath)
|
|
|
+ }()
|
|
|
+
|
|
|
+ knowledgeArticleName := models.BusinessConfMap[models.KnowledgeArticleName]
|
|
|
+ // 上传临时文件到LLM
|
|
|
+ uploadFileResp, err := UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if len(uploadFileResp.FailedFiles) > 0 {
|
|
|
+ for _, v := range uploadFileResp.FailedFiles {
|
|
|
+ err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ item.VectorKey = tmpFilePath
|
|
|
+ item.ModifyTime = time.Now()
|
|
|
+ err = item.Update([]string{"vector_key", "modify_time"})
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+// AbstractToKnowledge
|
|
|
+// @Description: 摘要入向量库
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-03-10 16:14:59
|
|
|
+// @param wechatArticleItem *rag.WechatArticle
|
|
|
+// @param item *rag.WechatArticleAbstract
|
|
|
+func AbstractToKnowledge(wechatArticleItem *rag.WechatArticle, item *rag.WechatArticleAbstract) {
|
|
|
+ if item.Content == `` {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ var err error
|
|
|
+ defer func() {
|
|
|
+ if err != nil {
|
|
|
+ utils.FileLog.Error("摘要入向量库失败,err:%v", err)
|
|
|
+ fmt.Println("摘要入向量库失败,err:", err)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+
|
|
|
+ // 生成临时文件
|
|
|
+ //dateDir := time.Now().Format("20060102")
|
|
|
+ //uploadDir := utils.STATIC_DIR + "ai/article/" + dateDir
|
|
|
+ uploadDir := utils.STATIC_DIR + "ai/abstract"
|
|
|
+ err = os.MkdirAll(uploadDir, utils.DIR_MOD)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ fileName := RemoveSpecialChars(wechatArticleItem.Title) + `.md`
|
|
|
+ tmpFilePath := uploadDir + "/" + fileName
|
|
|
+ err = utils.SaveToFile(item.Content, tmpFilePath)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ defer func() {
|
|
|
+ os.Remove(tmpFilePath)
|
|
|
+ }()
|
|
|
+
|
|
|
+ knowledgeArticleName := models.BusinessConfMap[models.KnowledgeBaseName]
|
|
|
+ // 上传临时文件到LLM
|
|
|
+ uploadFileResp, err := UploadDocsToKnowledge(tmpFilePath, knowledgeArticleName)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("上传文章原文到知识库失败,Err:" + err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if len(uploadFileResp.FailedFiles) > 0 {
|
|
|
+ for _, v := range uploadFileResp.FailedFiles {
|
|
|
+ err = fmt.Errorf("上传文章原文到知识库失败,Err:" + v)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ item.VectorKey = tmpFilePath
|
|
|
+ item.ModifyTime = time.Now()
|
|
|
+ err = item.Update([]string{"vector_key", "modify_time"})
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
// removeSpecialCharsRegexp matches every character that is not a Han character, a Unicode
// letter, a Unicode number, or in the range U+3000-U+303F. It is compiled once at package
// initialisation instead of on every call.
var removeSpecialCharsRegexp = regexp.MustCompile(`[^\p{Han}\p{L}\p{N}\x{3000}-\x{303F}]`)

// RemoveSpecialChars
// @Description: Returns text with every character removed that is not a Han character, a
// Unicode letter, a Unicode number, or in the range U+3000-U+303F. Whitespace, ASCII
// punctuation and path separators are therefore dropped. The result is empty when text holds
// no allowed characters.
// @param text string input text
// @return string text with the disallowed characters removed
func RemoveSpecialChars(text string) string {
	return removeSpecialCharsRegexp.ReplaceAllString(text, "")
}
|