瀏覽代碼

fix:批量生成摘要

Roc 6 天之前
父節點
當前提交
90738eec8d
共有 4 個文件被更改,包括 215 次插入和 140 次删除
  1. 17 2
      models/rag/rag_eta_report_abstract.go
  2. 15 0
      models/rag/wechat_article_abstract.go
  3. 178 138
      services/wechat_platform.go
  4. 5 0
      utils/constants.go

+ 17 - 2
models/rag/rag_eta_report_abstract.go

@@ -103,7 +103,7 @@ func (m *RagEtaReportAbstract) DelByIdList(idList []int) (err error) {
 	return
 }
 
-// GetByWechatArticleId
+// GetByRagEtaReportId
 // @Description: 根据报告id获取摘要
 // @author: Roc
 // @receiver m
@@ -111,12 +111,27 @@ func (m *RagEtaReportAbstract) DelByIdList(idList []int) (err error) {
 // @param id int
 // @return item *RagEtaReportAbstract
 // @return err error
-func (m *RagEtaReportAbstract) GetByWechatArticleId(id int) (item *RagEtaReportAbstract, err error) {
+func (m *RagEtaReportAbstract) GetByRagEtaReportId(id int) (item *RagEtaReportAbstract, err error) {
 	err = global.DbMap[utils.DbNameAI].Where(fmt.Sprintf("%s = ?", EtaReportAbstractColumns.RagEtaReportID), id).Order(fmt.Sprintf(`%s DESC`, EtaReportAbstractColumns.RagEtaReportAbstractID)).First(&item).Error
 
 	return
 }
 
+// GetByRagEtaReportIdAndQuestionId
+// @Description: Fetches the abstract stored for an ETA report under a given prompt (question).
+// When several rows match, the row with the highest abstract id is returned.
+// @author: Roc
+// @receiver m
+// @datetime 2025-04-17 17:39:27
+// @param articleId int   matched against the rag_eta_report_id column (the ETA report id)
+// @param questionId int  matched against the question id column (the prompt id)
+// @return item *RagEtaReportAbstract
+// @return err error      error reported by the query
+func (m *RagEtaReportAbstract) GetByRagEtaReportIdAndQuestionId(articleId, questionId int) (item *RagEtaReportAbstract, err error) {
+	// Sort by this table's own primary-key column. The previous version sorted by the
+	// WeChat abstract table's key column, which is not a column of the ETA report abstract table.
+	err = global.DbMap[utils.DbNameAI].Where(fmt.Sprintf("%s = ? AND %s = ? ", EtaReportAbstractColumns.RagEtaReportID, EtaReportAbstractColumns.QuestionID), articleId, questionId).Order(fmt.Sprintf(`%s DESC`, EtaReportAbstractColumns.RagEtaReportAbstractID)).First(&item).Error
+
+	return
+}
+
 type EtaReportAbstractView struct {
 	RagEtaReportAbstractID int    `gorm:"primaryKey;column:rag_eta_report_abstract_id" json:"-"`
 	RagEtaReportID         int    `gorm:"column:rag_eta_report_id" json:"ragEtaReportId"` // ETA报告id

+ 15 - 0
models/rag/wechat_article_abstract.go

@@ -114,6 +114,21 @@ func (m *WechatArticleAbstract) GetByWechatArticleId(id int) (item *WechatArticl
 	return
 }
 
+// GetByWechatArticleIdAndQuestionId
+// @Description: Looks up the abstract stored for a WeChat article under a given prompt (question).
+// When several rows match, the row with the highest abstract id is returned.
+// @author: Roc
+// @receiver m
+// @datetime 2025-04-17 17:39:27
+// @param articleId int   WeChat article id to match
+// @param questionId int  prompt (question) id to match
+// @return item *WechatArticleAbstract
+// @return err error      error reported by the query
+func (m *WechatArticleAbstract) GetByWechatArticleIdAndQuestionId(articleId, questionId int) (item *WechatArticleAbstract, err error) {
+	// Build the filter and ordering clauses first so the query chain stays short.
+	filter := fmt.Sprintf("%s = ? AND %s = ? ", WechatArticleAbstractColumns.WechatArticleID, WechatArticleAbstractColumns.QuestionID)
+	sortBy := fmt.Sprintf(`%s DESC`, WechatArticleAbstractColumns.WechatArticleAbstractID)
+
+	query := global.DbMap[utils.DbNameAI].Where(filter, articleId, questionId).Order(sortBy)
+	err = query.First(&item).Error
+
+	return
+}
+
 type WechatArticleAbstractView struct {
 	WechatArticleAbstractId int    `gorm:"column:wechat_article_abstract_id;type:int(9) UNSIGNED;primaryKey;not null;" description:"wechat_article_abstract_id"`
 	WechatArticleId         int    `gorm:"column:wechat_article_id;type:int(9) UNSIGNED;comment:关联的微信报告id;default:0;" description:"关联的微信报告id"`

+ 178 - 138
services/wechat_platform.go

@@ -2,11 +2,13 @@ package services
 
 import (
 	"bytes"
+	"encoding/json"
 	"eta/eta_api/cache"
 	"eta/eta_api/models"
 	"eta/eta_api/models/rag"
 	"eta/eta_api/services/elastic"
 	"eta/eta_api/services/llm"
+	"eta/eta_api/services/llm/facade"
 	"eta/eta_api/utils"
 	"eta/eta_api/utils/llm/eta_llm/eta_llm_http"
 	"fmt"
@@ -238,12 +240,93 @@ func BeachAddWechatArticle(item *rag.WechatPlatform, num int) {
 	return
 }
 
+//
+//// GenerateArticleAbstract
+//// @Description: 文章摘要生成
+//// @author: Roc
+//// @datetime 2025-03-10 16:17:53
+//// @param item *rag.WechatArticle
+//func GenerateArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
+//	var err error
+//	defer func() {
+//		if err != nil {
+//			utils.FileLog.Error("文章转临时文件失败,err:%v", err)
+//			fmt.Println("文章转临时文件失败,err:", err)
+//		}
+//	}()
+//
+//	// 内容为空,那就不需要生成摘要
+//	if item.TextContent == `` {
+//		return
+//	}
+//
+//	abstractObj := rag.WechatArticleAbstract{}
+//	tmpAbstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
+//	// 如果找到了,同时不是强制生成,那么就直接处理到知识库中
+//	if err == nil && !forceGenerate {
+//		// 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
+//		AbstractToKnowledge(item, tmpAbstractItem, false)
+//
+//		return
+//	}
+//	if !utils.IsErrNoRow(err) {
+//		return
+//	}
+//
+//	//开始对话
+//	abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_ETA_REPORT)
+//	if tmpErr != nil {
+//		err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
+//		return
+//	}
+//
+//	// 添加问答记录
+//	if len(addArticleChatRecordList) > 0 {
+//		recordObj := rag.WechatArticleChatRecord{}
+//		err = recordObj.CreateInBatches(addArticleChatRecordList)
+//		if err != nil {
+//			return
+//		}
+//	}
+//
+//	if abstract != `` {
+//		if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
+//			item.AbstractStatus = 2
+//			item.ModifyTime = time.Now()
+//			err = item.Update([]string{"AbstractStatus", "ModifyTime"})
+//			return
+//		}
+//		item.AbstractStatus = 1
+//		item.ModifyTime = time.Now()
+//		err = item.Update([]string{"AbstractStatus", "ModifyTime"})
+//
+//		abstractItem := &rag.WechatArticleAbstract{
+//			WechatArticleAbstractId: 0,
+//			WechatArticleId:         item.WechatArticleId,
+//			Content:                 abstract,
+//			Version:                 0,
+//			VectorKey:               "",
+//			ModifyTime:              time.Now(),
+//			CreateTime:              time.Now(),
+//		}
+//		err = abstractItem.Create()
+//		if err != nil {
+//			return
+//		}
+//
+//		// 数据入ES库
+//		go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
+//
+//		AbstractToKnowledge(item, abstractItem, false)
+//	}
+//}
+
 // GenerateArticleAbstract
-// @Description: 文章摘要生成
+// @Description: 文章摘要生成(默认提示词批量生成)
 // @author: Roc
 // @datetime 2025-03-10 16:17:53
 // @param item *rag.WechatArticle
-func GenerateArticleAbstract(item *rag.WechatArticle) {
+func GenerateArticleAbstract(item *rag.WechatArticle, forceGenerate bool) {
 	var err error
 	defer func() {
 		if err != nil {
@@ -257,109 +340,31 @@ func GenerateArticleAbstract(item *rag.WechatArticle) {
 		return
 	}
 
-	abstractObj := rag.WechatArticleAbstract{}
-	tmpAbstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
-	if err == nil {
-		// 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
-		AbstractToKnowledge(item, tmpAbstractItem, false)
-
-		return
-	}
-	if !utils.IsErrNoRow(err) {
-		return
-	}
-
-	// 生成临时文件
-	dateDir := time.Now().Format("20060102")
-	uploadDir := "./static/ai/" + dateDir
-	err = os.MkdirAll(uploadDir, utils.DIR_MOD)
-	if err != nil {
-		err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
-		return
-	}
-	randStr := utils.GetRandStringNoSpecialChar(28)
-	fileName := randStr + `.md`
-	tmpFilePath := uploadDir + "/" + fileName
-	err = utils.SaveToFile(item.TextContent, tmpFilePath)
-	if err != nil {
-		err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
-		return
-	}
-	defer func() {
-		os.Remove(tmpFilePath)
-	}()
-
-	// 上传临时文件到LLM
-	tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
+	questionObj := rag.Question{}
+	questionList, err := questionObj.GetListByCondition(``, ` AND is_default = 1 `, []interface{}{}, 0, 100)
 	if err != nil {
-		err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
-		return
-	}
-
-	if tmpFileResp.Data.Id == `` {
-		err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
+		err = fmt.Errorf("获取问题列表失败,Err:" + err.Error())
 		return
 	}
-	tmpDocId := tmpFileResp.Data.Id
 
-	//tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
-	//tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
-	//tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
-
-	//开始对话
-	abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, tmpDocId)
-	if tmpErr != nil {
-		err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
+	// 没问题就不生成了
+	if len(questionList) <= 0 {
 		return
 	}
 
-	// 添加问答记录
-	if len(addArticleChatRecordList) > 0 {
-		recordObj := rag.WechatArticleChatRecord{}
-		err = recordObj.CreateInBatches(addArticleChatRecordList)
-		if err != nil {
-			return
-		}
+	for _, question := range questionList {
+		GenerateArticleAbstractByQuestion(item, question, forceGenerate)
 	}
 
-	if abstract != `` {
-		if abstract == `sorry` || strings.Index(abstract, `根据已知信息无法回答该问题`) == 0 {
-			item.AbstractStatus = 2
-			item.ModifyTime = time.Now()
-			err = item.Update([]string{"AbstractStatus", "ModifyTime"})
-			return
-		}
-		item.AbstractStatus = 1
-		item.ModifyTime = time.Now()
-		err = item.Update([]string{"AbstractStatus", "ModifyTime"})
-
-		abstractItem := &rag.WechatArticleAbstract{
-			WechatArticleAbstractId: 0,
-			WechatArticleId:         item.WechatArticleId,
-			Content:                 abstract,
-			Version:                 0,
-			VectorKey:               "",
-			ModifyTime:              time.Now(),
-			CreateTime:              time.Now(),
-		}
-		err = abstractItem.Create()
-		if err != nil {
-			return
-		}
-
-		// 数据入ES库
-		go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
-
-		AbstractToKnowledge(item, abstractItem, false)
-	}
+	return
 }
 
-// ReGenerateArticleAbstract
-// @Description: 文章摘要重新生成
+// GenerateArticleAbstractByQuestion
+// @Description: 文章摘要生成(根据提示词生成)
 // @author: Roc
 // @datetime 2025-03-10 16:17:53
 // @param item *rag.WechatArticle
-func ReGenerateArticleAbstract(item *rag.WechatArticle) {
+func GenerateArticleAbstractByQuestion(item *rag.WechatArticle, question *rag.Question, forceGenerate bool) {
 	var err error
 	defer func() {
 		if err != nil {
@@ -368,57 +373,28 @@ func ReGenerateArticleAbstract(item *rag.WechatArticle) {
 		}
 	}()
 
-	abstractObj := rag.WechatArticleAbstract{}
-	abstractItem, err := abstractObj.GetByWechatArticleId(item.WechatArticleId)
-	if err != nil {
-		if utils.IsErrNoRow(err) {
-			// 直接生成
-			GenerateArticleAbstract(item)
-			return
-		}
-		// 异常了
+	// 内容为空,那就不需要生成摘要
+	if item.TextContent == `` {
 		return
 	}
 
-	// 生成临时文件
-	dateDir := time.Now().Format("20060102")
-	uploadDir := "./static/ai/" + dateDir
-	err = os.MkdirAll(uploadDir, utils.DIR_MOD)
-	if err != nil {
-		err = fmt.Errorf("存储目录创建失败,Err:" + err.Error())
-		return
-	}
-	randStr := utils.GetRandStringNoSpecialChar(28)
-	fileName := randStr + `.md`
-	tmpFilePath := uploadDir + "/" + fileName
-	err = utils.SaveToFile(item.TextContent, tmpFilePath)
-	if err != nil {
-		err = fmt.Errorf("生成临时文件失败,Err:" + err.Error())
-		return
-	}
-	defer func() {
-		os.Remove(tmpFilePath)
-	}()
+	abstractObj := rag.WechatArticleAbstract{}
+	tmpAbstractItem, err := abstractObj.GetByWechatArticleIdAndQuestionId(item.WechatArticleId, question.QuestionId)
+	// 如果找到了,同时不是强制生成,那么就直接处理到知识库中
+	if err == nil && !forceGenerate {
+		// 摘要已经生成,不需要重复生成,只需要重新加入到向量库中
+		AbstractToKnowledge(item, tmpAbstractItem, false)
 
-	// 上传临时文件到LLM
-	tmpFileResp, err := llm.UploadTempDocs(tmpFilePath)
-	if err != nil {
-		err = fmt.Errorf("上传临时文件到LLM失败,Err:" + err.Error())
 		return
 	}
-
-	if tmpFileResp.Data.Id == `` {
-		err = fmt.Errorf("上传临时文件到LLM失败,Err:上传失败")
+	if !utils.IsErrNoRow(err) {
 		return
 	}
-	tmpDocId := tmpFileResp.Data.Id
-
-	//tmpDocId := `c4d2ee902808408c8b8ed398b33be103` // 钢材
-	//tmpDocId := `2dde8afe62d24525a814e74e0a5e35e4` // 钢材
-	//tmpDocId := `7634cc1086c04b3687682220a2cf1a48` //
 
+	//你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
+	questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)
 	//开始对话
-	abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, tmpDocId)
+	abstract, addArticleChatRecordList, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_ETA_REPORT, questionStr)
 	if tmpErr != nil {
 		err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
 		return
@@ -444,15 +420,27 @@ func ReGenerateArticleAbstract(item *rag.WechatArticle) {
 		item.ModifyTime = time.Now()
 		err = item.Update([]string{"AbstractStatus", "ModifyTime"})
 
-		abstractItem.Content = abstract
-		abstractItem.Version = abstractObj.Version + 1
-		abstractItem.ModifyTime = time.Now()
-		err = abstractItem.Update([]string{"content", "version", "modify_time"})
+		abstractItem := &rag.WechatArticleAbstract{
+			WechatArticleAbstractId: 0,
+			WechatArticleId:         item.WechatArticleId,
+			Content:                 abstract,
+			Version:                 1,
+			VectorKey:               "",
+			ModifyTime:              time.Now(),
+			CreateTime:              time.Now(),
+			QuestionID:              question.QuestionId,
+			Tags:                    "",
+			QuestionContent:         question.QuestionContent,
+		}
+		err = abstractItem.Create()
 		if err != nil {
 			return
 		}
 
-		AbstractToKnowledge(item, abstractItem, true)
+		// 数据入ES库
+		go AddOrEditEsWechatArticleAbstract(abstractItem.WechatArticleAbstractId)
+
+		AbstractToKnowledge(item, abstractItem, false)
 	}
 }
 
@@ -529,7 +517,59 @@ func DelLlmDoc(vectorKeyList []string, wechatArticleAbstractIdList []int) (err e
 	return
 }
 
-func getAnswerByContent(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
+func getAnswerByContent(wechatArticleId int, source int, questionStr string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
+	addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
+
+	result, err := facade.AIGCBaseOnPromote(facade.AIGC{
+		Promote:   questionStr,
+		Source:    source,
+		ArticleId: wechatArticleId,
+		LLMModel:  `deepseek-r1:32b`,
+	})
+	if err != nil {
+		return
+	}
+
+	// JSON字符串转字节
+	answerByte, err := json.Marshal(result)
+	if err != nil {
+		return
+	}
+	originalAnswer := string(answerByte)
+
+	// 提取 </think> 后面的内容
+	thinkEndIndex := strings.Index(result.Answer, "</think>")
+	if thinkEndIndex != -1 {
+		answer = strings.TrimSpace(result.Answer[thinkEndIndex+len("</think>"):])
+	} else {
+		answer = result.Answer
+	}
+
+	answer = strings.TrimSpace(answer)
+
+	// 待入库的数据
+	addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
+		WechatArticleChatRecordId: 0,
+		WechatArticleId:           wechatArticleId,
+		ChatUserType:              "user",
+		Content:                   questionStr,
+		SendTime:                  time.Now(),
+		CreatedTime:               time.Now(),
+		UpdateTime:                time.Now(),
+	}, &rag.WechatArticleChatRecord{
+		WechatArticleChatRecordId: 0,
+		WechatArticleId:           wechatArticleId,
+		ChatUserType:              "assistant",
+		Content:                   originalAnswer,
+		SendTime:                  time.Now(),
+		CreatedTime:               time.Now(),
+		UpdateTime:                time.Now(),
+	})
+
+	return
+}
+
+func getAnswerByContentBak(wechatArticleId int, docId string) (answer string, addArticleChatRecordList []*rag.WechatArticleChatRecord, err error) {
 	historyList := make([]eta_llm_http.HistoryContent, 0)
 	addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
 

+ 5 - 0
utils/constants.go

@@ -602,3 +602,8 @@ const (
 const (
 	AI_TASK_TYPE_GENERATE_ABSTRACT = `question_generate_abstract` // AI task type: batch generation of abstracts
 )
+
+// Article source identifiers for AI processing.
+const (
+	AI_ARTICLE_SOURCE_WECHAT     = 0 // AI article source: WeChat official account
+	AI_ARTICLE_SOURCE_ETA_REPORT = 1 // AI article source: ETA report
+)