Browse Source

fix:摘要标签

Roc 1 day ago
parent
commit
9c72949515
2 changed files with 17 additions and 27 deletions
  1. 1 1
      services/llm_report.go
  2. 16 26
      services/wechat_platform.go

+ 1 - 1
services/llm_report.go

@@ -357,7 +357,7 @@ func GenerateRagEtaReportAbstractByQuestion(item *rag.RagEtaReport, question *ra
 	//你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
 	questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)
 	//开始对话
-	abstract, industryTags, _, tmpErr := getAnswerByContent(item.RagEtaReportId, utils.AI_ARTICLE_SOURCE_ETA_REPORT, questionStr)
+	abstract, industryTags, tmpErr := getAnswerByContent(item.RagEtaReportId, utils.AI_ARTICLE_SOURCE_ETA_REPORT, questionStr)
 	if tmpErr != nil {
 		err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
 		return

+ 16 - 26
services/wechat_platform.go

@@ -399,7 +399,7 @@ func GenerateWechatArticleAbstractByQuestion(item *rag.WechatArticle, question *
 	//你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
 	questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)
 	//开始对话
-	abstract, industryTags, _, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_WECHAT, questionStr)
+	abstract, industryTags, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_WECHAT, questionStr)
 	if tmpErr != nil {
 		err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
 		return
@@ -586,7 +586,7 @@ func DelLlmDoc(vectorKeyList []string, wechatArticleAbstractIdList []int) (err e
 	return
 }
 
-func getAnswerByContent(articleId int, source int, questionStr string) (answer string, industryTags, varietyTags []string, err error) {
+func getAnswerByContent(articleId int, source int, questionStr string) (answer string, tagNameList []string, err error) {
 	//addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
 
 	result, err := facade.AIGCBaseOnPromote(facade.AIGC{
@@ -617,7 +617,7 @@ func getAnswerByContent(articleId int, source int, questionStr string) (answer s
 	answer = strings.TrimSpace(answer)
 
 	// 提取标签
-	industryTags, varietyTags = extractLabels(answer)
+	tagNameList = extractLabels(answer)
 
 	//// 待入库的数据
 	//addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
@@ -1355,37 +1355,27 @@ func DelEsRagQuestion(questionId int) {
 }
 
 // extractLabels
-// @Description: 提取摘要中的标签
+// @Description: 提取摘要中的标签并去重
 // @author: Roc
 // @datetime 2025-04-18 17:16:05
 // @param text string
 // @return tags []string
 //   (de-duplicated tag names taken from every 【…】 pair; order is not guaranteed because it comes from map iteration)
-func extractLabels(text string) (industryTags []string, varietyTags []string) {
-	reIndustry := regexp.MustCompile(`行业标签((?:【[^】]*】)+)`)
-	industryMatch := reIndustry.FindStringSubmatch(text)
-	if len(industryMatch) > 1 {
-		industryContent := industryMatch[1]
-		reSplit := regexp.MustCompile(`【([^】]*)】`)
-		industryTags = make([]string, 0)
-		for _, m := range reSplit.FindAllStringSubmatch(industryContent, -1) {
-			if len(m) > 1 {
-				industryTags = append(industryTags, m[1])
-			}
+func extractLabels(text string) (tags []string) {
+	reTag := regexp.MustCompile(`【([^】]*)】`)
+
+	// 提取所有标签
+	tagMatches := reTag.FindAllStringSubmatch(text, -1)
+	tagSet := make(map[string]bool)
+	for _, match := range tagMatches {
+		if len(match) > 1 {
+			tagSet[match[1]] = true
 		}
 	}
 
-	reVariety := regexp.MustCompile(`品种标签((?:【[^】]*】)+)`)
-	varietyMatch := reVariety.FindStringSubmatch(text)
-	if len(varietyMatch) > 1 {
-		varietyContent := varietyMatch[1]
-		reSplit := regexp.MustCompile(`【([^】]*)】`)
-		varietyTags = make([]string, 0)
-		for _, m := range reSplit.FindAllStringSubmatch(varietyContent, -1) {
-			if len(m) > 1 {
-				varietyTags = append(varietyTags, m[1])
-			}
-		}
+	// 将去重后的标签转换为切片
+	for tag := range tagSet {
+		tags = append(tags, tag)
 	}
 	return
 }