|
@@ -399,7 +399,7 @@ func GenerateWechatArticleAbstractByQuestion(item *rag.WechatArticle, question *
|
|
|
//你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry
|
|
|
questionStr := fmt.Sprintf(`%s\n%s`, `你现在是一名资深的期货行业分析师,请基于以下的问题进行汇总总结,如果不能正常总结出来,那么就只需要回复我:sorry。以下是问题:`, question.QuestionContent)
|
|
|
//开始对话
|
|
|
- abstract, industryTags, _, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_WECHAT, questionStr)
|
|
|
+ abstract, industryTags, tmpErr := getAnswerByContent(item.WechatArticleId, utils.AI_ARTICLE_SOURCE_WECHAT, questionStr)
|
|
|
if tmpErr != nil {
|
|
|
err = fmt.Errorf("LLM对话失败,Err:" + tmpErr.Error())
|
|
|
return
|
|
@@ -586,7 +586,7 @@ func DelLlmDoc(vectorKeyList []string, wechatArticleAbstractIdList []int) (err e
|
|
|
return
|
|
|
}
|
|
|
|
|
|
-func getAnswerByContent(articleId int, source int, questionStr string) (answer string, industryTags, varietyTags []string, err error) {
|
|
|
+func getAnswerByContent(articleId int, source int, questionStr string) (answer string, tagNameList []string, err error) {
|
|
|
//addArticleChatRecordList = make([]*rag.WechatArticleChatRecord, 0)
|
|
|
|
|
|
result, err := facade.AIGCBaseOnPromote(facade.AIGC{
|
|
@@ -617,7 +617,7 @@ func getAnswerByContent(articleId int, source int, questionStr string) (answer s
|
|
|
answer = strings.TrimSpace(answer)
|
|
|
|
|
|
// 提取标签
|
|
|
- industryTags, varietyTags = extractLabels(answer)
|
|
|
+ tagNameList = extractLabels(answer)
|
|
|
|
|
|
//// 待入库的数据
|
|
|
//addArticleChatRecordList = append(addArticleChatRecordList, &rag.WechatArticleChatRecord{
|
|
@@ -1355,37 +1355,27 @@ func DelEsRagQuestion(questionId int) {
|
|
|
}
|
|
|
|
|
|
// extractLabelsRe matches one label wrapped in full-width brackets (for example
// 【A】) and captures the text between the brackets. It is compiled once at
// package scope so extractLabels does not recompile it on every call.
var extractLabelsRe = regexp.MustCompile(`【([^】]*)】`)

// extractLabels
// @Description: Extract every bracketed label from the summary text, de-duplicated, in order of first appearance
// @author: Roc
// @datetime 2025-04-18 17:16:05
// @param text string
// @return tags []string
func extractLabels(text string) (tags []string) {
	matches := extractLabelsRe.FindAllStringSubmatch(text, -1)
	if len(matches) == 0 {
		// No label in the text: keep the result nil, exactly as when nothing
		// is ever appended to the named return value.
		return nil
	}

	// seen records labels that were already emitted. The output slice is built
	// while scanning the matches left to right, so the order is stable across
	// calls; ranging over the map to build the result would yield a random
	// order on each invocation.
	seen := make(map[string]struct{}, len(matches))
	tags = make([]string, 0, len(matches))
	for _, m := range matches {
		// The pattern has exactly one capture group, so m[1] always exists.
		name := m[1]
		if _, dup := seen[name]; dup {
			continue
		}
		seen[name] = struct{}{}
		tags = append(tags, name)
	}
	return tags
}
|