Browse Source

fix:知识资源内容抽取

zqbao 4 months ago
parent
commit
0d951d735b
1 changed files with 19 additions and 1 deletions
  1. 19 1
      services/knowledge/es.go

+ 19 - 1
services/knowledge/es.go

@@ -7,6 +7,9 @@ import (
 	"eta_gn/eta_api/utils"
 	"fmt"
 	"strconv"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
 )
 
 // SearchChartInfoData 查询es中的图表数据
@@ -154,6 +157,15 @@ func SearchKnowledgeResourceByEs(resourceType int, keywordStr string, showSysIds
 	return
 }
 
+// ExtractTextFromResourceContent parses content as HTML and returns its
+// visible text with the markup stripped.
+//
+// The contents of <script> and <style> elements are dropped before the text
+// is collected, because goquery's Text() concatenates every descendant text
+// node and would otherwise return raw JS/CSS as part of the result.
+// Leading and trailing whitespace is trimmed so that indentation and line
+// breaks left over from the markup do not count against callers that truncate
+// the result to a fixed length.
+//
+// An empty string is returned when the content cannot be parsed.
+func ExtractTextFromResourceContent(content string) (text string) {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
+	if err != nil {
+		// Best effort: callers treat a missing excerpt as empty text.
+		return ""
+	}
+	// Remove non-visible elements whose bodies are stored as text nodes.
+	doc.Find("script,style").Remove()
+	return strings.TrimSpace(doc.Text())
+}
+
 // EsAddOrEditKnowledgeResource 新增/修改es中的知识资源数据
 func EsAddOrEditKnowledgeResource(item *knowledge.KnowledgeResource) (err error) {
 	defer func() {
@@ -164,7 +176,13 @@ func EsAddOrEditKnowledgeResource(item *knowledge.KnowledgeResource) (err error)
 	}()
 	indexName := utils.EsKnowledgeResourceIndexName
 	client := utils.EsClient
-
+	if item.IsFile == 0 {
+		content := ExtractTextFromResourceContent(item.Content)
+		contentRunes := []rune(content)
+		if len(contentRunes) > 60 {
+			item.Content = string(contentRunes[:60])
+		}
+	}
 	request := client.Index().Index(indexName).Id(strconv.Itoa(item.KnowledgeResourceId)).BodyJson(item)
 	response, err := request.Do(context.Background())
 	if err != nil {