rdluck 3 years ago
parent
commit
26fefeabcc
6 changed files with 164 additions and 45 deletions
  1. + 3 - 0
      controllers/article.go
  2. + 3 - 6
      controllers/search.go
  3. + 4 - 9
      models/tactics.go
  4. + 83 - 7
      services/elastic.go
  5. + 7 - 18
      services/tactics.go
  6. + 64 - 5
      services/task.go

+ 3 - 0
controllers/article.go

@@ -2,6 +2,7 @@ package controllers
 
 import (
 	"encoding/json"
+	"fmt"
 	"hongze/hongze_cygx/models"
 	"hongze/hongze_cygx/services"
 	"hongze/hongze_cygx/utils"
@@ -71,6 +72,8 @@ func (this *ArticleController) Detail() {
 			br.ErrMsg = "获取信息失败,Err:" + err.Error()
 			return
 		}
+		fmt.Println(detail.Department)
+
 
 		detail.Body = html.UnescapeString(detail.Body)
 		detail.Abstract = html.UnescapeString(detail.Abstract)

+ 3 - 6
controllers/search.go

@@ -64,13 +64,10 @@ func (this *SearchController) SearchList() {
 		}
 		categoryName := strings.Join(categoryNameArr, ",")
 	*/
-	indexName := "article_list"
-	//EsMultiMatchFunctionScoreQuery
-	//EsMatchFunctionScoreQuery
-	result, total, err := services.EsMultiMatchFunctionScoreQuery(indexName, keyWord, startSize, pageSize) //services.EsMatchFunctionScoreQuery(indexName, keyWord, startSize, pageSize) //services.EsMatchQuery(indexName, keyWord)
-	//result, err := services.EsMatchQuery(indexName, keyWord)//services.EsMatchQuery(indexName, keyWord)
 	//indexName := "article_list"
-	//result, err := services.EsMatchPhraseQuery(indexName, keyWord)
+	indexName := "test_article"
+	result, total, err := services.EsMultiMatchFunctionScoreQuery(indexName, keyWord, startSize, pageSize) //services.EsMatchFunctionScoreQuery(indexName, keyWord, startSize, pageSize) //services.EsMatchQuery(indexName, keyWord)
+	//result, total, err := services.EsMultiMatchFunctionScoreQueryFix(indexName, keyWord, startSize, pageSize)
 	if err != nil {
 		br.Msg = "检索失败"
 		br.ErrMsg = "检索失败,Err:" + err.Error()

+ 4 - 9
models/tactics.go

@@ -22,19 +22,13 @@ type Tactics struct {
 func GetTacticsList(endDate string) (list []*Tactics, err error) {
 	o := orm.NewOrm()
 	o.Using("tactics")
-	//sql := `SELECT a.*,b.body,b.abstract,b.annotation,b.article_id,d.category_name,d.sub_category_name,c.category_id
-	//		FROM article_articleinfo AS a
-	//		INNER JOIN article_content AS b ON a.id=b.article_id
-	//		INNER JOIN article_category AS c ON a.id=c.article_id
-	//		INNER JOIN article_categoryinfo AS d ON c.category_id=d.id
-	//		WHERE a.publish_status=1 AND d.id IN (79, 28, 32, 84) `
-
 	sql := `SELECT a.*,b.body,b.abstract,b.annotation,b.article_id,d.category_name,d.sub_category_name,c.category_id
 				FROM article_articleinfo AS a
 				INNER JOIN article_content AS b ON a.id=b.article_id
 				INNER JOIN article_category AS c ON a.id=c.article_id
 				INNER JOIN article_categoryinfo AS d ON c.category_id=d.id
-				WHERE a.publish_status=1 AND a.publish_date>=? AND d.id IN (28,32,45,50,57,62,72,74,79,80,84,86,88,89,90) `
+				WHERE a.publish_status=1 AND a.publish_date>=? AND d.id IN (28,32,45,50,57,62,72,74,79,80,84,86,88,89,90) 
+                AND a.id NOT IN (3454,3456,3457,3459,2449,2450,2453,2454,2459,2530,2583,2663,2670,2699,2715,2732,2748,2759,2399,2356,2870,3173,2978,2826,3470) `
 	_, err = o.Raw(sql, endDate).QueryRows(&list)
 	return
 }
@@ -47,7 +41,8 @@ func GetTacticsListAll() (list []*Tactics, err error) {
 			INNER JOIN article_content AS b ON a.id=b.article_id
 			INNER JOIN article_category AS c ON a.id=c.article_id
 			INNER JOIN article_categoryinfo AS d ON c.category_id=d.id
-			WHERE a.publish_status=1 AND d.id IN (28,32,45,50,57,62,72,74,79,80,84,86,88,89,90) `
+			WHERE a.publish_status=1 AND d.id IN (28,32,45,50,57,62,72,74,79,80,84,86,88,89,90) 
+            AND a.id NOT IN (3454,3456,3457,3459,2449,2450,2453,2454,2459,2530,2583,2663,2670,2699,2715,2732,2748,2759,2399,2356,2870,3173,2978,2826,3470) `
 	//IN(85,71)
 	_, err = o.Raw(sql).QueryRows(&list)
 	return

+ 83 - 7
services/elastic.go

@@ -53,7 +53,7 @@ func EsCreateIndex(indexName, mappingJson string) (err error) {
 }
 
 //新增数据
-func EsAddOrEditData(indexName, docId string, item *ElasticArticleDetail) (err error) {
+func EsAddOrEditData(indexName, docId string, item *ElasticTestArticleDetail) (err error) {
 	defer func() {
 		if err != nil {
 			fmt.Println("EsAddOrEditData Err:", err.Error())
@@ -70,11 +70,8 @@ func EsAddOrEditData(indexName, docId string, item *ElasticArticleDetail) (err e
 	}
 	if searchById != nil && searchById.Found {
 		resp, err := client.Update().Index(indexName).Id(docId).Doc(map[string]interface{}{
-			"BodyText":    item.BodyText,
-			"Title":       item.Title,
-			"PublishDate": item.PublishDate,
-			"TitleEn":     item.TitleEn,
-			"CreateDate":  item.CreateDate,
+			"BodyText": item.BodyText,
+			"Title":    item.Title,
 		}).Do(context.Background())
 		if err != nil {
 			return err
@@ -388,6 +385,82 @@ func EsMultiMatchFunctionScoreQuery(indexName, keyWord string, startSize, pageSi
 	boolquery := elastic.NewBoolQuery()
 	matchArr := make([]elastic.Query, 0)
 
+	n := 0
+	keyWordLen := len(keyWordArr)
+	if keyWordLen <= 0 {
+		keyWordArr = append(keyWordArr, keyWord)
+		keyWordLen = len(keyWordArr)
+	}
+	fmt.Println("keyWordArr:", keyWordArr)
+	keyWordWeight := GetWeight(keyWordLen)
+	for k, v := range keyWordArr {
+		if v != "" {
+			weight := float64(keyWordWeight[k])
+			multiMatch := elastic.NewMultiMatchQuery(v, "Title", "BodyText").Analyzer("ik_smart")
+			bodyFunctionQuery := elastic.NewFunctionScoreQuery()
+			bodyFunctionQuery.Query(multiMatch)
+			bodyFunctions := elastic.NewWeightFactorFunction(weight)
+			bodyFunctionQuery.AddScoreFunc(bodyFunctions)
+			bodyFunctionQuery.BoostMode("replace")
+			matchArr = append(matchArr, bodyFunctionQuery)
+		}
+		n++
+	}
+	boolquery.Should(matchArr...)
+	highlight := elastic.NewHighlight()
+	highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
+	highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
+	request := client.Search(indexName).Highlight(highlight).From(startSize).Size(pageSize).Query(boolquery)
+	searchByMatch, err := request.Do(context.Background())
+	if searchByMatch != nil {
+		if searchByMatch.Hits != nil {
+			for _, v := range searchByMatch.Hits.Hits {
+				articleJson, err := v.Source.MarshalJSON()
+				if err != nil {
+					return nil, 0, err
+				}
+				article := new(models.CygxArticle)
+				err = json.Unmarshal(articleJson, &article)
+				if err != nil {
+					return nil, 0, err
+				}
+				searchItem := new(models.SearchItem)
+				searchItem.ArticleId, _ = strconv.Atoi(v.Id)
+				if len(v.Highlight["BodyText"]) > 0 {
+					searchItem.Body = v.Highlight["BodyText"]
+				} else {
+					bodyRune := []rune(article.BodyText)
+					bodyRuneLen := len(bodyRune)
+					if bodyRuneLen > 100 {
+						bodyRuneLen = 100
+					}
+					body := string(bodyRune[:bodyRuneLen])
+					searchItem.Body = []string{body}
+				}
+				var title string
+				if len(v.Highlight["Title"]) > 0 {
+					title = v.Highlight["Title"][0]
+				} else {
+					title = article.Title
+				}
+				searchItem.Title = title
+				searchItem.PublishDate = article.PublishDate
+				result = append(result, searchItem)
+			}
+		}
+		total = searchByMatch.Hits.TotalHits.Value
+	}
+	return
+}
+
+func EsMultiMatchFunctionScoreQueryFix(indexName, keyWord string, startSize, pageSize int) (result []*models.SearchItem, total int64, err error) {
+	client, err := NewClient()
+	keyWordArr, err := GetIndustryMapNameSliceV2(keyWord)
+
+	keyWordArr = RemoveDuplicatesAndEmpty(keyWordArr)
+	boolquery := elastic.NewBoolQuery()
+	matchArr := make([]elastic.Query, 0)
+
 	n := 0
 	keyWordLen := len(keyWordArr)
 	if keyWordLen <= 0 {
@@ -417,6 +490,10 @@ func EsMultiMatchFunctionScoreQuery(indexName, keyWord string, startSize, pageSi
 	request := client.Search(indexName).Highlight(highlight).From(startSize).Size(pageSize).Query(boolquery)
 	searchByMatch, err := request.Do(context.Background())
 	if searchByMatch != nil {
+
+		matchResult, _ := json.Marshal(searchByMatch)
+		utils.FileLog.Info("%s", string(matchResult))
+
 		if searchByMatch.Hits != nil {
 			for _, v := range searchByMatch.Hits.Hits {
 				articleJson, err := v.Source.MarshalJSON()
@@ -440,7 +517,6 @@ func EsMultiMatchFunctionScoreQuery(indexName, keyWord string, startSize, pageSi
 						bodyRuneLen = 100
 					}
 					body := string(bodyRune[:bodyRuneLen])
-					fmt.Println(body)
 					searchItem.Body = []string{body}
 				}
 				var title string

+ 7 - 18
services/tactics.go

@@ -19,11 +19,11 @@ func SyncTacticsList() (err error) {
 		}
 	}()
 	fmt.Println("同步数据")
-	indexName := "article_list"
-
-	endDate := time.Now().AddDate(0, 0, -5).Format(utils.FormatDate)
-	list, err := models.GetTacticsList(endDate)
-	//list, err := models.GetTacticsListAll()
+	//indexName := "article_list"
+	indexName := "test_article"
+	//endDate := time.Now().AddDate(0, 0, -5).Format(utils.FormatDate)
+	//list, err := models.GetTacticsList(endDate)
+	list, err := models.GetTacticsListAll()
 	if err != nil {
 		fmt.Println("GetTacticsList Err:", err.Error())
 		return
@@ -31,6 +31,7 @@ func SyncTacticsList() (err error) {
 	fmt.Println("list len:", len(list))
 
 	for k, v := range list {
+		v.Department="弘则权益研究"
 		fmt.Println(k, v.ArticleId)
 
 		publishDate, err := time.Parse(utils.FormatDateTime, v.PublishDate)
@@ -118,21 +119,9 @@ func SyncTacticsList() (err error) {
 			a.Remove()
 		})
 		bodyText := doc.Text()
-		item := new(ElasticArticleDetail)
+		item := new(ElasticTestArticleDetail)
 		item.ArticleId = v.ArticleId
 		item.Title = v.Title
-		item.TitleEn = v.TitleEn
-		item.UpdateFrequency = v.UpdateFrequency
-		item.CreateDate = v.CreateDate
-		item.PublishDate = v.PublishDate
-		item.Abstract = v.Abstract
-		item.CategoryName = v.CategoryName
-		item.SubCategoryName = v.SubCategoryName
-		item.InterviewDate = interviewDateStr
-		item.ExpertBackground = expertContentStr
-		item.ExpertNumber = expertNumStr
-		item.Department = v.Department
-		item.ArticleIdMd5 = utils.MD5(strconv.Itoa(v.ArticleId))
 		item.BodyText = bodyText
 		EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
 	}

+ 64 - 5
services/task.go

@@ -2,17 +2,15 @@ package services
 
 import (
 	"fmt"
-	"github.com/astaxie/beego/toolbox"
-
 	//"github.com/astaxie/beego/cache"
 )
 
 func Task() {
 	fmt.Println("start")
 	//同步数据
-	syncTacticsList := toolbox.NewTask("syncTacticsList", "0 */5 * * * *", SyncTacticsList)
-	toolbox.AddTask("syncTacticsList", syncTacticsList)
-	toolbox.StartTask()
+	//syncTacticsList := toolbox.NewTask("syncTacticsList", "0 */5 * * * *", SyncTacticsList)
+	//toolbox.AddTask("syncTacticsList", syncTacticsList)
+	//toolbox.StartTask()
 	//SyncTacticsList()
 	fmt.Println("end")
 }
@@ -162,6 +160,60 @@ func ElasticOption() {
 	//EsDeleteData(indexName,)
 }
 
+/*
+Reference snippet (commented out, not executed). It appears to mirror how a
+search hit is copied into a models.SearchItem:
+
+searchItem := new(models.SearchItem)
+searchItem.ArticleId, _ = strconv.Atoi(v.Id)
+searchItem.Body = v.Highlight["BodyText"]
+searchItem.Title = title
+searchItem.PublishDate = article.PublishDate
+*/
+
+// CreateIndex creates the "test_article" index by handing a fixed mapping to
+// EsCreateIndex.
+//
+// The mapping sets "dynamic": true and declares ArticleId as an integer and
+// BodyText, PublishDate, SubCategoryName, Title, TitleEn and UpdateFrequency
+// as text fields analyzed with ik_smart. Every text field except PublishDate
+// also sets "term_vector": "with_positions_offsets".
+//
+// The function returns nothing, so a failure from EsCreateIndex is reported on
+// standard output instead of being silently dropped.
+//
+// NOTE(review): PublishDate is mapped as analyzed text rather than a date or
+// keyword type; confirm this is intended before relying on date sorting or
+// range filters.
+func CreateIndex() {
+	indexName := "test_article"
+	mappingJson := `{
+  "mappings": {
+    "dynamic": true,
+    "properties": {
+      "ArticleId": {
+        "type": "integer"
+      },
+      "BodyText": {
+        "type": "text",
+        "analyzer": "ik_smart",
+        "term_vector": "with_positions_offsets"
+      },
+      "PublishDate": {
+        "type": "text",
+        "analyzer": "ik_smart"
+      },
+      "SubCategoryName": {
+        "type": "text",
+        "analyzer": "ik_smart",
+        "term_vector": "with_positions_offsets"
+      },
+      "Title": {
+        "type": "text",
+        "analyzer": "ik_smart",
+        "term_vector": "with_positions_offsets"
+      },
+      "TitleEn": {
+        "type": "text",
+        "analyzer": "ik_smart",
+        "term_vector": "with_positions_offsets"
+      },
+      "UpdateFrequency": {
+        "type": "text",
+        "analyzer": "ik_smart",
+        "term_vector": "with_positions_offsets"
+      }
+    }
+  }
+}`
+	// Surface the failure here: there is no return value to carry it to the caller.
+	if err := EsCreateIndex(indexName, mappingJson); err != nil {
+		fmt.Println("CreateIndex Err:", err.Error())
+	}
+}
+
+
+// "term_vector": "with_positions_offsets"
+
 type ElasticArticleDetail struct {
 	ArticleId        int    `description:"报告id"`
 	Title            string `description:"标题"`
@@ -179,3 +231,10 @@ type ElasticArticleDetail struct {
 	ArticleIdMd5     string `description:"纪要id"`
 	BodyText         string `description:"内容"`
 }
+
+
+// ElasticTestArticleDetail is the reduced article document that
+// EsAddOrEditData accepts: only the article ID (ArticleId), the title (Title)
+// and the body text (BodyText). The description tags are kept exactly as
+// written because they are readable at runtime through reflection.
+type ElasticTestArticleDetail struct {
+	ArticleId int    `description:"报告id"`
+	Title     string `description:"标题"`
+	BodyText  string `description:"内容"`
+}