rdluck 4 жил өмнө
parent
commit
beb2c590bc

+ 3 - 3
models/tactics.go

@@ -19,7 +19,7 @@ type Tactics struct {
 	CreateDate      string `description:"创建日期"`
 }
 
-func GetTacticsList() (list []*Tactics, err error) {
+func GetTacticsList(endDate string) (list []*Tactics, err error) {
 	o := orm.NewOrm()
 	o.Using("tactics")
 	sql := `SELECT a.*,b.body,b.abstract,b.annotation,b.article_id,d.category_name,d.sub_category_name,c.category_id
@@ -27,7 +27,7 @@ func GetTacticsList() (list []*Tactics, err error) {
 			INNER JOIN article_content AS b ON a.id=b.article_id
 			INNER JOIN article_category AS c ON a.id=c.article_id
 			INNER JOIN article_categoryinfo AS d ON c.category_id=d.id 
-			WHERE a.publish_status=1 AND a.id=3439 AND d.id IN (79, 28, 32, 84) `
-	_, err = o.Raw(sql).QueryRows(&list)
+			WHERE a.publish_status=1 AND a.publish_date>=? AND d.id IN (79, 28, 32, 84) `
+	_, err = o.Raw(sql, endDate).QueryRows(&list)
 	return
 }

+ 42 - 17
services/elastic.go

@@ -50,19 +50,44 @@ func EsCreateIndex(indexName, mappingJson string) (err error) {
 }
 
 // EsAddOrEditData (formerly EsAddData) writes one article document to the
 // index indexName under the id docId. It first fetches the document by id;
 // when the lookup reports Found it issues an Update request carrying only
 // BodyText and Title, otherwise it issues an Index request with item as the
 // JSON body. The outcome of either request is printed to stdout, and any
 // error is both printed by the deferred function and returned to the caller.
-func EsAddData(indexName, docId string, item interface{}) (err error) {
+func EsAddOrEditData(indexName, docId string, item *ElasticArticleDetail) (err error) {
+	defer func() {
+		if err != nil {
+			fmt.Println("EsAddOrEditData Err:", err.Error())
+		}
+	}()
 	client, err := NewClient()
 	if err != nil {
 		return
 	}
-	resp, err := client.Index().Index(indexName).Id(docId).BodyJson(item).Do(context.Background())
 	// NOTE(review): if the client reports a missing document as an error
 	// rather than as Found == false, the early return below fires and the
 	// Index branch is never reached for new documents — confirm against the
 	// client library's Get behaviour.
+	searchById, err := client.Get().Index(indexName).Id(docId).Do(context.Background())
 	if err != nil {
 		return
 	}
-	if resp.Status == 0 && resp.Result == "created" {
-		fmt.Println("新增成功")
+	if searchById.Found {
 		// resp and err are new variables scoped to this branch, so the error
 		// is handed back with an explicit "return err".
+		resp, err := client.Update().Index(indexName).Id(docId).Doc(map[string]interface{}{
+			"BodyText": item.BodyText,
+			"Title":    item.Title,
+		}).Do(context.Background())
+		if err != nil {
+			return err
+		}
+		fmt.Println(resp.Status, resp.Result)
+		if resp.Status == 0 {
+			fmt.Println("修改成功")
+		} else {
+			fmt.Println("EditData", resp.Status, resp.Result)
+		}
 	} else {
-		fmt.Println("AddData", resp.Status, resp.Result)
 		// Same branch-local resp/err pattern as the update path above.
+		resp, err := client.Index().Index(indexName).Id(docId).BodyJson(item).Do(context.Background())
+		if err != nil {
+			return err
+		}
+		if resp.Status == 0 && resp.Result == "created" {
+			fmt.Println("新增成功")
+		} else {
+			fmt.Println("AddData", resp.Status, resp.Result)
+		}
 	}
 	return
 }
@@ -107,14 +132,14 @@ func EsMatchQuery(indexName, keyWord string) (result []*models.SearchItem, err e
 	searchMap := make(map[int]int)
 	boolquery := elastic.NewBoolQuery()
 	keyLen := len(keyWordArr)
-	n := float64(keyLen)
+	n := 2.0 * float64(keyLen)
 	matchArr := make([]elastic.Query, 0)
-
-	matchq1 := elastic.NewMatchQuery("Title", keyWord).Boost(n + 1).Analyzer("ik_smart")
-	matchq2 := elastic.NewMatchQuery("BodyText", keyWord).Boost(n + 1).Analyzer("ik_smart")
-
-	matchArr = append(matchArr, matchq1)
-	matchArr = append(matchArr, matchq2)
+	//
+	//matchq1 := elastic.NewMatchQuery("Title", keyWord).Boost(n + 1).Analyzer("ik_smart")
+	//matchq2 := elastic.NewMatchQuery("BodyText", keyWord).Boost(n + 1).Analyzer("ik_smart")
+	//
+	//matchArr = append(matchArr, matchq1)
+	//matchArr = append(matchArr, matchq2)
 	for _, v := range keyWordArr {
 		if v != "" {
 			matchq1 := elastic.NewMatchQuery("Title", v).Boost(n).Analyzer("ik_smart")
@@ -122,7 +147,7 @@ func EsMatchQuery(indexName, keyWord string) (result []*models.SearchItem, err e
 			matchArr = append(matchArr, matchq1)
 			matchArr = append(matchArr, matchq2)
 		}
-		n--
+		n = n - 2
 	}
 	boolquery.Should(matchArr...)
 	highlight := elastic.NewHighlight()
@@ -148,11 +173,11 @@ func EsMatchQuery(indexName, keyWord string) (result []*models.SearchItem, err e
 					searchItem.Body = v.Highlight["BodyText"]
 				} else {
 					bodyRune := []rune(article.BodyText)
-					bodyRuneLen:=len(bodyRune)
-					if bodyRuneLen>100 {
-						bodyRuneLen=100
+					bodyRuneLen := len(bodyRune)
+					if bodyRuneLen > 100 {
+						bodyRuneLen = 100
 					}
-					body:= string(bodyRune[:bodyRuneLen])
+					body := string(bodyRune[:bodyRuneLen])
 					fmt.Println(body)
 					searchItem.Body = []string{body}
 				}

+ 80 - 30
services/tactics.go

@@ -8,30 +8,26 @@ import (
 	"html"
 	"strconv"
 	"strings"
+	"time"
 )
 
 //同步策略文章
-func SyncTacticsList() {
-	var err error
+func SyncTacticsList() (err error) {
 	defer func() {
 		if err != nil {
 			fmt.Println("同步失败,Err:", err.Error())
 		}
 	}()
-	list, err := models.GetTacticsList()
+
+	indexName := "article_list"
+
+	endDate := time.Now().Format(utils.FormatDate)
+	list, err := models.GetTacticsList(endDate)
 	if err != nil {
 		fmt.Println("GetTacticsList Err:", err.Error())
 		return
 	}
 	for k, v := range list {
-		fmt.Println(v.ArticleId)
-		if v.ArticleId == 3439 {
-			expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)
-			fmt.Println(k,expertNumStr, expertContentStr, interviewDateStr)
-		}else{
-			continue
-		}
-
 		//判断是否已经存在
 		if v.ArticleId < 0 {
 			fmt.Println("参数错误")
@@ -39,12 +35,12 @@ func SyncTacticsList() {
 		}
 		count, err := models.GetArticleCountById(v.ArticleId)
 		if err != nil && err.Error() != utils.ErrNoRow() {
-			return
+			return err
 		}
+
+		expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)
 		if count > 0 {
-			fmt.Println("modify")
-			expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)
-			bodyText,_:=GetReportContentTextSub(v.Body)
+			bodyText, _ := GetReportContentTextSub(v.Body)
 			updateParams := make(map[string]interface{})
 			updateParams["Title"] = v.Title
 			updateParams["TitleEn"] = v.TitleEn
@@ -65,19 +61,9 @@ func SyncTacticsList() {
 			whereParam := map[string]interface{}{"article_id": v.ArticleId}
 			go models.UpdateByExpr(models.CygxArticle{}, whereParam, updateParams)
 		} else {
+			fmt.Println(k, v.ArticleId, "add")
 			item := new(models.CygxArticle)
 			articleIdInt := v.ArticleId
-			//判断文章是否已经存在
-			existItem, err := models.GetArticleDetailById(articleIdInt)
-			if err != nil && err.Error() != utils.ErrNoRow() {
-				fmt.Println("GetArticleDetailById Err:" + err.Error())
-				return
-			}
-			if existItem != nil && existItem.ArticleId > 0 {
-				fmt.Println("exist", articleIdInt)
-				continue
-			}
-			expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)
 			item.ArticleId = articleIdInt
 			item.Title = v.Title
 			item.TitleEn = v.TitleEn
@@ -98,14 +84,42 @@ func SyncTacticsList() {
 			_, err = models.AddCygxArticle(item)
 			if err != nil {
 				fmt.Println("AddCygxArticle Err:", err.Error())
-				return
+				return err
 			}
 		}
+
+		content := html.UnescapeString(v.Body)
+		doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
+		if err != nil {
+			fmt.Println("create doc err:", err.Error())
+			return err
+		}
+		bodyText := doc.Text()
+		item := new(ElasticArticleDetail)
+		item.ArticleId = v.ArticleId
+		item.Title = v.Title
+		item.TitleEn = v.TitleEn
+		item.UpdateFrequency = v.UpdateFrequency
+		item.CreateDate = v.CreateDate
+		item.PublishDate = v.PublishDate
+		item.Abstract = v.Abstract
+		item.CategoryName = v.CategoryName
+		item.SubCategoryName = v.SubCategoryName
+		item.InterviewDate = interviewDateStr
+		item.ExpertBackground = expertContentStr
+		item.ExpertNumber = expertNumStr
+		item.Department = v.Department
+		item.ArticleIdMd5 = utils.MD5(strconv.Itoa(v.ArticleId))
+		item.BodyText = bodyText
+		EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
 	}
+	return
 }
 
 //body 解析
 func BodyAnalysis(body string) (expertNumStr, expertContentStr, interviewDateStr string) {
+	body = html.UnescapeString(body)
+	utils.FileLog.Info("%s", body)
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
 	if err != nil {
 		fmt.Println("create doc err:", err.Error())
@@ -117,7 +131,6 @@ func BodyAnalysis(body string) (expertNumStr, expertContentStr, interviewDateStr
 	var interviewDateArr []string
 
 	doc.Find("p").Each(func(i int, s *goquery.Selection) {
-		fmt.Println("get p")
 		contentTxt := s.Text()
 		if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
 			interviewDate := s.Next().Text()
@@ -149,11 +162,48 @@ func BodyAnalysis(body string) (expertNumStr, expertContentStr, interviewDateStr
 			}
 		}
 	})
-
+	if len(expertContentArr) == 0 {
+		doc.Find("pre").Each(func(i int, s *goquery.Selection) {
+			contentTxt := s.Text()
+			if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
+				interviewDate := s.Next().Text()
+				if interviewDate != "" {
+					interviewDateArr = append(interviewDateArr, interviewDate)
+				}
+			}
+			if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
+				expertContent := s.Next().Text()
+				if expertContent == "" {
+					expertContent = contentTxt
+				}
+				if expertContent != "" {
+					rightIndex := strings.Index(expertContent, ")")
+					if rightIndex == 0 {
+						rightIndex = strings.Index(expertContent, ")")
+					}
+					expertNum := expertContent[:rightIndex]
+					expertNum = strings.Replace(expertNum, "(", "", -1)
+					expertNum = strings.Replace(expertNum, "(", "", -1)
+					expertNum = strings.Replace(expertNum, "专家评价", "", -1)
+					if expertNum != "" {
+						expertNumArr = append(expertNumArr, expertNum)
+						rightIndex = rightIndex
+						expertContentStr := expertContent[rightIndex:]
+						expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
+						expertContentStr = strings.TrimLeft(expertContentStr, ":")
+						expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
+						if expertContentStr != "" {
+							expertContentArr = append(expertContentArr, expertContentStr)
+						}
+					}
+				}
+			}
+		})
+	}
 
 	if len(expertNumArr) > 0 {
 		expertNumStr = expertNumArr[0]
-		if expertNumStr!="" {
+		if expertNumStr != "" {
 			expertNumStr = strings.Replace(expertNumStr, "#:", "", -1)
 			expertNumStr = strings.Replace(expertNumStr, "# ", "", -1)
 			expertNumStr = strings.Trim(expertNumStr, "")

+ 9 - 2
services/task.go

@@ -2,11 +2,17 @@ package services
 
 import (
 	"fmt"
+	"github.com/astaxie/beego/toolbox"
 	//"github.com/astaxie/beego/cache"
 )
 
 func Task() {
 	fmt.Println("start")
+	//同步数据
+	syncTacticsList := toolbox.NewTask("syncTacticsList", "0 */2 0-18 * * *", SyncTacticsList)
+	toolbox.AddTask("syncTacticsList", syncTacticsList)
+	toolbox.StartTask()
+
 	//keyWord := "专家"
 	//pageSize := 20
 	//SearchByKeyWord(keyWord, pageSize)
@@ -100,7 +106,7 @@ func Task() {
 	//	return
 	//}
 	//
-	////indexName := "article"
+	//indexName := "article_list"
 	//
 	//for _, v := range allList {
 	//	content := html.UnescapeString(v.Body)
@@ -127,7 +133,7 @@ func Task() {
 	//	item.Department = v.Department
 	//	item.ArticleIdMd5 = v.ArticleIdMd5
 	//	item.BodyText = bodyText
-	//	EsAddData(indexName, strconv.Itoa(v.ArticleId), item)
+	//	EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), item)
 	//}
 
 	//fmt.Println("bm")
@@ -142,6 +148,7 @@ func Task() {
 	//b:=bm.IsExist(key)
 	//fmt.Println(b)
 
+	//同步策略文章
 	//SyncTacticsList()
 	fmt.Println("end")
 }