瀏覽代碼

merge code

rdluck 4 年之前
父節點
當前提交
a934f2b094
共有 7 個文件被更改,包括 436 次插入和 27 次删除
  1. 53 14
      models/article.go
  2. 1 0
      models/db.go
  3. 21 2
      services/article.go
  4. 148 0
      services/data_import.go
  5. 207 11
      services/elasticsearch.go
  6. 6 0
      services/task.go
  7. 二進制
      temp/权益报告导入0319.xlsx

+ 53 - 14
models/article.go

@@ -2,23 +2,34 @@ package models
 
 import (
 	"rdluck_tools/orm"
-	"time"
 )
 
 type CygxArticle struct {
-	ArticleId        int       `description:"文章id"`
-	Title            string    `description:"标题"`
-	TitleEn          string    `description:"英文标题 "`
-	UpdateFrequency  string    `description:"更新周期"`
-	CreateDate       string    `description:"创建时间"`
-	PublishDate      string    `description:"发布时间"`
-	Body             string    `description:"内容"`
-	Abstract         string    `description:"摘要"`
-	CategoryName     string    `description:"一级分类"`
-	SubCategoryName  string    `description:"二级分类"`
-	InterviewTime    time.Time `description:"访谈时间"`
-	ExpertBackground string    `description:"专家背景"`
-	ExpertNumber     string    `description:"专家编号"`
+	Id               int    `orm:"column(id);pk"`
+	ArticleId        int    `description:"文章id"`
+	Title            string `description:"标题"`
+	TitleEn          string `description:"英文标题 "`
+	UpdateFrequency  string `description:"更新周期"`
+	CreateDate       string `description:"创建时间"`
+	PublishDate      string `description:"发布时间"`
+	Body             string `description:"内容"`
+	Abstract         string `description:"摘要"`
+	CategoryName     string `description:"一级分类"`
+	SubCategoryName  string `description:"二级分类"`
+	PublishStatus    int    `description:"发布状态"`
+	CategoryId       int    `description:"分类id"`
+	ExpertBackground string `description:"专家背景"`
+	ExpertNumber     string `description:"专家编号"`
+	InterviewDate    string `description:"访谈日期"`
+	Department       string `description:"作者"`
+	ArticleIdMd5     string `description:"ID,md5值"`
+}
+
+// AddCygxArticle inserts item through the ORM and hands back whatever
+// Insert reports: the id of the new row and any error.
+func AddCygxArticle(item *CygxArticle) (lastId int64, err error) {
+	return orm.NewOrm().Insert(item)
 }
 
 type HomeArticle struct {
@@ -101,3 +112,31 @@ func ModifyArticleExpert(articleId int, expertNumStr, expertContentStr, intervie
 	_, err = o.Raw(sql, expertContentStr, expertNumStr, interviewDateStr, articleId).Exec()
 	return
 }
+
+// ArticleDetailTest is the article projection filled by
+// GetArticleDetailTestById and sent to Elasticsearch by AddMap.
+// Body is excluded from JSON output (json:"-"); AddMap fills BodyText
+// with the text extracted from Body before indexing.
+type ArticleDetailTest struct {
+	ArticleId int    `description:"报告id"`
+	Title     string `description:"标题"`
+	BodyText  string `description:"内容"`
+	Body      string `json:"-" description:"内容"`
+}
+
+func GetArticleDetailTestById(articleId int) (item *ArticleDetailTest, err error) {
+	o := orm.NewOrm()
+	sql := `SELECT * FROM cygx_article WHERE article_id = ? `
+	err = o.Raw(sql, articleId).QueryRow(&item)
+	return
+}
+
+func GetArticleAll() (item []*ArticleDetail, err error) {
+	o := orm.NewOrm()
+	sql := `SELECT * FROM cygx_article `
+	_, err = o.Raw(sql).QueryRows(&item)
+	return
+}
+
+func ModifyArticleContent(articleId int, content, expertNumStr, expertContentStr, interviewDateStr string) (err error) {
+	o := orm.NewOrm()
+	sql := `UPDATE cygx_article SET body=?,expert_background=?,expert_number=?,interview_date=? WHERE article_id=? `
+	_, err = o.Raw(sql, content, expertContentStr, expertNumStr, interviewDateStr, articleId).Exec()
+	return
+}

+ 1 - 0
models/db.go

@@ -38,5 +38,6 @@ func init() {
 		new(Resource),
 		new(CygxApplyRecord),
 		new(CygxInterviewApply),
+		new(CygxArticle),
 	)
 }

+ 21 - 2
services/article.go

@@ -75,9 +75,26 @@ func GetReportContentTextSub(content string) (contentSub string, err error) {
 	return
 }
 
+//解析文章内容
+func GetArticleAll() {
+	var err error
+	defer func() {
+		if err != nil {
+			fmt.Println("err:", err.Error())
+			return
+		}
+	}()
+	list, err := models.GetArticleAll()
+	if err != nil {
+
+	}
+	for _, v := range list {
+		FixArticleContent(v.ArticleId)
+	}
+}
+
 //解析报告
-func GetArticleExpert() {
-	articleId := 3526
+func FixArticleContent(articleId int) {
 	item, err := models.GetArticleDetailById(articleId)
 	if err != nil {
 		fmt.Println("GetArticleDetailById Err:" + err.Error())
@@ -85,6 +102,8 @@ func GetArticleExpert() {
 	}
 	content := item.Body
 	content = html.UnescapeString(content)
+	content = strings.Replace(content, "http", "https", -1)
+
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
 	if err != nil {
 		fmt.Println("create doc err:", err.Error())

+ 148 - 0
services/data_import.go

@@ -0,0 +1,148 @@
+package services
+
+import (
+	"fmt"
+	"github.com/PuerkitoBio/goquery"
+	"github.com/tealeg/xlsx"
+	"hongze/hongze_cygx/models"
+	"hongze/hongze_cygx/utils"
+	"html"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// ImportData synchronises the Shanghai report data: it reads the workbook
+// ./temp/权益报告导入0319.xlsx and inserts its rows as articles.
+//
+// Every sheet is walked row by row; the first row of each sheet is skipped
+// (presumably the header row). For each remaining row it
+//   - reads the article id from column 0 and skips the row when
+//     models.GetArticleDetailById already returns an article for that id;
+//   - parses the create date (column 4) and the publish date (column 6)
+//     with utils.FormatDateTime;
+//   - pulls expert number, expert background and interview date out of the
+//     <p> elements of the HTML body (column 11);
+//   - stores the article through models.AddCygxArticle.
+//
+// Rows with fewer than 19 cells or a non-numeric article id are reported and
+// skipped. A lookup, date-parsing, HTML-parsing or insert error is printed
+// and aborts the whole import.
+func ImportData() {
+	// The highest column index read below is 18.
+	const minCells = 19
+
+	fileName := "./temp/权益报告导入0319.xlsx"
+	xlFile, err := xlsx.OpenFile(fileName)
+	if err != nil {
+		fmt.Printf("open failed: %s\n", err)
+		// xlFile is not usable after a failed open.
+		return
+	}
+	for _, sheet := range xlFile.Sheet {
+		for k, v := range sheet.Rows {
+			if k == 0 {
+				continue
+			}
+			cells := v.Cells
+			if len(cells) < minCells {
+				// Indexing a short row would panic and kill the import.
+				fmt.Println("skip short row:", k, "cells:", len(cells))
+				continue
+			}
+			articleIdStr := cells[0].Value
+			articleIdInt, err := strconv.Atoi(articleIdStr)
+			if err != nil {
+				// Without this check the row would be imported as article 0.
+				fmt.Println("skip row, invalid article id:", articleIdStr)
+				continue
+			}
+			// Skip articles that are already stored.
+			existItem, err := models.GetArticleDetailById(articleIdInt)
+			if err != nil && err.Error() != utils.ErrNoRow() {
+				fmt.Println("GetArticleDetailById Err:" + err.Error())
+				return
+			}
+			if existItem != nil && existItem.ArticleId > 0 {
+				fmt.Println("exist", articleIdInt)
+				continue
+			}
+			title := cells[1].String()
+			titleEn := cells[2].String()
+			updateFrequency := cells[3].String()
+			createDate := cells[4].String()
+			createDateParse, err := time.Parse(utils.FormatDateTime, createDate)
+			if err != nil {
+				fmt.Println("createDateParse err:", err.Error())
+				return
+			}
+			fmt.Println("createDateParse ", createDateParse)
+
+			publishDate := cells[6].String()
+			// The publish date cell may contain backslashes; drop them before parsing.
+			publishDate = strings.Replace(publishDate, `\`, "", -1)
+			fmt.Println("publishDate: ", publishDate)
+
+			publishDateParse, err := time.Parse(utils.FormatDateTime, publishDate)
+			if err != nil {
+				fmt.Println("publishDateParse err:", err.Error())
+				return
+			}
+			fmt.Println("publishDateParse ", publishDateParse)
+
+			body := cells[11].String()
+			abstract := cells[12].String()
+			categoryName := cells[15].String()
+			subCategoryName := cells[16].String()
+			// A conversion error is deliberately ignored here, as before;
+			// categoryId then keeps whatever value Int returned.
+			categoryId, _ := cells[18].Int()
+
+			doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
+			if err != nil {
+				fmt.Println("create doc err:", err.Error())
+				return
+			}
+
+			var expertNumArr []string
+			var expertContentArr []string
+			var interviewDateArr []string
+
+			doc.Find("p").Each(func(_ int, s *goquery.Selection) {
+				contentTxt := s.Text()
+				// The interview date is the text of the paragraph that
+				// follows the "访谈时间:" marker paragraph.
+				if strings.Contains(contentTxt, "访谈时间:") {
+					interviewDateArr = append(interviewDateArr, s.Next().Text())
+				}
+				if strings.Contains(contentTxt, "专家评价") {
+					// Prefer the following paragraph; fall back to the
+					// marker paragraph itself when that one is empty.
+					expertContent := s.Next().Text()
+					if expertContent == "" {
+						expertContent = contentTxt
+					}
+					if num, text, ok := splitExpertEvaluation(expertContent); ok {
+						expertNumArr = append(expertNumArr, num)
+						expertContentArr = append(expertContentArr, text)
+					}
+				}
+			})
+
+			// Only the first match of each kind is stored.
+			var expertNumStr, expertContentStr, interviewDateStr string
+			if len(expertNumArr) > 0 {
+				expertNumStr = expertNumArr[0]
+			}
+			if len(expertContentArr) > 0 {
+				expertContentStr = expertContentArr[0]
+			}
+			if len(interviewDateArr) > 0 {
+				interviewDateStr = interviewDateArr[0]
+			}
+
+			item := &models.CygxArticle{
+				ArticleId:        articleIdInt,
+				Title:            title,
+				TitleEn:          titleEn,
+				UpdateFrequency:  updateFrequency,
+				CreateDate:       createDateParse.Format(utils.FormatDateTime),
+				PublishDate:      publishDateParse.Format(utils.FormatDateTime),
+				Body:             html.EscapeString(body),
+				Abstract:         html.EscapeString(abstract),
+				CategoryName:     categoryName,
+				SubCategoryName:  subCategoryName,
+				CategoryId:       categoryId,
+				PublishStatus:    1,
+				ExpertBackground: expertContentStr,
+				ExpertNumber:     expertNumStr,
+				InterviewDate:    interviewDateStr,
+				Department:       "策略组",
+				ArticleIdMd5:     utils.MD5(articleIdStr),
+			}
+			if _, err = models.AddCygxArticle(item); err != nil {
+				fmt.Println("AddCygxArticle Err:", err.Error())
+				return
+			}
+		}
+	}
+}
+
+// splitExpertEvaluation splits an expert-evaluation paragraph at its first
+// ")" into the expert number (text before it, with "(" and "专家评价"
+// removed) and the evaluation text (text after it, with every ")" removed,
+// leading ":" trimmed and a trailing "(推荐" removed).
+//
+// ok is false when the paragraph contains no ")" or the expert number is
+// empty.
+//
+// NOTE(review): the previous code looked up ")" twice and stripped "("
+// twice; that may once have covered full-width brackets as well - confirm
+// against the real report bodies.
+func splitExpertEvaluation(expertContent string) (expertNum, expertText string, ok bool) {
+	rightIndex := strings.Index(expertContent, ")")
+	if rightIndex < 0 {
+		// strings.Index reports "not found" as -1; slicing with it would panic.
+		return "", "", false
+	}
+	expertNum = strings.Replace(expertContent[:rightIndex], "(", "", -1)
+	expertNum = strings.Replace(expertNum, "专家评价", "", -1)
+	if expertNum == "" {
+		return "", "", false
+	}
+	expertText = strings.Replace(expertContent[rightIndex:], ")", "", -1)
+	expertText = strings.TrimLeft(expertText, ":")
+	// TrimSuffix, not TrimRight: TrimRight treats its argument as a set of
+	// characters and would also eat a trailing "推" or "荐" of the text.
+	expertText = strings.TrimSuffix(expertText, "(推荐")
+	return expertNum, expertText, true
+}

+ 207 - 11
services/elasticsearch.go

@@ -156,7 +156,7 @@ func (tracelog) Printf(format string, v ...interface{}) {
 	fmt.Printf(format, v...)
 }
 
-func SearchByKeyWord(keyWord string) (result []*models.SearchItem, err error) {
+func SearchByKeyWordBack02(keyWord string) (result []*models.SearchItem, err error) {
 	fmt.Println("keyWord:", keyWord)
 	pageSize := 20
 	keyWordArr, err := GetIndustryMapNameSlice(keyWord)
@@ -470,8 +470,9 @@ func SearchByKeyWordQuery(keyWord string) (result []*models.SearchItem, err erro
 	return
 }
 
-func SearchByKeyWordBack02(keyWord string) (result []*models.SearchItem, err error) {
+func SearchByKeyWord(keyWord string) (result []*models.SearchItem, err error) {
 	fmt.Println("keyWord:", keyWord)
+	keyWord = "医药"
 	pageSize := 20
 	keyWordArr, err := GetIndustryMapNameSlice(keyWord)
 	keyWordArr = RemoveDuplicatesAndEmpty(keyWordArr)
@@ -492,13 +493,10 @@ func SearchByKeyWordBack02(keyWord string) (result []*models.SearchItem, err err
 		elastic.SetTraceLog(log.New(logFile, "ES-TRACE: ", 0)),
 		elastic.SetSniff(false), elastic.SetErrorLog(errorlog))
 
-	var esIndex = "cygx_article"
+	//var esIndex = "cygx_article"
+	var esIndex = "article_two"
 	searchMap := make(map[int]int)
-	keyWordStr := strings.Join(keyWordArr, ",")
 
-	fmt.Println("keyWordStr ", keyWordStr)
-	keyWordStr = "费用"
-	keyWord=keyWordStr
 	//queryString := elastic.NewQueryStringQuery(`Title:医疗 BodyText:医疗`).Analyzer("ik_smart")
 
 	//queryTeerms:=elastic.NewTermsQuery("Title","医疗","费用")
@@ -507,11 +505,28 @@ func SearchByKeyWordBack02(keyWord string) (result []*models.SearchItem, err err
 	//.Analyzer("ik_smart")
 	//.Analyzer("ik_smart")
 	//boolquery.Must(elastic.NewMatchQuery("Title", keyWord), elastic.NewMatchQuery("BodyText", keyWord))
-	mapping:=client.GetMapping()
-	mapJson,err:=json.Marshal(mapping)
-	utils.FileLog.Info("%s",string(mapJson))
+	mapping := client.GetMapping()
+	mapJson, err := json.Marshal(mapping)
+	utils.FileLog.Info("%s", string(mapJson))
 	boolquery := elastic.NewBoolQuery()
-	boolquery.Must(elastic.NewMatchQuery("Title", keyWord).Boost(2).Analyzer("ik_smart"), elastic.NewMatchQuery("BodyText", keyWord).Boost(1).Analyzer("ik_smart"))
+
+	keyLen := len(keyWordArr)
+	n := float64(keyLen)
+	matchArr := make([]elastic.Query, 0)
+	for _, v := range keyWordArr {
+		if v != "" {
+			matchq1 := elastic.NewMatchQuery("Title", v).Boost(n).Analyzer("ik_smart")
+			matchq2 := elastic.NewMatchQuery("BodyText", v).Boost(n).Analyzer("ik_smart")
+			matchArr = append(matchArr, matchq1)
+			matchArr = append(matchArr, matchq2)
+		}
+		n--
+	}
+	//matchArr=append(matchArr,matchq1)
+	//matchArr=append(matchArr,matchq2)
+
+	boolquery.Should(matchArr...)
+	//boolquery.Must(elastic.NewMatchQuery("BodyText", keyWord).Boost(1).Analyzer("ik_smart"))
 
 	highlight := elastic.NewHighlight()
 	highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
@@ -555,3 +570,184 @@ func SearchByKeyWordBack02(keyWord string) (result []*models.SearchItem, err err
 	}
 	return
 }
+
+// AddMap makes sure the Elasticsearch index article_two exists and then
+// indexes one article into it.
+//
+// When the index is missing it is created with a mapping that declares
+// ArticleId as integer and Title/BodyText as text analysed with ik_smart.
+// Afterwards article 3546 is loaded, its HTML body is reduced to plain text
+// and the document is stored under its article id.
+//
+// Every failure is printed and ends the function; nothing is returned.
+func AddMap() {
+	errorlog := log.New(os.Stdout, "APP", log.LstdFlags)
+	file := "./rdlucklog/eslog.log"
+	// Trace logging is best effort, so an open failure is ignored. Close is
+	// safe on the nil *os.File left behind by a failed open.
+	logFile, _ := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0766)
+	defer logFile.Close()
+
+	client, err := elastic.NewClient(
+		elastic.SetURL(ES_URL),
+		elastic.SetBasicAuth(ES_USERNAME, ES_PASSWORD),
+		elastic.SetTraceLog(log.New(logFile, "ES-TRACE: ", 0)),
+		elastic.SetSniff(false), elastic.SetErrorLog(errorlog))
+	if err != nil {
+		fmt.Println("NewClient Err:", err.Error())
+		// client is nil here; using it below would panic.
+		return
+	}
+	// Release the background goroutines started by NewClient.
+	defer client.Stop()
+
+	bodyJson := `{
+  "mappings": {
+    "properties": {
+      "ArticleId": {
+        "type": "integer"
+      },
+      "Title": {
+        "type": "text",
+        "analyzer": "ik_smart"
+      },
+      "BodyText": {
+        "type": "text",
+        "analyzer": "ik_smart"
+      }
+    }
+  }
+}`
+	var esIndex = "article_two"
+
+	// Debug output: dumps the (not yet executed) analyze request as JSON.
+	a := client.IndexAnalyze().Index(esIndex)
+	analyzeBody, err := json.Marshal(a)
+	if err != nil {
+		fmt.Println("Marshal analyze Err:" + err.Error())
+	}
+	fmt.Println(string(analyzeBody))
+
+	exists, err := client.IndexExists(esIndex).Do(context.Background())
+	if err != nil {
+		fmt.Println("IndexExists Err:" + err.Error())
+		return
+	}
+	fmt.Println("exists:", exists)
+	if !exists {
+		_, err = client.CreateIndex(esIndex).BodyJson(bodyJson).Do(context.Background())
+		if err != nil {
+			fmt.Println("CreateIndex Err:" + err.Error())
+			return
+		}
+	}
+
+	// The article id is hard-coded; AddMap currently indexes this one article only.
+	item, err := models.GetArticleDetailTestById(3546)
+	if err != nil {
+		fmt.Println("GetArticleDetailById Err:" + err.Error())
+		return
+	}
+	// Body is unescaped before parsing, then stripped to plain text for BodyText.
+	content := html.UnescapeString(item.Body)
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
+	if err != nil {
+		fmt.Println("create doc err:", err.Error())
+		return
+	}
+	item.BodyText = doc.Text()
+
+	// Store the document under the article id.
+	resp, err := client.Index().Index(esIndex).Id(strconv.Itoa(item.ArticleId)).BodyJson(item).Do(context.Background())
+	if err != nil {
+		fmt.Println("insert es failed", err.Error())
+		return
+	}
+	fmt.Println(resp)
+}
+
+// Search looks keyWord up in the article_two Elasticsearch index and returns
+// at most 20 hits, one SearchItem per distinct article.
+//
+// keyWord is first expanded through GetIndustryMapNameSlice and
+// de-duplicated. Each resulting term becomes a Title match and a BodyText
+// match (both analysed with ik_smart); earlier terms get a higher boost than
+// later ones. Title and BodyText are highlighted with <font color='red'>
+// tags, and a highlighted title takes precedence over the stored one.
+//
+// A failure of the keyword expansion is only reported by e-mail and the
+// search continues with whatever terms were returned. Client, query and
+// decoding errors are returned to the caller.
+func Search(keyWord string) (result []*models.SearchItem, err error) {
+	fmt.Println("keyWord:", keyWord)
+	pageSize := 20
+	keyWordArr, err := GetIndustryMapNameSlice(keyWord)
+	if err != nil {
+		go utils.SendEmail(utils.APPNAME+" "+utils.RunMode+"异常提醒:", "GetIndustryMapNameSlice:"+err.Error(), utils.EmailSendToUsers)
+	}
+	keyWordArr = RemoveDuplicatesAndEmpty(keyWordArr)
+
+	fmt.Println(keyWordArr)
+	fmt.Println(" keyWordArr ")
+
+	errorlog := log.New(os.Stdout, "APP", log.LstdFlags)
+	file := "./rdlucklog/eslog.log"
+	// Trace logging is best effort, so an open failure is ignored. Close is
+	// safe on the nil *os.File left behind by a failed open.
+	logFile, _ := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0766)
+	defer logFile.Close()
+
+	client, err := elastic.NewClient(
+		elastic.SetURL(ES_URL),
+		elastic.SetBasicAuth(ES_USERNAME, ES_PASSWORD),
+		elastic.SetTraceLog(log.New(logFile, "ES-TRACE: ", 0)),
+		elastic.SetSniff(false), elastic.SetErrorLog(errorlog))
+	if err != nil {
+		// client is nil here; using it below would panic.
+		return nil, err
+	}
+	// Release the background goroutines started by NewClient.
+	defer client.Stop()
+
+	var esIndex = "article_two"
+	searchMap := make(map[int]int)
+
+	// The boost starts at the number of terms and drops by one per term, so
+	// terms earlier in the list weigh more.
+	n := float64(len(keyWordArr))
+	matchArr := make([]elastic.Query, 0, 2*len(keyWordArr))
+	for _, v := range keyWordArr {
+		if v != "" {
+			matchArr = append(matchArr,
+				elastic.NewMatchQuery("Title", v).Boost(n).Analyzer("ik_smart"),
+				elastic.NewMatchQuery("BodyText", v).Boost(n).Analyzer("ik_smart"))
+		}
+		n--
+	}
+	// NOTE(review): every term must match both Title and BodyText here,
+	// whereas SearchByKeyWord combines the same clauses with Should - confirm
+	// which of the two is intended.
+	boolquery := elastic.NewBoolQuery().Must(matchArr...)
+
+	highlight := elastic.NewHighlight()
+	highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
+	highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
+	request := client.Search(esIndex).Highlight(highlight).Size(pageSize).Query(boolquery)
+	searchByMatch, err := request.Do(context.Background())
+	fmt.Println(searchByMatch, err)
+	if err != nil {
+		return result, err
+	}
+	if searchByMatch.Hits == nil {
+		return result, nil
+	}
+
+	jsonResult, err := json.Marshal(searchByMatch.Hits.Hits)
+	if err != nil {
+		return nil, err
+	}
+	utils.FileLog.Info("%s", string(jsonResult))
+
+	for _, v := range searchByMatch.Hits.Hits {
+		articleJson, err := v.Source.MarshalJSON()
+		if err != nil {
+			return nil, err
+		}
+		fmt.Println(string(articleJson))
+		utils.FileLog.Info("%s", string(articleJson))
+
+		article := new(models.CygxArticle)
+		if err = json.Unmarshal(articleJson, article); err != nil {
+			return nil, err
+		}
+		// searchMap remembers the article ids already emitted.
+		if _, ok := searchMap[article.ArticleId]; ok {
+			continue
+		}
+		searchItem := new(models.SearchItem)
+		searchItem.ArticleId, _ = strconv.Atoi(v.Id)
+		searchItem.Body = v.Highlight["BodyText"]
+		// Prefer the highlighted title when Elasticsearch produced one.
+		if titles := v.Highlight["Title"]; len(titles) > 0 {
+			searchItem.Title = titles[0]
+		} else {
+			searchItem.Title = article.Title
+		}
+		searchItem.PublishDate = article.PublishDate
+		result = append(result, searchItem)
+		searchMap[article.ArticleId] = article.ArticleId
+	}
+
+	body, err := json.Marshal(searchMap)
+	utils.FileLog.Info("%s", string(body))
+	return result, err
+}

+ 6 - 0
services/task.go

@@ -14,5 +14,11 @@ func Task() {
 	//解析报告
 	//GetArticleExpert()
 	//SearchByKeyWordQuery("")
+	//AddMap()
+
+	//同步数据
+	//ImportData()
+	//AddMap()
+	//Search("光伏行业")
 	fmt.Println("end")
 }

二進制
temp/权益报告导入0319.xlsx