Procházet zdrojové kódy

新增 ES相关操作

rdluck před 4 roky
rodič
revize
232f7aa2a3
4 změnil soubory, kde provedl 317 přidání a 20 odebrání
  1. 27 3
      models/article.go
  2. 127 17
      services/article.go
  3. 68 0
      services/elastic.go
  4. 95 0
      services/task.go

+ 27 - 3
models/article.go

@@ -106,10 +106,10 @@ type ArticleDetailResp struct {
 	HasFree       int `description:"1:已付费(至少包含一个品类的权限),2:未付费(没有任何品类权限)"`
 }
 
-func ModifyArticleExpert(articleId int, expertNumStr, expertContentStr, interviewDateStr string) (err error) {
+func ModifyArticleExpert(articleId int, expertNumStr, expertContentStr, interviewDateStr, bodyText string) (err error) {
 	o := orm.NewOrm()
-	sql := `UPDATE cygx_article SET expert_background=?,expert_number=?,interview_date=? WHERE article_id=? `
-	_, err = o.Raw(sql, expertContentStr, expertNumStr, interviewDateStr, articleId).Exec()
+	sql := `UPDATE cygx_article SET expert_background=?,expert_number=?,interview_date=?,body_text=? WHERE article_id=? `
+	_, err = o.Raw(sql, expertContentStr, expertNumStr, interviewDateStr, bodyText, articleId).Exec()
 	return
 }
 
@@ -140,3 +140,27 @@ func ModifyArticleContent(articleId int, content, expertNumStr, expertContentStr
 	_, err = o.Raw(sql, content, expertContentStr, expertNumStr, interviewDateStr, articleId).Exec()
 	return
 }
+
+// ElasticArticleDetail holds the fields of one article; presumably the document shape indexed into Elasticsearch — TODO confirm against callers.
+type ElasticArticleDetail struct {
+	ArticleId            int    `description:"报告id"` // report ID
+	Title                string `description:"标题"` // title
+	TitleEn              string `description:"英文标题 "` // English title
+	UpdateFrequency      string `description:"更新周期"` // update frequency
+	CreateDate           string `description:"创建时间"` // creation time
+	PublishDate          string `description:"发布时间"` // publish time
+	Abstract             string `description:"摘要"` // abstract
+	CategoryName         string `description:"一级分类"` // first-level category
+	SubCategoryName      string `description:"二级分类"` // second-level category
+	IsCollect            bool   `description:"是否收藏:true,已收藏,false:未收藏"` // whether collected: true = collected, false = not collected
+	IsInterviewApply     bool   `description:"是否申请访谈:true,已申请,false:未申请"` // whether an interview was applied for: true = applied, false = not applied
+	BodyText             string `description:"内容"` // content
+	InterviewApplyStatus string `description:"当前访谈申请状态:'待邀请','待访谈','已完成','已取消'"` // current interview application status: pending invitation, pending interview, completed, cancelled
+	InterviewDate        string `description:"访谈时间"` // interview time
+	ExpertBackground     string `description:"专家背景"` // expert background
+	ExpertNumber         string `description:"专家编号"` // expert number
+	Department           string `description:"作者"` // author
+	SellerMobile         string `description:"销售手机号"` // salesperson mobile number
+	SellerName           string `description:"销售名称"` // salesperson name
+	ArticleIdMd5         string `description:"纪要id"` // minutes (summary) ID
+}

+ 127 - 17
services/article.go

@@ -2,10 +2,10 @@ package services
 
 import (
 	"fmt"
+	"github.com/PuerkitoBio/goquery"
 	"hongze/hongze_cygx/models"
 	"html"
 	"strings"
-	"github.com/PuerkitoBio/goquery"
 )
 
 func GetReportContentSub(content string) (contentSub string, err error) {
@@ -71,7 +71,6 @@ func GetReportContentTextSub(content string) (contentSub string, err error) {
 			}
 		})
 	}
-	fmt.Println(contentSub)
 	return
 }
 
@@ -86,9 +85,10 @@ func GetArticleAll() {
 	}()
 	list, err := models.GetArticleAll()
 	if err != nil {
-
+		return
 	}
 	for _, v := range list {
+		fmt.Println(v.ArticleId, v.Title)
 		FixArticleContent(v.ArticleId)
 	}
 }
@@ -101,7 +101,9 @@ func FixArticleContent(articleId int) {
 		return
 	}
 	content := item.Body
+	bodyText, _ := GetReportContentTextSub(content)
 	content = html.UnescapeString(content)
+
 	content = strings.Replace(content, "http", "https", -1)
 
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
@@ -130,22 +132,130 @@ func FixArticleContent(articleId int) {
 				if rightIndex == 0 {
 					rightIndex = strings.Index(expertContent, ")")
 				}
-				expertNum := expertContent[:rightIndex]
-				expertNum = strings.Replace(expertNum, "(", "", -1)
-				expertNum = strings.Replace(expertNum, "(", "", -1)
-				expertNum = strings.Replace(expertNum, "专家评价", "", -1)
-				if expertNum != "" {
-					expertNumArr = append(expertNumArr, expertNum)
-					rightIndex = rightIndex
-					expertContentStr := expertContent[rightIndex:]
-					expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
-					expertContentStr = strings.TrimLeft(expertContentStr, ":")
-					expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
-					expertContentArr = append(expertContentArr, expertContentStr)
+				if rightIndex > 0 {
+					expertNum := expertContent[:rightIndex]
+					expertNum = strings.Replace(expertNum, "(", "", -1)
+					expertNum = strings.Replace(expertNum, "(", "", -1)
+					expertNum = strings.Replace(expertNum, "专家评价", "", -1)
+					if expertNum != "" {
+						expertNumArr = append(expertNumArr, expertNum)
+						rightIndex = rightIndex
+						expertContentStr := expertContent[rightIndex:]
+						expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
+						expertContentStr = strings.TrimLeft(expertContentStr, ":")
+						expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
+						expertContentArr = append(expertContentArr, expertContentStr)
+					}
 				}
 			}
 		}
 	})
+
+	if len(expertContentArr) <= 0 {
+		doc.Find("pre").Each(func(i int, pre *goquery.Selection) {
+			pre.Find("span").Each(func(n int, span *goquery.Selection) {
+				contentTxt := span.Text()
+				if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
+					span.Find("span").Each(func(m int, subspan *goquery.Selection) {
+						subspanText := subspan.Text()
+						if strings.Contains(subspanText, "专家评价") {
+							expertContent := subspan.Next().Text()
+							if expertContent != "" {
+								rightIndex := strings.Index(expertContent, ")")
+								if rightIndex == 0 {
+									rightIndex = strings.Index(expertContent, ")")
+								}
+								if rightIndex > 0 {
+									expertNum := expertContent[:rightIndex]
+									expertNum = strings.Replace(expertNum, "(", "", -1)
+									expertNum = strings.Replace(expertNum, "(", "", -1)
+									expertNum = strings.Replace(expertNum, "专家评价", "", -1)
+									if expertNum != "" {
+										expertNumArr = append(expertNumArr, expertNum)
+										rightIndex = rightIndex
+										expertContentStr := expertContent[rightIndex:]
+										expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
+										expertContentStr = strings.TrimLeft(expertContentStr, ":")
+										expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
+										expertContentArr = append(expertContentArr, expertContentStr)
+									}
+								}
+							}
+						}
+
+					})
+				}
+
+				span.Find("span").Each(func(k int, sspan *goquery.Selection) {
+					sspanText := sspan.Text()
+					if strings.Contains(sspanText, "访谈时间") {
+						sspanText = strings.Replace(sspanText, "#访谈时间:", "", -1)
+						sspanText = strings.Replace(sspanText, "访谈时间:", "", -1)
+						sspanText = strings.Replace(sspanText, "\n", "", -1)
+						sspanText = strings.Replace(sspanText, " ", "", -1)
+						sspanText = strings.Trim(sspanText, " ")
+						sspanText = sspanText[:10]
+						interviewDate := sspanText
+						if interviewDate != "" {
+							interviewDateArr = append(interviewDateArr, interviewDate)
+						}
+					}
+				})
+			})
+		})
+	}
+
+	if len(expertContentArr) <= 0 {
+		doc.Find("span").Each(func(i int, span *goquery.Selection) {
+			span.Find("strong").Each(func(n int, strong *goquery.Selection) {
+				spanText := span.Text()
+				strongText := strong.Text()
+				if strings.Contains(strongText, "#专家评价") || strings.Contains(strongText, "专家评价") {
+					expertContent := strong.Parents().Text()
+					if expertContent != "" {
+						rightIndex := strings.Index(expertContent, ")")
+						if rightIndex == 0 {
+							rightIndex = strings.Index(expertContent, ")")
+						}
+						if rightIndex > 0 {
+							expertNum := expertContent[:rightIndex]
+							expertNum = strings.Replace(expertNum, "(", "", -1)
+							expertNum = strings.Replace(expertNum, "(", "", -1)
+							expertNum = strings.Replace(expertNum, "专家评价", "", -1)
+							expertNum = strings.Replace(expertNum, "#", "", -1)
+							expertNum = strings.Replace(expertNum, ":", "", -1)
+							expertNum = strings.Replace(expertNum, "\n", "", -1)
+							if expertNum != "" {
+								expertNumArr = append(expertNumArr, expertNum)
+								rightIndex = rightIndex
+								expertContentStr := expertContent[rightIndex:]
+								expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
+								expertContentStr = strings.TrimLeft(expertContentStr, ":")
+								expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
+								expertContentArr = append(expertContentArr, expertContentStr)
+								return
+							}
+						}
+					}
+				}
+
+				if strings.Contains(spanText, "访谈时间") {
+					spanText = strings.Replace(spanText, "#访谈时间:", "", -1)
+					spanText = strings.Replace(spanText, "访谈时间:", "", -1)
+					spanText = strings.Replace(spanText, "\n", "", -1)
+					spanText = strings.Replace(spanText, " ", "", -1)
+					spanText = strings.Trim(spanText, " ")
+					spanText = spanText[:10]
+					interviewDate := spanText
+					if interviewDate != "" {
+						interviewDateArr = append(interviewDateArr, interviewDate)
+					}
+				}
+			})
+		})
+	}
+
+
 	var expertNumStr, expertContentStr, interviewDateStr string
 	if len(expertNumArr) > 0 {
 		expertNumStr = expertNumArr[0]
@@ -157,8 +267,8 @@ func FixArticleContent(articleId int) {
 	if len(interviewDateArr) > 0 {
 		interviewDateStr = interviewDateArr[0]
 	}
-	fmt.Println(articleId, expertNumStr, expertContentStr, interviewDateStr)
-	err = models.ModifyArticleExpert(articleId, expertNumStr, expertContentStr, interviewDateStr)
+	expertNumStr = strings.Replace(expertNumStr, "#:", "", -1)
+	err = models.ModifyArticleExpert(articleId, expertNumStr, expertContentStr, interviewDateStr, bodyText)
 	if err != nil {
 		fmt.Println("ModifyArticleExpert Err:" + err.Error())
 		return

+ 68 - 0
services/elastic.go

@@ -0,0 +1,68 @@
+package services
+
+import (
+	"context"
+	"fmt"
+	"github.com/olivere/elastic/v7"
+	"log"
+	"os"
+)
+
+// NewClient builds an Elasticsearch client for ES_URL using basic auth
+// (ES_USERNAME / ES_PASSWORD) with sniffing disabled.
+//
+// Client errors are logged to stdout with the "APP" prefix. Request traces are
+// appended to ./rdlucklog/eslog.log; if that file cannot be opened, the failure
+// is reported through the error logger and tracing is left disabled instead of
+// handing the client a logger backed by a nil file.
+//
+// NOTE(review): every call opens the trace file again and never closes it, and
+// CreateIndex, AddData and MappingModify each build a fresh client — consider
+// sharing a single client.
+func NewClient() (client *elastic.Client, err error) {
+	errorlog := log.New(os.Stdout, "APP", log.LstdFlags)
+
+	// Options are kept in their original order; the trace logger is only
+	// inserted when its backing file is usable.
+	options := []elastic.ClientOptionFunc{
+		elastic.SetURL(ES_URL),
+		elastic.SetBasicAuth(ES_USERNAME, ES_PASSWORD),
+	}
+
+	file := "./rdlucklog/eslog.log"
+	logFile, openErr := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0766)
+	if openErr != nil {
+		errorlog.Printf("open ES trace log %q: %v", file, openErr)
+	} else {
+		options = append(options, elastic.SetTraceLog(log.New(logFile, "ES-TRACE: ", 0)))
+	}
+
+	options = append(options, elastic.SetSniff(false), elastic.SetErrorLog(errorlog))
+	client, err = elastic.NewClient(options...)
+	return
+}
+
+// CreateIndex creates the index indexName, sending mappingJson as the request
+// body, unless an index with that name already exists.
+//
+// indexName: name of the index.
+// mappingJson: index definition (table structure) as a JSON string.
+func CreateIndex(indexName, mappingJson string) (err error) {
+	ctx := context.Background()
+	esClient, err := NewClient()
+	if err != nil {
+		return err
+	}
+	// Nothing to do when the lookup fails or the index is already there.
+	found, err := esClient.IndexExists(indexName).Do(ctx)
+	if err != nil || found {
+		return err
+	}
+	_, err = esClient.CreateIndex(indexName).BodyJson(mappingJson).Do(ctx)
+	return err
+}
+
+// AddData indexes item as the document docId in indexName and, on success,
+// prints the response status and result to stdout.
+func AddData(indexName, docId string, item interface{}) (err error) {
+	esClient, err := NewClient()
+	if err != nil {
+		return err
+	}
+	indexReq := esClient.Index().Index(indexName).Id(docId).BodyJson(item)
+	indexResp, err := indexReq.Do(context.Background())
+	if err == nil {
+		fmt.Println("AddData", indexResp.Status, indexResp.Result)
+	}
+	return err
+}
+
+// MappingModify sends mappingJson as a put-mapping request for indexName and
+// prints the returned error and response to stdout. It has no error return,
+// so every failure is reported by printing.
+func MappingModify(indexName, mappingJson string) {
+	client, err := NewClient()
+	if err != nil {
+		// Previously this returned silently, hiding why no mapping was applied.
+		fmt.Println("MappingModify NewClient Err:" + err.Error())
+		return
+	}
+	result, err := client.PutMapping().Index(indexName).BodyString(mappingJson).Do(context.Background())
+	fmt.Println(err)
+	fmt.Println(result)
+}

+ 95 - 0
services/task.go

@@ -20,5 +20,100 @@ func Task() {
 	//ImportData()
 	//AddMap()
 	//Search("光伏行业")
+
+	//修复报告内容
+	//GetArticleAll()
+
+	//
+	//	indexName:="article"
+	//	mappingJson := `{
+	//  "mappings": {
+	//    "dynamic": true,
+	//    "properties": {
+	//      "ArticleId": {
+	//        "type": "integer"
+	//      },
+	//      "Title": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "TitleEn": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "UpdateFrequency": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "CreateDate": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "PublishDate": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "Abstract": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "CategoryName": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "SubCategoryName": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "InterviewDate": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "ExpertBackground": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "ExpertNumber": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "Department": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      },
+	//      "ArticleIdMd5": {
+	//        "type": "text",
+	//        "analyzer": "ik_smart"
+	//      }
+	//    }
+	//  }
+	//}`
+	//	CreateIndex(indexName,mappingJson)
+	//插入数据
+
+
+	type ElasticArticleDetail struct {
+		ArticleId            int    `description:"报告id"`
+		Title                string `description:"标题"`
+		TitleEn              string `description:"英文标题 "`
+		UpdateFrequency      string `description:"更新周期"`
+		CreateDate           string `description:"创建时间"`
+		PublishDate          string `description:"发布时间"`
+		Abstract             string `description:"摘要"`
+		CategoryName         string `description:"一级分类"`
+		SubCategoryName      string `description:"二级分类"`
+		IsCollect            bool   `description:"是否收藏:true,已收藏,false:未收藏"`
+		IsInterviewApply     bool   `description:"是否申请访谈:true,已申请,false:未申请"`
+		BodyText             string `description:"内容"`
+		InterviewApplyStatus string `description:"当前访谈申请状态:'待邀请','待访谈','已完成','已取消'"`
+		InterviewDate        string `description:"访谈时间"`
+		ExpertBackground     string `description:"专家背景"`
+		ExpertNumber         string `description:"专家编号"`
+		Department           string `description:"作者"`
+		SellerMobile         string `description:"销售手机号"`
+		SellerName           string `description:"销售名称"`
+		ArticleIdMd5         string `description:"纪要id"`
+	}
+
 	fmt.Println("end")
 }