|
@@ -6,6 +6,7 @@ import (
|
|
|
"hongze/hongze_cygx/models"
|
|
|
"hongze/hongze_cygx/utils"
|
|
|
"html"
|
|
|
+ "regexp"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
"time"
|
|
@@ -298,15 +299,14 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
fmt.Println("GetTacticsList Err:", err.Error())
|
|
|
return
|
|
|
}
|
|
|
+ listCustomArticle, err := models.GetCustomArticleId() //手动归类的文章,不替换文章类型
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println("GetTacticsList Err:", err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
fmt.Println("list len:", len(list))
|
|
|
-
|
|
|
summaryCategoryIds := "28,32,45,50,57,62,72,74,79,84,86,88,90,95,96" //纪要库的文章类型categoty_id
|
|
|
listSummary := strings.Split(summaryCategoryIds, ",")
|
|
|
- //listSummary, errsu := models.GetReportMappingCategoryID()
|
|
|
- //if errsu != nil {
|
|
|
- // fmt.Println("GetTacticsList Err:", errsu.Error())
|
|
|
- // return
|
|
|
- //}
|
|
|
noSummaryArticleIds := "3454,3456,3457,3459,2449,2450,2453,2454,2459,2530,2583,2663,2670,2699,2715,2732,2748,2759,2399,2356,2870,3173,2978,2826,3470" //非纪要库类型的文章ID
|
|
|
listNoSummaryArticleIds := strings.Split(noSummaryArticleIds, ",")
|
|
|
listPermission, errper := models.GetPermissionMappingCategoryID()
|
|
@@ -345,7 +345,7 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
}
|
|
|
}
|
|
|
v.Department = "弘则权益研究"
|
|
|
- fmt.Println(k, v.ArticleId)
|
|
|
+ //fmt.Println(k, v.ArticleId)
|
|
|
hh, _ := time.ParseDuration("8h")
|
|
|
//pDate := publishDate.Add(hh)
|
|
|
v.PublishDate = v.PublishDate.Add(hh)
|
|
@@ -360,7 +360,15 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
return err
|
|
|
}
|
|
|
v.Body = strings.Replace(v.Body, "http://vmp.hzinsights.com", "https://vmp.hzinsights.com", -1)
|
|
|
- expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)
|
|
|
+
|
|
|
+ expertNumStr, expertContentStr, interviewDateStr, fileLink, bodyReturn := BodyAnalysis2(v.Body)
|
|
|
+
|
|
|
+ if strings.Index(v.Body, "报告全文(") > 0 && strings.Index(v.Body, "PDF格式报告下载.pdf") > 0 {
|
|
|
+ v.Body = strings.Replace(v.Body, "报告全文(", "", -1)
|
|
|
+ v.Body = strings.Replace(v.Body, "PDF格式报告下载.pdf", "", -1)
|
|
|
+ v.Body = strings.Replace(v.Body, "):", "", -1)
|
|
|
+ }
|
|
|
+ //fmt.Println(fileLink)
|
|
|
var titleNew string
|
|
|
titleNew = v.Title
|
|
|
// 7资金流向 、11大类资产 、51每日复盘 、80医药周报、9估值研究
|
|
@@ -374,8 +382,11 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
if v.CategoryId == 80 {
|
|
|
titleNew = v.Title + utils.WeekByDate(v.PublishDate)
|
|
|
}
|
|
|
+ //fmt.Println(k)
|
|
|
+ //fmt.Println(expertContentStr)
|
|
|
if count > 0 {
|
|
|
fmt.Println(k, v.ArticleId, "edit")
|
|
|
+ var isCustom bool
|
|
|
bodyText, _ := GetReportContentTextSub(v.Body)
|
|
|
updateParams := make(map[string]interface{})
|
|
|
//updateParams["Title"] = v.Title
|
|
@@ -384,12 +395,22 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
updateParams["UpdateFrequency"] = v.UpdateFrequency
|
|
|
updateParams["CreateDate"] = v.CreateDate
|
|
|
updateParams["PublishDate"] = v.PublishDate
|
|
|
- updateParams["Body"] = html.EscapeString(v.Body)
|
|
|
+ //updateParams["Body"] = html.EscapeString(v.Body)
|
|
|
+ updateParams["Body"] = html.EscapeString(bodyReturn)
|
|
|
updateParams["BodyText"] = bodyText
|
|
|
updateParams["Abstract"] = html.EscapeString(v.Abstract)
|
|
|
updateParams["CategoryName"] = v.CategoryName
|
|
|
updateParams["SubCategoryName"] = v.SubCategoryName
|
|
|
- updateParams["CategoryId"] = v.CategoryId
|
|
|
+ for _, vCustom := range listCustomArticle {
|
|
|
+ if v.ArticleId == vCustom.ArticleId {
|
|
|
+ fmt.Println("手动归类的文章:" + strconv.Itoa(v.ArticleId))
|
|
|
+ isCustom = true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if isCustom == false {
|
|
|
+ updateParams["CategoryId"] = v.CategoryId
|
|
|
+ }
|
|
|
+ //updateParams["CategoryId"] = v.CategoryId
|
|
|
updateParams["PublishStatus"] = v.PublishStatus
|
|
|
updateParams["ExpertBackground"] = expertContentStr
|
|
|
updateParams["ExpertNumber"] = expertNumStr
|
|
@@ -402,6 +423,7 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
v.Department = "弘则权益研究"
|
|
|
}
|
|
|
updateParams["Department"] = v.Department
|
|
|
+ updateParams["FileLink"] = fileLink
|
|
|
whereParam := map[string]interface{}{"article_id": v.ArticleId}
|
|
|
err = models.UpdateByExpr(models.CygxArticle{}, whereParam, updateParams)
|
|
|
if err != nil {
|
|
@@ -418,7 +440,8 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
item.UpdateFrequency = v.UpdateFrequency
|
|
|
item.CreateDate = v.CreateDate
|
|
|
item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
|
|
|
- item.Body = html.EscapeString(v.Body)
|
|
|
+ //item.Body = html.EscapeString(v.Body)
|
|
|
+ item.Body = html.EscapeString(bodyReturn)
|
|
|
item.Abstract = html.EscapeString(v.Abstract)
|
|
|
item.CategoryName = v.CategoryName
|
|
|
item.SubCategoryName = v.SubCategoryName
|
|
@@ -433,6 +456,7 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
item.IsSummary = v.IsSummary
|
|
|
item.IsReport = v.IsReport
|
|
|
item.ReportType = v.ReportType
|
|
|
+ item.FileLink = fileLink
|
|
|
_, err = models.AddCygxArticles(item)
|
|
|
if err != nil {
|
|
|
fmt.Println("AddCygxArticle Err:", err.Error())
|
|
@@ -511,3 +535,143 @@ func SyncTacticsListToEs() (err error) {
|
|
|
}
|
|
|
return
|
|
|
}
|
|
|
+
|
|
|
+//body 解析
|
|
|
+func BodyAnalysis2(body string) (expertNumStr, expertContentStr, interviewDateStr, fileLink, bodyReturn string) {
|
|
|
+ body = html.UnescapeString(body)
|
|
|
+ //fmt.Println(body)
|
|
|
+ doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println("create doc err:", err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ var expertNumArr []string
|
|
|
+ var expertContentArr []string
|
|
|
+ var interviewDateArr []string
|
|
|
+ //var fileLink string
|
|
|
+
|
|
|
+ doc.Find("p").Each(func(i int, s *goquery.Selection) {
|
|
|
+ contentTxt := s.Text()
|
|
|
+ if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
|
|
|
+ interviewDate := s.Next().Text()
|
|
|
+ interviewDateArr = append(interviewDateArr, interviewDate)
|
|
|
+ }
|
|
|
+ if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
|
|
|
+ expertContent := s.Next().Text()
|
|
|
+ if expertContent == "" {
|
|
|
+ expertContent = contentTxt
|
|
|
+ }
|
|
|
+ if expertContent != "" {
|
|
|
+ rightIndex := strings.Index(expertContent, ")")
|
|
|
+ if rightIndex == 0 {
|
|
|
+ rightIndex = strings.Index(expertContent, ")")
|
|
|
+ }
|
|
|
+ if rightIndex > 0 {
|
|
|
+ expertNum := expertContent[:rightIndex]
|
|
|
+ expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
+ expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
+ expertNum = strings.Replace(expertNum, "专家评价", "", -1)
|
|
|
+ if expertNum != "" {
|
|
|
+ expertNumArr = append(expertNumArr, expertNum)
|
|
|
+ rightIndex = rightIndex
|
|
|
+ expertContentStr := expertContent[rightIndex:]
|
|
|
+ expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
|
|
|
+ expertContentStr = strings.TrimLeft(expertContentStr, ":")
|
|
|
+ expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
|
|
|
+ expertContentArr = append(expertContentArr, expertContentStr)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ })
|
|
|
+ if len(expertContentArr) == 0 {
|
|
|
+ doc.Find("pre").Each(func(i int, s *goquery.Selection) {
|
|
|
+ contentTxt := s.Text()
|
|
|
+ if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
|
|
|
+ interviewDate := s.Next().Text()
|
|
|
+ if interviewDate != "" {
|
|
|
+ interviewDateArr = append(interviewDateArr, interviewDate)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
|
|
|
+ expertContent := s.Next().Text()
|
|
|
+ if expertContent == "" {
|
|
|
+ expertContent = contentTxt
|
|
|
+ }
|
|
|
+ if expertContent != "" {
|
|
|
+ rightIndex := strings.Index(expertContent, ")")
|
|
|
+ if rightIndex == 0 {
|
|
|
+ rightIndex = strings.Index(expertContent, ")")
|
|
|
+ }
|
|
|
+ expertNum := expertContent[:rightIndex]
|
|
|
+ expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
+ expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
+ expertNum = strings.Replace(expertNum, "专家评价", "", -1)
|
|
|
+ if expertNum != "" {
|
|
|
+ expertNumArr = append(expertNumArr, expertNum)
|
|
|
+ rightIndex = rightIndex
|
|
|
+ expertContentStr := expertContent[rightIndex:]
|
|
|
+ expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
|
|
|
+ expertContentStr = strings.TrimLeft(expertContentStr, ":")
|
|
|
+ expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
|
|
|
+ if expertContentStr != "" {
|
|
|
+ expertContentArr = append(expertContentArr, expertContentStr)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ })
|
|
|
+ }
|
|
|
+ if len(expertNumArr) > 0 {
|
|
|
+ expertNumStr = expertNumArr[0]
|
|
|
+ if expertNumStr != "" {
|
|
|
+ expertNumStr = strings.Replace(expertNumStr, "#:", "", -1)
|
|
|
+ expertNumStr = strings.Replace(expertNumStr, "# ", "", -1)
|
|
|
+ expertNumStr = strings.Trim(expertNumStr, "")
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(expertContentArr) > 0 {
|
|
|
+ expertContentStr = expertContentArr[0]
|
|
|
+ }
|
|
|
+ if len(expertContentStr) > 600 {
|
|
|
+ strnum := strings.Index(expertContentStr, "#专家评价:")
|
|
|
+ content := expertContentStr[strnum:len(expertContentStr)]
|
|
|
+ strnum2 := strings.Index(content, "(")
|
|
|
+ content = content[strnum2+9 : len(content)] //中文括号3位 专家编号6位
|
|
|
+ expertContentStr = content
|
|
|
+ }
|
|
|
+ //if strings.Index(body, "报告全文(") > 0 && strings.Index(body, "PDF格式报告下载.pdf") > 0 {
|
|
|
+ // numStar := strings.Index(body, "http")
|
|
|
+ // numEnd := strings.Index(body, ".pdf")
|
|
|
+ // fmt.Println("获取PDF链接")
|
|
|
+ // fileLink = body[numStar : numEnd+4]
|
|
|
+ //}
|
|
|
+ var hrefRegexp = regexp.MustCompile("(?m)<a.*?[^<]>.*?</a>")
|
|
|
+ match := hrefRegexp.FindAllString(body, -1)
|
|
|
+ if match != nil {
|
|
|
+ for k, v := range match {
|
|
|
+ if k == 0 && strings.Index(v, ".pdf") > 0 {
|
|
|
+ numStar := strings.Index(v, "http")
|
|
|
+ numEnd := strings.Index(v, ".pdf")
|
|
|
+ fileLink = v[numStar : numEnd+4]
|
|
|
+ }
|
|
|
+ body = strings.Replace(body, v, "", -1)
|
|
|
+ }
|
|
|
+ body = strings.Replace(body, "完整报告请点击链接:", "", -1)
|
|
|
+ body = strings.Replace(body, "PDF格式报告下载.pdf", "", -1)
|
|
|
+ body = strings.Replace(body, "报告全文():", "", -1)
|
|
|
+ }
|
|
|
+ bodyReturn = body
|
|
|
+ //fmt.Println(bodyReturn)
|
|
|
+ //fmt.Println("_________________")
|
|
|
+ //fmt.Println(fileLink)
|
|
|
+ //body = strings.Replace(body, "报告全文(", "", -1)
|
|
|
+ //body = strings.Replace(body, "PDF格式报告下载.pdf", "", -1)
|
|
|
+ //body = strings.Replace(body, "):", "", -1)
|
|
|
+
|
|
|
+ if len(interviewDateArr) > 0 {
|
|
|
+ interviewDateStr = interviewDateArr[0]
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|