|
@@ -6,6 +6,7 @@ import (
|
|
|
"hongze/hongze_cygx/models"
|
|
|
"hongze/hongze_cygx/utils"
|
|
|
"html"
|
|
|
+ "regexp"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
"time"
|
|
@@ -360,7 +361,7 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
}
|
|
|
v.Body = strings.Replace(v.Body, "http://vmp.hzinsights.com", "https://vmp.hzinsights.com", -1)
|
|
|
|
|
|
- expertNumStr, expertContentStr, interviewDateStr, fileLink := BodyAnalysis2(v.Body)
|
|
|
+ expertNumStr, expertContentStr, interviewDateStr, fileLink, bodyReturn := BodyAnalysis2(v.Body)
|
|
|
|
|
|
if strings.Index(v.Body, "报告全文(") > 0 && strings.Index(v.Body, "PDF格式报告下载.pdf") > 0 {
|
|
|
v.Body = strings.Replace(v.Body, "报告全文(", "", -1)
|
|
@@ -394,7 +395,8 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
updateParams["UpdateFrequency"] = v.UpdateFrequency
|
|
|
updateParams["CreateDate"] = v.CreateDate
|
|
|
updateParams["PublishDate"] = v.PublishDate
|
|
|
- updateParams["Body"] = html.EscapeString(v.Body)
|
|
|
+ //updateParams["Body"] = html.EscapeString(v.Body)
|
|
|
+ updateParams["Body"] = html.EscapeString(bodyReturn)
|
|
|
updateParams["BodyText"] = bodyText
|
|
|
updateParams["Abstract"] = html.EscapeString(v.Abstract)
|
|
|
updateParams["CategoryName"] = v.CategoryName
|
|
@@ -438,7 +440,8 @@ func SyncTacticsListAddreport() (err error) {
|
|
|
item.UpdateFrequency = v.UpdateFrequency
|
|
|
item.CreateDate = v.CreateDate
|
|
|
item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
|
|
|
- item.Body = html.EscapeString(v.Body)
|
|
|
+ //item.Body = html.EscapeString(v.Body)
|
|
|
+ item.Body = html.EscapeString(bodyReturn)
|
|
|
item.Abstract = html.EscapeString(v.Abstract)
|
|
|
item.CategoryName = v.CategoryName
|
|
|
item.SubCategoryName = v.SubCategoryName
|
|
@@ -534,7 +537,7 @@ func SyncTacticsListToEs() (err error) {
|
|
|
}
|
|
|
|
|
|
//body 解析
|
|
|
-func BodyAnalysis2(body string) (expertNumStr, expertContentStr, interviewDateStr, fileLink string) {
|
|
|
+func BodyAnalysis2(body string) (expertNumStr, expertContentStr, interviewDateStr, fileLink, bodyReturn string) {
|
|
|
body = html.UnescapeString(body)
|
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
|
|
|
if err != nil {
|
|
@@ -619,7 +622,6 @@ func BodyAnalysis2(body string) (expertNumStr, expertContentStr, interviewDateSt
|
|
|
}
|
|
|
})
|
|
|
}
|
|
|
-
|
|
|
if len(expertNumArr) > 0 {
|
|
|
expertNumStr = expertNumArr[0]
|
|
|
if expertNumStr != "" {
|
|
@@ -631,7 +633,6 @@ func BodyAnalysis2(body string) (expertNumStr, expertContentStr, interviewDateSt
|
|
|
if len(expertContentArr) > 0 {
|
|
|
expertContentStr = expertContentArr[0]
|
|
|
}
|
|
|
-
|
|
|
if len(expertContentStr) > 600 {
|
|
|
strnum := strings.Index(expertContentStr, "#专家评价:")
|
|
|
content := expertContentStr[strnum:len(expertContentStr)]
|
|
@@ -639,13 +640,30 @@ func BodyAnalysis2(body string) (expertNumStr, expertContentStr, interviewDateSt
|
|
|
content = content[strnum2+9 : len(content)] //中文括号3位 专家编号6位
|
|
|
expertContentStr = content
|
|
|
}
|
|
|
- if strings.Index(body, "报告全文(") > 0 && strings.Index(body, "PDF格式报告下载.pdf") > 0 {
|
|
|
- numStar := strings.Index(body, "http")
|
|
|
- numEnd := strings.Index(body, ".pdf")
|
|
|
- fmt.Println("获取PDF链接")
|
|
|
- fileLink = body[numStar : numEnd+4]
|
|
|
+ //if strings.Index(body, "报告全文(") > 0 && strings.Index(body, "PDF格式报告下载.pdf") > 0 {
|
|
|
+ // numStar := strings.Index(body, "http")
|
|
|
+ // numEnd := strings.Index(body, ".pdf")
|
|
|
+ // fmt.Println("获取PDF链接")
|
|
|
+ // fileLink = body[numStar : numEnd+4]
|
|
|
+ //}
|
|
|
+ var hrefRegexp = regexp.MustCompile("(?m)<a.*?[^<]>.*?</a>")
|
|
|
+ match := hrefRegexp.FindAllString(body, -1)
|
|
|
+ if match != nil {
|
|
|
+ for k, v := range match {
|
|
|
+ if k == 0 && strings.Index(v, ".pdf") > 0 {
|
|
|
+ numStar := strings.Index(v, "http")
|
|
|
+ numEnd := strings.Index(v, ".pdf")
|
|
|
+ fileLink = v[numStar : numEnd+4]
|
|
|
+ }
|
|
|
+ body = strings.Replace(body, v, "", -1)
|
|
|
+ }
|
|
|
+ body = strings.Replace(body, "完整报告请点击链接:", "", -1)
|
|
|
+ body = strings.Replace(body, "PDF格式报告下载.pdf", "", -1)
|
|
|
}
|
|
|
-
|
|
|
+ bodyReturn = body
|
|
|
+ //fmt.Println(body)
|
|
|
+ //fmt.Println("_________________")
|
|
|
+ //fmt.Println(fileLink)
|
|
|
//body = strings.Replace(body, "报告全文(", "", -1)
|
|
|
//body = strings.Replace(body, "PDF格式报告下载.pdf", "", -1)
|
|
|
//body = strings.Replace(body, "):", "", -1)
|
|
@@ -653,6 +671,5 @@ func BodyAnalysis2(body string) (expertNumStr, expertContentStr, interviewDateSt
|
|
|
if len(interviewDateArr) > 0 {
|
|
|
interviewDateStr = interviewDateArr[0]
|
|
|
}
|
|
|
- //fmt.Println(expertContentStr)
|
|
|
return
|
|
|
}
|