|
@@ -4,6 +4,7 @@ import (
|
|
|
"fmt"
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
"hongze/hongze_cygx/models"
|
|
|
+ "hongze/hongze_cygx/utils"
|
|
|
"html"
|
|
|
"strings"
|
|
|
)
|
|
@@ -35,42 +36,35 @@ func GetReportContentSub(content string) (contentSub string, err error) {
|
|
|
|
|
|
func GetReportContentTextSub(content string) (contentSub string, err error) {
|
|
|
content = html.UnescapeString(content)
|
|
|
+ utils.FileLog.Info("%s", content)
|
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
|
|
|
if err != nil {
|
|
|
fmt.Println("create doc err:", err.Error())
|
|
|
return
|
|
|
}
|
|
|
- maxRow := 5
|
|
|
- n := 0
|
|
|
doc.Find("p").Each(func(i int, s *goquery.Selection) {
|
|
|
pHtml, _ := s.Html()
|
|
|
- if !strings.Contains(pHtml, "img") && !strings.Contains(pHtml, "table") {
|
|
|
- if n > maxRow {
|
|
|
- return
|
|
|
- }
|
|
|
- text := s.Text()
|
|
|
- if text != "" && !strings.Contains(text, "访谈时间") && !strings.Contains(text, "纪要详情") {
|
|
|
- n++
|
|
|
- contentSub = contentSub + s.Text()
|
|
|
- }
|
|
|
+ if strings.Contains(pHtml, "img") || strings.Contains(pHtml, "table") {
|
|
|
+ s.Remove()
|
|
|
}
|
|
|
})
|
|
|
if contentSub == "" || len(contentSub) < 200 {
|
|
|
- m := 0
|
|
|
+ //m := 0
|
|
|
doc.Find("span").Each(func(i int, s *goquery.Selection) {
|
|
|
spanHtml, _ := s.Html()
|
|
|
- if !strings.Contains(spanHtml, "img") && !strings.Contains(spanHtml, "table") {
|
|
|
- if m > maxRow {
|
|
|
- return
|
|
|
- }
|
|
|
- text := s.Text()
|
|
|
- if text != "" && !strings.Contains(text, "访谈时间") && !strings.Contains(text, "纪要详情") {
|
|
|
- n++
|
|
|
- contentSub = contentSub + s.Text()
|
|
|
- }
|
|
|
+ if strings.Contains(spanHtml, "img") || strings.Contains(spanHtml, "table") {
|
|
|
+ s.Remove()
|
|
|
}
|
|
|
})
|
|
|
}
|
|
|
+ docText:=doc.Text()
|
|
|
+ bodyRune := []rune(docText)
|
|
|
+ bodyRuneLen:=len(bodyRune)
|
|
|
+ if bodyRuneLen>200 {
|
|
|
+ bodyRuneLen=200
|
|
|
+ }
|
|
|
+ body:= string(bodyRune[:bodyRuneLen])
|
|
|
+ contentSub=body
|
|
|
return
|
|
|
}
|
|
|
|
|
@@ -255,7 +249,6 @@ func FixArticleContent(articleId int) {
|
|
|
})
|
|
|
}
|
|
|
|
|
|
-
|
|
|
var expertNumStr, expertContentStr, interviewDateStr string
|
|
|
if len(expertNumArr) > 0 {
|
|
|
expertNumStr = expertNumArr[0]
|