|
@@ -2,10 +2,10 @@ package services
|
|
|
|
|
|
import (
|
|
import (
|
|
"fmt"
|
|
"fmt"
|
|
|
|
+ "github.com/PuerkitoBio/goquery"
|
|
"hongze/hongze_cygx/models"
|
|
"hongze/hongze_cygx/models"
|
|
"html"
|
|
"html"
|
|
"strings"
|
|
"strings"
|
|
- "github.com/PuerkitoBio/goquery"
|
|
|
|
)
|
|
)
|
|
|
|
|
|
func GetReportContentSub(content string) (contentSub string, err error) {
|
|
func GetReportContentSub(content string) (contentSub string, err error) {
|
|
@@ -71,7 +71,6 @@ func GetReportContentTextSub(content string) (contentSub string, err error) {
|
|
}
|
|
}
|
|
})
|
|
})
|
|
}
|
|
}
|
|
- fmt.Println(contentSub)
|
|
|
|
return
|
|
return
|
|
}
|
|
}
|
|
|
|
|
|
@@ -86,9 +85,10 @@ func GetArticleAll() {
|
|
}()
|
|
}()
|
|
list, err := models.GetArticleAll()
|
|
list, err := models.GetArticleAll()
|
|
if err != nil {
|
|
if err != nil {
|
|
-
|
|
|
|
|
|
+ return
|
|
}
|
|
}
|
|
for _, v := range list {
|
|
for _, v := range list {
|
|
|
|
+ fmt.Println(v.ArticleId, v.Title)
|
|
FixArticleContent(v.ArticleId)
|
|
FixArticleContent(v.ArticleId)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -101,7 +101,9 @@ func FixArticleContent(articleId int) {
|
|
return
|
|
return
|
|
}
|
|
}
|
|
content := item.Body
|
|
content := item.Body
|
|
|
|
+ bodyText, _ := GetReportContentTextSub(content)
|
|
content = html.UnescapeString(content)
|
|
content = html.UnescapeString(content)
|
|
|
|
+
|
|
content = strings.Replace(content, "http", "https", -1)
|
|
content = strings.Replace(content, "http", "https", -1)
|
|
|
|
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
|
|
@@ -130,22 +132,130 @@ func FixArticleContent(articleId int) {
|
|
if rightIndex == 0 {
|
|
if rightIndex == 0 {
|
|
rightIndex = strings.Index(expertContent, ")")
|
|
rightIndex = strings.Index(expertContent, ")")
|
|
}
|
|
}
|
|
- expertNum := expertContent[:rightIndex]
|
|
|
|
- expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
|
- expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
|
- expertNum = strings.Replace(expertNum, "专家评价", "", -1)
|
|
|
|
- if expertNum != "" {
|
|
|
|
- expertNumArr = append(expertNumArr, expertNum)
|
|
|
|
- rightIndex = rightIndex
|
|
|
|
- expertContentStr := expertContent[rightIndex:]
|
|
|
|
- expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
|
|
|
|
- expertContentStr = strings.TrimLeft(expertContentStr, ":")
|
|
|
|
- expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
|
|
|
|
- expertContentArr = append(expertContentArr, expertContentStr)
|
|
|
|
|
|
+ if rightIndex > 0 {
|
|
|
|
+ expertNum := expertContent[:rightIndex]
|
|
|
|
+ expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
|
+ expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
|
+ expertNum = strings.Replace(expertNum, "专家评价", "", -1)
|
|
|
|
+ if expertNum != "" {
|
|
|
|
+ expertNumArr = append(expertNumArr, expertNum)
|
|
|
|
+ rightIndex = rightIndex
|
|
|
|
+ expertContentStr := expertContent[rightIndex:]
|
|
|
|
+ expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
|
|
|
|
+ expertContentStr = strings.TrimLeft(expertContentStr, ":")
|
|
|
|
+ expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
|
|
|
|
+ expertContentArr = append(expertContentArr, expertContentStr)
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
})
|
|
})
|
|
|
|
+
|
|
|
|
+ if len(expertContentArr) <= 0 {
|
|
|
|
+ doc.Find("pre").Each(func(i int, pre *goquery.Selection) {
|
|
|
|
+ pre.Find("span").Each(func(n int, span *goquery.Selection) {
|
|
|
|
+ contentTxt := span.Text()
|
|
|
|
+ if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
|
|
|
|
+ span.Find("span").Each(func(m int, subspan *goquery.Selection) {
|
|
|
|
+ subspanText := subspan.Text()
|
|
|
|
+ if strings.Contains(subspanText, "专家评价") {
|
|
|
|
+ expertContent := subspan.Next().Text()
|
|
|
|
+ if expertContent != "" {
|
|
|
|
+ rightIndex := strings.Index(expertContent, ")")
|
|
|
|
+ if rightIndex == 0 {
|
|
|
|
+ rightIndex = strings.Index(expertContent, ")")
|
|
|
|
+ }
|
|
|
|
+ if rightIndex > 0 {
|
|
|
|
+ expertNum := expertContent[:rightIndex]
|
|
|
|
+ expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
|
+ expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
|
+ expertNum = strings.Replace(expertNum, "专家评价", "", -1)
|
|
|
|
+ if expertNum != "" {
|
|
|
|
+ expertNumArr = append(expertNumArr, expertNum)
|
|
|
|
+ rightIndex = rightIndex
|
|
|
|
+ expertContentStr := expertContent[rightIndex:]
|
|
|
|
+ expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
|
|
|
|
+ expertContentStr = strings.TrimLeft(expertContentStr, ":")
|
|
|
|
+ expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
|
|
|
|
+ expertContentArr = append(expertContentArr, expertContentStr)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ })
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ span.Find("span").Each(func(k int, sspan *goquery.Selection) {
|
|
|
|
+ sspanText := sspan.Text()
|
|
|
|
+ if strings.Contains(sspanText, "访谈时间") {
|
|
|
|
+ sspanText = strings.Replace(sspanText, "#访谈时间:", "", -1)
|
|
|
|
+ sspanText = strings.Replace(sspanText, "访谈时间:", "", -1)
|
|
|
|
+ sspanText = strings.Replace(sspanText, "\n", "", -1)
|
|
|
|
+ sspanText = strings.Replace(sspanText, " ", "", -1)
|
|
|
|
+ sspanText = strings.Trim(sspanText, " ")
|
|
|
|
+ sspanText = sspanText[:10]
|
|
|
|
+ interviewDate := sspanText
|
|
|
|
+ if interviewDate != "" {
|
|
|
|
+ interviewDateArr = append(interviewDateArr, interviewDate)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ })
|
|
|
|
+ })
|
|
|
|
+ })
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if len(expertContentArr) <= 0 {
|
|
|
|
+ doc.Find("span").Each(func(i int, span *goquery.Selection) {
|
|
|
|
+ span.Find("strong").Each(func(n int, strong *goquery.Selection) {
|
|
|
|
+ spanText := span.Text()
|
|
|
|
+ strongText := strong.Text()
|
|
|
|
+ if strings.Contains(strongText, "#专家评价") || strings.Contains(strongText, "专家评价") {
|
|
|
|
+ expertContent := strong.Parents().Text()
|
|
|
|
+ if expertContent != "" {
|
|
|
|
+ rightIndex := strings.Index(expertContent, ")")
|
|
|
|
+ if rightIndex == 0 {
|
|
|
|
+ rightIndex = strings.Index(expertContent, ")")
|
|
|
|
+ }
|
|
|
|
+ if rightIndex > 0 {
|
|
|
|
+ expertNum := expertContent[:rightIndex]
|
|
|
|
+ expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
|
+ expertNum = strings.Replace(expertNum, "(", "", -1)
|
|
|
|
+ expertNum = strings.Replace(expertNum, "专家评价", "", -1)
|
|
|
|
+ expertNum = strings.Replace(expertNum, "#", "", -1)
|
|
|
|
+ expertNum = strings.Replace(expertNum, ":", "", -1)
|
|
|
|
+ expertNum = strings.Replace(expertNum, "\n", "", -1)
|
|
|
|
+ if expertNum != "" {
|
|
|
|
+ expertNumArr = append(expertNumArr, expertNum)
|
|
|
|
+ rightIndex = rightIndex
|
|
|
|
+ expertContentStr := expertContent[rightIndex:]
|
|
|
|
+ expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
|
|
|
|
+ expertContentStr = strings.TrimLeft(expertContentStr, ":")
|
|
|
|
+ expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
|
|
|
|
+ expertContentArr = append(expertContentArr, expertContentStr)
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if strings.Contains(spanText, "访谈时间") {
|
|
|
|
+ spanText = strings.Replace(spanText, "#访谈时间:", "", -1)
|
|
|
|
+ spanText = strings.Replace(spanText, "访谈时间:", "", -1)
|
|
|
|
+ spanText = strings.Replace(spanText, "\n", "", -1)
|
|
|
|
+ spanText = strings.Replace(spanText, " ", "", -1)
|
|
|
|
+ spanText = strings.Trim(spanText, " ")
|
|
|
|
+ spanText = spanText[:10]
|
|
|
|
+ interviewDate := spanText
|
|
|
|
+ if interviewDate != "" {
|
|
|
|
+ interviewDateArr = append(interviewDateArr, interviewDate)
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ })
|
|
|
|
+ })
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
var expertNumStr, expertContentStr, interviewDateStr string
|
|
var expertNumStr, expertContentStr, interviewDateStr string
|
|
if len(expertNumArr) > 0 {
|
|
if len(expertNumArr) > 0 {
|
|
expertNumStr = expertNumArr[0]
|
|
expertNumStr = expertNumArr[0]
|
|
@@ -157,8 +267,8 @@ func FixArticleContent(articleId int) {
|
|
if len(interviewDateArr) > 0 {
|
|
if len(interviewDateArr) > 0 {
|
|
interviewDateStr = interviewDateArr[0]
|
|
interviewDateStr = interviewDateArr[0]
|
|
}
|
|
}
|
|
- fmt.Println(articleId, expertNumStr, expertContentStr, interviewDateStr)
|
|
|
|
- err = models.ModifyArticleExpert(articleId, expertNumStr, expertContentStr, interviewDateStr)
|
|
|
|
|
|
+ expertNumStr = strings.Replace(expertNumStr, "#:", "", -1)
|
|
|
|
+ err = models.ModifyArticleExpert(articleId, expertNumStr, expertContentStr, interviewDateStr, bodyText)
|
|
if err != nil {
|
|
if err != nil {
|
|
fmt.Println("ModifyArticleExpert Err:" + err.Error())
|
|
fmt.Println("ModifyArticleExpert Err:" + err.Error())
|
|
return
|
|
return
|