ziwen 1 рік тому
батько
коміт
8ec7872832
4 змінених файлів з 26 додано та 14 видалено
  1. 6 1
      controllers/yanxuan_special.go
  2. 3 1
      go.mod
  3. 0 11
      go.sum
  4. 17 1
      utils/common.go

+ 6 - 1
controllers/yanxuan_special.go

@@ -2,6 +2,7 @@ package controllers
 
 import (
 	"encoding/json"
+	"golang.org/x/net/html"
 	"hongze/hongze_cygx/models"
 	"hongze/hongze_cygx/services"
 	"hongze/hongze_cygx/utils"
@@ -75,7 +76,6 @@ func (this *YanxuanSpecialController) List() {
 		}
 		v.ImgUrl = strings.TrimRight(v.ImgUrl, ",")
 		//去除图片标签
-		v.Content = utils.ArticleRemoveImgUrl(v.Content)
 		hasImg, err := utils.ArticleHasImgUrl(v.Content)
 		if err != nil {
 			return
@@ -83,6 +83,11 @@ func (this *YanxuanSpecialController) List() {
 		if hasImg {
 			v.ContentHasImg = 1
 		}
+		v.Content = utils.ArticleRemoveImgUrl(v.Content)
+
+		doc, _ := html.Parse(strings.NewReader(v.Content))
+		v.Content = utils.ExtractText(doc)
+
 		if v.DocUrl != "" {
 			var docs []models.Doc
 			err := json.Unmarshal([]byte(v.DocUrl), &docs)

+ 3 - 1
go.mod

@@ -11,12 +11,14 @@ require (
 	github.com/elastic/go-elasticsearch/v7 v7.17.1
 	github.com/go-sql-driver/mysql v1.6.0
 	github.com/medivhzhan/weapp/v2 v2.5.0
-	github.com/medivhzhan/weapp/v3 v3.6.19
+	github.com/mitchellh/mapstructure v1.5.0 // indirect
 	github.com/olivere/elastic/v7 v7.0.32
 	github.com/pdfcpu/pdfcpu v0.3.13
 	github.com/rdlucklib/rdluck_tools v1.0.3
 	github.com/satori/go.uuid v1.2.0 // indirect
 	github.com/tealeg/xlsx v1.0.5
+	golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd
+	golang.org/x/sys v0.6.0 // indirect
 	golang.org/x/time v0.0.0-20220411224347-583f2d630306 // indirect
 	gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
 )

+ 0 - 11
go.sum

@@ -99,8 +99,6 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m
 github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
 github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
-github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
-github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw=
 github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
 github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
 github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4=
@@ -249,14 +247,8 @@ github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0Q
 github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
 github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
-github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
-github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
 github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
 github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
-github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
-github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
-github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98=
-github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
 github.com/mattn/go-sqlite3 v1.14.7/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
 github.com/mattn/go-sqlite3 v2.0.3+incompatible h1:gXHsfypPkaMZrKbD5209QV9jbUTJKjyR5WD3HYQSd+U=
@@ -265,8 +257,6 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0j
 github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
 github.com/medivhzhan/weapp/v2 v2.5.0 h1:WfgSHdZ34xM47w4b/5nGTSzaqHo1uizdDDKJsCRB2bY=
 github.com/medivhzhan/weapp/v2 v2.5.0/go.mod h1:lcP2Go0ow4ElqGsEQwEeyTZiiEhr+lC5zMxGZGA7Bsc=
-github.com/medivhzhan/weapp/v3 v3.6.19 h1:SXY1H1AdX4RvRyuCTb5+sJxEMisyoxHLL87aIwuos/g=
-github.com/medivhzhan/weapp/v3 v3.6.19/go.mod h1:DBlnuMNGIcDYSDaM8JnBIT14I+OqMaMNm9QU8HvhhMU=
 github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
 github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc=
 github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
@@ -537,7 +527,6 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=

+ 17 - 1
utils/common.go

@@ -8,6 +8,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"github.com/PuerkitoBio/goquery"
+	"golang.org/x/net/html"
 	"image"
 	"image/png"
 	"math"
@@ -916,4 +917,19 @@ func FindArticleImgUrls(body string) (imgUrls []string, err error) {
 		imgUrls = append(imgUrls, src)
 	})
 	return
-}
+}
+
+//提取纯文本
+func ExtractText(node *html.Node) string {
+	var texts []string
+
+	if node.Type == html.TextNode {
+		texts = append(texts, node.Data)
+	}
+
+	for child := node.FirstChild; child != nil; child = child.NextSibling {
+		texts = append(texts, ExtractText(child))
+	}
+
+	return strings.Join(texts, "")
+}