|
@@ -1,6 +1,7 @@
|
|
|
package services
|
|
|
|
|
|
import (
|
|
|
+ "bytes"
|
|
|
"eta/eta_api/cache"
|
|
|
"eta/eta_api/models"
|
|
|
"eta/eta_api/models/rag"
|
|
@@ -8,6 +9,7 @@ import (
|
|
|
"eta/eta_api/utils"
|
|
|
"eta/eta_api/utils/llm/eta_llm/eta_llm_http"
|
|
|
"fmt"
|
|
|
+ html2 "golang.org/x/net/html"
|
|
|
"html"
|
|
|
"os"
|
|
|
"path"
|
|
@@ -132,6 +134,13 @@ func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetai
|
|
|
publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
|
|
|
}
|
|
|
|
|
|
+ content := articleDetail.HtmlContent
|
|
|
+ // 图片下载下来到本地,如果成功了,那么就用新的
|
|
|
+ tmpContent, err := ReplaceHtmlImg(content)
|
|
|
+ if tmpContent != `` {
|
|
|
+ content = tmpContent
|
|
|
+ }
|
|
|
+
|
|
|
obj = &rag.WechatArticle{
|
|
|
WechatArticleId: 0,
|
|
|
WechatPlatformId: item.WechatPlatformId,
|
|
@@ -140,7 +149,7 @@ func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetai
|
|
|
Link: articleLink,
|
|
|
CoverUrl: articleDetail.CoverUrl,
|
|
|
Description: articleDetail.Desc,
|
|
|
- Content: html.EscapeString(articleDetail.HtmlContent),
|
|
|
+ Content: html.EscapeString(content),
|
|
|
TextContent: articleDetail.TextContent,
|
|
|
Country: articleDetail.CountryName,
|
|
|
Province: articleDetail.ProvinceName,
|
|
@@ -163,7 +172,7 @@ func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetai
|
|
|
err = obj.Create()
|
|
|
|
|
|
// 修改文章封面图
|
|
|
- go replaceWechatArticlePic(obj)
|
|
|
+ go replaceWechatArticleCoverPic(obj)
|
|
|
|
|
|
// 文章入库成功后,需要将相关信息入摘要库
|
|
|
go cache.AddWechatArticleLlmOpToCache(item.WechatPlatformId, ``)
|
|
@@ -525,12 +534,12 @@ func replaceWechatPlatformPic(item *rag.WechatPlatform) {
|
|
|
|
|
|
}
|
|
|
|
|
|
-// replaceWechatArticlePic
|
|
|
+// replaceWechatArticleCoverPic
|
|
|
// @Description: 替换文章封面图
|
|
|
// @author: Roc
|
|
|
// @datetime 2025-03-11 09:38:35
|
|
|
// @param item *rag.WechatArticle
|
|
|
-func replaceWechatArticlePic(item *rag.WechatArticle) {
|
|
|
+func replaceWechatArticleCoverPic(item *rag.WechatArticle) {
|
|
|
var err error
|
|
|
defer func() {
|
|
|
if err != nil {
|
|
@@ -550,6 +559,33 @@ func replaceWechatArticlePic(item *rag.WechatArticle) {
|
|
|
|
|
|
}
|
|
|
|
|
|
+// ReplaceWechatArticlePic re-hosts the images embedded in an article's HTML content.
+// @Description: Unescapes item.Content, passes it through ReplaceHtmlImg, stores the
+// re-escaped result back on item and persists the "content" column via item.Update.
+// Failures are logged (file log and stdout) rather than returned.
+// @author: Roc
+// @datetime 2025-03-11 09:38:35
+// @param item *rag.WechatArticle  article updated in place; nil or empty Content is a no-op
+func ReplaceWechatArticlePic(item *rag.WechatArticle) {
+	var err error
+	defer func() {
+		if err != nil {
+			utils.FileLog.Error("替换公众号文章内容图片失败,err:%v", err)
+			fmt.Println("替换公众号文章内容图片失败,err:", err)
+		}
+	}()
+	// Guard against nil so a bad caller cannot panic this routine.
+	if item == nil || item.Content == `` {
+		return
+	}
+
+	// Content is stored HTML-escaped, so unescape before parsing and escape again afterwards.
+	var content string
+	content, err = ReplaceHtmlImg(html.UnescapeString(item.Content))
+	if err != nil {
+		return
+	}
+	item.Content = html.EscapeString(content)
+	err = item.Update([]string{"content"})
+}
|
|
|
+
|
|
|
// downloadWxPicAndUploadToOss
|
|
|
// @Description: 下载微信图片并上传到OSS
|
|
|
// @author: Roc
|
|
@@ -571,8 +607,7 @@ func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, e
|
|
|
return
|
|
|
}
|
|
|
ext := path.Ext(localFilePath)
|
|
|
- randStr := utils.GetRandStringNoSpecialChar(28)
|
|
|
- fileName := randStr + ext
|
|
|
+ fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), utils.GetRandStringNoSpecialChar(16), ext)
|
|
|
//savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName
|
|
|
savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName)
|
|
|
resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath)
|
|
@@ -584,3 +619,58 @@ func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, e
|
|
|
return
|
|
|
|
|
|
}
|
|
|
+
|
|
|
+// ReplaceHtmlImg rewrites the image sources of an HTML string.
+// @Description: Parses htmlStr with html2.Parse, lets handleNode process the resulting
+// node tree, and renders the tree back into a string.
+// NOTE(review): html2.Parse builds a complete document tree, so a fragment input
+// presumably comes back wrapped in html/head/body elements - confirm callers accept that.
+// @author: Roc
+// @datetime 2025-03-11 14:32:00
+// @param htmlStr string  HTML to process; an empty string yields an empty result
+// @return newHtml string  rendered HTML; empty when err is non-nil
+// @return err error  parse or render failure, wrapped with the failing step
+func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) {
+	// Nothing to rewrite: skip parsing so empty input does not come back as markup.
+	if htmlStr == `` {
+		return
+	}
+
+	doc, err := html2.Parse(strings.NewReader(htmlStr))
+	if err != nil {
+		err = fmt.Errorf("parse html: %w", err)
+		return
+	}
+	handleNode(doc)
+
+	// Render the processed node tree back into an HTML string.
+	var buf bytes.Buffer
+	if err = html2.Render(&buf, doc); err != nil {
+		// The error is returned to the caller; it is not printed here as well.
+		err = fmt.Errorf("render html: %w", err)
+		return
+	}
+	newHtml = buf.String()
+
+	return
+}
|
|
|
+
|
|
|
+// handleNode walks an HTML node tree and re-hosts image sources.
+// @Description: For every <img> element, the value of its src attribute is passed to
+// downloadWxPicAndUploadToOss (source `article`); on success the attribute is overwritten
+// with the returned URL. On failure the original src is kept and the error is written to
+// the file log. All child nodes are visited recursively.
+// @author: Roc
+// @datetime 2025-03-11 14:32:45
+// @param n *html2.Node  root of the (sub)tree to process; nil is a no-op
+func handleNode(n *html2.Node) {
+	if n == nil {
+		return
+	}
+	if n.Type == html2.ElementNode && n.Data == "img" {
+		for k := range n.Attr {
+			// Only a non-empty src attribute is rewritten; everything else is left untouched.
+			if n.Attr[k].Key != "src" || n.Attr[k].Val == `` {
+				continue
+			}
+			resourceUrl, tmpErr := downloadWxPicAndUploadToOss(n.Attr[k].Val, `article`)
+			if tmpErr != nil {
+				// Best effort: keep the original src, but leave a trace of the failure.
+				utils.FileLog.Error("下载文章内容图片失败,src:%s,err:%v", n.Attr[k].Val, tmpErr)
+				continue
+			}
+			n.Attr[k].Val = resourceUrl
+		}
+	}
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		handleNode(c)
+	}
+}
|