Browse Source

Merge branch 'feature/deepseek_rag_1.0' into debug

kobe6258 1 month ago
parent
commit
f9c1781b3b
4 changed files with 102 additions and 12 deletions
  1. 3 3
      controllers/llm/wechat_platform.go
  2. 96 6
      services/wechat_platform.go
  3. 3 2
      utils/common.go
  4. 0 1
      utils/ws/session.go

+ 3 - 3
controllers/llm/wechat_platform.go

@@ -156,8 +156,6 @@ func (c *WechatPlatformController) Add() {
 	}
 
 	// 异步新增公众号
-	//go llm.AddWechatPlatform(item)
-
 	go cache.AddWechatArticleOpToCache(item.WechatPlatformId, `add`)
 
 	br.Ret = 200
@@ -652,7 +650,7 @@ func (c *WechatPlatformController) ArticleDel() {
 //	//fmt.Println(llm.BeachAddWechatPlatform(item))
 //
 //	obj := rag.WechatArticle{}
-//	list, _ := obj.GetListByCondition(`wechat_article_id,cover_url`, ` `, []interface{}{}, 0, 1000)
+//	list, _ := obj.GetListByCondition(`wechat_article_id,content`, ` AND wechat_article_id!=314 `, []interface{}{}, 0, 1000)
 //	//obj := rag.WechatPlatform{}
 //	//list, _ := obj.GetListByCondition(` AND wechat_platform_id !=1 `, []interface{}{}, 0, 100)
 //	//llm.ArticleToTmpFile(item.TextContent)
@@ -660,4 +658,6 @@ func (c *WechatPlatformController) ArticleDel() {
 //		//llm.ArticleToKnowledge(item)
 //		services.ReplaceWechatArticlePic(item)
 //	}
+//
+//	fmt.Println("修复结束")
 //}

+ 96 - 6
services/wechat_platform.go

@@ -1,6 +1,7 @@
 package services
 
 import (
+	"bytes"
 	"eta/eta_api/cache"
 	"eta/eta_api/models"
 	"eta/eta_api/models/rag"
@@ -8,6 +9,7 @@ import (
 	"eta/eta_api/utils"
 	"eta/eta_api/utils/llm/eta_llm/eta_llm_http"
 	"fmt"
+	html2 "golang.org/x/net/html"
 	"html"
 	"os"
 	"path"
@@ -132,6 +134,13 @@ func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetai
 		publishAt = time.Unix(int64(articleMenu.UpdateTime), 1000)
 	}
 
+	content := articleDetail.HtmlContent
+	// 图片下载下来到本地,如果成功了,那么就用新的
+	tmpContent, err := ReplaceHtmlImg(content)
+	if tmpContent != `` {
+		content = tmpContent
+	}
+
 	obj = &rag.WechatArticle{
 		WechatArticleId:  0,
 		WechatPlatformId: item.WechatPlatformId,
@@ -140,7 +149,7 @@ func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetai
 		Link:             articleLink,
 		CoverUrl:         articleDetail.CoverUrl,
 		Description:      articleDetail.Desc,
-		Content:          html.EscapeString(articleDetail.HtmlContent),
+		Content:          html.EscapeString(content),
 		TextContent:      articleDetail.TextContent,
 		Country:          articleDetail.CountryName,
 		Province:         articleDetail.ProvinceName,
@@ -163,7 +172,7 @@ func AddWechatArticle(item *rag.WechatPlatform, articleLink string, articleDetai
 	err = obj.Create()
 
 	// 修改文章封面图
-	go replaceWechatArticlePic(obj)
+	go replaceWechatArticleCoverPic(obj)
 
 	// 文章入库成功后,需要将相关信息入摘要库
 	go cache.AddWechatArticleLlmOpToCache(item.WechatPlatformId, ``)
@@ -525,12 +534,12 @@ func replaceWechatPlatformPic(item *rag.WechatPlatform) {
 
 }
 
-// replaceWechatArticlePic
+// replaceWechatArticleCoverPic
 // @Description: 替换文章封面图
 // @author: Roc
 // @datetime 2025-03-11 09:38:35
 // @param item *rag.WechatArticle
-func replaceWechatArticlePic(item *rag.WechatArticle) {
+func replaceWechatArticleCoverPic(item *rag.WechatArticle) {
 	var err error
 	defer func() {
 		if err != nil {
@@ -550,6 +559,33 @@ func replaceWechatArticlePic(item *rag.WechatArticle) {
 
 }
 
+// ReplaceWechatArticlePic
+// @Description: Replace the images embedded in an article's HTML content.
+// The content held on item is HTML-unescaped, run through ReplaceHtmlImg,
+// escaped again and handed to item.Update with only the "content" column listed.
+// Errors are logged here and are not returned to the caller.
+// @author: Roc
+// @datetime 2025-03-11 09:38:35
+// @param item *rag.WechatArticle
+func ReplaceWechatArticlePic(item *rag.WechatArticle) {
+	var err error
+	defer func() {
+		if err != nil {
+			// The message names the article content images; the previous text was
+			// copied from the avatar routine and pointed at the wrong operation.
+			utils.FileLog.Error("替换公众号文章内容图片失败,err:%v", err)
+			fmt.Println("替换公众号文章内容图片失败,err:", err)
+		}
+	}()
+	// Nothing to rewrite for a missing article or an empty body.
+	if item == nil || item.Content == `` {
+		return
+	}
+
+	// Content is kept HTML-escaped on the item, so decode it before parsing.
+	content, err := ReplaceHtmlImg(html.UnescapeString(item.Content))
+	if err != nil {
+		return
+	}
+	item.Content = html.EscapeString(content)
+	// Any failure is picked up by the deferred logger above.
+	err = item.Update([]string{"content"})
+}
+
 // downloadWxPicAndUploadToOss
 // @Description: 下载微信图片并上传到OSS
 // @author: Roc
@@ -571,8 +607,7 @@ func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, e
 		return
 	}
 	ext := path.Ext(localFilePath)
-	randStr := utils.GetRandStringNoSpecialChar(28)
-	fileName := randStr + ext
+	fileName := fmt.Sprintf(`%s%s%s`, time.Now().Format(utils.FormatShortDateTimeUnSpace), utils.GetRandStringNoSpecialChar(16), ext)
 	//savePath := utils.UploadDir + `wx/wx_article/` + time.Now().Format("200601/20060102/") + fileName
 	savePath := fmt.Sprintf(`%swx/%s/%s%s`, utils.UploadDir, source, time.Now().Format("200601/20060102/"), fileName)
 	resourceUrl, err = ossClient.UploadFile(fileName, localFilePath, savePath)
@@ -584,3 +619,58 @@ func downloadWxPicAndUploadToOss(wxPicUrl, source string) (resourceUrl string, e
 	return
 
 }
+
+// ReplaceHtmlImg
+// @Description: Replace the images in an HTML string with our own copies.
+// The input is parsed into a node tree, handleNode walks the tree and rewrites
+// img src values, and the tree is rendered back into a string.
+// NOTE(review): html2.Parse builds a whole document, so a fragment is likely
+// rendered back wrapped in html/head/body elements — confirm callers expect that.
+// @author: Roc
+// @datetime 2025-03-11 14:32:00
+// @param htmlStr string
+// @return newHtml string  rendered HTML; empty when err is non-nil
+// @return err error       parse or render failure
+func ReplaceHtmlImg(htmlStr string) (newHtml string, err error) {
+	doc, err := html2.Parse(strings.NewReader(htmlStr))
+	if err != nil {
+		return
+	}
+	handleNode(doc)
+
+	// Render the processed node tree back into an HTML string.
+	var buf bytes.Buffer
+	if err = html2.Render(&buf, doc); err != nil {
+		fmt.Println(err)
+		return
+	}
+	newHtml = buf.String()
+
+	return
+}
+
+// handleNode
+// @Description: Recursive HTML node handler. For every img element, the value
+// of its src attribute is passed to downloadWxPicAndUploadToOss (source
+// `article`) and replaced with the returned resource URL. All child nodes are
+// then visited the same way.
+// @author: Roc
+// @datetime 2025-03-11 14:32:45
+// @param n *html2.Node
+func handleNode(n *html2.Node) {
+	if n == nil {
+		return
+	}
+	if n.Type == html2.ElementNode && n.Data == "img" {
+		// Index into the slice so the new value is written in place.
+		for k := range n.Attr {
+			if n.Attr[k].Key != "src" {
+				continue
+			}
+			resourceUrl, tmpErr := downloadWxPicAndUploadToOss(n.Attr[k].Val, `article`)
+			if tmpErr != nil {
+				// Best effort: keep the original src when the transfer fails.
+				continue
+			}
+			n.Attr[k].Val = resourceUrl
+		}
+	}
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		handleNode(c)
+	}
+}

+ 3 - 2
utils/common.go

@@ -360,8 +360,9 @@ func DownloadWxImage(imgUrl string) (filePath string, err error) {
 	// 获得文件的writer对象
 	writer := bufio.NewWriter(file)
 
-	written, _ := io.Copy(writer, reader)
-	fmt.Printf("Total length: %d \n", written)
+	_, err = io.Copy(writer, reader)
+	//fmt.Printf("Total length: %d \n", written)
+
 	return
 }
 

+ 0 - 1
utils/ws/session.go

@@ -52,7 +52,6 @@ func (s *Session) readPump() {
 		if err = manager.HandleMessage(s.UserId, s.Id, message); err != nil {
 			//写应答
 			_ = s.writeWithTimeout(err.Error())
-
 		}
 	}
 }