瀏覽代碼

Merge remote-tracking branch 'origin/rag/3.1' into debug

# Conflicts:
#	services/task.go
#	utils/constants.go
Roc 1 周之前
父節點
當前提交
385b060562

+ 31 - 0
cache/wechat_platform.go → cache/llm.go

@@ -51,3 +51,34 @@ func AddWechatArticleLlmOpToCache(wechatPlatformId int, source string) bool {
 	}
 	return false
 }
+
+// RagEtaReportOpOp is the queue record describing one ETA-report knowledge-base
+// operation. RagEtaReportOpToCache builds it and pushes it onto the
+// utils.CACHE_ETA_REPORT_KNOWLEDGE list; the queue consumer unmarshals it from
+// JSON.
+type RagEtaReportOpOp struct {
+	Source          string // operation type; callers in this change pass `publish` or `un_publish`
+	ReportId        int    // id of the report the operation applies to
+	ReportChapterId int    // chapter id; callers in this change always pass 0
+}
+
+// RagEtaReportOpToCache pushes an ETA-report knowledge-base operation onto the
+// utils.CACHE_ETA_REPORT_KNOWLEDGE list so the queue consumer can apply it.
+//
+// reportId and reportChapterId identify the report (and chapter) the operation
+// targets; source is the operation type, e.g. `publish` or `un_publish`.
+//
+// It returns true only when the record was actually pushed. It returns false
+// when utils.Re is non-nil or when LPush reports an error, so a caller that
+// inspects the result is never told a lost operation was queued.
+func RagEtaReportOpToCache(reportId, reportChapterId int, source string) bool {
+	// NOTE(review): utils.Re presumably holds the redis initialisation error — confirm.
+	if utils.Re != nil {
+		return false
+	}
+
+	record := &RagEtaReportOpOp{
+		Source:          source,
+		ReportId:        reportId,
+		ReportChapterId: reportChapterId,
+	}
+	if err := utils.Rc.LPush(utils.CACHE_ETA_REPORT_KNOWLEDGE, record); err != nil {
+		fmt.Println("RagEtaReportOpToCache LPush Err:" + err.Error())
+		utils.FileLog.Error(fmt.Sprintf("RagEtaReportOpToCache LPush Err:%s,操作类型:%s,报告id:%d,章节id:%d", err.Error(), source, reportId, reportChapterId))
+		return false
+	}
+
+	// Only log the "added to cache" line once the push has really succeeded.
+	utils.FileLog.Info(fmt.Sprintf("将eta报告入知识库操作加入缓存 加入缓存 RagEtaReportOpToCache LPush: 操作类型:%s,报告id:%d,章节id:%d", source, reportId, reportChapterId))
+	return true
+}

+ 15 - 0
controllers/llm/report.go

@@ -58,6 +58,9 @@ func (c *RagEtaReportController) ArticleList() {
 	var condition string
 	var pars []interface{}
 
+	condition += fmt.Sprintf(` AND %s = ? AND %s = ? `, rag.RagEtaReportColumns.IsDeleted, rag.RagEtaReportColumns.IsPublished)
+	pars = append(pars, 0, 1)
+
 	if keyWord != "" {
 		condition += fmt.Sprintf(` AND %s like ? `, rag.RagEtaReportColumns.Title)
 		pars = append(pars, `%`+keyWord+`%`)
@@ -226,3 +229,15 @@ func (c *RagEtaReportController) ArticleDel() {
 	br.Success = true
 	br.Msg = "删除成功"
 }
+
+//// 修复历史ETA报告到知识库
+//func init() {
+//	idList, err := models.GetAllPublishReportId()
+//	if err != nil {
+//		fmt.Println("查询失败:", err.Error())
+//		return
+//	}
+//	for _, v := range idList {
+//		cache.RagEtaReportOpToCache(v, 0, "publish")
+//	}
+//}

+ 7 - 0
controllers/report_v2.go

@@ -2,6 +2,7 @@ package controllers
 
 import (
 	"encoding/json"
+	"eta/eta_api/cache"
 	"eta/eta_api/models"
 	"eta/eta_api/models/report"
 	"eta/eta_api/models/report_approve"
@@ -1506,6 +1507,9 @@ func (this *ReportController) PublishCancelReport() {
 		_ = services.ResetMiniProgramReportDetailCover(reportInfo.Id)
 	}()
 
+	// 报告取消发布成功后,需要将相关信息入知识库
+	go cache.RagEtaReportOpToCache(reportInfo.Id, 0, `un_publish`)
+
 	br.Ret = 200
 	br.Success = true
 }
@@ -1839,6 +1843,9 @@ func (this *ReportController) CancelApprove() {
 		_ = services.ResetMiniProgramReportDetailCover(reportItem.Id)
 	}()
 
+	// After the approval is cancelled the report is no longer published; queue an un_publish sync to the knowledge base
+	go cache.RagEtaReportOpToCache(reportItem.Id, 0, `un_publish`)
+
 	br.Ret = 200
 	br.Success = true
 	br.Msg = "操作成功"

+ 9 - 0
models/rag/rag_eta_report.go

@@ -17,6 +17,7 @@ type RagEtaReport struct {
 	Author          string    `gorm:"column:author" description:"作者"`
 	TextContent     string    `gorm:"column:text_content" description:"报告内容(去除html)"`
 	VectorKey       string    `gorm:"column:vector_key" description:"向量库的key"`
+	IsPublished     int       `gorm:"column:is_published;type:tinyint(4);comment:是否已发布,0:未发布,1:已发布;default:1;" description:"是否已发布,0:未发布,1:已发布"`
 	IsDeleted       int       `gorm:"column:is_deleted;type:tinyint(4);comment:是否删除,0:未删除,1:已删除;default:0;" description:"否删除,0:未删除,1:已删除"`
 	PublishTime     time.Time `gorm:"column:publish_time" description:"发布时间"`
 	ModifyTime      time.Time `gorm:"column:modify_time" description:"修改时间"`
@@ -37,6 +38,7 @@ var RagEtaReportColumns = struct {
 	Author          string
 	TextContent     string
 	VectorKey       string
+	IsPublished     string
 	IsDeleted       string
 	PublishTime     string
 	ModifyTime      string
@@ -49,6 +51,7 @@ var RagEtaReportColumns = struct {
 	Author:          "author",
 	TextContent:     "text_content",
 	VectorKey:       "vector_key",
+	IsPublished:     "is_published",
 	IsDeleted:       "is_deleted",
 	PublishTime:     "publish_time",
 	ModifyTime:      "modify_time",
@@ -123,6 +126,12 @@ func (m *RagEtaReport) GetById(id int) (item *RagEtaReport, err error) {
 	return
 }
 
+// GetByReportAndChapterId loads the first knowledge-base row whose report id
+// and report chapter id columns equal the given values, using the
+// utils.DbNameAI database handle. Any error from the query is returned as-is.
+func (m *RagEtaReport) GetByReportAndChapterId(reportId, reportChapterId int) (item *RagEtaReport, err error) {
+	where := fmt.Sprintf("%s = ? AND %s = ? ", RagEtaReportColumns.ReportID, RagEtaReportColumns.ReportChapterID)
+	db := global.DbMap[utils.DbNameAI]
+	result := db.Where(where, reportId, reportChapterId).First(&item)
+	err = result.Error
+
+	return
+}
+
 func (m *RagEtaReport) GetListByCondition(field, condition string, pars []interface{}, startSize, pageSize int) (items []*RagEtaReport, err error) {
 	if field == "" {
 		field = "*"

+ 7 - 0
models/report.go

@@ -1697,3 +1697,10 @@ type ReportShartUrlReq struct {
 type ReportShartUrlResp struct {
 	UrlToken string `description:"分享链接token"`
 }
+
+func GetAllPublishReportId() (items []int, err error) {
+	o := global.DbMap[utils.DbNameReport]
+	sql := `SELECT  a.id FROM report as a WHERE 1=1 AND state in (2,6) limit 20`
+	err = o.Raw(sql).Find(&items).Error
+	return
+}

+ 270 - 0
services/llm_report.go

@@ -0,0 +1,270 @@
+package services
+
+import (
+	"eta/eta_api/models"
+	"eta/eta_api/models/rag"
+	"eta/eta_api/utils"
+	"fmt"
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+	"regexp"
+	"strings"
+	"time"
+)
+
+// ReportAddOrModifyKnowledge adds an ETA report (or one of its chapters) to
+// the knowledge base, or updates the existing entry.
+//
+// When reportChapterId > 0 the title, author, publish time and content are
+// read from that chapter; otherwise they are read from the report itself.
+// Calls with reportId <= 0 are ignored. Nothing is returned: any failure,
+// including a failed report/chapter lookup, is written to utils.FileLog by the
+// deferred handler.
+func ReportAddOrModifyKnowledge(reportId, reportChapterId int) {
+	if reportId <= 0 {
+		return
+	}
+	var err error
+	defer func() {
+		if err != nil {
+			utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
+		}
+	}()
+
+	var title, author, htmlContent string
+	var publishTime time.Time
+
+	if reportChapterId > 0 {
+		chapterInfo, tmpErr := models.GetReportChapterInfoById(reportChapterId)
+		if tmpErr != nil {
+			// Hand the lookup failure to the deferred logger instead of dropping it.
+			err = tmpErr
+			return
+		}
+		title = chapterInfo.Title
+		author = chapterInfo.Author
+		publishTime = chapterInfo.PublishTime
+		htmlContent = chapterInfo.Content
+	} else {
+		reportInfo, tmpErr := models.GetReportByReportId(reportId)
+		if tmpErr != nil {
+			// Hand the lookup failure to the deferred logger instead of dropping it.
+			err = tmpErr
+			return
+		}
+		title = reportInfo.Title
+		author = reportInfo.Author
+		publishTime = reportInfo.PublishTime
+		htmlContent = reportInfo.Content
+	}
+
+	err = handleReportAddOrModifyKnowledge(reportId, reportChapterId, title, author, htmlContent, publishTime)
+}
+
+// ReportAddOrModifyKnowledgeByReportId
+// @Description: ETA报告加入/修改到知识库(只传id的情况)
+// @author: Roc
+// @datetime 2025-04-07 15:41:15
+// @param reportId int
+func ReportAddOrModifyKnowledgeByReportId(reportId int) {
+	if reportId <= 0 {
+		return
+	}
+	errList := make([]string, 0)
+	defer func() {
+		if len(errList) > 0 {
+			utils.FileLog.Error("ReportAddOrModifyKnowledge error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
+		}
+	}()
+
+	reportInfo, err := models.GetReportByReportId(reportId)
+	if err != nil {
+		errList = append(errList, err.Error())
+		return
+	}
+
+	// 如果是单篇报告,那么直接处理
+	if reportInfo.HasChapter == 0 {
+		err = handleReportAddOrModifyKnowledge(reportId, 0, reportInfo.Title, reportInfo.Author, reportInfo.Content, reportInfo.PublishTime)
+		if err != nil {
+			errList = append(errList, err.Error())
+		}
+		return
+	}
+
+	// 章节类型的报告,需要查询出来后再处理
+	chapterInfoList, err := models.GetPublishedChapterListByReportId(reportId)
+	if err != nil {
+		errList = append(errList, err.Error())
+		return
+	}
+	for _, v := range chapterInfoList {
+		err = handleReportAddOrModifyKnowledge(reportId, v.ReportChapterId, v.Title, v.Author, v.Content, reportInfo.PublishTime)
+		if err != nil {
+			errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", v.ReportChapterId, v.Title, err.Error()))
+			continue
+		}
+	}
+
+	return
+}
+
+// handleReportAddOrModifyKnowledge
+// @Description: 处理ETA报告加入/修改到知识库
+// @author: Roc
+// @datetime 2025-04-07 15:33:38
+// @param reportId int
+// @param reportChapterId int
+// @param title string
+// @param author string
+// @param htmlContent string
+// @param publishTime time.Time
+// @return err error
+func handleReportAddOrModifyKnowledge(reportId, reportChapterId int, title, author, htmlContent string, publishTime time.Time) (err error) {
+	htmlContent = html.UnescapeString(htmlContent)
+	doc, err := html.Parse(strings.NewReader(htmlContent))
+	if err != nil {
+		return
+	}
+	// 只获取文本内容
+	content := &strings.Builder{}
+	getArticleContent(content, doc)
+
+	textContent := content.String()
+	textContent = regexp.MustCompile(`\n+`).ReplaceAllString(textContent, "\n")
+	textContent = strings.Trim(textContent, "\n")
+
+	publishTimeStr := `未知`
+	if !publishTime.IsZero() {
+		title = fmt.Sprintf("%s(%s)", title, publishTime.Format(utils.FormatMonthDayUnSpace))
+		publishTimeStr = publishTime.Format(utils.FormatDateTime)
+	}
+
+	textContent = fmt.Sprintf("标题:%s\n发布时间:%s\n%s", title, publishTimeStr, textContent)
+
+	obj := rag.RagEtaReport{}
+	item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
+	if err != nil && !utils.IsErrNoRow(err) {
+		// 查询异常,且不是没找到数据的报错
+		return
+	}
+
+	if err == nil {
+		// 标记删除了的话,那就不处理了
+		if item.IsDeleted == 1 {
+			return
+		}
+		item.Title = title
+		item.Author = author
+		item.TextContent = textContent
+		item.PublishTime = publishTime
+		item.ModifyTime = time.Now()
+		err = item.Update([]string{"author", "text_content", "publish_time", "modify_time"})
+	} else {
+		// 无数据的时候,需要新增
+		err = nil
+		item = &rag.RagEtaReport{
+			RagEtaReportId:  0,
+			ReportId:        reportId,
+			ReportChapterId: reportChapterId,
+			Title:           title,
+			Author:          author,
+			TextContent:     textContent,
+			VectorKey:       "",
+			IsPublished:     1,
+			IsDeleted:       0,
+			PublishTime:     publishTime,
+			ModifyTime:      time.Now(),
+			CreateTime:      time.Now(),
+		}
+		err = item.Create()
+	}
+
+	return
+}
+
+// ReportUnPublishedKnowledge marks the knowledge-base row for the given report
+// and chapter as unpublished.
+//
+// The call is ignored when both reportId and reportChapterId are <= 0. When no
+// row exists there is nothing to unpublish, which is not treated as an error.
+// Nothing is returned: query and update failures are written to utils.FileLog
+// by the deferred handler.
+func ReportUnPublishedKnowledge(reportId, reportChapterId int) {
+	if reportId <= 0 && reportChapterId <= 0 {
+		return
+	}
+	var err error
+	defer func() {
+		if err != nil {
+			utils.FileLog.Error("ReportUnPublishedKnowledge error:", err)
+		}
+	}()
+
+	obj := rag.RagEtaReport{}
+	item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
+	if err != nil {
+		if utils.IsErrNoRow(err) {
+			// No knowledge-base row for this report: nothing to do, and not
+			// worth an error log entry.
+			err = nil
+		}
+		return
+	}
+	if item == nil || item.RagEtaReportId <= 0 {
+		return
+	}
+
+	item.IsPublished = 0
+	item.ModifyTime = time.Now()
+	err = item.Update([]string{"is_published", "modify_time"})
+}
+
+// ReportUnPublishedKnowledgeByReportId marks every knowledge-base row of the
+// given report as unpublished, for callers that only know the report id.
+//
+// Calls with reportId <= 0 are ignored. At most 1000 rows are loaded per call.
+// Nothing is returned: a failed query and every failed row update are
+// collected and written to utils.FileLog in one message when the function
+// returns.
+func ReportUnPublishedKnowledgeByReportId(reportId int) {
+	if reportId <= 0 {
+		return
+	}
+	errList := make([]string, 0)
+	defer func() {
+		if len(errList) > 0 {
+			utils.FileLog.Error("ReportUnPublishedKnowledgeByReportId error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
+		}
+	}()
+
+	obj := rag.RagEtaReport{}
+	list, err := obj.GetListByCondition(``, ` AND report_id = ? `, []interface{}{reportId}, 0, 1000)
+	if err != nil && !utils.IsErrNoRow(err) {
+		// Query failed for a reason other than "no row found": record it so the
+		// deferred logger reports it instead of failing silently.
+		errList = append(errList, err.Error())
+		return
+	}
+
+	for _, item := range list {
+		item.IsPublished = 0
+		item.ModifyTime = time.Now()
+		if updateErr := item.Update([]string{"is_published", "modify_time"}); updateErr != nil {
+			errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", item.ReportChapterId, item.Title, updateErr.Error()))
+		}
+	}
+}
+
+// getArticleContent walks the HTML node tree depth-first and appends its
+// plain text to content.
+//
+// Text nodes are whitespace-trimmed and written unless they are empty or the
+// literal "</p>". A newline is written when a <ul>, <br> or <p> element is
+// entered. Children are then visited in document order.
+func getArticleContent(content *strings.Builder, htmlContentNode *html.Node) {
+	switch htmlContentNode.Type {
+	case html.TextNode:
+		if text := strings.TrimSpace(htmlContentNode.Data); text != "" && text != "</p>" {
+			content.WriteString(text)
+		}
+	case html.ElementNode:
+		switch htmlContentNode.DataAtom {
+		case atom.Ul, atom.Br, atom.P:
+			// These elements start a new line in the extracted text.
+			content.WriteString("\n")
+		}
+	}
+
+	child := htmlContentNode.FirstChild
+	for child != nil {
+		getArticleContent(content, child)
+		child = child.NextSibling
+	}
+}

+ 4 - 0
services/report_approve.go

@@ -1,6 +1,7 @@
 package services
 
 import (
+	"eta/eta_api/cache"
 	"eta/eta_api/models"
 	"eta/eta_api/models/report_approve"
 	"eta/eta_api/models/smart_report"
@@ -867,6 +868,9 @@ func AfterReportApprovePass(reportType, reportId int) (err error) {
 		//_ = CreateVideo(reportInfo)
 		_ = UpdateReportEs(reportInfo.Id, models.ReportStatePublished)
 
+		// 报告发布成功后,需要将相关信息入知识库
+		go cache.RagEtaReportOpToCache(reportInfo.Id, 0, `publish`)
+
 		return
 	}
 

+ 4 - 0
services/report_v2.go

@@ -4,6 +4,7 @@ import (
 	"archive/zip"
 	"encoding/json"
 	"errors"
+	"eta/eta_api/cache"
 	"eta/eta_api/models"
 	"eta/eta_api/models/company"
 	"eta/eta_api/models/data_manage/excel"
@@ -1260,6 +1261,9 @@ func PublishReport(reportId int, reportUrl string, sysUser *system.Admin) (tips
 		go handleReportPermission(int64(reportInfo.Id), minClassifyId)
 	}
 
+	// 报告发布成功后,需要将相关信息入知识库
+	go cache.RagEtaReportOpToCache(reportInfo.Id, 0, `publish`)
+
 	return
 }
 

+ 29 - 0
services/task.go

@@ -74,6 +74,9 @@ func Task() {
 	// 定时任务进行微信文章LLM操作
 	go HandleWechatArticleLLmOp()
 
+	// 队列任务将eta报告同步到知识库操作
+	go HandleEtaReportKnowledgeLLmOp()
+
 	// 权益报告监听入库
 	go AutoInsertRaiReport()
 
@@ -648,3 +651,29 @@ func HandleWechatArticleLLmOp() {
 		})
 	}
 }
+
+// HandleEtaReportKnowledgeLLmOp consumes the utils.CACHE_ETA_REPORT_KNOWLEDGE
+// queue forever and applies each ETA-report knowledge-base operation.
+//
+// Each message is JSON-decoded into a cache.RagEtaReportOpOp and dispatched on
+// its Source: `publish` syncs the report into the knowledge base and
+// `un_publish` marks its rows as unpublished. Only ReportId is used; the
+// chapter id in the message is not read. A panic while handling one message is
+// recovered inside the callback so the loop keeps consuming later messages.
+func HandleEtaReportKnowledgeLLmOp() {
+	defer func() {
+		if err := recover(); err != nil {
+			fmt.Println("[HandleEtaReportKnowledgeLLmOp]", err)
+		}
+	}()
+	for {
+		utils.Rc.Brpop(utils.CACHE_ETA_REPORT_KNOWLEDGE, func(b []byte) {
+			// Recover per message: the outer recover sits outside the loop, so
+			// without this a single bad message would stop the consumer for good.
+			defer func() {
+				if r := recover(); r != nil {
+					fmt.Println("[HandleEtaReportKnowledgeLLmOp]", r)
+				}
+			}()
+
+			op := new(cache.RagEtaReportOpOp)
+			if err := json.Unmarshal(b, op); err != nil {
+				fmt.Println("json unmarshal wrong!", err)
+				return
+			}
+			switch op.Source {
+			case `publish`:
+				ReportAddOrModifyKnowledgeByReportId(op.ReportId)
+			case `un_publish`:
+				ReportUnPublishedKnowledgeByReportId(op.ReportId)
+			}
+		})
+	}
+}

+ 1 - 0
utils/constants.go

@@ -271,6 +271,7 @@ const (
 	CACHE_DATA_SOURCE_ES_HANDLE             = "eta:data_source_es:handle"            // 数据源es处理队列
 	CACHE_WECHAT_PLATFORM_ARTICLE           = "wechat_platform:article:op"           //微信文章处理
 	CACHE_WECHAT_PLATFORM_ARTICLE_KNOWLEDGE = "wechat_platform:article:knowledge:op" //微信文章入知识库处理
+	CACHE_ETA_REPORT_KNOWLEDGE              = "eta:report:knowledge:op"              //eta报告入知识库处理
 	CACHE_CHART_AUTH                        = "eta:chart:auth:"                      //图表数据授权
 	CACHE_REPORT_SHARE_AUTH                 = "eta:report:auth:share:"               //报告短链与报告图表授权映射key
 	CACHE_REPORT_AUTH                       = "eta:report:auth:"                     //报告图表数据授权