|
@@ -0,0 +1,270 @@
|
|
|
+package services
|
|
|
+
|
|
|
+import (
|
|
|
+ "eta/eta_api/models"
|
|
|
+ "eta/eta_api/models/rag"
|
|
|
+ "eta/eta_api/utils"
|
|
|
+ "fmt"
|
|
|
+ "golang.org/x/net/html"
|
|
|
+ "golang.org/x/net/html/atom"
|
|
|
+ "regexp"
|
|
|
+ "strings"
|
|
|
+ "time"
|
|
|
+)
|
|
|
+
|
|
|
+// ReportAddOrModifyKnowledge
|
|
|
+// @Description: ETA报告加入/修改到知识库
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-04-07 14:41:45
|
|
|
+// @param reportId int
|
|
|
+// @param reportChapterId int
|
|
|
+func ReportAddOrModifyKnowledge(reportId, reportChapterId int) {
|
|
|
+ if reportId <= 0 {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ var err error
|
|
|
+ defer func() {
|
|
|
+ if err != nil {
|
|
|
+ //fmt.Println("ReportAddOrModifyKnowledge error:", err)
|
|
|
+ utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+
|
|
|
+ var title, author, htmlContent string
|
|
|
+ var publishTime time.Time
|
|
|
+
|
|
|
+ if reportChapterId > 0 {
|
|
|
+ chapterInfo, tmpErr := models.GetReportChapterInfoById(reportChapterId)
|
|
|
+ if tmpErr != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ title = chapterInfo.Title
|
|
|
+ author = chapterInfo.Author
|
|
|
+ publishTime = chapterInfo.PublishTime
|
|
|
+ htmlContent = chapterInfo.Content
|
|
|
+ } else {
|
|
|
+ reportInfo, tmpErr := models.GetReportByReportId(reportId)
|
|
|
+ if tmpErr != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ title = reportInfo.Title
|
|
|
+ author = reportInfo.Author
|
|
|
+ publishTime = reportInfo.PublishTime
|
|
|
+ htmlContent = reportInfo.Content
|
|
|
+ }
|
|
|
+
|
|
|
+ err = handleReportAddOrModifyKnowledge(reportId, reportChapterId, title, author, htmlContent, publishTime)
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// ReportAddOrModifyKnowledgeByReportId
|
|
|
+// @Description: ETA报告加入/修改到知识库(只传id的情况)
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-04-07 15:41:15
|
|
|
+// @param reportId int
|
|
|
+func ReportAddOrModifyKnowledgeByReportId(reportId int) {
|
|
|
+ if reportId <= 0 {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ errList := make([]string, 0)
|
|
|
+ defer func() {
|
|
|
+ if len(errList) > 0 {
|
|
|
+ utils.FileLog.Error("ReportAddOrModifyKnowledge error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
|
|
|
+ }
|
|
|
+ }()
|
|
|
+
|
|
|
+ reportInfo, err := models.GetReportByReportId(reportId)
|
|
|
+ if err != nil {
|
|
|
+ errList = append(errList, err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ // 如果是单篇报告,那么直接处理
|
|
|
+ if reportInfo.HasChapter == 0 {
|
|
|
+ err = handleReportAddOrModifyKnowledge(reportId, 0, reportInfo.Title, reportInfo.Author, reportInfo.Content, reportInfo.PublishTime)
|
|
|
+ if err != nil {
|
|
|
+ errList = append(errList, err.Error())
|
|
|
+ }
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ // 章节类型的报告,需要查询出来后再处理
|
|
|
+ chapterInfoList, err := models.GetPublishedChapterListByReportId(reportId)
|
|
|
+ if err != nil {
|
|
|
+ errList = append(errList, err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ for _, v := range chapterInfoList {
|
|
|
+ err = handleReportAddOrModifyKnowledge(reportId, v.ReportChapterId, v.Title, v.Author, v.Content, reportInfo.PublishTime)
|
|
|
+ if err != nil {
|
|
|
+ errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", v.ReportChapterId, v.Title, err.Error()))
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// handleReportAddOrModifyKnowledge
|
|
|
+// @Description: 处理ETA报告加入/修改到知识库
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-04-07 15:33:38
|
|
|
+// @param reportId int
|
|
|
+// @param reportChapterId int
|
|
|
+// @param title string
|
|
|
+// @param author string
|
|
|
+// @param htmlContent string
|
|
|
+// @param publishTime time.Time
|
|
|
+// @return err error
|
|
|
+func handleReportAddOrModifyKnowledge(reportId, reportChapterId int, title, author, htmlContent string, publishTime time.Time) (err error) {
|
|
|
+ htmlContent = html.UnescapeString(htmlContent)
|
|
|
+ doc, err := html.Parse(strings.NewReader(htmlContent))
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ // 只获取文本内容
|
|
|
+ content := &strings.Builder{}
|
|
|
+ getArticleContent(content, doc)
|
|
|
+
|
|
|
+ textContent := content.String()
|
|
|
+ textContent = regexp.MustCompile(`\n+`).ReplaceAllString(textContent, "\n")
|
|
|
+ textContent = strings.Trim(textContent, "\n")
|
|
|
+
|
|
|
+ publishTimeStr := `未知`
|
|
|
+ if !publishTime.IsZero() {
|
|
|
+ title = fmt.Sprintf("%s(%s)", title, publishTime.Format(utils.FormatMonthDayUnSpace))
|
|
|
+ publishTimeStr = publishTime.Format(utils.FormatDateTime)
|
|
|
+ }
|
|
|
+
|
|
|
+ textContent = fmt.Sprintf("标题:%s\n发布时间:%s\n%s", title, publishTimeStr, textContent)
|
|
|
+
|
|
|
+ obj := rag.RagEtaReport{}
|
|
|
+ item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
|
|
|
+ if err != nil && !utils.IsErrNoRow(err) {
|
|
|
+ // 查询异常,且不是没找到数据的报错
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if err == nil {
|
|
|
+ // 标记删除了的话,那就不处理了
|
|
|
+ if item.IsDeleted == 1 {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ item.Title = title
|
|
|
+ item.Author = author
|
|
|
+ item.TextContent = textContent
|
|
|
+ item.PublishTime = publishTime
|
|
|
+ item.ModifyTime = time.Now()
|
|
|
+ err = item.Update([]string{"author", "text_content", "publish_time", "modify_time"})
|
|
|
+ } else {
|
|
|
+ // 无数据的时候,需要新增
|
|
|
+ err = nil
|
|
|
+ item = &rag.RagEtaReport{
|
|
|
+ RagEtaReportId: 0,
|
|
|
+ ReportId: reportId,
|
|
|
+ ReportChapterId: reportChapterId,
|
|
|
+ Title: title,
|
|
|
+ Author: author,
|
|
|
+ TextContent: textContent,
|
|
|
+ VectorKey: "",
|
|
|
+ IsPublished: 1,
|
|
|
+ IsDeleted: 0,
|
|
|
+ PublishTime: publishTime,
|
|
|
+ ModifyTime: time.Now(),
|
|
|
+ CreateTime: time.Now(),
|
|
|
+ }
|
|
|
+ err = item.Create()
|
|
|
+ }
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// ReportUnPublishedKnowledge
|
|
|
+// @Description: 知识库取消发布
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-04-07 14:58:25
|
|
|
+// @param reportId int
|
|
|
+// @param reportChapterId int
|
|
|
+func ReportUnPublishedKnowledge(reportId, reportChapterId int) {
|
|
|
+ if reportId <= 0 && reportChapterId <= 0 {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ var err error
|
|
|
+ defer func() {
|
|
|
+ if err != nil {
|
|
|
+ //fmt.Println("ReportAddOrModifyKnowledge error:", err)
|
|
|
+ utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+
|
|
|
+ obj := rag.RagEtaReport{}
|
|
|
+ item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
|
|
|
+ if err != nil && !utils.IsErrNoRow(err) {
|
|
|
+ // 查询异常,且不是没找到数据的报错
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ if item.RagEtaReportId > 0 {
|
|
|
+ item.IsPublished = 0
|
|
|
+ item.ModifyTime = time.Now()
|
|
|
+ err = item.Update([]string{"is_published", "modify_time"})
|
|
|
+ }
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// ReportUnPublishedKnowledgeByReportId
|
|
|
+// @Description: ETA报告取消发布同步到知识库(只传报告id的情况)
|
|
|
+// @author: Roc
|
|
|
+// @datetime 2025-04-07 15:41:15
|
|
|
+// @param reportId int
|
|
|
+func ReportUnPublishedKnowledgeByReportId(reportId int) {
|
|
|
+ errList := make([]string, 0)
|
|
|
+ defer func() {
|
|
|
+ if len(errList) > 0 {
|
|
|
+ utils.FileLog.Error("ReportUnPublishedKnowledgeByReportId error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
|
|
|
+ }
|
|
|
+ }()
|
|
|
+
|
|
|
+ obj := rag.RagEtaReport{}
|
|
|
+ list, err := obj.GetListByCondition(``, ` AND report_id = ? `, []interface{}{reportId}, 0, 1000)
|
|
|
+ if err != nil && !utils.IsErrNoRow(err) {
|
|
|
+ // 查询异常,且不是没找到数据的报错
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, item := range list {
|
|
|
+ item.IsPublished = 0
|
|
|
+ item.ModifyTime = time.Now()
|
|
|
+ err = item.Update([]string{"is_published", "modify_time"})
|
|
|
+ if err != nil {
|
|
|
+ errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", item.ReportChapterId, item.Title, err.Error()))
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+func getArticleContent(content *strings.Builder, htmlContentNode *html.Node) {
|
|
|
+ if htmlContentNode.Type == html.TextNode {
|
|
|
+ cleanData := strings.TrimSpace(htmlContentNode.Data)
|
|
|
+ if cleanData != `` && cleanData != "</p>" {
|
|
|
+ content.WriteString(cleanData)
|
|
|
+ }
|
|
|
+ } else if htmlContentNode.Type == html.ElementNode {
|
|
|
+ switch htmlContentNode.DataAtom {
|
|
|
+ case atom.Ul:
|
|
|
+ content.WriteString("\n")
|
|
|
+ case atom.Br:
|
|
|
+ // 遇到 <br> 标签时添加换行符
|
|
|
+ content.WriteString("\n")
|
|
|
+ case atom.P:
|
|
|
+ content.WriteString("\n")
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for c := htmlContentNode.FirstChild; c != nil; c = c.NextSibling {
|
|
|
+ getArticleContent(content, c)
|
|
|
+ }
|
|
|
+}
|