123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272 |
- package services
- import (
- "eta/eta_api/models"
- "eta/eta_api/models/rag"
- "eta/eta_api/utils"
- "fmt"
- "golang.org/x/net/html"
- "golang.org/x/net/html/atom"
- "regexp"
- "strings"
- "time"
- )
- // ReportAddOrModifyKnowledge
- // @Description: ETA报告加入/修改到知识库
- // @author: Roc
- // @datetime 2025-04-07 14:41:45
- // @param reportId int
- // @param reportChapterId int
- func ReportAddOrModifyKnowledge(reportId, reportChapterId int) {
- if reportId <= 0 {
- return
- }
- var err error
- defer func() {
- if err != nil {
- //fmt.Println("ReportAddOrModifyKnowledge error:", err)
- utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
- }
- }()
- var title, author, htmlContent string
- var publishTime time.Time
- if reportChapterId > 0 {
- chapterInfo, tmpErr := models.GetReportChapterInfoById(reportChapterId)
- if tmpErr != nil {
- return
- }
- title = chapterInfo.Title
- author = chapterInfo.Author
- publishTime = chapterInfo.PublishTime
- htmlContent = chapterInfo.Content
- } else {
- reportInfo, tmpErr := models.GetReportByReportId(reportId)
- if tmpErr != nil {
- return
- }
- title = reportInfo.Title
- author = reportInfo.Author
- publishTime = reportInfo.PublishTime
- htmlContent = reportInfo.Content
- }
- err = handleReportAddOrModifyKnowledge(reportId, reportChapterId, title, author, htmlContent, publishTime)
- return
- }
- // ReportAddOrModifyKnowledgeByReportId
- // @Description: ETA报告加入/修改到知识库(只传id的情况)
- // @author: Roc
- // @datetime 2025-04-07 15:41:15
- // @param reportId int
- func ReportAddOrModifyKnowledgeByReportId(reportId int) {
- if reportId <= 0 {
- return
- }
- errList := make([]string, 0)
- defer func() {
- if len(errList) > 0 {
- utils.FileLog.Error("ReportAddOrModifyKnowledge error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
- }
- }()
- reportInfo, err := models.GetReportByReportId(reportId)
- if err != nil {
- errList = append(errList, err.Error())
- return
- }
- // 如果是单篇报告,那么直接处理
- if reportInfo.HasChapter == 0 {
- err = handleReportAddOrModifyKnowledge(reportId, 0, reportInfo.Title, reportInfo.Author, reportInfo.Content, reportInfo.PublishTime)
- if err != nil {
- errList = append(errList, err.Error())
- }
- return
- }
- // 章节类型的报告,需要查询出来后再处理
- chapterInfoList, err := models.GetPublishedChapterListByReportId(reportId)
- if err != nil {
- errList = append(errList, err.Error())
- return
- }
- for _, v := range chapterInfoList {
- err = handleReportAddOrModifyKnowledge(reportId, v.ReportChapterId, v.Title, reportInfo.Author, v.Content, v.PublishTime)
- if err != nil {
- errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", v.ReportChapterId, v.Title, err.Error()))
- continue
- }
- }
- return
- }
- // handleReportAddOrModifyKnowledge
- // @Description: 处理ETA报告加入/修改到知识库
- // @author: Roc
- // @datetime 2025-04-07 15:33:38
- // @param reportId int
- // @param reportChapterId int
- // @param title string
- // @param author string
- // @param htmlContent string
- // @param publishTime time.Time
- // @return err error
- func handleReportAddOrModifyKnowledge(reportId, reportChapterId int, title, author, htmlContent string, publishTime time.Time) (err error) {
- htmlContent = html.UnescapeString(htmlContent)
- doc, err := html.Parse(strings.NewReader(htmlContent))
- if err != nil {
- return
- }
- // 只获取文本内容
- content := &strings.Builder{}
- getArticleContent(content, doc)
- textContent := content.String()
- textContent = regexp.MustCompile(`\n+`).ReplaceAllString(textContent, "\n")
- textContent = strings.Trim(textContent, "\n")
- publishTimeStr := `未知`
- if !publishTime.IsZero() {
- title = fmt.Sprintf("%s(%s)", title, publishTime.Format(utils.FormatMonthDayUnSpace))
- publishTimeStr = publishTime.Format(utils.FormatDateTime)
- }
- textContent = fmt.Sprintf("标题:%s\n发布时间:%s\n%s", title, publishTimeStr, textContent)
- obj := rag.RagEtaReport{}
- item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
- if err != nil && !utils.IsErrNoRow(err) {
- // 查询异常,且不是没找到数据的报错
- return
- }
- if err == nil {
- // 标记删除了的话,那就不处理了
- if item.IsDeleted == 1 {
- return
- }
- item.Title = title
- item.Author = author
- item.TextContent = textContent
- item.IsPublished = 1
- //item.PublishTime = publishTime
- item.ModifyTime = time.Now()
- //err = item.Update([]string{"title", "author", "text_content", "is_published", "publish_time", "modify_time"})
- err = item.Update([]string{"title", "author", "text_content", "is_published", "modify_time"})
- } else {
- // 无数据的时候,需要新增
- err = nil
- item = &rag.RagEtaReport{
- RagEtaReportId: 0,
- ReportId: reportId,
- ReportChapterId: reportChapterId,
- Title: title,
- Author: author,
- TextContent: textContent,
- VectorKey: "",
- IsPublished: 1,
- IsDeleted: 0,
- PublishTime: publishTime,
- ModifyTime: time.Now(),
- CreateTime: time.Now(),
- }
- err = item.Create()
- }
- return
- }
- // ReportUnPublishedKnowledge
- // @Description: 知识库取消发布
- // @author: Roc
- // @datetime 2025-04-07 14:58:25
- // @param reportId int
- // @param reportChapterId int
- func ReportUnPublishedKnowledge(reportId, reportChapterId int) {
- if reportId <= 0 && reportChapterId <= 0 {
- return
- }
- var err error
- defer func() {
- if err != nil {
- //fmt.Println("ReportAddOrModifyKnowledge error:", err)
- utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
- }
- }()
- obj := rag.RagEtaReport{}
- item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
- if err != nil && !utils.IsErrNoRow(err) {
- // 查询异常,且不是没找到数据的报错
- return
- }
- if item.RagEtaReportId > 0 {
- item.IsPublished = 0
- item.ModifyTime = time.Now()
- err = item.Update([]string{"is_published", "modify_time"})
- }
- return
- }
- // ReportUnPublishedKnowledgeByReportId
- // @Description: ETA报告取消发布同步到知识库(只传报告id的情况)
- // @author: Roc
- // @datetime 2025-04-07 15:41:15
- // @param reportId int
- func ReportUnPublishedKnowledgeByReportId(reportId int) {
- errList := make([]string, 0)
- defer func() {
- if len(errList) > 0 {
- utils.FileLog.Error("ReportUnPublishedKnowledgeByReportId error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
- }
- }()
- obj := rag.RagEtaReport{}
- list, err := obj.GetListByCondition(``, ` AND report_id = ? `, []interface{}{reportId}, 0, 1000)
- if err != nil && !utils.IsErrNoRow(err) {
- // 查询异常,且不是没找到数据的报错
- return
- }
- for _, item := range list {
- item.IsPublished = 0
- item.ModifyTime = time.Now()
- err = item.Update([]string{"is_published", "modify_time"})
- if err != nil {
- errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", item.ReportChapterId, item.Title, err.Error()))
- continue
- }
- }
- return
- }
- func getArticleContent(content *strings.Builder, htmlContentNode *html.Node) {
- if htmlContentNode.Type == html.TextNode {
- cleanData := strings.TrimSpace(htmlContentNode.Data)
- if cleanData != `` && cleanData != "</p>" {
- content.WriteString(cleanData)
- }
- } else if htmlContentNode.Type == html.ElementNode {
- switch htmlContentNode.DataAtom {
- case atom.Ul:
- content.WriteString("\n")
- case atom.Br:
- // 遇到 <br> 标签时添加换行符
- content.WriteString("\n")
- case atom.P:
- content.WriteString("\n")
- }
- }
- for c := htmlContentNode.FirstChild; c != nil; c = c.NextSibling {
- getArticleContent(content, c)
- }
- }
|