package services

import (
	"fmt"
	"regexp"
	"strings"
	"time"

	"eta/eta_api/models"
	"eta/eta_api/models/rag"
	"eta/eta_api/utils"

	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

// reportKnowledgeNewlineRe matches a run of one or more newlines. It is compiled
// once at package scope instead of on every call.
var reportKnowledgeNewlineRe = regexp.MustCompile(`\n+`)

// ReportAddOrModifyKnowledge
// @Description: Adds an ETA report (or one of its chapters) to the knowledge base, or updates the existing entry.
// When reportChapterId > 0 the chapter is loaded and synced; otherwise the report itself is.
// Any failure (including a failed lookup) is written to utils.FileLog; nothing is returned to the caller.
// @author: Roc
// @datetime 2025-04-07 14:41:45
// @param reportId int
// @param reportChapterId int
func ReportAddOrModifyKnowledge(reportId, reportChapterId int) {
	if reportId <= 0 {
		return
	}

	var err error
	defer func() {
		if err != nil {
			utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
		}
	}()

	var title, author, htmlContent string
	var publishTime time.Time

	if reportChapterId > 0 {
		chapterInfo, tmpErr := models.GetReportChapterInfoById(reportChapterId)
		if tmpErr != nil {
			// Surface the lookup failure to the deferred logger instead of dropping it.
			err = tmpErr
			return
		}
		title = chapterInfo.Title
		author = chapterInfo.Author
		publishTime = chapterInfo.PublishTime
		htmlContent = chapterInfo.Content
	} else {
		reportInfo, tmpErr := models.GetReportByReportId(reportId)
		if tmpErr != nil {
			// Surface the lookup failure to the deferred logger instead of dropping it.
			err = tmpErr
			return
		}
		title = reportInfo.Title
		author = reportInfo.Author
		publishTime = reportInfo.PublishTime
		htmlContent = reportInfo.Content
	}

	err = handleReportAddOrModifyKnowledge(reportId, reportChapterId, title, author, htmlContent, publishTime)
}

// ReportAddOrModifyKnowledgeByReportId
// @Description: Adds/updates an ETA report in the knowledge base when only the report id is known.
// A report with HasChapter == 0 is synced as a single entry; otherwise every published chapter
// is synced individually. Errors are collected and logged together when the function exits.
// @author: Roc
// @datetime 2025-04-07 15:41:15
// @param reportId int
func ReportAddOrModifyKnowledgeByReportId(reportId int) {
	if reportId <= 0 {
		return
	}

	errList := make([]string, 0)
	defer func() {
		if len(errList) > 0 {
			utils.FileLog.Error("ReportAddOrModifyKnowledge error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
		}
	}()

	reportInfo, err := models.GetReportByReportId(reportId)
	if err != nil {
		errList = append(errList, err.Error())
		return
	}

	// Single-piece report: sync the report body directly.
	if reportInfo.HasChapter == 0 {
		err = handleReportAddOrModifyKnowledge(reportId, 0, reportInfo.Title, reportInfo.Author, reportInfo.Content, reportInfo.PublishTime)
		if err != nil {
			errList = append(errList, err.Error())
		}
		return
	}

	// Chaptered report: load the published chapters and sync each one.
	chapterInfoList, err := models.GetPublishedChapterListByReportId(reportId)
	if err != nil {
		errList = append(errList, err.Error())
		return
	}

	for _, v := range chapterInfoList {
		// The report-level author is used for every chapter.
		err = handleReportAddOrModifyKnowledge(reportId, v.ReportChapterId, v.Title, reportInfo.Author, v.Content, v.PublishTime)
		if err != nil {
			// Keep going so one failing chapter does not block the others.
			errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", v.ReportChapterId, v.Title, err.Error()))
		}
	}
}

// handleReportAddOrModifyKnowledge
// @Description: Extracts the plain text from the report HTML and creates or updates the
// matching rag.RagEtaReport row. An existing row flagged IsDeleted == 1 is left untouched.
// @author: Roc
// @datetime 2025-04-07 15:33:38
// @param reportId int
// @param reportChapterId int
// @param title string
// @param author string
// @param htmlContent string
// @param publishTime time.Time
// @return err error
func handleReportAddOrModifyKnowledge(reportId, reportChapterId int, title, author, htmlContent string, publishTime time.Time) (err error) {
	// The content is unescaped before parsing, as in the original implementation.
	doc, err := html.Parse(strings.NewReader(html.UnescapeString(htmlContent)))
	if err != nil {
		return err
	}

	// Keep only the text content, collapse newline runs and trim leading/trailing newlines.
	var content strings.Builder
	getArticleContent(&content, doc)
	textContent := reportKnowledgeNewlineRe.ReplaceAllString(content.String(), "\n")
	textContent = strings.Trim(textContent, "\n")

	publishTimeStr := `未知`
	if !publishTime.IsZero() {
		// The publish date is appended to the title, and the full timestamp goes into the text header.
		title = fmt.Sprintf("%s(%s)", title, publishTime.Format(utils.FormatMonthDayUnSpace))
		publishTimeStr = publishTime.Format(utils.FormatDateTime)
	}
	textContent = fmt.Sprintf("标题:%s\n发布时间:%s\n%s", title, publishTimeStr, textContent)

	obj := rag.RagEtaReport{}
	item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)

	now := time.Now()
	switch {
	case err == nil:
		// A row already exists; rows marked as deleted are not touched.
		if item.IsDeleted == 1 {
			return nil
		}
		item.Title = title
		item.Author = author
		item.TextContent = textContent
		item.IsPublished = 1
		item.ModifyTime = now
		// PublishTime is not written on update; the original code had that
		// assignment and the "publish_time" column commented out.
		return item.Update([]string{"title", "author", "text_content", "is_published", "modify_time"})

	case utils.IsErrNoRow(err):
		// No row yet: create one.
		item = &rag.RagEtaReport{
			RagEtaReportId:  0,
			ReportId:        reportId,
			ReportChapterId: reportChapterId,
			Title:           title,
			Author:          author,
			TextContent:     textContent,
			VectorKey:       "",
			IsPublished:     1,
			IsDeleted:       0,
			PublishTime:     publishTime,
			ModifyTime:      now,
			CreateTime:      now,
		}
		return item.Create()

	default:
		// Query failed for a reason other than "no row".
		return err
	}
}

// ReportUnPublishedKnowledge
// @Description: Marks the knowledge-base entry of a report/chapter as unpublished.
// A missing entry is not treated as an error. Failures are written to utils.FileLog.
// @author: Roc
// @datetime 2025-04-07 14:58:25
// @param reportId int
// @param reportChapterId int
func ReportUnPublishedKnowledge(reportId, reportChapterId int) {
	if reportId <= 0 && reportChapterId <= 0 {
		return
	}

	var err error
	defer func() {
		if err != nil {
			utils.FileLog.Error("ReportUnPublishedKnowledge error:", err)
		}
	}()

	obj := rag.RagEtaReport{}
	item, tmpErr := obj.GetByReportAndChapterId(reportId, reportChapterId)
	if tmpErr != nil {
		// Only real query failures are reported; "no row" simply means there is nothing to unpublish.
		if !utils.IsErrNoRow(tmpErr) {
			err = tmpErr
		}
		return
	}

	// Guard against a nil or unsaved item before touching it.
	if item == nil || item.RagEtaReportId <= 0 {
		return
	}

	item.IsPublished = 0
	item.ModifyTime = time.Now()
	err = item.Update([]string{"is_published", "modify_time"})
}

// ReportUnPublishedKnowledgeByReportId
// @Description: Marks every knowledge-base entry of a report as unpublished when only the
// report id is known. At most 1000 entries are loaded per call. Errors are collected and
// logged together when the function exits.
// @author: Roc
// @datetime 2025-04-07 15:41:15
// @param reportId int
func ReportUnPublishedKnowledgeByReportId(reportId int) {
	if reportId <= 0 {
		return
	}

	errList := make([]string, 0)
	defer func() {
		if len(errList) > 0 {
			utils.FileLog.Error("ReportUnPublishedKnowledgeByReportId error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
		}
	}()

	obj := rag.RagEtaReport{}
	list, err := obj.GetListByCondition(``, ` AND report_id = ? `, []interface{}{reportId}, 0, 1000)
	if err != nil && !utils.IsErrNoRow(err) {
		// Query failed for a reason other than "no row": record it so it gets logged.
		errList = append(errList, err.Error())
		return
	}

	for _, item := range list {
		item.IsPublished = 0
		item.ModifyTime = time.Now()
		if updateErr := item.Update([]string{"is_published", "modify_time"}); updateErr != nil {
			// Keep going so one failing entry does not block the others.
			errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", item.ReportChapterId, item.Title, updateErr.Error()))
		}
	}
}

// getArticleContent walks the HTML tree rooted at htmlContentNode depth-first and appends
// its text to content. Text nodes are written with surrounding whitespace trimmed; a
// newline is written when a <ul>, <br> or <p> element is entered. A nil node is ignored.
func getArticleContent(content *strings.Builder, htmlContentNode *html.Node) {
	if htmlContentNode == nil {
		return
	}

	switch htmlContentNode.Type {
	case html.TextNode:
		cleanData := strings.TrimSpace(htmlContentNode.Data)
		// NOTE(review): the second literal appears garbled in the reviewed copy of this file
		// (it shows as two line breaks). As written it can never match a trimmed string;
		// confirm the intended value against version control.
		if cleanData != `` && cleanData != "\n\n" {
			content.WriteString(cleanData)
		}
	case html.ElementNode:
		switch htmlContentNode.DataAtom {
		case atom.Ul, atom.Br, atom.P:
			// Add a newline when a <ul>, <br> or <p> tag is encountered.
			content.WriteString("\n")
		}
	}

	for c := htmlContentNode.FirstChild; c != nil; c = c.NextSibling {
		getArticleContent(content, c)
	}
}