llm_report.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. package services
  2. import (
  3. "eta/eta_api/models"
  4. "eta/eta_api/models/rag"
  5. "eta/eta_api/utils"
  6. "fmt"
  7. "golang.org/x/net/html"
  8. "golang.org/x/net/html/atom"
  9. "regexp"
  10. "strings"
  11. "time"
  12. )
  13. // ReportAddOrModifyKnowledge
  14. // @Description: ETA报告加入/修改到知识库
  15. // @author: Roc
  16. // @datetime 2025-04-07 14:41:45
  17. // @param reportId int
  18. // @param reportChapterId int
  19. func ReportAddOrModifyKnowledge(reportId, reportChapterId int) {
  20. if reportId <= 0 {
  21. return
  22. }
  23. var err error
  24. defer func() {
  25. if err != nil {
  26. //fmt.Println("ReportAddOrModifyKnowledge error:", err)
  27. utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
  28. }
  29. }()
  30. var title, author, htmlContent string
  31. var publishTime time.Time
  32. if reportChapterId > 0 {
  33. chapterInfo, tmpErr := models.GetReportChapterInfoById(reportChapterId)
  34. if tmpErr != nil {
  35. return
  36. }
  37. title = chapterInfo.Title
  38. author = chapterInfo.Author
  39. publishTime = chapterInfo.PublishTime
  40. htmlContent = chapterInfo.Content
  41. } else {
  42. reportInfo, tmpErr := models.GetReportByReportId(reportId)
  43. if tmpErr != nil {
  44. return
  45. }
  46. title = reportInfo.Title
  47. author = reportInfo.Author
  48. publishTime = reportInfo.PublishTime
  49. htmlContent = reportInfo.Content
  50. }
  51. err = handleReportAddOrModifyKnowledge(reportId, reportChapterId, title, author, htmlContent, publishTime)
  52. return
  53. }
  54. // ReportAddOrModifyKnowledgeByReportId
  55. // @Description: ETA报告加入/修改到知识库(只传id的情况)
  56. // @author: Roc
  57. // @datetime 2025-04-07 15:41:15
  58. // @param reportId int
  59. func ReportAddOrModifyKnowledgeByReportId(reportId int) {
  60. if reportId <= 0 {
  61. return
  62. }
  63. errList := make([]string, 0)
  64. defer func() {
  65. if len(errList) > 0 {
  66. utils.FileLog.Error("ReportAddOrModifyKnowledge error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
  67. }
  68. }()
  69. reportInfo, err := models.GetReportByReportId(reportId)
  70. if err != nil {
  71. errList = append(errList, err.Error())
  72. return
  73. }
  74. // 如果是单篇报告,那么直接处理
  75. if reportInfo.HasChapter == 0 {
  76. err = handleReportAddOrModifyKnowledge(reportId, 0, reportInfo.Title, reportInfo.Author, reportInfo.Content, reportInfo.PublishTime)
  77. if err != nil {
  78. errList = append(errList, err.Error())
  79. }
  80. return
  81. }
  82. // 章节类型的报告,需要查询出来后再处理
  83. chapterInfoList, err := models.GetPublishedChapterListByReportId(reportId)
  84. if err != nil {
  85. errList = append(errList, err.Error())
  86. return
  87. }
  88. for _, v := range chapterInfoList {
  89. err = handleReportAddOrModifyKnowledge(reportId, v.ReportChapterId, v.Title, reportInfo.Author, v.Content, v.PublishTime)
  90. if err != nil {
  91. errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", v.ReportChapterId, v.Title, err.Error()))
  92. continue
  93. }
  94. }
  95. return
  96. }
  97. // handleReportAddOrModifyKnowledge
  98. // @Description: 处理ETA报告加入/修改到知识库
  99. // @author: Roc
  100. // @datetime 2025-04-07 15:33:38
  101. // @param reportId int
  102. // @param reportChapterId int
  103. // @param title string
  104. // @param author string
  105. // @param htmlContent string
  106. // @param publishTime time.Time
  107. // @return err error
  108. func handleReportAddOrModifyKnowledge(reportId, reportChapterId int, title, author, htmlContent string, publishTime time.Time) (err error) {
  109. htmlContent = html.UnescapeString(htmlContent)
  110. doc, err := html.Parse(strings.NewReader(htmlContent))
  111. if err != nil {
  112. return
  113. }
  114. // 只获取文本内容
  115. content := &strings.Builder{}
  116. getArticleContent(content, doc)
  117. textContent := content.String()
  118. textContent = regexp.MustCompile(`\n+`).ReplaceAllString(textContent, "\n")
  119. textContent = strings.Trim(textContent, "\n")
  120. publishTimeStr := `未知`
  121. if !publishTime.IsZero() {
  122. title = fmt.Sprintf("%s(%s)", title, publishTime.Format(utils.FormatMonthDayUnSpace))
  123. publishTimeStr = publishTime.Format(utils.FormatDateTime)
  124. }
  125. textContent = fmt.Sprintf("标题:%s\n发布时间:%s\n%s", title, publishTimeStr, textContent)
  126. obj := rag.RagEtaReport{}
  127. item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
  128. if err != nil && !utils.IsErrNoRow(err) {
  129. // 查询异常,且不是没找到数据的报错
  130. return
  131. }
  132. if err == nil {
  133. // 标记删除了的话,那就不处理了
  134. if item.IsDeleted == 1 {
  135. return
  136. }
  137. item.Title = title
  138. item.Author = author
  139. item.TextContent = textContent
  140. item.IsPublished = 1
  141. item.PublishTime = publishTime
  142. item.ModifyTime = time.Now()
  143. err = item.Update([]string{"author", "text_content", "is_published", "publish_time", "modify_time"})
  144. } else {
  145. // 无数据的时候,需要新增
  146. err = nil
  147. item = &rag.RagEtaReport{
  148. RagEtaReportId: 0,
  149. ReportId: reportId,
  150. ReportChapterId: reportChapterId,
  151. Title: title,
  152. Author: author,
  153. TextContent: textContent,
  154. VectorKey: "",
  155. IsPublished: 1,
  156. IsDeleted: 0,
  157. PublishTime: publishTime,
  158. ModifyTime: time.Now(),
  159. CreateTime: time.Now(),
  160. }
  161. err = item.Create()
  162. }
  163. return
  164. }
  165. // ReportUnPublishedKnowledge
  166. // @Description: 知识库取消发布
  167. // @author: Roc
  168. // @datetime 2025-04-07 14:58:25
  169. // @param reportId int
  170. // @param reportChapterId int
  171. func ReportUnPublishedKnowledge(reportId, reportChapterId int) {
  172. if reportId <= 0 && reportChapterId <= 0 {
  173. return
  174. }
  175. var err error
  176. defer func() {
  177. if err != nil {
  178. //fmt.Println("ReportAddOrModifyKnowledge error:", err)
  179. utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
  180. }
  181. }()
  182. obj := rag.RagEtaReport{}
  183. item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
  184. if err != nil && !utils.IsErrNoRow(err) {
  185. // 查询异常,且不是没找到数据的报错
  186. return
  187. }
  188. if item.RagEtaReportId > 0 {
  189. item.IsPublished = 0
  190. item.ModifyTime = time.Now()
  191. err = item.Update([]string{"is_published", "modify_time"})
  192. }
  193. return
  194. }
  195. // ReportUnPublishedKnowledgeByReportId
  196. // @Description: ETA报告取消发布同步到知识库(只传报告id的情况)
  197. // @author: Roc
  198. // @datetime 2025-04-07 15:41:15
  199. // @param reportId int
  200. func ReportUnPublishedKnowledgeByReportId(reportId int) {
  201. errList := make([]string, 0)
  202. defer func() {
  203. if len(errList) > 0 {
  204. utils.FileLog.Error("ReportUnPublishedKnowledgeByReportId error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
  205. }
  206. }()
  207. obj := rag.RagEtaReport{}
  208. list, err := obj.GetListByCondition(``, ` AND report_id = ? `, []interface{}{reportId}, 0, 1000)
  209. if err != nil && !utils.IsErrNoRow(err) {
  210. // 查询异常,且不是没找到数据的报错
  211. return
  212. }
  213. for _, item := range list {
  214. item.IsPublished = 0
  215. item.ModifyTime = time.Now()
  216. err = item.Update([]string{"is_published", "modify_time"})
  217. if err != nil {
  218. errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", item.ReportChapterId, item.Title, err.Error()))
  219. continue
  220. }
  221. }
  222. return
  223. }
  224. func getArticleContent(content *strings.Builder, htmlContentNode *html.Node) {
  225. if htmlContentNode.Type == html.TextNode {
  226. cleanData := strings.TrimSpace(htmlContentNode.Data)
  227. if cleanData != `` && cleanData != "</p>" {
  228. content.WriteString(cleanData)
  229. }
  230. } else if htmlContentNode.Type == html.ElementNode {
  231. switch htmlContentNode.DataAtom {
  232. case atom.Ul:
  233. content.WriteString("\n")
  234. case atom.Br:
  235. // 遇到 <br> 标签时添加换行符
  236. content.WriteString("\n")
  237. case atom.P:
  238. content.WriteString("\n")
  239. }
  240. }
  241. for c := htmlContentNode.FirstChild; c != nil; c = c.NextSibling {
  242. getArticleContent(content, c)
  243. }
  244. }