llm_report.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. package services
  2. import (
  3. "eta/eta_api/models"
  4. "eta/eta_api/models/rag"
  5. "eta/eta_api/utils"
  6. "fmt"
  7. "golang.org/x/net/html"
  8. "golang.org/x/net/html/atom"
  9. "regexp"
  10. "strings"
  11. "time"
  12. )
  13. // ReportAddOrModifyKnowledge
  14. // @Description: ETA报告加入/修改到知识库
  15. // @author: Roc
  16. // @datetime 2025-04-07 14:41:45
  17. // @param reportId int
  18. // @param reportChapterId int
  19. func ReportAddOrModifyKnowledge(reportId, reportChapterId int) {
  20. if reportId <= 0 {
  21. return
  22. }
  23. var err error
  24. defer func() {
  25. if err != nil {
  26. //fmt.Println("ReportAddOrModifyKnowledge error:", err)
  27. utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
  28. }
  29. }()
  30. var title, author, htmlContent string
  31. var publishTime time.Time
  32. if reportChapterId > 0 {
  33. chapterInfo, tmpErr := models.GetReportChapterInfoById(reportChapterId)
  34. if tmpErr != nil {
  35. return
  36. }
  37. title = chapterInfo.Title
  38. author = chapterInfo.Author
  39. publishTime = chapterInfo.PublishTime
  40. htmlContent = chapterInfo.Content
  41. } else {
  42. reportInfo, tmpErr := models.GetReportByReportId(reportId)
  43. if tmpErr != nil {
  44. return
  45. }
  46. title = reportInfo.Title
  47. author = reportInfo.Author
  48. publishTime = reportInfo.PublishTime
  49. htmlContent = reportInfo.Content
  50. }
  51. err = handleReportAddOrModifyKnowledge(reportId, reportChapterId, title, author, htmlContent, publishTime)
  52. return
  53. }
  54. // ReportAddOrModifyKnowledgeByReportId
  55. // @Description: ETA报告加入/修改到知识库(只传id的情况)
  56. // @author: Roc
  57. // @datetime 2025-04-07 15:41:15
  58. // @param reportId int
  59. func ReportAddOrModifyKnowledgeByReportId(reportId int) {
  60. if reportId <= 0 {
  61. return
  62. }
  63. errList := make([]string, 0)
  64. defer func() {
  65. if len(errList) > 0 {
  66. utils.FileLog.Error("ReportAddOrModifyKnowledge error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
  67. }
  68. }()
  69. reportInfo, err := models.GetReportByReportId(reportId)
  70. if err != nil {
  71. errList = append(errList, err.Error())
  72. return
  73. }
  74. // 如果是单篇报告,那么直接处理
  75. if reportInfo.HasChapter == 0 {
  76. err = handleReportAddOrModifyKnowledge(reportId, 0, reportInfo.Title, reportInfo.Author, reportInfo.Content, reportInfo.PublishTime)
  77. if err != nil {
  78. errList = append(errList, err.Error())
  79. }
  80. return
  81. }
  82. // 章节类型的报告,需要查询出来后再处理
  83. chapterInfoList, err := models.GetPublishedChapterListByReportId(reportId)
  84. if err != nil {
  85. errList = append(errList, err.Error())
  86. return
  87. }
  88. for _, v := range chapterInfoList {
  89. err = handleReportAddOrModifyKnowledge(reportId, v.ReportChapterId, v.Title, reportInfo.Author, v.Content, v.PublishTime)
  90. if err != nil {
  91. errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", v.ReportChapterId, v.Title, err.Error()))
  92. continue
  93. }
  94. }
  95. return
  96. }
  97. // handleReportAddOrModifyKnowledge
  98. // @Description: 处理ETA报告加入/修改到知识库
  99. // @author: Roc
  100. // @datetime 2025-04-07 15:33:38
  101. // @param reportId int
  102. // @param reportChapterId int
  103. // @param title string
  104. // @param author string
  105. // @param htmlContent string
  106. // @param publishTime time.Time
  107. // @return err error
  108. func handleReportAddOrModifyKnowledge(reportId, reportChapterId int, title, author, htmlContent string, publishTime time.Time) (err error) {
  109. htmlContent = html.UnescapeString(htmlContent)
  110. doc, err := html.Parse(strings.NewReader(htmlContent))
  111. if err != nil {
  112. return
  113. }
  114. // 只获取文本内容
  115. content := &strings.Builder{}
  116. getArticleContent(content, doc)
  117. textContent := content.String()
  118. textContent = regexp.MustCompile(`\n+`).ReplaceAllString(textContent, "\n")
  119. textContent = strings.Trim(textContent, "\n")
  120. publishTimeStr := `未知`
  121. if !publishTime.IsZero() {
  122. title = fmt.Sprintf("%s(%s)", title, publishTime.Format(utils.FormatMonthDayUnSpace))
  123. publishTimeStr = publishTime.Format(utils.FormatDateTime)
  124. }
  125. textContent = fmt.Sprintf("标题:%s\n发布时间:%s\n%s", title, publishTimeStr, textContent)
  126. obj := rag.RagEtaReport{}
  127. item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
  128. if err != nil && !utils.IsErrNoRow(err) {
  129. // 查询异常,且不是没找到数据的报错
  130. return
  131. }
  132. if err == nil {
  133. // 标记删除了的话,那就不处理了
  134. if item.IsDeleted == 1 {
  135. return
  136. }
  137. item.Title = title
  138. item.Author = author
  139. item.TextContent = textContent
  140. item.IsPublished = 1
  141. //item.PublishTime = publishTime
  142. item.ModifyTime = time.Now()
  143. //err = item.Update([]string{"title", "author", "text_content", "is_published", "publish_time", "modify_time"})
  144. err = item.Update([]string{"title", "author", "text_content", "is_published", "modify_time"})
  145. } else {
  146. // 无数据的时候,需要新增
  147. err = nil
  148. item = &rag.RagEtaReport{
  149. RagEtaReportId: 0,
  150. ReportId: reportId,
  151. ReportChapterId: reportChapterId,
  152. Title: title,
  153. Author: author,
  154. TextContent: textContent,
  155. VectorKey: "",
  156. IsPublished: 1,
  157. IsDeleted: 0,
  158. PublishTime: publishTime,
  159. ModifyTime: time.Now(),
  160. CreateTime: time.Now(),
  161. }
  162. err = item.Create()
  163. }
  164. return
  165. }
  166. // ReportUnPublishedKnowledge
  167. // @Description: 知识库取消发布
  168. // @author: Roc
  169. // @datetime 2025-04-07 14:58:25
  170. // @param reportId int
  171. // @param reportChapterId int
  172. func ReportUnPublishedKnowledge(reportId, reportChapterId int) {
  173. if reportId <= 0 && reportChapterId <= 0 {
  174. return
  175. }
  176. var err error
  177. defer func() {
  178. if err != nil {
  179. //fmt.Println("ReportAddOrModifyKnowledge error:", err)
  180. utils.FileLog.Error("ReportAddOrModifyKnowledge error:", err)
  181. }
  182. }()
  183. obj := rag.RagEtaReport{}
  184. item, err := obj.GetByReportAndChapterId(reportId, reportChapterId)
  185. if err != nil && !utils.IsErrNoRow(err) {
  186. // 查询异常,且不是没找到数据的报错
  187. return
  188. }
  189. if item.RagEtaReportId > 0 {
  190. item.IsPublished = 0
  191. item.ModifyTime = time.Now()
  192. err = item.Update([]string{"is_published", "modify_time"})
  193. }
  194. return
  195. }
  196. // ReportUnPublishedKnowledgeByReportId
  197. // @Description: ETA报告取消发布同步到知识库(只传报告id的情况)
  198. // @author: Roc
  199. // @datetime 2025-04-07 15:41:15
  200. // @param reportId int
  201. func ReportUnPublishedKnowledgeByReportId(reportId int) {
  202. errList := make([]string, 0)
  203. defer func() {
  204. if len(errList) > 0 {
  205. utils.FileLog.Error("ReportUnPublishedKnowledgeByReportId error,报告ID:%d:%s", reportId, strings.Join(errList, "\n"))
  206. }
  207. }()
  208. obj := rag.RagEtaReport{}
  209. list, err := obj.GetListByCondition(``, ` AND report_id = ? `, []interface{}{reportId}, 0, 1000)
  210. if err != nil && !utils.IsErrNoRow(err) {
  211. // 查询异常,且不是没找到数据的报错
  212. return
  213. }
  214. for _, item := range list {
  215. item.IsPublished = 0
  216. item.ModifyTime = time.Now()
  217. err = item.Update([]string{"is_published", "modify_time"})
  218. if err != nil {
  219. errList = append(errList, fmt.Sprintf("第%d章:%s,异常:\n%s", item.ReportChapterId, item.Title, err.Error()))
  220. continue
  221. }
  222. }
  223. return
  224. }
  225. func getArticleContent(content *strings.Builder, htmlContentNode *html.Node) {
  226. if htmlContentNode.Type == html.TextNode {
  227. cleanData := strings.TrimSpace(htmlContentNode.Data)
  228. if cleanData != `` && cleanData != "</p>" {
  229. content.WriteString(cleanData)
  230. }
  231. } else if htmlContentNode.Type == html.ElementNode {
  232. switch htmlContentNode.DataAtom {
  233. case atom.Ul:
  234. content.WriteString("\n")
  235. case atom.Br:
  236. // 遇到 <br> 标签时添加换行符
  237. content.WriteString("\n")
  238. case atom.P:
  239. content.WriteString("\n")
  240. }
  241. }
  242. for c := htmlContentNode.FirstChild; c != nil; c = c.NextSibling {
  243. getArticleContent(content, c)
  244. }
  245. }