semantic_analysis.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. package services
  2. import (
  3. saModel "eta_gn/eta_api/models/semantic_analysis"
  4. "eta_gn/eta_api/services/alarm_msg"
  5. "eta_gn/eta_api/utils"
  6. "fmt"
  7. "github.com/PuerkitoBio/goquery"
  8. "html"
  9. "sort"
  10. "strings"
  11. )
  12. func LoadSaDocContent2Section(content string) (sections []string, err error) {
  13. if content == `` {
  14. return
  15. }
  16. defer func() {
  17. if err != nil {
  18. fmt.Println(err.Error())
  19. }
  20. }()
  21. doc, e := goquery.NewDocumentFromReader(strings.NewReader(content))
  22. if e != nil {
  23. err = fmt.Errorf("读取html内容失败, Err: %s", e.Error())
  24. return
  25. }
  26. doc.Find("p").Each(func(i int, s *goquery.Selection) {
  27. h, e := s.Html()
  28. if e != nil {
  29. err = fmt.Errorf("读取html标签失败, Err: %s", e.Error())
  30. return
  31. }
  32. t := strings.TrimSpace(s.Text())
  33. if t != "" {
  34. sections = append(sections, fmt.Sprintf(`<p>%s</p>`, h))
  35. }
  36. })
  37. return
  38. }
  39. func GetSaCompareTableData(labelIds, docIds, secIds []int, contentMap map[string]int) (resp *saModel.SaCompareSaveResp, err error) {
  40. resp = new(saModel.SaCompareSaveResp)
  41. docMap := make(map[int]*saModel.SaDoc)
  42. if len(docIds) > 0 {
  43. docOB := new(saModel.SaDoc)
  44. docCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocColumns.SaDocId, utils.GetOrmInReplace(len(docIds)))
  45. docPars := make([]interface{}, 0)
  46. docPars = append(docPars, docIds)
  47. docFields := []string{saModel.SaDocColumns.SaDocId, saModel.SaDocColumns.Title, saModel.SaDocColumns.Theme}
  48. docItems, e := docOB.GetItemsByCondition(docCond, docPars, docFields, "")
  49. if e != nil {
  50. err = fmt.Errorf("获取比对文档列表失败, Err: %s", e.Error())
  51. return
  52. }
  53. for _, d := range docItems {
  54. docMap[d.SaDocId] = d
  55. }
  56. }
  57. secMap := make(map[int]string)
  58. if len(secIds) > 0 {
  59. secOB := new(saModel.SaDocSection)
  60. secCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocSectionColumns.SaDocSectionId, utils.GetOrmInReplace(len(secIds)))
  61. secPars := make([]interface{}, 0)
  62. secPars = append(secPars, secIds)
  63. secFields := []string{saModel.SaDocSectionColumns.SaDocSectionId, saModel.SaDocSectionColumns.DocId, saModel.SaDocSectionColumns.Content}
  64. secItems, e := secOB.GetItemsByCondition(secCond, secPars, secFields, "")
  65. if e != nil {
  66. err = fmt.Errorf("获取比对文档列表失败, Err: %s", e.Error())
  67. return
  68. }
  69. for _, s := range secItems {
  70. secMap[s.SaDocSectionId] = html.UnescapeString(s.Content)
  71. }
  72. }
  73. labelMap := make(map[int]string)
  74. labelOB := new(saModel.SaLabel)
  75. labelCond := ``
  76. labelPars := make([]interface{}, 0)
  77. labelFields := []string{saModel.SaLabelColumns.SaLabelId, saModel.SaLabelColumns.LabelName}
  78. labelItems, e := labelOB.GetItemsByCondition(labelCond, labelPars, labelFields, "")
  79. if e != nil {
  80. err = fmt.Errorf("获取标签列表失败, Err: %s", e.Error())
  81. return
  82. }
  83. for _, l := range labelItems {
  84. labelMap[l.SaLabelId] = l.LabelName
  85. }
  86. for _, d := range docIds {
  87. dv := docMap[d]
  88. if dv != nil {
  89. resp.TitleList = append(resp.TitleList, dv.Title)
  90. }
  91. }
  92. respLabel := make([]*saModel.SaCompareSaveRespLabel, 0)
  93. for _, l := range labelIds {
  94. v := new(saModel.SaCompareSaveRespLabel)
  95. v.SaLabelId = l
  96. v.LabelName = labelMap[l]
  97. docList := make([]*saModel.SaCompareSaveRespDoc, 0)
  98. for _, d := range docIds {
  99. vd := new(saModel.SaCompareSaveRespDoc)
  100. vd.SaDocId = d
  101. dv := docMap[d]
  102. if dv != nil {
  103. vd.Title = dv.Title
  104. }
  105. secList := make([]*saModel.SaCompareSaveRespSection, 0)
  106. for _, s := range secIds {
  107. k := fmt.Sprintf("%d-%d-%d", l, d, s)
  108. if contentMap[k] == 0 {
  109. continue
  110. }
  111. vs := new(saModel.SaCompareSaveRespSection)
  112. vs.SaDocSectionId = s
  113. vs.Content = secMap[s]
  114. secList = append(secList, vs)
  115. }
  116. vd.SectionList = secList
  117. docList = append(docList, vd)
  118. }
  119. v.DocList = docList
  120. respLabel = append(respLabel, v)
  121. }
  122. resp.LabelList = respLabel
  123. return
  124. }
  125. func GetSaCompareDetailByDocIds(docIds []int, compareId, sysAdminId int) (detail *saModel.SaCompareDetail, err error) {
  126. if len(docIds) == 0 {
  127. return
  128. }
  129. docOB := new(saModel.SaDoc)
  130. docCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocColumns.SaDocId, utils.GetOrmInReplace(len(docIds)))
  131. docPars := make([]interface{}, 0)
  132. docPars = append(docPars, docIds)
  133. docs, e := docOB.GetItemsByCondition(docCond, docPars, []string{}, "")
  134. if e != nil {
  135. err = fmt.Errorf("获取文档信息失败, Err: %s", e.Error())
  136. return
  137. }
  138. secOB := new(saModel.SaDocSection)
  139. secCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocSectionColumns.DocId, utils.GetOrmInReplace(len(docIds)))
  140. secPars := make([]interface{}, 0)
  141. secPars = append(secPars, docIds)
  142. secsItems, e := secOB.GetItemsByCondition(secCond, secPars, []string{}, "doc_id ASC, sort ASC")
  143. secsMap := make(map[int][]*saModel.SaDocSection)
  144. for _, s := range secsItems {
  145. if secsMap[s.DocId] == nil {
  146. secsMap[s.DocId] = make([]*saModel.SaDocSection, 0)
  147. }
  148. secsMap[s.DocId] = append(secsMap[s.DocId], s)
  149. }
  150. compLabs := make([]*saModel.SaCompareLabel, 0)
  151. compLabOB := new(saModel.SaCompareLabel)
  152. compLabCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaCompareLabelColumns.DocId, utils.GetOrmInReplace(len(docIds)))
  153. compLabPars := make([]interface{}, 0)
  154. compLabPars = append(compLabPars, docIds)
  155. compLabQuery, e := compLabOB.GetItemsByCondition(compLabCond, compLabPars, []string{}, "")
  156. if e != nil {
  157. err = fmt.Errorf("获取比对标签失败, Err: %s", e.Error())
  158. return
  159. }
  160. compLabs = compLabQuery
  161. keywordsOB := new(saModel.SaCompareSearchKeyword)
  162. keywordsCond := fmt.Sprintf(` AND %s = ?`, saModel.SaCompareSearchKeywordColumns.CompareId)
  163. keywordsPars := make([]interface{}, 0)
  164. keywordsPars = append(keywordsPars, compareId)
  165. keywordsItems, e := keywordsOB.GetItemsByCondition(keywordsCond, keywordsPars, []string{}, "")
  166. if e != nil {
  167. err = fmt.Errorf("获取历史搜索关键词失败, Err: %s", e.Error())
  168. return
  169. }
  170. keywords := make([]string, 0)
  171. for _, k := range keywordsItems {
  172. keywords = append(keywords, k.Keyword)
  173. }
  174. secLabelMap, isMineMap := formatCompareLabelStatusGroupSection(compLabs, compareId, sysAdminId)
  175. partSecMap := make(map[int][]*saModel.SaCompareLabel)
  176. partSecExistMap := make(map[string]bool)
  177. tabLabelIds := make([]int, 0)
  178. tabLabels := make([]*saModel.SaCompareDetailHeadLabel, 0)
  179. for _, l := range compLabs {
  180. if !utils.InArrayByInt(tabLabelIds, l.LabelId) {
  181. t := new(saModel.SaCompareDetailHeadLabel)
  182. t.LabelId = l.LabelId
  183. t.LabelName = l.LabelName
  184. t.IsMine = isMineMap[l.LabelId]
  185. tabLabelIds = append(tabLabelIds, l.LabelId)
  186. tabLabels = append(tabLabels, t)
  187. }
  188. if l.IsPart != 1 {
  189. continue
  190. }
  191. if partSecMap[l.DocId] == nil {
  192. partSecMap[l.DocId] = make([]*saModel.SaCompareLabel, 0)
  193. }
  194. ek := fmt.Sprintf("%d-%s", l.SectionId, utils.MD5(l.Content))
  195. if partSecExistMap[ek] {
  196. continue
  197. }
  198. partSecMap[l.DocId] = append(partSecMap[l.DocId], l)
  199. }
  200. detail = new(saModel.SaCompareDetail)
  201. docList := make([]*saModel.SaCompareDetailDoc, 0)
  202. for _, d := range docs {
  203. dv := new(saModel.SaCompareDetailDoc)
  204. dv.DocId = d.SaDocId
  205. dv.Title = d.Title
  206. dv.Theme = d.Theme
  207. dv.ClassifyName = d.ClassifyName
  208. secList := make([]*saModel.SaCompareDetailSection, 0)
  209. secs := secsMap[d.SaDocId]
  210. if secs != nil {
  211. for _, s := range secs {
  212. sv := new(saModel.SaCompareDetailSection)
  213. sv.SectionId = s.SaDocSectionId
  214. sv.Content = html.UnescapeString(s.Content)
  215. sv.Sort = s.Sort
  216. sv.LabelList = secLabelMap[fmt.Sprintf("%d-%s", s.SaDocSectionId, utils.MD5(``))]
  217. secList = append(secList, sv)
  218. }
  219. }
  220. parts := partSecMap[d.SaDocId]
  221. if parts != nil {
  222. for _, p := range parts {
  223. pv := new(saModel.SaCompareDetailSection)
  224. pv.SectionId = p.SectionId
  225. pv.Content = html.UnescapeString(p.Content)
  226. pv.IsPart = 1
  227. pv.StartIndex = p.StartIndex
  228. pv.EndIndex = p.EndIndex
  229. pv.LabelList = secLabelMap[fmt.Sprintf("%d-%s", p.SectionId, utils.MD5(p.Content))]
  230. fmt.Println("kkk", fmt.Sprintf("%d-%s", p.SectionId, utils.MD5(p.Content)))
  231. secList = append(secList, pv)
  232. }
  233. }
  234. dv.SectionList = secList
  235. docList = append(docList, dv)
  236. }
  237. detail.HeadLabel = tabLabels
  238. detail.DocList = docList
  239. detail.KeywordsList = keywords
  240. return
  241. }
  242. func formatCompareLabelStatusGroupSection(compLabels []*saModel.SaCompareLabel, compareId, sysAdminId int) (labelMap map[string][]*saModel.SaCompareDetailFormatLabel, isMineMap map[int]int) {
  243. labelMap = make(map[string][]*saModel.SaCompareDetailFormatLabel)
  244. repeatMap := make(map[string][]int)
  245. thisMap := make(map[string]int)
  246. historyMap := make(map[string]int)
  247. otherMap := make(map[string]int)
  248. isMineMap = make(map[int]int) // 用于判断标签是否自己曾经使用或者当前使用过
  249. for _, l := range compLabels {
  250. m := utils.MD5(l.Content)
  251. k := fmt.Sprintf("%d-%s-%d", l.SectionId, m, l.LabelId)
  252. if l.CompareId == compareId {
  253. thisMap[k] = 1
  254. if l.SysAdminId == sysAdminId {
  255. isMineMap[l.LabelId] = 1
  256. }
  257. }
  258. if l.CompareId != compareId && l.SysAdminId == sysAdminId {
  259. historyMap[k] = 1
  260. isMineMap[l.LabelId] = 1
  261. }
  262. if l.SysAdminId != sysAdminId {
  263. otherMap[k] = 1
  264. }
  265. k2 := fmt.Sprintf("%d-%s", l.SectionId, m)
  266. if repeatMap[k2] == nil {
  267. repeatMap[k2] = make([]int, 0)
  268. }
  269. if utils.InArrayByInt(repeatMap[k2], l.LabelId) {
  270. continue
  271. }
  272. repeatMap[k2] = append(repeatMap[k2], l.LabelId)
  273. if labelMap[k2] == nil {
  274. labelMap[k2] = make([]*saModel.SaCompareDetailFormatLabel, 0)
  275. }
  276. labelMap[k2] = append(labelMap[k2], &saModel.SaCompareDetailFormatLabel{
  277. LabelId: l.LabelId,
  278. LabelName: l.LabelName,
  279. })
  280. }
  281. for s, l := range labelMap {
  282. for _, v := range l {
  283. k := fmt.Sprintf("%s-%d", s, v.LabelId)
  284. v.IsThis = thisMap[k]
  285. v.IsHistory = historyMap[k]
  286. v.IsOther = otherMap[k]
  287. }
  288. }
  289. return
  290. }
  291. func HandleElasticSaDocAndSection(saDoc *saModel.SaDoc, sections []*saModel.SaDocSection, delIds []int) (err error) {
  292. defer func() {
  293. if err != nil {
  294. alarm_msg.SendAlarmMsg(fmt.Sprintf("Elastic-语义分析文档, Err: %s", err.Error()), 2)
  295. }
  296. }()
  297. indexName := utils.EsSemanticAnalysisDocIndexName
  298. content := ``
  299. items := make([]*saModel.ElasticSaDoc, 0)
  300. for _, s := range sections {
  301. h := html.UnescapeString(s.Content)
  302. content += h
  303. items = append(items, &saModel.ElasticSaDoc{
  304. SaDocId: s.DocId,
  305. SaDocSectionId: s.SaDocSectionId,
  306. ClassifyId: saDoc.ClassifyId,
  307. ClassifyName: saDoc.ClassifyName,
  308. Title: saDoc.Title,
  309. Theme: saDoc.Theme,
  310. BodyContent: h,
  311. Author: saDoc.SysAdminName,
  312. CoverImg: saDoc.CoverImg,
  313. CreateTime: saDoc.CreateTime.Format(utils.FormatDateTime),
  314. })
  315. }
  316. docId := fmt.Sprintf("%d-0", saDoc.SaDocId)
  317. item := &saModel.ElasticSaDoc{
  318. SaDocId: saDoc.SaDocId,
  319. SaDocSectionId: 0,
  320. ClassifyId: saDoc.ClassifyId,
  321. ClassifyName: saDoc.ClassifyName,
  322. Title: saDoc.Title,
  323. Theme: saDoc.Theme,
  324. BodyContent: content,
  325. Author: saDoc.SysAdminName,
  326. CoverImg: saDoc.CoverImg,
  327. CreateTime: saDoc.CreateTime.Format(utils.FormatDateTime),
  328. }
  329. if e := EsAddOrEditSaDoc(indexName, docId, item); e != nil {
  330. err = fmt.Errorf("新增/更新ES语义分析文档失败, Err: %s", e.Error())
  331. return
  332. }
  333. for _, v := range items {
  334. docId = fmt.Sprintf("%d-%d", v.SaDocId, v.SaDocSectionId)
  335. if e := EsAddOrEditSaDoc(indexName, docId, v); e != nil {
  336. err = fmt.Errorf("新增/更新ES语义分析文档段落失败, Err: %s", e.Error())
  337. return
  338. }
  339. }
  340. if len(delIds) > 0 {
  341. for _, d := range delIds {
  342. docId = fmt.Sprintf("%d-%d", saDoc.SaDocId, d)
  343. if e := EsDeleteData(indexName, docId); e != nil && !strings.Contains(e.Error(), "404") {
  344. err = fmt.Errorf("删除ES语义分析文档段落失败, Err: %s", e.Error())
  345. return
  346. }
  347. }
  348. }
  349. return
  350. }
  351. func DeleteElasticSaDocAndSection(saDocId int, secIds []int) (err error) {
  352. defer func() {
  353. if err != nil {
  354. alarm_msg.SendAlarmMsg(fmt.Sprintf("Elastic-语义分析文档, Err: %s", err.Error()), 2)
  355. }
  356. }()
  357. indexName := utils.EsSemanticAnalysisDocIndexName
  358. docId := fmt.Sprintf("%d-0", saDocId)
  359. if e := EsDeleteData(indexName, docId); e != nil && !strings.Contains(e.Error(), "404") {
  360. err = fmt.Errorf("删除ES语义分析文档失败, Err: %s", e.Error())
  361. return
  362. }
  363. if len(secIds) > 0 {
  364. for _, d := range secIds {
  365. docId = fmt.Sprintf("%d-%d", saDocId, d)
  366. if e := EsDeleteData(indexName, docId); e != nil && !strings.Contains(e.Error(), "404") {
  367. err = fmt.Errorf("删除ES语义分析文档段落失败, Err: %s", e.Error())
  368. return
  369. }
  370. }
  371. }
  372. return
  373. }
  374. func FormatCompareLabels2TableData(compareLabels []*saModel.SaCompareLabelItem) (resp *saModel.SaCompareSaveResp, err error) {
  375. resp = new(saModel.SaCompareSaveResp)
  376. resp.LabelList = make([]*saModel.SaCompareSaveRespLabel, 0)
  377. labelMap := make(map[int]*saModel.SaCompareLabelItem)
  378. docMap := make(map[int]*saModel.SaCompareLabelItem)
  379. secMap := make(map[string][]*saModel.SaCompareLabelItem)
  380. for _, v := range compareLabels {
  381. if labelMap[v.LabelId] == nil {
  382. labelMap[v.LabelId] = v
  383. resp.LabelList = append(resp.LabelList, &saModel.SaCompareSaveRespLabel{
  384. SaLabelId: v.LabelId,
  385. LabelName: v.LabelName,
  386. DocList: make([]*saModel.SaCompareSaveRespDoc, 0),
  387. })
  388. }
  389. if docMap[v.DocId] == nil {
  390. docMap[v.DocId] = v
  391. resp.TitleList = append(resp.TitleList, v.Title)
  392. }
  393. k := fmt.Sprintf("%d-%d", v.LabelId, v.DocId)
  394. if secMap[k] == nil {
  395. secMap[k] = make([]*saModel.SaCompareLabelItem, 0)
  396. }
  397. secMap[k] = append(secMap[k], v)
  398. }
  399. secExistMap := make(map[string]bool)
  400. for _, l := range resp.LabelList {
  401. docs := make([]*saModel.SaCompareSaveRespDoc, 0)
  402. for _, d := range docMap {
  403. dv := new(saModel.SaCompareSaveRespDoc)
  404. dv.SaDocId = d.DocId
  405. dv.Title = d.Title
  406. k := fmt.Sprintf("%d-%d", l.SaLabelId, d.DocId)
  407. secs := make([]*saModel.SaCompareSaveRespSection, 0)
  408. secList := secMap[k]
  409. if secList != nil && len(secList) > 0 {
  410. for _, s := range secList {
  411. sv := new(saModel.SaCompareSaveRespSection)
  412. sv.SaDocSectionId = s.SectionId
  413. content := html.UnescapeString(s.SectionContent)
  414. if s.CompareContent != "" {
  415. sv.IsPart = 1
  416. content = html.UnescapeString(s.CompareContent)
  417. }
  418. sv.Content = content
  419. ek := fmt.Sprintf("%d-%d-%d-%s", l.SaLabelId, d.DocId, s.SectionId, utils.MD5(content))
  420. fmt.Println(ek)
  421. if secExistMap[ek] {
  422. fmt.Println("跳过", ek)
  423. continue
  424. }
  425. secExistMap[ek] = true
  426. secs = append(secs, sv)
  427. }
  428. }
  429. dv.SectionList = secs
  430. docs = append(docs, dv)
  431. }
  432. sort.Slice(docs, func(i, j int) bool {
  433. return docs[j].SaDocId > docs[i].SaDocId
  434. })
  435. l.DocList = docs
  436. }
  437. return
  438. }