123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515 |
- package services
- import (
- saModel "eta_gn/eta_api/models/semantic_analysis"
- "eta_gn/eta_api/services/alarm_msg"
- "eta_gn/eta_api/utils"
- "fmt"
- "github.com/PuerkitoBio/goquery"
- "html"
- "sort"
- "strings"
- )
- // LoadSaDocContent2Section 读取文档内容为段落, 以<p>标签划分
- func LoadSaDocContent2Section(content string) (sections []string, err error) {
- if content == `` {
- return
- }
- defer func() {
- if err != nil {
- fmt.Println(err.Error())
- }
- }()
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(content))
- if e != nil {
- err = fmt.Errorf("读取html内容失败, Err: %s", e.Error())
- return
- }
- doc.Find("p").Each(func(i int, s *goquery.Selection) {
- h, e := s.Html()
- if e != nil {
- err = fmt.Errorf("读取html标签失败, Err: %s", e.Error())
- return
- }
- t := strings.TrimSpace(s.Text())
- if t != "" {
- sections = append(sections, fmt.Sprintf(`<p>%s</p>`, h))
- }
- })
- return
- }
- // GetSaCompareTableData 获取文档比对表格数据
- func GetSaCompareTableData(labelIds, docIds, secIds []int, contentMap map[string]int) (resp *saModel.SaCompareSaveResp, err error) {
- resp = new(saModel.SaCompareSaveResp)
- // 文档列表
- docMap := make(map[int]*saModel.SaDoc)
- if len(docIds) > 0 {
- docOB := new(saModel.SaDoc)
- docCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocColumns.SaDocId, utils.GetOrmInReplace(len(docIds)))
- docPars := make([]interface{}, 0)
- docPars = append(docPars, docIds)
- docFields := []string{saModel.SaDocColumns.SaDocId, saModel.SaDocColumns.Title, saModel.SaDocColumns.Theme}
- docItems, e := docOB.GetItemsByCondition(docCond, docPars, docFields, "")
- if e != nil {
- err = fmt.Errorf("获取比对文档列表失败, Err: %s", e.Error())
- return
- }
- for _, d := range docItems {
- docMap[d.SaDocId] = d
- }
- }
- // 段落map
- secMap := make(map[int]string)
- if len(secIds) > 0 {
- secOB := new(saModel.SaDocSection)
- secCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocSectionColumns.SaDocSectionId, utils.GetOrmInReplace(len(secIds)))
- secPars := make([]interface{}, 0)
- secPars = append(secPars, secIds)
- secFields := []string{saModel.SaDocSectionColumns.SaDocSectionId, saModel.SaDocSectionColumns.DocId, saModel.SaDocSectionColumns.Content}
- secItems, e := secOB.GetItemsByCondition(secCond, secPars, secFields, "")
- if e != nil {
- err = fmt.Errorf("获取比对文档列表失败, Err: %s", e.Error())
- return
- }
- for _, s := range secItems {
- secMap[s.SaDocSectionId] = html.UnescapeString(s.Content)
- }
- }
- // 标签map
- labelMap := make(map[int]string)
- labelOB := new(saModel.SaLabel)
- labelCond := ``
- labelPars := make([]interface{}, 0)
- labelFields := []string{saModel.SaLabelColumns.SaLabelId, saModel.SaLabelColumns.LabelName}
- labelItems, e := labelOB.GetItemsByCondition(labelCond, labelPars, labelFields, "")
- if e != nil {
- err = fmt.Errorf("获取标签列表失败, Err: %s", e.Error())
- return
- }
- for _, l := range labelItems {
- labelMap[l.SaLabelId] = l.LabelName
- }
- // 表头信息, 按照docIds的顺序不然会乱
- for _, d := range docIds {
- dv := docMap[d]
- if dv != nil {
- resp.TitleList = append(resp.TitleList, dv.Title)
- //resp.ThemeList = append(resp.ThemeList, dv.Theme)
- }
- }
- // 标签列表
- respLabel := make([]*saModel.SaCompareSaveRespLabel, 0)
- for _, l := range labelIds {
- v := new(saModel.SaCompareSaveRespLabel)
- v.SaLabelId = l
- v.LabelName = labelMap[l]
- // 文档列表
- docList := make([]*saModel.SaCompareSaveRespDoc, 0)
- for _, d := range docIds {
- vd := new(saModel.SaCompareSaveRespDoc)
- vd.SaDocId = d
- dv := docMap[d]
- if dv != nil {
- vd.Title = dv.Title
- }
- secList := make([]*saModel.SaCompareSaveRespSection, 0)
- // 段落列表
- for _, s := range secIds {
- k := fmt.Sprintf("%d-%d-%d", l, d, s)
- if contentMap[k] == 0 {
- continue
- }
- vs := new(saModel.SaCompareSaveRespSection)
- vs.SaDocSectionId = s
- vs.Content = secMap[s]
- secList = append(secList, vs)
- }
- vd.SectionList = secList
- docList = append(docList, vd)
- }
- v.DocList = docList
- respLabel = append(respLabel, v)
- }
- resp.LabelList = respLabel
- return
- }
- // GetSaCompareDetailByDocIds 根据文档IDs获取比对详情信息
- // compareId大于0: docIds为比对关联的文档IDs, 段落标签展示当前标签、历史标签、Ta的标签
- // compareId等于0: docIds为选择的文档IDs, 段落标签展示历史标签、Ta的标签
- func GetSaCompareDetailByDocIds(docIds []int, compareId, sysAdminId int) (detail *saModel.SaCompareDetail, err error) {
- if len(docIds) == 0 {
- return
- }
- // 获取文档信息
- docOB := new(saModel.SaDoc)
- docCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocColumns.SaDocId, utils.GetOrmInReplace(len(docIds)))
- docPars := make([]interface{}, 0)
- docPars = append(docPars, docIds)
- docs, e := docOB.GetItemsByCondition(docCond, docPars, []string{}, "")
- if e != nil {
- err = fmt.Errorf("获取文档信息失败, Err: %s", e.Error())
- return
- }
- // 获取文档段落信息
- secOB := new(saModel.SaDocSection)
- secCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocSectionColumns.DocId, utils.GetOrmInReplace(len(docIds)))
- secPars := make([]interface{}, 0)
- secPars = append(secPars, docIds)
- secsItems, e := secOB.GetItemsByCondition(secCond, secPars, []string{}, "doc_id ASC, sort ASC")
- secsMap := make(map[int][]*saModel.SaDocSection)
- for _, s := range secsItems {
- if secsMap[s.DocId] == nil {
- secsMap[s.DocId] = make([]*saModel.SaDocSection, 0)
- }
- secsMap[s.DocId] = append(secsMap[s.DocId], s)
- }
- // 获取文档打的标签
- compLabs := make([]*saModel.SaCompareLabel, 0)
- compLabOB := new(saModel.SaCompareLabel)
- compLabCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaCompareLabelColumns.DocId, utils.GetOrmInReplace(len(docIds)))
- compLabPars := make([]interface{}, 0)
- compLabPars = append(compLabPars, docIds)
- compLabQuery, e := compLabOB.GetItemsByCondition(compLabCond, compLabPars, []string{}, "")
- if e != nil {
- err = fmt.Errorf("获取比对标签失败, Err: %s", e.Error())
- return
- }
- compLabs = compLabQuery
- // 历史搜索关键词
- keywordsOB := new(saModel.SaCompareSearchKeyword)
- keywordsCond := fmt.Sprintf(` AND %s = ?`, saModel.SaCompareSearchKeywordColumns.CompareId)
- keywordsPars := make([]interface{}, 0)
- keywordsPars = append(keywordsPars, compareId)
- keywordsItems, e := keywordsOB.GetItemsByCondition(keywordsCond, keywordsPars, []string{}, "")
- if e != nil {
- err = fmt.Errorf("获取历史搜索关键词失败, Err: %s", e.Error())
- return
- }
- keywords := make([]string, 0)
- for _, k := range keywordsItems {
- keywords = append(keywords, k.Keyword)
- }
- // 段落标签Map
- secLabelMap, isMineMap := formatCompareLabelStatusGroupSection(compLabs, compareId, sysAdminId)
- partSecMap := make(map[int][]*saModel.SaCompareLabel)
- partSecExistMap := make(map[string]bool)
- // 头部标签列表-包含多个文档中引用的所有标签并进行去重
- tabLabelIds := make([]int, 0)
- tabLabels := make([]*saModel.SaCompareDetailHeadLabel, 0)
- for _, l := range compLabs {
- if !utils.InArrayByInt(tabLabelIds, l.LabelId) {
- t := new(saModel.SaCompareDetailHeadLabel)
- t.LabelId = l.LabelId
- t.LabelName = l.LabelName
- t.IsMine = isMineMap[l.LabelId]
- tabLabelIds = append(tabLabelIds, l.LabelId)
- tabLabels = append(tabLabels, t)
- }
- // 文档片段Map
- if l.IsPart != 1 {
- continue
- }
- if partSecMap[l.DocId] == nil {
- partSecMap[l.DocId] = make([]*saModel.SaCompareLabel, 0)
- }
- ek := fmt.Sprintf("%d-%s", l.SectionId, utils.MD5(l.Content))
- if partSecExistMap[ek] {
- continue
- }
- partSecMap[l.DocId] = append(partSecMap[l.DocId], l)
- }
- // 详情
- detail = new(saModel.SaCompareDetail)
- docList := make([]*saModel.SaCompareDetailDoc, 0)
- for _, d := range docs {
- dv := new(saModel.SaCompareDetailDoc)
- dv.DocId = d.SaDocId
- dv.Title = d.Title
- dv.Theme = d.Theme
- dv.ClassifyName = d.ClassifyName
- // 整段
- secList := make([]*saModel.SaCompareDetailSection, 0)
- secs := secsMap[d.SaDocId]
- if secs != nil {
- for _, s := range secs {
- sv := new(saModel.SaCompareDetailSection)
- sv.SectionId = s.SaDocSectionId
- sv.Content = html.UnescapeString(s.Content)
- sv.Sort = s.Sort
- sv.LabelList = secLabelMap[fmt.Sprintf("%d-%s", s.SaDocSectionId, utils.MD5(``))]
- secList = append(secList, sv)
- }
- }
- // 片段
- parts := partSecMap[d.SaDocId]
- if parts != nil {
- for _, p := range parts {
- pv := new(saModel.SaCompareDetailSection)
- pv.SectionId = p.SectionId
- pv.Content = html.UnescapeString(p.Content)
- pv.IsPart = 1
- pv.StartIndex = p.StartIndex
- pv.EndIndex = p.EndIndex
- pv.LabelList = secLabelMap[fmt.Sprintf("%d-%s", p.SectionId, utils.MD5(p.Content))]
- fmt.Println("kkk", fmt.Sprintf("%d-%s", p.SectionId, utils.MD5(p.Content)))
- secList = append(secList, pv)
- }
- }
- dv.SectionList = secList
- docList = append(docList, dv)
- }
- detail.HeadLabel = tabLabels
- detail.DocList = docList
- detail.KeywordsList = keywords
- return
- }
- // formatCompareLabelStatusGroupSection 根据段落格式化段落标签的状态
- func formatCompareLabelStatusGroupSection(compLabels []*saModel.SaCompareLabel, compareId, sysAdminId int) (labelMap map[string][]*saModel.SaCompareDetailFormatLabel, isMineMap map[int]int) {
- labelMap = make(map[string][]*saModel.SaCompareDetailFormatLabel)
- repeatMap := make(map[string][]int)
- thisMap := make(map[string]int)
- historyMap := make(map[string]int)
- otherMap := make(map[string]int)
- isMineMap = make(map[int]int) // 用于判断标签是否自己曾经使用或者当前使用过
- for _, l := range compLabels {
- // 判断段落标签的三种状态
- m := utils.MD5(l.Content)
- k := fmt.Sprintf("%d-%s-%d", l.SectionId, m, l.LabelId)
- if l.CompareId == compareId {
- thisMap[k] = 1
- if l.SysAdminId == sysAdminId {
- isMineMap[l.LabelId] = 1
- }
- }
- if l.CompareId != compareId && l.SysAdminId == sysAdminId {
- historyMap[k] = 1
- isMineMap[l.LabelId] = 1
- }
- if l.SysAdminId != sysAdminId {
- otherMap[k] = 1
- }
- k2 := fmt.Sprintf("%d-%s", l.SectionId, m)
- // 判断每段落内的标签是否重复添加
- if repeatMap[k2] == nil {
- repeatMap[k2] = make([]int, 0)
- }
- if utils.InArrayByInt(repeatMap[k2], l.LabelId) {
- continue
- }
- repeatMap[k2] = append(repeatMap[k2], l.LabelId)
- // 初始化段落标签
- if labelMap[k2] == nil {
- labelMap[k2] = make([]*saModel.SaCompareDetailFormatLabel, 0)
- }
- labelMap[k2] = append(labelMap[k2], &saModel.SaCompareDetailFormatLabel{
- LabelId: l.LabelId,
- LabelName: l.LabelName,
- })
- }
- for s, l := range labelMap {
- for _, v := range l {
- k := fmt.Sprintf("%s-%d", s, v.LabelId)
- v.IsThis = thisMap[k]
- v.IsHistory = historyMap[k]
- v.IsOther = otherMap[k]
- }
- }
- return
- }
- // HandleElasticSaDocAndSection Elastic-新增/编辑文档和段落
- func HandleElasticSaDocAndSection(saDoc *saModel.SaDoc, sections []*saModel.SaDocSection, delIds []int) (err error) {
- defer func() {
- if err != nil {
- alarm_msg.SendAlarmMsg(fmt.Sprintf("Elastic-语义分析文档, Err: %s", err.Error()), 2)
- }
- }()
- indexName := utils.EsSemanticAnalysisDocIndexName
- content := ``
- // 段落
- items := make([]*saModel.ElasticSaDoc, 0)
- for _, s := range sections {
- h := html.UnescapeString(s.Content)
- content += h
- items = append(items, &saModel.ElasticSaDoc{
- SaDocId: s.DocId,
- SaDocSectionId: s.SaDocSectionId,
- ClassifyId: saDoc.ClassifyId,
- ClassifyName: saDoc.ClassifyName,
- Title: saDoc.Title,
- Theme: saDoc.Theme,
- BodyContent: h,
- Author: saDoc.SysAdminName,
- CoverImg: saDoc.CoverImg,
- CreateTime: saDoc.CreateTime.Format(utils.FormatDateTime),
- })
- }
- // 文档
- docId := fmt.Sprintf("%d-0", saDoc.SaDocId)
- item := &saModel.ElasticSaDoc{
- SaDocId: saDoc.SaDocId,
- SaDocSectionId: 0,
- ClassifyId: saDoc.ClassifyId,
- ClassifyName: saDoc.ClassifyName,
- Title: saDoc.Title,
- Theme: saDoc.Theme,
- BodyContent: content,
- Author: saDoc.SysAdminName,
- CoverImg: saDoc.CoverImg,
- CreateTime: saDoc.CreateTime.Format(utils.FormatDateTime),
- }
- // 新增/更新
- if e := EsAddOrEditSaDoc(indexName, docId, item); e != nil {
- err = fmt.Errorf("新增/更新ES语义分析文档失败, Err: %s", e.Error())
- return
- }
- for _, v := range items {
- docId = fmt.Sprintf("%d-%d", v.SaDocId, v.SaDocSectionId)
- if e := EsAddOrEditSaDoc(indexName, docId, v); e != nil {
- err = fmt.Errorf("新增/更新ES语义分析文档段落失败, Err: %s", e.Error())
- return
- }
- }
- // 删除段落
- if len(delIds) > 0 {
- for _, d := range delIds {
- docId = fmt.Sprintf("%d-%d", saDoc.SaDocId, d)
- if e := EsDeleteData(indexName, docId); e != nil && !strings.Contains(e.Error(), "404") {
- err = fmt.Errorf("删除ES语义分析文档段落失败, Err: %s", e.Error())
- return
- }
- }
- }
- return
- }
- // DeleteElasticSaDocAndSection Elastic-删除文档和段落
- func DeleteElasticSaDocAndSection(saDocId int, secIds []int) (err error) {
- defer func() {
- if err != nil {
- alarm_msg.SendAlarmMsg(fmt.Sprintf("Elastic-语义分析文档, Err: %s", err.Error()), 2)
- }
- }()
- indexName := utils.EsSemanticAnalysisDocIndexName
- docId := fmt.Sprintf("%d-0", saDocId)
- if e := EsDeleteData(indexName, docId); e != nil && !strings.Contains(e.Error(), "404") {
- err = fmt.Errorf("删除ES语义分析文档失败, Err: %s", e.Error())
- return
- }
- if len(secIds) > 0 {
- for _, d := range secIds {
- docId = fmt.Sprintf("%d-%d", saDocId, d)
- if e := EsDeleteData(indexName, docId); e != nil && !strings.Contains(e.Error(), "404") {
- err = fmt.Errorf("删除ES语义分析文档段落失败, Err: %s", e.Error())
- return
- }
- }
- }
- return
- }
- // FormatCompareLabels2TableData 格式化比对标签为表格数据
- func FormatCompareLabels2TableData(compareLabels []*saModel.SaCompareLabelItem) (resp *saModel.SaCompareSaveResp, err error) {
- resp = new(saModel.SaCompareSaveResp)
- resp.LabelList = make([]*saModel.SaCompareSaveRespLabel, 0)
- // 取出文档作为X轴, 标签作为Y轴
- labelMap := make(map[int]*saModel.SaCompareLabelItem)
- docMap := make(map[int]*saModel.SaCompareLabelItem)
- secMap := make(map[string][]*saModel.SaCompareLabelItem)
- for _, v := range compareLabels {
- // 标签-Y轴
- if labelMap[v.LabelId] == nil {
- labelMap[v.LabelId] = v
- resp.LabelList = append(resp.LabelList, &saModel.SaCompareSaveRespLabel{
- SaLabelId: v.LabelId,
- LabelName: v.LabelName,
- DocList: make([]*saModel.SaCompareSaveRespDoc, 0),
- })
- }
- // 文档-X轴
- if docMap[v.DocId] == nil {
- docMap[v.DocId] = v
- resp.TitleList = append(resp.TitleList, v.Title)
- }
- // 标签ID-文档ID作为key写入map, 后续段落匹配
- k := fmt.Sprintf("%d-%d", v.LabelId, v.DocId)
- if secMap[k] == nil {
- secMap[k] = make([]*saModel.SaCompareLabelItem, 0)
- }
- secMap[k] = append(secMap[k], v)
- }
- // 填充标签数据
- secExistMap := make(map[string]bool)
- for _, l := range resp.LabelList {
- docs := make([]*saModel.SaCompareSaveRespDoc, 0)
- for _, d := range docMap {
- dv := new(saModel.SaCompareSaveRespDoc)
- dv.SaDocId = d.DocId
- dv.Title = d.Title
- // 文档段落
- k := fmt.Sprintf("%d-%d", l.SaLabelId, d.DocId)
- secs := make([]*saModel.SaCompareSaveRespSection, 0)
- secList := secMap[k]
- if secList != nil && len(secList) > 0 {
- for _, s := range secList {
- sv := new(saModel.SaCompareSaveRespSection)
- sv.SaDocSectionId = s.SectionId
- content := html.UnescapeString(s.SectionContent)
- if s.CompareContent != "" {
- sv.IsPart = 1
- content = html.UnescapeString(s.CompareContent)
- }
- sv.Content = content
- // 同标签同文档同段落中的整段/片段去重
- ek := fmt.Sprintf("%d-%d-%d-%s", l.SaLabelId, d.DocId, s.SectionId, utils.MD5(content))
- fmt.Println(ek)
- if secExistMap[ek] {
- fmt.Println("跳过", ek)
- continue
- }
- secExistMap[ek] = true
- secs = append(secs, sv)
- }
- }
- dv.SectionList = secs
- docs = append(docs, dv)
- }
- // 标签对应的文档列表排序, 与resp.TitleList排序保持一致
- sort.Slice(docs, func(i, j int) bool {
- return docs[j].SaDocId > docs[i].SaDocId
- })
- l.DocList = docs
- }
- return
- }
|