package services

import (
	"fmt"
	"html"
	"sort"
	"strings"

	"github.com/PuerkitoBio/goquery"

	saModel "hongze/hz_eta_api/models/semantic_analysis"
	"hongze/hz_eta_api/services/alarm_msg"
	"hongze/hz_eta_api/utils"
)

// LoadSaDocContent2Section splits a document's HTML content into sections, one
// per <p> element.
//
// Paragraphs whose text is blank after trimming are skipped. Each returned
// section is the paragraph's inner HTML formatted through the literal below.
// Empty content returns (nil, nil). Any error is also printed to stdout by the
// deferred function before it is returned.
func LoadSaDocContent2Section(content string) (sections []string, err error) {
	if content == `` {
		return
	}
	defer func() {
		if err != nil {
			fmt.Println(err.Error())
		}
	}()

	doc, e := goquery.NewDocumentFromReader(strings.NewReader(content))
	if e != nil {
		err = fmt.Errorf("读取html内容失败, Err: %s", e.Error())
		return
	}

	doc.Find("p").Each(func(i int, s *goquery.Selection) {
		// Each cannot be aborted, so once a paragraph has failed skip the rest:
		// this keeps the first error and stops collecting further sections.
		if err != nil {
			return
		}
		h, e := s.Html()
		if e != nil {
			err = fmt.Errorf("读取html标签失败, Err: %s", e.Error())
			return
		}
		if strings.TrimSpace(s.Text()) == "" {
			return
		}
		// NOTE(review): the wrapper literal is only line breaks around %s. Since
		// sections are taken per <p>, it may originally have been `<p>%s</p>`
		// with the tags lost in transit — confirm against version control.
		sections = append(sections, fmt.Sprintf(`
%s
`, h))
	})
	return
}

// GetSaCompareTableData builds the comparison table for the given labels,
// documents and sections.
//
// contentMap is keyed by "labelId-docId-sectionId"; a section is placed in a
// (label, document) cell only when its key maps to a non-zero value. Titles and
// per-label document lists follow the order of docIds, and labels follow the
// order of labelIds. Label names are looked up from the full label list.
func GetSaCompareTableData(labelIds, docIds, secIds []int, contentMap map[string]int) (resp *saModel.SaCompareSaveResp, err error) {
	resp = new(saModel.SaCompareSaveResp)

	// Documents, indexed by ID.
	docMap := make(map[int]*saModel.SaDoc)
	if len(docIds) > 0 {
		docOB := new(saModel.SaDoc)
		docCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocColumns.SaDocId, utils.GetOrmInReplace(len(docIds)))
		docPars := make([]interface{}, 0)
		docPars = append(docPars, docIds)
		docFields := []string{saModel.SaDocColumns.SaDocId, saModel.SaDocColumns.Title, saModel.SaDocColumns.Theme}
		docItems, e := docOB.GetItemsByCondition(docCond, docPars, docFields, "")
		if e != nil {
			err = fmt.Errorf("获取比对文档列表失败, Err: %s", e.Error())
			return
		}
		for _, d := range docItems {
			docMap[d.SaDocId] = d
		}
	}

	// Section content (HTML-unescaped), indexed by section ID.
	secMap := make(map[int]string)
	if len(secIds) > 0 {
		secOB := new(saModel.SaDocSection)
		secCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocSectionColumns.SaDocSectionId, utils.GetOrmInReplace(len(secIds)))
		secPars := make([]interface{}, 0)
		secPars = append(secPars, secIds)
		secFields := []string{saModel.SaDocSectionColumns.SaDocSectionId, saModel.SaDocSectionColumns.DocId, saModel.SaDocSectionColumns.Content}
		secItems, e := secOB.GetItemsByCondition(secCond, secPars, secFields, "")
		if e != nil {
			// Previously this reused the document-list message; it now names
			// the query that actually failed.
			err = fmt.Errorf("获取比对段落列表失败, Err: %s", e.Error())
			return
		}
		for _, s := range secItems {
			secMap[s.SaDocSectionId] = html.UnescapeString(s.Content)
		}
	}

	// Label names, indexed by label ID (queried without a filter).
	labelMap := make(map[int]string)
	labelOB := new(saModel.SaLabel)
	labelCond := ``
	labelPars := make([]interface{}, 0)
	labelFields := []string{saModel.SaLabelColumns.SaLabelId, saModel.SaLabelColumns.LabelName}
	labelItems, e := labelOB.GetItemsByCondition(labelCond, labelPars, labelFields, "")
	if e != nil {
		err = fmt.Errorf("获取标签列表失败, Err: %s", e.Error())
		return
	}
	for _, l := range labelItems {
		labelMap[l.SaLabelId] = l.LabelName
	}

	// Table header: follow the order of docIds, otherwise columns get shuffled.
	for _, d := range docIds {
		if dv := docMap[d]; dv != nil {
			resp.TitleList = append(resp.TitleList, dv.Title)
		}
	}

	// One row per label, one cell per document.
	respLabel := make([]*saModel.SaCompareSaveRespLabel, 0, len(labelIds))
	for _, l := range labelIds {
		v := new(saModel.SaCompareSaveRespLabel)
		v.SaLabelId = l
		v.LabelName = labelMap[l]

		docList := make([]*saModel.SaCompareSaveRespDoc, 0, len(docIds))
		for _, d := range docIds {
			vd := new(saModel.SaCompareSaveRespDoc)
			vd.SaDocId = d
			if dv := docMap[d]; dv != nil {
				vd.Title = dv.Title
			}

			// Sections tagged with this label in this document.
			secList := make([]*saModel.SaCompareSaveRespSection, 0)
			for _, s := range secIds {
				k := fmt.Sprintf("%d-%d-%d", l, d, s)
				if contentMap[k] == 0 {
					continue
				}
				vs := new(saModel.SaCompareSaveRespSection)
				vs.SaDocSectionId = s
				vs.Content = secMap[s]
				secList = append(secList, vs)
			}
			vd.SectionList = secList
			docList = append(docList, vd)
		}
		v.DocList = docList
		respLabel = append(respLabel, v)
	}
	resp.LabelList = respLabel
	return
}

// GetSaCompareDetailByDocIds returns the comparison detail for a set of documents.
//
//   - compareId > 0: docIds are the documents linked to that comparison; section
//     labels show current, history and other users' labels.
//   - compareId == 0: docIds are the selected documents; section labels show
//     history and other users' labels.
//
// It returns (nil, nil) when docIds is empty, and a nil detail together with an
// error when any of the underlying queries fails.
func GetSaCompareDetailByDocIds(docIds []int, compareId, sysAdminId int) (detail *saModel.SaCompareDetail, err error) {
	if len(docIds) == 0 {
		return
	}

	// Documents.
	docOB := new(saModel.SaDoc)
	docCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocColumns.SaDocId, utils.GetOrmInReplace(len(docIds)))
	docPars := make([]interface{}, 0)
	docPars = append(docPars, docIds)
	docs, e := docOB.GetItemsByCondition(docCond, docPars, []string{}, "")
	if e != nil {
		err = fmt.Errorf("获取文档信息失败, Err: %s", e.Error())
		return
	}

	// Sections of those documents, grouped by document ID.
	secOB := new(saModel.SaDocSection)
	secCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaDocSectionColumns.DocId, utils.GetOrmInReplace(len(docIds)))
	secPars := make([]interface{}, 0)
	secPars = append(secPars, docIds)
	secsItems, e := secOB.GetItemsByCondition(secCond, secPars, []string{}, "doc_id ASC, sort ASC")
	if e != nil {
		// This error used to be dropped, silently producing documents with no
		// sections when the query failed.
		err = fmt.Errorf("获取文档段落信息失败, Err: %s", e.Error())
		return
	}
	secsMap := make(map[int][]*saModel.SaDocSection)
	for _, s := range secsItems {
		secsMap[s.DocId] = append(secsMap[s.DocId], s)
	}

	// Labels attached to those documents.
	compLabOB := new(saModel.SaCompareLabel)
	compLabCond := fmt.Sprintf(` AND %s IN (%s)`, saModel.SaCompareLabelColumns.DocId, utils.GetOrmInReplace(len(docIds)))
	compLabPars := make([]interface{}, 0)
	compLabPars = append(compLabPars, docIds)
	compLabs, e := compLabOB.GetItemsByCondition(compLabCond, compLabPars, []string{}, "")
	if e != nil {
		err = fmt.Errorf("获取比对标签失败, Err: %s", e.Error())
		return
	}

	// Search keywords previously used with this comparison.
	keywordsOB := new(saModel.SaCompareSearchKeyword)
	keywordsCond := fmt.Sprintf(` AND %s = ?`, saModel.SaCompareSearchKeywordColumns.CompareId)
	keywordsPars := make([]interface{}, 0)
	keywordsPars = append(keywordsPars, compareId)
	keywordsItems, e := keywordsOB.GetItemsByCondition(keywordsCond, keywordsPars, []string{}, "")
	if e != nil {
		err = fmt.Errorf("获取历史搜索关键词失败, Err: %s", e.Error())
		return
	}
	keywords := make([]string, 0, len(keywordsItems))
	for _, k := range keywordsItems {
		keywords = append(keywords, k.Keyword)
	}

	// Labels per section/fragment, plus which labels the current user has used.
	secLabelMap, isMineMap := formatCompareLabelStatusGroupSection(compLabs, compareId, sysAdminId)

	partSecMap := make(map[int][]*saModel.SaCompareLabel) // fragments grouped by document ID
	partSecExistMap := make(map[string]bool)              // "sectionId-md5(content)" already collected

	// Head labels: every label referenced by any of the documents, de-duplicated
	// and kept in first-seen order.
	tabLabelIds := make([]int, 0)
	tabLabels := make([]*saModel.SaCompareDetailHeadLabel, 0)
	for _, l := range compLabs {
		if !utils.InArrayByInt(tabLabelIds, l.LabelId) {
			t := new(saModel.SaCompareDetailHeadLabel)
			t.LabelId = l.LabelId
			t.LabelName = l.LabelName
			t.IsMine = isMineMap[l.LabelId]
			tabLabelIds = append(tabLabelIds, l.LabelId)
			tabLabels = append(tabLabels, t)
		}

		// Only fragment labels contribute fragment rows.
		if l.IsPart != 1 {
			continue
		}
		ek := fmt.Sprintf("%d-%s", l.SectionId, utils.MD5(l.Content))
		if partSecExistMap[ek] {
			continue
		}
		// Record the key so the same fragment is listed once. The map used to be
		// read but never written, so the de-duplication never took effect.
		partSecExistMap[ek] = true
		partSecMap[l.DocId] = append(partSecMap[l.DocId], l)
	}

	// Assemble the detail.
	detail = new(saModel.SaCompareDetail)
	docList := make([]*saModel.SaCompareDetailDoc, 0, len(docs))
	wholeKeySuffix := utils.MD5(``) // whole sections are keyed with the MD5 of an empty string
	for _, d := range docs {
		dv := new(saModel.SaCompareDetailDoc)
		dv.DocId = d.SaDocId
		dv.Title = d.Title
		dv.Theme = d.Theme
		dv.ClassifyName = d.ClassifyName

		secList := make([]*saModel.SaCompareDetailSection, 0)

		// Whole sections.
		for _, s := range secsMap[d.SaDocId] {
			sv := new(saModel.SaCompareDetailSection)
			sv.SectionId = s.SaDocSectionId
			sv.Content = html.UnescapeString(s.Content)
			sv.Sort = s.Sort
			sv.LabelList = secLabelMap[fmt.Sprintf("%d-%s", s.SaDocSectionId, wholeKeySuffix)]
			secList = append(secList, sv)
		}

		// Fragments.
		for _, p := range partSecMap[d.SaDocId] {
			pv := new(saModel.SaCompareDetailSection)
			pv.SectionId = p.SectionId
			pv.Content = html.UnescapeString(p.Content)
			pv.IsPart = 1
			pv.StartIndex = p.StartIndex
			pv.EndIndex = p.EndIndex
			pv.LabelList = secLabelMap[fmt.Sprintf("%d-%s", p.SectionId, utils.MD5(p.Content))]
			secList = append(secList, pv)
		}

		dv.SectionList = secList
		docList = append(docList, dv)
	}
	detail.HeadLabel = tabLabels
	detail.DocList = docList
	detail.KeywordsList = keywords
	return
}

// formatCompareLabelStatusGroupSection groups comparison labels by section and
// marks the status of each label.
//
// labelMap is keyed by "sectionId-md5(content)" and holds each distinct label of
// that section/fragment once, in first-seen order, with three flags:
//   - IsThis: some record for the label belongs to compareId;
//   - IsHistory: some record belongs to another comparison and to sysAdminId;
//   - IsOther: some record belongs to a different user.
//
// isMineMap maps a label ID to 1 when sysAdminId has used it on any record.
func formatCompareLabelStatusGroupSection(compLabels []*saModel.SaCompareLabel, compareId, sysAdminId int) (labelMap map[string][]*saModel.SaCompareDetailFormatLabel, isMineMap map[int]int) {
	labelMap = make(map[string][]*saModel.SaCompareDetailFormatLabel)
	isMineMap = make(map[int]int)

	repeatMap := make(map[string][]int) // label IDs already added per section key
	thisMap := make(map[string]int)
	historyMap := make(map[string]int)
	otherMap := make(map[string]int)

	for _, l := range compLabels {
		digest := utils.MD5(l.Content)
		secKey := fmt.Sprintf("%d-%s", l.SectionId, digest)
		labKey := fmt.Sprintf("%d-%s-%d", l.SectionId, digest, l.LabelId)

		// Statuses are recorded for every record, including repeats, so a later
		// record can still raise a flag on a label that was already added.
		isCurrent := l.CompareId == compareId
		isMine := l.SysAdminId == sysAdminId
		if isCurrent {
			thisMap[labKey] = 1
		} else if isMine {
			historyMap[labKey] = 1
		}
		if isMine {
			isMineMap[l.LabelId] = 1
		} else {
			otherMap[labKey] = 1
		}

		// Add each label to a section only once.
		if utils.InArrayByInt(repeatMap[secKey], l.LabelId) {
			continue
		}
		repeatMap[secKey] = append(repeatMap[secKey], l.LabelId)
		labelMap[secKey] = append(labelMap[secKey], &saModel.SaCompareDetailFormatLabel{
			LabelId:   l.LabelId,
			LabelName: l.LabelName,
		})
	}

	// Apply the collected flags.
	for secKey, labels := range labelMap {
		for _, v := range labels {
			labKey := fmt.Sprintf("%s-%d", secKey, v.LabelId)
			v.IsThis = thisMap[labKey]
			v.IsHistory = historyMap[labKey]
			v.IsOther = otherMap[labKey]
		}
	}
	return
}

// HandleElasticSaDocAndSection writes a document and its sections to the
// semantic-analysis Elasticsearch index and deletes the sections in delIds.
//
// The document is stored under ID "<docId>-0" with the concatenated, unescaped
// content of all sections; each section is stored under "<docId>-<sectionId>".
// Delete errors whose message contains "404" are ignored. Any returned error is
// also sent through alarm_msg.SendAlarmMsg.
func HandleElasticSaDocAndSection(saDoc *saModel.SaDoc, sections []*saModel.SaDocSection, delIds []int) (err error) {
	defer func() {
		if err != nil {
			alarm_msg.SendAlarmMsg(fmt.Sprintf("Elastic-语义分析文档, Err: %s", err.Error()), 2)
		}
	}()

	indexName := utils.EsSemanticAnalysisDocIndexName
	createTime := saDoc.CreateTime.Format(utils.FormatDateTime)

	// Sections; their unescaped bodies are also joined into the document body.
	var content strings.Builder
	items := make([]*saModel.ElasticSaDoc, 0, len(sections))
	for _, s := range sections {
		h := html.UnescapeString(s.Content)
		content.WriteString(h)
		items = append(items, &saModel.ElasticSaDoc{
			SaDocId:        s.DocId,
			SaDocSectionId: s.SaDocSectionId,
			ClassifyId:     saDoc.ClassifyId,
			ClassifyName:   saDoc.ClassifyName,
			Title:          saDoc.Title,
			Theme:          saDoc.Theme,
			BodyContent:    h,
			Author:         saDoc.SysAdminName,
			CoverImg:       saDoc.CoverImg,
			CreateTime:     createTime,
		})
	}

	// Document.
	docId := fmt.Sprintf("%d-0", saDoc.SaDocId)
	item := &saModel.ElasticSaDoc{
		SaDocId:        saDoc.SaDocId,
		SaDocSectionId: 0,
		ClassifyId:     saDoc.ClassifyId,
		ClassifyName:   saDoc.ClassifyName,
		Title:          saDoc.Title,
		Theme:          saDoc.Theme,
		BodyContent:    content.String(),
		Author:         saDoc.SysAdminName,
		CoverImg:       saDoc.CoverImg,
		CreateTime:     createTime,
	}

	// Add or update.
	if e := EsAddOrEditSaDoc(indexName, docId, item); e != nil {
		err = fmt.Errorf("新增/更新ES语义分析文档失败, Err: %s", e.Error())
		return
	}
	for _, v := range items {
		docId = fmt.Sprintf("%d-%d", v.SaDocId, v.SaDocSectionId)
		if e := EsAddOrEditSaDoc(indexName, docId, v); e != nil {
			err = fmt.Errorf("新增/更新ES语义分析文档段落失败, Err: %s", e.Error())
			return
		}
	}

	// Delete removed sections.
	for _, d := range delIds {
		docId = fmt.Sprintf("%d-%d", saDoc.SaDocId, d)
		if e := EsDeleteData(indexName, docId); e != nil && !strings.Contains(e.Error(), "404") {
			err = fmt.Errorf("删除ES语义分析文档段落失败, Err: %s", e.Error())
			return
		}
	}
	return
}

// DeleteElasticSaDocAndSection removes a document ("<docId>-0") and the given
// sections ("<docId>-<sectionId>") from the semantic-analysis Elasticsearch
// index. Delete errors whose message contains "404" are ignored. Any returned
// error is also sent through alarm_msg.SendAlarmMsg.
func DeleteElasticSaDocAndSection(saDocId int, secIds []int) (err error) {
	defer func() {
		if err != nil {
			alarm_msg.SendAlarmMsg(fmt.Sprintf("Elastic-语义分析文档, Err: %s", err.Error()), 2)
		}
	}()

	indexName := utils.EsSemanticAnalysisDocIndexName

	docId := fmt.Sprintf("%d-0", saDocId)
	if e := EsDeleteData(indexName, docId); e != nil && !strings.Contains(e.Error(), "404") {
		err = fmt.Errorf("删除ES语义分析文档失败, Err: %s", e.Error())
		return
	}
	for _, d := range secIds {
		docId = fmt.Sprintf("%d-%d", saDocId, d)
		if e := EsDeleteData(indexName, docId); e != nil && !strings.Contains(e.Error(), "404") {
			err = fmt.Errorf("删除ES语义分析文档段落失败, Err: %s", e.Error())
			return
		}
	}
	return
}

// FormatCompareLabels2TableData turns a flat list of comparison labels into
// table data: documents form the X axis, labels the Y axis.
//
// Labels and titles are listed in first-seen order of compareLabels. Each label
// row holds one entry per document, sorted by ascending document ID. Within one
// (label, document, section) cell, entries with identical content are emitted
// once; an entry with a non-empty CompareContent is a fragment (IsPart = 1) and
// uses that content instead of the section content.
func FormatCompareLabels2TableData(compareLabels []*saModel.SaCompareLabelItem) (resp *saModel.SaCompareSaveResp, err error) {
	resp = new(saModel.SaCompareSaveResp)
	resp.LabelList = make([]*saModel.SaCompareSaveRespLabel, 0)

	labelMap := make(map[int]*saModel.SaCompareLabelItem)
	docMap := make(map[int]*saModel.SaCompareLabelItem)
	secMap := make(map[string][]*saModel.SaCompareLabelItem) // "labelId-docId" -> items
	for _, v := range compareLabels {
		// Label: Y axis.
		if labelMap[v.LabelId] == nil {
			labelMap[v.LabelId] = v
			resp.LabelList = append(resp.LabelList, &saModel.SaCompareSaveRespLabel{
				SaLabelId: v.LabelId,
				LabelName: v.LabelName,
				DocList:   make([]*saModel.SaCompareSaveRespDoc, 0),
			})
		}
		// Document: X axis.
		if docMap[v.DocId] == nil {
			docMap[v.DocId] = v
			resp.TitleList = append(resp.TitleList, v.Title)
		}
		// Group by label and document for the section lookup below.
		k := fmt.Sprintf("%d-%d", v.LabelId, v.DocId)
		secMap[k] = append(secMap[k], v)
	}

	// Fill each label row.
	secExistMap := make(map[string]bool)
	for _, l := range resp.LabelList {
		docs := make([]*saModel.SaCompareSaveRespDoc, 0, len(docMap))
		for _, d := range docMap {
			dv := new(saModel.SaCompareSaveRespDoc)
			dv.SaDocId = d.DocId
			dv.Title = d.Title

			secs := make([]*saModel.SaCompareSaveRespSection, 0)
			for _, s := range secMap[fmt.Sprintf("%d-%d", l.SaLabelId, d.DocId)] {
				sv := new(saModel.SaCompareSaveRespSection)
				sv.SaDocSectionId = s.SectionId
				content := html.UnescapeString(s.SectionContent)
				if s.CompareContent != "" {
					sv.IsPart = 1
					content = html.UnescapeString(s.CompareContent)
				}
				sv.Content = content

				// De-duplicate whole sections/fragments within the same label,
				// document and section.
				ek := fmt.Sprintf("%d-%d-%d-%s", l.SaLabelId, d.DocId, s.SectionId, utils.MD5(content))
				if secExistMap[ek] {
					continue
				}
				secExistMap[ek] = true
				secs = append(secs, sv)
			}
			dv.SectionList = secs
			docs = append(docs, dv)
		}

		// Map iteration order is random, so sort for a stable column order.
		// NOTE(review): this matches resp.TitleList only if compareLabels arrive
		// ordered by ascending document ID — confirm against the caller's query.
		sort.Slice(docs, func(i, j int) bool {
			return docs[i].SaDocId < docs[j].SaDocId
		})
		l.DocList = docs
	}
	return
}