elastic.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. package services
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "github.com/olivere/elastic/v7"
  7. "hongze/hongze_clpt/models"
  8. "hongze/hongze_clpt/utils"
  9. "strconv"
  10. "strings"
  11. )
  12. //func NewClient() (client *elastic.Client, err error) {
  13. // //errorlog := log.New(os.Stdout, "APP", log.LstdFlags)
  14. // //file := ""
  15. // //if utils.RunMode == "release" {
  16. // // //file = `/data/rdlucklog/hongze_cygx/eslog.log`
  17. // // file = `./rdlucklog/eslog.log`
  18. // //} else {
  19. // // file = `./rdlucklog/eslog.log`
  20. // //}
  21. // //logFile, _ := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0766)
  22. // //client, err = elastic.NewClient(
  23. // // elastic.SetURL(ES_URL),
  24. // // elastic.SetBasicAuth(ES_USERNAME, ES_PASSWORD),
  25. // // elastic.SetTraceLog(log.New(logFile, "ES-TRACE: ", 0)),
  26. // // elastic.SetSniff(false), elastic.SetErrorLog(errorlog))
  27. // client, err = elastic.NewClient(
  28. // elastic.SetURL(ES_URL),
  29. // elastic.SetBasicAuth(ES_USERNAME, ES_PASSWORD),
  30. // elastic.SetSniff(false))
  31. // return
  32. //}
  33. func RemoveDuplicatesAndEmpty(a []string) (ret []string) {
  34. a_len := len(a)
  35. for i := 0; i < a_len; i++ {
  36. if (i > 0 && a[i-1] == a[i]) || len(a[i]) == 0 {
  37. continue
  38. }
  39. ret = append(ret, a[i])
  40. }
  41. return
  42. }
  43. func GetArrSum(intArr []int) (sum int) {
  44. for _, val := range intArr {
  45. //累计求和
  46. sum += val
  47. }
  48. return
  49. }
  50. func EsMultiMatchFunctionScoreQuerySort(indexName, keyWord string, startSize, pageSize, userId int, orderColumn string) (result []*models.SearchItem, total int64, err error) {
  51. client := utils.Client
  52. keyWordArr, err := GetIndustryMapNameSliceV3(keyWord)
  53. keyWordArr = RemoveDuplicatesAndEmpty(keyWordArr)
  54. //artidArr := make([]elastic.Query, 0)
  55. //matchArr := make([]elastic.Query, 0)
  56. n := 0
  57. keyWordLen := len(keyWordArr)
  58. if keyWordLen <= 0 {
  59. keyWordArr = append(keyWordArr, keyWord)
  60. keyWordLen = len(keyWordArr)
  61. }
  62. // @Param OrderColumn query int true "排序字段 ,Comprehensive综合 ,Matching匹配度 ,PublishDate 发布时间 "
  63. utils.FileLog.Info("SearchKeyWord:%s, userId:%s", keyWordArr, strconv.Itoa(userId))
  64. //keyWordWeight := GetWeight(keyWordLen)
  65. for _, v := range keyWordArr {
  66. if v != "" {
  67. matchArr := make([]elastic.Query, 0)
  68. boolquery := elastic.NewBoolQuery()
  69. bodyFunctionQuery := elastic.NewFunctionScoreQuery()
  70. bodyFunctionQuery2 := elastic.NewFunctionScoreQuery()
  71. bodyFunctionQuery3 := elastic.NewFunctionScoreQuery()
  72. //multiMatch := elastic.NewMultiMatchQuery(v, "Title", "BodyText").Analyzer("ik_smart")
  73. multiMatch := elastic.NewMultiMatchQuery(v, "Title").Analyzer("ik_smart").Boost(100)
  74. bodyFunctionQuery.Query(multiMatch)
  75. matchArr = append(matchArr, bodyFunctionQuery)
  76. multiMatch = elastic.NewMultiMatchQuery(v, "BodyText").Analyzer("ik_smart").Boost(1)
  77. bodyFunctionQuery2.Query(multiMatch)
  78. matchArr = append(matchArr, bodyFunctionQuery2)
  79. //multiMatch = elastic.NewMultiMatchQuery(1, "IsSummary")
  80. bodyFunctionQuery3.Query(multiMatch)
  81. matchArr = append(matchArr, bodyFunctionQuery3)
  82. boolquery.Should(matchArr...)
  83. //multiMatch = elastic.NewMultiMatchQuery(v, "BodyText").Analyzer("ik_smart")
  84. //bodyFunctionQuery.Query(multiMatch)
  85. //matchArr = append(matchArr, bodyFunctionQuery)
  86. //boolquery.Should(matchArr...)
  87. highlight := elastic.NewHighlight()
  88. highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
  89. highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
  90. request := client.Search(indexName).Highlight(highlight).Sort("PublishDate", false).From(0).Size(pageSize).Query(boolquery)
  91. if orderColumn == "Matching" {
  92. request = client.Search(indexName).Highlight(highlight).From(0).Size(pageSize).Query(boolquery)
  93. }
  94. searchByMatch, err := request.Do(context.Background())
  95. if err != nil {
  96. return nil, 0, err
  97. }
  98. if searchByMatch != nil {
  99. if searchByMatch.Hits != nil {
  100. for _, v := range searchByMatch.Hits.Hits {
  101. var isAppend bool
  102. articleJson, err := v.Source.MarshalJSON()
  103. if err != nil {
  104. return nil, 0, err
  105. }
  106. article := new(models.CygxArticleEs)
  107. err = json.Unmarshal(articleJson, &article)
  108. if err != nil {
  109. return nil, 0, err
  110. }
  111. searchItem := new(models.SearchItem)
  112. searchItem.ArticleId, _ = strconv.Atoi(v.Id)
  113. if len(v.Highlight["BodyText"]) > 0 {
  114. searchItem.Body = v.Highlight["BodyText"]
  115. } else {
  116. bodyRune := []rune(article.BodyText)
  117. bodyRuneLen := len(bodyRune)
  118. if bodyRuneLen > 100 {
  119. bodyRuneLen = 100
  120. }
  121. body := string(bodyRune[:bodyRuneLen])
  122. searchItem.Body = []string{body}
  123. }
  124. var title string
  125. if len(v.Highlight["Title"]) > 0 {
  126. title = v.Highlight["Title"][0]
  127. } else {
  128. title = article.Title
  129. }
  130. searchItem.Title = title
  131. searchItem.PublishDate = article.PublishDate
  132. searchItem.ExpertBackground = article.ExpertBackground
  133. searchItem.CategoryId = article.CategoryId
  134. for _, v_result := range result {
  135. if v_result.ArticleId == searchItem.ArticleId {
  136. isAppend = true
  137. }
  138. }
  139. if !isAppend {
  140. result = append(result, searchItem)
  141. }
  142. }
  143. }
  144. //total += searchByMatch.Hits.TotalHits.Value
  145. }
  146. }
  147. n++
  148. }
  149. total = int64(len(result))
  150. return
  151. }
  152. func EsMultiMatchFunctionScoreQueryTimeSort(indexName, keyWord string, startSize, pageSize, userId int) (result []*models.SearchItem, total int64, err error) {
  153. client := utils.Client
  154. keyWordArr, err := GetIndustryMapNameSliceV2(keyWord)
  155. keyWordArr = RemoveDuplicatesAndEmpty(keyWordArr)
  156. boolquery := elastic.NewBoolQuery()
  157. matchArr := make([]elastic.Query, 0)
  158. //matchArr2 := make([]elastic.Query, 0)
  159. n := 0
  160. keyWordLen := len(keyWordArr)
  161. if keyWordLen <= 0 {
  162. keyWordArr = append(keyWordArr, keyWord)
  163. keyWordLen = len(keyWordArr)
  164. }
  165. utils.FileLog.Info("SearchKeyWord:%s, userId:%s", keyWordArr, strconv.Itoa(userId))
  166. for _, v := range keyWordArr {
  167. if v != "" {
  168. multiMatch := elastic.NewMultiMatchQuery(v, "Title", "BodyText")
  169. bodyFunctionQuery := elastic.NewFunctionScoreQuery()
  170. bodyFunctionQuery.Query(multiMatch)
  171. matchArr = append(matchArr, bodyFunctionQuery)
  172. }
  173. n++
  174. }
  175. boolquery.Should(matchArr...)
  176. highlight := elastic.NewHighlight()
  177. highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
  178. highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
  179. request := client.Search(indexName).Highlight(highlight).Sort("PublishDate", false).Size(pageSize).Query(boolquery)
  180. searchByMatch, err := request.Do(context.Background())
  181. if searchByMatch != nil {
  182. matchResult, _ := json.Marshal(searchByMatch)
  183. utils.FileLog.Info("%s", string(matchResult))
  184. fmt.Println(len(searchByMatch.Hits.Hits))
  185. if searchByMatch.Hits != nil {
  186. for _, v := range searchByMatch.Hits.Hits {
  187. articleJson, err := v.Source.MarshalJSON()
  188. utils.FileLog.Info("%s", string(articleJson))
  189. if err != nil {
  190. return nil, 0, err
  191. }
  192. article := new(models.CygxArticleEs)
  193. err = json.Unmarshal(articleJson, &article)
  194. if err != nil {
  195. return nil, 0, err
  196. }
  197. searchItem := new(models.SearchItem)
  198. searchItem.ArticleId, _ = strconv.Atoi(v.Id)
  199. if len(v.Highlight["BodyText"]) > 0 {
  200. searchItem.Body = v.Highlight["BodyText"]
  201. } else {
  202. bodyRune := []rune(article.BodyText)
  203. bodyRuneLen := len(bodyRune)
  204. if bodyRuneLen > 100 {
  205. bodyRuneLen = 100
  206. }
  207. body := string(bodyRune[:bodyRuneLen])
  208. searchItem.Body = []string{body}
  209. }
  210. var title string
  211. if len(v.Highlight["Title"]) > 0 {
  212. title = v.Highlight["Title"][0]
  213. } else {
  214. title = article.Title
  215. }
  216. searchItem.Title = title
  217. searchItem.PublishDate = article.PublishDate
  218. searchItem.ExpertBackground = article.ExpertBackground
  219. searchItem.CategoryId = article.CategoryId
  220. result = append(result, searchItem)
  221. }
  222. }
  223. total = searchByMatch.Hits.TotalHits.Value
  224. }
  225. return
  226. }
  227. func EsArticleSearch(keyWord string, startSize, pageSize int, orderColumn string, ikType int) (result []*models.SearchItem, total int64, err error) {
  228. indexName := utils.IndexName
  229. client := utils.Client
  230. keyWordArr, err := GetIndustryMapNameSliceV3(keyWord)
  231. keyWordArr = RemoveDuplicatesAndEmpty(keyWordArr)
  232. keyWordLen := len(keyWordArr)
  233. if keyWordLen <= 0 {
  234. keyWordArr = append(keyWordArr, keyWord)
  235. keyWordLen = len(keyWordArr)
  236. }
  237. //Es 的高级查询有 自定义排序 文档一时半会儿撸不懂,先做多次查询手动过滤 2023.2.2
  238. //ikType 查询方式 ,0:查所有 、 1:查询键入词 、 2:查询除了查询键入词之外的联想词
  239. mustMap := make([]interface{}, 0)
  240. shouldMap := make(map[string]interface{}, 0)
  241. shouldMapquery := make([]interface{}, 0)
  242. mustNotMap := make([]interface{}, 0)
  243. shouldNotMap := make(map[string]interface{}, 0)
  244. shouldNotMapquery := make([]interface{}, 0)
  245. // @Param OrderColumn query int true "排序字段 ,Comprehensive综合 ,Matching匹配度 ,PublishDate 发布时间 "
  246. //keyWordWeight := GetWeight(keyWordLen)
  247. var boost int
  248. //lenkeyWordArr := len(keyWordArr)
  249. for k, v := range keyWordArr {
  250. if k == 0 {
  251. boost = 2 * 1000
  252. } else {
  253. boost = 1
  254. }
  255. //如果是 2:查询除了查询键入词之外的联想词
  256. if k == 0 && ikType == 2 {
  257. if v != "" {
  258. shouldNotMapquery = append(shouldNotMapquery, map[string]interface{}{
  259. "function_score": map[string]interface{}{
  260. "query": map[string]interface{}{
  261. "multi_match": map[string]interface{}{
  262. //"boost": (lenkeyWordArr - k) * boost, //给查询的值赋予权重
  263. "boost": boost, //给查询的值赋予权重
  264. "fields": []interface{}{"Title"},
  265. "query": v,
  266. },
  267. },
  268. },
  269. })
  270. shouldNotMapquery = append(shouldNotMapquery, map[string]interface{}{
  271. "function_score": map[string]interface{}{
  272. "query": map[string]interface{}{
  273. "multi_match": map[string]interface{}{
  274. "boost": boost, //给查询的值赋予权重
  275. "fields": []interface{}{"Abstract"},
  276. "query": v,
  277. },
  278. },
  279. },
  280. })
  281. shouldNotMapquery = append(shouldNotMapquery, map[string]interface{}{
  282. "function_score": map[string]interface{}{
  283. "query": map[string]interface{}{
  284. "multi_match": map[string]interface{}{
  285. "boost": boost, //给查询的值赋予权重
  286. "fields": []interface{}{"Annotation"},
  287. "query": v,
  288. },
  289. },
  290. },
  291. })
  292. shouldNotMapquery = append(shouldNotMapquery, map[string]interface{}{
  293. "function_score": map[string]interface{}{
  294. "query": map[string]interface{}{
  295. "multi_match": map[string]interface{}{
  296. //"boost": (lenkeyWordArr-k)*boost - 1, //给查询的值赋予权重
  297. "boost": boost, //给查询的值赋予权重
  298. "fields": []interface{}{"BodyText"},
  299. "query": v,
  300. },
  301. },
  302. },
  303. })
  304. }
  305. continue
  306. }
  307. //如果是 1:查询键入词
  308. if k > 0 && ikType == 1 {
  309. continue
  310. }
  311. if v != "" {
  312. shouldMapquery = append(shouldMapquery, map[string]interface{}{
  313. "function_score": map[string]interface{}{
  314. "query": map[string]interface{}{
  315. "multi_match": map[string]interface{}{
  316. //"boost": (lenkeyWordArr - k) * boost, //给查询的值赋予权重
  317. "boost": boost, //给查询的值赋予权重
  318. "fields": []interface{}{"Title"},
  319. "query": v,
  320. },
  321. },
  322. },
  323. })
  324. shouldMapquery = append(shouldMapquery, map[string]interface{}{
  325. "function_score": map[string]interface{}{
  326. "query": map[string]interface{}{
  327. "multi_match": map[string]interface{}{
  328. "boost": boost, //给查询的值赋予权重
  329. "fields": []interface{}{"Abstract"},
  330. "query": v,
  331. },
  332. },
  333. },
  334. })
  335. shouldMapquery = append(shouldMapquery, map[string]interface{}{
  336. "function_score": map[string]interface{}{
  337. "query": map[string]interface{}{
  338. "multi_match": map[string]interface{}{
  339. "boost": boost, //给查询的值赋予权重
  340. "fields": []interface{}{"Annotation"},
  341. "query": v,
  342. },
  343. },
  344. },
  345. })
  346. shouldMapquery = append(shouldMapquery, map[string]interface{}{
  347. "function_score": map[string]interface{}{
  348. "query": map[string]interface{}{
  349. "multi_match": map[string]interface{}{
  350. //"boost": (lenkeyWordArr-k)*boost - 1, //给查询的值赋予权重
  351. "boost": boost, //给查询的值赋予权重
  352. "fields": []interface{}{"BodyText"},
  353. "query": v,
  354. },
  355. },
  356. },
  357. })
  358. }
  359. }
  360. shouldMap = map[string]interface{}{
  361. "should": shouldMapquery,
  362. }
  363. shouldNotMap = map[string]interface{}{
  364. "should": shouldNotMapquery,
  365. }
  366. //排序
  367. sortMap := make([]interface{}, 0)
  368. //时间
  369. sortMap = append(sortMap, map[string]interface{}{
  370. "PublishDate": map[string]interface{}{
  371. "order": "desc",
  372. },
  373. })
  374. //sortMap = append(sortMap, map[string]interface{}{
  375. // "_score": map[string]interface{}{
  376. // "order": "desc",
  377. // },
  378. //})
  379. //高亮
  380. highlightMap := make(map[string]interface{}, 0)
  381. highlightMap = map[string]interface{}{
  382. "fields": map[string]interface{}{
  383. "BodyText": map[string]interface{}{},
  384. "Title": map[string]interface{}{},
  385. "Abstract": map[string]interface{}{},
  386. "Annotation": map[string]interface{}{},
  387. },
  388. //样式 红色
  389. "post_tags": []interface{}{"</font>"},
  390. "pre_tags": []interface{}{"<font color='red'>"},
  391. }
  392. mustMap = append(mustMap, map[string]interface{}{
  393. "bool": shouldMap,
  394. })
  395. mustNotMap = append(mustNotMap, map[string]interface{}{
  396. "bool": shouldNotMap,
  397. })
  398. queryMap := map[string]interface{}{
  399. "query": map[string]interface{}{
  400. "bool": map[string]interface{}{
  401. "must": mustMap,
  402. },
  403. },
  404. }
  405. //把第一次键入词的筛选条件过滤掉
  406. if ikType == 2 {
  407. queryMap = map[string]interface{}{
  408. "query": map[string]interface{}{
  409. "bool": map[string]interface{}{
  410. "must": mustMap,
  411. "must_not": mustNotMap,
  412. },
  413. },
  414. }
  415. }
  416. if orderColumn == "Matching" {
  417. queryMap["sort"] = sortMap
  418. }
  419. queryMap["from"] = startSize
  420. queryMap["size"] = pageSize
  421. queryMap["highlight"] = highlightMap
  422. jsonBytes, _ := json.Marshal(queryMap)
  423. fmt.Println(string(jsonBytes))
  424. //utils.FileLog.Info(string(jsonBytes))
  425. request := client.Search(indexName).Source(queryMap) // sets the JSON request
  426. searchByMatch, err := request.Do(context.Background())
  427. if searchByMatch != nil {
  428. if searchByMatch.Hits != nil {
  429. for _, v := range searchByMatch.Hits.Hits {
  430. var isAppend bool
  431. articleJson, err := v.Source.MarshalJSON()
  432. if err != nil {
  433. return nil, 0, err
  434. }
  435. article := new(models.CygxArticleEs)
  436. err = json.Unmarshal(articleJson, &article)
  437. if err != nil {
  438. return nil, 0, err
  439. }
  440. searchItem := new(models.SearchItem)
  441. searchItem.ArticleId, _ = strconv.Atoi(v.Id)
  442. if len(v.Highlight["BodyText"]) > 0 {
  443. searchItem.Body = v.Highlight["BodyText"]
  444. } else {
  445. bodyRune := []rune(article.BodyText)
  446. bodyRuneLen := len(bodyRune)
  447. if bodyRuneLen > 100 {
  448. bodyRuneLen = 100
  449. }
  450. body := string(bodyRune[:bodyRuneLen])
  451. searchItem.Body = []string{body}
  452. }
  453. var title string
  454. if len(v.Highlight["Title"]) > 0 {
  455. title = v.Highlight["Title"][0]
  456. } else {
  457. title = article.Title
  458. }
  459. searchItem.Title = title
  460. searchItem.PublishDate = article.PublishDate
  461. searchItem.ExpertBackground = article.ExpertBackground
  462. searchItem.CategoryId = article.CategoryId
  463. for _, v_result := range result {
  464. if v_result.ArticleId == searchItem.ArticleId {
  465. isAppend = true
  466. }
  467. }
  468. if !isAppend {
  469. result = append(result, searchItem)
  470. }
  471. }
  472. }
  473. total = searchByMatch.Hits.TotalHits.Value
  474. }
  475. return
  476. }
  477. // KeyWordArrSqlRegexp 预处理ik联想词的模糊查询语句
  478. func KeyWordArrSqlRegexpAll(a []string) (ret string) {
  479. a_len := len(a)
  480. for i := 0; i < a_len; i++ {
  481. ret += a[i] + "|"
  482. }
  483. ret = strings.TrimRight(ret, "|")
  484. //ret = "'" + ret + "'"
  485. return
  486. }
  487. // KeyWordArrSqlRegexp 预处理ik联想词的模糊查询语句
  488. func KeyWordArrSqlRegexp(a []string) (ret string) {
  489. a_len := len(a)
  490. for i := 0; i < a_len; i++ {
  491. if i == 0 {
  492. continue
  493. }
  494. ret += a[i] + "|"
  495. }
  496. ret = strings.TrimRight(ret, "|")
  497. return
  498. }