// Source file: elasticsearch.go (Elasticsearch indexing and search helpers).

  1. package services
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "hongze/hongze_cygx/models"
  7. "hongze/hongze_cygx/utils"
  8. "html"
  9. "strconv"
  10. "strings"
  11. "github.com/PuerkitoBio/goquery"
  12. "github.com/olivere/elastic/v7"
  13. "github.com/olivere/elastic/v7/config"
  14. )
  // Connection settings for the Elasticsearch cluster used by every function in
  // this file.
  //
  // NOTE(review): these are live credentials committed to source control. They
  // should be loaded from configuration or the environment instead, and the
  // password rotated — confirm with the owners before changing, since other
  // files may reference these constants.
  const (
  	// ES_URL is the HTTP endpoint of the Elasticsearch cluster.
  	ES_URL = "http://es-cn-nif227b580019rgw6.public.elasticsearch.aliyuncs.com:9200"
  	// ES_USERNAME is the account name used to authenticate against ES_URL.
  	ES_USERNAME = "elastic"
  	// ES_PASSWORD is the password that goes with ES_USERNAME.
  	ES_PASSWORD = "hongze@2021"
  	// NOTE(review): the two comment lines below record Grafana credentials that
  	// are not used by any code in this file; remove them from the source and
  	// rotate the account.
  	//Grafana pwd-> 20521bb9
  	//Grafana username-> emon
  )
  22. func SaveData() {
  23. //fmt.Println("start")
  24. var sniff = false //<4>
  25. cfg := &config.Config{
  26. URL: ES_URL,
  27. Username: ES_USERNAME,
  28. Password: ES_PASSWORD,
  29. }
  30. cfg.Sniff = &sniff
  31. var client, err = elastic.NewClientFromConfig(cfg)
  32. if err != nil {
  33. fmt.Println("NewClientFromConfig Err:" + err.Error())
  34. return
  35. }
  36. var esIndex = "cygx_article"
  37. //var esType = "article"
  38. //
  39. exists, err := client.IndexExists(esIndex).Do(context.Background()) //<5>
  40. if err != nil {
  41. fmt.Println("IndexExists Err:" + err.Error())
  42. return
  43. }
  44. if !exists {
  45. _, err = client.CreateIndex(esIndex).Do(context.Background())
  46. if err != nil {
  47. fmt.Println("CreateIndex Err:" + err.Error())
  48. return
  49. }
  50. }
  51. /*
  52. 3161,3190,3226,3244,3264,3285,3310,3334,3370,3397,3418,3446,3477,3497,3526,3554
  53. */
  54. idStr := `3584,3644`
  55. idArr := strings.Split(idStr, ",")
  56. for _, v := range idArr {
  57. id, _ := strconv.Atoi(v)
  58. item, err := models.GetArticleDetailById(id)
  59. if err != nil {
  60. fmt.Println("GetArticleDetailById Err:" + err.Error())
  61. return
  62. }
  63. content := html.UnescapeString(item.Body)
  64. doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
  65. if err != nil {
  66. fmt.Println("create doc err:", err.Error())
  67. return
  68. }
  69. bodyText := doc.Text()
  70. item.BodyText = bodyText
  71. //新增
  72. resp, err := client.Index().Index(esIndex).Id(strconv.Itoa(item.ArticleId)).BodyJson(item).Do(context.Background())
  73. if err != nil {
  74. fmt.Println("insert es failed", err.Error())
  75. return
  76. }
  77. fmt.Println(resp.Status)
  78. }
  79. /*
  80. //根据id查询
  81. searchById, err := client.Get().Index(esIndex).Type(esType).Id("3138").Do(context.Background())
  82. if searchById.Found {
  83. body, err := searchById.Source.MarshalJSON()
  84. fmt.Println("body:",string(body))
  85. fmt.Println(err)
  86. //var resultType models.ArticleDetail
  87. //if err := json.Unmarshal(searchById.Source,&resultType); err != nil{
  88. // log.Error(err.Error())
  89. //}
  90. //fmt.Printf("search by id: %#v \n",resultType)
  91. }
  92. */
  93. ////查询index中所有的数据
  94. //var resultType models.CygxArticle
  95. //searchAll,err := client.Search(esIndex).Type(esType).Do(context.Background())
  96. //for _,item := range searchAll.Each(reflect.TypeOf(resultType)) {
  97. // language := item.(models.CygxArticle)
  98. // fmt.Printf("search by index all: %#v \n",language)
  99. //}
  100. //根据检索条件查询
  101. // boolquery := elastic.NewBoolQuery()
  102. // boolquery.Should(elastic.NewMatchQuery("Body", "专家"))
  103. // highlight := elastic.NewHighlight()
  104. // highlight = highlight.Fields(elastic.NewHighlighterField("Body"))
  105. // highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
  106. // var pageSize int
  107. // pageSize = 20
  108. // searchByMatch, err := client.Search(esIndex).Highlight(highlight).Size(pageSize).Query(boolquery).Do(context.Background())
  109. // var result string
  110. // if searchByMatch.Hits != nil {
  111. // }
  112. // //fmt.Println(string(result))
  113. // utils.FileLog.Info("%s", string(result))
  114. //var resultType models.CygxArticle
  115. //for k,item := range searchByMatch.Each(reflect.TypeOf(resultType)) {
  116. // language := item.(models.CygxArticle)
  117. // fmt.Printf("search by match: %#v \n",language)
  118. //
  119. // fmt.Println(k)
  120. // result,err:=json.Marshal(language)
  121. // fmt.Println(err)
  122. // utils.FileLog.Info("%s",string(result))
  123. //}
  124. // Perform the search request.
  125. //searchByMatch, err := client.Search(esIndex).Type(esType).Query(query).From(1).Size(10).Do(context.Background())
  126. fmt.Println("end")
  127. }
  128. func SearchByKeyWord(keyWord string) (result []*models.SearchItem, err error) {
  129. fmt.Println("keyWord:",keyWord)
  130. pageSize := 20
  131. keyWordArr, err := GetIndustryMapNameSlice(keyWord)
  132. keyWordArr=RemoveDuplicatesAndEmpty(keyWordArr)
  133. fmt.Println(keyWordArr)
  134. fmt.Println(" keyWordArr ")
  135. if err != nil {
  136. go utils.SendEmail(utils.APPNAME+" "+utils.RunMode+"异常提醒:", "GetIndustryMapNameSlice:"+err.Error(), utils.EmailSendToUsers)
  137. }
  138. var sniff = false //<4>
  139. cfg := &config.Config{
  140. URL: ES_URL,
  141. Username: ES_USERNAME,
  142. Password: ES_PASSWORD,
  143. }
  144. cfg.Sniff = &sniff
  145. client, err := elastic.NewClientFromConfig(cfg)
  146. if err != nil {
  147. return
  148. }
  149. var esIndex = "cygx_article"
  150. searchMap := make(map[int]int)
  151. //boolquery := elastic.NewBoolQuery()
  152. //keyWordLen := len(keyWordArr)
  153. //n := keyWordLen
  154. //for _, v := range keyWordArr {
  155. // keyWord = v
  156. // boost := float64(n)
  157. // fmt.Println("keyWord:", keyWord)
  158. // fmt.Println("boost:", boost)
  159. // //matchQueryList = append(matchQueryList, elastic.NewMatchQuery("Title", keyWord).Boost(boost+0.2))
  160. // //matchQueryList = append(matchQueryList, elastic.NewMatchQuery("BodyText", keyWord).Boost(boost+0.1))
  161. // q1 := elastic.NewMatchQuery("Title", keyWord)
  162. // q1 = q1.Boost(boost + 0.2)
  163. // q2 := elastic.NewMatchQuery("BodyText", keyWord)
  164. // q2 = q2.Boost(boost + 0.1)
  165. // boolquery.Should(q1, q2)
  166. // n--
  167. //}
  168. keyWordStr := strings.Join(keyWordArr, ",")
  169. fmt.Println("keyWordStr ",keyWordStr)
  170. queryString := elastic.NewQueryStringQuery(`Title:(` + keyWordStr + `) BodyText:(`+keyWordStr+`)`).Analyzer("ik_smart")
  171. boolqueryJson, err := json.Marshal(queryString)
  172. fmt.Println("err:", err)
  173. fmt.Println("queryString ", string(boolqueryJson))
  174. //boolquery.Must(elastic.NewMatchQuery("Title", keyWord), elastic.NewMatchQuery("BodyText", keyWord))
  175. highlight := elastic.NewHighlight()
  176. highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
  177. highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
  178. request := client.Search(esIndex).Highlight(highlight).Size(pageSize).Query(queryString)
  179. requestJson, err := json.Marshal(request)
  180. fmt.Println("err:", err)
  181. fmt.Println("requestJson ", string(requestJson))
  182. searchByMatch, err := request.Do(context.Background())
  183. //searchByMatch, err := client.Search(esIndex).Highlight(highlight).Size(pageSize).Do(context.Background())
  184. if err != nil {
  185. return result, err
  186. }
  187. if searchByMatch.Hits != nil {
  188. for _, v := range searchByMatch.Hits.Hits {
  189. articleJson, err := v.Source.MarshalJSON()
  190. if err != nil {
  191. return nil, err
  192. }
  193. article := new(models.CygxArticle)
  194. err = json.Unmarshal(articleJson, &article)
  195. if err != nil {
  196. return nil, err
  197. }
  198. if _, ok := searchMap[article.ArticleId]; !ok {
  199. searchItem := new(models.SearchItem)
  200. searchItem.ArticleId, _ = strconv.Atoi(v.Id)
  201. searchItem.Body = v.Highlight["BodyText"]
  202. var title string
  203. if len(v.Highlight["Title"]) > 0 {
  204. title = v.Highlight["Title"][0]
  205. } else {
  206. title = article.Title
  207. }
  208. searchItem.Title = title
  209. searchItem.PublishDate = article.PublishDate
  210. result = append(result, searchItem)
  211. searchMap[article.ArticleId] = article.ArticleId
  212. }
  213. }
  214. }
  215. return
  216. }
  217. func SearchByKeyWordBack(keyWord string) (result []*models.SearchItem, err error) {
  218. pageSize := 20
  219. keyWordArr, err := GetIndustryMapNameSlice(keyWord)
  220. if err != nil {
  221. go utils.SendEmail(utils.APPNAME+" "+utils.RunMode+"异常提醒:", "GetIndustryMapNameSlice:"+err.Error(), utils.EmailSendToUsers)
  222. }
  223. var sniff = false //<4>
  224. cfg := &config.Config{
  225. URL: ES_URL,
  226. Username: ES_USERNAME,
  227. Password: ES_PASSWORD,
  228. }
  229. cfg.Sniff = &sniff
  230. client, err := elastic.NewClientFromConfig(cfg)
  231. if err != nil {
  232. return
  233. }
  234. var esIndex = "cygx_article"
  235. searchMap := make(map[int]int)
  236. for _, v := range keyWordArr {
  237. keyWord = v
  238. boolquery := elastic.NewBoolQuery()
  239. boolquery.Must(elastic.NewMatchQuery("Title", keyWord), elastic.NewMatchQuery("BodyText", keyWord))
  240. highlight := elastic.NewHighlight()
  241. highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
  242. highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
  243. searchByMatch, err := client.Search(esIndex).Highlight(highlight).Size(pageSize).Query(boolquery).Do(context.Background())
  244. if err != nil {
  245. return result, err
  246. }
  247. if searchByMatch.Hits != nil {
  248. for _, v := range searchByMatch.Hits.Hits {
  249. articleJson, err := v.Source.MarshalJSON()
  250. if err != nil {
  251. return nil, err
  252. }
  253. article := new(models.CygxArticle)
  254. err = json.Unmarshal(articleJson, &article)
  255. if err != nil {
  256. return nil, err
  257. }
  258. if _, ok := searchMap[article.ArticleId]; !ok {
  259. searchItem := new(models.SearchItem)
  260. searchItem.ArticleId, _ = strconv.Atoi(v.Id)
  261. searchItem.Body = v.Highlight["BodyText"]
  262. var title string
  263. if len(v.Highlight["Title"]) > 0 {
  264. title = v.Highlight["Title"][0]
  265. } else {
  266. title = article.Title
  267. }
  268. searchItem.Title = title
  269. searchItem.PublishDate = article.PublishDate
  270. result = append(result, searchItem)
  271. searchMap[article.ArticleId] = article.ArticleId
  272. }
  273. }
  274. }
  275. }
  276. return
  277. }
  278. func esSearch(keyWord, categoryName string) (result []*models.SearchItem, err error) {
  279. pageSize := 20
  280. var sniff = false //<4>
  281. cfg := &config.Config{
  282. URL: ES_URL,
  283. Username: ES_USERNAME,
  284. Password: ES_PASSWORD,
  285. }
  286. cfg.Sniff = &sniff
  287. client, err := elastic.NewClientFromConfig(cfg)
  288. if err != nil {
  289. return
  290. }
  291. var esIndex = "cygx_article"
  292. termsQuery := elastic.NewTermsQuery("category_name", categoryName)
  293. boolquery := elastic.NewBoolQuery()
  294. boolquery.Must(elastic.NewMatchQuery("Title", keyWord), elastic.NewMatchQuery("BodyText", keyWord))
  295. highlight := elastic.NewHighlight()
  296. highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
  297. highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
  298. searchByMatch, err := client.Search(esIndex).Highlight(highlight).Size(pageSize).Query(termsQuery).Query(boolquery).Do(context.Background())
  299. if err != nil {
  300. return result, err
  301. }
  302. searchMap := make(map[int]int)
  303. if searchByMatch.Hits != nil {
  304. for _, v := range searchByMatch.Hits.Hits {
  305. articleJson, err := v.Source.MarshalJSON()
  306. if err != nil {
  307. return nil, err
  308. }
  309. article := new(models.CygxArticle)
  310. err = json.Unmarshal(articleJson, &article)
  311. if err != nil {
  312. return nil, err
  313. }
  314. if _, ok := searchMap[article.ArticleId]; !ok {
  315. searchItem := new(models.SearchItem)
  316. searchItem.ArticleId, _ = strconv.Atoi(v.Id)
  317. searchItem.Body = v.Highlight["BodyText"]
  318. var title string
  319. if len(v.Highlight["Title"]) > 0 {
  320. title = v.Highlight["Title"][0]
  321. } else {
  322. title = article.Title
  323. }
  324. searchItem.Title = title
  325. searchItem.PublishDate = article.PublishDate
  326. result = append(result, searchItem)
  327. searchMap[article.ArticleId] = article.ArticleId
  328. }
  329. }
  330. }
  331. return
  332. }
  // RemoveDuplicatesAndEmpty returns the non-empty strings of a with every
  // duplicate removed, keeping the first occurrence of each value in its original
  // position.
  //
  // The input does not have to be sorted: duplicates are detected anywhere in the
  // slice, not only when they are adjacent. The input slice is not modified. The
  // result is nil when nothing remains.
  func RemoveDuplicatesAndEmpty(a []string) (ret []string) {
  	seen := make(map[string]struct{}, len(a))
  	for _, s := range a {
  		if s == "" {
  			continue
  		}
  		if _, dup := seen[s]; dup {
  			continue
  		}
  		seen[s] = struct{}{}
  		ret = append(ret, s)
  	}
  	return ret
  }