123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666 |
- package services
- import (
- "context"
- "encoding/json"
- "fmt"
- "github.com/PuerkitoBio/goquery"
- "github.com/olivere/elastic/v7"
- "github.com/olivere/elastic/v7/config"
- "hongze/hongze_cygx/models"
- "hongze/hongze_cygx/utils"
- "html"
- "log"
- "os"
- "strconv"
- "strings"
- )
// Connection settings for the Elasticsearch cluster used by this package.
//
// NOTE(review): the credentials are committed in plain text. They stay
// constants here so existing references keep compiling, but they should be
// rotated and loaded from configuration or the environment instead.
const (
	// ES_URL is the HTTP endpoint of the cluster.
	ES_URL = "http://es-cn-nif227b580019rgw6.public.elasticsearch.aliyuncs.com:9200"
	// ES_USERNAME is the basic-auth user name.
	ES_USERNAME = "elastic"
	// ES_PASSWORD is the basic-auth password.
	ES_PASSWORD = "hongze@2021"
)
- func SaveData() {
-
- var sniff = false
- cfg := &config.Config{
- URL: ES_URL,
- Username: ES_USERNAME,
- Password: ES_PASSWORD,
- }
- cfg.Sniff = &sniff
- var client, err = elastic.NewClientFromConfig(cfg)
- if err != nil {
- fmt.Println("NewClientFromConfig Err:" + err.Error())
- return
- }
- var esIndex = "cygx_article"
-
-
- exists, err := client.IndexExists(esIndex).Do(context.Background())
- if err != nil {
- fmt.Println("IndexExists Err:" + err.Error())
- return
- }
- if !exists {
- _, err = client.CreateIndex(esIndex).Do(context.Background())
- if err != nil {
- fmt.Println("CreateIndex Err:" + err.Error())
- return
- }
- }
-
- idStr := `3584,3644`
- idArr := strings.Split(idStr, ",")
- for _, v := range idArr {
- id, _ := strconv.Atoi(v)
- item, err := models.GetArticleDetailById(id)
- if err != nil {
- fmt.Println("GetArticleDetailById Err:" + err.Error())
- return
- }
- content := html.UnescapeString(item.Body)
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
- if err != nil {
- fmt.Println("create doc err:", err.Error())
- return
- }
- bodyText := doc.Text()
- item.BodyText = bodyText
-
- resp, err := client.Index().Index(esIndex).Id(strconv.Itoa(item.ArticleId)).BodyJson(item).Do(context.Background())
- if err != nil {
- fmt.Println("insert es failed", err.Error())
- return
- }
- fmt.Println(resp.Status)
- }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- fmt.Println("end")
- }
// tracelog is a stateless logger type whose Printf writes directly to
// standard output.
type tracelog struct{}

// Printf formats v according to format and writes the result to stdout.
func (tracelog) Printf(format string, v ...interface{}) {
	_, _ = fmt.Fprintf(os.Stdout, format, v...)
}
- func SearchByKeyWordBack02(keyWord string) (result []*models.SearchItem, err error) {
- fmt.Println("keyWord:", keyWord)
- pageSize := 20
- keyWordArr, err := GetIndustryMapNameSlice(keyWord)
- keyWordArr = RemoveDuplicatesAndEmpty(keyWordArr)
- fmt.Println(keyWordArr)
- fmt.Println(" keyWordArr ")
- if err != nil {
- go utils.SendEmail(utils.APPNAME+" "+utils.RunMode+"异常提醒:", "GetIndustryMapNameSlice:"+err.Error(), utils.EmailSendToUsers)
- }
-
-
-
-
-
-
-
-
-
- errorlog := log.New(os.Stdout, "APP", log.LstdFlags)
- file := "./eslog.log"
- logFile, _ := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0766)
- client, err := elastic.NewClient(
- elastic.SetURL(ES_URL),
- elastic.SetBasicAuth(ES_USERNAME, ES_PASSWORD),
- elastic.SetTraceLog(log.New(logFile, "ES-TRACE: ", 0)),
- elastic.SetSniff(false), elastic.SetErrorLog(errorlog))
- var esIndex = "cygx_article"
- searchMap := make(map[int]int)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- keyWordStr := strings.Join(keyWordArr, ",")
- fmt.Println("keyWordStr ", keyWordStr)
- queryString := elastic.NewQueryStringQuery(`Title:(` + keyWordStr + `) BodyText:(` + keyWordStr + `)`).Analyzer("ik_smart")
- boolqueryJson, err := json.Marshal(queryString)
- fmt.Println("err:", err)
- fmt.Println("queryString ", string(boolqueryJson))
-
- highlight := elastic.NewHighlight()
- highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
- highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
- request := client.Search(esIndex).Highlight(highlight).Size(pageSize).Query(queryString)
- requestJson, err := json.Marshal(request)
- fmt.Println("err:", err)
- fmt.Println("requestJson ", string(requestJson))
- searchByMatch, err := request.Do(context.Background())
-
- if err != nil {
- return result, err
- }
- if searchByMatch.Hits != nil {
- for _, v := range searchByMatch.Hits.Hits {
- articleJson, err := v.Source.MarshalJSON()
- if err != nil {
- return nil, err
- }
- article := new(models.CygxArticle)
- err = json.Unmarshal(articleJson, &article)
- if err != nil {
- return nil, err
- }
- if _, ok := searchMap[article.ArticleId]; !ok {
- searchItem := new(models.SearchItem)
- searchItem.ArticleId, _ = strconv.Atoi(v.Id)
- searchItem.Body = v.Highlight["BodyText"]
- var title string
- if len(v.Highlight["Title"]) > 0 {
- title = v.Highlight["Title"][0]
- } else {
- title = article.Title
- }
- searchItem.Title = title
- searchItem.PublishDate = article.PublishDate
- result = append(result, searchItem)
- searchMap[article.ArticleId] = article.ArticleId
- }
- }
- }
- return
- }
- func SearchByKeyWordBack(indexName, keyWord string, startSize, pageSize, userId int) (result []*models.SearchItem, total int, err error) {
- keyWordArr, err := GetIndustryMapNameSlice(keyWord)
- if err != nil {
- go utils.SendEmail(utils.APPNAME+" "+utils.RunMode+"异常提醒:", "GetIndustryMapNameSlice:"+err.Error(), utils.EmailSendToUsers)
- }
- var sniff = false
- cfg := &config.Config{
- URL: ES_URL,
- Username: ES_USERNAME,
- Password: ES_PASSWORD,
- }
- cfg.Sniff = &sniff
- client, err := elastic.NewClientFromConfig(cfg)
- if err != nil {
- return
- }
- var esIndex = indexName
- searchMap := make(map[int]int)
- for _, v := range keyWordArr {
- keyWord = v
- boolquery := elastic.NewBoolQuery()
- boolquery.Must(elastic.NewMatchQuery("Title", keyWord), elastic.NewMatchQuery("BodyText", keyWord))
- highlight := elastic.NewHighlight()
- highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
- highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
- searchByMatch, err := client.Search(esIndex).Highlight(highlight).Size(pageSize).Query(boolquery).Do(context.Background())
- if err != nil {
- return result, 0, err
- }
- if searchByMatch.Hits != nil {
- for _, v := range searchByMatch.Hits.Hits {
- articleJson, err := v.Source.MarshalJSON()
- if err != nil {
- return nil, 0, err
- }
- article := new(models.CygxArticle)
- err = json.Unmarshal(articleJson, &article)
- if err != nil {
- return nil, 0, err
- }
- if _, ok := searchMap[article.ArticleId]; !ok {
- searchItem := new(models.SearchItem)
- searchItem.ArticleId, _ = strconv.Atoi(v.Id)
- searchItem.Body = v.Highlight["BodyText"]
- var title string
- if len(v.Highlight["Title"]) > 0 {
- title = v.Highlight["Title"][0]
- } else {
- title = article.Title
- }
- searchItem.Title = title
- searchItem.PublishDate = article.PublishDate
- result = append(result, searchItem)
- searchMap[article.ArticleId] = article.ArticleId
- }
- }
- }
- }
- return
- }
- func esSearch(keyWord, categoryName string) (result []*models.SearchItem, err error) {
- pageSize := 20
- var sniff = false
- cfg := &config.Config{
- URL: ES_URL,
- Username: ES_USERNAME,
- Password: ES_PASSWORD,
- }
- cfg.Sniff = &sniff
- client, err := elastic.NewClientFromConfig(cfg)
- if err != nil {
- return
- }
- var esIndex = "cygx_article"
- termsQuery := elastic.NewTermsQuery("category_name", categoryName)
- boolquery := elastic.NewBoolQuery()
- boolquery.Must(elastic.NewMatchQuery("Title", keyWord), elastic.NewMatchQuery("BodyText", keyWord))
- highlight := elastic.NewHighlight()
- highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
- highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
- searchByMatch, err := client.Search(esIndex).Highlight(highlight).Size(pageSize).Query(termsQuery).Query(boolquery).Do(context.Background())
- if err != nil {
- return result, err
- }
- searchMap := make(map[int]int)
- if searchByMatch.Hits != nil {
- for _, v := range searchByMatch.Hits.Hits {
- articleJson, err := v.Source.MarshalJSON()
- if err != nil {
- return nil, err
- }
- article := new(models.CygxArticle)
- err = json.Unmarshal(articleJson, &article)
- if err != nil {
- return nil, err
- }
- if _, ok := searchMap[article.ArticleId]; !ok {
- searchItem := new(models.SearchItem)
- searchItem.ArticleId, _ = strconv.Atoi(v.Id)
- searchItem.Body = v.Highlight["BodyText"]
- var title string
- if len(v.Highlight["Title"]) > 0 {
- title = v.Highlight["Title"][0]
- } else {
- title = article.Title
- }
- searchItem.Title = title
- searchItem.PublishDate = article.PublishDate
- result = append(result, searchItem)
- searchMap[article.ArticleId] = article.ArticleId
- }
- }
- }
- return
- }
// RemoveDuplicatesAndEmpty returns the elements of a with empty strings and
// repeated values removed, keeping the first occurrence of each value in its
// original order.
//
// Earlier versions only dropped a value when it equalled the element directly
// before it, so duplicates in unsorted input (or separated by an empty
// string) survived. For sorted input the result is unchanged. The result is
// nil when nothing is kept.
func RemoveDuplicatesAndEmpty(a []string) (ret []string) {
	seen := make(map[string]struct{}, len(a))
	for _, s := range a {
		if s == "" {
			continue
		}
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		ret = append(ret, s)
	}
	return ret
}
// KeyWordArrSqlRegexp joins every element of a except the first with "|" and
// strips any trailing "|" characters from the result. It returns "" when a
// has fewer than two elements.
func KeyWordArrSqlRegexp(a []string) (ret string) {
	if len(a) < 2 {
		return ""
	}
	joined := strings.Join(a[1:], "|")
	return strings.TrimRight(joined, "|")
}
// KeyWordArrSqlRegexpAll joins all elements of a with "|" and strips any
// trailing "|" characters from the result. It returns "" for empty input.
func KeyWordArrSqlRegexpAll(a []string) (ret string) {
	joined := strings.Join(a, "|")
	return strings.TrimRight(joined, "|")
}
- func init21123() {
- fmt.Println("start")
- client, err := elastic.NewClient(elastic.SetURL(ES_URL), elastic.SetBasicAuth(ES_USERNAME, ES_PASSWORD), elastic.SetSniff(false))
- if err != nil {
- fmt.Println("err:", err)
- }
- fmt.Println(client)
- keyWordStr := "医疗器械"
- queryString := elastic.NewQueryStringQuery(`Title:(` + keyWordStr + `)`)
- boolqueryJson, err := json.Marshal(queryString)
- fmt.Println("err:", err)
- fmt.Println("queryString ", string(boolqueryJson))
- var esIndex = "cygx_article"
-
- highlight := elastic.NewHighlight()
- highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
- highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
- request := client.Search(esIndex).Highlight(highlight).Query(queryString)
- requestJson, err := json.Marshal(request)
- fmt.Println("err:", err)
- fmt.Println("requestJson ", string(requestJson))
- searchByMatch, err := request.Do(context.Background())
- if searchByMatch.Hits != nil {
- }
- fmt.Println(searchByMatch)
- fmt.Println("end")
- }
- func SearchByKeyWordQuery(keyWord string) (result []*models.SearchItem, err error) {
- errorlog := log.New(os.Stdout, "APP", log.LstdFlags)
- file := "./rdlucklog/eslog.log"
- logFile, _ := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0766)
- client, err := elastic.NewClient(
- elastic.SetURL(ES_URL),
- elastic.SetBasicAuth(ES_USERNAME, ES_PASSWORD),
- elastic.SetTraceLog(log.New(logFile, "ES-TRACE: ", 0)),
- elastic.SetSniff(false), elastic.SetErrorLog(errorlog))
- var esIndex = "cygx_article"
-
-
- queryString := elastic.NewQueryStringQuery("Title:医疗")
-
- highlight := elastic.NewHighlight()
- highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
- highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
- request := client.Search(esIndex).Highlight(highlight).Query(queryString)
- searchByMatch, err := request.Do(context.Background())
- if err != nil {
- fmt.Println("request.Do err:", err.Error())
- return result, err
- }
- if searchByMatch.Hits != nil {
- for _, v := range searchByMatch.Hits.Hits {
- articleJson, _ := v.Source.MarshalJSON()
- fmt.Println(string(articleJson))
- }
- }
- return
- }
- func SearchByKeyWord(keyWord string) (result []*models.SearchItem, err error) {
- fmt.Println("keyWord:", keyWord)
- keyWord = "医药"
- pageSize := 20
- keyWordArr, err := GetIndustryMapNameSlice(keyWord)
- keyWordArr = RemoveDuplicatesAndEmpty(keyWordArr)
- fmt.Println(keyWordArr)
- fmt.Println(" keyWordArr ")
- if err != nil {
- go utils.SendEmail(utils.APPNAME+" "+utils.RunMode+"异常提醒:", "GetIndustryMapNameSlice:"+err.Error(), utils.EmailSendToUsers)
- }
- errorlog := log.New(os.Stdout, "APP", log.LstdFlags)
- file := "./rdlucklog/eslog.log"
- logFile, _ := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0766)
- client, err := elastic.NewClient(
- elastic.SetURL(ES_URL),
- elastic.SetBasicAuth(ES_USERNAME, ES_PASSWORD),
- elastic.SetTraceLog(log.New(logFile, "ES-TRACE: ", 0)),
- elastic.SetSniff(false), elastic.SetErrorLog(errorlog))
-
- var esIndex = "article_two"
- searchMap := make(map[int]int)
-
-
-
-
-
-
- boolquery := elastic.NewBoolQuery()
- keyLen := len(keyWordArr)
- n := float64(keyLen)
- matchArr := make([]elastic.Query, 0)
- for _, v := range keyWordArr {
- if v != "" {
- matchq1 := elastic.NewMatchQuery("Title", v).Boost(n).Analyzer("ik_smart")
- matchq2 := elastic.NewMatchQuery("BodyText", v).Boost(n).Analyzer("ik_smart")
- matchArr = append(matchArr, matchq1)
- matchArr = append(matchArr, matchq2)
- }
- n--
- }
-
-
- boolquery.Should(matchArr...)
-
- highlight := elastic.NewHighlight()
- highlight = highlight.Fields(elastic.NewHighlighterField("Title"), elastic.NewHighlighterField("BodyText"))
- highlight = highlight.PreTags("<font color='red'>").PostTags("</font>")
-
- request := client.Search(esIndex).Highlight(highlight).Size(pageSize).Query(boolquery)
- searchByMatch, err := request.Do(context.Background())
- fmt.Println(searchByMatch, err)
-
- if err != nil {
- return result, err
- }
- fmt.Println(searchByMatch.Status, searchByMatch.TotalHits())
- if searchByMatch.Hits != nil {
- for _, v := range searchByMatch.Hits.Hits {
- articleJson, err := v.Source.MarshalJSON()
- if err != nil {
- return nil, err
- }
- article := new(models.CygxArticle)
- err = json.Unmarshal(articleJson, &article)
- if err != nil {
- return nil, err
- }
- if _, ok := searchMap[article.ArticleId]; !ok {
- searchItem := new(models.SearchItem)
- searchItem.ArticleId, _ = strconv.Atoi(v.Id)
- searchItem.Body = v.Highlight["BodyText"]
- var title string
- if len(v.Highlight["Title"]) > 0 {
- title = v.Highlight["Title"][0]
- } else {
- title = article.Title
- }
- searchItem.Title = title
- searchItem.PublishDate = article.PublishDate
- result = append(result, searchItem)
- searchMap[article.ArticleId] = article.ArticleId
- }
- }
- }
- return
- }
- func AddMap() {
- errorlog := log.New(os.Stdout, "APP", log.LstdFlags)
- file := "./rdlucklog/eslog.log"
- logFile, _ := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0766)
- client, err := elastic.NewClient(
- elastic.SetURL(ES_URL),
- elastic.SetBasicAuth(ES_USERNAME, ES_PASSWORD),
- elastic.SetTraceLog(log.New(logFile, "ES-TRACE: ", 0)),
- elastic.SetSniff(false), elastic.SetErrorLog(errorlog))
- if err != nil {
- fmt.Println("NewClient Err:", err.Error())
- }
- bodyJson := `{
- "mappings": {
- "properties": {
- "ArticleId": {
- "type": "integer"
- },
- "Title": {
- "type": "text",
- "analyzer": "ik_smart"
- },
- "BodyText": {
- "type": "text",
- "analyzer": "ik_smart"
- }
- }
- }
- }`
-
- var esIndex = "article_two"
- a := client.IndexAnalyze().Index(esIndex)
- analyzeBody, err := json.Marshal(a)
- fmt.Println(string(analyzeBody))
- exists, err := client.IndexExists(esIndex).Do(context.Background())
- if err != nil {
- fmt.Println("IndexExists Err:" + err.Error())
- return
- }
- fmt.Println("exists:", exists)
- if !exists {
- _, err = client.CreateIndex(esIndex).BodyJson(bodyJson).Do(context.Background())
-
- if err != nil {
- fmt.Println("CreateIndex Err:" + err.Error())
- return
- }
- }
- item, err := models.GetArticleDetailTestById(3546)
- if err != nil {
- fmt.Println("GetArticleDetailById Err:" + err.Error())
- return
- }
- content := html.UnescapeString(item.Body)
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
- if err != nil {
- fmt.Println("create doc err:", err.Error())
- return
- }
- bodyText := doc.Text()
- item.BodyText = bodyText
-
- resp, err := client.Index().Index(esIndex).Id(strconv.Itoa(item.ArticleId)).BodyJson(item).Do(context.Background())
- if err != nil {
- fmt.Println("insert es failed", err.Error())
- return
- }
- fmt.Println(resp)
- }
|