package services

import (
	"fmt"
	"html"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"

	"hongze/hongze_cygx/models"
	"hongze/hongze_cygx/utils"
)

const (
	// tacticsSyncDepartment is the department name stamped on every synced article.
	tacticsSyncDepartment = "弘则权益研究"

	// tacticsPublishDateOffset is added to the source publish date before it is stored.
	// NOTE(review): presumably a UTC -> UTC+8 correction — confirm against the source database.
	tacticsPublishDateOffset = 8 * time.Hour
)

// articleTextWithoutLinks unescapes an HTML-escaped article body, removes every
// <a> element from it and returns the text content of what is left. It is the
// text that gets pushed to the Elasticsearch index.
func articleTextWithoutLinks(body string) (string, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html.UnescapeString(body)))
	if err != nil {
		return "", err
	}
	doc.Find("a").Remove()
	return doc.Text(), nil
}

// splitExpertContent splits an "expert review" line at its first ")".
//
// The part before the ")" (with "(" and the "专家评价" label removed) becomes the
// expert number; the part from the ")" onwards (with ")" removed, leading ":"
// and trailing characters of the cutset "(推荐" stripped) becomes the content.
// ok is false when there is no ")" after the first byte or when the expert
// number comes out empty; callers must then ignore the other results.
func splitExpertContent(expertContent string) (expertNum, content string, ok bool) {
	rightIndex := strings.Index(expertContent, ")")
	// -1 means "not found"; slicing with it would panic. 0 would yield an empty number.
	if rightIndex <= 0 {
		return "", "", false
	}
	expertNum = strings.Replace(expertContent[:rightIndex], "(", "", -1)
	expertNum = strings.Replace(expertNum, "专家评价", "", -1)
	if expertNum == "" {
		return "", "", false
	}
	content = strings.Replace(expertContent[rightIndex:], ")", "", -1)
	content = strings.TrimLeft(content, ":")
	// TrimRight works on a cutset: any trailing run of '(', '推' or '荐' is removed,
	// not just the literal suffix "(推荐". Kept as in the original implementation.
	content = strings.TrimRight(content, "(推荐")
	return expertNum, content, true
}

// SyncTacticsList synchronises the strategy articles published during the last
// seven days into the cygx article table and into the Elasticsearch index.
//
// For every article returned by models.GetTacticsList it shifts the publish
// date by tacticsPublishDateOffset, rewrites http links to vmp.hzinsights.com
// to https, extracts the expert/interview fields via BodyAnalysis, then updates
// the existing row or inserts a new one and finally (re)indexes the link-free
// body text. The first error that aborts the run is returned (and printed by
// the deferred logger); a failed UpdateByExpr is only logged.
func SyncTacticsList() (err error) {
	defer func() {
		if err != nil {
			fmt.Println("同步失败,Err:", err.Error())
		}
	}()
	fmt.Println("同步数据")
	indexName := utils.IndexName
	endDate := time.Now().AddDate(0, 0, -7).Format(utils.FormatDate)
	list, err := models.GetTacticsList(endDate)
	if err != nil {
		fmt.Println("GetTacticsList Err:", err.Error())
		return err
	}
	fmt.Println("list len:", len(list))
	for k, v := range list {
		v.Department = tacticsSyncDepartment
		fmt.Println(k, v.ArticleId)
		v.PublishDate = v.PublishDate.Add(tacticsPublishDateOffset)

		// A negative id is a parameter error; report it instead of returning nil.
		if v.ArticleId < 0 {
			fmt.Println("参数错误")
			return fmt.Errorf("invalid article id %d", v.ArticleId)
		}

		// Check whether the article already exists; "no row" is not an error here.
		count, countErr := models.GetArticleCountById(v.ArticleId)
		if countErr != nil && countErr.Error() != utils.ErrNoRow() {
			return countErr
		}

		v.Body = strings.Replace(v.Body, "http://vmp.hzinsights.com", "https://vmp.hzinsights.com", -1)
		expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)

		if count > 0 {
			// The second result of GetReportContentTextSub is discarded, as before.
			bodyText, _ := GetReportContentTextSub(v.Body)
			updateParams := make(map[string]interface{})
			updateParams["Title"] = v.Title
			updateParams["TitleEn"] = v.TitleEn
			updateParams["UpdateFrequency"] = v.UpdateFrequency
			updateParams["CreateDate"] = v.CreateDate
			updateParams["PublishDate"] = v.PublishDate
			updateParams["Body"] = html.EscapeString(v.Body)
			updateParams["BodyText"] = bodyText
			updateParams["Abstract"] = html.EscapeString(v.Abstract)
			updateParams["CategoryName"] = v.CategoryName
			updateParams["SubCategoryName"] = v.SubCategoryName
			updateParams["CategoryId"] = v.CategoryId
			updateParams["PublishStatus"] = v.PublishStatus
			updateParams["ExpertBackground"] = expertContentStr
			updateParams["ExpertNumber"] = expertNumStr
			updateParams["InterviewDate"] = interviewDateStr
			updateParams["Department"] = v.Department
			whereParam := map[string]interface{}{"article_id": v.ArticleId}
			// Best effort: an update failure is logged and the article is still indexed.
			if updateErr := models.UpdateByExpr(models.CygxArticle{}, whereParam, updateParams); updateErr != nil {
				fmt.Println("UpdateByExpr Err:" + updateErr.Error())
			}
		} else {
			fmt.Println(k, v.ArticleId, "add")
			// NOTE(review): unlike the update path, no body text is stored on insert.
			item := new(models.CygxArticle)
			item.ArticleId = v.ArticleId
			item.Title = v.Title
			item.TitleEn = v.TitleEn
			item.UpdateFrequency = v.UpdateFrequency
			item.CreateDate = v.CreateDate
			item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
			item.Body = html.EscapeString(v.Body)
			item.Abstract = html.EscapeString(v.Abstract)
			item.CategoryName = v.CategoryName
			item.SubCategoryName = v.SubCategoryName
			item.CategoryId = v.CategoryId
			item.PublishStatus = v.PublishStatus
			item.ExpertBackground = expertContentStr
			item.ExpertNumber = expertNumStr
			item.InterviewDate = interviewDateStr
			item.Department = v.Department
			item.ArticleIdMd5 = utils.MD5(strconv.Itoa(v.ArticleId))
			if _, addErr := models.AddCygxArticle(item); addErr != nil {
				fmt.Println("AddCygxArticle Err:", addErr.Error())
				return addErr
			}
		}

		esText, textErr := articleTextWithoutLinks(v.Body)
		if textErr != nil {
			fmt.Println("create doc err:", textErr.Error())
			return textErr
		}
		EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), &ElasticTestArticleDetail{
			ArticleId:   v.ArticleId,
			Title:       v.Title,
			BodyText:    esText,
			PublishDate: v.PublishDate.Format(utils.FormatDateTime),
		})
	}
	return nil
}

// SyncCygxArticleList re-indexes every article returned by
// models.GetCygxArticleListAll into Elasticsearch. Nothing is written to the
// database; only the link-free body text, title, id and publish date are
// pushed to the index. The first error that aborts the run is returned.
func SyncCygxArticleList() (err error) {
	defer func() {
		if err != nil {
			fmt.Println("同步失败,Err:", err.Error())
		}
	}()
	fmt.Println("同步数据")
	indexName := utils.IndexName
	fmt.Println("indexName:", indexName)
	// NOTE(review): the reason for this fixed delay is not evident from this file.
	time.Sleep(5 * time.Second)
	list, err := models.GetCygxArticleListAll()
	if err != nil {
		fmt.Println("GetCygxArticleListAll Err:", err.Error())
		return err
	}
	fmt.Println("list len:", len(list))
	for k, v := range list {
		fmt.Println(k, v.ArticleId)
		// A negative id is a parameter error; report it instead of returning nil.
		if v.ArticleId < 0 {
			fmt.Println("参数错误")
			return fmt.Errorf("invalid article id %d", v.ArticleId)
		}
		esText, textErr := articleTextWithoutLinks(v.Body)
		if textErr != nil {
			fmt.Println("create doc err:", textErr.Error())
			return textErr
		}
		EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), &ElasticTestArticleDetail{
			ArticleId:   v.ArticleId,
			Title:       v.Title,
			BodyText:    esText,
			PublishDate: v.PublishDate.Format(utils.FormatDateTime),
		})
	}
	return nil
}

// BodyAnalysis parses an (HTML-escaped) article body and extracts three fields:
//
//   - expertNumStr: the expert number found before the first ")" of the
//     "专家评价" (expert review) line,
//   - expertContentStr: the remainder of that line,
//   - interviewDateStr: the text of the element following the "访谈时间:"
//     (interview time) marker.
//
// <p> elements are scanned first; <pre> elements are scanned only when the <p>
// pass produced no expert content. The first hit of each kind wins. All three
// results are empty when the body cannot be parsed or nothing matches.
func BodyAnalysis(body string) (expertNumStr, expertContentStr, interviewDateStr string) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html.UnescapeString(body)))
	if err != nil {
		fmt.Println("create doc err:", err.Error())
		return "", "", ""
	}

	var expertNumArr, expertContentArr, interviewDateArr []string

	// scan collects candidates from every element matching selector. With
	// skipEmpty set, empty interview dates and empty expert contents are not
	// recorded (the historical behaviour of the <pre> pass); without it they
	// are recorded as-is (the historical behaviour of the <p> pass).
	scan := func(selector string, skipEmpty bool) {
		doc.Find(selector).Each(func(i int, s *goquery.Selection) {
			contentTxt := s.Text()
			// Matching the bare marker also covers its "#"-prefixed form.
			if strings.Contains(contentTxt, "访谈时间:") {
				interviewDate := s.Next().Text()
				if interviewDate != "" || !skipEmpty {
					interviewDateArr = append(interviewDateArr, interviewDate)
				}
			}
			if strings.Contains(contentTxt, "专家评价") {
				// The review normally sits in the next sibling; fall back to the
				// marker element itself when the sibling has no text.
				expertContent := s.Next().Text()
				if expertContent == "" {
					expertContent = contentTxt
				}
				expertNum, content, ok := splitExpertContent(expertContent)
				if !ok {
					return
				}
				expertNumArr = append(expertNumArr, expertNum)
				if content != "" || !skipEmpty {
					expertContentArr = append(expertContentArr, content)
				}
			}
		})
	}

	scan("p", false)
	if len(expertContentArr) == 0 {
		scan("pre", true)
	}

	if len(expertNumArr) > 0 {
		expertNumStr = strings.Replace(expertNumArr[0], "#:", "", -1)
		expertNumStr = strings.Replace(expertNumStr, "# ", "", -1)
		// The original called strings.Trim with an empty cutset, which trims nothing.
		expertNumStr = strings.TrimSpace(expertNumStr)
	}
	if len(expertContentArr) > 0 {
		expertContentStr = expertContentArr[0]
	}
	if len(interviewDateArr) > 0 {
		interviewDateStr = interviewDateArr[0]
	}
	return expertNumStr, expertContentStr, interviewDateStr
}

// SyncTacticsListAddreport synchronises the strategy articles returned by
// models.GetTacticsListAll2 into the cygx article table, classifying each one
// as summary and/or report by its category id, and pushes the articles flagged
// as summary to the Elasticsearch index.
//
// The first error that aborts the run is returned (and printed by the deferred
// logger); a failed UpdateByExpr is only logged.
func SyncTacticsListAddreport() (err error) {
	defer func() {
		if err != nil {
			fmt.Println("同步失败,Err:", err.Error())
		}
	}()
	fmt.Println("同步数据")
	indexName := utils.IndexName
	list, err := models.GetTacticsListAll2()
	if err != nil {
		fmt.Println("GetTacticsListAll2 Err:", err.Error())
		return err
	}
	fmt.Println("list len:", len(list))

	// NOTE(review): the summary list comes from the "report mapping" query and
	// the report list from the "permission mapping" query, while the retired
	// per-article lookups used the opposite pairing — confirm this is intended.
	listSummary, err := models.GetReportMappingCategoryID()
	if err != nil {
		fmt.Println("GetReportMappingCategoryID Err:", err.Error())
		return err
	}
	listPermission, err := models.GetPermissionMappingCategoryID()
	if err != nil {
		fmt.Println("GetPermissionMappingCategoryID Err:", err.Error())
		return err
	}

	for k, v := range list {
		// Does the article belong to the summary library?
		for _, vSum := range listSummary {
			if v.CategoryId == vSum.CategoryId {
				v.IsSummary = 1
				break
			}
		}
		// Does the article count as a report?
		for _, vPer := range listPermission {
			if v.CategoryId == vPer.CategoryId {
				v.IsReport = 1
				break
			}
		}
		if v.IsReport > 0 {
			// Reports in these categories are classified automatically.
			switch v.CategoryId {
			case 7, 11, 51, 52, 64, 80, 87:
				v.IsClass = 1
			}
			// Report type 2 is tagged "行业报告" and type 1 "产业报告" in the original comments.
			switch v.CategoryId {
			case 64, 80, 87:
				v.ReportType = 2
			default:
				v.ReportType = 1
			}
		}

		v.Department = tacticsSyncDepartment
		fmt.Println(k, v.ArticleId)
		v.PublishDate = v.PublishDate.Add(tacticsPublishDateOffset)

		// A negative id is a parameter error, handled like in SyncTacticsList.
		if v.ArticleId < 0 {
			fmt.Println("参数错误")
			return fmt.Errorf("invalid article id %d", v.ArticleId)
		}

		// Check whether the article already exists; "no row" is not an error here.
		count, countErr := models.GetArticleCountById(v.ArticleId)
		if countErr != nil && countErr.Error() != utils.ErrNoRow() {
			fmt.Println("GetArticleCountById Err:", countErr.Error())
			return countErr
		}
		fmt.Println(v.IsClass)

		v.Body = strings.Replace(v.Body, "http://vmp.hzinsights.com", "https://vmp.hzinsights.com", -1)
		expertNumStr, expertContentStr, interviewDateStr := BodyAnalysis(v.Body)

		if count > 0 {
			fmt.Println(k, v.ArticleId, "edit")
			// The second result of GetReportContentTextSub is discarded, as before.
			bodyText, _ := GetReportContentTextSub(v.Body)
			updateParams := make(map[string]interface{})
			updateParams["Title"] = v.Title
			updateParams["TitleEn"] = v.TitleEn
			updateParams["UpdateFrequency"] = v.UpdateFrequency
			updateParams["CreateDate"] = v.CreateDate
			updateParams["PublishDate"] = v.PublishDate
			updateParams["Body"] = html.EscapeString(v.Body)
			updateParams["BodyText"] = bodyText
			updateParams["Abstract"] = html.EscapeString(v.Abstract)
			updateParams["CategoryName"] = v.CategoryName
			updateParams["SubCategoryName"] = v.SubCategoryName
			updateParams["CategoryId"] = v.CategoryId
			updateParams["PublishStatus"] = v.PublishStatus
			updateParams["ExpertBackground"] = expertContentStr
			updateParams["ExpertNumber"] = expertNumStr
			updateParams["InterviewDate"] = interviewDateStr
			// IsClass is deliberately left out of the update (it was disabled in
			// the original code); it is only written when the row is inserted.
			updateParams["IsSummary"] = v.IsSummary
			updateParams["IsReport"] = v.IsReport
			updateParams["ReportType"] = v.ReportType
			updateParams["Department"] = v.Department
			whereParam := map[string]interface{}{"article_id": v.ArticleId}
			// Best effort: an update failure is logged and processing continues.
			if updateErr := models.UpdateByExpr(models.CygxArticle{}, whereParam, updateParams); updateErr != nil {
				fmt.Println("UpdateByExpr Err:" + updateErr.Error())
			}
		} else {
			fmt.Println(k, v.ArticleId, "add")
			// NOTE(review): unlike the update path, no body text is stored on insert.
			item := new(models.CygxArticle)
			item.ArticleId = v.ArticleId
			item.Title = v.Title
			item.TitleEn = v.TitleEn
			item.UpdateFrequency = v.UpdateFrequency
			item.CreateDate = v.CreateDate
			item.PublishDate = v.PublishDate.Format(utils.FormatDateTime)
			item.Body = html.EscapeString(v.Body)
			item.Abstract = html.EscapeString(v.Abstract)
			item.CategoryName = v.CategoryName
			item.SubCategoryName = v.SubCategoryName
			item.CategoryId = v.CategoryId
			item.PublishStatus = v.PublishStatus
			item.ExpertBackground = expertContentStr
			item.ExpertNumber = expertNumStr
			item.InterviewDate = interviewDateStr
			item.Department = v.Department
			item.ArticleIdMd5 = utils.MD5(strconv.Itoa(v.ArticleId))
			item.IsClass = v.IsClass
			item.IsSummary = v.IsSummary
			item.IsReport = v.IsReport
			item.ReportType = v.ReportType
			if _, addErr := models.AddCygxArticles(item); addErr != nil {
				fmt.Println("AddCygxArticles Err:", addErr.Error())
				return addErr
			}
		}

		// Only articles of the summary library are pushed to Elasticsearch.
		if v.IsSummary == 1 {
			esText, textErr := articleTextWithoutLinks(v.Body)
			if textErr != nil {
				fmt.Println("create doc err:", textErr.Error())
				return textErr
			}
			EsAddOrEditData(indexName, strconv.Itoa(v.ArticleId), &ElasticTestArticleDetail{
				ArticleId:   v.ArticleId,
				Title:       v.Title,
				BodyText:    esText,
				PublishDate: v.PublishDate.Format(utils.FormatDateTime),
			})
		}
	}
	return nil
}