123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
- package services
- import (
- "fmt"
- "github.com/PuerkitoBio/goquery"
- "github.com/tealeg/xlsx"
- "hongze/hongze_cygx/models"
- "hongze/hongze_cygx/utils"
- "html"
- "strconv"
- "strings"
- "time"
- )
- //上海数据同步
- func ImportData() {
- fileName := "./temp/权益报告导入0319.xlsx"
- xlFile, err := xlsx.OpenFile(fileName)
- if err != nil {
- fmt.Printf("open failed: %s\n", err)
- }
- for _, sheet := range xlFile.Sheet {
- row := sheet.Rows
- for k, v := range row {
- if k > 0 {
- //导入数据
- item := new(models.CygxArticle)
- cells := v.Cells
- articleIdStr := cells[0].Value
- articleIdInt, _ := strconv.Atoi(articleIdStr)
- //判断文章是否已经存在
- existItem, err := models.GetArticleDetailById(articleIdInt)
- if err != nil && err.Error() != utils.ErrNoRow() {
- fmt.Println("GetArticleDetailById Err:" + err.Error())
- return
- }
- if existItem != nil && existItem.ArticleId > 0 {
- fmt.Println("exist", articleIdInt)
- continue
- }
- title := cells[1].String()
- titleEn := cells[2].String()
- updateFrequency := cells[3].String()
- createDate := cells[4].String()
- createDateParse, err := time.Parse(utils.FormatDateTime, createDate)
- if err != nil {
- fmt.Println("createDateParse err:", err.Error())
- return
- }
- fmt.Println("createDateParse ", createDateParse)
- publishDate := cells[6].String()
- publishDate=strings.Replace(publishDate,`\`,"",-1)
- fmt.Println("publishDate: ", publishDate)
- publishDateParse, err := time.Parse(utils.FormatDateTime, publishDate)
- if err != nil {
- fmt.Println("publishDateParse err:", err.Error())
- return
- }
- fmt.Println("publishDateParse ", publishDateParse)
- body := cells[11].String()
- abstract := cells[12].String()
- categoryName := cells[15].String()
- subCategoryName := cells[16].String()
- categoryId, _ := cells[18].Int()
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
- if err != nil {
- fmt.Println("create doc err:", err.Error())
- return
- }
- var expertNumArr []string
- var expertContentArr []string
- var interviewDateArr []string
- doc.Find("p").Each(func(i int, s *goquery.Selection) {
- contentTxt := s.Text()
- if strings.Contains(contentTxt, "#访谈时间:") || strings.Contains(contentTxt, "访谈时间:") {
- interviewDate := s.Next().Text()
- interviewDateArr = append(interviewDateArr, interviewDate)
- }
- if strings.Contains(contentTxt, "#专家评价") || strings.Contains(contentTxt, "专家评价") {
- expertContent := s.Next().Text()
- if expertContent == "" {
- expertContent = contentTxt
- }
- if expertContent != "" {
- rightIndex := strings.Index(expertContent, ")")
- if rightIndex == 0 {
- rightIndex = strings.Index(expertContent, ")")
- }
- expertNum := expertContent[:rightIndex]
- expertNum = strings.Replace(expertNum, "(", "", -1)
- expertNum = strings.Replace(expertNum, "(", "", -1)
- expertNum = strings.Replace(expertNum, "专家评价", "", -1)
- if expertNum != "" {
- expertNumArr = append(expertNumArr, expertNum)
- rightIndex = rightIndex
- expertContentStr := expertContent[rightIndex:]
- expertContentStr = strings.Replace(expertContentStr, ")", "", -1)
- expertContentStr = strings.TrimLeft(expertContentStr, ":")
- expertContentStr = strings.TrimRight(expertContentStr, "(推荐")
- expertContentArr = append(expertContentArr, expertContentStr)
- }
- }
- }
- })
- var expertNumStr, expertContentStr, interviewDateStr string
- if len(expertNumArr) > 0 {
- expertNumStr = expertNumArr[0]
- }
- if len(expertContentArr) > 0 {
- expertContentStr = expertContentArr[0]
- }
- if len(interviewDateArr) > 0 {
- interviewDateStr = interviewDateArr[0]
- }
- item.ArticleId = articleIdInt
- item.Title = title
- item.TitleEn = titleEn
- item.UpdateFrequency = updateFrequency
- item.CreateDate = createDateParse.Format(utils.FormatDateTime)
- item.PublishDate = publishDateParse.Format(utils.FormatDateTime)
- item.Body = html.EscapeString(body)
- item.Abstract = html.EscapeString(abstract)
- item.CategoryName = categoryName
- item.SubCategoryName = subCategoryName
- item.CategoryId = categoryId
- item.PublishStatus = 1
- item.ExpertBackground = expertContentStr
- item.ExpertNumber = expertNumStr
- item.InterviewDate = interviewDateStr
- item.Department = "策略组"
- item.ArticleIdMd5 = utils.MD5(articleIdStr)
- _, err = models.AddCygxArticle(item)
- if err != nil {
- fmt.Println("AddCygxArticle Err:", err.Error())
- return
- }
- }
- }
- }
- }
|