package services

import (
	"fmt"
	"html"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/tealeg/xlsx"

	"hongze/hongze_cygx/models"
	"hongze/hongze_cygx/utils"
)

// ImportData imports articles from the spreadsheet ./temp/权益报告导入0319.xlsx
// into storage via models.AddCygxArticle (Shanghai data synchronisation).
//
// Every row except the first of each sheet is treated as one article. The
// columns read are: 0 article id, 1 title, 2 English title, 3 update
// frequency, 4 create date, 6 publish date, 11 HTML body, 12 abstract,
// 15 category name, 16 sub-category name and 18 category id.
//
// Rows that are too short, carry a non-numeric article id, or whose article id
// already exists are skipped with a message on stdout. Any other failure
// (lookup error, unparsable date, unparsable body, insert error) is printed
// and aborts the whole import. The function reports nothing to its caller.
func ImportData() {
	// Column 18 (category id) is the highest index read below, so a usable
	// row needs at least 19 cells.
	const minCells = 19

	fileName := "./temp/权益报告导入0319.xlsx"
	xlFile, err := xlsx.OpenFile(fileName)
	if err != nil {
		fmt.Printf("open failed: %s\n", err)
		// The returned file must not be used once OpenFile reports an error.
		return
	}

	for _, sheet := range xlFile.Sheet {
		for k, v := range sheet.Rows {
			if k == 0 {
				// The first row of each sheet is never imported
				// (presumably a header row).
				continue
			}

			cells := v.Cells
			if len(cells) < minCells {
				// Indexing a short row would panic; skip it instead.
				fmt.Println("skip row", k, "too few cells:", len(cells))
				continue
			}

			articleIdStr := cells[0].Value
			articleIdInt, err := strconv.Atoi(articleIdStr)
			if err != nil {
				// Without a numeric id the row would be stored as article 0.
				fmt.Println("skip row", k, "invalid article id:", err.Error())
				continue
			}

			// Skip articles that have already been imported.
			existItem, err := models.GetArticleDetailById(articleIdInt)
			if err != nil && err.Error() != utils.ErrNoRow() {
				fmt.Println("GetArticleDetailById Err:" + err.Error())
				return
			}
			if existItem != nil && existItem.ArticleId > 0 {
				fmt.Println("exist", articleIdInt)
				continue
			}

			createDateParse, err := time.Parse(utils.FormatDateTime, cells[4].String())
			if err != nil {
				fmt.Println("createDateParse err:", err.Error())
				return
			}
			fmt.Println("createDateParse ", createDateParse)

			// Backslashes are stripped from the publish date before parsing.
			publishDate := strings.Replace(cells[6].String(), `\`, "", -1)
			fmt.Println("publishDate: ", publishDate)
			publishDateParse, err := time.Parse(utils.FormatDateTime, publishDate)
			if err != nil {
				fmt.Println("publishDateParse err:", err.Error())
				return
			}
			fmt.Println("publishDateParse ", publishDateParse)

			body := cells[11].String()
			expertNum, expertContent, interviewDate, err := parseImportedArticleBody(body)
			if err != nil {
				fmt.Println("create doc err:", err.Error())
				return
			}

			// A malformed category id is not treated as fatal for the import,
			// so the conversion error is deliberately ignored and whatever
			// Int returned is stored.
			categoryId, _ := cells[18].Int()

			item := &models.CygxArticle{
				ArticleId:        articleIdInt,
				Title:            cells[1].String(),
				TitleEn:          cells[2].String(),
				UpdateFrequency:  cells[3].String(),
				CreateDate:       createDateParse.Format(utils.FormatDateTime),
				PublishDate:      publishDateParse.Format(utils.FormatDateTime),
				Body:             html.EscapeString(body),
				Abstract:         html.EscapeString(cells[12].String()),
				CategoryName:     cells[15].String(),
				SubCategoryName:  cells[16].String(),
				CategoryId:       categoryId,
				PublishStatus:    1,
				ExpertBackground: expertContent,
				ExpertNumber:     expertNum,
				InterviewDate:    interviewDate,
				Department:       "策略组",
				ArticleIdMd5:     utils.MD5(articleIdStr),
			}
			if _, err = models.AddCygxArticle(item); err != nil {
				fmt.Println("AddCygxArticle Err:", err.Error())
				return
			}
		}
	}
}

// parseImportedArticleBody parses body as HTML and scans its <p> elements for
// the interview date and the expert evaluation.
//
// interviewDate is the text of the element following the first paragraph that
// contains "访谈时间:" (it may be empty). expertNum and expertContent come from
// the first paragraph containing "专家评价" whose following element (or, when
// that element has no text, the paragraph itself) can be split by
// splitExpertEvaluation. Values that are not found are returned as "". err is
// non-nil only when the HTML document cannot be created.
func parseImportedArticleBody(body string) (expertNum, expertContent, interviewDate string, err error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	if err != nil {
		return "", "", "", err
	}

	var haveDate, haveExpert bool
	doc.Find("p").Each(func(_ int, s *goquery.Selection) {
		text := s.Text()

		// Only the first match is kept, even when its value is empty.
		if !haveDate && strings.Contains(text, "访谈时间:") {
			interviewDate = s.Next().Text()
			haveDate = true
		}

		if !haveExpert && strings.Contains(text, "专家评价") {
			candidate := s.Next().Text()
			if candidate == "" {
				candidate = text
			}
			if num, content, ok := splitExpertEvaluation(candidate); ok {
				expertNum, expertContent = num, content
				haveExpert = true
			}
		}
	})
	return expertNum, expertContent, interviewDate, nil
}

// splitExpertEvaluation splits text at its first ")" into an expert number
// (the part before it, with "(" and "专家评价" removed) and the evaluation
// content (the part after it). ok is false when text has no ")" or when the
// expert number is empty.
//
// NOTE(review): only the parenthesis and colon forms written below are
// recognised — confirm whether other variants occur in the source documents.
func splitExpertEvaluation(text string) (num, content string, ok bool) {
	idx := strings.Index(text, ")")
	if idx < 0 {
		// No closing parenthesis: slicing with -1 would panic.
		return "", "", false
	}

	num = strings.Replace(text[:idx], "(", "", -1)
	num = strings.Replace(num, "专家评价", "", -1)
	if num == "" {
		return "", "", false
	}

	content = strings.Replace(text[idx:], ")", "", -1)
	content = strings.TrimLeft(content, ":")
	// NOTE(review): TrimRight treats "(推荐" as a set of characters, not a
	// suffix, so any trailing run of '(', '推' or '荐' is removed. Kept as-is
	// to preserve existing output — confirm whether TrimSuffix was intended.
	content = strings.TrimRight(content, "(推荐")
	return num, content, true
}