package services

import (
	"eta/eta_data_analysis/models"
	"eta/eta_data_analysis/utils"
	"fmt"
	"github.com/PuerkitoBio/goquery"
	"strings"
	"time"
)

// ZhiJiangListMap maps an article-title keyword to the oilchem.net list URL
// whose pages are fetched for it by ZhiJiangList.
//
// NOTE(review): the first two keywords point at the same list URL, so that
// list is fetched once per keyword — confirm this is intended.
var ZhiJiangListMap = map[string]string{
	"中国化机浆样本产量周数据分析":    "https://list.oilchem.net/2959/45240/",
	"中国阔叶浆样本产量周数据分析":    "https://list.oilchem.net/2959/45240/",
	"中国纸浆主流港口样本库存周数据分析": "https://list.oilchem.net/2959/45241/",
}

// ZhiJiangList walks the list pages of every entry in ZhiJiangListMap,
// fetching "<base><i>.html" and passing the HTML together with the map key to
// AnalysisOilchemList. It pauses two seconds after each page and stops at the
// first fetch or analysis error, which is logged and returned.
//
// NOTE(review): the page loop runs for i = 1 .. num-1, so num=1 fetches
// nothing — confirm whether num is meant to be an exclusive upper bound.
func ZhiJiangList(num int) (err error) {
	for keyword, baseURL := range ZhiJiangListMap {
		for page := 1; page < num; page++ {
			listURL := baseURL + fmt.Sprintf("%d.html", page)
			fmt.Println("listUrl:", listURL)

			htm, e := FetchPageHtml(listURL)
			if e != nil {
				err = e
				utils.FileLog.Error(fmt.Sprintf("FetchPageHtml err:%v", err))
				fmt.Println("FetchPageHtml err", err)
				return
			}

			err = AnalysisOilchemList(htm, keyword)
			if err != nil {
				utils.FileLog.Error(fmt.Sprintf("AnalysisOilchemList err:%v", err))
				fmt.Println("AnalysisOilchemList err", err)
				return
			}

			time.Sleep(2 * time.Second)
		}
	}
	return
}

// AnalysisOilchemZhiJiang1 parses a weekly article page for the
// "China chemi-mechanical pulp sample output" / "China hardwood pulp sample
// output" series. Every (area, value) pair found in the page's tables becomes
// an indicator named "中国<area>样本产量" with an empty market name.
//
// An empty htm is logged and treated as a no-op (nil error).
func AnalysisOilchemZhiJiang1(htm []byte) (err error) {
	return analysisZhiJiangWeeklyPage(htm, func(area string) (string, string, string) {
		name := "中国" + area + "样本产量"
		return name, name, ""
	})
}

// AnalysisOilchemZhiJiang2 parses a weekly article page for the
// "China pulp mainstream port sample inventory" series. Every (area, value)
// pair found in the page's tables becomes an indicator named
// "中国纸浆主流港口样本库存(<area>)" whose market name is the area.
//
// An empty htm is logged and treated as a no-op (nil error).
func AnalysisOilchemZhiJiang2(htm []byte) (err error) {
	return analysisZhiJiangWeeklyPage(htm, func(area string) (string, string, string) {
		return "中国纸浆主流港口样本库存" + "(" + area + ")", "中国纸浆主流港口样本库存", area
	})
}

// analysisZhiJiangWeeklyPage holds the logic shared by the AnalysisOilchemZhiJiang*
// entry points: parse the HTML, derive the data date from the publish line,
// scrape (area, value) pairs from every <tbody>, and pass the resulting
// indicators to PostHandleOilchem.
//
// naming turns a scraped area label into the indicator's IndexName,
// IndexNameStr and MarketName, in that order.
func analysisZhiJiangWeeklyPage(htm []byte, naming func(area string) (indexName, indexNameStr, marketName string)) (err error) {
	if len(htm) == 0 {
		utils.FileLog.Info("htm empty")
		return
	}

	doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
	if e != nil {
		err = fmt.Errorf("NewDocumentFromReader err: %v", e)
		return
	}

	fmt.Println("title:", doc.Find("h2").Text())

	dataTime, e := zhiJiangPublishDate(doc)
	if e != nil {
		err = e
		utils.FileLog.Error(fmt.Sprintf("time.Parse err:%v", err))
		return
	}

	indexList := make([]*models.BaseFromOilchemIndex, 0)

	// area carries the most recent area label until the next value cell
	// consumes it. It is deliberately shared across all tables of the page.
	area := ""
	doc.Find("tbody").Each(func(_ int, table *goquery.Selection) {
		// Tables without any row are skipped; otherwise every cell of the
		// table (not just the first row) is visited in document order.
		if table.Find("tr").Length() == 0 {
			return
		}
		table.Find("td").Each(func(_ int, cell *goquery.Selection) {
			text := cell.Text()
			text = strings.Replace(text, "\n", "", -1)
			text = strings.Replace(text, " ", "", -1)

			// Header cells for last week / change / region carry no data.
			switch text {
			case "上周", "环比", "地区":
				return
			}
			// Any other Chinese text except the "this week" header is an area label.
			if utils.ContainsChinese(text) && text != "本周" {
				area = text
				return
			}
			// A value cell is only meaningful right after an area label.
			if area == "" {
				return
			}

			// The value is kept as a string; only trailing percent signs are dropped.
			value := strings.TrimRight(text, "%")
			indexName, indexNameStr, marketName := naming(area)
			fmt.Println("indexName:", indexName)
			fmt.Println("valueF:", value)

			indexList = append(indexList, &models.BaseFromOilchemIndex{
				IndexName:    indexName,
				ClassifyId:   4,
				Unit:         "万吨",
				Frequency:    "周度",
				Describe:     "",
				DataTime:     dataTime,
				Value:        value,
				Sort:         0,
				CreateTime:   time.Now(),
				ModifyTime:   time.Now(),
				IndexNameStr: indexNameStr,
				MarketName:   marketName,
			})
			// Only the first value after an area label is recorded.
			area = ""
		})
	})

	// Hand the indicators over for persistence (the original comment here read
	// "write to database").
	err = PostHandleOilchem(indexList)
	if err != nil {
		utils.FileLog.Error(fmt.Sprintf("PostHandleOilchem err:%v", err))
		fmt.Println("PostHandleOilchem err", err)
		return
	}
	return
}

// zhiJiangPublishDate reads the element following the first <h2>, isolates the
// timestamp between the publish-time label and the source label, parses it
// with utils.HlbFormatDateTimeNoSecond and returns it formatted with
// utils.FormatDate.
//
// Either label may be absent: a missing publish label leaves the text
// untouched, and a missing source label keeps everything up to the end, so a
// malformed page yields a parse error instead of a slice-bounds panic.
func zhiJiangPublishDate(doc *goquery.Document) (string, error) {
	const (
		publishLabel = "发布时间:"
		sourceLabel  = "来源:"
	)

	raw := doc.Find("h2").Next().Text()
	if i := strings.Index(raw, publishLabel); i >= 0 {
		raw = raw[i+len(publishLabel):]
	}
	if i := strings.Index(raw, sourceLabel); i >= 0 {
		raw = raw[:i]
	}

	publishedAt, err := time.Parse(utils.HlbFormatDateTimeNoSecond, strings.TrimSpace(raw))
	if err != nil {
		return "", err
	}
	return publishedAt.Format(utils.FormatDate), nil
}