|
- package services
- import (
- "eta/eta_data_analysis/models"
- "eta/eta_data_analysis/utils"
- "fmt"
- "github.com/PuerkitoBio/goquery"
- "strings"
- "time"
- )
// ZhiJiangListMap maps an Oilchem article title to the base URL of the list
// section that ZhiJiangList pages through when looking for that article.
// The two sample-output titles share one list section; the port-inventory
// title has its own.
var ZhiJiangListMap = map[string]string{
	"中国化机浆样本产量周数据分析":    "https://list.oilchem.net/2959/45240/",
	"中国阔叶浆样本产量周数据分析":    "https://list.oilchem.net/2959/45240/",
	"中国纸浆主流港口样本库存周数据分析": "https://list.oilchem.net/2959/45241/",
}
- func ZhiJiangList(num int) (err error) {
- for k, v := range ZhiJiangListMap {
- for i := 1; i < num; i++ {
- listUrl := v + fmt.Sprintf("%d.html",i)
- fmt.Println("listUrl:",listUrl)
- htm, e := FetchPageHtml(listUrl)
- if e != nil {
- err = e
- utils.FileLog.Error(fmt.Sprintf("FetchPageHtml err:%v",err))
- fmt.Println("FetchPageHtml err",err)
- return
- }
- err = AnalysisOilchemList(htm, k)
- if err != nil {
- utils.FileLog.Error(fmt.Sprintf("AnalysisOilchemList err:%v",err))
- fmt.Println("AnalysisOilchemList err",err)
- return
- }
- time.Sleep(2*time.Second)
- }
- }
- return
- }
- // 中国化机浆样本产量,中国阔叶浆样本产量
- func AnalysisOilchemZhiJiang1(htm []byte) (err error) {
- if len(htm) == 0 {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
- //titleList := make([]string, 0)
- unitList := make([]string, 0)
- doc.Find("p").Each(func(i int, selection *goquery.Selection) {
- ptext := selection.Text()
- if strings.Contains(ptext, "单位:") {
- unit := strings.Replace(ptext, "单位:", "", -1)
- //fmt.Println("unit:",unit)
- unitList = append(unitList, unit)
- }
- //if strings.Contains(ptext, "中国纸浆主流港口样本库存周数据") {
- // title := ptext
- // //fmt.Println("title:",title)
- // titleList = append(titleList, title)
- //}
- })
- area := ""
- title := doc.Find("h2").Text()
- fmt.Println("title:",title)
- createTimeStr := doc.Find("h2").Next().Text()
- createTimeStr = strings.TrimLeft(createTimeStr,"发布时间:")
- createTimeStrIndex := strings.Index(createTimeStr,"来源:")
- createTimeStr = createTimeStr[:createTimeStrIndex]
- createTimeStr = strings.TrimSpace(createTimeStr)
- createTime, err := time.Parse(utils.HlbFormatDateTimeNoSecond, createTimeStr)
- if err != nil {
- utils.FileLog.Error(fmt.Sprintf("time.Parse err:%v",err))
- return
- }
- //fmt.Println("createTime:",createTime)
- dataTime := createTime.Format(utils.FormatDate)
- //fmt.Println("dataTime:",dataTime)
- indexList := make([]*models.BaseFromOilchemIndex, 0)
- doc.Find("tbody").Each(func(tableIndex int, table *goquery.Selection) {
- table.Find("tr").First().Each(func(ii int, table2 *goquery.Selection) {
- table.Find("td").Each(func(jj int, table3 *goquery.Selection) {
- text3 := table3.Text()
- text3 = strings.Replace(text3,"\n","",-1)
- text3 = strings.Replace(text3," ","",-1)
- if text3 == "上周" || text3 == "环比" || text3 == "地区" {
- return
- }
- //fmt.Println("table3:",text3)
- //utils.FileLog.Info(fmt.Sprintf("table3:%s",text3))
- //fmt.Println("ii:",ii)
- //utils.FileLog.Info(fmt.Sprintf("ii:%d",ii))
- //fmt.Println("jj:",jj)
- //utils.FileLog.Info(fmt.Sprintf("jj:%d",jj))
- //fmt.Println("tableIndex:",tableIndex)
- //utils.FileLog.Info(fmt.Sprintf("tableIndex:%d",tableIndex))
- if utils.ContainsChinese(text3) && text3 != "本周" {
- area = text3
- return
- }
- if area == "" {
- return
- }
- value := text3
- value = strings.TrimRight(value, "%")
- //valueF, e := strconv.ParseFloat(value, 64)
- //if e != nil {
- // err = e
- // utils.FileLog.Error(fmt.Sprintf("strconv.ParseFloat err:%v",e))
- // return
- //}
- indexName := "中国" + area + "样本产量"
- fmt.Println("indexName:",indexName)
- fmt.Println("valueF:",value)
- //unit := ""
- //if strings.Contains(area,"产量") {
- // unit = "万吨"
- //} else {
- // unit = "%"
- //}
- item := &models.BaseFromOilchemIndex{
- IndexName: indexName,
- ClassifyId: 4,
- Unit: "万吨",
- Frequency: "周度",
- Describe: "",
- DataTime: dataTime,
- Value: value,
- Sort: 0,
- CreateTime: time.Now(),
- ModifyTime: time.Now(),
- IndexNameStr: indexName,
- MarketName: "",
- }
- indexList = append(indexList,item)
- area = ""
- })
- })
- })
- // 写入数据库
- err = PostHandleOilchem(indexList)
- if err != nil {
- utils.FileLog.Error(fmt.Sprintf("PostHandleOilchem err:%v",err))
- fmt.Println("PostHandleOilchem err",err)
- return
- }
- return
- }
- // 中国纸浆主流港口样本库存周数据
- func AnalysisOilchemZhiJiang2(htm []byte) (err error) {
- if len(htm) == 0 {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
- //titleList := make([]string, 0)
- unitList := make([]string, 0)
- doc.Find("p").Each(func(i int, selection *goquery.Selection) {
- ptext := selection.Text()
- if strings.Contains(ptext, "单位:") {
- unit := strings.Replace(ptext, "单位:", "", -1)
- //fmt.Println("unit:",unit)
- unitList = append(unitList, unit)
- }
- //if strings.Contains(ptext, "中国纸浆主流港口样本库存周数据") {
- // title := ptext
- // //fmt.Println("title:",title)
- // titleList = append(titleList, title)
- //}
- })
- area := ""
- title := doc.Find("h2").Text()
- fmt.Println("title:",title)
- createTimeStr := doc.Find("h2").Next().Text()
- createTimeStr = strings.TrimLeft(createTimeStr,"发布时间:")
- createTimeStrIndex := strings.Index(createTimeStr,"来源:")
- createTimeStr = createTimeStr[:createTimeStrIndex]
- createTimeStr = strings.TrimSpace(createTimeStr)
- createTime, err := time.Parse(utils.HlbFormatDateTimeNoSecond, createTimeStr)
- if err != nil {
- utils.FileLog.Error(fmt.Sprintf("time.Parse err:%v",err))
- return
- }
- //fmt.Println("createTime:",createTime)
- dataTime := createTime.Format(utils.FormatDate)
- //fmt.Println("dataTime:",dataTime)
- indexList := make([]*models.BaseFromOilchemIndex, 0)
- doc.Find("tbody").Each(func(tableIndex int, table *goquery.Selection) {
- table.Find("tr").First().Each(func(ii int, table2 *goquery.Selection) {
- table.Find("td").Each(func(jj int, table3 *goquery.Selection) {
- text3 := table3.Text()
- text3 = strings.Replace(text3,"\n","",-1)
- text3 = strings.Replace(text3," ","",-1)
- if text3 == "上周" || text3 == "环比" || text3 == "地区" {
- return
- }
- //fmt.Println("table3:",text3)
- //utils.FileLog.Info(fmt.Sprintf("table3:%s",text3))
- //fmt.Println("ii:",ii)
- //utils.FileLog.Info(fmt.Sprintf("ii:%d",ii))
- //fmt.Println("jj:",jj)
- //utils.FileLog.Info(fmt.Sprintf("jj:%d",jj))
- //fmt.Println("tableIndex:",tableIndex)
- //utils.FileLog.Info(fmt.Sprintf("tableIndex:%d",tableIndex))
- if utils.ContainsChinese(text3) && text3 != "本周" {
- area = text3
- return
- }
- if area == "" {
- return
- }
- value := text3
- value = strings.TrimRight(value, "%")
- //valueF, e := strconv.ParseFloat(value, 64)
- //if e != nil {
- // err = e
- // utils.FileLog.Error(fmt.Sprintf("strconv.ParseFloat err:%v",e))
- // return
- //}
- indexName := "中国纸浆主流港口样本库存" +"("+ area + ")"
- fmt.Println("indexName:",indexName)
- fmt.Println("valueF:",value)
- //unit := ""
- //if strings.Contains(area,"产量") {
- // unit = "万吨"
- //} else {
- // unit = "%"
- //}
- item := &models.BaseFromOilchemIndex{
- IndexName: indexName,
- ClassifyId: 4,
- Unit: "万吨",
- Frequency: "周度",
- Describe: "",
- DataTime: dataTime,
- Value: value,
- Sort: 0,
- CreateTime: time.Now(),
- ModifyTime: time.Now(),
- IndexNameStr: "中国纸浆主流港口样本库存",
- MarketName: area,
- }
- indexList = append(indexList,item)
- area = ""
- })
- })
- })
- // 写入数据库
- err = PostHandleOilchem(indexList)
- if err != nil {
- utils.FileLog.Error(fmt.Sprintf("PostHandleOilchem err:%v",err))
- fmt.Println("PostHandleOilchem err",err)
- return
- }
- return
- }
|