소스 검색

新增居民价格消费指数爬虫

tuoling805 1 년 전
부모
커밋
0cd1956068
4개의 변경된 파일317개의 추가작업 그리고 0개의 파일을 삭제
  1. 93 0
      models/base_from_icpi.go
  2. 3 0
      models/db.go
  3. 217 0
      services/icpi.go
  4. 4 0
      services/task.go

+ 93 - 0
models/base_from_icpi.go

@@ -0,0 +1,93 @@
+package models
+
+import (
+	"github.com/beego/beego/v2/client/orm"
+	"time"
+)
+
+// BaseFromIcpiIndex is the ORM model for one iCPI index definition. The raw
+// queries in this file read and update it through the base_from_icpi_index
+// table. Each field's `description` tag carries its meaning: index code, index
+// name, frequency, start date, end date, creation time and modification time.
+type BaseFromIcpiIndex struct {
+	BaseFromIcpiIndexId int       `orm:"column(base_from_icpi_index_id);pk"`
+	IndexCode           string    `description:"指标编码"`
+	IndexName           string    `description:"指标名称"`
+	Frequency           string    `description:"频度"`
+	StartDate           time.Time `description:"开始日期"`
+	EndDate             time.Time `description:"结束日期"`
+	CreateTime          time.Time `description:"创建时间"`
+	ModifyTime          time.Time `description:"修改时间"`
+}
+
+// BaseFromIcpiData is the ORM model for one data point of an iCPI index. The
+// raw queries in this file read and update it through the base_from_icpi_data
+// table, keyed by index_code and data_time. DataTime and Value are kept as
+// strings. Each field's `description` tag carries its meaning: index id, index
+// code, date, value, creation time and modification time.
+type BaseFromIcpiData struct {
+	BaseFromIcpiDataId  int       `orm:"column(base_from_icpi_data_id);pk"`
+	BaseFromIcpiIndexId int       `description:"指标id"`
+	IndexCode           string    `description:"指标编码"`
+	DataTime            string    `description:"日期"`
+	Value               string    `description:"值"`
+	CreateTime          time.Time `description:"创建时间"`
+	ModifyTime          time.Time `description:"修改时间"`
+}
+
+func (obj *BaseFromIcpiIndex) GetBaseFromIcpiIndexAll() (list []*BaseFromIcpiIndex, err error) {
+	o := orm.NewOrmUsingDB("data")
+	sql := `SELECT * FROM base_from_icpi_index`
+	_, err = o.Raw(sql).QueryRows(&list)
+	return
+}
+
+// Add inserts the receiver into the "data" database and, on success, stores
+// the id returned by the insert in m.BaseFromIcpiIndexId.
+func (m *BaseFromIcpiIndex) Add() (err error) {
+	newID, err := orm.NewOrmUsingDB("data").Insert(m)
+	if err == nil {
+		m.BaseFromIcpiIndexId = int(newID)
+	}
+	return err
+}
+
+func (obj *BaseFromIcpiIndex) GetBaseFromIcpiData(indexCode string, limit int) (list []*BaseFromIcpiData, err error) {
+	o := orm.NewOrmUsingDB("data")
+	sql := `SELECT * FROM base_from_icpi_data WHERE index_code=? ORDER BY data_time DESC LIMIT ?`
+	_, err = o.Raw(sql, indexCode, limit).QueryRows(&list)
+	return
+}
+
+// AddData inserts dataItem into the "data" database and returns the insert
+// error, if any. The generated id is discarded.
+func (m *BaseFromIcpiIndex) AddData(dataItem *BaseFromIcpiData) (err error) {
+	_, err = orm.NewOrmUsingDB("data").Insert(dataItem)
+	return err
+}
+
+func (m *BaseFromIcpiIndex) ModifyData(indexCode, dataTime, value string) (err error) {
+	o := orm.NewOrmUsingDB("data")
+	sql := ` UPDATE base_from_icpi_data SET value=? WHERE index_code=? AND data_time=? `
+	_, err = o.Raw(sql, value, indexCode, dataTime).Exec()
+	return
+}
+
+// IndexMaxAndMinInfo holds the aggregate (min/max date, min/max value, latest value and date) figures of one index's data rows.
+type IndexMaxAndMinInfo struct {
+	MinDate     string  `description:"最小日期"`
+	MaxDate     string  `description:"最大日期"`
+	MinValue    float64 `description:"最小值"`
+	MaxValue    float64 `description:"最大值"`
+	LatestValue float64 `description:"最新值"`
+	LatestDate  string  `description:"实际数据最新日期"`
+}
+
+// 获取指标的最新数据记录信息
+func (m *BaseFromIcpiIndex) GetBaseFromIcpiIndexMaxAndMinInfo(indexCode string) (item *IndexMaxAndMinInfo, err error) {
+	o := orm.NewOrmUsingDB("data")
+	sql := ` SELECT MIN(data_time) AS min_date,MAX(data_time) AS max_date,MIN(value) AS min_value,MAX(value) AS max_value FROM base_from_icpi_data WHERE index_code=? `
+	err = o.Raw(sql, indexCode).QueryRow(&item)
+	return
+}
+
+// 修改指标的最新数据信息
+func (m *BaseFromIcpiIndex) ModifyEdbInfoMaxAndMinInfo(indexCode string, item *IndexMaxAndMinInfo) (err error) {
+	o := orm.NewOrmUsingDB("data")
+	sql := ` UPDATE base_from_icpi_index SET start_date=?,end_date=?,modify_time=NOW() WHERE index_code=? `
+	_, err = o.Raw(sql, item.MinDate, item.MaxDate, indexCode).Exec()
+	return
+}

+ 3 - 0
models/db.go

@@ -56,5 +56,8 @@ func init() {
 		new(BaseFromNationalStatisticsIndex),
 		new(BaseFromNationalStatisticsData),
 		new(MeetingProbabilities),
+
+		new(BaseFromIcpiIndex),
+		new(BaseFromIcpiData),
 	)
 }

+ 217 - 0
services/icpi.go

@@ -0,0 +1,217 @@
+package services
+
+import (
+	"context"
+	"encoding/json"
+	"eta/eta_crawler/models"
+	"eta/eta_crawler/utils"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// CrawlerIcpi crawls the iCPI index list published on www.bdecon.com and
+// stores today's and yesterday's values of every index in the "data" database.
+//
+// Flow:
+//  1. GET /dataIndex to obtain the cookies the query endpoint is called with.
+//  2. POST /indexqueryiCPI/ (form body "laber=1") and decode the JSON array.
+//  3. For every item: create the index row when its code is unknown, insert or
+//     update the data rows for today and yesterday, then refresh the index's
+//     start/end dates.
+//
+// cont cancels the HTTP requests; a nil context is treated as
+// context.Background(). The first error aborts the run; it is printed, written
+// to utils.FileLog and returned.
+func CrawlerIcpi(cont context.Context) (err error) {
+	defer func() {
+		if err != nil {
+			fmt.Println("CrawlerIcpi Err:" + err.Error())
+			utils.FileLog.Info("CrawlerIcpi Err:" + err.Error())
+		}
+	}()
+	if cont == nil {
+		cont = context.Background()
+	}
+
+	list, err := fetchIcpiItems(cont)
+	if err != nil {
+		return err
+	}
+
+	icpiIndexObj := new(models.BaseFromIcpiIndex)
+	indexAll, err := icpiIndexObj.GetBaseFromIcpiIndexAll()
+	if err != nil {
+		return fmt.Errorf("GetBaseFromIcpiIndexAll: %w", err)
+	}
+	indexMap := make(map[string]*models.BaseFromIcpiIndex, len(indexAll))
+	for _, v := range indexAll {
+		indexMap[v.IndexCode] = v
+	}
+
+	// Resolve both dates once so a run that crosses midnight stays consistent.
+	now := time.Now()
+	today := now.Format(utils.FormatDate)
+	yesterday := now.AddDate(0, 0, -1).Format(utils.FormatDate)
+
+	for _, v := range list {
+		if v == nil { // a JSON null element decodes to a nil pointer
+			continue
+		}
+		if err = syncIcpiItem(icpiIndexObj, indexMap, v, today, yesterday); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// fetchIcpiItems downloads the current iCPI list from www.bdecon.com: it first
+// loads the landing page to collect cookies, then posts the query with them.
+func fetchIcpiItems(ctx context.Context) ([]*IcpiItem, error) {
+	const (
+		dataIndexUrl = `http://www.bdecon.com/dataIndex`
+		queryUrl     = "http://www.bdecon.com/indexqueryiCPI/"
+	)
+	// One client with a timeout for both requests so a stalled server cannot
+	// block the scheduled task forever.
+	client := &http.Client{Timeout: 30 * time.Second}
+
+	cookieReq, err := http.NewRequestWithContext(ctx, http.MethodGet, dataIndexUrl, nil)
+	if err != nil {
+		return nil, fmt.Errorf("build request %s: %w", dataIndexUrl, err)
+	}
+	cookieResp, err := client.Do(cookieReq)
+	if err != nil {
+		return nil, fmt.Errorf("request %s: %w", dataIndexUrl, err)
+	}
+	cookies := cookieResp.Cookies()
+	cookieResp.Body.Close() // only the cookies are needed
+
+	// The cookies are sent last-to-first, joined as "name=value;name=value".
+	pairs := make([]string, 0, len(cookies))
+	for i := len(cookies) - 1; i >= 0; i-- {
+		pairs = append(pairs, cookies[i].Name+"="+cookies[i].Value)
+	}
+	cookieStr := strings.Join(pairs, ";")
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, queryUrl, strings.NewReader("laber=1"))
+	if err != nil {
+		return nil, fmt.Errorf("build request %s: %w", queryUrl, err)
+	}
+	// Accept-Encoding is deliberately not set: when the caller sets it, the
+	// transport stops decompressing gzip replies transparently and the JSON
+	// decoding below would receive compressed bytes. Host and Content-Length
+	// are derived from the URL and the body by net/http.
+	req.Header.Set("Accept", "*/*")
+	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
+	req.Header.Set("Connection", "keep-alive")
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
+	if cookieStr != "" {
+		req.Header.Set("Cookie", cookieStr)
+	}
+	req.Header.Set("Origin", "http://www.bdecon.com")
+	req.Header.Set("Referer", "http://www.bdecon.com/dataIndex")
+	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("request %s: %w", queryUrl, err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("request %s: unexpected status %s", queryUrl, resp.Status)
+	}
+
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("read response of %s: %w", queryUrl, err)
+	}
+	utils.FileLog.Info("result:")
+	utils.FileLog.Info(string(body))
+
+	list := make([]*IcpiItem, 0)
+	if err = json.Unmarshal(body, &list); err != nil {
+		return nil, fmt.Errorf("json.Unmarshal: %w", err)
+	}
+	return list, nil
+}
+
+// syncIcpiItem persists one crawled item: it creates the index row when the
+// code is not in indexMap (and records it there), inserts or updates the data
+// rows for today and yesterday, and refreshes the index's start/end dates.
+func syncIcpiItem(icpiIndexObj *models.BaseFromIcpiIndex, indexMap map[string]*models.BaseFromIcpiIndex, item *IcpiItem, today, yesterday string) error {
+	// Only the two newest rows are needed to detect today's and yesterday's values.
+	const limit = 2
+	indexCode := strings.ReplaceAll(item.EnglishName, " ", "_")
+
+	dataList, err := icpiIndexObj.GetBaseFromIcpiData(indexCode, limit)
+	if err != nil {
+		return fmt.Errorf("GetBaseFromIcpiData %q: %w", indexCode, err)
+	}
+	dataMap := make(map[string]string, len(dataList))
+	for _, dv := range dataList {
+		dataMap[dv.DataTime] = dv.Value
+	}
+
+	indexItem, ok := indexMap[indexCode]
+	if !ok { // unknown code: create the index first
+		now := time.Now()
+		indexItem = &models.BaseFromIcpiIndex{
+			IndexCode:  indexCode,
+			IndexName:  item.Name,
+			Frequency:  "日度",
+			CreateTime: now,
+			ModifyTime: now,
+		}
+		if err = indexItem.Add(); err != nil {
+			return fmt.Errorf("add index %q: %w", indexCode, err)
+		}
+		// Remember it so a repeated code in the same response is not added twice.
+		indexMap[indexCode] = indexItem
+	}
+
+	points := []struct{ dataTime, value string }{
+		{today, item.Today},
+		{yesterday, item.Yesterday},
+	}
+	for _, p := range points {
+		oldVal, exists := dataMap[p.dataTime]
+		switch {
+		case !exists:
+			now := time.Now()
+			dataItem := &models.BaseFromIcpiData{
+				BaseFromIcpiIndexId: indexItem.BaseFromIcpiIndexId,
+				IndexCode:           indexCode,
+				DataTime:            p.dataTime,
+				Value:               p.value,
+				CreateTime:          now,
+				ModifyTime:          now,
+			}
+			if err = icpiIndexObj.AddData(dataItem); err != nil {
+				return fmt.Errorf("add data %q %s: %w", indexCode, p.dataTime, err)
+			}
+		case oldVal != p.value:
+			// Each date is compared with and updated to its own value.
+			if err = icpiIndexObj.ModifyData(indexCode, p.dataTime, p.value); err != nil {
+				return fmt.Errorf("modify data %q %s: %w", indexCode, p.dataTime, err)
+			}
+		}
+	}
+
+	maxItem, err := icpiIndexObj.GetBaseFromIcpiIndexMaxAndMinInfo(indexCode)
+	if err != nil {
+		return fmt.Errorf("GetBaseFromIcpiIndexMaxAndMinInfo %q: %w", indexCode, err)
+	}
+	if err = icpiIndexObj.ModifyEdbInfoMaxAndMinInfo(indexCode, maxItem); err != nil {
+		return fmt.Errorf("ModifyEdbInfoMaxAndMinInfo %q: %w", indexCode, err)
+	}
+	return nil
+}
+
+// IcpiItem is one element of the JSON array returned by the iCPI query
+// endpoint. CrawlerIcpi uses Name as the index name, EnglishName (spaces
+// replaced by underscores) as the index code, and Today / Yesterday as the
+// values stored for the current and the previous day.
+type IcpiItem struct {
+	Name        string `json:"name"`
+	EnglishName string `json:"englishName"`
+	Today       string `json:"today"`
+	Yesterday   string `json:"yesterday"`
+}

+ 4 - 0
services/task.go

@@ -26,6 +26,8 @@ func Task() {
 	syncYearComTrade := task.NewTask("SyncYearComTrade", "0 0 3 1 1 *", SyncYearComTrade)                // 每年一月一号同步
 	syncYearMonthComTrade := task.NewTask("SyncYearMonthComTrade", "0 0 3 1 * *", SyncYearMonthComTrade) // 每月1号同步
 
+	crawlerIcpi := task.NewTask("refreshData", "0 0,30 16-18 * * *", CrawlerIcpi) //居民消费价格指数
+
 	// 统计局-分月季年爬
 	//refreshNationalMonthA := task.NewTask("RefreshNationalMonthDbA", "0 15 2 10 * *", national_data.RefreshNationalMonthDbA)
 	//refreshNationalMonthB := task.NewTask("RefreshNationalMonthDbB", "0 15 2 16 * *", national_data.RefreshNationalMonthDbB)
@@ -42,6 +44,8 @@ func Task() {
 	task.AddTask("UN年度数据", syncYearComTrade)      //每年一月一号的3点同步
 	task.AddTask("UN月度数据", syncYearMonthComTrade) //每月1号的3点同步
 
+	task.AddTask("居民消费价格指数", crawlerIcpi) // runs daily at :00 and :30 of hours 16-18
+
 	//task.AddTask("统计局数据爬取-月度A", refreshNationalMonthA) // 每月10号2:15执行
 	//task.AddTask("统计局数据爬取-月度B", refreshNationalMonthB) // 每月16号2:15执行
 	//task.AddTask("统计局数据爬取-季度", refreshNationalQuarter) // 每月15号1:25执行