Browse Source

Merge branch 'bzq/eia_steo' of eta_server/eta_crawler into master

鲍自强 5 months ago
parent
commit
8f25cbb71e
2 changed files with 279 additions and 16 deletions
  1. 41 4
      models/base_from_eia_steo.go
  2. 238 12
      services/eia_steo.go

+ 41 - 4
models/base_from_eia_steo.go

@@ -2,9 +2,10 @@ package models
 
 import (
 	"eta/eta_crawler/utils"
+	"time"
+
 	"github.com/beego/beego/v2/client/orm"
 	"github.com/shopspring/decimal"
-	"time"
 )
 
 // BaseFromEiaSteoIndex EiaSteo指标
@@ -51,6 +52,8 @@ type BaseFromEiaSteoClassify struct {
 	BaseFromEiaSteoClassifyId int       `orm:"column(base_from_eia_steo_classify_id);pk"`
 	ClassifyName              string    `description:"分类名称(中文名称)"`
 	ClassifyNameOriginal      string    `description:"分类名称(原始名称)"`
+	ParentId                  int       `description:"父级id"`
+	Level                     int       `description:"层级"`
 	ModifyTime                time.Time `description:"最新更新时间"`
 	CreateTime                time.Time `description:"创建时间"`
 }
@@ -85,6 +88,20 @@ func GetBaseFromEiaSteoClassifyAll() (list []*BaseFromEiaSteoClassify, err error
 	return
 }
 
+func GetChildBaseFromEiaSteoClassifyById(classifyId int) (list []*BaseFromEiaSteoClassify, err error) {
+	o := orm.NewOrmUsingDB("data")
+	sql := `SELECT * FROM base_from_eia_steo_classify WHERE parent_id = ?`
+	_, err = o.Raw(sql, classifyId).QueryRows(&list)
+	return
+}
+
+func GetBaseFromEiaSteoClassifyByName(classifyName string) (item *BaseFromEiaSteoClassify, err error) {
+	o := orm.NewOrmUsingDB("data")
+	sql := `SELECT * FROM base_from_eia_steo_classify WHERE classify_name = ?`
+	err = o.Raw(sql, classifyName).QueryRow(&item)
+	return
+}
+
 // GetBaseFromEiaSteoIndexAll 获取所有的指标
 func GetBaseFromEiaSteoIndexAll() (list []*BaseFromEiaSteoIndex, err error) {
 	o := orm.NewOrmUsingDB("data")
@@ -93,6 +110,12 @@ func GetBaseFromEiaSteoIndexAll() (list []*BaseFromEiaSteoIndex, err error) {
 	return
 }
 
+func lastDayOfMonth(year int, month time.Month) string {
+	firstDay := time.Date(year, month, 1, 0, 0, 0, 0, time.Local)
+	nextMonth := firstDay.AddDate(0, 1, 0)
+	return nextMonth.Add(-time.Second * 1).Format(utils.FormatDate)
+}
+
 // HandleEiaSteoData 数据处理
 func HandleEiaSteoData(dataList map[string]interface{}, indexInfo *BaseFromEiaSteoIndex) (err error) {
 	o := orm.NewOrmUsingDB("data")
@@ -116,7 +139,9 @@ func HandleEiaSteoData(dataList map[string]interface{}, indexInfo *BaseFromEiaSt
 
 	existMap := make(map[string]*BaseFromEiaSteoData)
 	for _, v := range existList {
-		existMap[v.DataTime.Format(utils.FormatDate)] = v
+		// 产品要求每个月的最后一天,为了兼容之前每个月第一天的数据
+		dateTime := lastDayOfMonth(v.DataTime.Year(), v.DataTime.Month())
+		existMap[dateTime] = v
 	}
 
 	// 待添加的数据列表
@@ -135,13 +160,20 @@ func HandleEiaSteoData(dataList map[string]interface{}, indexInfo *BaseFromEiaSt
 			err = tmpErr
 			return
 		}
+		initDate := time.Date(2010, time.January, 1, 0, 0, 0, 0, time.Local)
+		if currDate.Before(initDate) {
+			continue
+		}
 		if currDate.After(endDate) {
 			endDate = currDate
 		}
 		if currDate.Before(startDate) {
 			startDate = currDate
 		}
-		date = currDate.Format(utils.FormatDate)
+		// 产品要求每个月的最后一天
+		date = lastDayOfMonth(currDate.Year(), currDate.Month())
+		currDate, _ = time.ParseInLocation(utils.FormatDate, date, time.Local)
+		// date = currDate.Format(utils.FormatDate)
 		//fmt.Println("date:", date)
 
 		tmpBaseFromEiaSteoData, ok := existMap[date]
@@ -158,10 +190,15 @@ func HandleEiaSteoData(dataList map[string]interface{}, indexInfo *BaseFromEiaSt
 		} else {
 			nowVal := decimal.NewFromFloat(tmpBaseFromEiaSteoData.Value)
 			//fmt.Println(date, "=========", nowVal.Equal(valDecimal))
+			var updateCols []string
 			if !nowVal.Equal(valDecimal) {
 				tmpBaseFromEiaSteoData.Value = val
 				tmpBaseFromEiaSteoData.ModifyTime = time.Now()
-				_, err = to.Update(tmpBaseFromEiaSteoData, "Value", "ModifyTime")
+				updateCols = append(updateCols, "value")
+				updateCols = append(updateCols, "modify_time")
+			}
+			if len(updateCols) > 0 {
+				_, err = to.Update(tmpBaseFromEiaSteoData, updateCols...)
 				if err != nil {
 					return
 				}

+ 238 - 12
services/eia_steo.go

@@ -4,24 +4,58 @@ import (
 	"context"
 	"encoding/json"
 	"eta/eta_crawler/models"
+	"eta/eta_crawler/utils"
 	"fmt"
-	"github.com/rdlucklib/rdluck_tools/http"
 	"reflect"
 	"strconv"
+	"strings"
 	"time"
+
+	"github.com/rdlucklib/rdluck_tools/http"
 )
 
+var noCrawlerName = map[string]struct{}{"Real Gross Domestic Product": {}, "Nominal U.S. Dollar Exchange Rate": {}}
+
 func SyncEiaSteoData(cont context.Context) (err error) {
-	err = syncEiaSteoData()
+	// err = syncEiaSteoData()
+	eiaSteoUrls := map[string]string{
+		"International Petroleum and Other Liquids Production、Consumption、Inventories": `https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=6&f=M&s=0&id=&linechart=PAPR_OECD~PAPR_NONOPEC&maptype=0&ctype=linechart&map=&method=getData`,
+		"Non-OPEC Petroleum and Other Liquids Production":                              `https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=29&f=M&s=0&start=201901&end=202512&id=&ctype=linechart&maptype=0&method=getData`,
+		"Total Liquid Fuels Production":                                                `https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=7&f=M&s=0&start=201901&end=202512&maptype=0&ctype=linechart&id=&method=getData`,
+		"Total Crude Oil Production":                                                   `https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=30&f=M&s=0&start=201901&end=202512&id=&ctype=linechart&maptype=0&method=getData`,
+		"World Petroleum and Other Liquid Fuels Consumption":                           `https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=31&f=M&s=0&start=201901&end=202512&maptype=0&ctype=linechart&id=&method=getData`,
+		"U.S. Petroleum and Other Liquids Supply、Consumption、Inventories":              `https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=9&f=M&s=0&start=201901&end=202512&maptype=0&ctype=linechart&method=getData`,
+		"Drilling Productivity Metrics":                                                `https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=32&f=M&s=0&start=201901&end=202512&ctype=linechart&maptype=0&method=getData`,
+		"Crude Oil and Natural Gas Production from Shale and Tight Formations":         `https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=33&f=M&s=0&start=201901&end=202512&maptype=0&ctype=linechart&method=getData`,
+	}
+	var eiaIndexName = []string{
+		"International Petroleum and Other Liquids Production、Consumption、Inventories",
+		"Non-OPEC Petroleum and Other Liquids Production",
+		"Total Liquid Fuels Production",
+		"Total Crude Oil Production",
+		"World Petroleum and Other Liquid Fuels Consumption",
+		"U.S. Petroleum and Other Liquids Supply、Consumption、Inventories",
+		"Drilling Productivity Metrics",
+		"Crude Oil and Natural Gas Production from Shale and Tight Formations",
+	}
+
+	for _, name := range eiaIndexName {
+		url := eiaSteoUrls[name]
+		err = syncEiaSteoDataV2(name, url)
+		if err != nil {
+			fmt.Println("同步失败", err)
+			return
+		}
+	}
 	return
 }
 
-func syncEiaSteoData() (err error) {
+func syncEiaSteoData(eiaSteoUrl string) (err error) {
 	// 获取数据
 
 	//官网地址:https://www.eia.gov/outlooks/steo/data/browser/#/?v=6&f=M&s=0&start=201701&end=202312&linechart=~T3_STCHANGE_WORLD&maptype=0&ctype=linechart&map=
 	// 这是获取数据的链接(月度的)
-	eiaSteoUrl := "https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=6&f=M&s=0&id=&linechart=PAPR_OECD~PAPR_NONOPEC&maptype=0&ctype=linechart&map=&method=getData"
+	// eiaSteoUrl := "https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=6&f=M&s=0&id=&linechart=PAPR_OECD~PAPR_NONOPEC&maptype=0&ctype=linechart&map=&method=getData"
 	eiaSteoData, err := queryData(eiaSteoUrl)
 	if err != nil {
 		fmt.Println("读取失败", err)
@@ -91,17 +125,12 @@ func syncEiaSteoData() (err error) {
 
 		eiaSteoIndex, ok := indexMap[v.SERIESID]
 
-		// 指标名称(中文)
-		indexName := EiaSteoNameMap[v.SERIESID]
-		if indexName == `` {
-			indexName = v.CHARTNAME
-		}
 		if !ok {
 			eiaSteoIndex = &models.BaseFromEiaSteoIndex{
 				//BaseFromEiaSteoIndexId:    0,
 				BaseFromEiaSteoClassifyId: nowClassify.BaseFromEiaSteoClassifyId,
 				IndexCode:                 v.SERIESID,
-				IndexName:                 indexName,
+				IndexName:                 v.CHARTNAME, // 不做中文名称的转换
 				IndexNameOriginal:         v.CHARTNAME,
 				Frequency:                 "月度",
 				Level:                     v.LEVEL,
@@ -128,8 +157,8 @@ func syncEiaSteoData() (err error) {
 				eiaSteoIndex.BaseFromEiaSteoClassifyId = nowClassify.BaseFromEiaSteoClassifyId
 				updateCol = append(updateCol, "BaseFromEiaSteoClassifyId")
 			}
-			if eiaSteoIndex.IndexName != indexName {
-				eiaSteoIndex.IndexName = indexName
+			if eiaSteoIndex.IndexName != v.CHARTNAME {
+				eiaSteoIndex.IndexName = v.CHARTNAME
 				updateCol = append(updateCol, "IndexName")
 			}
 			if eiaSteoIndex.IndexNameOriginal != v.CHARTNAME {
@@ -173,6 +202,203 @@ func syncEiaSteoData() (err error) {
 	return
 }
 
+func syncEiaSteoDataV2(eiaSteoName, eiaSteoUrl string) (err error) {
+	defer func() {
+		if r := recover(); r != nil {
+			fmt.Println("异常:", r)
+			fmt.Println("异常的eiaSteo名字", eiaSteoName)
+		}
+	}()
+	// 获取数据
+
+	//官网地址:https://www.eia.gov/outlooks/steo/data/browser/#/?v=6&f=M&s=0&start=201701&end=202312&linechart=~T3_STCHANGE_WORLD&maptype=0&ctype=linechart&map=
+	// 这是获取数据的链接(月度的)
+	// eiaSteoUrl := "https://www.eia.gov/outlooks/steo/data/browser/data/index.php?v=6&f=M&s=0&id=&linechart=PAPR_OECD~PAPR_NONOPEC&maptype=0&ctype=linechart&map=&method=getData"
+	eiaSteoData, err := queryData(eiaSteoUrl)
+	if err != nil {
+		fmt.Println("读取失败", err)
+		return
+	}
+
+	parentClassify, err := models.GetBaseFromEiaSteoClassifyByName(eiaSteoName)
+	if err != nil && err.Error() != utils.ErrNoRow() {
+		fmt.Println("添加eiaSteo指标异常, err: ", err.Error())
+		return
+	}
+	if parentClassify == nil {
+		tmpClassify := &models.BaseFromEiaSteoClassify{
+			ClassifyName:         eiaSteoName,
+			ClassifyNameOriginal: eiaSteoName,
+			Level:                1,
+			ModifyTime:           time.Now(),
+			CreateTime:           time.Now(),
+		}
+		// 新增指标分类
+		err = tmpClassify.AddBaseFromEiaSteoClassify()
+		if err != nil {
+			return
+		}
+		parentClassify = tmpClassify
+	}
+
+	// 获取分类列表
+	classifyList, err := models.GetChildBaseFromEiaSteoClassifyById(parentClassify.BaseFromEiaSteoClassifyId)
+	if err != nil {
+		fmt.Println("获取分类失败:", err)
+		return
+	}
+	classifyMap := make(map[string]*models.BaseFromEiaSteoClassify)
+	for _, v := range classifyList {
+		classifyMap[v.ClassifyNameOriginal] = v
+	}
+	// 获取指标列表
+	indexList, err := models.GetBaseFromEiaSteoIndexAll()
+	if err != nil {
+		fmt.Println("获取分类失败:", err)
+		return
+	}
+	indexMap := make(map[string]*models.BaseFromEiaSteoIndex)
+	for _, v := range indexList {
+		indexMap[v.IndexCode] = v
+	}
+
+	var hasClassify *models.BaseFromEiaSteoClassify
+	var noDataClassify *models.BaseFromEiaSteoClassify
+	for i, v := range eiaSteoData.VIEWSDATA.ROWS {
+		// 如果没有数据,那么就返回
+		if v.HASDATA != 1 && strings.TrimSpace(v.CHARTNAME) == "" {
+			curRow := eiaSteoData.VIEWSDATA.ROWS
+			length := len(eiaSteoData.VIEWSDATA.ROWS)
+			if length > i+1 {
+				if noDataClassify != nil {
+					if curRow[i+1].LEVEL == 1 && strings.TrimSpace(curRow[i+1].CHARTNAME) != "" && curRow[i+1].HASDATA != 1 {
+						noDataClassify = nil
+					}
+				}
+				if hasClassify != nil {
+					if curRow[i+1].LEVEL == 1 && strings.TrimSpace(curRow[i+1].CHARTNAME) != "" && curRow[i+1].HASDATA == 1 {
+						hasClassify = nil
+					}
+				}
+			}
+			continue
+		}
+		if noDataClassify == nil && hasClassify == nil && strings.TrimSpace(v.CHARTNAME) != "" && v.LEVEL == 1 {
+			classify, ok := classifyMap[v.CHARTNAME]
+			if !ok {
+				tmpClassify := &models.BaseFromEiaSteoClassify{
+					BaseFromEiaSteoClassifyId: 0,
+					ClassifyName:              v.CHARTNAME,
+					ClassifyNameOriginal:      v.CHARTNAME,
+					ParentId:                  parentClassify.BaseFromEiaSteoClassifyId,
+					Level:                     2,
+					ModifyTime:                time.Now(),
+					CreateTime:                time.Now(),
+				}
+				classifyMap[v.CHARTNAME] = tmpClassify
+				if v.HASDATA == 1 {
+					hasClassify = tmpClassify
+				} else {
+					noDataClassify = tmpClassify
+				}
+				if _, ok := noCrawlerName[tmpClassify.ClassifyName]; ok {
+					continue
+				}
+				// 新增指标分类
+				err = tmpClassify.AddBaseFromEiaSteoClassify()
+				if err != nil {
+					return
+				}
+			} else {
+				if v.HASDATA == 1 {
+					hasClassify = classify
+				} else {
+					noDataClassify = classify
+				}
+			}
+		}
+		// 如果系列名称为空的话,那么也返回
+		if v.SERIESID == `` {
+			continue
+		}
+		if v.HASDATA != 1 {
+			continue
+		}
+
+		var curClassify *models.BaseFromEiaSteoClassify
+		if noDataClassify != nil {
+			curClassify = noDataClassify
+		} else {
+			curClassify = hasClassify
+		}
+		if _, ok := noCrawlerName[curClassify.ClassifyName]; ok {
+			continue
+		}
+
+		eiaSteoIndex, ok := indexMap[v.SERIESID]
+		if !ok {
+			eiaSteoIndex = &models.BaseFromEiaSteoIndex{
+				//BaseFromEiaSteoIndexId:    0,
+				BaseFromEiaSteoClassifyId: curClassify.BaseFromEiaSteoClassifyId,
+				IndexCode:                 v.SERIESID,
+				IndexName:                 v.CHARTNAME, // 不做中文名称的转换
+				IndexNameOriginal:         v.CHARTNAME,
+				Frequency:                 "月度",
+				Level:                     v.LEVEL,
+				Unit:                      v.UNITS,
+				Super:                     v.SUPER,
+				Precision:                 v.PRECISION,
+				LastHistorical:            strconv.Itoa(v.LASTHISTORICAL),
+				Description:               v.DESCRIPTION,
+				IsMappable:                v.ISMAPPABLE,
+				StartDate:                 time.Now(),
+				EndDate:                   time.Now(),
+				ModifyTime:                time.Now(),
+				CreateTime:                time.Now(),
+			}
+			// 新增指标
+			err = eiaSteoIndex.Add()
+			if err != nil {
+				return
+			}
+			indexMap[v.SERIESID] = eiaSteoIndex
+		} else {
+			updateCol := make([]string, 0)
+			if eiaSteoIndex.BaseFromEiaSteoClassifyId != curClassify.BaseFromEiaSteoClassifyId {
+				eiaSteoIndex.BaseFromEiaSteoClassifyId = curClassify.BaseFromEiaSteoClassifyId
+				updateCol = append(updateCol, "BaseFromEiaSteoClassifyId")
+			}
+			if eiaSteoIndex.IndexName != v.CHARTNAME {
+				eiaSteoIndex.IndexName = v.CHARTNAME
+				updateCol = append(updateCol, "IndexName")
+			}
+			if eiaSteoIndex.IndexNameOriginal != v.CHARTNAME {
+				eiaSteoIndex.IndexNameOriginal = v.CHARTNAME
+				updateCol = append(updateCol, "IndexNameOriginal")
+			}
+			if len(updateCol) > 0 {
+				eiaSteoIndex.Update(updateCol)
+			}
+
+		}
+
+		//校验数据类型对不对
+		valType := reflect.TypeOf(v.DATA)
+		switch valType.String() {
+		case "[]interface {}": // 没有数据
+		case "map[string]interface {}": // 有数据
+			data := v.DATA.(map[string]interface{})
+			err = models.HandleEiaSteoData(data, eiaSteoIndex)
+			if err != nil {
+				return
+			}
+		}
+
+		fmt.Println(v.CHARTNAME, v.SERIESID, "==", v.PRECISION, "===========")
+	}
+	return
+}
+
 // queryData 接口请求网站数据
 func queryData(eiaSteoUrl string) (eiaSteoData EiaSteoData, err error) {
 	body, err := http.Get(eiaSteoUrl)