|
- package national_data
- import (
- "crypto/tls"
- "encoding/json"
- "fmt"
- "hongze/hongze_data_crawler/models"
- "hongze/hongze_data_crawler/services/alarm_msg"
- "hongze/hongze_data_crawler/utils"
- "io/ioutil"
- "net/http"
- "net/url"
- "strings"
- "time"
- )
// NationalStatisticsBaseReqUrl is the easyquery endpoint on data.stats.gov.cn
// that the classification-tree and data queries in this file post to.
const NationalStatisticsBaseReqUrl = "https://data.stats.gov.cn/easyquery.htm"
// nationalHTTPClient is shared by every NationalHttpPost call so keep-alive
// connections are pooled and reused. Building a fresh Transport per request,
// as was done before, leaves one idle connection behind for each call.
//
// NOTE(review): certificate verification stays disabled exactly as before;
// this allows man-in-the-middle tampering. Confirm whether the target site
// still requires it.
var nationalHTTPClient = &http.Client{
	Timeout: 60 * time.Second,
	Transport: &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		IdleConnTimeout: 90 * time.Second,
	},
}

// NationalHttpPost sends payload as a form-encoded POST body to reqUrl with a
// browser-like header set and returns the raw response body.
//
// An error is returned when the request cannot be built or sent, when the
// body cannot be read, or when the server answers with a non-2xx status.
func NationalHttpPost(reqUrl, payload string) (result []byte, err error) {
	req, err := http.NewRequest(http.MethodPost, reqUrl, strings.NewReader(payload))
	if err != nil {
		return nil, err
	}

	// Accept-Encoding is deliberately not set: when the caller sets it,
	// net/http no longer decompresses the response transparently, and nothing
	// in this function decodes gzip/deflate/br by hand.
	// Content-Length is not set either: net/http derives it from the body and
	// ignores a value placed in the header map.
	//
	// NOTE(review): the Cookie below is a hard-coded session value; presumably
	// it expires - confirm how it is meant to be refreshed.
	headers := [][2]string{
		{"Accept", "text/plain, */*; q=0.01"},
		{"Accept-Language", "zh-CN,zh;q=0.9"},
		{"Connection", "keep-alive"},
		{"Content-Type", "application/x-www-form-urlencoded"},
		{"Cookie", "wzws_sessionid=gDExNS4xOTQuMTAyLjEyN6BkERzUgmZjNWVlMYFiOWNiZDg=; JSESSIONID=UOri2Cu3f3c-Y3rPgXWJ04E8pfbeyAUGG-s7zJ7Tt0JhlEiLi0EU!412929168; u=5"},
		{"Host", "data.stats.gov.cn"},
		{"Origin", "https://data.stats.gov.cn"},
		{"Referer", "https://data.stats.gov.cn/easyquery.htm?cn=A01"},
		{"sec-ch-ua", "\"Not_A Brand\";v=\"99\", \"Google Chrome\";v=\"109\", \"Chromium\";v=\"109\""},
		{"sec-ch-ua-mobile", "?0"},
		{"sec-ch-ua-platform", "\"Windows\""},
		{"Sec-Fetch-Dest", "empty"},
		{"Sec-Fetch-Mode", "cors"},
		{"Sec-Fetch-Site", "same-origin"},
		{"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"},
		{"X-Requested-With", "XMLHttpRequest"},
	}
	for _, h := range headers {
		req.Header.Set(h[0], h[1])
	}

	res, err := nationalHTTPClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	// Read the body even on a failure status so the connection can be reused.
	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}
	if res.StatusCode < 200 || res.StatusCode > 299 {
		return nil, fmt.Errorf("unexpected http status %s from %s", res.Status, reqUrl)
	}
	return body, nil
}
// NationalGet issues a GET request to reqUrl (payload is attached as the
// request body) and prints the cookies found on the response to stdout. The
// response body is read and discarded.
//
// It is a diagnostic helper: nothing but the error is returned.
func NationalGet(reqUrl, payload string) (err error) {
	// NOTE(review): certificate verification is disabled exactly as before;
	// confirm whether the target site still requires it.
	tr := &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}
	// The transport lives for this call only, so release its pooled
	// connections on the way out instead of leaking them.
	defer tr.CloseIdleConnections()
	client := &http.Client{
		Transport: tr,
		Timeout:   60 * time.Second, // never hang forever on a stalled server
	}

	req, err := http.NewRequest(http.MethodGet, reqUrl, strings.NewReader(payload))
	if err != nil {
		return err
	}
	res, err := client.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if _, err = ioutil.ReadAll(res.Body); err != nil {
		return err
	}

	// Servers hand out cookies through Set-Cookie; a response carries no
	// "Cookie" header, which is what was read here before.
	fmt.Println(res.Header.Get("Set-Cookie"))
	fmt.Println("rcookie")
	fmt.Println(req.Header.Get("Cookie"))

	cookies := res.Cookies()
	fmt.Println("cookiesArrLen:", len(cookies))
	for i, c := range cookies {
		fmt.Println(i, c)
	}
	return nil
}
// QuotaClassifyTreeResp is a single node of the indicator classification tree
// as decoded from the "getTree" response. The fields carry no json tags, so
// decoding relies on encoding/json's case-insensitive key matching.
type QuotaClassifyTreeResp struct {
	Id       string `description:"分类ID(字符串)"` // classification ID (string)
	IsParent bool   `description:"是否为父级"`      // whether the node has children
	Name     string `description:"分类名称"`       // classification name
	Pid      string `description:"父级分类ID"`     // parent classification ID
	Dbcode   string `description:"源-dbocde"`   // source dbcode
	Wdcode   string `description:"源-wdcode"`   // source wdcode
}
- // SyncQuotaClassifyTree 同步指标分类树
- func SyncQuotaClassifyTree() (err error) {
- defer func() {
- if err != nil {
- utils.FileLog.Error("统计局-同步指标分类树失败, ErrMsg: %s", err.Error())
- go alarm_msg.SendAlarmMsg(fmt.Sprintf("统计局-同步指标分类树失败, ErrMsg: %s", err.Error()), 3)
- }
- }()
- dbCodes := []string{
- "hgyd", "hgjd", "hgnd", // 月度/季度/年度
- "fsyd", "fsjd", "fsnd", "csyd", "csnd", "gatyd", "gatnd", // 分省月度/分省季度/分省年度/城市月度价格/城市年度数据/港澳台月度/港澳台年度
- "gjyd", "gjydsdj", "gjydsc", "gjnd", // 主要国家月度/三大经济体月度/国际市场月度商品/国家年度
- }
- for _, code := range dbCodes {
- fmt.Println("开始同步DbCode: ", code)
- items := make([]*models.BaseFromNationalStatisticsClassify, 0)
- resp, e := curlAndFormatQuotaClassify("zb", code, "zb", items)
- if e != nil {
- err = fmt.Errorf("递归指标分类失败, Err: %s", e.Error())
- return
- }
- items = resp
- // 去重
- classifyMap := make(map[string]bool)
- classifyOB := new(models.BaseFromNationalStatisticsClassify)
- classifyPars := make([]interface{}, 0)
- classifies, e := classifyOB.GetItemsByCondition("", classifyPars, []string{}, "")
- if e != nil {
- err = fmt.Errorf("获取指标分类列表失败, Err: %s", e.Error())
- return
- }
- for _, c := range classifies {
- classifyMap[fmt.Sprintf("%s%s", code, c.Id)] = true
- }
- finalList := make([]*models.BaseFromNationalStatisticsClassify, 0)
- for _, v := range items {
- if classifyMap[fmt.Sprintf("%s%s", code, v.Id)] {
- continue
- }
- finalList = append(finalList, v)
- }
- if e = classifyOB.CreateMulti(items); e != nil {
- err = fmt.Errorf("批量新增指标分类失败, Err: %s", e.Error())
- return
- }
- fmt.Println("结束同步DbCode: ", code)
- }
- return
- }
- // curlAndFormatQuotaClassify 递归请求分类树
- func curlAndFormatQuotaClassify(id, dbcode, wdcode string, items []*models.BaseFromNationalStatisticsClassify) (resp []*models.BaseFromNationalStatisticsClassify, err error) {
- f := url.Values{}
- f.Add("id", id)
- f.Add("dbcode", dbcode)
- f.Add("wdcode", wdcode)
- f.Add("m", "getTree")
- r, e := NationalHttpPost(NationalStatisticsBaseReqUrl, f.Encode())
- if e != nil {
- err = fmt.Errorf("请求指标分类树失败, Err: %s", e.Error())
- return
- }
- list := make([]*QuotaClassifyTreeResp, 0)
- if e = json.Unmarshal(r, &list); e != nil {
- err = fmt.Errorf("解析响应数据失败, Err: %s", e.Error())
- return
- }
- nowTime := time.Now().Local()
- for _, v := range list {
- isParent := 0
- if v.IsParent {
- isParent = 1
- }
- items = append(items, &models.BaseFromNationalStatisticsClassify{
- ClassifyName: v.Name,
- Id: v.Id,
- Dbcode: v.Dbcode,
- Wdcode: v.Wdcode,
- Pid: v.Pid,
- IsParent: isParent,
- CreateTime: nowTime,
- ModifyTime: nowTime,
- })
- // 向下递归
- if isParent == 1 {
- time.Sleep(1 * time.Second) // 缓缓...毕竟接口是人家的...
- items, e = curlAndFormatQuotaClassify(v.Id, v.Dbcode, v.Wdcode, items)
- if e != nil {
- err = fmt.Errorf("递归请求分类树失败, Err: %s", e.Error())
- return
- }
- }
- }
- return items, nil
- }
- // QuotaListDataResp 指标数据列表响应体
- type QuotaListDataResp struct {
- ReturnCode int `description:"状态码" json:"returncode"`
- ReturnData struct {
- DataNodes []QuotaDataNode `json:"datanodes"`
- WdNodes []QuotaWdNode `json:"wdnodes"`
- }
- }
- // QuotaDataNode 指标数据节点
- type QuotaDataNode struct {
- Code string `description:"编码"`
- Data struct {
- Data float64 `description:"指标值"`
- HasData bool `description:"是否有值" json:"hasdata"`
- StrData string `description:"指标值(字符串)" json:"strdata"`
- }
- Wds []QuotaDataWds
- }
// QuotaDataWds names one dimension (WdCode) and the value code (ValueCode)
// a data node carries for that dimension.
type QuotaDataWds struct {
	ValueCode string `json:"valuecode"`
	WdCode    string `json:"wdcode"`
}
- // QuotaWdNode 维度节点
- type QuotaWdNode struct {
- WdCode string `description:"示例: zb; sj; reg;" json:"wdcode"`
- WdName string `description:"示例: 指标; 时间; 地区" json:"wdname"`
- Nodes []QuotaWdNodeData
- }
// QuotaWdNodeData is a single entry along a dimension: its code, display
// name, unit and sort position.
type QuotaWdNodeData struct {
	Code     string `description:"指标编码"`                 // indicator code
	Name     string `description:"指标名称"`                 // indicator name
	Unit     string `description:"单位"`                   // unit
	SortCode int    `description:"编码排序" json:"sortcode"` // sort order of the code
}
- // SyncQuotaDataFromDbCodeAndId 同步指标值
- func SyncQuotaDataFromDbCodeAndId() (err error) {
- defer func() {
- if err != nil {
- utils.FileLog.Error("统计局-同步数据失败, ErrMsg: %s", err.Error())
- go alarm_msg.SendAlarmMsg(fmt.Sprintf("统计局-同步数据失败, ErrMsg: %s", err.Error()), 3)
- }
- }()
- // 查询无父级的指标分类
- classifyOB := new(models.BaseFromNationalStatisticsClassify)
- classifyCond := ` AND is_parent = 0`
- classifyPars := make([]interface{}, 0)
- classifyOrder := ` ORDER BY base_from_national_statistics_classify_id ASC`
- classifyList, e := classifyOB.GetItemsByCondition(classifyCond, classifyPars, []string{}, classifyOrder)
- if e != nil {
- err = fmt.Errorf("获取指标分类列表失败, Err: %s", e.Error())
- return
- }
- // 同步指标和数据
- for _, c := range classifyList {
- time.Sleep(time.Second)
- if e = SyncIndexAndDataByClassify(c.BaseFromNationalStatisticsClassifyId, c.Dbcode, c.Id); e != nil {
- err = fmt.Errorf("同步分类下的指标和数据失败, Err: %s", e.Error())
- return
- }
- }
- return
- }
- // SyncIndexAndDataByClassify 同步分类下的指标和数据
- func SyncIndexAndDataByClassify(classifyId int, dbCode, classifyCode string) (err error) {
- // yd-月度 jd-季度 nd-年度
- frequency := ""
- timeParam := ""
- if strings.Contains(dbCode, "yd") {
- timeParam = "LAST36" // 最近36个月
- frequency = "月度"
- }
- if strings.Contains(dbCode, "jd") {
- timeParam = "LAST18" // 最近18个季度
- frequency = "季度"
- }
- if strings.Contains(dbCode, "nd") {
- timeParam = "LAST20" // 最近20年
- frequency = "年度"
- }
- // 构建查询
- f := url.Values{}
- f.Add("m", "QueryData")
- f.Add("dbcode", dbCode)
- f.Add("rowcode", "zb")
- f.Add("colcode", "sj")
- f.Add("wds", "[]")
- f.Add("dfwds", fmt.Sprintf(`[{"wdcode":"zb","valuecode":"%s"},{"wdcode":"sj","valuecode":"%s"}]`, classifyCode, timeParam))
- f.Add("k1", fmt.Sprint(time.Now().UnixNano()/1e6))
- f.Add("h", "1")
- r, e := NationalHttpPost(NationalStatisticsBaseReqUrl, f.Encode())
- if e != nil {
- err = fmt.Errorf("请求分类下的指标失败, Err: %s", e.Error())
- return
- }
- resp := new(QuotaListDataResp)
- if e = json.Unmarshal(r, &resp); e != nil {
- fmt.Println("Unmarshal Err: ", e.Error())
- return
- }
- // 数据集
- dataNodes := resp.ReturnData.DataNodes
- dataMap := make(map[string]QuotaDataNode)
- for _, d := range dataNodes {
- dataMap[d.Code] = d
- }
- // 取出指标(Y轴), 日期(X轴)
- wdNodes := resp.ReturnData.WdNodes
- var quotaNodes, dateNodes []QuotaWdNodeData
- for _, w := range wdNodes {
- if w.WdCode == "zb" {
- quotaNodes = w.Nodes
- continue
- }
- if w.WdCode == "sj" {
- dateNodes = w.Nodes
- }
- }
- // 指标编码去重, 指标编码+日期数据去重
- indexOB := new(models.BaseFromNationalStatisticsIndex)
- indexCond := ``
- indexPars := make([]interface{}, 0)
- indexList, e := indexOB.GetItemsByCondition(indexCond, indexPars, []string{"index_code"}, "")
- if e != nil {
- err = fmt.Errorf("获取指标列表失败, Err: %s", e.Error())
- return
- }
- indexExistMap := make(map[string]bool)
- for _, v := range indexList {
- indexExistMap[v.IndexCode] = true
- }
- dataOB := new(models.BaseFromNationalStatisticsData)
- dataCond := ``
- dataPars := make([]interface{}, 0)
- dataList, e := dataOB.GetItemsByCondition(dataCond, dataPars, []string{"index_code", "data_time"}, "")
- if e != nil {
- err = fmt.Errorf("获取指标数据列表失败, Err: %s", e.Error())
- return
- }
- dataExistMap := make(map[string]bool)
- for _, v := range dataList {
- dataExistMap[fmt.Sprintf("%s%s", v.IndexCode, v.DataTime.Format(utils.FormatDate))] = true
- }
- // 遍历XY轴
- indexDataList := make([]*models.SaveNationalStatisticsIndexAndDataReq, 0)
- indexDataMap := make(map[string][]*models.BaseFromNationalStatisticsData)
- for _, q := range quotaNodes {
- indexCode := fmt.Sprintf("%s%s", dbCode, q.Code)
- // 指标
- r := new(models.SaveNationalStatisticsIndexAndDataReq)
- r.Index = &models.BaseFromNationalStatisticsIndex{
- BaseFromNationalStatisticsClassifyId: classifyId,
- Dbcode: dbCode,
- IndexCode: indexCode,
- IndexName: q.Name,
- Frequency: frequency,
- CreateTime: time.Now().Local(),
- ModifyTime: time.Now().Local(),
- }
- if indexExistMap[indexCode] {
- r.IndexExist = true
- }
- // 数据
- for _, d := range dateNodes {
- k := fmt.Sprintf("%s.%s_%s.%s", "zb", q.Code, "sj", d.Code)
- v := dataMap[k]
- if !v.Data.HasData {
- continue
- }
- // 日期去重
- t, e := time.ParseInLocation("200601", d.Code, time.Local)
- if e != nil {
- err = fmt.Errorf("指标日期转换失败, Err: %s", e.Error())
- return
- }
- existKey := fmt.Sprintf("%s%s", indexCode, t.Format(utils.FormatDate))
- if dataExistMap[existKey] {
- continue
- }
- // 数据map
- if indexDataMap[indexCode] == nil {
- indexDataMap[indexCode] = make([]*models.BaseFromNationalStatisticsData, 0)
- }
- indexDataMap[indexCode] = append(indexDataMap[indexCode], &models.BaseFromNationalStatisticsData{
- IndexCode: indexCode,
- DataTime: t,
- Value: v.Data.Data,
- CreateTime: time.Now().Local(),
- ModifyTime: time.Now().Local(),
- })
- }
- indexDataList = append(indexDataList, r)
- }
- // 保存指标
- for _, v := range indexDataList {
- ds := indexDataMap[v.Index.IndexCode]
- if ds == nil || (ds != nil && len(ds) == 0) {
- continue
- }
- v.DataList = ds
- if e := models.SaveNationalStatisticsIndexAndData(v); e != nil {
- err = fmt.Errorf("保存指标和数据失败, Err: %s", e.Error())
- return
- }
- }
- return
- }
- func ApiTest() (err error) {
- defer func() {
- if err != nil {
- fmt.Println(err.Error())
- }
- }()
- dbCode := "hgyd"
- classifyCode := "A010101"
- timeParam := "LAST36"
- f := url.Values{}
- f.Add("m", "QueryData")
- f.Add("dbcode", dbCode)
- f.Add("rowcode", "zb")
- f.Add("colcode", "sj")
- f.Add("wds", "[]")
- f.Add("dfwds", fmt.Sprintf(`[{"wdcode":"zb","valuecode":"%s"},{"wdcode":"sj","valuecode":"%s"}]`, classifyCode, timeParam))
- f.Add("k1", fmt.Sprint(time.Now().UnixNano()/1e6))
- //f.Add("h", "1")
- r, e := NationalHttpPost(NationalStatisticsBaseReqUrl, f.Encode())
- if e != nil {
- err = fmt.Errorf("请求分类下的指标失败, Err: %s", e.Error())
- return
- }
- resp := new(QuotaListDataResp)
- if e = json.Unmarshal(r, &resp); e != nil {
- fmt.Println("Unmarshal Err: ", e.Error())
- return
- }
- result, _ := json.Marshal(resp)
- utils.FileLog.Info("result: ", string(result))
- return
- }
|