national_data.go 14 KB


  1. package national_data
  2. import (
  3. "crypto/tls"
  4. "encoding/json"
  5. "fmt"
  6. "hongze/hongze_data_crawler/models"
  7. "hongze/hongze_data_crawler/services/alarm_msg"
  8. "hongze/hongze_data_crawler/utils"
  9. "io/ioutil"
  10. "net/http"
  11. "net/url"
  12. "strings"
  13. "time"
  14. )
  // NationalStatisticsBaseReqUrl is the query endpoint of the National Bureau of
  // Statistics data site. The functions in this file POST their form-encoded
  // "getTree" and "QueryData" requests to it.
  const NationalStatisticsBaseReqUrl = "https://data.stats.gov.cn/easyquery.htm"
  // nationalPostClient is the HTTP client reused by every NationalHttpPost call, so
  // keep-alive connections are pooled instead of a fresh transport being built (and
  // its idle connections abandoned) for each request.
  //
  // NOTE(review): TLS certificate verification is disabled, which leaves the
  // crawler open to man-in-the-middle tampering — confirm whether the target site
  // still requires this.
  var nationalPostClient = &http.Client{
  	Timeout: 60 * time.Second, // never hang forever on a stalled connection
  	Transport: &http.Transport{
  		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  		IdleConnTimeout: 90 * time.Second,
  	},
  }

  // NationalHttpPost sends payload as an application/x-www-form-urlencoded POST to
  // reqUrl with browser-like headers and returns the raw response body.
  //
  // It returns an error when the request cannot be built or sent, when the body
  // cannot be read, or when the server answers with a non-2xx status. On error the
  // returned slice is nil.
  func NationalHttpPost(reqUrl, payload string) (result []byte, err error) {
  	req, err := http.NewRequest("POST", reqUrl, strings.NewReader(payload))
  	if err != nil {
  		return nil, err
  	}

  	// Accept-Encoding is deliberately not set: when it is absent net/http asks for
  	// gzip itself and transparently decompresses the response. Setting it by hand
  	// hands the still-compressed bytes to the caller.
  	// Content-Length and Host are not set either: net/http derives them from the
  	// request body and URL and ignores header entries with those names.
  	//
  	// NOTE(review): the Cookie value is a hard-coded session; presumably it expires
  	// — confirm how it is meant to be refreshed.
  	headers := [][2]string{
  		{"Accept", "text/plain, */*; q=0.01"},
  		{"Accept-Language", "zh-CN,zh;q=0.9"},
  		{"Connection", "keep-alive"},
  		{"Content-Type", "application/x-www-form-urlencoded"},
  		{"Cookie", "wzws_sessionid=gDExNS4xOTQuMTAyLjEyN6BkERzUgmZjNWVlMYFiOWNiZDg=; JSESSIONID=UOri2Cu3f3c-Y3rPgXWJ04E8pfbeyAUGG-s7zJ7Tt0JhlEiLi0EU!412929168; u=5"},
  		{"Origin", "https://data.stats.gov.cn"},
  		{"Referer", "https://data.stats.gov.cn/easyquery.htm?cn=A01"},
  		{"sec-ch-ua", "\"Not_A Brand\";v=\"99\", \"Google Chrome\";v=\"109\", \"Chromium\";v=\"109\""},
  		{"sec-ch-ua-mobile", "?0"},
  		{"sec-ch-ua-platform", "\"Windows\""},
  		{"Sec-Fetch-Dest", "empty"},
  		{"Sec-Fetch-Mode", "cors"},
  		{"Sec-Fetch-Site", "same-origin"},
  		{"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"},
  		{"X-Requested-With", "XMLHttpRequest"},
  	}
  	for _, h := range headers {
  		req.Header.Set(h[0], h[1])
  	}

  	res, err := nationalPostClient.Do(req)
  	if err != nil {
  		return nil, err
  	}
  	defer res.Body.Close()

  	// Read the body even for error statuses so the connection can be reused.
  	body, err := ioutil.ReadAll(res.Body)
  	if err != nil {
  		return nil, err
  	}
  	if res.StatusCode < 200 || res.StatusCode >= 300 {
  		return nil, fmt.Errorf("unexpected HTTP status %q from %s", res.Status, reqUrl)
  	}
  	return body, nil
  }
  // NationalGet issues a GET request to reqUrl (with payload as the request body)
  // and prints cookie diagnostics to stdout: the response's Set-Cookie header, the
  // request's Cookie header, and every cookie parsed from the response.
  //
  // It returns an error when the request cannot be built or sent, or when the
  // response body cannot be read. The response body itself is discarded.
  //
  // NOTE(review): TLS certificate verification is disabled here — confirm whether
  // the target site still requires this.
  func NationalGet(reqUrl, payload string) (err error) {
  	tr := &http.Transport{
  		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  	}
  	// The transport lives only for this call; release its pooled connections so
  	// they are not left open after the function returns.
  	defer tr.CloseIdleConnections()
  	client := &http.Client{
  		Transport: tr,
  		Timeout:   60 * time.Second,
  	}

  	req, err := http.NewRequest("GET", reqUrl, strings.NewReader(payload))
  	if err != nil {
  		return err
  	}
  	res, err := client.Do(req)
  	if err != nil {
  		return err
  	}
  	defer res.Body.Close()

  	// Drain the body; only the headers are of interest.
  	if _, err = ioutil.ReadAll(res.Body); err != nil {
  		return err
  	}

  	// Servers hand out cookies via Set-Cookie; a response never carries "Cookie".
  	fmt.Println(res.Header.Get("Set-Cookie"))
  	fmt.Println("rcookie")
  	fmt.Println(req.Header.Get("Cookie"))

  	cookiesArr := res.Cookies()
  	fmt.Println("cookiesArrLen:", len(cookiesArr))
  	for k, v := range cookiesArr {
  		fmt.Println(k, v)
  	}
  	return nil
  }
  // QuotaClassifyTreeResp is one node of the indicator classification tree, as
  // decoded from the JSON array returned by the "getTree" request.
  type QuotaClassifyTreeResp struct {
  	// Id is the classification ID (a string).
  	Id string `description:"分类ID(字符串)"`
  	// IsParent reports whether the node has children.
  	IsParent bool `description:"是否为父级"`
  	// Name is the classification name.
  	Name string `description:"分类名称"`
  	// Pid is the ID of the parent classification.
  	Pid string `description:"父级分类ID"`
  	// Dbcode is the source database code of the node.
  	Dbcode string `description:"源-dbocde"`
  	// Wdcode is the source dimension code of the node.
  	Wdcode string `description:"源-wdcode"`
  }
  101. // SyncQuotaClassifyTree 同步指标分类树
  102. func SyncQuotaClassifyTree() (err error) {
  103. defer func() {
  104. if err != nil {
  105. utils.FileLog.Error("统计局-同步指标分类树失败, ErrMsg: %s", err.Error())
  106. go alarm_msg.SendAlarmMsg(fmt.Sprintf("统计局-同步指标分类树失败, ErrMsg: %s", err.Error()), 3)
  107. }
  108. }()
  109. dbCodes := []string{
  110. "hgyd", "hgjd", "hgnd", // 月度/季度/年度
  111. "fsyd", "fsjd", "fsnd", "csyd", "csnd", "gatyd", "gatnd", // 分省月度/分省季度/分省年度/城市月度价格/城市年度数据/港澳台月度/港澳台年度
  112. "gjyd", "gjydsdj", "gjydsc", "gjnd", // 主要国家月度/三大经济体月度/国际市场月度商品/国家年度
  113. }
  114. for _, code := range dbCodes {
  115. fmt.Println("开始同步DbCode: ", code)
  116. items := make([]*models.BaseFromNationalStatisticsClassify, 0)
  117. resp, e := curlAndFormatQuotaClassify("zb", code, "zb", items)
  118. if e != nil {
  119. err = fmt.Errorf("递归指标分类失败, Err: %s", e.Error())
  120. return
  121. }
  122. items = resp
  123. // 去重
  124. classifyMap := make(map[string]bool)
  125. classifyOB := new(models.BaseFromNationalStatisticsClassify)
  126. classifyPars := make([]interface{}, 0)
  127. classifies, e := classifyOB.GetItemsByCondition("", classifyPars, []string{}, "")
  128. if e != nil {
  129. err = fmt.Errorf("获取指标分类列表失败, Err: %s", e.Error())
  130. return
  131. }
  132. for _, c := range classifies {
  133. classifyMap[fmt.Sprintf("%s%s", code, c.Id)] = true
  134. }
  135. finalList := make([]*models.BaseFromNationalStatisticsClassify, 0)
  136. for _, v := range items {
  137. if classifyMap[fmt.Sprintf("%s%s", code, v.Id)] {
  138. continue
  139. }
  140. finalList = append(finalList, v)
  141. }
  142. if e = classifyOB.CreateMulti(items); e != nil {
  143. err = fmt.Errorf("批量新增指标分类失败, Err: %s", e.Error())
  144. return
  145. }
  146. fmt.Println("结束同步DbCode: ", code)
  147. }
  148. return
  149. }
  150. // curlAndFormatQuotaClassify 递归请求分类树
  151. func curlAndFormatQuotaClassify(id, dbcode, wdcode string, items []*models.BaseFromNationalStatisticsClassify) (resp []*models.BaseFromNationalStatisticsClassify, err error) {
  152. f := url.Values{}
  153. f.Add("id", id)
  154. f.Add("dbcode", dbcode)
  155. f.Add("wdcode", wdcode)
  156. f.Add("m", "getTree")
  157. r, e := NationalHttpPost(NationalStatisticsBaseReqUrl, f.Encode())
  158. if e != nil {
  159. err = fmt.Errorf("请求指标分类树失败, Err: %s", e.Error())
  160. return
  161. }
  162. list := make([]*QuotaClassifyTreeResp, 0)
  163. if e = json.Unmarshal(r, &list); e != nil {
  164. err = fmt.Errorf("解析响应数据失败, Err: %s", e.Error())
  165. return
  166. }
  167. nowTime := time.Now().Local()
  168. for _, v := range list {
  169. isParent := 0
  170. if v.IsParent {
  171. isParent = 1
  172. }
  173. items = append(items, &models.BaseFromNationalStatisticsClassify{
  174. ClassifyName: v.Name,
  175. Id: v.Id,
  176. Dbcode: v.Dbcode,
  177. Wdcode: v.Wdcode,
  178. Pid: v.Pid,
  179. IsParent: isParent,
  180. CreateTime: nowTime,
  181. ModifyTime: nowTime,
  182. })
  183. // 向下递归
  184. if isParent == 1 {
  185. time.Sleep(1 * time.Second) // 缓缓...毕竟接口是人家的...
  186. items, e = curlAndFormatQuotaClassify(v.Id, v.Dbcode, v.Wdcode, items)
  187. if e != nil {
  188. err = fmt.Errorf("递归请求分类树失败, Err: %s", e.Error())
  189. return
  190. }
  191. }
  192. }
  193. return items, nil
  194. }
  195. // QuotaListDataResp 指标数据列表响应体
  196. type QuotaListDataResp struct {
  197. ReturnCode int `description:"状态码" json:"returncode"`
  198. ReturnData struct {
  199. DataNodes []QuotaDataNode `json:"datanodes"`
  200. WdNodes []QuotaWdNode `json:"wdnodes"`
  201. }
  202. }
  203. // QuotaDataNode 指标数据节点
  204. type QuotaDataNode struct {
  205. Code string `description:"编码"`
  206. Data struct {
  207. Data float64 `description:"指标值"`
  208. HasData bool `description:"是否有值" json:"hasdata"`
  209. StrData string `description:"指标值(字符串)" json:"strdata"`
  210. }
  211. Wds []QuotaDataWds
  212. }
  // QuotaDataWds is one dimension reference attached to a data cell: the dimension
  // code and the value code within that dimension.
  type QuotaDataWds struct {
  	ValueCode string `json:"valuecode"`
  	WdCode    string `json:"wdcode"`
  }
  218. // QuotaWdNode 维度节点
  219. type QuotaWdNode struct {
  220. WdCode string `description:"示例: zb; sj; reg;" json:"wdcode"`
  221. WdName string `description:"示例: 指标; 时间; 地区" json:"wdname"`
  222. Nodes []QuotaWdNodeData
  223. }
  // QuotaWdNodeData is a single entry of a dimension node list.
  type QuotaWdNodeData struct {
  	// Code is the indicator code.
  	Code string `description:"指标编码"`
  	// Name is the indicator name.
  	Name string `description:"指标名称"`
  	// Unit is the unit of measurement.
  	Unit string `description:"单位"`
  	// SortCode is the sort order of the code.
  	SortCode int `description:"编码排序" json:"sortcode"`
  }
  231. // SyncQuotaDataFromDbCodeAndId 同步指标值
  232. func SyncQuotaDataFromDbCodeAndId() (err error) {
  233. defer func() {
  234. if err != nil {
  235. utils.FileLog.Error("统计局-同步数据失败, ErrMsg: %s", err.Error())
  236. go alarm_msg.SendAlarmMsg(fmt.Sprintf("统计局-同步数据失败, ErrMsg: %s", err.Error()), 3)
  237. }
  238. }()
  239. // 查询无父级的指标分类
  240. classifyOB := new(models.BaseFromNationalStatisticsClassify)
  241. classifyCond := ` AND is_parent = 0`
  242. classifyPars := make([]interface{}, 0)
  243. classifyOrder := ` ORDER BY base_from_national_statistics_classify_id ASC`
  244. classifyList, e := classifyOB.GetItemsByCondition(classifyCond, classifyPars, []string{}, classifyOrder)
  245. if e != nil {
  246. err = fmt.Errorf("获取指标分类列表失败, Err: %s", e.Error())
  247. return
  248. }
  249. // 同步指标和数据
  250. for _, c := range classifyList {
  251. time.Sleep(time.Second)
  252. if e = SyncIndexAndDataByClassify(c.BaseFromNationalStatisticsClassifyId, c.Dbcode, c.Id); e != nil {
  253. err = fmt.Errorf("同步分类下的指标和数据失败, Err: %s", e.Error())
  254. return
  255. }
  256. }
  257. return
  258. }
  259. // SyncIndexAndDataByClassify 同步分类下的指标和数据
  260. func SyncIndexAndDataByClassify(classifyId int, dbCode, classifyCode string) (err error) {
  261. // yd-月度 jd-季度 nd-年度
  262. frequency := ""
  263. timeParam := ""
  264. if strings.Contains(dbCode, "yd") {
  265. timeParam = "LAST36" // 最近36个月
  266. frequency = "月度"
  267. }
  268. if strings.Contains(dbCode, "jd") {
  269. timeParam = "LAST18" // 最近18个季度
  270. frequency = "季度"
  271. }
  272. if strings.Contains(dbCode, "nd") {
  273. timeParam = "LAST20" // 最近20年
  274. frequency = "年度"
  275. }
  276. // 构建查询
  277. f := url.Values{}
  278. f.Add("m", "QueryData")
  279. f.Add("dbcode", dbCode)
  280. f.Add("rowcode", "zb")
  281. f.Add("colcode", "sj")
  282. f.Add("wds", "[]")
  283. f.Add("dfwds", fmt.Sprintf(`[{"wdcode":"zb","valuecode":"%s"},{"wdcode":"sj","valuecode":"%s"}]`, classifyCode, timeParam))
  284. f.Add("k1", fmt.Sprint(time.Now().UnixNano()/1e6))
  285. f.Add("h", "1")
  286. r, e := NationalHttpPost(NationalStatisticsBaseReqUrl, f.Encode())
  287. if e != nil {
  288. err = fmt.Errorf("请求分类下的指标失败, Err: %s", e.Error())
  289. return
  290. }
  291. resp := new(QuotaListDataResp)
  292. if e = json.Unmarshal(r, &resp); e != nil {
  293. fmt.Println("Unmarshal Err: ", e.Error())
  294. return
  295. }
  296. // 数据集
  297. dataNodes := resp.ReturnData.DataNodes
  298. dataMap := make(map[string]QuotaDataNode)
  299. for _, d := range dataNodes {
  300. dataMap[d.Code] = d
  301. }
  302. // 取出指标(Y轴), 日期(X轴)
  303. wdNodes := resp.ReturnData.WdNodes
  304. var quotaNodes, dateNodes []QuotaWdNodeData
  305. for _, w := range wdNodes {
  306. if w.WdCode == "zb" {
  307. quotaNodes = w.Nodes
  308. continue
  309. }
  310. if w.WdCode == "sj" {
  311. dateNodes = w.Nodes
  312. }
  313. }
  314. // 指标编码去重, 指标编码+日期数据去重
  315. indexOB := new(models.BaseFromNationalStatisticsIndex)
  316. indexCond := ``
  317. indexPars := make([]interface{}, 0)
  318. indexList, e := indexOB.GetItemsByCondition(indexCond, indexPars, []string{"index_code"}, "")
  319. if e != nil {
  320. err = fmt.Errorf("获取指标列表失败, Err: %s", e.Error())
  321. return
  322. }
  323. indexExistMap := make(map[string]bool)
  324. for _, v := range indexList {
  325. indexExistMap[v.IndexCode] = true
  326. }
  327. dataOB := new(models.BaseFromNationalStatisticsData)
  328. dataCond := ``
  329. dataPars := make([]interface{}, 0)
  330. dataList, e := dataOB.GetItemsByCondition(dataCond, dataPars, []string{"index_code", "data_time"}, "")
  331. if e != nil {
  332. err = fmt.Errorf("获取指标数据列表失败, Err: %s", e.Error())
  333. return
  334. }
  335. dataExistMap := make(map[string]bool)
  336. for _, v := range dataList {
  337. dataExistMap[fmt.Sprintf("%s%s", v.IndexCode, v.DataTime.Format(utils.FormatDate))] = true
  338. }
  339. // 遍历XY轴
  340. indexDataList := make([]*models.SaveNationalStatisticsIndexAndDataReq, 0)
  341. indexDataMap := make(map[string][]*models.BaseFromNationalStatisticsData)
  342. for _, q := range quotaNodes {
  343. indexCode := fmt.Sprintf("%s%s", dbCode, q.Code)
  344. // 指标
  345. r := new(models.SaveNationalStatisticsIndexAndDataReq)
  346. r.Index = &models.BaseFromNationalStatisticsIndex{
  347. BaseFromNationalStatisticsClassifyId: classifyId,
  348. Dbcode: dbCode,
  349. IndexCode: indexCode,
  350. IndexName: q.Name,
  351. Frequency: frequency,
  352. CreateTime: time.Now().Local(),
  353. ModifyTime: time.Now().Local(),
  354. }
  355. if indexExistMap[indexCode] {
  356. r.IndexExist = true
  357. }
  358. // 数据
  359. for _, d := range dateNodes {
  360. k := fmt.Sprintf("%s.%s_%s.%s", "zb", q.Code, "sj", d.Code)
  361. v := dataMap[k]
  362. if !v.Data.HasData {
  363. continue
  364. }
  365. // 日期去重
  366. t, e := time.ParseInLocation("200601", d.Code, time.Local)
  367. if e != nil {
  368. err = fmt.Errorf("指标日期转换失败, Err: %s", e.Error())
  369. return
  370. }
  371. existKey := fmt.Sprintf("%s%s", indexCode, t.Format(utils.FormatDate))
  372. if dataExistMap[existKey] {
  373. continue
  374. }
  375. // 数据map
  376. if indexDataMap[indexCode] == nil {
  377. indexDataMap[indexCode] = make([]*models.BaseFromNationalStatisticsData, 0)
  378. }
  379. indexDataMap[indexCode] = append(indexDataMap[indexCode], &models.BaseFromNationalStatisticsData{
  380. IndexCode: indexCode,
  381. DataTime: t,
  382. Value: v.Data.Data,
  383. CreateTime: time.Now().Local(),
  384. ModifyTime: time.Now().Local(),
  385. })
  386. }
  387. indexDataList = append(indexDataList, r)
  388. }
  389. // 保存指标
  390. for _, v := range indexDataList {
  391. ds := indexDataMap[v.Index.IndexCode]
  392. if ds == nil || (ds != nil && len(ds) == 0) {
  393. continue
  394. }
  395. v.DataList = ds
  396. if e := models.SaveNationalStatisticsIndexAndData(v); e != nil {
  397. err = fmt.Errorf("保存指标和数据失败, Err: %s", e.Error())
  398. return
  399. }
  400. }
  401. return
  402. }
  403. func ApiTest() (err error) {
  404. defer func() {
  405. if err != nil {
  406. fmt.Println(err.Error())
  407. }
  408. }()
  409. dbCode := "hgyd"
  410. classifyCode := "A010101"
  411. timeParam := "LAST36"
  412. f := url.Values{}
  413. f.Add("m", "QueryData")
  414. f.Add("dbcode", dbCode)
  415. f.Add("rowcode", "zb")
  416. f.Add("colcode", "sj")
  417. f.Add("wds", "[]")
  418. f.Add("dfwds", fmt.Sprintf(`[{"wdcode":"zb","valuecode":"%s"},{"wdcode":"sj","valuecode":"%s"}]`, classifyCode, timeParam))
  419. f.Add("k1", fmt.Sprint(time.Now().UnixNano()/1e6))
  420. //f.Add("h", "1")
  421. r, e := NationalHttpPost(NationalStatisticsBaseReqUrl, f.Encode())
  422. if e != nil {
  423. err = fmt.Errorf("请求分类下的指标失败, Err: %s", e.Error())
  424. return
  425. }
  426. resp := new(QuotaListDataResp)
  427. if e = json.Unmarshal(r, &resp); e != nil {
  428. fmt.Println("Unmarshal Err: ", e.Error())
  429. return
  430. }
  431. result, _ := json.Marshal(resp)
  432. utils.FileLog.Info("result: ", string(result))
  433. return
  434. }