// File: common.go

  1. package national_data
  2. import (
  3. "bytes"
  4. "crypto/tls"
  5. "encoding/json"
  6. "eta/eta_crawler/utils"
  7. "fmt"
  8. "io"
  9. "io/ioutil"
  10. "net/http"
  11. "net/url"
  12. "strings"
  13. "time"
  14. )
const (
	// NationalStatisticsBaseReqUrl is the endpoint that the request helpers in
	// this file post their form-encoded queries to.
	NationalStatisticsBaseReqUrl = "https://data.stats.gov.cn/easyquery.htm"
)
  18. func NationalHttpPost(reqUrl, payload string) (result []byte, err error) {
  19. // 随机延迟执行
  20. r := utils.RangeRand(5000, 8000)
  21. //fmt.Printf("随机延迟%d\n", r)
  22. if r > 6100 && r < 6250 {
  23. time.Sleep(15 * time.Second)
  24. } else {
  25. time.Sleep(time.Duration(r) * time.Millisecond)
  26. }
  27. tr := &http.Transport{
  28. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  29. }
  30. client := &http.Client{
  31. Transport: tr,
  32. }
  33. req, err := http.NewRequest("POST", reqUrl, strings.NewReader(payload))
  34. if err != nil {
  35. return
  36. }
  37. req.Header.Add("Accept", "text/plain, */*; q=0.01")
  38. req.Header.Add("Accept-Encoding", "tgzip, deflate, br")
  39. req.Header.Add("Accept-Language", "zh-CN,zh;q=0.9")
  40. req.Header.Add("Connection", "keep-alive")
  41. req.Header.Add("Content-Length", "37")
  42. req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
  43. req.Header.Add("Cookie", "wzws_sessionid=gDExNS4xOTQuMTAyLjEyN6BkERzUgmZjNWVlMYFiOWNiZDg=; JSESSIONID=UOri2Cu3f3c-Y3rPgXWJ04E8pfbeyAUGG-s7zJ7Tt0JhlEiLi0EU!412929168; u=5")
  44. req.Header.Add("Host", "data.stats.gov.cn")
  45. req.Header.Add("Origin", "https://data.stats.gov.cn")
  46. req.Header.Set("Referer", "https://data.stats.gov.cn/easyquery.htm?cn=A01")
  47. req.Header.Set("sec-ch-ua", "\"Not_A Brand\";v=\"99\", \"Google Chrome\";v=\"109\", \"Chromium\";v=\"109\"")
  48. req.Header.Set("sec-ch-ua-mobile", "?0")
  49. req.Header.Set("sec-ch-ua-platform", "\"Windows\"")
  50. req.Header.Set("Sec-Fetch-Dest", "empty")
  51. req.Header.Set("Sec-Fetch-Mode", "cors")
  52. req.Header.Set("Sec-Fetch-Site", "same-origin")
  53. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36")
  54. req.Header.Set("X-Requested-With", "XMLHttpRequest")
  55. res, err := client.Do(req)
  56. if err != nil {
  57. return
  58. }
  59. defer func() {
  60. _ = res.Body.Close()
  61. }()
  62. // 此处用io.Copy替代ioutil.ReadAll方法避免数据过大读取不完整
  63. var b []byte
  64. buf := bytes.NewBuffer(b)
  65. _, err = io.Copy(buf, res.Body)
  66. if err != nil {
  67. return
  68. }
  69. result = buf.Bytes()
  70. return
  71. }
  72. func NationalGet(reqUrl, payload string) (err error) {
  73. tr := &http.Transport{
  74. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  75. }
  76. client := &http.Client{
  77. Transport: tr,
  78. }
  79. req, err := http.NewRequest("GET", reqUrl, strings.NewReader(payload))
  80. if err != nil {
  81. return
  82. }
  83. res, err := client.Do(req)
  84. if err != nil {
  85. return
  86. }
  87. defer res.Body.Close()
  88. _, err = ioutil.ReadAll(res.Body)
  89. if err != nil {
  90. return
  91. }
  92. Cookie := res.Header.Get("Cookie")
  93. fmt.Println(Cookie)
  94. rcookie := req.Header.Get("Cookie")
  95. fmt.Println("rcookie")
  96. fmt.Println(rcookie)
  97. //fmt.Println("body:" + string(body))
  98. cookiesArr := res.Cookies()
  99. fmt.Println("cookiesArrLen:", len(cookiesArr))
  100. for k, v := range cookiesArr {
  101. fmt.Println(k, v)
  102. }
  103. return
  104. }
// DataApiReq is the request body for the data API.
type DataApiReq struct {
	// Method is the API method: QueryData (query data) or getOtherWds (fetch other dimensions).
	Method string `description:"方法: QueryData-查询数据; getOtherWds-获取其他维度" json:"method"`
	// DbCode is the database code.
	DbCode string `description:"数据库编码" json:"dbcode"`
	// RowCode is the row dimension: zb, sj or reg.
	RowCode string `description:"行-维度: zb; sj; reg" json:"rowcode"`
	// ColCode is the column dimension: zb, sj or reg.
	ColCode string `description:"列-维度: zb; sj; reg" json:"colcode"`
	// WdsList is the dimension list.
	WdsList []Wds `description:"维度列表" json:"wdsList"`
	// DfwdsList is a second dimension list; the original author notes that the
	// meaning of the "df" prefix is unknown.
	DfwdsList []Wds `description:"df不知道啥意思...反正也是维度相关的" json:"dfwdsList"`
}
// Wds is a single dimension selector.
type Wds struct {
	// WdCode is the dimension kind: zb (indicator), sj (time) or reg (region).
	WdCode string `description:"维度: zb-指标; sj-时间; reg-地区" json:"wdcode"`
	// ValueCode is the code of the selected value within that dimension.
	ValueCode string `description:"维度编码" json:"valuecode"`
}
  119. // CommonDataApiRequest 数据接口请求
  120. func CommonDataApiRequest(req DataApiReq) (resp QuotaListDataResp, err error) {
  121. var b []byte
  122. defer func() {
  123. if err != nil {
  124. r, _ := json.Marshal(req)
  125. utils.FileLog.Error("CommonDataApiRequest Err request: %s", r)
  126. utils.FileLog.Info("CommonDataApiRequest Err result: %s", string(b))
  127. }
  128. }()
  129. if req.DbCode == "" {
  130. return
  131. }
  132. if req.Method == "" {
  133. req.Method = "QueryData"
  134. }
  135. if req.RowCode == "" {
  136. req.RowCode = "zb"
  137. }
  138. if req.ColCode == "" {
  139. req.ColCode = "sj"
  140. }
  141. // 构建查询
  142. f := url.Values{}
  143. f.Add("m", req.Method)
  144. f.Add("dbcode", req.DbCode)
  145. f.Add("rowcode", req.RowCode)
  146. f.Add("colcode", req.ColCode)
  147. wds := `[]`
  148. if len(req.WdsList) > 0 {
  149. wdsByte, e := json.Marshal(req.WdsList)
  150. if e != nil {
  151. err = fmt.Errorf("wds json marshal err: %s", e.Error())
  152. return
  153. }
  154. wds = string(wdsByte)
  155. }
  156. dfwds := `[]`
  157. if len(req.DfwdsList) > 0 {
  158. dfwdsByte, e := json.Marshal(req.DfwdsList)
  159. if e != nil {
  160. err = fmt.Errorf("dfwds json marshal err: %s", e.Error())
  161. return
  162. }
  163. dfwds = string(dfwdsByte)
  164. }
  165. f.Add("wds", wds)
  166. f.Add("dfwds", dfwds)
  167. f.Add("k1", fmt.Sprint(time.Now().UnixNano()/1e6))
  168. f.Add("h", "1")
  169. // 响应
  170. b, e := NationalHttpPost(NationalStatisticsBaseReqUrl, f.Encode())
  171. if e != nil {
  172. err = fmt.Errorf("http request err: %s", e.Error())
  173. return
  174. }
  175. if len(b) == 0 {
  176. err = fmt.Errorf("http result empty")
  177. return
  178. }
  179. if e = json.Unmarshal(b, &resp); e != nil {
  180. err = fmt.Errorf("resp unmarshal err: %s", e.Error())
  181. return
  182. }
  183. if resp.ReturnCode != 200 {
  184. err = fmt.Errorf("resp code err: %d", resp.ReturnCode)
  185. return
  186. }
  187. return
  188. }
// QuotaListDataResp is the response body for an indicator data list query.
type QuotaListDataResp struct {
	// ReturnCode is the status code; CommonDataApiRequest treats anything other than 200 as an error.
	ReturnCode int `description:"状态码" json:"returncode"`
	// ReturnData has no json tag, so decoding relies on encoding/json's
	// case-insensitive match on the field name.
	ReturnData struct {
		// DataNodes are the data nodes of the reply.
		DataNodes []QuotaDataNode `json:"datanodes"`
		// WdNodes are the dimension nodes of the reply.
		WdNodes []QuotaWdNode `json:"wdnodes"`
	}
}
// QuotaDataNode is a single indicator data node.
type QuotaDataNode struct {
	// Code is the node code.
	Code string `description:"编码"`
	Data struct {
		// Data is the indicator value.
		Data float64 `description:"指标值"`
		// HasData reports whether a value is present.
		HasData bool `description:"是否有值" json:"hasdata"`
		// StrData is the indicator value as a string.
		StrData string `description:"指标值(字符串)" json:"strdata"`
	}
	// Wds lists the dimension selectors attached to this node.
	Wds []Wds
}
// QuotaWdNode is a dimension node.
type QuotaWdNode struct {
	// WdCode is the dimension code, e.g. zb, sj, reg.
	WdCode string `description:"示例: zb; sj; reg;" json:"wdcode"`
	// WdName is the dimension name, e.g. indicator, time, region.
	WdName string `description:"示例: 指标; 时间; 地区" json:"wdname"`
	// Nodes holds the entries of this dimension.
	Nodes []QuotaWdNodeData
}
// QuotaWdNodeData is one entry of a dimension node.
type QuotaWdNodeData struct {
	// Code is the indicator code.
	Code string `description:"指标编码"`
	// Name is the indicator name.
	Name string `description:"指标名称"`
	// Unit is the unit.
	Unit string `description:"单位"`
	// SortCode is the sort order of the code.
	SortCode int `description:"编码排序" json:"sortcode"`
}
// OtherWdResp is the response body for an "other dimensions" query.
type OtherWdResp struct {
	// ReturnCode is the status code; GetOtherWd treats anything other than 200 as an error.
	ReturnCode int `description:"状态码" json:"returncode"`
	// ReturnData is the response data.
	ReturnData []OtherWdData `description:"响应数据" json:"returndata"`
}
// OtherWdData describes one additional dimension.
type OtherWdData struct {
	// IsSj reports whether this dimension is the time dimension.
	IsSj bool `description:"是否为时间" json:"issj"`
	// WdCode is the dimension code.
	WdCode string `description:"维度编码" json:"wdcode"`
	// WdName is the dimension name.
	WdName string `description:"维度名称" json:"wdname"`
	// Nodes holds the dimension's entries.
	Nodes []OtherWdNodes `description:"维度数据" json:"nodes"`
}
// OtherWdNodes is one entry of an additional dimension.
type OtherWdNodes struct {
	// Code is the entry code.
	Code string `description:"编码" json:"code"`
	// Name is the entry name.
	Name string `description:"名称" json:"name"`
	// Sort is the sort key, carried as a string.
	Sort string `description:"排序" json:"sort"`
}
  237. // formatMonth2YearDateCode 将日期code转为对应日期
  238. func formatMonth2YearDateCode(dateCode string) (date time.Time, err error) {
  239. if dateCode == "" {
  240. return
  241. }
  242. // 根据日期code长度进行区分, 格式为三种: 月度-200601; 季度-2006A; 年度-2006
  243. switch len([]rune(dateCode)) {
  244. case 6:
  245. // 月度日期取每月最后一天
  246. m := dateCode[4:]
  247. replaceMonth := map[string]string{
  248. "01": "0131",
  249. "02": "0228",
  250. "03": "0331",
  251. "04": "0430",
  252. "05": "0531",
  253. "06": "0630",
  254. "07": "0731",
  255. "08": "0831",
  256. "09": "0930",
  257. "10": "1031",
  258. "11": "1130",
  259. "12": "1231",
  260. }
  261. md := fmt.Sprintf("%s%s", dateCode[:4], replaceMonth[m])
  262. t, e := time.ParseInLocation("20060102", md, time.Local)
  263. if e != nil {
  264. err = fmt.Errorf("月度指标日期转换失败, Err: %s", e.Error())
  265. return
  266. }
  267. date = t
  268. break
  269. case 5:
  270. // 季度ABCD转换成对应日期
  271. dateSuffixMap := map[string]string{
  272. "A": "03-31",
  273. "B": "06-30",
  274. "C": "09-30",
  275. "D": "12-31",
  276. }
  277. dateCode = strings.ToUpper(dateCode)
  278. quarterTab := dateCode[4:]
  279. dateStr := fmt.Sprintf("%s-%s", dateCode[:4], dateSuffixMap[quarterTab])
  280. t, e := time.ParseInLocation(utils.FormatDate, dateStr, time.Local)
  281. if e != nil {
  282. err = fmt.Errorf("季度指标日期转换失败, Err: %s", e.Error())
  283. return
  284. }
  285. date = t
  286. break
  287. case 4:
  288. dateStr := fmt.Sprintf("%s-%s", dateCode, "12-31")
  289. t, e := time.ParseInLocation(utils.FormatDate, dateStr, time.Local)
  290. if e != nil {
  291. err = fmt.Errorf("年度指标日期转换失败, Err: %s", e.Error())
  292. return
  293. }
  294. date = t
  295. break
  296. default:
  297. err = fmt.Errorf("日期code格式有误, code: %s", dateCode)
  298. return
  299. }
  300. return
  301. }
  302. // GetOtherWd 获取Db下其他维度信息
  303. func GetOtherWd(dbCode, rowCode, colCode string) (wdList []OtherWdData, err error) {
  304. if dbCode == "" {
  305. return
  306. }
  307. if rowCode == "" {
  308. rowCode = "zb"
  309. }
  310. if colCode == "" {
  311. colCode = "sj"
  312. }
  313. // 构建查询
  314. f := url.Values{}
  315. f.Add("m", "getOtherWds")
  316. f.Add("dbcode", dbCode)
  317. f.Add("rowcode", rowCode)
  318. f.Add("colcode", colCode)
  319. f.Add("wds", `[]`)
  320. f.Add("k1", fmt.Sprint(time.Now().UnixNano()/1e6))
  321. f.Add("h", "1")
  322. r, e := NationalHttpPost(NationalStatisticsBaseReqUrl, f.Encode())
  323. if e != nil {
  324. err = fmt.Errorf("请求其他维度信息失败, Err: %s", e.Error())
  325. return
  326. }
  327. utils.FileLog.Info("GetOtherWdInfo Result: %s", string(r))
  328. // 响应
  329. resp := new(OtherWdResp)
  330. if e = json.Unmarshal(r, &resp); e != nil {
  331. err = fmt.Errorf("其他维度信息Unmarshal Err: %s", e.Error())
  332. return
  333. }
  334. if resp == nil {
  335. err = fmt.Errorf("其他维度信息请求结果为空")
  336. return
  337. }
  338. if resp.ReturnCode != 200 {
  339. err = fmt.Errorf("其他维度信息请求有误, Code: %d", resp.ReturnCode)
  340. return
  341. }
  342. wdList = resp.ReturnData
  343. return
  344. }
  345. func ApiTest() (err error) {
  346. //f := url.Values{}
  347. //f.Add("m", "QueryData")
  348. //f.Add("dbcode", "fsyd")
  349. //f.Add("rowcode", "zb")
  350. //f.Add("colcode", "sj")
  351. //f.Add("wds", `[{"wdcode":"reg","valuecode":"000"}]`)
  352. //f.Add("dfwds", `[{"wdcode":"zb","valuecode":"A01"}]`)
  353. f := url.Values{}
  354. f.Add("m", "QueryData")
  355. f.Add("dbcode", "fsjd")
  356. f.Add("rowcode", "zb")
  357. f.Add("colcode", "sj")
  358. //f.Add("wds", `[{"wdcode":"reg","valuecode":"110000"}]`)
  359. f.Add("wds", `[{"wdcode":"reg","valuecode":"310000"}]`)
  360. f.Add("dfwds", `[{"wdcode":"zb","valuecode":"A0501"},{"wdcode":"sj","valuecode":"LAST18"}]`)
  361. f.Add("k1", fmt.Sprint(time.Now().UnixNano()/1e6))
  362. f.Add("h", "1")
  363. //f := url.Values{}
  364. //f.Add("m", "QueryData")
  365. //f.Add("dbcode", "gatyd")
  366. //f.Add("rowcode", "sj")
  367. //f.Add("colcode", "reg")
  368. //f.Add("wds", `[{"wdcode":"zb","valuecode":"A010A"}]`)
  369. //f.Add("dfwds", `[{"wdcode":"sj","valuecode":"LAST36"}]`)
  370. //f := url.Values{}
  371. //f.Add("m", "QueryData")
  372. //f.Add("dbcode", "fsyd")
  373. //f.Add("rowcode", "zb")
  374. //f.Add("colcode", "sj")
  375. //f.Add("wds", `[{"wdcode":"reg","valuecode":"000"}]`)
  376. //f.Add("dfwds", `[{"wdcode":"zb","valuecode":"A01"}]`)
  377. //f.Add("k1", fmt.Sprint(time.Now().UnixNano()/1e6))
  378. //f.Add("h", "1")
  379. r, e := NationalHttpPost(NationalStatisticsBaseReqUrl, f.Encode())
  380. if e != nil {
  381. fmt.Println("请求失败, Err: ", e.Error())
  382. return
  383. }
  384. utils.FileLog.Info("test result: %s", string(r))
  385. return
  386. }