Browse Source

从美国农业部月度供需平衡表网站爬取数据

xyxie 7 months ago
parent
commit
19a88f5fc5
2 changed files with 215 additions and 0 deletions
  1. 160 0
      services/usda_psd.go
  2. 55 0
      utils/common.go

+ 160 - 0
services/usda_psd.go

@@ -0,0 +1,160 @@
+package services
+
+import (
+	"encoding/json"
+	"eta/eta_crawler/services/alarm_msg"
+	"eta/eta_crawler/utils"
+	"fmt"
+	"strings"
+)
+
+// UsdaPsdDataQueryParams is the JSON request body that DownloadUsdaPsdData
+// posts to the USDA PSD Online RunQuery endpoint. Each field's tag is the key
+// name used in that payload. No tag uses omitempty, so fields left at their
+// zero value are still serialized.
+type UsdaPsdDataQueryParams struct {
+	QueryID              int      `json:"queryId"`
+	CommodityGroupCode   string   `json:"commodityGroupCode"`
+	Commodities          []string `json:"commodities"` // commodity codes, e.g. "0813100"
+	Attributes           []int    `json:"attributes"` // attribute IDs to query
+	Countries            []string `json:"countries"` // country codes; the caller in this file sends "R00" and "ALL"
+	MarketYears          []int    `json:"marketYears"` // market years, e.g. 2024
+	ChkCommoditySummary  bool     `json:"chkCommoditySummary"`
+	ChkAttribSummary     bool     `json:"chkAttribSummary"`
+	ChkCountrySummary    bool     `json:"chkCountrySummary"`
+	CommoditySummaryText string   `json:"commoditySummaryText"`
+	AttribSummaryText    string   `json:"attribSummaryText"`
+	CountrySummaryText   string   `json:"countrySummaryText"`
+	OptionColumn         string   `json:"optionColumn"` // the caller in this file sends "year"
+	ChkTopCountry        bool     `json:"chkTopCountry"`
+	TopCountryCount      string   `json:"topCountryCount"`
+	ChkFileFormat        bool     `json:"chkfileFormat"` // lower-case "f" matches the key in the reference payload
+	ChkPrevMonth         bool     `json:"chkPrevMonth"`
+	ChkMonthChange       bool     `json:"chkMonthChange"`
+	ChkCodes             bool     `json:"chkCodes"`
+	ChkYearChange        bool     `json:"chkYearChange"`
+	QueryName            string   `json:"queryName"`
+	SortOrder            string   `json:"sortOrder"` // slash-separated ordering, e.g. "Commodity/Country/Attribute"
+	TopCountryState      bool     `json:"topCountryState"`
+}
+
+// UsdaPsdData is the structure DownloadUsdaPsdData decodes the RunQuery
+// response body into.
+type UsdaPsdData struct {
+	// TableHeaders is decoded from the "tableHeaders" array of strings.
+	TableHeaders []string                 `json:"tableHeaders"`
+	// QueryResult is decoded from the "queryResult" array; every element is
+	// kept as a generic JSON object.
+	// NOTE(review): presumably one object per result row — confirm against a real response.
+	QueryResult  []map[string]interface{} `json:"queryResult"`
+}
+
+// UsdaPsdDataAttribute is one element of the JSON array that
+// DownloadUsdaPsdData decodes from the GetMultiCommodityAttributes response.
+// Only the "attributeId" key is decoded; encoding/json ignores any other keys
+// present in the response.
+type UsdaPsdDataAttribute struct {
+	AttributeId int `json:"attributeId"`
+}
+
+// DownloadUsdaPsdData queries the USDA PSD Online API for a fixed list of
+// meal, oil and oilseed commodities and prints the decoded result.
+//
+// It first calls GetMultiCommodityAttributes to learn the attribute IDs of the
+// commodities, then posts a RunQuery request for those attributes across the
+// configured countries and market years. Whenever err is non-nil on return,
+// the deferred handler prints the message, writes it to utils.FileLog and
+// forwards it through alarm_msg.SendAlarmMsg.
+//
+// Commodity codes requested:
+//
+//	Meal, Palm Kernel:      0813800
+//	Meal, Peanut:           0813200
+//	Meal, Rapeseed:         0813600
+//	Meal, Soybean:          0813100
+//	Meal, Sunflowerseed:    0813500
+//
+//	Oil, Coconut:           4242000
+//	Oil, Cottonseed:        4233000
+//	Oil, Olive:             4235000
+//	Oil, Palm:              4243000
+//	Oil, Palm Kernel:       4244000
+//	Oil, Peanut:            4234000
+//	Oil, Rapeseed:          4239100
+//	Oil, Soybean:           4232000
+//	Oil, Sunflowerseed:     4236000
+//
+//	Oilseed, Cottonseed:    2223000
+//	Oilseed, Palm Kernel:   2232000
+//	Oilseed, Peanut:        2221000
+//	Oilseed, Rapeseed:      2226000
+//	Oilseed, Soybean:       2222000
+//	Oilseed, Sunflowerseed: 2224000
+func DownloadUsdaPsdData() {
+	var err error
+	defer func() {
+		if err != nil {
+			msg := "失败提醒" + "downloadUsdaPsdData ErrMsg:" + err.Error()
+			fmt.Println("msg:", msg)
+			utils.FileLog.Info(msg)
+			go alarm_msg.SendAlarmMsg(msg, 3)
+		}
+	}()
+
+	attributeURL := "https://apps.fas.usda.gov/PSDOnlineApi/api/query/GetMultiCommodityAttributes?"
+	dataURL := "https://apps.fas.usda.gov/PSDOnlineApi/api/query/RunQuery"
+
+	commodities := []string{
+		"0813800", "0813200", "0813600", "0813100", "0813500",
+		"4242000", "4233000", "4235000", "4243000", "4244000", "4234000", "4239100", "4232000", "4236000",
+		"2223000", "2232000", "2221000", "2226000", "2222000", "2224000",
+	}
+	attributeURL += "commodityCodes=" + strings.Join(commodities, ",")
+
+	// Step 1: look up the attribute IDs available for the commodities.
+	attributeBody, e := utils.HttpGetNoCookie(attributeURL)
+	if e != nil {
+		err = e
+		return
+	}
+	var attrList []UsdaPsdDataAttribute
+	if e = json.Unmarshal(attributeBody, &attrList); e != nil {
+		err = fmt.Errorf("unmarshal attribute list: %w", e)
+		return
+	}
+	// A non-nil slice keeps "attributes" a JSON array ("[]") rather than
+	// null when the lookup returns nothing.
+	attributes := make([]int, 0, len(attrList))
+	for _, attr := range attrList {
+		attributes = append(attributes, attr.AttributeId)
+	}
+
+	// Step 2: build the RunQuery payload.
+	// Reference payload shape (the request built here uses a different
+	// commodity list, attribute list and sortOrder, and leaves chkPrevMonth false):
+	// {"queryId":0,"commodityGroupCode":null,"commodities":["0430000"],"attributes":[4,20,28,57,81,84,86,88,113,130,192,125,176,178,184],"countries":["R00","ALL"],"marketYears":[2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014],"chkCommoditySummary":false,"chkAttribSummary":false,"chkCountrySummary":false,"commoditySummaryText":"","attribSummaryText":"","countrySummaryText":"","optionColumn":"year","chkTopCountry":false,"topCountryCount":"","chkfileFormat":false,"chkPrevMonth":true,"chkMonthChange":false,"chkCodes":false,"chkYearChange":false,"queryName":"","sortOrder":"Commodity/Attribute/Country","topCountryState":false}
+	req := UsdaPsdDataQueryParams{
+		Commodities: commodities,
+		Attributes:  attributes,
+		Countries:   []string{"R00", "ALL"},
+		// NOTE(review): the market years are hard-coded and will need extending over time.
+		MarketYears:  []int{2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014},
+		OptionColumn: "year",
+		SortOrder:    "Commodity/Country/Attribute",
+	}
+	reqBody, e := json.Marshal(req)
+	if e != nil {
+		err = fmt.Errorf("marshal query params: %w", e)
+		return
+	}
+
+	// Step 3: run the query.
+	headerParams := map[string]string{"Content-Type": "application/json"}
+	body, e := utils.HttpPostNoCookie(dataURL, string(reqBody), headerParams)
+	if e != nil {
+		err = e
+		return
+	}
+	bodyText := string(body)
+	fmt.Println("body :" + bodyText)
+	utils.FileLog.Info("body:" + bodyText)
+	if strings.Contains(bodyText, "很抱歉! 因系统检测到您的请求可能对网站造成威胁") {
+		// The response is a "request blocked" page rather than JSON. It has
+		// already been logged above; returning with err still nil means no
+		// alarm is sent.
+		// NOTE(review): confirm that skipping the alarm here is intended.
+		return
+	}
+
+	// Decode into a value, not a pointer: with a *UsdaPsdData target a JSON
+	// "null" body would reset the pointer to nil and the loops below would
+	// dereference it.
+	var item UsdaPsdData
+	if e = json.Unmarshal(body, &item); e != nil {
+		err = fmt.Errorf("unmarshal query result: %w", e)
+		return
+	}
+	for k, v := range item.TableHeaders {
+		fmt.Println("key:", k, "value:", v)
+	}
+	for k, v := range item.QueryResult {
+		fmt.Println("key:", k, "value:", v)
+	}
+}

+ 55 - 0
utils/common.go

@@ -1127,3 +1127,58 @@ func ChineseToPinyinInitials(input string) string {
 
 	return result.String()
 }
+
+// 修改供应商信息
+func HttpPostNoCookie(url string, reqParam string, headersParams map[string]string) ([]byte, error) {
+
+	var (
+		err error
+	)
+
+	// 准备: HTTP请求
+	reqBody := strings.NewReader(reqParam)
+	httpReq, err := http.NewRequest("POST", url, reqBody)
+	if err != nil {
+		fmt.Printf("NewRequest fail, url: %s, reqBody: %s, err: %v", url, reqBody, err)
+		return nil, err
+	}
+	for k, v := range headersParams {
+		httpReq.Header.Set(k, v)
+	}
+	//httpReq.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36")
+
+	//httpReq.Header.Add("Content-Type", "application/x-www-form-urlencoded;charset=utf-8")
+	client := &http.Client{}
+	// DO: HTTP请求
+	httpRsp, err := client.Do(httpReq)
+	if err != nil {
+		fmt.Printf("do http fail, url: %s, reqBody: %s, err:%v", url, reqBody, err)
+		return nil, err
+	}
+	defer httpRsp.Body.Close()
+
+	if httpRsp.StatusCode != 200 {
+		err = fmt.Errorf("do http fail, url: %s, statusCode: %d, status:%s ", url, httpRsp.StatusCode, httpRsp.Status)
+		return nil, err
+	}
+	// Read: HTTP结果
+	b, err := ioutil.ReadAll(httpRsp.Body)
+	if err != nil {
+		fmt.Printf("ReadAll failed, url: %s, reqBody: %s, err: %v", url, reqBody, err)
+		return nil, err
+	}
+
+	// unmarshal: 解析HTTP返回的结果
+	// 		body: {"Result":{"RequestId":"12131","HasError":true,"ResponseItems":{"ErrorMsg":"错误信息"}}}
+
+	return b, err
+}
+
+// HttpGetNoCookie issues a GET request to url with the default HTTP client
+// and returns the raw response body.
+//
+// A response whose status code is not 200 is reported as an error, the same
+// rule HttpPostNoCookie applies, so callers never receive an error page as if
+// it were data.
+func HttpGetNoCookie(url string) ([]byte, error) {
+	res, err := http.Get(url)
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("do http fail, url: %s, statusCode: %d, status:%s", url, res.StatusCode, res.Status)
+	}
+	return ioutil.ReadAll(res.Body)
+}