package services import ( "bytes" "encoding/json" "eta/eta_crawler/services/alarm_msg" "eta/eta_crawler/utils" "fmt" "github.com/PuerkitoBio/goquery" "io" "io/ioutil" "mime/multipart" "net/http" "os" "strconv" "strings" "time" ) // UsdaPsdDataQueryParams 定义了JSON到Go结构体的映射 type UsdaPsdDataQueryParams struct { QueryID int `json:"queryId"` CommodityGroupCode string `json:"commodityGroupCode"` Commodities []string `json:"commodities"` Attributes []int `json:"attributes"` Countries []string `json:"countries"` MarketYears []int `json:"marketYears"` ChkCommoditySummary bool `json:"chkCommoditySummary"` ChkAttribSummary bool `json:"chkAttribSummary"` ChkCountrySummary bool `json:"chkCountrySummary"` CommoditySummaryText string `json:"commoditySummaryText"` AttribSummaryText string `json:"attribSummaryText"` CountrySummaryText string `json:"countrySummaryText"` OptionColumn string `json:"optionColumn"` ChkTopCountry bool `json:"chkTopCountry"` TopCountryCount string `json:"topCountryCount"` ChkFileFormat bool `json:"chkfileFormat"` ChkPrevMonth bool `json:"chkPrevMonth"` ChkMonthChange bool `json:"chkMonthChange"` ChkCodes bool `json:"chkCodes"` ChkYearChange bool `json:"chkYearChange"` QueryName string `json:"queryName"` SortOrder string `json:"sortOrder"` TopCountryState bool `json:"topCountryState"` } type UsdaPsdData struct { TableHeaders []string `json:"tableHeaders"` QueryResult []map[string]interface{} `json:"queryResult"` } type UsdaPsdDataAttribute struct { AttributeId int `json:"attributeId"` } // UsdaFasIndex 美国农业部指标数据 type UsdaFasIndex struct { ClassifyName string `description:"指标目录"` ParentClassifyName string `description:"父级指标目录"` ClassifySort int `description:"指标目录排序号"` IndexName string `description:"指标名称"` IndexCode string `description:"指标编码"` Unit string `description:"单位"` Sort int `description:"排序号"` Frequency string `description:"频度"` TerminalCode string `description:"编码"` ExcelDataMap map[string]string } // Meal, Palm Kernel:0813800 // Meal, Peanut:0813200 // Meal, Rapeseed:0813600 // Meal, Soybean:0813100 // Meal, Sunflowerseed:0813500 // Oil, Coconut:4242000 // Oil, Cottonseed:4233000 // Oil, Olive:4235000 // Oil, Palm:4243000 // Oil, Palm Kernel:4244000 // Oil, Peanut:4234000 // Oil, Rapeseed:4239100 // Oil, Soybean:4232000 // Oil, Sunflowerseed:4236000 // Oilseed, Cottonseed:2223000 // Oilseed, Palm Kernel:2232000 // Oilseed, Peanut:2221000 // Oilseed, Rapeseed:2226000 // Oilseed, Soybean:2222000 // Oilseed, Sunflowerseed:2224000 // 美国农业部月度供需平衡表数据 func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) { // 从test.json文件中读取json串 body, err := ioutil.ReadFile("test.json") if err != nil { return } // 解析json串 item := new(UsdaPsdData) err = json.Unmarshal(body, &item) if err != nil { fmt.Println("json.Unmarshal err:" + err.Error()) return } indexList, err = handleUsdaFasPsd(item) return /*defer func() { if err != nil { msg := "失败提醒" + "downloadUsdaPsdData ErrMsg:" + err.Error() fmt.Println("msg:", msg) utils.FileLog.Info(msg) go alarm_msg.SendAlarmMsg(msg, 3) } }() // 定义请求地址 attributeUrl := "https://apps.fas.usda.gov/PSDOnlineApi/api/query/GetMultiCommodityAttributes?" dataUrl := "https://apps.fas.usda.gov/PSDOnlineApi/api/query/RunQuery" var commodities []string commodities = append(commodities, "0813800", "0813200", "0813600", "0813100", "0813500", "4242000", "4233000", "4235000", "4243000", "4244000", "4234000", "4239100", "4232000", "4236000", "2223000", "2232000", "2221000", "2226000", "2222000", "2224000") //commodities = append(commodities, "0430000") commodityCodes := strings.Join(commodities, ",") attributeUrl = attributeUrl + "commodityCodes=" + commodityCodes // 定义请求参数 // 获取属性入参 attributeBody, e := utils.HttpGetNoCookie(attributeUrl) if e != nil { err = e return } attrList := make([]UsdaPsdDataAttribute, 0) err = json.Unmarshal(attributeBody, &attrList) if err != nil { fmt.Println("json.Unmarshal err:" + err.Error()) return } // 解析 var attributes []int for _, v := range attrList { // 键值对的值 attributes = append(attributes, v.AttributeId) } // 定义请求方法 //attributes = append(attributes, 4, 20, 28, 57, 81, 84, 86, 88, 113, 130, 192, 125, 176, 178, 184) var countries []string countries = append(countries, "R00", "ALL") var marketYears []int marketYears = append(marketYears, 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014) // {"queryId":0,"commodityGroupCode":null,"commodities":["0430000"],"attributes":[4,20,28,57,81,84,86,88,113,130,192,125,176,178,184],"countries":["R00","ALL"],"marketYears":[2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014],"chkCommoditySummary":false,"chkAttribSummary":false,"chkCountrySummary":false,"commoditySummaryText":"","attribSummaryText":"","countrySummaryText":"","optionColumn":"year","chkTopCountry":false,"topCountryCount":"","chkfileFormat":false,"chkPrevMonth":true,"chkMonthChange":false,"chkCodes":false,"chkYearChange":false,"queryName":"","sortOrder":"Commodity/Attribute/Country","topCountryState":false} var req UsdaPsdDataQueryParams req.Commodities = commodities req.Attributes = attributes req.Countries = countries req.MarketYears = marketYears req.OptionColumn = "year" //req.ChkPrevMonth = true req.SortOrder = "Commodity/Country/Attribute" // 构造httppost请求 reqBody, _ := json.Marshal(req) // 解析返回值 headerParams := make(map[string]string) //headerParams["Cookie"] = "CT6T=312900; SF_cookie_3=68941398" //headerParams["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36" headerParams["Content-Type"] = "application/json" body, e := utils.HttpPostNoCookie(dataUrl, string(reqBody), headerParams) if e != nil { err = e return } item := new(UsdaPsdData) err = json.Unmarshal(body, &item) if err != nil { fmt.Println("json.Unmarshal err:" + err.Error()) return } indexList, err = handleUsdaFasPsd(item)*/ return } // 美国农业出库销售数据 func DownloadUsdaFmsData() { var err error defer func() { if err != nil { msg := "失败提醒" + "DownloadUsdaFmsData ErrMsg:" + err.Error() fmt.Println("msg:", msg) utils.FileLog.Info(msg) go alarm_msg.SendAlarmMsg(msg, 3) } }() downloadFile := "downloaded_excel.xlsx" //请求首页,获取入参 dataUrl := "https://apps.fas.usda.gov/esrquery/esrq.aspx" body1, err := utils.HttpGetNoCookie(dataUrl) if err != nil { return } htmlString := string(body1) // 解析返回值,截取htmlinput标签,input标签里,id=“__EVENTVALIDATION”的input标签里的值 // 使用goquery读取HTML字符串 doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlString)) if err != nil { return } stateValue := doc.Find("input#__VIEWSTATE").AttrOr("value", "") stateEneratorValue := doc.Find("input#__VIEWSTATEGENERATOR").AttrOr("value", "") // 查询并获取input标签的值 validValue := doc.Find("input#__EVENTVALIDATION").AttrOr("value", "") var body bytes.Buffer multipartWriter := multipart.NewWriter(&body) // 添加表单字段(如果需要的话) if err = multipartWriter.WriteField("__EVENTTARGET", ""); err != nil { err = fmt.Errorf("set __EVENTTARGET, Err:%s", err) return } if err = multipartWriter.WriteField("__EVENTARGUMENT", ""); err != nil { err = fmt.Errorf("set __EVENTARGUMENT, Err:%s", err) return } if err = multipartWriter.WriteField("__LASTFOCUS", ""); err != nil { err = fmt.Errorf("set __LASTFOCUS, Err:%s", err) return } if err = multipartWriter.WriteField("__VIEWSTATE", stateValue); err != nil { err = fmt.Errorf("set __VIEWSTATE, Err:%s", err) return } if err = multipartWriter.WriteField("__VIEWSTATEGENERATOR", stateEneratorValue); err != nil { err = fmt.Errorf("set __VIEWSTATEGENERATOR, Err:%s", err) return } if err = multipartWriter.WriteField("__EVENTVALIDATION", validValue); err != nil { err = fmt.Errorf("set __EVENTVALIDATION, Err:%s", err) return } //整理需要下载的品种ID //Soybeans:801,Soybean cake & meal:901,Soybean Oil:902 CommodityIds := []string{"801", "901", "902"} for _, v := range CommodityIds { if err = multipartWriter.WriteField("ctl00$MainContent$lbCommodity", v); err != nil { err = fmt.Errorf("set ctl00$MainContent$lbCommodity, Err:%s", err) return } } startDate := "08/22/2019" endDate := "08/22/2024" if err = multipartWriter.WriteField("ctl00$MainContent$lbCountry", "0:0"); err != nil { err = fmt.Errorf("set ctl00$MainContent$lbCountry, Err:%s", err) return } if err = multipartWriter.WriteField("ctl00$MainContent$ddlReportFormat", "10"); err != nil { return } if err = multipartWriter.WriteField("ctl00$MainContent$rblOutputType", "2"); err != nil { return } if err = multipartWriter.WriteField("ctl00$MainContent$tbStartDate", startDate); err != nil { return } if err = multipartWriter.WriteField("ctl00$MainContent$tbEndDate", endDate); err != nil { return } if err = multipartWriter.WriteField("ctl00$MainContent$rblColumnSelection", "regular"); err != nil { return } if err = multipartWriter.WriteField("ctl00$MainContent$btnSubmit", "Submit"); err != nil { return } // 注意:如果接口需要文件上传,这里应该使用multipartWriter.CreateFormFile来添加文件 // 关闭multipart writer以添加最后的边界 if err = multipartWriter.Close(); err != nil { err = fmt.Errorf("close multipart writer, Err:%s", err) return } // 构造请求 req, err := http.NewRequest("POST", dataUrl, &body) if err != nil { err = fmt.Errorf("create request, Err:%s", err) return } // 设置请求头 req.Header.Set("Content-Type", multipartWriter.FormDataContentType()) // 发送请求 client := &http.Client{} resp, err := client.Do(req) if err != nil { err = fmt.Errorf("send request, Err:%s", err) return } defer resp.Body.Close() // 检查响应状态码 if resp.StatusCode != http.StatusOK { err = fmt.Errorf("unexpected status code: %d", resp.StatusCode) return } // 读取响应体 out, err := os.Create(downloadFile) if err != nil { return } defer out.Close() // 将响应体写入到文件 _, err = io.Copy(out, resp.Body) if err != nil { return } fmt.Println("Excel file downloaded successfully") return } func handleUsdaFasPsd(item *UsdaPsdData) (indexList []*UsdaFasIndex, err error) { // 解析 headerSlice := make([]string, 0) for index, v := range item.TableHeaders { // 键值对的值 fmt.Println("key:", index, "value:", v) if !strings.Contains(v, "/") && !strings.Contains(v, " ") { v = strings.ToLower(v) } if v == "Unit Description" { v = "unit Description" } headerSlice = append(headerSlice, v) } // 解析 // 遍历行读取 indexList = make([]*UsdaFasIndex, 0) sort := 0 // 指标名称 indexMap := make(map[string]*UsdaFasIndex) // 键值对的值 commodityRow := "" countriesRow := "" attributesRow := "" errMsg := "" for _, row := range item.QueryResult { unitK := headerSlice[len(headerSlice)-1] unit := row[unitK].(string) // unit 去掉左右两边的括号,去掉中间的空格 unit = strings.Replace(unit, " ", "", -1) unit = strings.Trim(unit, "()") for _, k := range headerSlice { col, ok := row[k] if !ok || col == nil { continue } if k == "commodity" { commodityRow = col.(string) } else if k == "country" { countriesRow = col.(string) } else if k == "attribute" { attributesRow = col.(string) } else if k == "unit Description" { // unit = col.(string) } else { //数据列 year, _ := strconv.Atoi(strings.Split(k, "/")[0]) month := 0 indexName := "" classifyName := "" classifySort := 0 inCode := "" fre := "年度" lastStr := "Yearly" // year年度的最后一天日期 dateT := time.Date(year, time.December, 31, 0, 0, 0, 0, time.UTC) if strings.Contains(k, "(") { fre = "月度" lastStr = "Monthly" // 截取括号中间的月度数据 monthStr := strings.Split(k, "(")[1] monthStr = strings.Split(monthStr, ")")[0] // 将Jul英文月份前缀转成数字月份 monthT, e := time.ParseInLocation("Jan", monthStr, time.Local) if e != nil { errMsg += fmt.Sprintf("月份转换错误:%s%s\n", monthStr, e.Error()) continue } month = int(monthT.Month()) // 将year和month拼接成日期,该月的最后一天日期 dateT = time.Date(year, time.Month(month), 31, 0, 0, 0, 0, time.UTC) } date := dateT.Format("2006-01-02") // 封装成指标数据 if commodityRow != "" && countriesRow != "" && attributesRow != "" { indexName = commodityRow + ": " + countriesRow + ": " + attributesRow + ": " + lastStr } else { fmt.Println("commodityRow:", commodityRow, "countriesRow:", countriesRow, "attributesRow:", attributesRow) errMsg += fmt.Sprintf("指标名称为空 commodityRow:%s,countriesRow:%s,attributesRow:%s\n", commodityRow, countriesRow, attributesRow) continue } inCode = "usda" + utils.GetFirstLetter(indexName) indexItem, okIndex := indexMap[indexName] // 首字母大写 classifyName = commodityRow if !okIndex { // 新增指标 indexItem = new(UsdaFasIndex) indexItem.IndexName = indexName indexItem.ClassifyName = classifyName indexItem.ParentClassifyName = "月度供需" indexItem.ClassifySort = classifySort indexItem.IndexCode = inCode indexItem.Frequency = fre indexItem.Sort = sort indexItem.Unit = unit indexItem.ExcelDataMap = make(map[string]string) sort++ } val := col.(float64) indexItem.ExcelDataMap[date] = fmt.Sprintf("%.4f", val) indexMap[indexName] = indexItem continue } } } for _, v := range indexMap { fmt.Printf("IndexName: %s \n", v.IndexName) fmt.Printf("IndexCode: %s \n", v.IndexCode) indexList = append(indexList, v) if len(indexList) > 500 { err = addUsdaFasPsdData(indexList) if err != nil { return } indexList = []*UsdaFasIndex{} } } err = addUsdaFasPsdData(indexList) if err != nil { return } return } func addUsdaFasPsdData(indexList []*UsdaFasIndex) (err error) { sheetName := "月度供需" if len(indexList) > 0 { params := make(map[string]interface{}) params["List"] = indexList params["TerminalCode"] = "" result, e := utils.PostEdbLib(params, "usda_fas/handle/excel_data") if e != nil { err = fmt.Errorf("sheet :%s PostEdbLib err: %s", sheetName, e.Error()) b, _ := json.Marshal(params) utils.FileLog.Info(fmt.Sprintf("sheet :%s PostEdbLib err: %s, params: %s", sheetName, e.Error(), string(b))) return } resp := new(utils.BaseEdbLibResponse) if e := json.Unmarshal(result, &resp); e != nil { err = fmt.Errorf("sheet :%s json.Unmarshal err: %s", sheetName, e) utils.FileLog.Info(fmt.Sprintf("sheet :%s json.Unmarshal err: %s", sheetName, e)) return } if resp.Ret != 200 { err = fmt.Errorf("sheet :%s Msg: %s, ErrMsg: %s", sheetName, resp.Msg, resp.ErrMsg) utils.FileLog.Info(fmt.Sprintf("sheet :%s Msg: %s, ErrMsg: %s", sheetName, resp.Msg, resp.ErrMsg)) return } } return }