123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481 |
- package services
- import (
- "bytes"
- "encoding/json"
- "eta/eta_crawler/services/alarm_msg"
- "eta/eta_crawler/utils"
- "fmt"
- "github.com/PuerkitoBio/goquery"
- "io"
- "io/ioutil"
- "mime/multipart"
- "net/http"
- "os"
- "strconv"
- "strings"
- "time"
- )
- // UsdaPsdDataQueryParams 定义了JSON到Go结构体的映射
- type UsdaPsdDataQueryParams struct {
- QueryID int `json:"queryId"`
- CommodityGroupCode string `json:"commodityGroupCode"`
- Commodities []string `json:"commodities"`
- Attributes []int `json:"attributes"`
- Countries []string `json:"countries"`
- MarketYears []int `json:"marketYears"`
- ChkCommoditySummary bool `json:"chkCommoditySummary"`
- ChkAttribSummary bool `json:"chkAttribSummary"`
- ChkCountrySummary bool `json:"chkCountrySummary"`
- CommoditySummaryText string `json:"commoditySummaryText"`
- AttribSummaryText string `json:"attribSummaryText"`
- CountrySummaryText string `json:"countrySummaryText"`
- OptionColumn string `json:"optionColumn"`
- ChkTopCountry bool `json:"chkTopCountry"`
- TopCountryCount string `json:"topCountryCount"`
- ChkFileFormat bool `json:"chkfileFormat"`
- ChkPrevMonth bool `json:"chkPrevMonth"`
- ChkMonthChange bool `json:"chkMonthChange"`
- ChkCodes bool `json:"chkCodes"`
- ChkYearChange bool `json:"chkYearChange"`
- QueryName string `json:"queryName"`
- SortOrder string `json:"sortOrder"`
- TopCountryState bool `json:"topCountryState"`
- }
- type UsdaPsdData struct {
- TableHeaders []string `json:"tableHeaders"`
- QueryResult []map[string]interface{} `json:"queryResult"`
- }
- type UsdaPsdDataAttribute struct {
- AttributeId int `json:"attributeId"`
- }
- // UsdaFasIndex 美国农业部指标数据
- type UsdaFasIndex struct {
- ClassifyName string `description:"指标目录"`
- ParentClassifyName string `description:"父级指标目录"`
- ClassifySort int `description:"指标目录排序号"`
- IndexName string `description:"指标名称"`
- IndexCode string `description:"指标编码"`
- Unit string `description:"单位"`
- Sort int `description:"排序号"`
- Frequency string `description:"频度"`
- TerminalCode string `description:"编码"`
- ExcelDataMap map[string]string
- }
- // Meal, Palm Kernel:0813800
- // Meal, Peanut:0813200
- // Meal, Rapeseed:0813600
- // Meal, Soybean:0813100
- // Meal, Sunflowerseed:0813500
- // Oil, Coconut:4242000
- // Oil, Cottonseed:4233000
- // Oil, Olive:4235000
- // Oil, Palm:4243000
- // Oil, Palm Kernel:4244000
- // Oil, Peanut:4234000
- // Oil, Rapeseed:4239100
- // Oil, Soybean:4232000
- // Oil, Sunflowerseed:4236000
- // Oilseed, Cottonseed:2223000
- // Oilseed, Palm Kernel:2232000
- // Oilseed, Peanut:2221000
- // Oilseed, Rapeseed:2226000
- // Oilseed, Soybean:2222000
- // Oilseed, Sunflowerseed:2224000
- // 美国农业部月度供需平衡表数据
- func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) {
- // 从test.json文件中读取json串
- body, err := ioutil.ReadFile("test.json")
- if err != nil {
- return
- }
- // 解析json串
- item := new(UsdaPsdData)
- err = json.Unmarshal(body, &item)
- if err != nil {
- fmt.Println("json.Unmarshal err:" + err.Error())
- return
- }
- indexList, err = handleUsdaFasPsd(item)
- return
- /*defer func() {
- if err != nil {
- msg := "失败提醒" + "downloadUsdaPsdData ErrMsg:" + err.Error()
- fmt.Println("msg:", msg)
- utils.FileLog.Info(msg)
- go alarm_msg.SendAlarmMsg(msg, 3)
- }
- }()
- // 定义请求地址
- attributeUrl := "https://apps.fas.usda.gov/PSDOnlineApi/api/query/GetMultiCommodityAttributes?"
- dataUrl := "https://apps.fas.usda.gov/PSDOnlineApi/api/query/RunQuery"
- var commodities []string
- commodities = append(commodities, "0813800", "0813200", "0813600", "0813100", "0813500", "4242000", "4233000", "4235000", "4243000", "4244000", "4234000", "4239100", "4232000", "4236000", "2223000", "2232000", "2221000", "2226000", "2222000", "2224000")
- //commodities = append(commodities, "0430000")
- commodityCodes := strings.Join(commodities, ",")
- attributeUrl = attributeUrl + "commodityCodes=" + commodityCodes
- // 定义请求参数
- // 获取属性入参
- attributeBody, e := utils.HttpGetNoCookie(attributeUrl)
- if e != nil {
- err = e
- return
- }
- attrList := make([]UsdaPsdDataAttribute, 0)
- err = json.Unmarshal(attributeBody, &attrList)
- if err != nil {
- fmt.Println("json.Unmarshal err:" + err.Error())
- return
- }
- // 解析
- var attributes []int
- for _, v := range attrList {
- // 键值对的值
- attributes = append(attributes, v.AttributeId)
- }
- // 定义请求方法
- //attributes = append(attributes, 4, 20, 28, 57, 81, 84, 86, 88, 113, 130, 192, 125, 176, 178, 184)
- var countries []string
- countries = append(countries, "R00", "ALL")
- var marketYears []int
- marketYears = append(marketYears, 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014)
- // {"queryId":0,"commodityGroupCode":null,"commodities":["0430000"],"attributes":[4,20,28,57,81,84,86,88,113,130,192,125,176,178,184],"countries":["R00","ALL"],"marketYears":[2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014],"chkCommoditySummary":false,"chkAttribSummary":false,"chkCountrySummary":false,"commoditySummaryText":"","attribSummaryText":"","countrySummaryText":"","optionColumn":"year","chkTopCountry":false,"topCountryCount":"","chkfileFormat":false,"chkPrevMonth":true,"chkMonthChange":false,"chkCodes":false,"chkYearChange":false,"queryName":"","sortOrder":"Commodity/Attribute/Country","topCountryState":false}
- var req UsdaPsdDataQueryParams
- req.Commodities = commodities
- req.Attributes = attributes
- req.Countries = countries
- req.MarketYears = marketYears
- req.OptionColumn = "year"
- //req.ChkPrevMonth = true
- req.SortOrder = "Commodity/Country/Attribute"
- // 构造httppost请求
- reqBody, _ := json.Marshal(req)
- // 解析返回值
- headerParams := make(map[string]string)
- //headerParams["Cookie"] = "CT6T=312900; SF_cookie_3=68941398"
- //headerParams["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
- headerParams["Content-Type"] = "application/json"
- body, e := utils.HttpPostNoCookie(dataUrl, string(reqBody), headerParams)
- if e != nil {
- err = e
- return
- }
- item := new(UsdaPsdData)
- err = json.Unmarshal(body, &item)
- if err != nil {
- fmt.Println("json.Unmarshal err:" + err.Error())
- return
- }
- indexList, err = handleUsdaFasPsd(item)*/
- return
- }
- // 美国农业出库销售数据
- func DownloadUsdaFmsData() {
- var err error
- defer func() {
- if err != nil {
- msg := "失败提醒" + "DownloadUsdaFmsData ErrMsg:" + err.Error()
- fmt.Println("msg:", msg)
- utils.FileLog.Info(msg)
- go alarm_msg.SendAlarmMsg(msg, 3)
- }
- }()
- downloadFile := "downloaded_excel.xlsx"
- //请求首页,获取入参
- dataUrl := "https://apps.fas.usda.gov/esrquery/esrq.aspx"
- body1, err := utils.HttpGetNoCookie(dataUrl)
- if err != nil {
- return
- }
- htmlString := string(body1)
- // 解析返回值,截取htmlinput标签,input标签里,id=“__EVENTVALIDATION”的input标签里的值
- // 使用goquery读取HTML字符串
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlString))
- if err != nil {
- return
- }
- stateValue := doc.Find("input#__VIEWSTATE").AttrOr("value", "")
- stateEneratorValue := doc.Find("input#__VIEWSTATEGENERATOR").AttrOr("value", "")
- // 查询并获取input标签的值
- validValue := doc.Find("input#__EVENTVALIDATION").AttrOr("value", "")
- var body bytes.Buffer
- multipartWriter := multipart.NewWriter(&body)
- // 添加表单字段(如果需要的话)
- if err = multipartWriter.WriteField("__EVENTTARGET", ""); err != nil {
- err = fmt.Errorf("set __EVENTTARGET, Err:%s", err)
- return
- }
- if err = multipartWriter.WriteField("__EVENTARGUMENT", ""); err != nil {
- err = fmt.Errorf("set __EVENTARGUMENT, Err:%s", err)
- return
- }
- if err = multipartWriter.WriteField("__LASTFOCUS", ""); err != nil {
- err = fmt.Errorf("set __LASTFOCUS, Err:%s", err)
- return
- }
- if err = multipartWriter.WriteField("__VIEWSTATE", stateValue); err != nil {
- err = fmt.Errorf("set __VIEWSTATE, Err:%s", err)
- return
- }
- if err = multipartWriter.WriteField("__VIEWSTATEGENERATOR", stateEneratorValue); err != nil {
- err = fmt.Errorf("set __VIEWSTATEGENERATOR, Err:%s", err)
- return
- }
- if err = multipartWriter.WriteField("__EVENTVALIDATION", validValue); err != nil {
- err = fmt.Errorf("set __EVENTVALIDATION, Err:%s", err)
- return
- }
- //整理需要下载的品种ID
- //Soybeans:801,Soybean cake & meal:901,Soybean Oil:902
- CommodityIds := []string{"801", "901", "902"}
- for _, v := range CommodityIds {
- if err = multipartWriter.WriteField("ctl00$MainContent$lbCommodity", v); err != nil {
- err = fmt.Errorf("set ctl00$MainContent$lbCommodity, Err:%s", err)
- return
- }
- }
- startDate := "08/22/2019"
- endDate := "08/22/2024"
- if err = multipartWriter.WriteField("ctl00$MainContent$lbCountry", "0:0"); err != nil {
- err = fmt.Errorf("set ctl00$MainContent$lbCountry, Err:%s", err)
- return
- }
- if err = multipartWriter.WriteField("ctl00$MainContent$ddlReportFormat", "10"); err != nil {
- return
- }
- if err = multipartWriter.WriteField("ctl00$MainContent$rblOutputType", "2"); err != nil {
- return
- }
- if err = multipartWriter.WriteField("ctl00$MainContent$tbStartDate", startDate); err != nil {
- return
- }
- if err = multipartWriter.WriteField("ctl00$MainContent$tbEndDate", endDate); err != nil {
- return
- }
- if err = multipartWriter.WriteField("ctl00$MainContent$rblColumnSelection", "regular"); err != nil {
- return
- }
- if err = multipartWriter.WriteField("ctl00$MainContent$btnSubmit", "Submit"); err != nil {
- return
- }
- // 注意:如果接口需要文件上传,这里应该使用multipartWriter.CreateFormFile来添加文件
- // 关闭multipart writer以添加最后的边界
- if err = multipartWriter.Close(); err != nil {
- err = fmt.Errorf("close multipart writer, Err:%s", err)
- return
- }
- // 构造请求
- req, err := http.NewRequest("POST", dataUrl, &body)
- if err != nil {
- err = fmt.Errorf("create request, Err:%s", err)
- return
- }
- // 设置请求头
- req.Header.Set("Content-Type", multipartWriter.FormDataContentType())
- // 发送请求
- client := &http.Client{}
- resp, err := client.Do(req)
- if err != nil {
- err = fmt.Errorf("send request, Err:%s", err)
- return
- }
- defer resp.Body.Close()
- // 检查响应状态码
- if resp.StatusCode != http.StatusOK {
- err = fmt.Errorf("unexpected status code: %d", resp.StatusCode)
- return
- }
- // 读取响应体
- out, err := os.Create(downloadFile)
- if err != nil {
- return
- }
- defer out.Close()
- // 将响应体写入到文件
- _, err = io.Copy(out, resp.Body)
- if err != nil {
- return
- }
- fmt.Println("Excel file downloaded successfully")
- return
- }
- func handleUsdaFasPsd(item *UsdaPsdData) (indexList []*UsdaFasIndex, err error) {
- // 解析
- headerSlice := make([]string, 0)
- for index, v := range item.TableHeaders {
- // 键值对的值
- fmt.Println("key:", index, "value:", v)
- if !strings.Contains(v, "/") && !strings.Contains(v, " ") {
- v = strings.ToLower(v)
- }
- if v == "Unit Description" {
- v = "unit Description"
- }
- headerSlice = append(headerSlice, v)
- }
- // 解析
- // 遍历行读取
- indexList = make([]*UsdaFasIndex, 0)
- sort := 0
- // 指标名称
- indexMap := make(map[string]*UsdaFasIndex)
- // 键值对的值
- commodityRow := ""
- countriesRow := ""
- attributesRow := ""
- errMsg := ""
- for _, row := range item.QueryResult {
- unitK := headerSlice[len(headerSlice)-1]
- unit := row[unitK].(string)
- // unit 去掉左右两边的括号,去掉中间的空格
- unit = strings.Replace(unit, " ", "", -1)
- unit = strings.Trim(unit, "()")
- for _, k := range headerSlice {
- col, ok := row[k]
- if !ok || col == nil {
- continue
- }
- if k == "commodity" {
- commodityRow = col.(string)
- } else if k == "country" {
- countriesRow = col.(string)
- } else if k == "attribute" {
- attributesRow = col.(string)
- } else if k == "unit Description" {
- // unit = col.(string)
- } else {
- //数据列
- year, _ := strconv.Atoi(strings.Split(k, "/")[0])
- month := 0
- indexName := ""
- classifyName := ""
- classifySort := 0
- inCode := ""
- fre := "年度"
- lastStr := "Yearly"
- // year年度的最后一天日期
- dateT := time.Date(year, time.December, 31, 0, 0, 0, 0, time.UTC)
- if strings.Contains(k, "(") {
- fre = "月度"
- lastStr = "Monthly"
- // 截取括号中间的月度数据
- monthStr := strings.Split(k, "(")[1]
- monthStr = strings.Split(monthStr, ")")[0]
- // 将Jul英文月份前缀转成数字月份
- monthT, e := time.ParseInLocation("Jan", monthStr, time.Local)
- if e != nil {
- errMsg += fmt.Sprintf("月份转换错误:%s%s\n", monthStr, e.Error())
- continue
- }
- month = int(monthT.Month())
- // 将year和month拼接成日期,该月的最后一天日期
- dateT = time.Date(year, time.Month(month), 31, 0, 0, 0, 0, time.UTC)
- }
- date := dateT.Format("2006-01-02")
- // 封装成指标数据
- if commodityRow != "" && countriesRow != "" && attributesRow != "" {
- indexName = commodityRow + ": " + countriesRow + ": " + attributesRow + ": " + lastStr
- } else {
- fmt.Println("commodityRow:", commodityRow, "countriesRow:", countriesRow, "attributesRow:", attributesRow)
- errMsg += fmt.Sprintf("指标名称为空 commodityRow:%s,countriesRow:%s,attributesRow:%s\n", commodityRow, countriesRow, attributesRow)
- continue
- }
- inCode = "usda" + utils.GetFirstLetter(indexName)
- indexItem, okIndex := indexMap[indexName]
- // 首字母大写
- classifyName = commodityRow
- if !okIndex {
- // 新增指标
- indexItem = new(UsdaFasIndex)
- indexItem.IndexName = indexName
- indexItem.ClassifyName = classifyName
- indexItem.ParentClassifyName = "月度供需"
- indexItem.ClassifySort = classifySort
- indexItem.IndexCode = inCode
- indexItem.Frequency = fre
- indexItem.Sort = sort
- indexItem.Unit = unit
- indexItem.ExcelDataMap = make(map[string]string)
- sort++
- }
- val := col.(float64)
- indexItem.ExcelDataMap[date] = fmt.Sprintf("%.4f", val)
- indexMap[indexName] = indexItem
- continue
- }
- }
- }
- for _, v := range indexMap {
- fmt.Printf("IndexName: %s \n", v.IndexName)
- fmt.Printf("IndexCode: %s \n", v.IndexCode)
- indexList = append(indexList, v)
- if len(indexList) > 500 {
- err = addUsdaFasPsdData(indexList)
- if err != nil {
- return
- }
- indexList = []*UsdaFasIndex{}
- }
- }
- err = addUsdaFasPsdData(indexList)
- if err != nil {
- return
- }
- return
- }
- func addUsdaFasPsdData(indexList []*UsdaFasIndex) (err error) {
- sheetName := "月度供需"
- if len(indexList) > 0 {
- params := make(map[string]interface{})
- params["List"] = indexList
- params["TerminalCode"] = ""
- result, e := utils.PostEdbLib(params, "usda_fas/handle/excel_data")
- if e != nil {
- err = fmt.Errorf("sheet :%s PostEdbLib err: %s", sheetName, e.Error())
- b, _ := json.Marshal(params)
- utils.FileLog.Info(fmt.Sprintf("sheet :%s PostEdbLib err: %s, params: %s", sheetName, e.Error(), string(b)))
- return
- }
- resp := new(utils.BaseEdbLibResponse)
- if e := json.Unmarshal(result, &resp); e != nil {
- err = fmt.Errorf("sheet :%s json.Unmarshal err: %s", sheetName, e)
- utils.FileLog.Info(fmt.Sprintf("sheet :%s json.Unmarshal err: %s", sheetName, e))
- return
- }
- if resp.Ret != 200 {
- err = fmt.Errorf("sheet :%s Msg: %s, ErrMsg: %s", sheetName, resp.Msg, resp.ErrMsg)
- utils.FileLog.Info(fmt.Sprintf("sheet :%s Msg: %s, ErrMsg: %s", sheetName, resp.Msg, resp.ErrMsg))
- return
- }
- }
- return
- }
|