usda_psd.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. package services
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "eta/eta_crawler/services/alarm_msg"
  6. "eta/eta_crawler/utils"
  7. "fmt"
  8. "github.com/PuerkitoBio/goquery"
  9. "io"
  10. "io/ioutil"
  11. "mime/multipart"
  12. "net/http"
  13. "os"
  14. "strconv"
  15. "strings"
  16. "time"
  17. )
  18. // UsdaPsdDataQueryParams 定义了JSON到Go结构体的映射
  19. type UsdaPsdDataQueryParams struct {
  20. QueryID int `json:"queryId"`
  21. CommodityGroupCode string `json:"commodityGroupCode"`
  22. Commodities []string `json:"commodities"`
  23. Attributes []int `json:"attributes"`
  24. Countries []string `json:"countries"`
  25. MarketYears []int `json:"marketYears"`
  26. ChkCommoditySummary bool `json:"chkCommoditySummary"`
  27. ChkAttribSummary bool `json:"chkAttribSummary"`
  28. ChkCountrySummary bool `json:"chkCountrySummary"`
  29. CommoditySummaryText string `json:"commoditySummaryText"`
  30. AttribSummaryText string `json:"attribSummaryText"`
  31. CountrySummaryText string `json:"countrySummaryText"`
  32. OptionColumn string `json:"optionColumn"`
  33. ChkTopCountry bool `json:"chkTopCountry"`
  34. TopCountryCount string `json:"topCountryCount"`
  35. ChkFileFormat bool `json:"chkfileFormat"`
  36. ChkPrevMonth bool `json:"chkPrevMonth"`
  37. ChkMonthChange bool `json:"chkMonthChange"`
  38. ChkCodes bool `json:"chkCodes"`
  39. ChkYearChange bool `json:"chkYearChange"`
  40. QueryName string `json:"queryName"`
  41. SortOrder string `json:"sortOrder"`
  42. TopCountryState bool `json:"topCountryState"`
  43. }
  44. type UsdaPsdData struct {
  45. TableHeaders []string `json:"tableHeaders"`
  46. QueryResult []map[string]interface{} `json:"queryResult"`
  47. }
  48. type UsdaPsdDataAttribute struct {
  49. AttributeId int `json:"attributeId"`
  50. }
  51. // UsdaFasIndex 美国农业部指标数据
  52. type UsdaFasIndex struct {
  53. ClassifyName string `description:"指标目录"`
  54. ParentClassifyName string `description:"父级指标目录"`
  55. ClassifySort int `description:"指标目录排序号"`
  56. IndexName string `description:"指标名称"`
  57. IndexCode string `description:"指标编码"`
  58. Unit string `description:"单位"`
  59. Sort int `description:"排序号"`
  60. Frequency string `description:"频度"`
  61. TerminalCode string `description:"编码"`
  62. ExcelDataMap map[string]string
  63. }
  64. // Meal, Palm Kernel:0813800
  65. // Meal, Peanut:0813200
  66. // Meal, Rapeseed:0813600
  67. // Meal, Soybean:0813100
  68. // Meal, Sunflowerseed:0813500
  69. // Oil, Coconut:4242000
  70. // Oil, Cottonseed:4233000
  71. // Oil, Olive:4235000
  72. // Oil, Palm:4243000
  73. // Oil, Palm Kernel:4244000
  74. // Oil, Peanut:4234000
  75. // Oil, Rapeseed:4239100
  76. // Oil, Soybean:4232000
  77. // Oil, Sunflowerseed:4236000
  78. // Oilseed, Cottonseed:2223000
  79. // Oilseed, Palm Kernel:2232000
  80. // Oilseed, Peanut:2221000
  81. // Oilseed, Rapeseed:2226000
  82. // Oilseed, Soybean:2222000
  83. // Oilseed, Sunflowerseed:2224000
  84. // 美国农业部月度供需平衡表数据
  85. func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) {
  86. // 从test.json文件中读取json串
  87. body, err := ioutil.ReadFile("test.json")
  88. if err != nil {
  89. return
  90. }
  91. // 解析json串
  92. item := new(UsdaPsdData)
  93. err = json.Unmarshal(body, &item)
  94. if err != nil {
  95. fmt.Println("json.Unmarshal err:" + err.Error())
  96. return
  97. }
  98. indexList, err = handleUsdaFasPsd(item)
  99. return
  100. /*defer func() {
  101. if err != nil {
  102. msg := "失败提醒" + "downloadUsdaPsdData ErrMsg:" + err.Error()
  103. fmt.Println("msg:", msg)
  104. utils.FileLog.Info(msg)
  105. go alarm_msg.SendAlarmMsg(msg, 3)
  106. }
  107. }()
  108. // 定义请求地址
  109. attributeUrl := "https://apps.fas.usda.gov/PSDOnlineApi/api/query/GetMultiCommodityAttributes?"
  110. dataUrl := "https://apps.fas.usda.gov/PSDOnlineApi/api/query/RunQuery"
  111. var commodities []string
  112. commodities = append(commodities, "0813800", "0813200", "0813600", "0813100", "0813500", "4242000", "4233000", "4235000", "4243000", "4244000", "4234000", "4239100", "4232000", "4236000", "2223000", "2232000", "2221000", "2226000", "2222000", "2224000")
  113. //commodities = append(commodities, "0430000")
  114. commodityCodes := strings.Join(commodities, ",")
  115. attributeUrl = attributeUrl + "commodityCodes=" + commodityCodes
  116. // 定义请求参数
  117. // 获取属性入参
  118. attributeBody, e := utils.HttpGetNoCookie(attributeUrl)
  119. if e != nil {
  120. err = e
  121. return
  122. }
  123. attrList := make([]UsdaPsdDataAttribute, 0)
  124. err = json.Unmarshal(attributeBody, &attrList)
  125. if err != nil {
  126. fmt.Println("json.Unmarshal err:" + err.Error())
  127. return
  128. }
  129. // 解析
  130. var attributes []int
  131. for _, v := range attrList {
  132. // 键值对的值
  133. attributes = append(attributes, v.AttributeId)
  134. }
  135. // 定义请求方法
  136. //attributes = append(attributes, 4, 20, 28, 57, 81, 84, 86, 88, 113, 130, 192, 125, 176, 178, 184)
  137. var countries []string
  138. countries = append(countries, "R00", "ALL")
  139. var marketYears []int
  140. marketYears = append(marketYears, 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014)
  141. // {"queryId":0,"commodityGroupCode":null,"commodities":["0430000"],"attributes":[4,20,28,57,81,84,86,88,113,130,192,125,176,178,184],"countries":["R00","ALL"],"marketYears":[2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014],"chkCommoditySummary":false,"chkAttribSummary":false,"chkCountrySummary":false,"commoditySummaryText":"","attribSummaryText":"","countrySummaryText":"","optionColumn":"year","chkTopCountry":false,"topCountryCount":"","chkfileFormat":false,"chkPrevMonth":true,"chkMonthChange":false,"chkCodes":false,"chkYearChange":false,"queryName":"","sortOrder":"Commodity/Attribute/Country","topCountryState":false}
  142. var req UsdaPsdDataQueryParams
  143. req.Commodities = commodities
  144. req.Attributes = attributes
  145. req.Countries = countries
  146. req.MarketYears = marketYears
  147. req.OptionColumn = "year"
  148. //req.ChkPrevMonth = true
  149. req.SortOrder = "Commodity/Country/Attribute"
  150. // 构造httppost请求
  151. reqBody, _ := json.Marshal(req)
  152. // 解析返回值
  153. headerParams := make(map[string]string)
  154. //headerParams["Cookie"] = "CT6T=312900; SF_cookie_3=68941398"
  155. //headerParams["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
  156. headerParams["Content-Type"] = "application/json"
  157. body, e := utils.HttpPostNoCookie(dataUrl, string(reqBody), headerParams)
  158. if e != nil {
  159. err = e
  160. return
  161. }
  162. item := new(UsdaPsdData)
  163. err = json.Unmarshal(body, &item)
  164. if err != nil {
  165. fmt.Println("json.Unmarshal err:" + err.Error())
  166. return
  167. }
  168. indexList, err = handleUsdaFasPsd(item)*/
  169. return
  170. }
  171. // 美国农业出库销售数据
  172. func DownloadUsdaFmsData() {
  173. var err error
  174. defer func() {
  175. if err != nil {
  176. msg := "失败提醒" + "DownloadUsdaFmsData ErrMsg:" + err.Error()
  177. fmt.Println("msg:", msg)
  178. utils.FileLog.Info(msg)
  179. go alarm_msg.SendAlarmMsg(msg, 3)
  180. }
  181. }()
  182. downloadFile := "downloaded_excel.xlsx"
  183. //请求首页,获取入参
  184. dataUrl := "https://apps.fas.usda.gov/esrquery/esrq.aspx"
  185. body1, err := utils.HttpGetNoCookie(dataUrl)
  186. if err != nil {
  187. return
  188. }
  189. htmlString := string(body1)
  190. // 解析返回值,截取htmlinput标签,input标签里,id=“__EVENTVALIDATION”的input标签里的值
  191. // 使用goquery读取HTML字符串
  192. doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlString))
  193. if err != nil {
  194. return
  195. }
  196. stateValue := doc.Find("input#__VIEWSTATE").AttrOr("value", "")
  197. stateEneratorValue := doc.Find("input#__VIEWSTATEGENERATOR").AttrOr("value", "")
  198. // 查询并获取input标签的值
  199. validValue := doc.Find("input#__EVENTVALIDATION").AttrOr("value", "")
  200. var body bytes.Buffer
  201. multipartWriter := multipart.NewWriter(&body)
  202. // 添加表单字段(如果需要的话)
  203. if err = multipartWriter.WriteField("__EVENTTARGET", ""); err != nil {
  204. err = fmt.Errorf("set __EVENTTARGET, Err:%s", err)
  205. return
  206. }
  207. if err = multipartWriter.WriteField("__EVENTARGUMENT", ""); err != nil {
  208. err = fmt.Errorf("set __EVENTARGUMENT, Err:%s", err)
  209. return
  210. }
  211. if err = multipartWriter.WriteField("__LASTFOCUS", ""); err != nil {
  212. err = fmt.Errorf("set __LASTFOCUS, Err:%s", err)
  213. return
  214. }
  215. if err = multipartWriter.WriteField("__VIEWSTATE", stateValue); err != nil {
  216. err = fmt.Errorf("set __VIEWSTATE, Err:%s", err)
  217. return
  218. }
  219. if err = multipartWriter.WriteField("__VIEWSTATEGENERATOR", stateEneratorValue); err != nil {
  220. err = fmt.Errorf("set __VIEWSTATEGENERATOR, Err:%s", err)
  221. return
  222. }
  223. if err = multipartWriter.WriteField("__EVENTVALIDATION", validValue); err != nil {
  224. err = fmt.Errorf("set __EVENTVALIDATION, Err:%s", err)
  225. return
  226. }
  227. //整理需要下载的品种ID
  228. //Soybeans:801,Soybean cake & meal:901,Soybean Oil:902
  229. CommodityIds := []string{"801", "901", "902"}
  230. for _, v := range CommodityIds {
  231. if err = multipartWriter.WriteField("ctl00$MainContent$lbCommodity", v); err != nil {
  232. err = fmt.Errorf("set ctl00$MainContent$lbCommodity, Err:%s", err)
  233. return
  234. }
  235. }
  236. startDate := "08/22/2019"
  237. endDate := "08/22/2024"
  238. if err = multipartWriter.WriteField("ctl00$MainContent$lbCountry", "0:0"); err != nil {
  239. err = fmt.Errorf("set ctl00$MainContent$lbCountry, Err:%s", err)
  240. return
  241. }
  242. if err = multipartWriter.WriteField("ctl00$MainContent$ddlReportFormat", "10"); err != nil {
  243. return
  244. }
  245. if err = multipartWriter.WriteField("ctl00$MainContent$rblOutputType", "2"); err != nil {
  246. return
  247. }
  248. if err = multipartWriter.WriteField("ctl00$MainContent$tbStartDate", startDate); err != nil {
  249. return
  250. }
  251. if err = multipartWriter.WriteField("ctl00$MainContent$tbEndDate", endDate); err != nil {
  252. return
  253. }
  254. if err = multipartWriter.WriteField("ctl00$MainContent$rblColumnSelection", "regular"); err != nil {
  255. return
  256. }
  257. if err = multipartWriter.WriteField("ctl00$MainContent$btnSubmit", "Submit"); err != nil {
  258. return
  259. }
  260. // 注意:如果接口需要文件上传,这里应该使用multipartWriter.CreateFormFile来添加文件
  261. // 关闭multipart writer以添加最后的边界
  262. if err = multipartWriter.Close(); err != nil {
  263. err = fmt.Errorf("close multipart writer, Err:%s", err)
  264. return
  265. }
  266. // 构造请求
  267. req, err := http.NewRequest("POST", dataUrl, &body)
  268. if err != nil {
  269. err = fmt.Errorf("create request, Err:%s", err)
  270. return
  271. }
  272. // 设置请求头
  273. req.Header.Set("Content-Type", multipartWriter.FormDataContentType())
  274. // 发送请求
  275. client := &http.Client{}
  276. resp, err := client.Do(req)
  277. if err != nil {
  278. err = fmt.Errorf("send request, Err:%s", err)
  279. return
  280. }
  281. defer resp.Body.Close()
  282. // 检查响应状态码
  283. if resp.StatusCode != http.StatusOK {
  284. err = fmt.Errorf("unexpected status code: %d", resp.StatusCode)
  285. return
  286. }
  287. // 读取响应体
  288. out, err := os.Create(downloadFile)
  289. if err != nil {
  290. return
  291. }
  292. defer out.Close()
  293. // 将响应体写入到文件
  294. _, err = io.Copy(out, resp.Body)
  295. if err != nil {
  296. return
  297. }
  298. fmt.Println("Excel file downloaded successfully")
  299. return
  300. }
  301. func handleUsdaFasPsd(item *UsdaPsdData) (indexList []*UsdaFasIndex, err error) {
  302. // 解析
  303. headerSlice := make([]string, 0)
  304. for index, v := range item.TableHeaders {
  305. // 键值对的值
  306. fmt.Println("key:", index, "value:", v)
  307. if !strings.Contains(v, "/") && !strings.Contains(v, " ") {
  308. v = strings.ToLower(v)
  309. }
  310. if v == "Unit Description" {
  311. v = "unit Description"
  312. }
  313. headerSlice = append(headerSlice, v)
  314. }
  315. // 解析
  316. // 遍历行读取
  317. indexList = make([]*UsdaFasIndex, 0)
  318. sort := 0
  319. // 指标名称
  320. indexMap := make(map[string]*UsdaFasIndex)
  321. // 键值对的值
  322. commodityRow := ""
  323. countriesRow := ""
  324. attributesRow := ""
  325. errMsg := ""
  326. for _, row := range item.QueryResult {
  327. unitK := headerSlice[len(headerSlice)-1]
  328. unit := row[unitK].(string)
  329. // unit 去掉左右两边的括号,去掉中间的空格
  330. unit = strings.Replace(unit, " ", "", -1)
  331. unit = strings.Trim(unit, "()")
  332. for _, k := range headerSlice {
  333. col, ok := row[k]
  334. if !ok || col == nil {
  335. continue
  336. }
  337. if k == "commodity" {
  338. commodityRow = col.(string)
  339. } else if k == "country" {
  340. countriesRow = col.(string)
  341. } else if k == "attribute" {
  342. attributesRow = col.(string)
  343. } else if k == "unit Description" {
  344. // unit = col.(string)
  345. } else {
  346. //数据列
  347. year, _ := strconv.Atoi(strings.Split(k, "/")[0])
  348. month := 0
  349. indexName := ""
  350. classifyName := ""
  351. classifySort := 0
  352. inCode := ""
  353. fre := "年度"
  354. lastStr := "Yearly"
  355. // year年度的最后一天日期
  356. dateT := time.Date(year, time.December, 31, 0, 0, 0, 0, time.UTC)
  357. if strings.Contains(k, "(") {
  358. fre = "月度"
  359. lastStr = "Monthly"
  360. // 截取括号中间的月度数据
  361. monthStr := strings.Split(k, "(")[1]
  362. monthStr = strings.Split(monthStr, ")")[0]
  363. // 将Jul英文月份前缀转成数字月份
  364. monthT, e := time.ParseInLocation("Jan", monthStr, time.Local)
  365. if e != nil {
  366. errMsg += fmt.Sprintf("月份转换错误:%s%s\n", monthStr, e.Error())
  367. continue
  368. }
  369. month = int(monthT.Month())
  370. // 将year和month拼接成日期,该月的最后一天日期
  371. dateT = time.Date(year, time.Month(month), 31, 0, 0, 0, 0, time.UTC)
  372. }
  373. date := dateT.Format("2006-01-02")
  374. // 封装成指标数据
  375. if commodityRow != "" && countriesRow != "" && attributesRow != "" {
  376. indexName = commodityRow + ": " + countriesRow + ": " + attributesRow + ": " + lastStr
  377. } else {
  378. fmt.Println("commodityRow:", commodityRow, "countriesRow:", countriesRow, "attributesRow:", attributesRow)
  379. errMsg += fmt.Sprintf("指标名称为空 commodityRow:%s,countriesRow:%s,attributesRow:%s\n", commodityRow, countriesRow, attributesRow)
  380. continue
  381. }
  382. inCode = "usda" + utils.GetFirstLetter(indexName)
  383. indexItem, okIndex := indexMap[indexName]
  384. // 首字母大写
  385. classifyName = commodityRow
  386. if !okIndex {
  387. // 新增指标
  388. indexItem = new(UsdaFasIndex)
  389. indexItem.IndexName = indexName
  390. indexItem.ClassifyName = classifyName
  391. indexItem.ParentClassifyName = "月度供需"
  392. indexItem.ClassifySort = classifySort
  393. indexItem.IndexCode = inCode
  394. indexItem.Frequency = fre
  395. indexItem.Sort = sort
  396. indexItem.Unit = unit
  397. indexItem.ExcelDataMap = make(map[string]string)
  398. sort++
  399. }
  400. val := col.(float64)
  401. indexItem.ExcelDataMap[date] = fmt.Sprintf("%.4f", val)
  402. indexMap[indexName] = indexItem
  403. continue
  404. }
  405. }
  406. }
  407. for _, v := range indexMap {
  408. fmt.Printf("IndexName: %s \n", v.IndexName)
  409. fmt.Printf("IndexCode: %s \n", v.IndexCode)
  410. indexList = append(indexList, v)
  411. if len(indexList) > 500 {
  412. err = addUsdaFasPsdData(indexList)
  413. if err != nil {
  414. return
  415. }
  416. indexList = []*UsdaFasIndex{}
  417. }
  418. }
  419. err = addUsdaFasPsdData(indexList)
  420. if err != nil {
  421. return
  422. }
  423. return
  424. }
  425. func addUsdaFasPsdData(indexList []*UsdaFasIndex) (err error) {
  426. sheetName := "月度供需"
  427. if len(indexList) > 0 {
  428. params := make(map[string]interface{})
  429. params["List"] = indexList
  430. params["TerminalCode"] = ""
  431. result, e := utils.PostEdbLib(params, "usda_fas/handle/excel_data")
  432. if e != nil {
  433. err = fmt.Errorf("sheet :%s PostEdbLib err: %s", sheetName, e.Error())
  434. b, _ := json.Marshal(params)
  435. utils.FileLog.Info(fmt.Sprintf("sheet :%s PostEdbLib err: %s, params: %s", sheetName, e.Error(), string(b)))
  436. return
  437. }
  438. resp := new(utils.BaseEdbLibResponse)
  439. if e := json.Unmarshal(result, &resp); e != nil {
  440. err = fmt.Errorf("sheet :%s json.Unmarshal err: %s", sheetName, e)
  441. utils.FileLog.Info(fmt.Sprintf("sheet :%s json.Unmarshal err: %s", sheetName, e))
  442. return
  443. }
  444. if resp.Ret != 200 {
  445. err = fmt.Errorf("sheet :%s Msg: %s, ErrMsg: %s", sheetName, resp.Msg, resp.ErrMsg)
  446. utils.FileLog.Info(fmt.Sprintf("sheet :%s Msg: %s, ErrMsg: %s", sheetName, resp.Msg, resp.ErrMsg))
  447. return
  448. }
  449. }
  450. return
  451. }