Explorar o código

Merge branch 'feature/usda_psd_edb_data' of eta_server/eta_crawler into master

xyxie hai 1 mes
pai
achega
8c1a6fdca3
Modificáronse 6 ficheiros con 908 adicións e 1 borrados
  1. 4 1
      services/task.go
  2. 750 0
      services/usda_psd.go
  3. 20 0
      static/convert_xls_to_xlsx.py
  4. 0 0
      test.json
  5. 127 0
      utils/common.go
  6. 7 0
      utils/config.go

+ 4 - 1
services/task.go

@@ -24,6 +24,8 @@ func Task() {
 	//FileCoalCoastal()
 	//FileCoalInland()
 	if utils.BusinessCode == utils.BusinessCodeRelease {
+		refreshUsdaPsd := task.NewTask("RefreshUsdaPsd", "0 0 16 * * 5", DownloadUsdaPsdDataTask) //每周五下午16点
+		refreshUsdaFms := task.NewTask("RefreshUsdaFms", "0 0 16 * * 5", DownloadUsdaFmsDataTask) //每周五下午16点
 		refreshData := task.NewTask("refreshData", "0 0,30 16-18 * * *", RefreshData)
 		refreshEic := task.NewTask("RefreshEic", "0 0 2,6 * * *", RefreshEic)
 		refreshCoal := task.NewTask("RefreshCoal", "0 0,30 16-23 * * *", RefreshCoal)
@@ -41,7 +43,8 @@ func Task() {
 		//refreshNationalQuarter := task.NewTask("RefreshNationalQuarterDb", "0 25 1 15 * *", national_data.RefreshNationalQuarterDb)
 		//refreshNationalYearA := task.NewTask("RefreshNationalYearDbA", "0 45 1 20 * *", national_data.RefreshNationalYearDbA)
 		//refreshNationalYearB := task.NewTask("RefreshNationalYearDbB", "0 45 1 25 * *", national_data.RefreshNationalYearDbB)
-
+		task.AddTask("美国农业部月度供需数据爬取", refreshUsdaPsd)
+		task.AddTask("美国农业部出口销售数据爬取", refreshUsdaFms)
 		task.AddTask("数据爬取", refreshData)
 		task.AddTask("欧洲天然气爬取", refreshEic)
 		task.AddTask("中国煤炭网爬取", refreshCoal)

+ 750 - 0
services/usda_psd.go

@@ -0,0 +1,750 @@
+package services
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"eta/eta_crawler/services/alarm_msg"
+	"eta/eta_crawler/utils"
+	"fmt"
+	"github.com/PuerkitoBio/goquery"
+	"github.com/xuri/excelize/v2"
+	"io"
+	"mime/multipart"
+	"net/http"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// UsdaPsdDataQueryParams 定义了JSON到Go结构体的映射
+type UsdaPsdDataQueryParams struct {
+	QueryID              int      `json:"queryId"`
+	CommodityGroupCode   string   `json:"commodityGroupCode"`
+	Commodities          []string `json:"commodities"`
+	Attributes           []int    `json:"attributes"`
+	Countries            []string `json:"countries"`
+	MarketYears          []int    `json:"marketYears"`
+	ChkCommoditySummary  bool     `json:"chkCommoditySummary"`
+	ChkAttribSummary     bool     `json:"chkAttribSummary"`
+	ChkCountrySummary    bool     `json:"chkCountrySummary"`
+	CommoditySummaryText string   `json:"commoditySummaryText"`
+	AttribSummaryText    string   `json:"attribSummaryText"`
+	CountrySummaryText   string   `json:"countrySummaryText"`
+	OptionColumn         string   `json:"optionColumn"`
+	ChkTopCountry        bool     `json:"chkTopCountry"`
+	TopCountryCount      string   `json:"topCountryCount"`
+	ChkFileFormat        bool     `json:"chkfileFormat"`
+	ChkPrevMonth         bool     `json:"chkPrevMonth"`
+	ChkMonthChange       bool     `json:"chkMonthChange"`
+	ChkCodes             bool     `json:"chkCodes"`
+	ChkYearChange        bool     `json:"chkYearChange"`
+	QueryName            string   `json:"queryName"`
+	SortOrder            string   `json:"sortOrder"`
+	TopCountryState      bool     `json:"topCountryState"`
+}
+
+type UsdaPsdData struct {
+	TableHeaders []string                 `json:"tableHeaders"`
+	QueryResult  []map[string]interface{} `json:"queryResult"`
+}
+
+type UsdaPsdDataAttribute struct {
+	AttributeId int `json:"attributeId"`
+}
+
+// UsdaFasIndex 美国农业部指标数据
+type UsdaFasIndex struct {
+	ClassifyName       string `description:"指标目录"`
+	ParentClassifyName string `description:"父级指标目录"`
+	ClassifySort       int    `description:"指标目录排序号"`
+	IndexName          string `description:"指标名称"`
+	IndexCode          string `description:"指标编码"`
+	Unit               string `description:"单位"`
+	Sort               int    `description:"排序号"`
+	Frequency          string `description:"频度"`
+	TerminalCode       string `description:"编码"`
+	Country            string `description:"国家"`
+	Commodity          string `description:"属性"`
+	ExcelDataMap       map[string]string
+}
+
+func DownloadUsdaPsdDataTask(cont context.Context) (err error) {
+	//月度供需,年度和月度
+	//todo 设置下载频率
+	// 获取最近两年的年份
+	years := []int{time.Now().Year() + 1, time.Now().Year()}
+	var commodities []string
+	commodities = append(commodities, "0813800", "0813200", "0813600", "0813100", "0813500", "4242000", "4233000", "4235000", "4243000", "4244000", "4234000", "4239100", "4232000", "4236000", "2223000", "2232000", "2221000", "2226000", "2222000", "2224000")
+	for _, commodity := range commodities {
+		err = DownloadUsdaPsdData(commodity, years)
+		if err != nil {
+			utils.FileLog.Info("DownloadUsdaPsdData " + commodity + "ErrMsg:" + err.Error())
+		}
+		//time.Sleep(time.Duration(utils.RangeRand(2, 10)*10) * time.Second)
+		utils.FileLog.Info("DownloadUsdaPsdData " + commodity + " 爬取成功")
+	}
+	return
+}
+
+func DownloadUsdaFmsDataTask(cont context.Context) (err error) {
+	//出口销售周度数据
+	startDate := time.Now().AddDate(0, -1, 0).Format("01/02/2006")
+	endDate := time.Now().Format("01/02/2006")
+	err = DownloadUsdaFmsData(startDate, endDate)
+	return
+}
+
+// Meal, Palm Kernel:0813800
+// Meal, Peanut:0813200
+// Meal, Rapeseed:0813600
+// Meal, Soybean:0813100
+// Meal, Sunflowerseed:0813500
+
+// Oil, Coconut:4242000
+// Oil, Cottonseed:4233000
+// Oil, Olive:4235000
+// Oil, Palm:4243000
+// Oil, Palm Kernel:4244000
+// Oil, Peanut:4234000
+// Oil, Rapeseed:4239100
+// Oil, Soybean:4232000
+// Oil, Sunflowerseed:4236000
+
+// Oilseed, Cottonseed:2223000
+// Oilseed, Palm Kernel:2232000
+// Oilseed, Peanut:2221000
+// Oilseed, Rapeseed:2226000
+// Oilseed, Soybean:2222000
+// Oilseed, Sunflowerseed:2224000
+// 美国农业部月度供需平衡表数据
+func DownloadUsdaPsdData(commodityCode string, years []int) (err error) {
+	defer func() {
+		if err != nil {
+			msg := "失败提醒" + "downloadUsdaPsdData ErrMsg:" + err.Error()
+			fmt.Println("msg:", msg)
+			utils.FileLog.Info(msg)
+			go alarm_msg.SendAlarmMsg(msg, 3)
+		}
+	}()
+	// 定义请求地址
+	attributeUrl := "https://apps.fas.usda.gov/PSDOnlineApi/api/query/GetMultiCommodityAttributes?"
+	dataUrl := "https://apps.fas.usda.gov/PSDOnlineApi/api/query/RunQuery"
+
+	var commodities []string
+	//commodities = append(commodities, "0813800", "0813200", "0813600", "0813100", "0813500", "4242000", "4233000", "4235000", "4243000", "4244000", "4234000", "4239100", "4232000", "4236000", "2223000", "2232000", "2221000", "2226000", "2222000", "2224000")
+	commodities = append(commodities, commodityCode)
+	commodityCodes := strings.Join(commodities, ",")
+	attributeUrl = attributeUrl + "commodityCodes=" + commodityCodes
+	fmt.Println("attributeUrl", attributeUrl)
+	// 定义请求参数
+	// 获取属性入参
+	attributeBody, e := utils.HttpGetNoCookie(attributeUrl)
+	if e != nil {
+		err = e
+		return
+	}
+
+	attrList := make([]UsdaPsdDataAttribute, 0)
+	err = json.Unmarshal(attributeBody, &attrList)
+	if err != nil {
+		fmt.Println("json.Unmarshal err:" + err.Error())
+		return
+	}
+	// 解析
+	var attributes []int
+	for _, v := range attrList {
+		// 键值对的值
+		attributes = append(attributes, v.AttributeId)
+	}
+
+	// 定义请求方法
+	//attributes = append(attributes, 4, 20, 28, 57, 81, 84, 86, 88, 113, 130, 192, 125, 176, 178, 184)
+	var countries []string
+	countries = append(countries, "R00", "ALL")
+	var marketYears []int
+	//marketYears = append(marketYears, 2025, 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014)
+	marketYears = append(marketYears, years...)
+	// {"queryId":0,"commodityGroupCode":null,"commodities":["0430000"],"attributes":[4,20,28,57,81,84,86,88,113,130,192,125,176,178,184],"countries":["R00","ALL"],"marketYears":[2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014],"chkCommoditySummary":false,"chkAttribSummary":false,"chkCountrySummary":false,"commoditySummaryText":"","attribSummaryText":"","countrySummaryText":"","optionColumn":"year","chkTopCountry":false,"topCountryCount":"","chkfileFormat":false,"chkPrevMonth":true,"chkMonthChange":false,"chkCodes":false,"chkYearChange":false,"queryName":"","sortOrder":"Commodity/Attribute/Country","topCountryState":false}
+	var req UsdaPsdDataQueryParams
+	req.Commodities = commodities
+	req.Attributes = attributes
+	req.Countries = countries
+	req.MarketYears = marketYears
+	req.OptionColumn = "year"
+	req.ChkPrevMonth = true
+	req.ChkYearChange = true
+	req.SortOrder = "Commodity/Country/Attribute"
+
+	// 构造httppost请求
+	reqBody, _ := json.Marshal(req)
+	// 解析返回值
+	fmt.Println("reqBody", string(reqBody))
+
+	headerParams := make(map[string]string)
+	//headerParams["Cookie"] = "CT6T=312900; SF_cookie_3=68941398"
+	//headerParams["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
+	headerParams["Content-Type"] = "application/json"
+	body, e := utils.HttpPostNoCookie(dataUrl, string(reqBody), headerParams)
+	if e != nil {
+		err = e
+		return
+	}
+	item := new(UsdaPsdData)
+	err = json.Unmarshal(body, &item)
+	if err != nil {
+		fmt.Println("json.Unmarshal err:" + err.Error())
+		return
+	}
+
+	// 使用通道等待解析完成
+	done := make(chan error)
+	go func() {
+		done <- handleUsdaFasPsd(item)
+	}()
+	// 等待解析完成或超时
+	select {
+	case err = <-done:
+		if err != nil {
+			err = fmt.Errorf("handleUsdaFasPsd, Err:%w", err)
+			return
+		}
+		/*case <-time.After(20 * time.Minute): // 假设20分钟超时
+		err = fmt.Errorf("parse excel timed out")
+		return*/
+	}
+	utils.FileLog.Info("月度供需 " + commodityCode + "结束")
+	return
+}
+
+// 美国农业出库销售数据
+func DownloadUsdaFmsData(startDate, endDate string) (err error) {
+	// todo 设置下载频率, 如果有正在处理中的,则暂停下载
+	defer func() {
+		if err != nil {
+			msg := "失败提醒" + "DownloadUsdaFmsData ErrMsg:" + err.Error()
+			fmt.Println("msg:", msg)
+			utils.FileLog.Info(msg)
+			go alarm_msg.SendAlarmMsg(msg, 3)
+		}
+	}()
+	downloadFile := fmt.Sprintf("./static/usda_fms_excel_%s.xls", time.Now().Format(utils.FormatDateTimeUnSpace))
+	//请求首页,获取入参
+	dataUrl := "https://apps.fas.usda.gov/esrquery/esrq.aspx"
+	body1, err := utils.HttpGetNoCookie(dataUrl)
+	if err != nil {
+		return
+	}
+	htmlString := string(body1)
+	// 解析返回值,截取htmlinput标签,input标签里,id=“__EVENTVALIDATION”的input标签里的值
+	// 使用goquery读取HTML字符串
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlString))
+	if err != nil {
+		return
+	}
+	stateValue := doc.Find("input#__VIEWSTATE").AttrOr("value", "")
+	stateEneratorValue := doc.Find("input#__VIEWSTATEGENERATOR").AttrOr("value", "")
+	// 查询并获取input标签的值
+	validValue := doc.Find("input#__EVENTVALIDATION").AttrOr("value", "")
+
+	var body bytes.Buffer
+	multipartWriter := multipart.NewWriter(&body)
+
+	// 添加表单字段(如果需要的话)
+	if err = multipartWriter.WriteField("__EVENTTARGET", ""); err != nil {
+		err = fmt.Errorf("set __EVENTTARGET, Err:%s", err)
+		return
+	}
+	if err = multipartWriter.WriteField("__EVENTARGUMENT", ""); err != nil {
+		err = fmt.Errorf("set __EVENTARGUMENT, Err:%s", err)
+		return
+	}
+	if err = multipartWriter.WriteField("__LASTFOCUS", ""); err != nil {
+		err = fmt.Errorf("set __LASTFOCUS, Err:%s", err)
+		return
+	}
+	if err = multipartWriter.WriteField("__VIEWSTATE", stateValue); err != nil {
+		err = fmt.Errorf("set __VIEWSTATE, Err:%s", err)
+		return
+	}
+	if err = multipartWriter.WriteField("__VIEWSTATEGENERATOR", stateEneratorValue); err != nil {
+		err = fmt.Errorf("set __VIEWSTATEGENERATOR, Err:%s", err)
+		return
+	}
+	if err = multipartWriter.WriteField("__EVENTVALIDATION", validValue); err != nil {
+		err = fmt.Errorf("set __EVENTVALIDATION, Err:%s", err)
+		return
+	}
+	//整理需要下载的品种ID
+	//Soybeans:801,Soybean cake & meal:901,Soybean Oil:902
+	CommodityIds := []string{"801", "901", "902"}
+	for _, v := range CommodityIds {
+		if err = multipartWriter.WriteField("ctl00$MainContent$lbCommodity", v); err != nil {
+			err = fmt.Errorf("set ctl00$MainContent$lbCommodity, Err:%s", err)
+			return
+		}
+	}
+	if err = multipartWriter.WriteField("ctl00$MainContent$lbCountry", "0:0"); err != nil {
+		err = fmt.Errorf("set ctl00$MainContent$lbCountry, Err:%s", err)
+		return
+	}
+	if err = multipartWriter.WriteField("ctl00$MainContent$ddlReportFormat", "10"); err != nil {
+
+		return
+	}
+	if err = multipartWriter.WriteField("ctl00$MainContent$rblOutputType", "2"); err != nil {
+		return
+	}
+	if err = multipartWriter.WriteField("ctl00$MainContent$tbStartDate", startDate); err != nil {
+		return
+	}
+	if err = multipartWriter.WriteField("ctl00$MainContent$tbEndDate", endDate); err != nil {
+		return
+	}
+	if err = multipartWriter.WriteField("ctl00$MainContent$rblColumnSelection", "regular"); err != nil {
+		return
+	}
+	if err = multipartWriter.WriteField("ctl00$MainContent$btnSubmit", "Submit"); err != nil {
+		return
+	}
+	// 注意:如果接口需要文件上传,这里应该使用multipartWriter.CreateFormFile来添加文件
+
+	// 关闭multipart writer以添加最后的边界
+	if err = multipartWriter.Close(); err != nil {
+		err = fmt.Errorf("close multipart writer, Err:%s", err)
+		return
+	}
+
+	// 构造请求
+	req, err := http.NewRequest("POST", dataUrl, &body)
+	if err != nil {
+		err = fmt.Errorf("create request, Err:%s", err)
+		return
+	}
+
+	// 设置请求头
+	req.Header.Set("Content-Type", multipartWriter.FormDataContentType())
+
+	// 发送请求
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		err = fmt.Errorf("send request, Err:%s", err)
+		return
+	}
+	defer resp.Body.Close()
+
+	// 检查响应状态码
+	if resp.StatusCode != http.StatusOK {
+		err = fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+		return
+	}
+
+	// 读取响应体
+	out, err := os.Create(downloadFile)
+	if err != nil {
+		return
+	}
+
+	// 将响应体写入到文件
+	_, err = io.Copy(out, resp.Body)
+	if err != nil {
+		return
+	}
+	// 关闭临时文件以确保数据写入完成
+	err = out.Close()
+	if err != nil {
+		err = fmt.Errorf("Failed to close temporary file: %v", err)
+		return
+	}
+
+	// 转换文件格式
+	downloadFileXlsx := downloadFile + "x"
+	err = ConvertXlsToXlsx(downloadFile, downloadFileXlsx)
+	if err != nil {
+		err = fmt.Errorf("文件格式转换失败 convert excel, Err:%w", err)
+		return
+	}
+	// 使用通道等待解析完成
+	done := make(chan error)
+	go func() {
+		done <- ParseUsdaFmsExcel(downloadFileXlsx)
+	}()
+	// 等待解析完成或超时
+	select {
+	case err = <-done:
+		if err != nil {
+			err = fmt.Errorf("parse excel, Err:%w", err)
+			return
+		}
+		/*case <-time.After(20 * time.Minute): // 假设20分钟超时
+		err = fmt.Errorf("parse excel timed out")
+		return*/
+	}
+	// 删除临时文件
+	defer func() {
+		os.Remove(downloadFile)
+	}()
+	fmt.Println("Excel file downloaded successfully")
+	return
+}
+
+func ParseUsdaFmsExcel(path string) (err error) {
+	defer func() {
+		if err != nil {
+			msg := "失败提醒" + "DownloadUsdaFmsData_ParseUsdaFmsExcel ErrMsg:" + err.Error()
+			fmt.Println("msg:", msg)
+			utils.FileLog.Info(msg)
+			go alarm_msg.SendAlarmMsg(msg, 3)
+		}
+	}()
+	//var xlFile *xlsx.File
+	exist, err := PathExists(path)
+	if err != nil {
+		fmt.Println(err)
+		err = fmt.Errorf("文件地址不存在 err:%s", err.Error())
+		return
+	}
+	if !exist {
+		err = fmt.Errorf("文件地址不存在")
+		return
+	}
+	//xlFile, err = xlsx.OpenFile(path)
+	xlFile, err := excelize.OpenFile(path)
+	if err != nil {
+		fmt.Println("OpenFile err:", err)
+		err = fmt.Errorf("打开文件失败 err:%s", err.Error())
+		return
+	}
+	defer func() {
+		// 关闭工作簿
+		if err = xlFile.Close(); err != nil {
+			fmt.Println(err)
+		}
+		os.Remove(path)
+	}()
+	sheetName := xlFile.GetSheetName(0)
+	fmt.Println("Sheet Name:", sheetName)
+	//解析出表头第7行
+	//拼接指标名称
+	// 指标名称
+	indexMap := make(map[string]*UsdaFasIndex)
+	indexList := make([]*UsdaFasIndex, 0)
+	sort := 0
+	rows, err := xlFile.GetRows(sheetName)
+	//for _, sheet := range xlFile.Sheets {
+	//遍历行读取
+	for i, row := range rows {
+		if i > 6 {
+			commodity := ""
+			dateStr := ""
+			country := ""
+			dataVal := ""
+			unit := "Metric Tons"
+			for k, text := range row {
+				//fmt.Println("第", i, "行,第", k, "列,内容:", text)
+				kind := ""
+				indexName := ""
+				if k == 1 { // 品种名称Commodity
+					commodity = text
+				} else if k == 2 {
+					dateStr = text
+				} else if k == 4 {
+					country = text
+				} else if k == 5 {
+					kind = "Weekly  Exports"
+				} else if k == 6 {
+					kind = "Accum  Exports"
+				} else if k == 7 {
+					kind = "Outstanding Sale:CMY"
+				} else if k == 8 {
+					kind = "Gross Sale:CMY"
+				} else if k == 9 {
+					kind = "Net Sale :CMY"
+				} else if k == 10 {
+					kind = "Total Commitment:CMY"
+				} else if k == 11 {
+					kind = "Outstanding Sale:NMY"
+				} else if k == 12 {
+					kind = "Net Sale :NMY"
+				}
+				if k > 4 && k < 13 {
+					// 处理日期
+					//fmt.Println(dateStr)
+					//fmt.Println(unit)
+					timeT, e := time.ParseInLocation(utils.FormatDateTime, dateStr, time.Local)
+					if e != nil {
+						utils.FileLog.Info("日期格式转换失败 err:%s", e.Error())
+						continue
+					}
+					date := timeT.Format(utils.FormatDate)
+					dataVal = text
+					firstCommodity := utils.GetFirstLetter(commodity)
+					firstKind := utils.GetFirstLetter(kind)
+					indexName = fmt.Sprintf("%s: %s: %s", commodity, country, kind)
+					inCode := fmt.Sprintf("usda%s%s%s", firstCommodity, strings.ToLower(strings.ReplaceAll(country, " ", "")), firstKind)
+					indexItem, okIndex := indexMap[indexName]
+					// 首字母大写
+					classifyName := commodity
+					if !okIndex {
+						// 新增指标
+						indexItem = new(UsdaFasIndex)
+						indexItem.IndexName = indexName
+						indexItem.ClassifyName = classifyName
+						indexItem.Country = country
+						indexItem.Commodity = kind
+						indexItem.ParentClassifyName = "出口销售"
+						indexItem.ClassifySort = 0
+						indexItem.IndexCode = inCode
+						indexItem.Frequency = "周度"
+						indexItem.Sort = sort
+						indexItem.Unit = unit
+						indexItem.ExcelDataMap = make(map[string]string)
+						sort++
+					}
+					if strings.Contains(dataVal, ",") {
+						dataVal = strings.ReplaceAll(dataVal, ",", "")
+					}
+					val, e := strconv.ParseFloat(dataVal, 64)
+					if e != nil {
+						utils.FileLog.Info("数据转换失败 err:%s", e.Error())
+						continue
+					}
+					indexItem.ExcelDataMap[date] = fmt.Sprintf("%.4f", val)
+					indexMap[indexName] = indexItem
+				}
+			}
+		}
+	}
+	//}
+
+	for _, v := range indexMap {
+		//fmt.Printf("IndexName: %s \n", v.IndexName)
+		//fmt.Printf("IndexCode: %s \n", v.IndexCode)
+		indexList = append(indexList, v)
+		if len(indexList) > 100 {
+			err = addUsdaFasPsdData(indexList, "出口销售")
+			if err != nil {
+				return
+			}
+			indexList = []*UsdaFasIndex{}
+		}
+	}
+	err = addUsdaFasPsdData(indexList, "出口销售")
+	if err != nil {
+		return
+	}
+
+	fmt.Println("出口销售 执行成功")
+	return
+}
+
+// 月度供需
+func handleUsdaFasPsd(item *UsdaPsdData) (err error) {
+	//设置缓存key,防止重复处理
+	errMsg := ""
+	defer func() {
+		if err != nil {
+			errMsg += err.Error()
+		}
+		if errMsg != "" {
+			msg := "失败提醒" + "downloadUsdaPsdData_handleUsdaFasPsd ErrMsg:" + errMsg
+			fmt.Println("msg:", msg)
+			utils.FileLog.Info(msg)
+			go alarm_msg.SendAlarmMsg(msg, 3)
+		}
+	}()
+
+	// 解析
+	headerSlice := make([]string, 0)
+	for index, v := range item.TableHeaders {
+		// 键值对的值
+		fmt.Println("key:", index, "value:", v)
+		if !strings.Contains(v, "/") && !strings.Contains(v, " ") {
+			v = strings.ToLower(v)
+		}
+		if v == "Unit Description" {
+			v = "unit Description"
+		}
+		headerSlice = append(headerSlice, v)
+	}
+	sort := 0
+	// 指标名称
+	indexMap := make(map[string]*UsdaFasIndex)
+	// 键值对的值
+	commodityRow := ""
+	countriesRow := ""
+	attributesRow := ""
+
+	for _, row := range item.QueryResult {
+		unitK := headerSlice[len(headerSlice)-1]
+		unit := row[unitK].(string)
+		// unit 去掉左右两边的括号,去掉中间的空格
+		unit = strings.Replace(unit, " ", "", -1)
+		unit = strings.Trim(unit, "()")
+		for _, k := range headerSlice {
+			col, ok := row[k]
+			if !ok || col == nil {
+				//utils.FileLog.Info("col is nil")
+				continue
+			}
+			if k == "commodity" {
+				commodityRow = col.(string)
+			} else if k == "country" {
+				countriesRow = col.(string)
+			} else if k == "attribute" {
+				attributesRow = col.(string)
+			} else if k == "unit Description" {
+				// unit = col.(string)
+			} else {
+				//数据列
+				year, _ := strconv.Atoi(strings.Split(k, "/")[0])
+				indexName := ""
+				classifyName := ""
+				classifySort := 0
+				inCode := ""
+				fre := "年度"
+				lastStr := "Yearly"
+				// year年度的最后一天日期
+				dateT := time.Date(year, time.December, 31, 0, 0, 0, 0, time.Local)
+				if strings.Contains(k, "(") {
+					fre = "月度"
+					lastStr = "Monthly"
+					// 截取括号中间的月度数据
+					monthStr := strings.Split(k, "(")[1]
+					monthStr = strings.Split(monthStr, ")")[0]
+					// 将Jul英文月份前缀转成数字月份
+					monthT, e := time.ParseInLocation("Jan", monthStr, time.Local)
+					if e != nil {
+						errMsg += fmt.Sprintf("月份转换错误:%s%s\n", monthStr, e.Error())
+						continue
+					}
+					// 将year和month拼接成日期,该月的最后一天日期
+					// 获取下一个月份的第一天,减去1天为当前月份的最后一天
+					dateT = time.Date(year, monthT.Month(), 1, 0, 0, 0, 0, time.Now().Location()).AddDate(0, 1, -1)
+				}
+				date := dateT.Format("2006-01-02")
+				// 封装成指标数据
+				if commodityRow != "" && countriesRow != "" && attributesRow != "" {
+					indexName = commodityRow + ": " + countriesRow + ": " + attributesRow + ": " + lastStr
+				} else {
+					fmt.Println("commodityRow:", commodityRow, "countriesRow:", countriesRow, "attributesRow:", attributesRow)
+					errMsg += fmt.Sprintf("指标名称为空 commodityRow:%s,countriesRow:%s,attributesRow:%s\n", commodityRow, countriesRow, attributesRow)
+					continue
+				}
+				firstCommodityRow := strings.Replace(commodityRow, ", ", "", -1)
+				firstCommodityRow = strings.Replace(firstCommodityRow, " ", "", -1)
+				firstCommodityRow = strings.ToLower(firstCommodityRow)
+				firstAttributesRow := utils.GetFirstLetter(attributesRow)
+				firstLastStr := utils.GetFirstLetter(lastStr)
+
+				inCode = fmt.Sprintf("usda%s%s%s%s", firstCommodityRow, strings.ToLower(strings.ReplaceAll(countriesRow, " ", "")), firstAttributesRow, firstLastStr)
+				indexItem, okIndex := indexMap[indexName]
+				// 首字母大写
+				classifyName = commodityRow
+				if !okIndex {
+					// 新增指标
+					indexItem = new(UsdaFasIndex)
+					indexItem.IndexName = indexName
+					indexItem.ClassifyName = classifyName
+					indexItem.Country = countriesRow
+					indexItem.Commodity = attributesRow
+					indexItem.ParentClassifyName = "月度供需"
+					indexItem.ClassifySort = classifySort
+					indexItem.IndexCode = inCode
+					indexItem.Frequency = fre
+					indexItem.Sort = sort
+					indexItem.Unit = unit
+					indexItem.ExcelDataMap = make(map[string]string)
+					sort++
+				}
+				val := col.(float64)
+				val = utils.FloatFormatRound(val, 2)
+				indexItem.ExcelDataMap[date] = fmt.Sprintf("%.4f", val)
+				indexMap[indexName] = indexItem
+				continue
+			}
+		}
+
+	}
+	indexList := make([]*UsdaFasIndex, 0)
+	for _, v := range indexMap {
+		//fmt.Printf("IndexName: %s \n", v.IndexName)
+		//fmt.Printf("IndexCode: %s \n", v.IndexCode)
+		indexList = append(indexList, v)
+		if len(indexList) > 100 {
+			err = addUsdaFasPsdData(indexList, "月度供需")
+			if err != nil {
+				return
+			}
+			indexList = []*UsdaFasIndex{}
+		}
+	}
+	err = addUsdaFasPsdData(indexList, "月度供需")
+	if err != nil {
+		return
+	}
+	fmt.Println("月度供需 " + commodityRow + "执行成功")
+	return
+}
+
+func addUsdaFasPsdData(indexList []*UsdaFasIndex, sheetName string) (err error) {
+	if len(indexList) > 0 {
+		params := make(map[string]interface{})
+		params["List"] = indexList
+		params["TerminalCode"] = ""
+		result, e := utils.PostEdbLib(params, "usda_fas/handle/excel_data")
+		if e != nil {
+			err = fmt.Errorf("sheet :%s PostEdbLib err: %s", sheetName, e.Error())
+			b, _ := json.Marshal(params)
+			utils.FileLog.Info(fmt.Sprintf("sheet :%s PostEdbLib err: %s, params: %s", sheetName, e.Error(), string(b)))
+			return
+		}
+		resp := new(utils.BaseEdbLibResponse)
+		if e := json.Unmarshal(result, &resp); e != nil {
+			err = fmt.Errorf("sheet :%s json.Unmarshal err: %s", sheetName, e)
+			utils.FileLog.Info(fmt.Sprintf("sheet :%s json.Unmarshal err: %s", sheetName, e))
+			return
+		}
+		if resp.Ret != 200 {
+			err = fmt.Errorf("sheet :%s Msg: %s, ErrMsg: %s", sheetName, resp.Msg, resp.ErrMsg)
+			utils.FileLog.Info(fmt.Sprintf("sheet :%s Msg: %s, ErrMsg: %s", sheetName, resp.Msg, resp.ErrMsg))
+			return
+		}
+	}
+	return
+}
+
+// ConvertXlsToXlsx 调用python服务把旧的xls格式转换成xlsx格式
+func ConvertXlsToXlsx(inputFile, outputFile string) (err error) {
+	pythonScript := "./static/convert_xls_to_xlsx.py"
+
+	cmd := exec.Command(utils.PYTHON_PATH, pythonScript, inputFile, outputFile)
+
+	// 创建一个缓冲区来捕获输出
+	var out bytes.Buffer
+	cmd.Stdout = &out
+	cmd.Stderr = os.Stderr // 你仍然可以将错误输出到标准错误
+
+	// 运行命令
+	err = cmd.Run()
+	if err != nil {
+		err = fmt.Errorf("Error running command: %v\n", err)
+		fmt.Printf("Error running command: %v\n", err)
+		return
+	}
+
+	// 检查输出是否包含 "SUCCESS"
+	output := out.String()
+	if strings.TrimSpace(output) == "SUCCESS" {
+		fmt.Println("Conversion completed successfully.")
+	} else {
+		err = fmt.Errorf("Conversion failed: %s", output)
+		fmt.Println("Conversion failed.")
+		// 如果需要,可以打印更详细的错误信息(如果 Python 脚本打印了的话)
+		fmt.Println("Output from Python script:", output)
+	}
+	return
+}

+ 20 - 0
static/convert_xls_to_xlsx.py

@@ -0,0 +1,20 @@
+# convert_xls_to_xlsx.py
+import pandas as pd
+import sys
+
+def convert_xls_to_xlsx(input_file, output_file):
+    try:
+        df = pd.read_excel(input_file, engine='xlrd')
+        df.to_excel(output_file, index=False)
+        print("SUCCESS")  # 打印成功消息
+    except Exception as e:
+        print(f"ERROR: {e}")  # 打印错误消息
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("ERROR: Usage: python convert_xls_to_xlsx.py <input_xls_file> <output_xlsx_file>")
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+    convert_xls_to_xlsx(input_file, output_file)

+ 0 - 0
test.json


+ 127 - 0
utils/common.go

@@ -11,6 +11,7 @@ import (
 	"errors"
 	"fmt"
 	"github.com/mozillazg/go-pinyin"
+	"github.com/shopspring/decimal"
 	"image"
 	"image/png"
 	"io"
@@ -1127,3 +1128,129 @@ func ChineseToPinyinInitials(input string) string {
 
 	return result.String()
 }
+
+// GetFirstLetter 英文单词首字母
+func GetFirstLetter(name string) string {
+	// 定义一个正则表达式,它匹配由字母组成的单词,并捕获每个单词的首字母
+	// 注意:这个正则表达式假设单词由字母组成,并且被非字母字符分隔
+	// 你可能需要根据实际情况调整这个正则表达式
+	re := regexp.MustCompile(`\b[a-zA-Z]`)
+
+	// FindAllString 方法会返回所有匹配的字符串(在这个例子中,是单词的首字母)
+	matches := re.FindAllString(name, -1)
+
+	// 将匹配的首字母连接成一个字符串
+	newStr := strings.Join(matches, "")
+	return strings.ToLower(newStr)
+}
+
+// 修改供应商信息
+func HttpPostNoCookie(url string, reqParam string, headersParams map[string]string) ([]byte, error) {
+
+	var (
+		err error
+	)
+
+	// 准备: HTTP请求
+	reqBody := strings.NewReader(reqParam)
+	httpReq, err := http.NewRequest("POST", url, reqBody)
+	if err != nil {
+		fmt.Printf("NewRequest fail, url: %s, reqBody: %s, err: %v", url, reqBody, err)
+		return nil, err
+	}
+	for k, v := range headersParams {
+		httpReq.Header.Set(k, v)
+	}
+	//httpReq.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36")
+
+	//httpReq.Header.Add("Content-Type", "application/x-www-form-urlencoded;charset=utf-8")
+	client := &http.Client{}
+	// DO: HTTP请求
+	httpRsp, err := client.Do(httpReq)
+	if err != nil {
+		fmt.Printf("do http fail, url: %s, reqBody: %s, err:%v", url, reqBody, err)
+		return nil, err
+	}
+	defer httpRsp.Body.Close()
+
+	if httpRsp.StatusCode != 200 {
+		err = fmt.Errorf("do http fail, url: %s, statusCode: %d, status:%s ", url, httpRsp.StatusCode, httpRsp.Status)
+		return nil, err
+	}
+	// Read: HTTP结果
+	b, err := ioutil.ReadAll(httpRsp.Body)
+	if err != nil {
+		fmt.Printf("ReadAll failed, url: %s, reqBody: %s, err: %v", url, reqBody, err)
+		return nil, err
+	}
+
+	// unmarshal: 解析HTTP返回的结果
+	// 		body: {"Result":{"RequestId":"12131","HasError":true,"ResponseItems":{"ErrorMsg":"错误信息"}}}
+
+	return b, err
+}
+
+func HttpGetNoCookie(url string) ([]byte, error) {
+	res, err := http.Get(url)
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+	return ioutil.ReadAll(res.Body)
+}
+
+// PostEdbLib 调用指标接口
+func PostEdbLib(param map[string]interface{}, method string) (result []byte, err error) {
+	// todo postUrl := EDB_LIB_URL + method
+	postUrl := "http://127.0.0.1:8900/edbapi/" + method
+	postData, err := json.Marshal(param)
+	if err != nil {
+		return
+	}
+	result, err = httpPostEtaLib(postUrl, string(postData), "application/json")
+	if err != nil {
+		return
+	}
+	return
+}
+
+type BaseEdbLibResponse struct {
+	Ret     int
+	Msg     string
+	ErrMsg  string
+	ErrCode string
+	Data    interface{}
+}
+
+func httpPostEtaLib(url, postData string, params ...string) ([]byte, error) {
+	fmt.Println("HttpPost Url:" + url)
+	body := ioutil.NopCloser(strings.NewReader(postData))
+	client := &http.Client{}
+	req, err := http.NewRequest("POST", url, body)
+	if err != nil {
+		return nil, err
+	}
+	contentType := "application/x-www-form-urlencoded;charset=utf-8"
+	if len(params) > 0 && params[0] != "" {
+		contentType = params[0]
+	}
+	req.Header.Set("Content-Type", contentType)
+	// todo req.Header.Set("authorization", MD5(APP_EDB_LIB_NAME_EN+"EDB_LIB_Md5_KEY"))
+	req.Header.Set("authorization", MD5("eta_index_lib"+"2342342341asd"))
+	resp, err := client.Do(req)
+	if err != nil {
+		fmt.Println("client.Do err:" + err.Error())
+		return nil, err
+	}
+	defer resp.Body.Close()
+	b, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		fmt.Println("HttpPost:" + string(b))
+	}
+	return b, err
+}
+
+func FloatFormatRound(val float64, num int32) float64 {
+	val, _ = decimal.NewFromFloat(val).Round(num).Float64()
+	return val
+}

+ 7 - 0
utils/config.go

@@ -12,6 +12,7 @@ var (
 	RunMode        string //运行模式
 	MYSQL_URL      string //数据库连接
 	MYSQL_URL_DATA string
+	PYTHON_PATH    string // python可执行文件地址
 )
 
 // 日志配置
@@ -117,10 +118,16 @@ func init() {
 		LogMaxDays, _ = strconv.Atoi(logMaxDaysStr)
 	}
 
+	PYTHON_PATH = config["python_path"]
+	if PYTHON_PATH == "" {
+		PYTHON_PATH = "python3"
+	}
+
 	{
 		LY_USERNAME = config["ly_username"]
 		LY_PASSWORD = config["ly_password"]
 		LY_JSON_PATH = config["ly_json_path"]
 		LY_OPEN = config["ly_open"]
+
 	}
 }