Browse Source

解析出口销售数据

xyxie 5 months ago
parent
commit
3208f50cea
1 changed files with 137 additions and 8 deletions
  1. 137 8
      services/usda_psd.go

+ 137 - 8
services/usda_psd.go

@@ -7,8 +7,8 @@ import (
 	"eta/eta_crawler/utils"
 	"fmt"
 	"github.com/PuerkitoBio/goquery"
+	"github.com/tealeg/xlsx"
 	"io"
-	"io/ioutil"
 	"mime/multipart"
 	"net/http"
 	"os"
@@ -92,7 +92,7 @@ type UsdaFasIndex struct {
 // 美国农业部月度供需平衡表数据
 func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) {
 	// 从test.json文件中读取json串
-	body, err := ioutil.ReadFile("test.json")
+	/*body, err := ioutil.ReadFile("test.json")
 	if err != nil {
 		return
 	}
@@ -104,8 +104,8 @@ func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) {
 		return
 	}
 	indexList, err = handleUsdaFasPsd(item)
-	return
-	/*defer func() {
+	return*/
+	defer func() {
 		if err != nil {
 			msg := "失败提醒" + "downloadUsdaPsdData ErrMsg:" + err.Error()
 			fmt.Println("msg:", msg)
@@ -178,13 +178,13 @@ func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) {
 		fmt.Println("json.Unmarshal err:" + err.Error())
 		return
 	}
-	indexList, err = handleUsdaFasPsd(item)*/
+	indexList, err = handleUsdaFasPsd(item)
 	return
 }
 
 // 美国农业出库销售数据
-func DownloadUsdaFmsData() {
-	var err error
+func DownloadUsdaFmsData() (indexList []*UsdaFasIndex, err error) {
+	// todo 设置下载频率, 如果有正在处理中的,则暂停下载
 	defer func() {
 		if err != nil {
 			msg := "失败提醒" + "DownloadUsdaFmsData ErrMsg:" + err.Error()
@@ -249,6 +249,7 @@ func DownloadUsdaFmsData() {
 			return
 		}
 	}
+	// todo 下载的日期
 	startDate := "08/22/2019"
 	endDate := "08/22/2024"
 	if err = multipartWriter.WriteField("ctl00$MainContent$lbCountry", "0:0"); err != nil {
@@ -319,8 +320,136 @@ func DownloadUsdaFmsData() {
 	if err != nil {
 		return
 	}
+	go func() {
+		err = ParseUsdaFmsExcel(downloadFile)
+		fmt.Println("Excel file downloaded successfully")
+	}()
+	return
+}
 
-	fmt.Println("Excel file downloaded successfully")
+// ParseUsdaFmsExcel reads the export-sales workbook at path and converts its
+// data rows into UsdaFasIndex entries, one per "commodity: country: kind"
+// combination, each holding a date -> value map. The collected indexes are
+// handed to addUsdaFasPsdData in batches.
+//
+// It returns an error when the path cannot be checked or does not exist, when
+// the workbook cannot be opened, or when addUsdaFasPsdData fails. Cells whose
+// date or numeric value cannot be parsed are logged and skipped.
+func ParseUsdaFmsExcel(path string) (err error) {
+	exist, err := PathExists(path)
+	if err != nil {
+		fmt.Println(err)
+		err = fmt.Errorf("文件地址不存在 err:%s", err.Error())
+		return
+	}
+	if !exist {
+		err = fmt.Errorf("文件地址不存在")
+		return
+	}
+	xlFile, err := xlsx.OpenFile(path)
+	if err != nil {
+		fmt.Println("OpenFile err:", err)
+		err = fmt.Errorf("打开文件失败 err:%s", err.Error())
+		return
+	}
+
+	const (
+		// Row indexes 0-6 are skipped; data rows start at index 7.
+		firstDataRow = 7
+		unit         = "Metric Tons"
+	)
+	// kindByCol maps a 0-based cell position to the metric name used in the
+	// index name. Positions 1, 2 and 4 carry commodity, date and country and
+	// are handled separately below.
+	kindByCol := map[int]string{
+		5:  "Weekly  Exports",
+		6:  "Accum  Exports",
+		7:  "Outstanding Sale:CMY",
+		8:  "Gross Sale:CMY",
+		9:  "Net Sale :CMY",
+		10: "Total Commitment:CMY",
+		11: "Outstanding Sale:NMY",
+		12: "Net Sale :NMY",
+	}
+
+	indexMap := make(map[string]*UsdaFasIndex)
+	sortNo := 0
+	for _, sheet := range xlFile.Sheets {
+		// The loop index is a row number, so it must be bounded by MaxRow.
+		// Bounding it by MaxCol stopped parsing after as many rows as the
+		// sheet has columns.
+		for i := firstDataRow; i < sheet.MaxRow; i++ {
+			row := sheet.Row(i)
+			commodity, dateStr, country := "", "", ""
+			for k, cell := range row.Cells {
+				text := cell.String()
+				switch k {
+				case 1: // Commodity
+					commodity = text
+					continue
+				case 2: // Date
+					dateStr = text
+					continue
+				case 4: // Country
+					country = text
+					continue
+				}
+				kind, isData := kindByCol[k]
+				if !isData {
+					continue
+				}
+
+				fmt.Println(dateStr)
+				fmt.Println(unit)
+				// NOTE(review): this layout contains literal backslashes, so it
+				// only matches text such as 08\/22\/2019. Presumably that is
+				// what cell.String() yields for the date cell - confirm.
+				timeT, e := time.ParseInLocation("01\\/02\\/2006", dateStr, time.Local)
+				if e != nil {
+					utils.FileLog.Info("日期格式转换失败 err:%s", e.Error())
+					continue
+				}
+				date := timeT.Format(utils.FormatDate)
+
+				indexName := fmt.Sprintf("%s: %s: %s", commodity, country, kind)
+				indexItem, okIndex := indexMap[indexName]
+				if !okIndex {
+					// First data point for this combination: create the index.
+					indexItem = new(UsdaFasIndex)
+					indexItem.IndexName = indexName
+					indexItem.ClassifyName = commodity
+					indexItem.ParentClassifyName = "出口销售"
+					indexItem.ClassifySort = 0
+					indexItem.IndexCode = "usda" + utils.GetFirstLetter(indexName)
+					indexItem.Frequency = "周度"
+					indexItem.Sort = sortNo
+					indexItem.Unit = unit
+					indexItem.ExcelDataMap = make(map[string]string)
+					sortNo++
+				}
+				val, e := strconv.ParseFloat(text, 64)
+				if e != nil {
+					utils.FileLog.Info("数据转换失败 err:%s", e.Error())
+					continue
+				}
+				indexItem.ExcelDataMap[date] = fmt.Sprintf("%.4f", val)
+				indexMap[indexName] = indexItem
+			}
+		}
+	}
+
+	// Submit in batches: flush once a batch grows past 500 entries, then send
+	// whatever remains.
+	indexList := make([]*UsdaFasIndex, 0)
+	for _, v := range indexMap {
+		fmt.Printf("IndexName: %s \n", v.IndexName)
+		fmt.Printf("IndexCode: %s \n", v.IndexCode)
+		indexList = append(indexList, v)
+		if len(indexList) > 500 {
+			err = addUsdaFasPsdData(indexList)
+			if err != nil {
+				return
+			}
+			indexList = []*UsdaFasIndex{}
+		}
+	}
+	err = addUsdaFasPsdData(indexList)
+	return
+}