|
@@ -7,8 +7,8 @@ import (
|
|
|
"eta/eta_crawler/utils"
|
|
|
"fmt"
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
+ "github.com/tealeg/xlsx"
|
|
|
"io"
|
|
|
- "io/ioutil"
|
|
|
"mime/multipart"
|
|
|
"net/http"
|
|
|
"os"
|
|
@@ -92,7 +92,7 @@ type UsdaFasIndex struct {
|
|
|
// 美国农业部月度供需平衡表数据
|
|
|
func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) {
|
|
|
// 从test.json文件中读取json串
|
|
|
- body, err := ioutil.ReadFile("test.json")
|
|
|
+ /*body, err := ioutil.ReadFile("test.json")
|
|
|
if err != nil {
|
|
|
return
|
|
|
}
|
|
@@ -104,8 +104,8 @@ func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) {
|
|
|
return
|
|
|
}
|
|
|
indexList, err = handleUsdaFasPsd(item)
|
|
|
- return
|
|
|
- /*defer func() {
|
|
|
+ return*/
|
|
|
+ defer func() {
|
|
|
if err != nil {
|
|
|
msg := "失败提醒" + "downloadUsdaPsdData ErrMsg:" + err.Error()
|
|
|
fmt.Println("msg:", msg)
|
|
@@ -178,13 +178,13 @@ func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) {
|
|
|
fmt.Println("json.Unmarshal err:" + err.Error())
|
|
|
return
|
|
|
}
|
|
|
- indexList, err = handleUsdaFasPsd(item)*/
|
|
|
+ indexList, err = handleUsdaFasPsd(item)
|
|
|
return
|
|
|
}
|
|
|
|
|
|
// 美国农业出库销售数据
|
|
|
-func DownloadUsdaFmsData() {
|
|
|
- var err error
|
|
|
+func DownloadUsdaFmsData() (indexList []*UsdaFasIndex, err error) {
|
|
|
+ // todo 设置下载频率, 如果有正在处理中的,则暂停下载
|
|
|
defer func() {
|
|
|
if err != nil {
|
|
|
msg := "失败提醒" + "DownloadUsdaFmsData ErrMsg:" + err.Error()
|
|
@@ -249,6 +249,7 @@ func DownloadUsdaFmsData() {
|
|
|
return
|
|
|
}
|
|
|
}
|
|
|
+ // todo 下载的日期
|
|
|
startDate := "08/22/2019"
|
|
|
endDate := "08/22/2024"
|
|
|
if err = multipartWriter.WriteField("ctl00$MainContent$lbCountry", "0:0"); err != nil {
|
|
@@ -319,8 +320,136 @@ func DownloadUsdaFmsData() {
|
|
|
if err != nil {
|
|
|
return
|
|
|
}
|
|
|
+ go func() {
|
|
|
+ err = ParseUsdaFmsExcel(downloadFile)
|
|
|
+ fmt.Println("Excel file downloaded successfully")
|
|
|
+ }()
|
|
|
+ return
|
|
|
+}
|
|
|
|
|
|
- fmt.Println("Excel file downloaded successfully")
|
|
|
+func ParseUsdaFmsExcel(path string) (err error) {
|
|
|
+ var xlFile *xlsx.File
|
|
|
+ exist, err := PathExists(path)
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println(err)
|
|
|
+ err = fmt.Errorf("文件地址不存在 err:%s", err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ if !exist {
|
|
|
+ err = fmt.Errorf("文件地址不存在")
|
|
|
+ return
|
|
|
+ }
|
|
|
+ xlFile, err = xlsx.OpenFile(path)
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println("OpenFile err:", err)
|
|
|
+ err = fmt.Errorf("打开文件失败 err:%s", err.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ //解析出表头第7行
|
|
|
+ //拼接指标名称
|
|
|
+ // 指标名称
|
|
|
+ indexMap := make(map[string]*UsdaFasIndex)
|
|
|
+ indexList := make([]*UsdaFasIndex, 0)
|
|
|
+ sort := 0
|
|
|
+ for _, sheet := range xlFile.Sheets {
|
|
|
+ //遍历行读取
|
|
|
+ maxCol := sheet.MaxCol
|
|
|
+ for i := 0; i < maxCol; i++ {
|
|
|
+ if i > 6 {
|
|
|
+ row := sheet.Row(i)
|
|
|
+ cells := row.Cells
|
|
|
+ commodity := ""
|
|
|
+ dateStr := ""
|
|
|
+ country := ""
|
|
|
+ dataVal := ""
|
|
|
+ unit := "Metric Tons"
|
|
|
+ for k, cell := range cells {
|
|
|
+ text := cell.String()
|
|
|
+ kind := ""
|
|
|
+ indexName := ""
|
|
|
+ if k == 1 { // 品种名称Commodity
|
|
|
+ commodity = text
|
|
|
+ } else if k == 2 {
|
|
|
+ dateStr = text
|
|
|
+ } else if k == 4 {
|
|
|
+ country = text
|
|
|
+ } else if k == 5 {
|
|
|
+ kind = "Weekly Exports"
|
|
|
+ } else if k == 6 {
|
|
|
+ kind = "Accum Exports"
|
|
|
+ } else if k == 7 {
|
|
|
+ kind = "Outstanding Sale:CMY"
|
|
|
+ } else if k == 8 {
|
|
|
+ kind = "Gross Sale:CMY"
|
|
|
+ } else if k == 9 {
|
|
|
+ kind = "Net Sale :CMY"
|
|
|
+ } else if k == 10 {
|
|
|
+ kind = "Total Commitment:CMY"
|
|
|
+ } else if k == 11 {
|
|
|
+ kind = "Outstanding Sale:NMY"
|
|
|
+ } else if k == 12 {
|
|
|
+ kind = "Net Sale :NMY"
|
|
|
+ }
|
|
|
+ if k > 4 && k < 13 {
|
|
|
+ // 处理日期
|
|
|
+ fmt.Println(dateStr)
|
|
|
+ fmt.Println(unit)
|
|
|
+ timeT, e := time.ParseInLocation("01\\/02\\/2006", dateStr, time.Local)
|
|
|
+ if e != nil {
|
|
|
+ utils.FileLog.Info("日期格式转换失败 err:%s", e.Error())
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ date := timeT.Format(utils.FormatDate)
|
|
|
+ dataVal = text
|
|
|
+ indexName = fmt.Sprintf("%s: %s: %s", commodity, country, kind)
|
|
|
+ inCode := "usda" + utils.GetFirstLetter(indexName)
|
|
|
+ indexItem, okIndex := indexMap[indexName]
|
|
|
+ // 首字母大写
|
|
|
+ classifyName := commodity
|
|
|
+ if !okIndex {
|
|
|
+ // 新增指标
|
|
|
+ indexItem = new(UsdaFasIndex)
|
|
|
+ indexItem.IndexName = indexName
|
|
|
+ indexItem.ClassifyName = classifyName
|
|
|
+ indexItem.ParentClassifyName = "出口销售"
|
|
|
+ indexItem.ClassifySort = 0
|
|
|
+ indexItem.IndexCode = inCode
|
|
|
+ indexItem.Frequency = "周度"
|
|
|
+ indexItem.Sort = sort
|
|
|
+ indexItem.Unit = unit
|
|
|
+ indexItem.ExcelDataMap = make(map[string]string)
|
|
|
+ sort++
|
|
|
+ }
|
|
|
+ val, e := strconv.ParseFloat(dataVal, 64)
|
|
|
+ if e != nil {
|
|
|
+ utils.FileLog.Info("数据转换失败 err:%s", e.Error())
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ indexItem.ExcelDataMap[date] = fmt.Sprintf("%.4f", val)
|
|
|
+ indexMap[indexName] = indexItem
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, v := range indexMap {
|
|
|
+ fmt.Printf("IndexName: %s \n", v.IndexName)
|
|
|
+ fmt.Printf("IndexCode: %s \n", v.IndexCode)
|
|
|
+ indexList = append(indexList, v)
|
|
|
+ if len(indexList) > 500 {
|
|
|
+ err = addUsdaFasPsdData(indexList)
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ indexList = []*UsdaFasIndex{}
|
|
|
+ }
|
|
|
+ }
|
|
|
+ err = addUsdaFasPsdData(indexList)
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
return
|
|
|
}
|
|
|
|