|
@@ -2,16 +2,18 @@ package services
|
|
|
|
|
|
import (
|
|
|
"bytes"
|
|
|
+ "context"
|
|
|
"encoding/json"
|
|
|
"eta/eta_crawler/services/alarm_msg"
|
|
|
"eta/eta_crawler/utils"
|
|
|
"fmt"
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
- "github.com/tealeg/xlsx"
|
|
|
+ "github.com/xuri/excelize/v2"
|
|
|
"io"
|
|
|
"mime/multipart"
|
|
|
"net/http"
|
|
|
"os"
|
|
|
+ "os/exec"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
"time"
|
|
@@ -67,6 +69,21 @@ type UsdaFasIndex struct {
|
|
|
ExcelDataMap map[string]string
|
|
|
}
|
|
|
|
|
|
+func DownloadUsdaPsdDataTask(cont context.Context) (err error) {
|
|
|
+
|
|
|
+
|
|
|
+ err = DownloadUsdaPsdData()
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+func DownloadUsdaFmsDataTask(cont context.Context) (err error) {
|
|
|
+
|
|
|
+ startDate := time.Now().AddDate(0, -1, 0).Format("01/02/2006")
|
|
|
+ endDate := time.Now().Format("01/02/2006")
|
|
|
+ err = DownloadUsdaFmsData(startDate, endDate)
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
|
|
|
|
|
|
|
|
@@ -90,7 +107,7 @@ type UsdaFasIndex struct {
|
|
|
|
|
|
|
|
|
|
|
|
-func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) {
|
|
|
+func DownloadUsdaPsdData() (err error) {
|
|
|
|
|
|
|
|
|
if err != nil {
|
|
@@ -178,12 +195,14 @@ func DownloadUsdaPsdData() (indexList []*UsdaFasIndex, err error) {
|
|
|
fmt.Println("json.Unmarshal err:" + err.Error())
|
|
|
return
|
|
|
}
|
|
|
- indexList, err = handleUsdaFasPsd(item)
|
|
|
+ go func() {
|
|
|
+ err = handleUsdaFasPsd(item)
|
|
|
+ }()
|
|
|
return
|
|
|
}
|
|
|
|
|
|
|
|
|
-func DownloadUsdaFmsData() (indexList []*UsdaFasIndex, err error) {
|
|
|
+func DownloadUsdaFmsData(startDate, endDate string) (err error) {
|
|
|
|
|
|
defer func() {
|
|
|
if err != nil {
|
|
@@ -193,7 +212,7 @@ func DownloadUsdaFmsData() (indexList []*UsdaFasIndex, err error) {
|
|
|
go alarm_msg.SendAlarmMsg(msg, 3)
|
|
|
}
|
|
|
}()
|
|
|
- downloadFile := "downloaded_excel.xlsx"
|
|
|
+ downloadFile := fmt.Sprintf("./static/usda_fms_excel_%s.xls", time.Now().Format(utils.FormatDate))
|
|
|
|
|
|
dataUrl := "https://apps.fas.usda.gov/esrquery/esrq.aspx"
|
|
|
body1, err := utils.HttpGetNoCookie(dataUrl)
|
|
@@ -249,9 +268,6 @@ func DownloadUsdaFmsData() (indexList []*UsdaFasIndex, err error) {
|
|
|
return
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- startDate := "08/22/2019"
|
|
|
- endDate := "08/22/2024"
|
|
|
if err = multipartWriter.WriteField("ctl00$MainContent$lbCountry", "0:0"); err != nil {
|
|
|
err = fmt.Errorf("set ctl00$MainContent$lbCountry, Err:%s", err)
|
|
|
return
|
|
@@ -313,22 +329,57 @@ func DownloadUsdaFmsData() (indexList []*UsdaFasIndex, err error) {
|
|
|
if err != nil {
|
|
|
return
|
|
|
}
|
|
|
- defer out.Close()
|
|
|
|
|
|
|
|
|
_, err = io.Copy(out, resp.Body)
|
|
|
if err != nil {
|
|
|
return
|
|
|
}
|
|
|
+
|
|
|
+ err = out.Close()
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("Failed to close temporary file: %v", err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ downloadFileXlsx := downloadFile + "x"
|
|
|
+ err = ConvertXlsToXlsx(downloadFile, downloadFileXlsx)
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("文件格式转换失败 convert excel, Err:%w", err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ done := make(chan error)
|
|
|
go func() {
|
|
|
- err = ParseUsdaFmsExcel(downloadFile)
|
|
|
- fmt.Println("Excel file downloaded successfully")
|
|
|
+ done <- ParseUsdaFmsExcel(downloadFileXlsx)
|
|
|
}()
|
|
|
+
|
|
|
+ select {
|
|
|
+ case err = <-done:
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("parse excel, Err:%w", err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ case <-time.After(20 * time.Minute):
|
|
|
+ err = fmt.Errorf("parse excel timed out")
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ fmt.Println("Excel file downloaded successfully")
|
|
|
return
|
|
|
}
|
|
|
|
|
|
func ParseUsdaFmsExcel(path string) (err error) {
|
|
|
- var xlFile *xlsx.File
|
|
|
+ defer func() {
|
|
|
+ if err != nil {
|
|
|
+ msg := "失败提醒" + "DownloadUsdaFmsData_ParseUsdaFmsExcel ErrMsg:" + err.Error()
|
|
|
+ fmt.Println("msg:", msg)
|
|
|
+ utils.FileLog.Info(msg)
|
|
|
+ go alarm_msg.SendAlarmMsg(msg, 3)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+
|
|
|
exist, err := PathExists(path)
|
|
|
if err != nil {
|
|
|
fmt.Println(err)
|
|
@@ -339,121 +390,141 @@ func ParseUsdaFmsExcel(path string) (err error) {
|
|
|
err = fmt.Errorf("文件地址不存在")
|
|
|
return
|
|
|
}
|
|
|
- xlFile, err = xlsx.OpenFile(path)
|
|
|
+
|
|
|
+ xlFile, err := excelize.OpenFile(path)
|
|
|
if err != nil {
|
|
|
fmt.Println("OpenFile err:", err)
|
|
|
err = fmt.Errorf("打开文件失败 err:%s", err.Error())
|
|
|
return
|
|
|
}
|
|
|
-
|
|
|
+ defer func() {
|
|
|
+
|
|
|
+ if err = xlFile.Close(); err != nil {
|
|
|
+ fmt.Println(err)
|
|
|
+ }
|
|
|
+ os.Remove(path)
|
|
|
+ }()
|
|
|
+ sheetName := xlFile.GetSheetName(0)
|
|
|
+ fmt.Println("Sheet Name:", sheetName)
|
|
|
|
|
|
|
|
|
|
|
|
indexMap := make(map[string]*UsdaFasIndex)
|
|
|
indexList := make([]*UsdaFasIndex, 0)
|
|
|
sort := 0
|
|
|
- for _, sheet := range xlFile.Sheets {
|
|
|
-
|
|
|
- maxCol := sheet.MaxCol
|
|
|
- for i := 0; i < maxCol; i++ {
|
|
|
- if i > 6 {
|
|
|
- row := sheet.Row(i)
|
|
|
- cells := row.Cells
|
|
|
- commodity := ""
|
|
|
- dateStr := ""
|
|
|
- country := ""
|
|
|
- dataVal := ""
|
|
|
- unit := "Metric Tons"
|
|
|
- for k, cell := range cells {
|
|
|
- text := cell.String()
|
|
|
- kind := ""
|
|
|
- indexName := ""
|
|
|
- if k == 1 {
|
|
|
- commodity = text
|
|
|
- } else if k == 2 {
|
|
|
- dateStr = text
|
|
|
- } else if k == 4 {
|
|
|
- country = text
|
|
|
- } else if k == 5 {
|
|
|
- kind = "Weekly Exports"
|
|
|
- } else if k == 6 {
|
|
|
- kind = "Accum Exports"
|
|
|
- } else if k == 7 {
|
|
|
- kind = "Outstanding Sale:CMY"
|
|
|
- } else if k == 8 {
|
|
|
- kind = "Gross Sale:CMY"
|
|
|
- } else if k == 9 {
|
|
|
- kind = "Net Sale :CMY"
|
|
|
- } else if k == 10 {
|
|
|
- kind = "Total Commitment:CMY"
|
|
|
- } else if k == 11 {
|
|
|
- kind = "Outstanding Sale:NMY"
|
|
|
- } else if k == 12 {
|
|
|
- kind = "Net Sale :NMY"
|
|
|
+ rows, err := xlFile.GetRows(sheetName)
|
|
|
+
|
|
|
+
|
|
|
+ for i, row := range rows {
|
|
|
+ if i > 6 {
|
|
|
+ commodity := ""
|
|
|
+ dateStr := ""
|
|
|
+ country := ""
|
|
|
+ dataVal := ""
|
|
|
+ unit := "Metric Tons"
|
|
|
+ for k, text := range row {
|
|
|
+ fmt.Println("第", i, "行,第", k, "列,内容:", text)
|
|
|
+ kind := ""
|
|
|
+ indexName := ""
|
|
|
+ if k == 1 {
|
|
|
+ commodity = text
|
|
|
+ } else if k == 2 {
|
|
|
+ dateStr = text
|
|
|
+ } else if k == 4 {
|
|
|
+ country = text
|
|
|
+ } else if k == 5 {
|
|
|
+ kind = "Weekly Exports"
|
|
|
+ } else if k == 6 {
|
|
|
+ kind = "Accum Exports"
|
|
|
+ } else if k == 7 {
|
|
|
+ kind = "Outstanding Sale:CMY"
|
|
|
+ } else if k == 8 {
|
|
|
+ kind = "Gross Sale:CMY"
|
|
|
+ } else if k == 9 {
|
|
|
+ kind = "Net Sale :CMY"
|
|
|
+ } else if k == 10 {
|
|
|
+ kind = "Total Commitment:CMY"
|
|
|
+ } else if k == 11 {
|
|
|
+ kind = "Outstanding Sale:NMY"
|
|
|
+ } else if k == 12 {
|
|
|
+ kind = "Net Sale :NMY"
|
|
|
+ }
|
|
|
+ if k > 4 && k < 13 {
|
|
|
+
|
|
|
+ fmt.Println(dateStr)
|
|
|
+ fmt.Println(unit)
|
|
|
+ timeT, e := time.ParseInLocation(utils.FormatDateTime, dateStr, time.Local)
|
|
|
+ if e != nil {
|
|
|
+ utils.FileLog.Info("日期格式转换失败 err:%s", e.Error())
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ date := timeT.Format(utils.FormatDate)
|
|
|
+ dataVal = text
|
|
|
+ indexName = fmt.Sprintf("%s: %s: %s", commodity, country, kind)
|
|
|
+ inCode := "usda" + utils.GetFirstLetter(indexName)
|
|
|
+ indexItem, okIndex := indexMap[indexName]
|
|
|
+
|
|
|
+ classifyName := commodity
|
|
|
+ if !okIndex {
|
|
|
+
|
|
|
+ indexItem = new(UsdaFasIndex)
|
|
|
+ indexItem.IndexName = indexName
|
|
|
+ indexItem.ClassifyName = classifyName
|
|
|
+ indexItem.ParentClassifyName = "出口销售"
|
|
|
+ indexItem.ClassifySort = 0
|
|
|
+ indexItem.IndexCode = inCode
|
|
|
+ indexItem.Frequency = "周度"
|
|
|
+ indexItem.Sort = sort
|
|
|
+ indexItem.Unit = unit
|
|
|
+ indexItem.ExcelDataMap = make(map[string]string)
|
|
|
+ sort++
|
|
|
}
|
|
|
- if k > 4 && k < 13 {
|
|
|
-
|
|
|
- fmt.Println(dateStr)
|
|
|
- fmt.Println(unit)
|
|
|
- timeT, e := time.ParseInLocation("01\\/02\\/2006", dateStr, time.Local)
|
|
|
- if e != nil {
|
|
|
- utils.FileLog.Info("日期格式转换失败 err:%s", e.Error())
|
|
|
- continue
|
|
|
- }
|
|
|
- date := timeT.Format(utils.FormatDate)
|
|
|
- dataVal = text
|
|
|
- indexName = fmt.Sprintf("%s: %s: %s", commodity, country, kind)
|
|
|
- inCode := "usda" + utils.GetFirstLetter(indexName)
|
|
|
- indexItem, okIndex := indexMap[indexName]
|
|
|
-
|
|
|
- classifyName := commodity
|
|
|
- if !okIndex {
|
|
|
-
|
|
|
- indexItem = new(UsdaFasIndex)
|
|
|
- indexItem.IndexName = indexName
|
|
|
- indexItem.ClassifyName = classifyName
|
|
|
- indexItem.ParentClassifyName = "出口销售"
|
|
|
- indexItem.ClassifySort = 0
|
|
|
- indexItem.IndexCode = inCode
|
|
|
- indexItem.Frequency = "周度"
|
|
|
- indexItem.Sort = sort
|
|
|
- indexItem.Unit = unit
|
|
|
- indexItem.ExcelDataMap = make(map[string]string)
|
|
|
- sort++
|
|
|
- }
|
|
|
- val, e := strconv.ParseFloat(dataVal, 64)
|
|
|
- if e != nil {
|
|
|
- utils.FileLog.Info("数据转换失败 err:%s", e.Error())
|
|
|
- continue
|
|
|
- }
|
|
|
- indexItem.ExcelDataMap[date] = fmt.Sprintf("%.4f", val)
|
|
|
- indexMap[indexName] = indexItem
|
|
|
+ if strings.Contains(dataVal, ",") {
|
|
|
+ dataVal = strings.ReplaceAll(dataVal, ",", "")
|
|
|
+ }
|
|
|
+ val, e := strconv.ParseFloat(dataVal, 64)
|
|
|
+ if e != nil {
|
|
|
+ utils.FileLog.Info("数据转换失败 err:%s", e.Error())
|
|
|
+ continue
|
|
|
}
|
|
|
+ indexItem.ExcelDataMap[date] = fmt.Sprintf("%.4f", val)
|
|
|
+ indexMap[indexName] = indexItem
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
|
|
|
for _, v := range indexMap {
|
|
|
fmt.Printf("IndexName: %s \n", v.IndexName)
|
|
|
fmt.Printf("IndexCode: %s \n", v.IndexCode)
|
|
|
indexList = append(indexList, v)
|
|
|
if len(indexList) > 500 {
|
|
|
- err = addUsdaFasPsdData(indexList)
|
|
|
+ err = addUsdaFasPsdData(indexList, "出口销售")
|
|
|
if err != nil {
|
|
|
return
|
|
|
}
|
|
|
indexList = []*UsdaFasIndex{}
|
|
|
}
|
|
|
}
|
|
|
- err = addUsdaFasPsdData(indexList)
|
|
|
+ err = addUsdaFasPsdData(indexList, "出口销售")
|
|
|
if err != nil {
|
|
|
return
|
|
|
}
|
|
|
return
|
|
|
}
|
|
|
|
|
|
-func handleUsdaFasPsd(item *UsdaPsdData) (indexList []*UsdaFasIndex, err error) {
|
|
|
+func handleUsdaFasPsd(item *UsdaPsdData) (err error) {
|
|
|
+
|
|
|
+ defer func() {
|
|
|
+ if err != nil {
|
|
|
+ msg := "失败提醒" + "downloadUsdaPsdData_handleUsdaFasPsd ErrMsg:" + err.Error()
|
|
|
+ fmt.Println("msg:", msg)
|
|
|
+ utils.FileLog.Info(msg)
|
|
|
+ go alarm_msg.SendAlarmMsg(msg, 3)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+
|
|
|
|
|
|
headerSlice := make([]string, 0)
|
|
|
for index, v := range item.TableHeaders {
|
|
@@ -467,9 +538,6 @@ func handleUsdaFasPsd(item *UsdaPsdData) (indexList []*UsdaFasIndex, err error)
|
|
|
}
|
|
|
headerSlice = append(headerSlice, v)
|
|
|
}
|
|
|
-
|
|
|
-
|
|
|
- indexList = make([]*UsdaFasIndex, 0)
|
|
|
sort := 0
|
|
|
|
|
|
indexMap := make(map[string]*UsdaFasIndex)
|
|
@@ -561,28 +629,27 @@ func handleUsdaFasPsd(item *UsdaPsdData) (indexList []*UsdaFasIndex, err error)
|
|
|
}
|
|
|
|
|
|
}
|
|
|
-
|
|
|
+ indexList := make([]*UsdaFasIndex, 0)
|
|
|
for _, v := range indexMap {
|
|
|
fmt.Printf("IndexName: %s \n", v.IndexName)
|
|
|
fmt.Printf("IndexCode: %s \n", v.IndexCode)
|
|
|
indexList = append(indexList, v)
|
|
|
if len(indexList) > 500 {
|
|
|
- err = addUsdaFasPsdData(indexList)
|
|
|
+ err = addUsdaFasPsdData(indexList, "月度供需")
|
|
|
if err != nil {
|
|
|
return
|
|
|
}
|
|
|
indexList = []*UsdaFasIndex{}
|
|
|
}
|
|
|
}
|
|
|
- err = addUsdaFasPsdData(indexList)
|
|
|
+ err = addUsdaFasPsdData(indexList, "月度供需")
|
|
|
if err != nil {
|
|
|
return
|
|
|
}
|
|
|
return
|
|
|
}
|
|
|
|
|
|
-func addUsdaFasPsdData(indexList []*UsdaFasIndex) (err error) {
|
|
|
- sheetName := "月度供需"
|
|
|
+func addUsdaFasPsdData(indexList []*UsdaFasIndex, sheetName string) (err error) {
|
|
|
if len(indexList) > 0 {
|
|
|
params := make(map[string]interface{})
|
|
|
params["List"] = indexList
|
|
@@ -608,3 +675,35 @@ func addUsdaFasPsdData(indexList []*UsdaFasIndex) (err error) {
|
|
|
}
|
|
|
return
|
|
|
}
|
|
|
+
|
|
|
+
|
|
|
+func ConvertXlsToXlsx(inputFile, outputFile string) (err error) {
|
|
|
+ pythonScript := "./static/convert_xls_to_xlsx.py"
|
|
|
+
|
|
|
+ cmd := exec.Command(utils.PYTHON_PATH, pythonScript, inputFile, outputFile)
|
|
|
+
|
|
|
+
|
|
|
+ var out bytes.Buffer
|
|
|
+ cmd.Stdout = &out
|
|
|
+ cmd.Stderr = os.Stderr
|
|
|
+
|
|
|
+
|
|
|
+ err = cmd.Run()
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("Error running command: %v\n", err)
|
|
|
+ fmt.Printf("Error running command: %v\n", err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ output := out.String()
|
|
|
+ if strings.TrimSpace(output) == "SUCCESS" {
|
|
|
+ fmt.Println("Conversion completed successfully.")
|
|
|
+ } else {
|
|
|
+ err = fmt.Errorf("Conversion failed: %s", output)
|
|
|
+ fmt.Println("Conversion failed.")
|
|
|
+
|
|
|
+ fmt.Println("Output from Python script:", output)
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|