Browse Source

睿姿得爬取逻辑调整

gmy 4 months ago
parent
commit
1869d2e132
2 changed files with 58 additions and 103 deletions
  1. 54 101
      services/ruizide/data_processor.go
  2. 4 2
      services/ruizide/processor_business_logic.go

+ 54 - 101
services/ruizide/data_processor.go

@@ -12,7 +12,6 @@ import (
 	"log"
 	"os"
 	"path/filepath"
-	"strings"
 	"time"
 
 	"github.com/chromedp/chromedp"
@@ -87,58 +86,6 @@ func downloadData(ctx context.Context) error {
 		return err
 	}
 
-	// Cube Dashboards: Supply Revision Analysis
-	if err := chromedp.Run(ctx,
-		chromedp.WaitVisible(`div.d-none.d-lg-flex.flex-grow-1`, chromedp.ByQuery),
-		chromedp.Click(clientsCubeDashboardsLink, chromedp.ByQuery),
-		chromedp.Sleep(5*time.Second),
-		chromedp.WaitVisible(`div.ais-Hits`, chromedp.ByQuery),
-		chromedp.ActionFunc(func(ctx context.Context) error {
-			var elements []string
-
-			// 获取所有 h5 标签的文本内容
-			if err := chromedp.Evaluate(`Array.from(document.querySelectorAll('div.ais-Hits li h5.text-body.overflow-hidden.mb-1.mr-3.font-weight-bold.line-height-1.dashboards-hit__name')).map(h => h.textContent)`, &elements).Do(ctx); err != nil {
-				return err
-			}
-
-			// 遍历文本,查找完全匹配的元素并点击
-			for i, text := range elements {
-				if strings.Contains(text, "Supply Revision Analysis") {
-					// 构造选择器,点击找到的匹配元素
-					selector := fmt.Sprintf(`div.ais-Hits ol li:nth-child(%d) h5.text-body.overflow-hidden.mb-1.mr-3.font-weight-bold.line-height-1.dashboards-hit__name`, i+2)
-					if err := chromedp.Click(selector, chromedp.ByQuery).Do(ctx); err != nil {
-						return fmt.Errorf("点击 'Supply Revision Analysis' 失败: %v", err)
-					}
-					break // 找到后跳出循环
-				}
-			}
-
-			return nil
-		}),
-	); err != nil {
-		return err
-	}
-	if err := clickDownload(ctx); err != nil {
-		return err
-	}
-	if err := waitAndRenameDownloadedFile("Supply_Revision_Analysis_2020.xlsx", downloadDir); err != nil {
-		return err
-	}
-
-	// Oil Supply Analysis
-	if err := chromedp.Run(ctx,
-		chromedp.Click(`a[href="/clients/subscription/"]`, chromedp.ByQuery),
-		chromedp.Click(oilSupplyAnalysisSelector, chromedp.ByQuery),
-	); err != nil {
-		return err
-	}
-	if err := clickDownload(ctx); err != nil {
-		return err
-	}
-	if err := waitAndRenameDownloadedFile("Oil_Supply_Analysis_2010.xlsx", downloadDir); err != nil {
-		return err
-	}
-
 	return nil
 }
 
@@ -266,15 +213,19 @@ func resolverNet() {
 	}
 
 	fmt.Println("数据下载完成")
+
+	// 解析表格 读取数据
+	fileResolver()
 }
 
 // 解析本地文件
-// func fileResolver() {
-func main() {
+func fileResolver() {
+	//func main() {
 	var tableNameList = []string{
-		//"Oil_Demand_Signals_Weekly_Report",
+		"Oil_Demand_Signals_Weekly_Report",
+		"Oil_Supply_Analysis",
 		"Supply_Revision_Analysis",
-		/*"Oil_Market_Cube_Upstream_Supply_Oil_Quality_Api",
+		"Oil_Market_Cube_Upstream_Supply_Oil_Quality_Api",
 		"Oil_Market_Cube_Upstream_Supply_Oil_Quality_Sulphur",
 		"Oil_Market_Cube_Upstream_Supply_Capacity_Capacity",
 		"Oil_Market_Cube_Upstream_Supply_Production",
@@ -290,7 +241,15 @@ func main() {
 		"Oil_Market_Cube_Products_Demand_Products_Demand-Sigma",
 		"Oil_Market_Cube_Balances_Total_Liquids_Balances",
 		"Oil_Market_Cube_Geography_Latitude",
-		"Oil_Market_Cube_Geography_Longitude",*/
+		"Oil_Market_Cube_Geography_Longitude",
+		"Oil_Demand_Analysis_Product_Detail",
+		"Oil_Demand_Analysis_Region",
+		"Oil_Demand_Analysis_Scenario",
+		"Oil_Demand_Analysis_Continent",
+		"Oil_Demand_Analysis_Country",
+		"Oil_Demand_Analysis_Product_Category",
+		"Oil_Demand_Analysis_Sector_Category",
+		"Oil_Demand_Analysis_Sector_Detail",
 	}
 	for _, tableName := range tableNameList {
 		var fileName string
@@ -306,51 +265,9 @@ func main() {
 
 		// 获取所有工作表
 		sheetNames := f.GetSheetList()
+
 		for _, sheetName := range sheetNames {
 			fmt.Printf("读取工作表: %s\n", sheetName)
-			/*if strings.Contains(sheetName, "Content") {
-				continue
-			}
-			if strings.Contains(sheetName, "Road Index") {
-				continue
-			}
-			if strings.Contains(sheetName, "Road Active Fleet") {
-				continue
-			}
-			if strings.Contains(sheetName, "Aviation Index") {
-				continue
-			}
-			if strings.Contains(sheetName, "Aviation Active Fleet") {
-				continue
-			}
-			if strings.Contains(sheetName, "Demand - Gasoline") {
-				continue
-			}
-			if strings.Contains(sheetName, "Demand - Diesel") {
-				continue
-			}
-			if strings.Contains(sheetName, "Demand - Jet Fuel") {
-				continue
-			}
-			if strings.Contains(sheetName, "Demand - Maritime Bunker") {
-				continue
-			}*/
-
-			if strings.Contains(sheetName, "Chart1") {
-				continue
-			}
-			if strings.Contains(sheetName, "Chart2") {
-				continue
-			}
-			if strings.Contains(sheetName, "Chart3") {
-				continue
-			}
-			if strings.Contains(sheetName, "Chart4") {
-				continue
-			}
-			if strings.Contains(sheetName, "Chart5") {
-				continue
-			}
 
 			// 获取工作表的最大行数
 			maxRow, err := f.GetRows(sheetName) // 直接获取所有行数据
@@ -362,6 +279,42 @@ func main() {
 			// 遍历行并打印内容
 			indexData := []models.BaseFromRzdData{}
 			for rowIndex, rowData := range maxRow {
+
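+				// The sheets inside these Excel files do not come in a fixed order, so for the Supply_Revision_Analysis and Oil_Supply_Analysis files the logical sheet name ("Chart1".."ChartN") is derived from the first three header cells of row 0 rather than from the sheet's actual name.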
+				if tableName == "Supply_Revision_Analysis" && rowIndex == 0 {
+					if rowData[0] == "YearQuarter" && rowData[1] == "Revision" && rowData[2] == "CountryRevisionGroup" {
+						sheetName = "Chart1"
+					}
+					if rowData[0] == "YearQuarter" && rowData[1] == "Current" && rowData[2] == "Previous" {
+						sheetName = "Chart2"
+					}
+					if rowData[0] == "Year" && rowData[1] == "Revision" && rowData[2] == "CountryRevisionGroup" {
+						sheetName = "Chart3"
+					}
+					if rowData[0] == "Year" && rowData[1] == "Current" && rowData[2] == "Previous" {
+						sheetName = "Chart4"
+					}
+					if rowData[0] == "Previous" && rowData[1] == "Current" && rowData[2] == "YearMonth" {
+						sheetName = "Chart5"
+					}
+					if rowData[0] == "YearMonth" && rowData[1] == "CountryRevisionGroup" && rowData[2] == "Revision" {
+						sheetName = "Chart6"
+					}
+				} else if tableName == "Oil_Supply_Analysis" && rowIndex == 0 {
+					if rowData[0] == "Viz Date" && rowData[1] == "OilAndGasCategory" && rowData[2] == "supply_kbbld" {
+						sheetName = "Chart1"
+					}
+					if rowData[0] == "Viz Date" && rowData[1] == "supply_kbbld" && rowData[2] == "Region" {
+						sheetName = "Chart2"
+					}
+					if rowData[0] == "Viz Date" && rowData[1] == "CapacityDetail" && rowData[2] == "Capacity_kbbld" {
+						sheetName = "Chart3"
+					}
+					if rowData[0] == "Viz Date" && rowData[1] == "Oil Classification Group" && rowData[2] == "supply_kbbld" {
+						sheetName = "Chart4"
+					}
+				}
+
 				processor, err := GetProcessor(tableName, sheetName)
 				if err != nil {
 					continue

+ 4 - 2
services/ruizide/processor_business_logic.go

@@ -1033,7 +1033,8 @@ func (p *SupplyRevisionAnalysisChartFiveProcessor) Process(tableName string, she
 	}
 
 	dataTimeOne := rowData[len(rowData)-1]
-	formatOne, err := utils.ConvertDateFormat5(dataTimeOne)
+	timeSplit := strings.Split(dataTimeOne, "-")
+	formatOne, err := utils.GetLastDayOfMonth(timeSplit[0] + "-" + timeSplit[1])
 	if err != nil {
 		return nil, err
 	}
@@ -1121,7 +1122,8 @@ func (p *SupplyRevisionAnalysisChartSixProcessor) Process(tableName string, shee
 	}
 
 	dataTime := rowData[0]
-	format, err := utils.ConvertDateFormat5(dataTime)
+	timeSplit := strings.Split(dataTime, "-")
+	format, err := utils.GetLastDayOfMonth(timeSplit[0] + "-" + timeSplit[1])
 	if err != nil {
 		return nil, err
 	}