Browse Source

粮油爬取逻辑调整

gmy 4 months ago
parent
commit
8cee22932b

+ 32 - 4
services/liangyou/commodity_liangyou.go

@@ -1,4 +1,4 @@
-package liangyou
+package main
 
 import (
 	"context"
@@ -23,7 +23,8 @@ var (
 	lyLoginPath = "https://www.fao.com.cn/"
 )
 
-func LyDataDeal(cont context.Context) (err error) {
+// func LyDataDeal(cont context.Context) (err error) {
+func main() {
 
 	// 读取 JSON 文件
 	configFile, err := os.ReadFile(utils.LY_JSON_PATH)
@@ -80,7 +81,7 @@ func LyDataDeal(cont context.Context) (err error) {
 			}
 		}
 	}
-	return nil
+	//return nil
 }
 
 func login(ctx context.Context) error {
@@ -165,10 +166,16 @@ func fetchReportData(ctx context.Context, product, category, report string, keyw
 			break
 		}
 
+		// 校验获取到的url key在数据库是否存在
+		if IsExistInDB(allReportURLMap) {
+			logs.Info("改页报告已存在处理的报告,无需再翻页: %s: %s: %s", product, category, report)
+			break
+		}
+
 		// Click the next page button
 		err = chromedp.Run(ctx,
 			chromedp.Click(`div.my-page-next`, chromedp.ByQuery),
-			chromedp.Sleep(5*time.Second),
+			chromedp.Sleep(10*time.Second),
 			chromedp.Location(&categoryPageURL),
 		)
 		if err != nil {
@@ -264,6 +271,27 @@ func fetchReportData(ctx context.Context, product, category, report string, keyw
 	return nil
 }
 
+// IsExistInDB asks the index library whether the URLs used as keys of urlMap
+// have already been recorded.
+//
+// The keys are sent as "UrlList" to the utils.CHECK_LY_INDEX_RECORD_IS_EXIST
+// endpoint through utils.PostEdbLibRequest, and the boolean Data field of the
+// decoded response is returned unchanged.
+// NOTE(review): whether the endpoint answers "any URL exists" or "all URLs
+// exist" is decided server-side and is not visible here — confirm.
+//
+// A failed request or an undecodable response is logged and reported as
+// false, i.e. it is treated the same as "not recorded yet".
+func IsExistInDB(urlMap map[string]string) bool {
+	// Left as a nil slice on purpose: an empty map keeps serialising as JSON
+	// null, exactly as before.
+	var urlList []string
+	for key := range urlMap {
+		urlList = append(urlList, key)
+	}
+
+	paramsLib := map[string]interface{}{"UrlList": urlList}
+	postEdbLib, err := utils.PostEdbLibRequest(paramsLib, utils.CHECK_LY_INDEX_RECORD_IS_EXIST)
+	if err != nil {
+		// The lookup failed; fall back to "not recorded".
+		log.Printf("postEdbLib err: %v", err)
+		return false
+	}
+
+	var requestResponse models.RequestResponse[bool]
+	if err := json.Unmarshal(postEdbLib, &requestResponse); err != nil {
+		// Previously dropped silently; log it so a malformed reply is
+		// distinguishable from a genuine "false" answer.
+		log.Printf("unmarshal ly index record check response err: %v", err)
+		return false
+	}
+	return requestResponse.Data
+}
+
 func fillProductPageURL(ctx context.Context, product string, category string) (string, error) {
 	// 选择 dl 标签下所有 a 标签的 XPath
 	selector := `//dl[contains(@class, 'dl_hot')]//a`

+ 1 - 1
services/liangyou/processor_business_logic.go

@@ -1,5 +1,5 @@
 // @Author gmy 2024/8/6 10:50:00
-package liangyou
+package main
 
 import (
 	"context"

+ 1 - 1
services/liangyou/processor_factory.go

@@ -1,5 +1,5 @@
 // @Author gmy 2024/8/6 10:48:00
-package liangyou
+package main
 
 import (
 	"context"

+ 1 - 0
utils/constants.go

@@ -264,6 +264,7 @@ const (
 	GET_LY_INDEX_BY_CODE                              = "/ly/get/ly/index/by/code"                              // 根据指标编码获取指标
 	GET_EDB_INFO_BY_INDEX_CODE                        = "/ly/get/edb/info/by/index/code"                        // 根据指标code获取指标信息
 	ADD_BATCH_LY_EDB_DATA                             = "/ly/add/batch/ly/edb/data"                             // 批量增加粮油指标库数据
+	CHECK_LY_INDEX_RECORD_IS_EXIST                    = "/ly/check/ly/index/record/is/exist"                    // 校验当前页是否有指标读取到指标库
 )
 
 const (