Browse Source

优化ccf:cookie文件为空时通过Chrome重新获取cookie;日度/周度/装置报告的抓取数量改为可配置(默认 7/3/3)

hsun 7 months ago
parent
commit
40821f39bd
6 changed files with 40 additions and 15 deletions
  1. 3 3
      go.mod
  2. 2 0
      go.sum
  3. 15 7
      services/base_from_ccf/common.go
  4. 2 2
      services/base_from_ccf/edb.go
  5. 1 1
      services/base_from_ccf/stock.go
  6. 17 2
      utils/config.go

+ 3 - 3
go.mod

@@ -6,6 +6,8 @@ require (
 	github.com/PuerkitoBio/goquery v1.9.2
 	github.com/beego/bee/v2 v2.1.0
 	github.com/beego/beego/v2 v2.1.4
+	github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732
+	github.com/chromedp/chromedp v0.9.5
 	github.com/gonum/stat v0.0.0-20181125101827-41a0da705a5b
 	github.com/mozillazg/go-pinyin v0.20.0
 	github.com/patrickmn/go-cache v2.1.0+incompatible
@@ -15,14 +17,13 @@ require (
 	github.com/tealeg/xlsx v1.0.5
 	github.com/xuri/excelize/v2 v2.8.0
 	golang.org/x/net v0.24.0
+	golang.org/x/text v0.14.0
 )
 
 require (
 	github.com/andybalholm/cascadia v1.3.2 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cespare/xxhash/v2 v2.2.0 // indirect
-	github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732 // indirect
-	github.com/chromedp/chromedp v0.9.5 // indirect
 	github.com/chromedp/sysutil v1.0.0 // indirect
 	github.com/gobwas/httphead v0.1.0 // indirect
 	github.com/gobwas/pool v0.2.1 // indirect
@@ -54,7 +55,6 @@ require (
 	github.com/xuri/nfp v0.0.0-20230819163627-dc951e3ffe1a // indirect
 	golang.org/x/crypto v0.22.0 // indirect
 	golang.org/x/sys v0.19.0 // indirect
-	golang.org/x/text v0.14.0 // indirect
 	google.golang.org/protobuf v1.30.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )

+ 2 - 0
go.sum

@@ -58,6 +58,7 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
 github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
 github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
@@ -71,6 +72,7 @@ github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9
 github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8=
 github.com/mozillazg/go-pinyin v0.20.0 h1:BtR3DsxpApHfKReaPO1fCqF4pThRwH9uwvXzm+GnMFQ=
 github.com/mozillazg/go-pinyin v0.20.0/go.mod h1:iR4EnMMRXkfpFVV5FMi4FNB6wGq9NV6uDWbUuPhP4Yc=
+github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw=
 github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
 github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
 github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=

+ 15 - 7
services/base_from_ccf/common.go

@@ -96,6 +96,15 @@ func fetchPageHtml(baseUrl string, fetchNum int) (respBody []byte, err error) {
 		err = fmt.Errorf("读取cookie文件失败, err: %s", e.Error())
 		return
 	}
+	if strCookie == "" && fetchNum < 2 {
+		fmt.Printf("文件cookie为空, 重新获取, fetchNum: %d\n", fetchNum)
+		utils.FileLog.Info(fmt.Sprintf("文件cookie为空, 重新获取, fetchNum: %d", fetchNum))
+		_, err = getCookieByChrome()
+		if err != nil {
+			return
+		}
+		return fetchPageHtml(baseUrl, fetchNum)
+	}
 
 	// 拉取网站内容
 	cli := new(http.Client)
@@ -419,7 +428,6 @@ func analysisReportHrefs(contents []byte, page int) (hrefs []ReportHrefs, err er
 	return
 }
 
-
 // extractReportPublishTime 提取报告发布时间
 func extractReportPublishTime(text string) (time.Time, error) {
 	datePattern := `(\d{4}年\d{1,2}月\d{1,2}日\d{1,2}:\d{2})`
@@ -741,10 +749,10 @@ func getCookie() (cookieStr string, err error) {
 		return
 	}
 	cookieStr = strings.TrimSpace(string(cookieByte))
-	if cookieStr == "" {
-		err = fmt.Errorf("cookie为空")
-		return
-	}
+	//if cookieStr == "" {
+	//	err = fmt.Errorf("cookie为空")
+	//	return
+	//}
 
 	return
 }
@@ -802,8 +810,8 @@ func getCookieByChrome() (cookieStr string, err error) {
 			for _, v := range cookies {
 				cookieStr = cookieStr + v.Name + "=" + v.Value + ";"
 			}
-			//fmt.Println("header cookie:", cookieStr)
-			//utils.FileLog.Info("header cookie:" + cookieStr)
+			fmt.Println("header cookie:", cookieStr)
+			utils.FileLog.Info("header cookie:" + cookieStr)
 
 			tmpFile, tmpErr := os.Create(utils.CCFCookieFile)
 			if tmpErr != nil {

+ 2 - 2
services/base_from_ccf/edb.go

@@ -55,7 +55,7 @@ func TaskOilDailyEdb(context.Context) (err error) {
 	}
 
 	// 解析前N篇报告
-	readLimit := 7
+	readLimit := utils.CCFDailyFetchNum
 	filePaths, e := savePageHtml(nameKey, fetchRule.PageDir, false, readLimit)
 	if e != nil {
 		err = fmt.Errorf("savePageHtml, err: %v", e)
@@ -125,7 +125,7 @@ func TaskWeeklyEdb(context.Context) (err error) {
 	}()
 
 	taskNames := []string{"PTA周报", "MEG周报", "长丝周报", "短纤周报", "瓶片周报", "切片周报", "PX周报"}
-	readLimit := 3
+	readLimit := utils.CCFWeeklyFetchNum
 	for _, nameKey := range taskNames {
 		fmt.Printf("开始获取: %s\n", nameKey)
 

+ 1 - 1
services/base_from_ccf/stock.go

@@ -31,7 +31,7 @@ func TaskStockTable(context.Context) (err error) {
 	}()
 
 	taskNames := []string{"PTA装置", "MEG装置", "PX装置"}
-	readLimit := 3
+	readLimit := utils.CCFStockFetchNum
 	for _, nameKey := range taskNames {
 		fetchRule, e := loadDataRule(nameKey)
 		if e != nil {

+ 17 - 2
utils/config.go

@@ -82,14 +82,17 @@ var (
 	CCFStockTaskTime  string // CCF数据装置任务时间
 	CCFUseName        string // CCF登录账号
 	CCFPassword       string // CCF登录密码
+	CCFDailyFetchNum  int    // CCF数据日度每次获取报告数量
+	CCFWeeklyFetchNum int    // CCF数据周度每次获取报告数量
+	CCFStockFetchNum  int    // CCF数据装置每次获取报告数量
 )
 
 var (
 	OilchemAccount    string
 	OilchemPassword   string
 	OilchemCookieFile string
-	OilchemOpen string
-	OilchemDataInit string
+	OilchemOpen       string
+	OilchemDataInit   string
 )
 
 var TerminalCode string
@@ -182,6 +185,18 @@ func init() {
 		CCFStockTaskTime = config["ccf_stock_task_time"]
 		CCFUseName = config["ccf_username"]
 		CCFPassword = config["ccf_password"]
+		CCFDailyFetchNum, _ = strconv.Atoi(config["ccf_daily_fetch_num"])
+		if CCFDailyFetchNum <= 0 {
+			CCFDailyFetchNum = 7
+		}
+		CCFWeeklyFetchNum, _ = strconv.Atoi(config["ccf_weekly_fetch_num"])
+		if CCFWeeklyFetchNum <= 0 {
+			CCFWeeklyFetchNum = 3
+		}
+		CCFStockFetchNum, _ = strconv.Atoi(config["ccf_stock_fetch_num"])
+		if CCFStockFetchNum <= 0 {
+			CCFStockFetchNum = 3
+		}
 	}
 
 	// 隆众数据