Browse Source

add:新增ccf数据源数据

zqbao 3 months ago
parent
commit
d2bd62d043

+ 3 - 1
.gitignore

@@ -10,4 +10,6 @@ binlog/
 *.exe
 eta_data_analysis
 static/ccf
-ccf_cookie.txt
+ccf_cookie.txt
+/test/
+/.vscode/

+ 151 - 2
services/base_from_ccf/chart.go

@@ -6,10 +6,12 @@ import (
 	"eta/eta_data_analysis/models"
 	"eta/eta_data_analysis/utils"
 	"fmt"
-	"github.com/PuerkitoBio/goquery"
 	"os"
 	"strconv"
 	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
 )
 
 // 定义主结构体
@@ -18,6 +20,7 @@ type CCFChartRule struct {
 	ClassifyId int             `json:"ClassifyId"`
 	CustNo     int             `json:"CustNo"`
 	Frequency  string          `json:"Frequency"`
+	IndexType  string          `json:"IndexType"`
 	Child      []*CCFChartRule `json:"Child,omitempty"` // 使用指针来处理可能不存在的子对象
 }
 
@@ -39,6 +42,31 @@ func loadCCFChartRule() (rules []*CCFChartRule, err error) {
 	return
 }
 
+// CCFChartAdditionRule is one entry of the additional chart rule file. Each
+// entry describes a single index that is fetched by posting a product code to
+// the CCF chart endpoint rather than by customer number.
+type CCFChartAdditionRule struct {
+	Name       string `json:"Name"`       // index name; also matched against the product name column of the returned table
+	ClassifyId int    `json:"ClassifyId"` // classification id copied onto the generated index
+	Frequency  string `json:"Frequency"`  // frequency label copied onto the generated index
+	ProdNames  string `json:"prodNames"`  // value sent as the "prodNames" form field when requesting the chart
+}
+
+// LoadCCFChartAdditionRule reads the JSON file named by
+// utils.CCFChartAdditionRuleFile and decodes it into a list of rules.
+//
+// It returns an error when the path is not configured, when the file cannot
+// be read, or when its content is not valid JSON for the rule list. The
+// underlying error is wrapped so callers can inspect it with errors.Is/As.
+// On failure rules is nil; a partially decoded list is never returned.
+func LoadCCFChartAdditionRule() (rules []*CCFChartAdditionRule, err error) {
+	if utils.CCFChartAdditionRuleFile == "" {
+		err = fmt.Errorf("rule文件不存在")
+		return
+	}
+	b, e := os.ReadFile(utils.CCFChartAdditionRuleFile)
+	if e != nil {
+		err = fmt.Errorf("读取rule文件失败, err: %w", e)
+		return
+	}
+	// Decode into a local first so a parse failure cannot leak a half-filled
+	// slice through the named result.
+	parsed := make([]*CCFChartAdditionRule, 0)
+	if e = json.Unmarshal(b, &parsed); e != nil {
+		err = fmt.Errorf("解析rule文件失败, err: %w", e)
+		return
+	}
+	rules = parsed
+	return
+}
+
 func TaskGetCCFChartEdb(context.Context) (err error) {
 	_ = GetCCFChartEdb()
 	return
@@ -61,6 +89,10 @@ func GetCCFChartEdb() (err error) {
 		pageHtml := fmt.Sprintf("%s?cust_no=%d", CCFCHARTDATAURL, v.CustNo)
 		fmt.Println(pageHtml)
 		fileContent, e := fetchPageHtml(pageHtml, 0)
+		if e != nil {
+			err = fmt.Errorf("获取首页报告失败, err: %v", e)
+			return
+		}
 		/*fName := v.Name
 		if strings.Contains(v.Name, "/") {
 			fName = strings.ReplaceAll(fName, "/", "")
@@ -110,6 +142,41 @@ func GetCCFChartEdb() (err error) {
 			indexes = append(indexes, indexList...)
 		}
 	}
+	additionRules, err := LoadCCFChartAdditionRule()
+	if err != nil {
+		err = fmt.Errorf("加载额外图表规则失败 err: %v", err)
+		return
+	}
+	now := time.Now()
+	for _, v := range additionRules {
+		param := make(map[string]string)
+		param["startdate"] = time.Date(now.Year()-4, 1, 1, 0, 0, 0, 0, time.Local).Format(utils.FormatDate2)
+		param["enddate"] = now.Format(utils.FormatDate2)
+		param["type"] = "1"
+		param["prodNames"] = v.ProdNames
+		param["skin"] = "infographic"
+		param["page"] = "index.php"
+		htmlContent, er := postPageHtml(CCFCHARTDATAURL, param, 0)
+		if er != nil {
+			err = fmt.Errorf("获取首页报告失败, err: %v", er)
+			return
+		}
+
+		isStop, indexList, e := AnalysisAdditionChartInventoryWeeklyEdb(htmlContent, v)
+		if e != nil {
+			err = fmt.Errorf("解析图表失败, err: %v", e)
+			return
+		}
+		if isStop {
+			err = fmt.Errorf("图表名称不存在,停止爬取")
+			break
+		}
+		if len(indexList) > 0 {
+			indexes = append(indexes, indexList...)
+		}
+
+	}
+
 	if len(indexes) == 0 {
 		return
 	}
@@ -196,7 +263,89 @@ func AnalysisChartInventoryWeeklyEdb(htm []byte, rule *CCFChartRule) (isStop boo
 			date = strings.TrimSpace(date)
 
 			// 提取日均值
-			dailyAvg := row.Find("td:nth-child(3)").Text()
+			var dailyAvg string
+			if rule.IndexType == "周均" {
+				dailyAvg = row.Find("td:nth-child(4)").Text()
+				dailyAvg = strings.TrimSpace(dailyAvg)
+			} else {
+				dailyAvg = row.Find("td:nth-child(3)").Text()
+				dailyAvg = strings.TrimSpace(dailyAvg)
+			}
+
+			// 打印提取的信息
+			fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
+			_, e = strconv.ParseFloat(dailyAvg, 64)
+			if e != nil {
+				utils.FileLog.Info("数据转换失败 err:%s", e.Error())
+				return
+			}
+			dataMap[date] = dailyAvg
+		})
+		if indexName == "" {
+			return
+		}
+
+		edb := new(HandleIndexData)
+		edb.IndexCode = strings.ToLower(indexCode)
+		edb.IndexName = indexName
+		edb.ClassifyId = classifyId
+		edb.Frequency = frequency
+		edb.Unit = unit
+		edb.DateData = dataMap
+		edb.TerminalCode = utils.TerminalCode
+		indexes = append(indexes, edb)
+	})
+	return
+}
+
+func AnalysisAdditionChartInventoryWeeklyEdb(htm []byte, rule *CCFChartAdditionRule) (isStop bool, indexes []*HandleIndexData, err error) {
+	if len(htm) == 0 {
+		utils.FileLog.Info("htm empty")
+		return
+	}
+
+	doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
+	if e != nil {
+		err = fmt.Errorf("NewDocumentFromReader err: %v", e)
+		return
+	}
+	// 判断图表名称是否相符,如果不符合放弃爬取
+	doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
+		// 提取单位(这里假设单位总是位于 .tips 类的 div 中)
+		unit := item.Find(".tips").Text()
+		unit = strings.TrimSpace(unit)
+		unit = strings.TrimPrefix(unit, "编制说明:单位(")
+		unit = strings.TrimSuffix(unit, ")")
+		fmt.Println("单位: ", unit)
+		indexCode := ""
+		indexName := ""
+		// 获取频度和分类ID
+		classifyId := rule.ClassifyId
+		frequency := rule.Frequency
+		dataMap := make(map[string]string)
+		// 遍历表格中的每一行(跳过表头)
+		item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
+			if k == 0 {
+				return
+			}
+			// 提取产品名称
+			if indexCode == "" {
+				productName := row.Find("td:nth-child(1)").Text()
+				productName = strings.TrimSpace(productName)
+				if strings.Contains(rule.Name, productName) {
+					indexName = rule.Name
+					code := strings.ToLower(utils.GetFirstPingYin(indexName))
+					code = strings.ReplaceAll(code, "/", "")
+					code = strings.ReplaceAll(code, " ", "")
+					indexCode = strings.ToLower(code)
+				}
+			}
+			// 提取日期
+			date := row.Find("td:nth-child(2)").Text()
+			date = strings.TrimSpace(date)
+
+			// 提取周均值
+			dailyAvg := row.Find("td:nth-child(4)").Text()
 			dailyAvg = strings.TrimSpace(dailyAvg)
 
 			// 打印提取的信息

+ 107 - 6
services/base_from_ccf/common.go

@@ -7,14 +7,9 @@ import (
 	"encoding/json"
 	"eta/eta_data_analysis/utils"
 	"fmt"
-	"github.com/PuerkitoBio/goquery"
-	"github.com/chromedp/cdproto/network"
-	"github.com/chromedp/chromedp"
-	"golang.org/x/net/html/charset"
-	"golang.org/x/text/encoding/simplifiedchinese"
-	"golang.org/x/text/transform"
 	"io"
 	"log"
+	"mime/multipart"
 	"net/http"
 	"net/url"
 	"os"
@@ -23,6 +18,13 @@ import (
 	"strconv"
 	"strings"
 	"time"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/chromedp/cdproto/network"
+	"github.com/chromedp/chromedp"
+	"golang.org/x/net/html/charset"
+	"golang.org/x/text/encoding/simplifiedchinese"
+	"golang.org/x/text/transform"
 )
 
 const (
@@ -180,6 +182,105 @@ func fetchPageHtml(baseUrl string, fetchNum int) (respBody []byte, err error) {
 	return
 }
 
+// postPageHtml sends formData to baseUrl as a multipart/form-data POST, using
+// the stored CCF cookie, and returns the (decompressed) response body.
+//
+// fetchNum is the number of attempts already made; external callers pass 0.
+// When the stored cookie is empty, or the response is detected as the login
+// page, the cookie is refreshed through getCookieByChrome and the request is
+// retried once. Any error is also written to the file log and stdout by the
+// deferred handler before being returned.
+func postPageHtml(baseUrl string, formData map[string]string, fetchNum int) (respBody []byte, err error) {
+	defer func() {
+		if err != nil {
+			tips := fmt.Sprintf("BuildCCFRequest ErrMsg: %s", err.Error())
+			utils.FileLog.Info(tips)
+			fmt.Println(tips)
+		}
+	}()
+	// Count this attempt; the retry branches below only fire on the first one.
+	fetchNum++
+	if baseUrl == "" {
+		err = fmt.Errorf("CCF请求地址为空")
+		return
+	}
+
+	// Load the stored cookie; refresh it once if it is empty.
+	strCookie, e := getCookie()
+	if e != nil {
+		err = fmt.Errorf("读取cookie文件失败, err: %w", e)
+		return
+	}
+	if strCookie == "" && fetchNum < 2 {
+		fmt.Printf("文件cookie为空, 重新获取, fetchNum: %d\n", fetchNum)
+		utils.FileLog.Info(fmt.Sprintf("文件cookie为空, 重新获取, fetchNum: %d", fetchNum))
+		_, err = getCookieByChrome()
+		if err != nil {
+			return
+		}
+		return postPageHtml(baseUrl, formData, fetchNum)
+	}
+
+	// Encode the form as multipart/form-data. Close must succeed because it
+	// writes the terminating boundary.
+	var b bytes.Buffer
+	writer := multipart.NewWriter(&b)
+	for k, v := range formData {
+		if e := writer.WriteField(k, v); e != nil {
+			err = fmt.Errorf("write form field %s err: %w", k, e)
+			return
+		}
+	}
+	if e := writer.Close(); e != nil {
+		err = fmt.Errorf("close multipart writer err: %w", e)
+		return
+	}
+
+	// Build the request. HTTP method tokens are case-sensitive, so use the
+	// canonical constant rather than "Post".
+	// NOTE(review): the client has no timeout, so a stalled server blocks the
+	// caller indefinitely — consider setting http.Client.Timeout.
+	cli := new(http.Client)
+	req, e := http.NewRequest(http.MethodPost, baseUrl, &b)
+	if e != nil {
+		err = e
+		return
+	}
+
+	// Without the boundary-bearing Content-Type the server cannot parse the
+	// multipart body.
+	req.Header.Set("Content-Type", writer.FormDataContentType())
+	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
+	req.Header.Set("Accept-Encoding", "gzip, deflate, br")
+	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6")
+	req.Header.Set("Connection", "keep-alive")
+	req.Header.Set("Cookie", strCookie)
+	req.Header.Set("Host", "www.ccf.com.cn")
+	req.Header.Set("Referer", baseUrl)
+	req.Header.Set("Sec-Ch-Ua", "\"Not A(Brand\";v=\"99\", \"Microsoft Edge\";v=\"121\", \"Chromium\";v=\"121\"")
+	req.Header.Set("Sec-Ch-Ua-Mobile", "?0")
+	req.Header.Set("Sec-Ch-Ua-Platform", "\"Windows\"")
+	req.Header.Set("Sec-Fetch-Dest", "empty")
+	req.Header.Set("Sec-Fetch-Mode", "cors")
+	req.Header.Set("Sec-Fetch-Site", "same-origin")
+	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0")
+	req.Header.Set("X-Requested-With", "XMLHttpRequest")
+
+	resp, e := cli.Do(req)
+	if e != nil {
+		err = fmt.Errorf("HTTP client Do err: %w", e)
+		return
+	}
+	defer func() {
+		_ = resp.Body.Close()
+	}()
+
+	// Accept-Encoding is set by hand above, so net/http does not decompress
+	// the body for us. Decode according to what the server actually sent
+	// instead of assuming gzip.
+	var reader io.Reader = resp.Body
+	switch enc := strings.ToLower(strings.TrimSpace(resp.Header.Get("Content-Encoding"))); enc {
+	case "", "identity":
+		// Body is not compressed.
+	case "gzip", "x-gzip":
+		gz, e := gzip.NewReader(resp.Body)
+		if e != nil {
+			err = fmt.Errorf("gzip NewReader err: %w", e)
+			return
+		}
+		defer func() {
+			_ = gz.Close()
+		}()
+		reader = gz
+	default:
+		err = fmt.Errorf("unsupported Content-Encoding: %s", enc)
+		return
+	}
+	body, e := io.ReadAll(reader)
+	if e != nil {
+		err = fmt.Errorf("read body err: %w", e)
+		return
+	}
+	respBody = body
+
+	isLoginPage := checkIsLoginPage(string(respBody))
+	fmt.Println("是否登录页:", isLoginPage)
+
+	// If the login page came back on the first attempt, refresh the cookie
+	// and try once more.
+	if isLoginPage && fetchNum < 2 {
+		_, err = getCookieByChrome()
+		if err != nil {
+			return
+		}
+		return postPageHtml(baseUrl, formData, fetchNum)
+	}
+	return
+}
+
 // DataRule 数据爬取规则
 type DataRule struct {
 	Name      string `json:"Name"`

+ 14 - 0
static/ccf_chart_addition_rule.json

@@ -0,0 +1,14 @@
+[
+    {
+        "Name": "CCF/MEG港口库存指数",
+        "ClassifyId": 21,
+        "Frequency": "周度",
+        "prodNames": "kc-221000"
+    },
+    {
+        "Name": "CCF/PTA FOB中国",
+        "ClassifyId": 22,
+        "Frequency": "日度",
+        "prodNames": "zs-235"
+    }
+]

+ 4 - 4
static/ccf_chart_rule.json

@@ -1,10 +1,10 @@
 [
-  {"Name":"库存天数","ClassifyId": 21,"CustNo": 1,"Frequency": "周度"},
+  {"Name":"库存天数","ClassifyId": 21,"CustNo": 1,"Frequency": "周度", "IndexType": "周均"},
   {"Name":"PTA/EG价格","ClassifyId": 22,"CustNo": 2,"Frequency": "日度"},
   {"Name":"EG价格","ClassifyId": 22,"CustNo": 3,"Frequency": "周度","Child":[{"Name":"外盘MEG","Frequency": "日度"}]},
   {"Name":"丝价","ClassifyId": 22,"CustNo": 4,"Frequency": "日度"},
   {"Name":"瓶片","ClassifyId": 22,"CustNo": 5,"Frequency": "日度"},
-  {"Name":"原料负荷","ClassifyId": 23,"CustNo": 6,"Frequency": "周度"},
-  {"Name":"聚酯负荷","ClassifyId": 23,"CustNo": 7,"Frequency": "周度"},
-  {"Name":"终端负荷","ClassifyId": 23,"CustNo": 8,"Frequency": "周度"}
+  {"Name":"原料负荷","ClassifyId": 23,"CustNo": 6,"Frequency": "周度", "IndexType": "周均"},
+  {"Name":"聚酯负荷","ClassifyId": 23,"CustNo": 7,"Frequency": "周度", "IndexType": "周均"},
+  {"Name":"终端负荷","ClassifyId": 23,"CustNo": 8,"Frequency": "周度", "IndexType": "周均"}
 ]

+ 19 - 14
utils/config.go

@@ -2,9 +2,10 @@ package utils
 
 import (
 	"fmt"
+	"strconv"
+
 	beeLogger "github.com/beego/bee/v2/logger"
 	"github.com/beego/beego/v2/server/web"
-	"strconv"
 )
 
 var (
@@ -91,19 +92,20 @@ var (
 
 // CCF化纤信息
 var (
-	CCFOpen           string // 是否配置CCF
-	CCFCookieFile     string // CCF登录Cookie
-	CCFDataRuleFile   string // CCF数据爬取规则
-	CCFDailyTaskTime  string // CCF数据日度任务时间
-	CCFWeeklyTaskTime string // CCF数据周度任务时间
-	CCFStockTaskTime  string // CCF数据装置任务时间
-	CCFUseName        string // CCF登录账号
-	CCFPassword       string // CCF登录密码
-	CCFDailyFetchNum  int    // CCF数据日度每次获取报告数量
-	CCFWeeklyFetchNum int    // CCF数据周度每次获取报告数量
-	CCFStockFetchNum  int    // CCF数据装置每次获取报告数量
-	CCFChartRuleFile  string // CCF图表爬取规则
-	CCFChartTaskTime  string
+	CCFOpen                  string // 是否配置CCF
+	CCFCookieFile            string // CCF登录Cookie
+	CCFDataRuleFile          string // CCF数据爬取规则
+	CCFDailyTaskTime         string // CCF数据日度任务时间
+	CCFWeeklyTaskTime        string // CCF数据周度任务时间
+	CCFStockTaskTime         string // CCF数据装置任务时间
+	CCFUseName               string // CCF登录账号
+	CCFPassword              string // CCF登录密码
+	CCFDailyFetchNum         int    // CCF数据日度每次获取报告数量
+	CCFWeeklyFetchNum        int    // CCF数据周度每次获取报告数量
+	CCFStockFetchNum         int    // CCF数据装置每次获取报告数量
+	CCFChartRuleFile         string // CCF图表爬取规则
+	CCFChartAdditionRuleFile string // CCF图表爬取附加规则
+	CCFChartTaskTime         string
 )
 
 var (
@@ -226,6 +228,9 @@ func init() {
 		if CCFChartRuleFile == "" {
 			CCFChartRuleFile = "static/ccf_chart_rule.json"
 		}
+		if CCFChartAdditionRuleFile == "" {
+			CCFChartAdditionRuleFile = "static/ccf_chart_addition_rule.json"
+		}
 		CCFChartTaskTime = config["ccf_chart_task_time"]
 	}
 

+ 1 - 0
utils/constants.go

@@ -16,6 +16,7 @@ const (
 	FormatYearMonthUnSpace     = "200601"                  //年月的日期格式
 	PageSize15                 = 15                        //列表页每页数据量
 	FormatDate1                = "2006/1/02"               //日期格式
+	FormatDate2                = "2006/01/02"              //日期格式
 	FormatDateYearMonthDay     = "2006年01月02日"             //日期格式
 	FormatDatePoint            = "2006.01.02"              //日期格式
 	PageSize5                  = 5