Răsfoiți Sursa

Merge branch 'feature/eta2.2.4_ccf' of eta_server/eta_data_analysis into master

xyxie 4 luni în urmă
părinte
comite
fda4a6a73c

+ 226 - 0
services/base_from_ccf/chart.go

@@ -0,0 +1,226 @@
+package base_from_ccf
+
+import (
+	"context"
+	"encoding/json"
+	"eta/eta_data_analysis/models"
+	"eta/eta_data_analysis/utils"
+	"fmt"
+	"github.com/PuerkitoBio/goquery"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// CCFChartRule is one entry of the CCF chart crawl rule file (decoded from JSON); Child optionally lists nested rules for sub-pages.
+type CCFChartRule struct {
+	Name       string          `json:"Name"`            // chart name; compared against the page title before a page is parsed
+	ClassifyId int             `json:"ClassifyId"`      // classify ID copied onto every index extracted for this rule
+	CustNo     int             `json:"CustNo"`          // value sent as the cust_no query parameter when fetching the chart page
+	Frequency  string          `json:"Frequency"`       // frequency label copied onto extracted indexes
+	Child      []*CCFChartRule `json:"Child,omitempty"` // pointers are used because child rules may be absent
+}
+
+// loadCCFChartRule reads the JSON file named by utils.CCFChartRuleFile and
+// decodes it into a list of chart rules.
+//
+// It returns an error when no rule file is configured, when the file cannot be
+// read, or when its content is not valid JSON for []*CCFChartRule. On a decode
+// failure the (possibly partially filled) slice is returned with the error.
+func loadCCFChartRule() (rules []*CCFChartRule, err error) {
+	rulePath := utils.CCFChartRuleFile
+	if rulePath == "" {
+		return nil, fmt.Errorf("rule文件不存在")
+	}
+
+	content, readErr := os.ReadFile(rulePath)
+	if readErr != nil {
+		return nil, fmt.Errorf("读取rule文件失败, err: %v", readErr)
+	}
+
+	// Start from an empty, non-nil slice so a rule file holding "[]" still
+	// yields a usable list.
+	rules = make([]*CCFChartRule, 0)
+	if parseErr := json.Unmarshal(content, &rules); parseErr != nil {
+		err = fmt.Errorf("解析rule文件失败, err: %v", parseErr)
+	}
+	return rules, err
+}
+
+// TaskGetCCFChartEdb is the scheduled-task entry point for the CCF chart
+// crawl. The context argument is unused.
+//
+// It always returns nil: GetCCFChartEdb logs its own failures, so the crawl
+// result is intentionally discarded here.
+func TaskGetCCFChartEdb(_ context.Context) error {
+	_ = GetCCFChartEdb()
+	return nil
+}
+// GetCCFChartEdb crawls every chart page listed in the CCF chart rule file,
+// extracts the index series from each page and posts the collected indexes to
+// the EDB library in a single request.
+//
+// Behaviour per rule:
+//   - a page that cannot be fetched is logged and skipped;
+//   - a page that cannot be parsed aborts the whole run without posting;
+//   - a page whose title does not match the rule name stops the crawl, but
+//     the indexes collected so far are still posted.
+//
+// Any non-nil error is written to the file log and stdout before returning.
+func GetCCFChartEdb() (err error) {
+	defer func() {
+		if err != nil {
+			tips := fmt.Sprintf("GetCCFChartEdb ErrMsg: %s", err.Error())
+			utils.FileLog.Info(tips)
+			fmt.Println(tips)
+		}
+	}()
+	rules, err := loadCCFChartRule()
+	if err != nil {
+		return
+	}
+	indexes := make([]*HandleIndexData, 0)
+	for _, v := range rules {
+		// A JSON null inside the rule list decodes to a nil pointer.
+		if v == nil {
+			continue
+		}
+		// Chart page URL for this rule.
+		pageHtml := fmt.Sprintf("%s?cust_no=%d", CCFCHARTDATAURL, v.CustNo)
+		fmt.Println(pageHtml)
+		fileContent, e := fetchPageHtml(pageHtml, 0)
+		if e != nil {
+			// This error used to be overwritten unchecked. Record it and move
+			// on so one unreachable page does not discard the other charts.
+			utils.FileLog.Info("GetCCFChartEdb fetchPageHtml err: %v, url: %s", e, pageHtml)
+			continue
+		}
+
+		isStop, indexList, e := AnalysisChartInventoryWeeklyEdb(fileContent, v)
+		if e != nil {
+			err = fmt.Errorf("解析图表失败, err: %v", e)
+			return
+		}
+		if isStop {
+			// The page no longer shows the expected chart: stop crawling, but
+			// fall through so already collected indexes are still posted.
+			err = fmt.Errorf("图表名称不存在,停止爬取")
+			break
+		}
+		if len(indexList) > 0 {
+			indexes = append(indexes, indexList...)
+		}
+	}
+	if len(indexes) == 0 {
+		return
+	}
+	// Write the collected indexes to the database through the EDB library.
+	params := make(map[string]interface{})
+	params["List"] = indexes
+	params["TerminalCode"] = utils.TerminalCode
+	result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_EDB_HANDLE)
+	if e != nil {
+		// The marshal error is ignored: the payload is only diagnostic context.
+		b, _ := json.Marshal(params)
+		err = fmt.Errorf("postEdbLib err: %v, params: %s", e, string(b))
+		return
+	}
+	resp := new(models.BaseEdbLibResponse)
+	// Decode into resp itself rather than &resp: with a pointer-to-pointer a
+	// JSON "null" body would reset resp to nil and panic on resp.Ret below.
+	if e = json.Unmarshal(result, resp); e != nil {
+		err = fmt.Errorf(" postEdbLib resp json.Unmarshal err: %v", e)
+		return
+	}
+	if resp.Ret != 200 {
+		err = fmt.Errorf("postEdbLib resp Msg: %s, ErrMsg: %s", resp.Msg, resp.ErrMsg)
+		return
+	}
+	return
+}
+
+// AnalysisChartInventoryWeeklyEdb parses one CCF chart page and returns one
+// index per "div.tabCont" section found in it.
+//
+// Parameters:
+//   - htm: raw HTML of the chart page; an empty value is logged and yields no
+//     indexes and no error.
+//   - rule: the crawl rule for this page. Its Name must equal the text of the
+//     active tab link, otherwise isStop is returned as true. Child rules,
+//     matched by product name, override the Frequency of the matching index.
+//
+// For every section the unit is read from the ".tips" element, and each table
+// row after the header contributes a date (2nd column) -> value (3rd column)
+// entry. The index code and name are derived from the first column of the
+// first data row. Rows whose value is not a valid float are logged and
+// skipped. Sections without any data row produce no index.
+func AnalysisChartInventoryWeeklyEdb(htm []byte, rule *CCFChartRule) (isStop bool, indexes []*HandleIndexData, err error) {
+	if len(htm) == 0 {
+		utils.FileLog.Info("htm empty")
+		return
+	}
+	if rule == nil {
+		err = fmt.Errorf("chart rule is nil")
+		return
+	}
+
+	doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
+	if e != nil {
+		err = fmt.Errorf("NewDocumentFromReader err: %v", e)
+		return
+	}
+	// Give up on this page when the active chart name does not match the rule.
+	title := doc.Find("p a.activated.now").Text()
+	fmt.Println(title)
+	if title != rule.Name {
+		utils.FileLog.Info("图表名称不存在,停止爬取")
+		isStop = true
+		return
+	}
+
+	// Index the child rules by name once for the whole page. The lookup must
+	// be filled from rule.Child; ranging over the (empty) lookup map itself
+	// left it empty, so child frequencies were never applied.
+	childRule := make(map[string]*CCFChartRule, len(rule.Child))
+	for _, v := range rule.Child {
+		if v != nil {
+			childRule[v.Name] = v
+		}
+	}
+
+	doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
+		// Extract the unit (assumed to always live in the .tips element).
+		unit := item.Find(".tips").Text()
+		unit = strings.TrimSpace(unit)
+		unit = strings.TrimPrefix(unit, "编制说明:单位(")
+		unit = strings.TrimSuffix(unit, ")")
+		fmt.Println("单位: ", unit)
+		indexCode := ""
+		indexName := ""
+		// Defaults come from the page rule; a matching child rule may
+		// override the frequency below.
+		classifyId := rule.ClassifyId
+		frequency := rule.Frequency
+		dataMap := make(map[string]string)
+		// Walk every table row, skipping the header row.
+		item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
+			if k == 0 {
+				return
+			}
+			// The product name of the first data row names the whole index.
+			if indexCode == "" {
+				productName := row.Find("td:nth-child(1)").Text()
+				productName = strings.TrimSpace(productName)
+				indexCode = fmt.Sprintf("ccf%s", utils.GetFirstPingYin(productName))
+				indexName = fmt.Sprintf("CCF%s", productName)
+				// Apply the sub-page frequency when a child rule matches.
+				if newRule, ok := childRule[productName]; ok {
+					frequency = newRule.Frequency
+				}
+			}
+			// Date column.
+			date := row.Find("td:nth-child(2)").Text()
+			date = strings.TrimSpace(date)
+
+			// Value column.
+			dailyAvg := row.Find("td:nth-child(3)").Text()
+			dailyAvg = strings.TrimSpace(dailyAvg)
+
+			// Print the extracted row.
+			fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
+			// Only validate that the value is numeric; the original text is
+			// what gets stored.
+			if _, parseErr := strconv.ParseFloat(dailyAvg, 64); parseErr != nil {
+				utils.FileLog.Info("数据转换失败 err:%s", parseErr.Error())
+				return
+			}
+			dataMap[date] = dailyAvg
+		})
+		if indexName == "" {
+			return
+		}
+
+		edb := new(HandleIndexData)
+		edb.IndexCode = strings.ToLower(indexCode)
+		edb.IndexName = indexName
+		edb.ClassifyId = classifyId
+		edb.Frequency = frequency
+		edb.Unit = unit
+		edb.DateData = dataMap
+		edb.TerminalCode = utils.TerminalCode
+		indexes = append(indexes, edb)
+	})
+	return
+}

+ 1 - 0
services/base_from_ccf/common.go

@@ -28,6 +28,7 @@ import (
 const (
 	CCFSearchPageUrl       = "https://www.ccf.com.cn/newscenter/simplesearch.php" // CCF搜索页地址
 	CCFReportDetailBaseUrl = "https://www.ccf.com.cn"                             // CCF报告详情页地址
+	CCFCHARTDATAURL        = "https://www.ccf.com.cn/datacenter/index.php"
 )
 
 // postEdbLib 调用指标接口

+ 2 - 6
services/mtjh_watch.go

@@ -5,10 +5,7 @@ import (
 	"fmt"
 	"github.com/patrickmn/go-cache"
 	"io/fs"
-	"os"
 	"path/filepath"
-	"strings"
-	"syscall"
 	"time"
 )
 
@@ -29,7 +26,7 @@ func mtjhWatch() {
 			return err
 		}
 		if !info.IsDir() {
-			fileInfo, err := os.Stat(path)
+			/*fileInfo, err := os.Stat(path)
 			if err != nil {
 				fmt.Println("os.Stat:", err.Error())
 			}
@@ -50,9 +47,8 @@ func mtjhWatch() {
 				}
 			}
 			cacheClient.Delete(path)
-			cacheClient.Set(path, modifyTimeStr, 24*time.Hour)
+			cacheClient.Set(path, modifyTimeStr, 24*time.Hour)*/
 		}
 		return nil
 	})
 }
-

+ 10 - 5
services/task.go

@@ -8,10 +8,7 @@ import (
 	"eta/eta_data_analysis/utils"
 	"fmt"
 	"io/fs"
-	"os"
 	"path/filepath"
-	"strings"
-	"syscall"
 	"time"
 
 	"github.com/beego/beego/v2/task"
@@ -105,6 +102,14 @@ func Task() {
 		}
 		taskCCFStockTable := task.NewTask("taskCCFStockTable", ccfStockTime, ccfService.TaskStockTable)
 		task.AddTask("CCF装置检修", taskCCFStockTable)
+
+		// ccf图表数据-默认每天下午4点
+		ccfChartTime := utils.CCFChartTaskTime
+		if ccfChartTime == "" {
+			ccfChartTime = "0 16 * * * *"
+		}
+		taskGetCCFChartEdb := task.NewTask("taskGetCCFChartEdb", ccfChartTime, ccfService.TaskGetCCFChartEdb)
+		task.AddTask("CCF图表数据", taskGetCCFChartEdb)
 	}
 
 	// 隆众资讯
@@ -140,7 +145,7 @@ func ReadWatchIndexFile() {
 			return err
 		}
 		if !info.IsDir() {
-			fileInfo, err := os.Stat(path)
+			/* fileInfo, err := os.Stat(path)
 			if err != nil {
 				fmt.Println("os.Stat:", err.Error())
 			}
@@ -185,7 +190,7 @@ func ReadWatchIndexFile() {
 				}
 			}
 			cacheClient.Delete(path)
-			cacheClient.Set(path, modifyTimeStr, 24*time.Hour)
+			cacheClient.Set(path, modifyTimeStr, 24*time.Hour)*/
 		}
 		return nil
 	})

+ 10 - 0
static/ccf_chart_rule.json

@@ -0,0 +1,10 @@
+[
+  {"Name":"库存天数","ClassifyId": 21,"CustNo": 1,"Frequency": "周度"},
+  {"Name":"PTA/EG价格","ClassifyId": 22,"CustNo": 2,"Frequency": "日度"},
+  {"Name":"EG价格","ClassifyId": 22,"CustNo": 3,"Frequency": "周度","Child":[{"Name":"外盘MEG","Frequency": "日度"}]},
+  {"Name":"丝价","ClassifyId": 22,"CustNo": 4,"Frequency": "日度"},
+  {"Name":"瓶片","ClassifyId": 22,"CustNo": 5,"Frequency": "日度"},
+  {"Name":"原料负荷","ClassifyId": 23,"CustNo": 6,"Frequency": "周度"},
+  {"Name":"聚酯负荷","ClassifyId": 23,"CustNo": 7,"Frequency": "周度"},
+  {"Name":"终端负荷","ClassifyId": 23,"CustNo": 8,"Frequency": "周度"}
+]

+ 13 - 3
static/ccf_data_rule.json

@@ -443,6 +443,16 @@
         "MatchUnit": "",
         "Unit": "%"
       },
+      {
+        "IndexCode": "ccfmegfhgnhcqzfh",
+        "IndexName": "CCF/MEG负荷国内合成气制负荷",
+        "ClassifyId": 15,
+        "Frequency": "周度",
+        "Product": "MEG负荷",
+        "Market": "国内煤制负荷",
+        "MatchUnit": "",
+        "Unit": "%"
+      },
       {
         "IndexCode": "ccfmeghdgkkcnbg",
         "IndexName": "CCF/MEG华东港口库存宁波港",
@@ -793,7 +803,7 @@
       {
         "IndexCode": "ccfzgpxfh",
         "IndexName": "CCF中国PX负荷",
-        "ClassifyId": 20,
+        "ClassifyId": 7,
         "Frequency": "周度",
         "Product": "中国PX",
         "Market": "",
@@ -803,7 +813,7 @@
       {
         "IndexCode": "ccfyzpxfh",
         "IndexName": "CCF亚洲PX负荷",
-        "ClassifyId": 20,
+        "ClassifyId": 7,
         "Frequency": "周度",
         "Product": "亚洲PX",
         "Market": "",
@@ -813,7 +823,7 @@
       {
         "IndexCode": "ccfzgptafh",
         "IndexName": "CCF中国PTA负荷",
-        "ClassifyId": 20,
+        "ClassifyId": 7,
         "Frequency": "周度",
         "Product": "中国PTA",
         "Market": "",

+ 7 - 0
utils/config.go

@@ -93,6 +93,8 @@ var (
 	CCFDailyFetchNum  int    // CCF数据日度每次获取报告数量
 	CCFWeeklyFetchNum int    // CCF数据周度每次获取报告数量
 	CCFStockFetchNum  int    // CCF数据装置每次获取报告数量
+	CCFChartRuleFile  string // CCF图表爬取规则
+	CCFChartTaskTime  string
 )
 
 var (
@@ -205,6 +207,11 @@ func init() {
 		if CCFStockFetchNum <= 0 {
 			CCFStockFetchNum = 3
 		}
+		CCFChartRuleFile = config["ccf_chart_rule_file"]
+		if CCFChartRuleFile == "" {
+			CCFChartRuleFile = "static/ccf_chart_rule.json"
+		}
+		CCFChartTaskTime = config["ccf_chart_task_time"]
 	}
 
 	{