|
@@ -6,10 +6,12 @@ import (
|
|
|
"eta/eta_data_analysis/models"
|
|
|
"eta/eta_data_analysis/utils"
|
|
|
"fmt"
|
|
|
- "github.com/PuerkitoBio/goquery"
|
|
|
"os"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
+ "time"
|
|
|
+
|
|
|
+ "github.com/PuerkitoBio/goquery"
|
|
|
)
|
|
|
|
|
|
// 定义主结构体
|
|
@@ -18,6 +20,7 @@ type CCFChartRule struct {
|
|
|
ClassifyId int `json:"ClassifyId"`
|
|
|
CustNo int `json:"CustNo"`
|
|
|
Frequency string `json:"Frequency"`
|
|
|
+ IndexType string `json:"IndexType"`
|
|
|
Child []*CCFChartRule `json:"Child,omitempty"` // 使用指针来处理可能不存在的子对象
|
|
|
}
|
|
|
|
|
@@ -39,6 +42,31 @@ func loadCCFChartRule() (rules []*CCFChartRule, err error) {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
// CCFChartAdditionRule is one entry of the additional chart rule list that
// LoadCCFChartAdditionRule decodes from JSON.
type CCFChartAdditionRule struct {
	// Name is the rule's chart/index name.
	Name string `json:"Name"`
	// ClassifyId is the classification ID carried by the rule.
	ClassifyId int `json:"ClassifyId"`
	// Frequency is the frequency label carried by the rule.
	Frequency string `json:"Frequency"`
	// ProdNames is sent as the "prodNames" request parameter by GetCCFChartEdb.
	// Note that its JSON key is lower camel case, unlike the other fields.
	ProdNames string `json:"prodNames"`
}
|
|
|
+
|
|
|
+func LoadCCFChartAdditionRule() (rules []*CCFChartAdditionRule, err error) {
|
|
|
+ if utils.CCFChartAdditionRuleFile == "" {
|
|
|
+ err = fmt.Errorf("rule文件不存在")
|
|
|
+ return
|
|
|
+ }
|
|
|
+ b, e := os.ReadFile(utils.CCFChartAdditionRuleFile)
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("读取rule文件失败, err: %v", e)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ rules = make([]*CCFChartAdditionRule, 0)
|
|
|
+ if e = json.Unmarshal(b, &rules); e != nil {
|
|
|
+ err = fmt.Errorf("解析rule文件失败, err: %v", e)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
func TaskGetCCFChartEdb(context.Context) (err error) {
|
|
|
_ = GetCCFChartEdb()
|
|
|
return
|
|
@@ -61,6 +89,10 @@ func GetCCFChartEdb() (err error) {
|
|
|
pageHtml := fmt.Sprintf("%s?cust_no=%d", CCFCHARTDATAURL, v.CustNo)
|
|
|
fmt.Println(pageHtml)
|
|
|
fileContent, e := fetchPageHtml(pageHtml, 0)
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("获取首页报告失败, err: %v", e)
|
|
|
+ return
|
|
|
+ }
|
|
|
/*fName := v.Name
|
|
|
if strings.Contains(v.Name, "/") {
|
|
|
fName = strings.ReplaceAll(fName, "/", "")
|
|
@@ -110,6 +142,41 @@ func GetCCFChartEdb() (err error) {
|
|
|
indexes = append(indexes, indexList...)
|
|
|
}
|
|
|
}
|
|
|
+ additionRules, err := LoadCCFChartAdditionRule()
|
|
|
+ if err != nil {
|
|
|
+ err = fmt.Errorf("加载额外图表规则失败 err: %v", err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ now := time.Now()
|
|
|
+ for _, v := range additionRules {
|
|
|
+ param := make(map[string]string)
|
|
|
+ param["startdate"] = time.Date(now.Year()-4, 1, 1, 0, 0, 0, 0, time.Local).Format(utils.FormatDate2)
|
|
|
+ param["enddate"] = now.Format(utils.FormatDate2)
|
|
|
+ param["type"] = "1"
|
|
|
+ param["prodNames"] = v.ProdNames
|
|
|
+ param["skin"] = "infographic"
|
|
|
+ param["page"] = "index.php"
|
|
|
+ htmlContent, er := postPageHtml(CCFCHARTDATAURL, param, 0)
|
|
|
+ if er != nil {
|
|
|
+ err = fmt.Errorf("获取首页报告失败, err: %v", er)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ isStop, indexList, e := AnalysisAdditionChartInventoryWeeklyEdb(htmlContent, v)
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("解析图表失败, err: %v", e)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ if isStop {
|
|
|
+ err = fmt.Errorf("图表名称不存在,停止爬取")
|
|
|
+ break
|
|
|
+ }
|
|
|
+ if len(indexList) > 0 {
|
|
|
+ indexes = append(indexes, indexList...)
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
if len(indexes) == 0 {
|
|
|
return
|
|
|
}
|
|
@@ -196,7 +263,89 @@ func AnalysisChartInventoryWeeklyEdb(htm []byte, rule *CCFChartRule) (isStop boo
|
|
|
date = strings.TrimSpace(date)
|
|
|
|
|
|
// 提取日均值
|
|
|
- dailyAvg := row.Find("td:nth-child(3)").Text()
|
|
|
+ var dailyAvg string
|
|
|
+ if rule.IndexType == "周均" {
|
|
|
+ dailyAvg = row.Find("td:nth-child(4)").Text()
|
|
|
+ dailyAvg = strings.TrimSpace(dailyAvg)
|
|
|
+ } else {
|
|
|
+ dailyAvg = row.Find("td:nth-child(3)").Text()
|
|
|
+ dailyAvg = strings.TrimSpace(dailyAvg)
|
|
|
+ }
|
|
|
+
|
|
|
+ // 打印提取的信息
|
|
|
+ fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
|
|
|
+ _, e = strconv.ParseFloat(dailyAvg, 64)
|
|
|
+ if e != nil {
|
|
|
+ utils.FileLog.Info("数据转换失败 err:%s", e.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ dataMap[date] = dailyAvg
|
|
|
+ })
|
|
|
+ if indexName == "" {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ edb := new(HandleIndexData)
|
|
|
+ edb.IndexCode = strings.ToLower(indexCode)
|
|
|
+ edb.IndexName = indexName
|
|
|
+ edb.ClassifyId = classifyId
|
|
|
+ edb.Frequency = frequency
|
|
|
+ edb.Unit = unit
|
|
|
+ edb.DateData = dataMap
|
|
|
+ edb.TerminalCode = utils.TerminalCode
|
|
|
+ indexes = append(indexes, edb)
|
|
|
+ })
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+func AnalysisAdditionChartInventoryWeeklyEdb(htm []byte, rule *CCFChartAdditionRule) (isStop bool, indexes []*HandleIndexData, err error) {
|
|
|
+ if len(htm) == 0 {
|
|
|
+ utils.FileLog.Info("htm empty")
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("NewDocumentFromReader err: %v", e)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ // 判断图表名称是否相符,如果不符合放弃爬取
|
|
|
+ doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
|
|
|
+ // 提取单位(这里假设单位总是位于 .tips 类的 div 中)
|
|
|
+ unit := item.Find(".tips").Text()
|
|
|
+ unit = strings.TrimSpace(unit)
|
|
|
+ unit = strings.TrimPrefix(unit, "编制说明:单位(")
|
|
|
+ unit = strings.TrimSuffix(unit, ")")
|
|
|
+ fmt.Println("单位: ", unit)
|
|
|
+ indexCode := ""
|
|
|
+ indexName := ""
|
|
|
+ // 获取频度和分类ID
|
|
|
+ classifyId := rule.ClassifyId
|
|
|
+ frequency := rule.Frequency
|
|
|
+ dataMap := make(map[string]string)
|
|
|
+ // 遍历表格中的每一行(跳过表头)
|
|
|
+ item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
|
|
|
+ if k == 0 {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ // 提取产品名称
|
|
|
+ if indexCode == "" {
|
|
|
+ productName := row.Find("td:nth-child(1)").Text()
|
|
|
+ productName = strings.TrimSpace(productName)
|
|
|
+ if strings.Contains(rule.Name, productName) {
|
|
|
+ indexName = rule.Name
|
|
|
+ code := strings.ToLower(utils.GetFirstPingYin(indexName))
|
|
|
+ code = strings.ReplaceAll(code, "/", "")
|
|
|
+ code = strings.ReplaceAll(code, " ", "")
|
|
|
+ indexCode = strings.ToLower(code)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 提取日期
|
|
|
+ date := row.Find("td:nth-child(2)").Text()
|
|
|
+ date = strings.TrimSpace(date)
|
|
|
+
|
|
|
+ // 提取周均值
|
|
|
+ dailyAvg := row.Find("td:nth-child(4)").Text()
|
|
|
dailyAvg = strings.TrimSpace(dailyAvg)
|
|
|
|
|
|
// 打印提取的信息
|