|
@@ -0,0 +1,226 @@
|
|
|
+package base_from_ccf
|
|
|
+
|
|
|
+import (
|
|
|
+ "context"
|
|
|
+ "encoding/json"
|
|
|
+ "eta/eta_data_analysis/models"
|
|
|
+ "eta/eta_data_analysis/utils"
|
|
|
+ "fmt"
|
|
|
+ "github.com/PuerkitoBio/goquery"
|
|
|
+ "os"
|
|
|
+ "strconv"
|
|
|
+ "strings"
|
|
|
+)
|
|
|
+
|
|
|
// CCFChartRule is one entry of the CCF chart rule file. Rules form a tree:
// a rule may carry child rules for its sub pages.
type CCFChartRule struct {
	// Name is the chart name; it is compared with the page title while parsing.
	Name string `json:"Name"`
	// ClassifyId is the classify ID copied onto every index parsed for this rule.
	ClassifyId int `json:"ClassifyId"`
	// CustNo is sent as the cust_no query parameter when the chart page is fetched.
	CustNo int `json:"CustNo"`
	// Frequency is the default frequency copied onto the parsed indexes.
	Frequency string `json:"Frequency"`
	// Child holds the rules of sub pages; pointers are used because the
	// children may be absent from the rule file.
	Child []*CCFChartRule `json:"Child,omitempty"`
}
|
|
|
+
|
|
|
+func loadCCFChartRule() (rules []*CCFChartRule, err error) {
|
|
|
+ if utils.CCFChartRuleFile == "" {
|
|
|
+ err = fmt.Errorf("rule文件不存在")
|
|
|
+ return
|
|
|
+ }
|
|
|
+ b, e := os.ReadFile(utils.CCFChartRuleFile)
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("读取rule文件失败, err: %v", e)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ rules = make([]*CCFChartRule, 0)
|
|
|
+ if e = json.Unmarshal(b, &rules); e != nil {
|
|
|
+ err = fmt.Errorf("解析rule文件失败, err: %v", e)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+func TaskGetCCFChartEdb(context.Context) (err error) {
|
|
|
+ _ = GetCCFChartEdb()
|
|
|
+ return
|
|
|
+}
|
|
|
+func GetCCFChartEdb() (err error) {
|
|
|
+ defer func() {
|
|
|
+ if err != nil {
|
|
|
+ tips := fmt.Sprintf("GetCCFChartEdb ErrMsg: %s", err.Error())
|
|
|
+ utils.FileLog.Info(tips)
|
|
|
+ fmt.Println(tips)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+ rules, err := loadCCFChartRule()
|
|
|
+ if err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ indexes := make([]*HandleIndexData, 0)
|
|
|
+ for _, v := range rules {
|
|
|
+ // 首页报告链接
|
|
|
+ pageHtml := fmt.Sprintf("%s?cust_no=%d", CCFCHARTDATAURL, v.CustNo)
|
|
|
+ fmt.Println(pageHtml)
|
|
|
+ fileContent, e := fetchPageHtml(pageHtml, 0)
|
|
|
+ /*fName := v.Name
|
|
|
+ if strings.Contains(v.Name, "/") {
|
|
|
+ fName = strings.ReplaceAll(fName, "/", "")
|
|
|
+ }
|
|
|
+ filePath := fmt.Sprintf("/Users/xiexiaoyuan/工作/数据源ccf/ccf图表/%s/index.html", fName)
|
|
|
+ fmt.Println(filePath)
|
|
|
+ // 打开文件
|
|
|
+ file, e := os.Open(filePath)
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("无法打开文件: %v", err)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ defer file.Close()
|
|
|
+ // 读取文件内容
|
|
|
+ fileContent, e := io.ReadAll(file)
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("读取文件内容失败: %v", e)
|
|
|
+ fmt.Printf("无法读取文件内容: %v", e)
|
|
|
+ return
|
|
|
+ }*/
|
|
|
+ // 转换编码
|
|
|
+
|
|
|
+ // 转换编码
|
|
|
+ /*utf8Reader, e := charset.NewReaderLabel("gb2312", bytes.NewReader(fileContent))
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("utf8 reader err: %s", e.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ utf8Body, e := io.ReadAll(utf8Reader)
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("读取utf8 body err: %s", e.Error())
|
|
|
+ return
|
|
|
+ }*/
|
|
|
+ //firstHtml := string(utf8Body)
|
|
|
+ //fmt.Println(firstHtml)
|
|
|
+
|
|
|
+ isStop, indexList, e := AnalysisChartInventoryWeeklyEdb(fileContent, v)
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("解析图表失败, err: %v", e)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ if isStop {
|
|
|
+ err = fmt.Errorf("图表名称不存在,停止爬取")
|
|
|
+ break
|
|
|
+ }
|
|
|
+ if len(indexList) > 0 {
|
|
|
+ indexes = append(indexes, indexList...)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(indexes) == 0 {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ // 写入数据库
|
|
|
+ params := make(map[string]interface{})
|
|
|
+ params["List"] = indexes
|
|
|
+ params["TerminalCode"] = utils.TerminalCode
|
|
|
+ result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_EDB_HANDLE)
|
|
|
+ if e != nil {
|
|
|
+ b, _ := json.Marshal(params)
|
|
|
+ err = fmt.Errorf("postEdbLib err: %v, params: %s", e, string(b))
|
|
|
+ return
|
|
|
+ }
|
|
|
+ resp := new(models.BaseEdbLibResponse)
|
|
|
+ if e = json.Unmarshal(result, &resp); e != nil {
|
|
|
+ err = fmt.Errorf(" postEdbLib resp json.Unmarshal err: %v", e)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ if resp.Ret != 200 {
|
|
|
+ err = fmt.Errorf("postEdbLib resp Msg: %s, ErrMsg: %s", resp.Msg, resp.ErrMsg)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+// AnalysisChartInventoryWeeklyEdb 解析周度库存中的日均值
|
|
|
+func AnalysisChartInventoryWeeklyEdb(htm []byte, rule *CCFChartRule) (isStop bool, indexes []*HandleIndexData, err error) {
|
|
|
+ if len(htm) == 0 {
|
|
|
+ utils.FileLog.Info("htm empty")
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
|
|
|
+ if e != nil {
|
|
|
+ err = fmt.Errorf("NewDocumentFromReader err: %v", e)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ // 判断图表名称是否相符,如果不符合放弃爬取
|
|
|
+ title := doc.Find("p a.activated.now").Text()
|
|
|
+ fmt.Println(title)
|
|
|
+ if title != rule.Name {
|
|
|
+ utils.FileLog.Info("图表名称不存在,停止爬取")
|
|
|
+ isStop = true
|
|
|
+ return
|
|
|
+ }
|
|
|
+ doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
|
|
|
+ // 提取单位(这里假设单位总是位于 .tips 类的 div 中)
|
|
|
+ unit := item.Find(".tips").Text()
|
|
|
+ unit = strings.TrimSpace(unit)
|
|
|
+ unit = strings.TrimPrefix(unit, "编制说明:单位(")
|
|
|
+ unit = strings.TrimSuffix(unit, ")")
|
|
|
+ fmt.Println("单位: ", unit)
|
|
|
+ indexCode := ""
|
|
|
+ indexName := ""
|
|
|
+ // 获取频度和分类ID
|
|
|
+ classifyId := rule.ClassifyId
|
|
|
+ frequency := rule.Frequency
|
|
|
+ childRule := make(map[string]*CCFChartRule)
|
|
|
+ // 判断是否存在子页面
|
|
|
+ if len(rule.Child) > 0 {
|
|
|
+ for _, v := range childRule {
|
|
|
+ childRule[v.Name] = v
|
|
|
+ }
|
|
|
+ }
|
|
|
+ dataMap := make(map[string]string)
|
|
|
+ // 遍历表格中的每一行(跳过表头)
|
|
|
+ item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
|
|
|
+ if k == 0 {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ // 提取产品名称
|
|
|
+ if indexCode == "" {
|
|
|
+ productName := row.Find("td:nth-child(1)").Text()
|
|
|
+ productName = strings.TrimSpace(productName)
|
|
|
+ indexCode = fmt.Sprintf("ccf%s", utils.GetFirstPingYin(productName))
|
|
|
+ indexName = fmt.Sprintf("CCF%s", productName)
|
|
|
+ //判断子页面的频度
|
|
|
+ if newRule, ok := childRule[productName]; ok {
|
|
|
+ frequency = newRule.Frequency
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 提取日期
|
|
|
+ date := row.Find("td:nth-child(2)").Text()
|
|
|
+ date = strings.TrimSpace(date)
|
|
|
+
|
|
|
+ // 提取日均值
|
|
|
+ dailyAvg := row.Find("td:nth-child(3)").Text()
|
|
|
+ dailyAvg = strings.TrimSpace(dailyAvg)
|
|
|
+
|
|
|
+ // 打印提取的信息
|
|
|
+ fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
|
|
|
+ _, e = strconv.ParseFloat(dailyAvg, 64)
|
|
|
+ if e != nil {
|
|
|
+ utils.FileLog.Info("数据转换失败 err:%s", e.Error())
|
|
|
+ return
|
|
|
+ }
|
|
|
+ dataMap[date] = dailyAvg
|
|
|
+ })
|
|
|
+ if indexName == "" {
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
+ edb := new(HandleIndexData)
|
|
|
+ edb.IndexCode = strings.ToLower(indexCode)
|
|
|
+ edb.IndexName = indexName
|
|
|
+ edb.ClassifyId = classifyId
|
|
|
+ edb.Frequency = frequency
|
|
|
+ edb.Unit = unit
|
|
|
+ edb.DateData = dataMap
|
|
|
+ edb.TerminalCode = utils.TerminalCode
|
|
|
+ indexes = append(indexes, edb)
|
|
|
+ })
|
|
|
+ return
|
|
|
+}
|