123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391 |
- package base_from_ccf
- import (
- "context"
- "encoding/json"
- "eta/eta_data_analysis/models"
- "eta/eta_data_analysis/utils"
- "fmt"
- "os"
- "strconv"
- "strings"
- "time"
- "github.com/PuerkitoBio/goquery"
- )
// CCFChartRule describes one CCF chart page to crawl and how the tables found
// on that page are converted into indexes. Rules are decoded from the JSON
// rule file read by loadCCFChartRule.
type CCFChartRule struct {
	// Name is the chart title the page is expected to show; the parser
	// reports a stop when the active tab title differs from it.
	Name string `json:"Name"`

	// ClassifyId is copied onto every index produced from this chart.
	ClassifyId int `json:"ClassifyId"`

	// CustNo is sent as the cust_no query parameter when the page is fetched.
	CustNo int `json:"CustNo"`

	// Frequency is the frequency assigned to produced indexes unless a
	// matching child rule overrides it.
	Frequency string `json:"Frequency"`

	// IndexType selects the value column: "周均" reads the fourth table
	// column, any other value reads the third.
	IndexType string `json:"IndexType"`

	// Child optionally lists per-product rules keyed by Name. When present,
	// only tables whose product name matches a child are kept, and the
	// child's Frequency replaces the parent's.
	Child []*CCFChartRule `json:"Child,omitempty"`
}
- func loadCCFChartRule() (rules []*CCFChartRule, err error) {
- if utils.CCFChartRuleFile == "" {
- err = fmt.Errorf("rule文件不存在")
- return
- }
- b, e := os.ReadFile(utils.CCFChartRuleFile)
- if e != nil {
- err = fmt.Errorf("读取rule文件失败, err: %v", e)
- return
- }
- rules = make([]*CCFChartRule, 0)
- if e = json.Unmarshal(b, &rules); e != nil {
- err = fmt.Errorf("解析rule文件失败, err: %v", e)
- return
- }
- return
- }
// CCFChartAdditionRule describes one additional CCF chart that is requested
// by product name over a date range. Rules are decoded from the JSON file
// read by LoadCCFChartAdditionRule.
type CCFChartAdditionRule struct {
	// Name becomes the index name; a table is accepted when Name contains
	// the product name shown in the table. The index code is derived from it.
	Name string `json:"Name"`

	// ClassifyId is copied onto every index produced from this rule.
	ClassifyId int `json:"ClassifyId"`

	// Frequency is the frequency assigned to produced indexes.
	Frequency string `json:"Frequency"`

	// ProdNames is sent verbatim as the "prodNames" form parameter.
	ProdNames string `json:"prodNames"`

	// LastNYear controls the requested range: the start date is January 1st
	// of (current year + 1 - LastNYear). Zero is treated as 5 by the crawler.
	LastNYear int `json:"LastNYear"`

	// IndexType selects the value column: "周均" reads the fourth table
	// column, any other value reads the third.
	IndexType string `json:"IndexType"`
}
- func LoadCCFChartAdditionRule() (rules []*CCFChartAdditionRule, err error) {
- if utils.CCFChartAdditionRuleFile == "" {
- err = fmt.Errorf("rule文件不存在")
- return
- }
- b, e := os.ReadFile(utils.CCFChartAdditionRuleFile)
- if e != nil {
- err = fmt.Errorf("读取rule文件失败, err: %v", e)
- return
- }
- rules = make([]*CCFChartAdditionRule, 0)
- if e = json.Unmarshal(b, &rules); e != nil {
- err = fmt.Errorf("解析rule文件失败, err: %v", e)
- return
- }
- return
- }
- func TaskGetCCFChartEdb(context.Context) (err error) {
- _ = GetCCFChartEdb()
- return
- }
- func GetCCFChartEdb() (err error) {
- defer func() {
- if err != nil {
- tips := fmt.Sprintf("GetCCFChartEdb ErrMsg: %s", err.Error())
- utils.FileLog.Info(tips)
- fmt.Println(tips)
- }
- }()
- rules, err := loadCCFChartRule()
- if err != nil {
- return
- }
- indexes := make([]*HandleIndexData, 0)
- for _, v := range rules {
-
- pageHtml := fmt.Sprintf("%s?cust_no=%d", CCFCHARTDATAURL, v.CustNo)
- fmt.Println(pageHtml)
- fileContent, e := fetchPageHtml(pageHtml, 0)
- if e != nil {
- err = fmt.Errorf("获取首页报告失败, err: %v", e)
- return
- }
-
-
-
-
-
-
- isStop, indexList, e := AnalysisChartInventoryWeeklyEdb(fileContent, v)
- if e != nil {
- err = fmt.Errorf("解析图表失败, err: %v", e)
- return
- }
- if isStop {
- err = fmt.Errorf("图表名称不存在,停止爬取")
- break
- }
- if len(indexList) > 0 {
- indexes = append(indexes, indexList...)
- }
- }
- additionRules, err := LoadCCFChartAdditionRule()
- if err != nil {
- err = fmt.Errorf("加载额外图表规则失败 err: %v", err)
- return
- }
- now := time.Now()
- for _, v := range additionRules {
- param := make(map[string]string)
- if v.LastNYear == 0 {
- v.LastNYear = 5
- }
- param["startdate"] = time.Date(now.Year()+1-v.LastNYear, 1, 1, 0, 0, 0, 0, time.Local).Format(utils.FormatDate2)
- param["enddate"] = now.Format(utils.FormatDate2)
- param["type"] = "1"
- param["prodNames"] = v.ProdNames
- param["skin"] = "infographic"
- param["page"] = "index.php"
- fmt.Println(param)
- htmlContent, er := postPageHtml(CCFCHARTDATAURL, param, 0)
- if er != nil {
- err = fmt.Errorf("获取首页报告失败, err: %v", er)
- return
- }
- isStop, indexList, e := AnalysisAdditionChartInventoryWeeklyEdb(htmlContent, v)
- if e != nil {
- err = fmt.Errorf("解析图表失败, err: %v", e)
- return
- }
- if isStop {
- err = fmt.Errorf("图表名称不存在,停止爬取")
- break
- }
- if len(indexList) > 0 {
- indexes = append(indexes, indexList...)
- }
- }
- if len(indexes) == 0 {
- return
- }
-
- params := make(map[string]interface{})
- params["List"] = indexes
- params["TerminalCode"] = utils.TerminalCode
- result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_EDB_HANDLE)
- if e != nil {
- b, _ := json.Marshal(params)
- err = fmt.Errorf("postEdbLib err: %v, params: %s", e, string(b))
- return
- }
- resp := new(models.BaseEdbLibResponse)
- if e = json.Unmarshal(result, &resp); e != nil {
- err = fmt.Errorf(" postEdbLib resp json.Unmarshal err: %v", e)
- return
- }
- if resp.Ret != 200 {
- err = fmt.Errorf("postEdbLib resp Msg: %s, ErrMsg: %s", resp.Msg, resp.ErrMsg)
- return
- }
- return
- }
- func AnalysisChartInventoryWeeklyEdb(htm []byte, rule *CCFChartRule) (isStop bool, indexes []*HandleIndexData, err error) {
- if len(htm) == 0 {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
-
- title := doc.Find("p a.activated.now").Text()
- fmt.Println(title)
- if title != rule.Name {
- utils.FileLog.Info("图表名称不存在,停止爬取")
- isStop = true
- return
- }
- doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
-
- unit := item.Find(".tips").Text()
- unit = strings.TrimSpace(unit)
- unit = strings.TrimPrefix(unit, "编制说明:单位(")
- unit = strings.TrimSuffix(unit, ")")
- fmt.Println("单位: ", unit)
- indexCode := ""
- indexName := ""
-
- classifyId := rule.ClassifyId
- frequency := rule.Frequency
- childRule := make(map[string]*CCFChartRule)
-
- if len(rule.Child) > 0 {
- for _, v := range rule.Child {
- childRule[v.Name] = v
- }
- }
- dataMap := make(map[string]string)
-
- item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
- if k == 0 {
- return
- }
-
- if indexCode == "" {
- productName := row.Find("td:nth-child(1)").Text()
- productName = strings.TrimSpace(productName)
-
- if newRule, ok := childRule[productName]; ok {
- frequency = newRule.Frequency
-
- } else if len(childRule) > 0 {
- return
- }
- indexCode = fmt.Sprintf("ccf%s", utils.GetFirstPingYin(productName))
- indexName = fmt.Sprintf("CCF%s", productName)
- }
-
- date := row.Find("td:nth-child(2)").Text()
- date = strings.TrimSpace(date)
-
- var dailyAvg string
- if rule.IndexType == "周均" {
- dailyAvg = row.Find("td:nth-child(4)").Text()
- dailyAvg = strings.TrimSpace(dailyAvg)
- } else {
- dailyAvg = row.Find("td:nth-child(3)").Text()
- dailyAvg = strings.TrimSpace(dailyAvg)
- }
-
- fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
- _, e = strconv.ParseFloat(dailyAvg, 64)
- if e != nil {
- utils.FileLog.Info("数据转换失败 err:%s", e.Error())
- return
- }
- dataMap[date] = dailyAvg
- })
- if indexName == "" {
- return
- }
- edb := new(HandleIndexData)
- edb.IndexCode = strings.ToLower(indexCode)
- edb.IndexName = indexName
- edb.ClassifyId = classifyId
- edb.Frequency = frequency
- edb.Unit = unit
- edb.DateData = dataMap
- edb.TerminalCode = utils.TerminalCode
- indexes = append(indexes, edb)
- })
- return
- }
- func AnalysisAdditionChartInventoryWeeklyEdb(htm []byte, rule *CCFChartAdditionRule) (isStop bool, indexes []*HandleIndexData, err error) {
- if len(htm) == 0 {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
-
- doc.Find("div.tabCont").Each(func(i int, item *goquery.Selection) {
-
- unit := item.Find(".tips").Text()
- unit = strings.TrimSpace(unit)
- unit = strings.TrimPrefix(unit, "编制说明:单位(")
- unit = strings.TrimSuffix(unit, ")")
- fmt.Println("单位: ", unit)
- indexCode := ""
- indexName := ""
-
- classifyId := rule.ClassifyId
- frequency := rule.Frequency
- dataMap := make(map[string]string)
-
- item.Find("table tbody tr").Each(func(k int, row *goquery.Selection) {
- if k == 0 {
- return
- }
-
- if indexCode == "" {
- productName := row.Find("td:nth-child(1)").Text()
- productName = strings.TrimSpace(productName)
- if strings.Contains(rule.Name, productName) {
- indexName = rule.Name
- code := strings.ToLower(utils.GetFirstPingYin(indexName))
- code = strings.ReplaceAll(code, "/", "")
- code = strings.ReplaceAll(code, " ", "")
- indexCode = strings.ToLower(code)
- }
- }
-
- date := row.Find("td:nth-child(2)").Text()
- date = strings.TrimSpace(date)
-
- var dailyAvg string
- if rule.IndexType == "周均" {
- dailyAvg = row.Find("td:nth-child(4)").Text()
- dailyAvg = strings.TrimSpace(dailyAvg)
- } else {
- dailyAvg = row.Find("td:nth-child(3)").Text()
- dailyAvg = strings.TrimSpace(dailyAvg)
- }
-
- fmt.Printf("单位: %s\n产品名称: %s\n日期: %s\n日均值: %s\n\n", unit, indexName, date, dailyAvg)
- _, e = strconv.ParseFloat(dailyAvg, 64)
- if e != nil {
- utils.FileLog.Info("数据转换失败 err:%s", e.Error())
- return
- }
- dataMap[date] = dailyAvg
- })
- if indexName == "" {
- return
- }
- edb := new(HandleIndexData)
- edb.IndexCode = strings.ToLower(indexCode)
- edb.IndexName = indexName
- edb.ClassifyId = classifyId
- edb.Frequency = frequency
- edb.Unit = unit
- edb.DateData = dataMap
- edb.TerminalCode = utils.TerminalCode
- indexes = append(indexes, edb)
- })
- return
- }
|