12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333 |
- package base_from_ccf
- import (
- "context"
- "encoding/json"
- "eta/eta_data_analysis/models"
- "eta/eta_data_analysis/utils"
- "fmt"
- "github.com/PuerkitoBio/goquery"
- "io/ioutil"
- "strconv"
- "strings"
- "time"
- )
- // TaskAnalysisHandlers 解析表格的函数
- var TaskAnalysisHandlers = map[string]func(htm []byte, fetchRule *DataRule) (indexes []*HandleIndexData, err error){
- "原油石化早报": AnalysisOilReportEdb,
- "PTA周报": AnalysisPTAWeekEdb,
- "MEG周报": AnalysisMEGWeekEdb,
- "长丝周报": AnalysisChangSiWeekEdb,
- "短纤周报": AnalysisDuanXianWeekEdb,
- "瓶片周报": AnalysisPingPianWeekEdb,
- "切片周报": AnalysisQiePianWeekEdb,
- "PX周报": AnalysisPXWeekEdb,
- }
// HandleIndexData is one indicator extracted from a report, together with its
// dated values. The `description` struct tags carry the original field
// descriptions and are kept verbatim.
type HandleIndexData struct {
	// IndexName is the indicator name.
	IndexName string `description:"指标名称"`
	// IndexCode is the indicator code.
	IndexCode string `description:"指标编码"`
	// ClassifyId is the classification ID.
	ClassifyId int `description:"分类ID"`
	// Unit is the unit of the values.
	Unit string `description:"单位"`
	// Sort is the sort order.
	Sort int `description:"排序"`
	// Frequency is the data frequency.
	Frequency string `description:"频度"`
	// TerminalCode is the terminal code.
	TerminalCode string `description:"终端编码"`
	// DateData holds the values keyed by date string.
	DateData map[string]string `description:"日期数据"`
}
- // TaskOilDailyEdb 获取原油石化早报指标
- func TaskOilDailyEdb(context.Context) (err error) {
- defer func() {
- if err != nil {
- tips := fmt.Sprintf("TaskOilEdbDaily ErrMsg: %s", err.Error())
- utils.FileLog.Info(tips)
- fmt.Println(tips)
- }
- }()
- nameKey := "原油石化早报"
- fetchRule, e := loadDataRule(nameKey)
- if e != nil {
- err = fmt.Errorf("loadDataRule, err: %v", e)
- return
- }
- // 解析前N篇报告
- readLimit := utils.CCFDailyFetchNum
- filePaths, e := savePageHtml(nameKey, fetchRule.PageDir, false, readLimit)
- if e != nil {
- err = fmt.Errorf("savePageHtml, err: %v", e)
- return
- }
- readCount := 0
- for _, v := range filePaths {
- readCount += 1
- if readCount > readLimit {
- return
- }
- htm, e := ioutil.ReadFile(v)
- if e != nil {
- fmt.Printf("file: %s, ReadFile err: %v\n", v, e)
- utils.FileLog.Info(fmt.Sprintf("file: %s, ReadFile err: %v", v, e))
- continue
- }
- handler, ok := TaskAnalysisHandlers[nameKey]
- if !ok {
- utils.FileLog.Info(fmt.Sprintf("%s无解析函数\n", nameKey))
- continue
- }
- indexes, e := handler(htm, fetchRule)
- if e != nil {
- fmt.Printf("file: %s, AnalysisOilReportEdb err: %v\n", v, e)
- utils.FileLog.Info(fmt.Sprintf("file: %s, AnalysisOilReportEdb err: %v", v, e))
- continue
- }
- // 写入数据库
- params := make(map[string]interface{})
- params["List"] = indexes
- params["TerminalCode"] = utils.TerminalCode
- result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_EDB_HANDLE)
- if e != nil {
- b, _ := json.Marshal(params)
- fmt.Printf("file: %s, postEdbLib err: %v, params: %s\n", v, e, string(b))
- utils.FileLog.Info(fmt.Sprintf("file: %s, postEdbLib err: %v, params: %s", v, e, string(b)))
- continue
- }
- resp := new(models.BaseEdbLibResponse)
- if e = json.Unmarshal(result, &resp); e != nil {
- fmt.Printf("file: %s, json.Unmarshal err: %v\n", v, e)
- utils.FileLog.Info(fmt.Sprintf("file: %s, json.Unmarshal err: %v", v, e))
- continue
- }
- if resp.Ret != 200 {
- fmt.Printf("file: %s, Msg: %s, ErrMsg: %s\n", v, resp.Msg, resp.ErrMsg)
- utils.FileLog.Info(fmt.Sprintf("file: %s, Msg: %s, ErrMsg: %s", v, resp.Msg, resp.ErrMsg))
- continue
- }
- }
- return
- }
- // TaskWeeklyEdb 获取周报指标
- func TaskWeeklyEdb(context.Context) (err error) {
- defer func() {
- if err != nil {
- tips := fmt.Sprintf("TaskWeeklyEdb ErrMsg: %s", err.Error())
- utils.FileLog.Info(tips)
- fmt.Println(tips)
- }
- }()
- taskNames := []string{"PTA周报", "MEG周报", "长丝周报", "短纤周报", "瓶片周报", "切片周报", "PX周报"}
- readLimit := utils.CCFWeeklyFetchNum
- for _, nameKey := range taskNames {
- fmt.Printf("开始获取: %s\n", nameKey)
- fetchRule, e := loadDataRule(nameKey)
- if e != nil {
- utils.FileLog.Info(fmt.Sprintf("%s无解析规则, err: %v\n", nameKey, e))
- continue
- }
- handler, ok := TaskAnalysisHandlers[nameKey]
- if !ok {
- //fmt.Printf("%s无解析函数\n", nameKey)
- utils.FileLog.Info(fmt.Sprintf("%s无解析函数\n", nameKey))
- continue
- }
- // 解析前N篇报告
- files, e := savePageHtml(nameKey, fetchRule.PageDir, false, readLimit)
- if e != nil {
- //fmt.Printf("%s保存首页失败, err: %v\n", nameKey, e)
- utils.FileLog.Info(fmt.Sprintf("%s保存首页失败, err: %v\n", nameKey, e))
- continue
- }
- readCount := 0
- for _, v := range files {
- readCount += 1
- if readCount > readLimit {
- break
- }
- htm, e := ioutil.ReadFile(v)
- if e != nil {
- //fmt.Printf("file: %s, ReadFile err: %v\n", v, e)
- utils.FileLog.Info(fmt.Sprintf("file: %s, ReadFile err: %v", v, e))
- continue
- }
- indexes, e := handler(htm, fetchRule)
- if e != nil {
- //fmt.Printf("file: %s, AnalysisOilReportEdb err: %v\n", v, e)
- utils.FileLog.Info(fmt.Sprintf("file: %s, AnalysisOilReportEdb err: %v", v, e))
- continue
- }
- // 写入数据库
- params := make(map[string]interface{})
- params["List"] = indexes
- params["TerminalCode"] = utils.TerminalCode
- result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_EDB_HANDLE)
- if e != nil {
- b, _ := json.Marshal(params)
- //fmt.Printf("file: %s, postEdbLib err: %v, params: %s\n", v, e, string(b))
- utils.FileLog.Info(fmt.Sprintf("file: %s, postEdbLib err: %v, params: %s", v, e, string(b)))
- continue
- }
- resp := new(models.BaseEdbLibResponse)
- if e = json.Unmarshal(result, &resp); e != nil {
- //fmt.Printf("file: %s, json.Unmarshal err: %v\n", v, e)
- utils.FileLog.Info(fmt.Sprintf("file: %s, json.Unmarshal err: %v", v, e))
- continue
- }
- if resp.Ret != 200 {
- //fmt.Printf("file: %s, Msg: %s, ErrMsg: %s\n", v, resp.Msg, resp.ErrMsg)
- utils.FileLog.Info(fmt.Sprintf("file: %s, Msg: %s, ErrMsg: %s", v, resp.Msg, resp.ErrMsg))
- continue
- }
- }
- fmt.Printf("结束获取: %s\n", nameKey)
- }
- return
- }
- // AnalysisOilReportEdb 解析原油石化早报中的指标数据
- func AnalysisOilReportEdb(htm []byte, fetchRule *DataRule) (indexes []*HandleIndexData, err error) {
- if len(htm) == 0 || fetchRule == nil {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
- // 找到表格
- keyElement := doc.Find("#newscontent")
- tableBody := keyElement.ChildrenFiltered("table").First().ChildrenFiltered("tbody")
- if tableBody.Length() <= 0 {
- err = fmt.Errorf("表格未找到")
- return
- }
- colDates := make(map[int]string)
- colLen := tableBody.Children().First().Find("td").Length()
- attemptDates := []string{"2006/1/2", "2006/01/02", "2006-01-02", "2006-1-2", "2006.01.02", "2006.1.2"}
- var rows []TableRow
- var mergeBegin, mergeRows int
- var mergeProduct string
- tableBody.Children().Each(func(i int, s *goquery.Selection) {
- cells := s.Find("td")
- // 从表头取出日期列
- // 格式: [产品|市场|日期列(列数不定)|涨跌|单位]
- if i == 0 {
- cells.Each(func(ii int, ss *goquery.Selection) {
- cellTxt := strings.TrimSpace(ss.Text())
- if ii > 1 && ii < colLen-2 {
- var d time.Time
- // 尝试解析日期
- for _, a := range attemptDates {
- t, e := time.ParseInLocation(a, cellTxt, time.Local)
- if e == nil {
- d = t
- break
- }
- }
- //fmt.Println("colDate: ", d)
- if !d.IsZero() {
- colDates[ii] = d.Format(utils.FormatDate)
- }
- }
- })
- }
- // 取指标
- if i > 0 {
- row := TableRow{
- DateData: make(map[string]string),
- }
- mergedRow := false // 是否为被合并行
- cellsLen := cells.Length()
- cells.Each(func(ii int, cell *goquery.Selection) {
- cellData := filterInvalidVal(cell.Text())
- if cellData == "" {
- return
- }
- switch ii {
- case 0:
- // 被合并行为市场列, 其余为产品列
- hasMerge, _ := cell.Attr("rowspan")
- if hasMerge != "" {
- // 开始合并行
- mergeRows, _ = strconv.Atoi(hasMerge)
- mergeBegin = i
- row.Product = cellData
- mergeProduct = row.Product
- } else {
- // 被合并行的后一行, 重置合并计数
- if i >= (mergeBegin + mergeRows) {
- mergeBegin = 0
- mergeRows = 0
- }
- // 被合并行, 第一列为市场
- if mergeBegin > 0 && mergeRows > 0 && i < (mergeBegin+mergeRows) {
- row.Product = mergeProduct
- row.Market = cellData
- mergedRow = true
- }
- if mergeBegin == 0 && mergeRows == 0 {
- row.Product = cellData
- }
- }
- case 1:
- // 被合并行为日期列, 其余为市场列
- if mergedRow {
- d, ok := colDates[ii+1]
- if ok {
- row.DateData[d] = formatIntervalData(cellData, "")
- }
- } else {
- row.Market = cellData
- }
- case cellsLen - 2:
- // 忽略涨跌列
- case cellsLen - 1:
- row.Unit = cellData
- default:
- // 日期列
- if mergedRow {
- d, ok := colDates[ii+1]
- if ok {
- row.DateData[d] = formatIntervalData(cellData, "")
- }
- } else {
- d, ok := colDates[ii]
- if ok {
- row.DateData[d] = formatIntervalData(cellData, "")
- }
- }
- }
- })
- rows = append(rows, row)
- }
- })
- // 只取需要的指标
- indexes = formatTableRow2ValidEdb(rows, fetchRule.EdbMatch)
- return
- }
- // AnalysisPTAWeekEdb 解析PTA周报中的指标数据
- func AnalysisPTAWeekEdb(htm []byte, fetchRule *DataRule) (indexes []*HandleIndexData, err error) {
- if len(htm) == 0 || fetchRule == nil {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
- burdenTitle, ptaTitle := "负荷", "PTA库存"
- // 从收藏按钮往上找table, 取出报告发布日期
- collectEle := doc.Find("#savenews")
- publishTimeTab := collectEle.ParentsFiltered("table").First()
- publishTxt := publishTimeTab.Find("td:first-child").Text()
- publishTime, e := extractReportPublishTime(publishTxt)
- if e != nil {
- err = fmt.Errorf("extractReportPublishTime err: %v", e)
- return
- }
- //publishYear := publishTime.Year()
- //fmt.Println(publishTime)
- //fmt.Println("年份", publishYear)
- // 遍历h2, 找出负荷和PTA库存下第一个table
- burdenTable, ptaTable := new(goquery.Selection), new(goquery.Selection)
- h2Selections := doc.Find("h2")
- h2Selections.Each(func(i int, h2 *goquery.Selection) {
- //fmt.Println(i, h2.Text())
- if strings.Contains(h2.Text(), burdenTitle) {
- burdenTable = h2.NextAllFiltered("table").First()
- }
- if strings.Contains(h2.Text(), ptaTitle) {
- ptaTable = h2.NextAllFiltered("table").First()
- }
- })
- // 负荷
- //var rows []TableRow
- //var burdenRows []TableRow
- //var burdenDataTime string
- //burdenTable.Find("tbody").Children().Each(func(i int, s *goquery.Selection) {
- // // 表头取出日期
- // cells := s.Find("td")
- // if i == 0 {
- // cells.Each(func(ii int, ss *goquery.Selection) {
- // cellTxt := strings.TrimSpace(ss.Text())
- // if ii == 2 {
- // strTime := fmt.Sprintf("%d年%s", publishYear, cellTxt)
- // t, e := time.ParseInLocation("2006年01月02日", strTime, time.Local)
- // if e != nil {
- // err = fmt.Errorf("解析PTA负荷数据日期失败, err: %v", e)
- // return
- // }
- // burdenDataTime = t.Format(utils.FormatDate)
- // }
- // })
- // }
- // // 取指标
- // if i > 0 {
- // row := TableRow{
- // DateData: make(map[string]string),
- // }
- // cells.Each(func(ii int, ss *goquery.Selection) {
- // cellTxt := strings.TrimSpace(ss.Text())
- // switch ii {
- // case 0:
- // row.Product = cellTxt
- // case 1:
- // row.Market = cellTxt
- // case 2:
- // row.DateData[burdenDataTime] = cellTxt
- // }
- // })
- // //row.Unit = burdenUnit
- // burdenRows = append(burdenRows, row)
- // }
- //})
- //rows = append(rows, burdenRows...)
- var rows []TableRow
- var analysisPars AnalysisNoneMergeTablePars
- analysisPars.DocTable = burdenTable
- analysisPars.MarketCol.HasCol = true
- analysisPars.MarketCol.ColIndex = 1
- analysisPars.DateCol.StartIndex = 2
- analysisPars.DateCol.EndIndex = 3
- analysisPars.DateCol.PublishTime = publishTime
- //analysisPars.DateCol.PublishYear = publishYear
- analysisPars.DateCol.StrTimeFormat = "%d年%s"
- analysisPars.DateCol.TimeFormat = []string{"2006年01月02日", "2006年1月2日"}
- burdenRows := analysisNoneMergeTable(analysisPars)
- rows = append(rows, burdenRows...)
- // PTA库存, 存在特殊格式
- ptaRows := make(map[int]TableRow)
- ptaTable.Find("tbody").Children().Each(func(i int, s *goquery.Selection) {
- cells := s.Find("td")
- cellLen := cells.Length()
- // 判断tr下td的长度, 兼容处理
- // td长度为2, 数据日期取发布日期
- if cellLen == 2 {
- if i == 0 {
- cells.Each(func(ii int, ss *goquery.Selection) {
- cellTxt := strings.TrimSpace(ss.Text())
- row := TableRow{
- Product: cellTxt,
- DateData: make(map[string]string),
- }
- ptaRows[ii] = row
- })
- }
- if i > 0 {
- cells.Each(func(ii int, ss *goquery.Selection) {
- cellTxt := filterInvalidVal(ss.Text())
- if cellTxt == "" {
- return
- }
- val, e := calculateDataHalfVal(cellTxt)
- if e != nil {
- utils.FileLog.Info(fmt.Sprintf("PTA周报-calculateDataHalfVal: cellTxt-%s, err: %v", cellTxt, e))
- return
- }
- ptaRows[ii].DateData[publishTime.Format(utils.FormatDate)] = val
- })
- }
- }
- // 大于2时, 内容第一列为日期
- if cellLen > 2 {
- if i == 0 {
- cells.Each(func(ii int, ss *goquery.Selection) {
- if ii == 0 {
- return
- }
- cellTxt := strings.TrimSpace(ss.Text())
- row := TableRow{
- Product: cellTxt,
- DateData: make(map[string]string),
- }
- ptaRows[ii] = row
- })
- }
- if i > 0 {
- var dataTime string
- cells.Each(func(ii int, ss *goquery.Selection) {
- cellTxt := filterInvalidVal(ss.Text())
- if cellTxt == "" {
- return
- }
- if ii == 0 {
- strTime := fmt.Sprintf("%d/%s", publishTime.Year(), cellTxt)
- t, e := time.ParseInLocation("2006/1/2", strTime, time.Local)
- if e != nil {
- fmt.Printf("time parse err: %v", e)
- return
- }
- // 判断报告是否跨年
- if t.AddDate(0, -6, 0).After(publishTime) {
- utils.FileLog.Info(fmt.Sprintf("跨年判断-2: ColTime-%v; PublishTime-%v", t, publishTime))
- t = t.AddDate(-1, 0, 0)
- }
- dataTime = t.Format(utils.FormatDate)
- return
- }
- val, e := calculateDataHalfVal(cellTxt)
- if e != nil {
- fmt.Printf("calculateDataHalfVal err: %v\n", e)
- return
- }
- if dataTime != "" && val != "" {
- ptaRows[ii].DateData[dataTime] = val
- }
- })
- }
- }
- })
- for _, v := range ptaRows {
- rows = append(rows, v)
- }
- indexes = formatTableRow2ValidEdb(rows, fetchRule.EdbMatch)
- return
- }
- // AnalysisMEGWeekEdb 解析MEG周报中的指标数据
- func AnalysisMEGWeekEdb(htm []byte, fetchRule *DataRule) (indexes []*HandleIndexData, err error) {
- if len(htm) == 0 || fetchRule == nil {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
- burdenTitle, stockTitle := "CCF指数", "MEG华东港口库存情况"
- // 从收藏按钮往上找table, 取出报告发布日期
- collectEle := doc.Find("#savenews")
- publishTimeTab := collectEle.ParentsFiltered("table").First()
- publishTxt := publishTimeTab.Find("td:first-child").Text()
- //fmt.Println("publishTxt: ", publishTxt)
- publishTime, e := extractReportPublishTime(publishTxt)
- if e != nil {
- err = fmt.Errorf("extractReportPublishTime err: %v", e)
- return
- }
- //publishYear := publishTime.Year()
- //fmt.Println(publishTime)
- //fmt.Println(publishYear)
- // 遍历h2, 找出对应Table
- burdenTable, stockTable := new(goquery.Selection), new(goquery.Selection)
- //h2Selections := doc.Find("h2")
- doc.Find("h2").Each(func(i int, h2 *goquery.Selection) {
- //fmt.Println(i, h2.Text())
- if strings.Contains(h2.Text(), burdenTitle) {
- burdenTable = h2.NextAllFiltered("table").First()
- }
- if strings.Contains(h2.Text(), stockTitle) {
- stockTable = h2.NextAllFiltered("table").First()
- }
- })
- // 负荷-存在合并行
- var rows []TableRow
- //var burdenRows []TableRow
- {
- //var burdenDataTime string
- var mergeBegin, mergeRows int
- var mergeProduct string
- burdenColDate := make(map[int]string) // 日期列key->日期
- burdenTable.Find("tbody").Children().Each(func(i int, s *goquery.Selection) {
- // 表头取出日期
- cells := s.Find("td")
- if i == 0 {
- cells.Each(func(ii int, ss *goquery.Selection) {
- cellTxt := strings.TrimSpace(ss.Text())
- if cellTxt == "" {
- return
- }
- if ii == 2 || ii == 3 {
- //fmt.Println("日期列")
- strTime := fmt.Sprintf("%d年%s", publishTime.Year(), cellTxt)
- //fmt.Println("日期str", strTime)
- t, e := time.ParseInLocation("2006年1月2日", strTime, time.Local)
- if e != nil {
- utils.FileLog.Info(fmt.Sprintf("MEG周报-日期解析: cellTxt-%s, err: %v", cellTxt, e))
- //fmt.Println("e: ", e)
- //err = fmt.Errorf("解析MEG负荷数据日期失败, err: %v", e)
- return
- }
- // 判断报告是否跨年
- if t.AddDate(0, -6, 0).After(publishTime) {
- utils.FileLog.Info(fmt.Sprintf("跨年判断-MEG: ColTime-%v; PublishTime-%v", t, publishTime))
- t = t.AddDate(-1, 0, 0)
- }
- if !t.IsZero() {
- burdenColDate[ii] = t.Format(utils.FormatDate)
- }
- //fmt.Println("日期:", t.Format(utils.FormatDate))
- }
- })
- }
- // 取指标
- if i > 0 {
- row := TableRow{
- DateData: make(map[string]string),
- }
- mergedRow := false // 是否为被合并行
- cells.Each(func(ii int, ss *goquery.Selection) {
- cellTxt := filterInvalidVal(ss.Text())
- if cellTxt == "" {
- return
- }
- switch ii {
- case 0:
- // 被合并行为市场列, 其余为产品列
- hasMerge, _ := ss.Attr("rowspan")
- if hasMerge != "" {
- // 开始合并行
- mergeRows, _ = strconv.Atoi(hasMerge)
- mergeBegin = i
- row.Product = cellTxt
- mergeProduct = row.Product
- } else {
- // 被合并行的后一行, 重置合并计数
- if i >= (mergeBegin + mergeRows) {
- mergeBegin = 0
- mergeRows = 0
- }
- // 被合并行第一列为产品
- if mergeBegin > 0 && mergeRows > 0 && i < (mergeBegin+mergeRows) {
- row.Product = mergeProduct
- row.Market = cellTxt
- mergedRow = true
- //fmt.Println("被合并行: ", i, mergeBegin+mergeRows)
- }
- if mergeBegin == 0 && mergeRows == 0 {
- row.Product = cellTxt
- }
- }
- case 1:
- // 被合并行为值列, 其余为市场列
- if mergedRow {
- d, ok := burdenColDate[ii+1]
- if ok {
- row.DateData[d] = cellTxt
- }
- } else {
- row.Market = cellTxt
- }
- case 2:
- if mergedRow {
- d, ok := burdenColDate[ii+1]
- if ok {
- row.DateData[d] = cellTxt
- }
- } else {
- d, ok := burdenColDate[ii]
- if ok {
- row.DateData[d] = cellTxt
- }
- }
- case 3:
- if !mergedRow {
- d, ok := burdenColDate[ii]
- if ok {
- row.DateData[d] = cellTxt
- }
- }
- }
- })
- rows = append(rows, row)
- }
- })
- }
- // 库存
- //var stockRows []TableRow
- //{
- // colDate := make(map[int]string) // 日期列key->日期
- // stockTable.Find("tbody").Children().Each(func(i int, s *goquery.Selection) {
- // cells := s.Find("td")
- //
- // // 表头取出日期
- // if i == 0 {
- // cells.Each(func(ii int, ss *goquery.Selection) {
- // cellTxt := strings.TrimSpace(ss.Text())
- // if ii > 0 {
- // t, e := time.ParseInLocation("2006/1/2", cellTxt, time.Local)
- // if e != nil {
- // fmt.Println("e: ", e)
- // //err = fmt.Errorf("解析MEG负荷数据日期失败, err: %v", e)
- // return
- // }
- // colDate[ii] = t.Format(utils.FormatDate)
- // fmt.Println("日期:", t.Format(utils.FormatDate))
- // }
- // })
- // }
- //
- // // 取指标
- // if i > 0 {
- // row := TableRow{
- // Product: stockTitle,
- // //Unit: stockUnit,
- // DateData: make(map[string]string),
- // }
- // cells.Each(func(ii int, ss *goquery.Selection) {
- // cellTxt := strings.TrimSpace(ss.Text())
- // switch ii {
- // case 0:
- // row.Market = cellTxt
- // case 1, 2:
- // row.DateData[colDate[ii]] = cellTxt
- // }
- // })
- // fmt.Println(row)
- // stockRows = append(stockRows, row)
- // }
- // })
- //}
- var analysisPars AnalysisNoneMergeTablePars
- analysisPars.DocTable = stockTable
- analysisPars.DateCol.StartIndex = 1
- analysisPars.DateCol.EndIndex = 2
- analysisPars.DateCol.PublishTime = publishTime
- //analysisPars.DateCol.PublishYear = publishYear
- analysisPars.DateCol.StrTimeFormat = ""
- analysisPars.DateCol.TimeFormat = []string{"2006/1/2"}
- stockRows := analysisNoneMergeTable(analysisPars)
- rows = append(rows, stockRows...)
- indexes = formatTableRow2ValidEdb(rows, fetchRule.EdbMatch)
- //fmt.Println(111)
- return
- }
- // AnalysisChangSiWeekEdb 解析长丝周报中的指标数据
- func AnalysisChangSiWeekEdb(htm []byte, fetchRule *DataRule) (indexes []*HandleIndexData, err error) {
- if len(htm) == 0 || fetchRule == nil {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
- burdenTitle, stockTitle, observeTitle := "负荷指数", "库存指数", "下游观察"
- // 从收藏按钮往上找table, 取出报告发布日期
- collectEle := doc.Find("#savenews")
- publishTimeTab := collectEle.ParentsFiltered("table").First()
- publishTxt := publishTimeTab.Find("td:first-child").Text()
- //fmt.Println("publishTxt: ", publishTxt)
- publishTime, e := extractReportPublishTime(publishTxt)
- if e != nil {
- err = fmt.Errorf("extractReportPublishTime err: %v", e)
- return
- }
- //publishYear := publishTime.Year()
- //fmt.Println(publishTime)
- //fmt.Println(publishYear)
- // 遍历h2, 找出对应Table
- burdenTable, stockTable, observeTable := new(goquery.Selection), new(goquery.Selection), new(goquery.Selection)
- //h2Selections := doc.Find("h2")
- doc.Find("h2").Each(func(i int, h2 *goquery.Selection) {
- //fmt.Println(i, h2.Text())
- if strings.Contains(h2.Text(), burdenTitle) {
- burdenTable = h2.NextAllFiltered("table").First()
- }
- if strings.Contains(h2.Text(), stockTitle) {
- stockTable = h2.NextAllFiltered("table").First()
- }
- if strings.Contains(h2.Text(), observeTitle) {
- observeTable = h2.NextAllFiltered("table").First()
- }
- })
- // 负荷/下游观察解析
- //noneMergeAnalysis := func(docTable *goquery.Selection, unit string) (items []TableRow) {
- // colDate := make(map[int]string)
- //
- // docTable.Find("tbody").Children().Each(func(i int, s *goquery.Selection) {
- // cells := s.Find("td")
- //
- // // 表头取出日期
- // if i == 0 {
- // cells.Each(func(ii int, ss *goquery.Selection) {
- // cellTxt := strings.TrimSpace(ss.Text())
- // fmt.Println("cellTxt", cellTxt)
- // if ii >= 1 && ii <= 3 {
- // strTime := fmt.Sprintf("%d.%s", publishYear, cellTxt)
- // t, e := time.ParseInLocation("2006.01.02", strTime, time.Local)
- // if e != nil {
- // fmt.Println("e: ", e)
- // //err = fmt.Errorf("解析MEG负荷数据日期失败, err: %v", e)
- // return
- // }
- // colDate[ii] = t.Format(utils.FormatDate)
- // fmt.Println("日期:", t.Format(utils.FormatDate))
- // }
- // })
- // }
- //
- // // 取指标
- // if i > 0 {
- // row := TableRow{
- // //Product: stockTitle,
- // Unit: unit,
- // DateData: make(map[string]string),
- // }
- // cells.Each(func(ii int, ss *goquery.Selection) {
- // cellTxt := strings.TrimSpace(ss.Text())
- // fmt.Println("cellTxt", cellTxt)
- // switch ii {
- // case 0:
- // row.Product = cellTxt
- // case 1, 2, 3:
- // row.DateData[colDate[ii]] = cellTxt
- // }
- // })
- // //fmt.Println(row)
- // items = append(items, row)
- // }
- // })
- // return
- //}
- // 库存解析-存在合并行
- mergeAnalysis := func(docTable *goquery.Selection) (items []TableRow) {
- var mergeBegin, mergeRows int
- var mergeProduct string
- colDate := make(map[int]string) // 日期列key->日期
- attemptDates := []string{"2006.01.02", "2006.1.02", "2006.01.2"}
- docTable.Find("tbody").Children().Each(func(i int, s *goquery.Selection) {
- // 表头取出日期
- cells := s.Find("td")
- if i == 0 {
- cells.Each(func(ii int, ss *goquery.Selection) {
- cellTxt := strings.TrimSpace(ss.Text())
- fmt.Println("1-cellTxt", cellTxt)
- if ii >= 2 && ii <= 4 {
- //fmt.Println("日期列")
- strTime := fmt.Sprintf("%d.%s", publishTime.Year(), cellTxt)
- //fmt.Println("日期str", strTime)
- //t, e := time.ParseInLocation("2006.01.02", strTime, time.Local)
- //if e != nil {
- // utils.FileLog.Info(fmt.Sprintf("长丝周报-日期解析: cellTxt-%s, err: %v", cellTxt, e))
- // //fmt.Println("time e: ", e)
- // //err = fmt.Errorf("解析MEG负荷数据日期失败, err: %v", e)
- // return
- //}
- var colTime time.Time
- for _, f := range attemptDates {
- t, e := time.ParseInLocation(f, strTime, time.Local)
- if e != nil {
- continue
- }
- colTime = t
- break
- }
- // 判断报告是否跨年
- if colTime.AddDate(0, -6, 0).After(publishTime) {
- utils.FileLog.Info(fmt.Sprintf("跨年判断-长丝: ColTime-%v; PublishTime-%v", colTime, publishTime))
- colTime = colTime.AddDate(-1, 0, 0)
- }
- if !colTime.IsZero() {
- colDate[ii] = colTime.Format(utils.FormatDate)
- }
- //fmt.Println("日期:", t.Format(utils.FormatDate))
- }
- })
- }
- // 取指标
- if i > 0 {
- row := TableRow{
- DateData: make(map[string]string),
- }
- mergedRow := false // 是否为被合并行
- cells.Each(func(ii int, ss *goquery.Selection) {
- cellTxt := filterInvalidVal(ss.Text())
- fmt.Println("2-cellTxt", cellTxt)
- switch ii {
- case 0:
- // 被合并行为市场列, 其余为产品列
- hasMerge, _ := ss.Attr("rowspan")
- if hasMerge != "" {
- // 开始合并行
- mergeRows, _ = strconv.Atoi(hasMerge)
- mergeBegin = i
- row.Product = cellTxt
- mergeProduct = row.Product
- } else {
- // 被合并行的后一行, 重置合并计数
- if i >= (mergeBegin + mergeRows) {
- mergeBegin = 0
- mergeRows = 0
- }
- // 被合并行第一列为产品
- if mergeBegin > 0 && mergeRows > 0 && i < (mergeBegin+mergeRows) {
- row.Product = mergeProduct
- row.Market = cellTxt
- mergedRow = true
- //fmt.Println("被合并行: ", i, mergeBegin+mergeRows)
- }
- if mergeBegin == 0 && mergeRows == 0 {
- row.Product = cellTxt
- }
- }
- case 1:
- // 被合并行为值列, 其余为市场列
- if mergedRow {
- d, ok := colDate[ii+1]
- if ok {
- row.DateData[d] = cellTxt
- }
- } else {
- row.Market = cellTxt
- }
- case 2, 3:
- if mergedRow {
- d, ok := colDate[ii+1]
- if ok {
- row.DateData[d] = cellTxt
- }
- } else {
- d, ok := colDate[ii]
- if ok {
- row.DateData[d] = cellTxt
- }
- }
- case 4:
- if !mergedRow {
- d, ok := colDate[ii]
- if ok {
- row.DateData[d] = cellTxt
- }
- }
- }
- })
- items = append(items, row)
- }
- })
- return
- }
- // 负荷
- var rows []TableRow
- fmt.Println("blen", burdenTable.Length())
- if burdenTable.Length() > 0 {
- //items := noneMergeAnalysis(burdenTable, burdenUnit)
- //if len(items) > 0 {
- // rows = append(rows, items...)
- //}
- //strTime := fmt.Sprintf("%d.%s", publishYear, cellTxt)
- //t, e := time.ParseInLocation("2006.01.02", strTime, time.Local)
- var analysisPars AnalysisNoneMergeTablePars
- analysisPars.DocTable = burdenTable
- analysisPars.DateCol.StartIndex = 1
- analysisPars.DateCol.EndIndex = 3
- analysisPars.DateCol.PublishTime = publishTime
- //analysisPars.DateCol.PublishYear = publishYear
- analysisPars.DateCol.StrTimeFormat = "%d.%s"
- analysisPars.DateCol.TimeFormat = []string{"2006.01.02"}
- burdenRows := analysisNoneMergeTable(analysisPars)
- rows = append(rows, burdenRows...)
- }
- // 下游观察
- fmt.Println("olen", observeTable.Length())
- if observeTable.Length() > 0 {
- //items := noneMergeAnalysis(observeTable, observeUnit)
- //if len(items) > 0 {
- // rows = append(rows, items...)
- //}
- var analysisPars AnalysisNoneMergeTablePars
- analysisPars.DocTable = observeTable
- analysisPars.DateCol.StartIndex = 1
- analysisPars.DateCol.EndIndex = 3
- analysisPars.DateCol.PublishTime = publishTime
- //analysisPars.DateCol.PublishYear = publishYear
- analysisPars.DateCol.StrTimeFormat = "%d.%s"
- analysisPars.DateCol.TimeFormat = []string{"2006.01.02"}
- observeRows := analysisNoneMergeTable(analysisPars)
- rows = append(rows, observeRows...)
- }
- // 下游观察
- fmt.Println("slen", stockTable.Length())
- if stockTable.Length() > 0 {
- //fmt.Println(stockUnit)
- items := mergeAnalysis(stockTable)
- if len(items) > 0 {
- rows = append(rows, items...)
- }
- }
- fmt.Println(rows)
- indexes = formatTableRow2ValidEdb(rows, fetchRule.EdbMatch)
- return
- }
- // AnalysisDuanXianWeekEdb 解析短纤周报中的指标数据
- func AnalysisDuanXianWeekEdb(htm []byte, fetchRule *DataRule) (indexes []*HandleIndexData, err error) {
- if len(htm) == 0 || fetchRule == nil {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
- mainTitle := "主要运行指数"
- // 从收藏按钮往上找table, 取出报告发布日期
- collectEle := doc.Find("#savenews")
- publishTimeTab := collectEle.ParentsFiltered("table").First()
- publishTxt := publishTimeTab.Find("td:first-child").Text()
- //fmt.Println("publishTxt: ", publishTxt)
- publishTime, e := extractReportPublishTime(publishTxt)
- if e != nil {
- err = fmt.Errorf("extractReportPublishTime err: %v", e)
- return
- }
- //publishYear := publishTime.Year()
- //fmt.Println(publishTime)
- //fmt.Println(publishYear)
- // 查找包含文本的<p>元素
- mainElement := doc.Find(fmt.Sprintf("p:contains('%s')", mainTitle))
- if mainElement.Length() <= 0 {
- err = fmt.Errorf("未找到p标签, keyword: %s", mainTitle)
- return
- }
- table := mainElement.NextAllFiltered("table").First()
- if table.Length() <= 0 {
- err = fmt.Errorf("未找到p标签后的table, keyword: %s", mainTitle)
- return
- }
- var analysisPars AnalysisNoneMergeTablePars
- analysisPars.DocTable = table
- analysisPars.DateCol.StartIndex = 1
- analysisPars.DateCol.EndIndex = 2
- analysisPars.DateCol.PublishTime = publishTime
- //analysisPars.DateCol.PublishYear = publishYear
- analysisPars.DateCol.StrTimeFormat = "%d年%s"
- analysisPars.DateCol.TimeFormat = []string{"2006年1月2日"}
- rows := analysisNoneMergeTable(analysisPars)
- fmt.Println(rows)
- indexes = formatTableRow2ValidEdb(rows, fetchRule.EdbMatch)
- return
- }
- // AnalysisPingPianWeekEdb 解析瓶片周报中的指标数据
- func AnalysisPingPianWeekEdb(htm []byte, fetchRule *DataRule) (indexes []*HandleIndexData, err error) {
- if len(htm) == 0 || fetchRule == nil {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
- mainTitle := "周均负荷指数"
- // 从收藏按钮往上找table, 取出报告发布日期
- collectEle := doc.Find("#savenews")
- publishTimeTab := collectEle.ParentsFiltered("table").First()
- publishTxt := publishTimeTab.Find("td:first-child").Text()
- //fmt.Println("publishTxt: ", publishTxt)
- publishTime, e := extractReportPublishTime(publishTxt)
- if e != nil {
- err = fmt.Errorf("extractReportPublishTime err: %v", e)
- return
- }
- //publishYear := publishTime.Year()
- //fmt.Println(publishTime)
- //fmt.Println(publishYear)
- // 查找包含文本的<p>元素
- mainElement := doc.Find(fmt.Sprintf("h2:contains('%s')", mainTitle))
- if mainElement.Length() <= 0 {
- err = fmt.Errorf("未找到p标签, keyword: %s", mainTitle)
- return
- }
- table := mainElement.NextAllFiltered("table").First()
- if table.Length() <= 0 {
- err = fmt.Errorf("未找到p标签后的table, keyword: %s", mainTitle)
- return
- }
- var analysisPars AnalysisNoneMergeTablePars
- analysisPars.DocTable = table
- analysisPars.DateCol.StartIndex = 1
- analysisPars.DateCol.EndIndex = 3
- analysisPars.DateCol.PublishTime = publishTime
- //analysisPars.DateCol.PublishYear = publishYear
- analysisPars.DateCol.StrTimeFormat = "%d.%s"
- analysisPars.DateCol.TimeFormat = []string{"2006.1.2"}
- analysisPars.DateCol.SplitLast = true
- analysisPars.DateCol.SplitFlag = "-"
- rows := analysisNoneMergeTable(analysisPars)
- fmt.Println(rows)
- indexes = formatTableRow2ValidEdb(rows, fetchRule.EdbMatch)
- return
- }
- // AnalysisQiePianWeekEdb 解析切片周报中的指标数据
- func AnalysisQiePianWeekEdb(htm []byte, fetchRule *DataRule) (indexes []*HandleIndexData, err error) {
- if len(htm) == 0 || fetchRule == nil {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
- mainTitle := "切片纺方面"
- // 从收藏按钮往上找table, 取出报告发布日期
- collectEle := doc.Find("#savenews")
- publishTimeTab := collectEle.ParentsFiltered("table").First()
- publishTxt := publishTimeTab.Find("td:first-child").Text()
- //fmt.Println("publishTxt: ", publishTxt)
- publishTime, e := extractReportPublishTime(publishTxt)
- if e != nil {
- err = fmt.Errorf("extractReportPublishTime err: %v", e)
- return
- }
- //publishYear := publishTime.Year()
- //fmt.Println(publishTime)
- //fmt.Println(publishYear)
- // 查找包含关键词的标签
- mainElement := doc.Find(fmt.Sprintf("h2:contains('%s')", mainTitle))
- if mainElement.Length() <= 0 {
- err = fmt.Errorf("未找到关键词标签, keyword: %s", mainTitle)
- return
- }
- table := mainElement.NextAllFiltered("table").First()
- if table.Length() <= 0 {
- err = fmt.Errorf("未找到p标签后的table, keyword: %s", mainTitle)
- return
- }
- var analysisPars AnalysisNoneMergeTablePars
- analysisPars.DocTable = table
- analysisPars.DateCol.StartIndex = 1
- analysisPars.DateCol.EndIndex = 3
- analysisPars.DateCol.PublishTime = publishTime
- //analysisPars.DateCol.PublishYear = publishYear
- analysisPars.DateCol.StrTimeFormat = ""
- analysisPars.DateCol.TimeFormat = []string{"2006-1-2", "2006/1/2"}
- analysisPars.ValCol.SplitHalfVal = true
- rows := analysisNoneMergeTable(analysisPars)
- fmt.Println(rows)
- indexes = formatTableRow2ValidEdb(rows, fetchRule.EdbMatch)
- return
- }
- // AnalysisPXWeekEdb 解析PX周报中的指标数据
- func AnalysisPXWeekEdb(htm []byte, fetchRule *DataRule) (indexes []*HandleIndexData, err error) {
- if len(htm) == 0 || fetchRule == nil {
- utils.FileLog.Info("htm empty")
- return
- }
- doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(htm)))
- if e != nil {
- err = fmt.Errorf("NewDocumentFromReader err: %v", e)
- return
- }
- mainTitle := "负荷指数"
- // 从收藏按钮往上找table, 取出报告发布日期
- collectEle := doc.Find("#savenews")
- publishTimeTab := collectEle.ParentsFiltered("table").First()
- publishTxt := publishTimeTab.Find("td:first-child").Text()
- //fmt.Println("publishTxt: ", publishTxt)
- publishTime, e := extractReportPublishTime(publishTxt)
- if e != nil {
- err = fmt.Errorf("extractReportPublishTime err: %v", e)
- return
- }
- //publishYear := publishTime.Year()
- //fmt.Println(publishTime)
- //fmt.Println(publishYear)
- // 查找包含关键词的标签
- mainElement := doc.Find(fmt.Sprintf("h2:contains('%s')", mainTitle))
- if mainElement.Length() <= 0 {
- err = fmt.Errorf("未找到关键词标签, keyword: %s", mainTitle)
- return
- }
- table := mainElement.NextAllFiltered("table").First()
- if table.Length() <= 0 {
- err = fmt.Errorf("未找到p标签后的table, keyword: %s", mainTitle)
- return
- }
- var analysisPars AnalysisNoneMergeTablePars
- analysisPars.DocTable = table
- analysisPars.DateCol.StartIndex = 1
- analysisPars.DateCol.EndIndex = 3
- analysisPars.DateCol.PublishTime = publishTime
- //analysisPars.DateCol.PublishYear = publishYear
- analysisPars.DateCol.StrTimeFormat = "%d年%s"
- analysisPars.DateCol.TimeFormat = []string{"2006年1月2日"}
- rows := analysisNoneMergeTable(analysisPars)
- fmt.Println(rows)
- indexes = formatTableRow2ValidEdb(rows, fetchRule.EdbMatch)
- return
- }
- // FetchHistoryFiles 获取历史文件
- func FetchHistoryFiles(context.Context) {
- var err error
- defer func() {
- if err != nil {
- tips := fmt.Sprintf("FetchEdbHistoryFiles ErrMsg: %s", err.Error())
- utils.FileLog.Info(tips)
- fmt.Println(tips)
- }
- }()
- taskNames := []string{"原油石化早报", "PTA周报", "MEG周报", "长丝周报", "短纤周报", "瓶片周报", "切片周报", "PX周报", "PTA装置", "MEG装置", "PX装置"}
- //taskNames := []string{"原油石化早报"}
- for _, nameKey := range taskNames {
- fmt.Println("开始获取: ", nameKey)
- fetchRule, e := loadDataRule(nameKey)
- if e != nil {
- err = fmt.Errorf("loadDataRule, err: %v", e)
- return
- }
- _, e = savePageHtml(nameKey, fetchRule.PageDir, true, 0)
- if e != nil {
- err = fmt.Errorf("savePageHtml, err: %v", e)
- return
- }
- fmt.Println("结束获取: ", nameKey)
- }
- return
- }
- // ReadEdbHistoryFiles 读取历史文件
- func ReadEdbHistoryFiles(context.Context) {
- var err error
- defer func() {
- if err != nil {
- tips := fmt.Sprintf("ReadEdbHistoryFiles ErrMsg: %s", err.Error())
- utils.FileLog.Info(tips)
- fmt.Println(tips)
- }
- }()
- taskNames := []string{"原油石化早报", "PTA周报", "MEG周报", "长丝周报", "短纤周报", "瓶片周报", "切片周报", "PX周报"}
- //taskNames := []string{"原油石化早报", "PTA周报", "MEG周报", "长丝周报", "短纤周报", "瓶片周报", "切片周报", "PX周报"}
- for _, nameKey := range taskNames {
- fetchRule, e := loadDataRule(nameKey)
- if e != nil {
- utils.FileLog.Info(fmt.Sprintf("%s无解析规则, err: %v\n", nameKey, e))
- continue
- }
- filePaths, e := listFiles(fetchRule.PageDir)
- if e != nil {
- utils.FileLog.Info(fmt.Sprintf("%s读取文件目录失败, err: %v\n", nameKey, e))
- continue
- }
- for _, v := range filePaths {
- //if k > 0 {
- // break
- //}
- v = fmt.Sprintf("%s/%s", fetchRule.PageDir, v)
- fmt.Printf("开始解析: %s", v)
- //htm, e := ioutil.ReadFile("static/ccf/oil_daily/28-20240604-原油石化早报(6.7).html")
- htm, e := ioutil.ReadFile(v)
- if e != nil {
- fmt.Printf("file: %s, ReadFile err: %v\n", v, e)
- utils.FileLog.Info(fmt.Sprintf("file: %s, ReadFile err: %v", v, e))
- continue
- }
- handler, ok := TaskAnalysisHandlers[nameKey]
- if !ok {
- utils.FileLog.Info(fmt.Sprintf("%s无解析函数\n", nameKey))
- continue
- }
- indexes, e := handler(htm, fetchRule)
- if e != nil {
- fmt.Printf("file: %s, handler err: %v\n", v, e)
- utils.FileLog.Info(fmt.Sprintf("file: %s, handler err: %v", v, e))
- continue
- }
- // 写入数据库
- params := make(map[string]interface{})
- params["List"] = indexes
- params["TerminalCode"] = utils.TerminalCode
- result, e := postEdbLib(params, utils.LIB_ROUTE_CCF_EDB_HANDLE)
- if e != nil {
- b, _ := json.Marshal(params)
- fmt.Printf("file: %s, postEdbLib err: %v, params: %s\n", v, e, string(b))
- utils.FileLog.Info(fmt.Sprintf("file: %s, postEdbLib err: %v, params: %s", v, e, string(b)))
- continue
- }
- resp := new(models.BaseEdbLibResponse)
- if e = json.Unmarshal(result, &resp); e != nil {
- fmt.Printf("file: %s, json.Unmarshal err: %v\n", v, e)
- utils.FileLog.Info(fmt.Sprintf("file: %s, json.Unmarshal err: %v", v, e))
- continue
- }
- if resp.Ret != 200 {
- fmt.Printf("file: %s, Msg: %s, ErrMsg: %s\n", v, resp.Msg, resp.ErrMsg)
- utils.FileLog.Info(fmt.Sprintf("file: %s, Msg: %s, ErrMsg: %s", v, resp.Msg, resp.ErrMsg))
- continue
- }
- }
- }
- return
- }
|