package base_from_ccf import ( "bytes" "compress/gzip" "context" "encoding/json" "eta/eta_data_analysis/utils" "fmt" "github.com/PuerkitoBio/goquery" "github.com/chromedp/cdproto/network" "github.com/chromedp/chromedp" "golang.org/x/net/html/charset" "golang.org/x/text/encoding/simplifiedchinese" "golang.org/x/text/transform" "io" "log" "net/http" "net/url" "os" "path/filepath" "regexp" "strconv" "strings" "time" ) const ( CCFSearchPageUrl = "https://www.ccf.com.cn/newscenter/simplesearch.php" // CCF搜索页地址 CCFReportDetailBaseUrl = "https://www.ccf.com.cn" // CCF报告详情页地址 CCFCHARTDATAURL = "https://www.ccf.com.cn/datacenter/index.php" ) // postEdbLib 调用指标接口 func postEdbLib(param map[string]interface{}, method string) (result []byte, err error) { postUrl := utils.EDB_LIB_URL + method postData, err := json.Marshal(param) if err != nil { return } result, err = httpPost(postUrl, string(postData), "application/json") if err != nil { return } return } // httpPost HTTP请求 func httpPost(url, postData string, params ...string) ([]byte, error) { fmt.Println("httpPost Url:" + url) body := io.NopCloser(strings.NewReader(postData)) client := &http.Client{} req, err := http.NewRequest("POST", url, body) if err != nil { return nil, err } contentType := "application/x-www-form-urlencoded;charset=utf-8" if len(params) > 0 && params[0] != "" { contentType = params[0] } req.Header.Set("Content-Type", contentType) req.Header.Set("authorization", utils.MD5(utils.APP_EDB_LIB_NAME_EN+utils.EDB_LIB_Md5_KEY)) resp, err := client.Do(req) if err != nil { fmt.Println("client.Do err:" + err.Error()) return nil, err } defer func() { _ = resp.Body.Close() }() b, err := io.ReadAll(resp.Body) if err != nil { fmt.Println("httpPost:" + string(b)) } return b, err } // fetchPageHtml 获取网站HTML文本 func fetchPageHtml(baseUrl string, fetchNum int) (respBody []byte, err error) { defer func() { if err != nil { tips := fmt.Sprintf("BuildCCFRequest ErrMsg: %s", err.Error()) utils.FileLog.Info(tips) fmt.Println(tips) } }() // 查询次数 fetchNum++ if baseUrl == "" { err = fmt.Errorf("CCF请求地址为空") return } // 获取Cookie strCookie, e := getCookie() if e != nil { err = fmt.Errorf("读取cookie文件失败, err: %s", e.Error()) return } if strCookie == "" && fetchNum < 2 { fmt.Printf("文件cookie为空, 重新获取, fetchNum: %d\n", fetchNum) utils.FileLog.Info(fmt.Sprintf("文件cookie为空, 重新获取, fetchNum: %d", fetchNum)) _, err = getCookieByChrome() if err != nil { return } return fetchPageHtml(baseUrl, fetchNum) } // 拉取网站内容 cli := new(http.Client) req, e := http.NewRequest("GET", baseUrl, nil) if e != nil { err = fmt.Errorf("") return } req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7") req.Header.Set("Accept-Encoding", "gzip, deflate, br") req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6") req.Header.Set("Connection", "keep-alive") req.Header.Set("Cookie", strCookie) req.Header.Set("Host", "www.ccf.com.cn") req.Header.Set("Referer", baseUrl) req.Header.Set("Sec-Ch-Ua", "\"Not A(Brand\";v=\"99\", \"Microsoft Edge\";v=\"121\", \"Chromium\";v=\"121\"") req.Header.Set("Sec-Ch-Ua-Mobile", "?0") req.Header.Set("Sec-Ch-Ua-Platform", "\"Windows\"") req.Header.Set("Sec-Fetch-Dest", "empty") req.Header.Set("Sec-Fetch-Mode", "cors") req.Header.Set("Sec-Fetch-Site", "same-origin") req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0") req.Header.Set("X-Requested-With", "XMLHttpRequest") resp, e := cli.Do(req) if e != nil { err = fmt.Errorf("HTTP client Do err: %s", e.Error()) return } defer func() { _ = resp.Body.Close() }() // 读取响应的内容 reader, e := gzip.NewReader(resp.Body) if e != nil { err = fmt.Errorf("gzip NewReader err: %s", e.Error()) return } body, e := io.ReadAll(reader) if e != nil { err = fmt.Errorf("read body err: %s", e.Error()) return } // 转换编码 utf8Reader, e := charset.NewReaderLabel("gb2312", bytes.NewReader(body)) if e != nil { err = fmt.Errorf("utf8 reader err: %s", e.Error()) return } utf8Body, e := io.ReadAll(utf8Reader) if e != nil { err = fmt.Errorf("utf8 body err: %s", e.Error()) return } respBody = utf8Body isLoginPage := checkIsLoginPage(string(respBody)) fmt.Println("是否登录页:", isLoginPage) // 如果是登录页,且查询次数少于2次,那么就重新登录后查询 if isLoginPage && fetchNum < 2 { _, err = getCookieByChrome() if err != nil { return } return fetchPageHtml(baseUrl, fetchNum) } return } // DataRule 数据爬取规则 type DataRule struct { Name string `json:"Name"` Frequency string `json:"Frequency"` PageDir string `json:"PageDir"` Search struct { ClassId string `json:"ClassId"` SubClassId string `json:"SubClassId"` ProductId string `json:"ProductId"` SubProductId string `json:"SubProductId"` SimpleTerms string `json:"SimpleTerms"` } `json:"Search"` TableFetch []struct { Keyword string `json:"Keyword"` Unit string `json:"Unit"` } `json:"TableFetch"` EdbMatch []DataRuleEdbMatch `json:"EdbMatch"` StockTable struct { ClassifyId int `json:"ClassifyId"` } `json:"StockTable"` } // DataRuleEdbMatch 数据爬取规则-指标匹配 type DataRuleEdbMatch struct { IndexCode string `json:"IndexCode"` IndexName string `json:"IndexName"` ClassifyId int `json:"ClassifyId"` Frequency string `json:"Frequency"` Product string `json:"Product"` Market string `json:"Market"` MatchUnit string `json:"MatchUnit" description:"匹配单位"` Unit string `json:"Unit" description:"实际单位"` } // loadDataRule 从配置中读取爬取规则 func loadDataRule(nameKey string) (fetchRule *DataRule, err error) { if utils.CCFDataRuleFile == "" { err = fmt.Errorf("rule文件不存在") return } b, e := os.ReadFile(utils.CCFDataRuleFile) if e != nil { err = fmt.Errorf("读取rule文件失败, err: %v", e) return } rules := make([]*DataRule, 0) if e = json.Unmarshal(b, &rules); e != nil { err = fmt.Errorf("解析rule文件失败, err: %v", e) return } for _, v := range rules { if v.Name != "" && v.Name == nameKey { fetchRule = v return } } err = fmt.Errorf("rule不存在, nameKey: %s", nameKey) return } // savePageHtml 拉取历史报告详情 func savePageHtml(nameKey, saveDir string, historyPage bool, reportMax int) (files []string, err error) { if nameKey == "" { return } defer func() { if err != nil { tips := fmt.Sprintf("GetCCFOilEdbHistory ErrMsg: %s", err.Error()) utils.FileLog.Info(tips) fmt.Println(tips) } }() fetchRule, e := loadDataRule(nameKey) if e != nil { err = fmt.Errorf("loadDataRule, err: %v", e) return } if saveDir == "" { saveDir = "static/ccf" } // 获取品种第一页 baseUrl := fmt.Sprintf(`%s?newssubmit=1&sitename=localhost`, CCFSearchPageUrl) if fetchRule.Search.ClassId != "" { baseUrl = fmt.Sprintf(`%s&ClassID=%s`, baseUrl, fetchRule.Search.ClassId) } if fetchRule.Search.SubClassId != "" { baseUrl = fmt.Sprintf(`%s&SubClassID=%s`, baseUrl, fetchRule.Search.SubClassId) } if fetchRule.Search.ProductId != "" { baseUrl = fmt.Sprintf(`%s&ProductID=%s`, baseUrl, fetchRule.Search.ProductId) } if fetchRule.Search.SubProductId != "" { baseUrl = fmt.Sprintf(`%s&ProductID=%s`, baseUrl, fetchRule.Search.SubProductId) } if fetchRule.Search.SimpleTerms != "" { termsEncode, e := gb2312ToPercentEncoding(fetchRule.Search.SimpleTerms) if e != nil { err = fmt.Errorf("gb2312ToPercentEncoding err: %v", e) return } baseUrl = fmt.Sprintf(`%s&simpleterms=%s`, baseUrl, termsEncode) } firstPage := fmt.Sprintf(`%s&cur_pg_num=%d`, baseUrl, 1) // 首页报告链接 firstHtml, e := fetchPageHtml(firstPage, 0) if e != nil { err = fmt.Errorf("获取首页HTML失败, err: %v", e) return } firstHrefs, e := analysisReportHrefs(firstHtml, 1) if e != nil { err = fmt.Errorf("读取首页列表报告链接失败, err: %v", e) return } var historyHrefs []ReportHrefs historyHrefs = append(historyHrefs, firstHrefs...) ticker := time.NewTicker(5 * time.Second) defer ticker.Stop() // 历史报告 if historyPage { endPage, e := analysisEndPage(firstHtml) if e != nil { err = fmt.Errorf("解析首页最后页码失败, err: %v", e) return } if endPage > 1 { for i := 2; i <= endPage; i++ { <-ticker.C fmt.Printf("开始读取历史页%d\n", i) // 每页28条数据, 需要带上页码*28的偏移量不然始终获取第一页 pageUrl := fmt.Sprintf(`%s&cur_pg_num=%d&cur_row_pos=%d`, baseUrl, i, i*28) fmt.Println("pageUrl: ", pageUrl) pageContents, e := fetchPageHtml(pageUrl, 0) if e != nil { err = fmt.Errorf("获取首页HTML失败, err: %v", e) return } pageHrefs, e := analysisReportHrefs(pageContents, i) if e != nil { err = fmt.Errorf("读取第%d页列表报告链接失败, err: %v", i, e) return } historyHrefs = append(historyHrefs, pageHrefs...) fmt.Printf("结束读取历史页%d\n", i) } } fmt.Println("endPage: ", endPage) } fmt.Println("historyHrefs len: ", len(historyHrefs)) fmt.Println("historyHrefs: ", historyHrefs) // 拉取报告留档 strDate := time.Now().Format("20060102") reportCount := 0 for _, v := range historyHrefs { <-ticker.C if reportMax > 0 { reportCount += 1 if reportCount > reportMax { break } } fmt.Printf("拉取报告: %s; url: %s\n", v.Title, v.Href) htm, e := fetchPageHtml(fmt.Sprintf("%s%s", CCFReportDetailBaseUrl, v.Href), 0) if e != nil { utils.FileLog.Info("获取页面失败, err: %v", e) continue } dateDir := fmt.Sprintf("%s/%s", saveDir, strDate) if e = utils.MkDir(dateDir); e != nil { utils.FileLog.Info(fmt.Sprintf("创建目录失败, err: %v", e)) continue } outputPath := fmt.Sprintf("%s/%d-%s.html", dateDir, v.Page, v.Title) if e = utils.WriteHTMLToFile(string(htm), outputPath); e != nil { utils.FileLog.Info(fmt.Sprintf("写入HTML出错, err: %v", e)) continue } files = append(files, outputPath) } fmt.Println("拉取报告 end") return } // analysisEndPage 读取列表页最后一页页码 func analysisEndPage(contents []byte) (endPage int, err error) { doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(contents))) if e != nil { err = fmt.Errorf("NewDocumentFromReader err: %v", e) return } // 查找页码元素并遍历a标签 sectionDigg := doc.Find(".digg") aElements := sectionDigg.Find("a") // 获取倒数第二个a标签中的页码 totalAElements := aElements.Length() targetIndex := totalAElements - 2 if targetIndex >= 0 && targetIndex < totalAElements { targetA := aElements.Eq(targetIndex) txt := targetA.Text() endPage, e = strconv.Atoi(txt) if e != nil { err = fmt.Errorf("页码文本有误, %s", txt) return } fmt.Println(endPage) return } endPage = 1 return } // ReportHrefs 报告链接 type ReportHrefs struct { Title string `description:"报告标题"` Href string `description:"报告详情链接"` Page int `description:"页码"` } // analysisReportHrefs 解析列表页报告链接 func analysisReportHrefs(contents []byte, page int) (hrefs []ReportHrefs, err error) { doc, e := goquery.NewDocumentFromReader(strings.NewReader(string(contents))) if e != nil { err = fmt.Errorf("NewDocumentFromReader err: %v", e) return } doc.Find("ul.newslist li a").Each(func(_ int, s *goquery.Selection) { href, exists := s.Attr("href") if exists { title := s.Text() hrefs = append(hrefs, ReportHrefs{ Title: title, Href: href, Page: page, }) } }) return } // extractReportPublishTime 提取报告发布时间 func extractReportPublishTime(text string) (time.Time, error) { datePattern := `(\d{4}年\d{1,2}月\d{1,2}日\d{1,2}:\d{2})` re := regexp.MustCompile(datePattern) var strTime string match := re.FindStringSubmatch(text) if len(match) <= 0 { return time.Time{}, fmt.Errorf("没有读取出日期") } strTime = match[0] // 转为时间格式 dateFormat := "2006年01月02日15:04" parsedDate, e := time.ParseInLocation(dateFormat, strTime, time.Local) if e != nil { return time.Time{}, fmt.Errorf("日期转换失败, str: %s, err: %v", strTime, e) } return parsedDate, nil } // calculateDataHalfVal 取出数据区间的折中值, 如"7-9天"返回结果为"8" func calculateDataHalfVal(duration string) (result string, err error) { re := regexp.MustCompile(`\d+`) matches := re.FindAllString(duration, -1) if len(matches) != 2 { err = fmt.Errorf("未找到两个数字, Num: %d", len(matches)) return } a, e := strconv.Atoi(matches[0]) if e != nil { err = e return } b, e := strconv.Atoi(matches[1]) if e != nil { err = e return } average := float64(a+b) / 2.0 // 格式化结果 if average == float64(int(average)) { result = strconv.Itoa(int(average)) } else { result = fmt.Sprintf("%.1f", average) } return } // gb2312ToPercentEncoding 中文字符转码 func gb2312ToPercentEncoding(input string) (string, error) { // 创建GB18030编码转换器(兼容GB2312) encoder := simplifiedchinese.GB18030.NewEncoder() // 使用转换器将字符串转换为GB18030编码的字节流,并写入bytes.Buffer var buf bytes.Buffer writer := transform.NewWriter(&buf, encoder) _, err := writer.Write([]byte(input)) if err != nil { return "", err } err = writer.Close() if err != nil { return "", err } // 将字节流转换为百分号编码 percentEncoded := url.QueryEscape(buf.String()) return percentEncoded, nil } // AnalysisNoneMergeTablePars 解析无合并单元格的简单表格入参 type AnalysisNoneMergeTablePars struct { DocTable *goquery.Selection MarketCol struct { HasCol bool `description:"是否有市场列"` ColIndex int `description:"市场列"` } DateCol struct { StartIndex int `description:"日期开始列"` EndIndex int `description:"日期结束列"` PublishTime time.Time `description:"报告发布时间"` //PublishYear int `description:"报告发布年份"` StrTimeFormat string `description:"数据日期格式-需拼接日期列中的变量"` TimeFormat []string `description:"标准日期格式, 可能存在多种分别进行遍历"` SplitLast bool `description:"是否分隔日期: 如1.24-1.28"` SplitFlag string `description:"分隔日期分隔符: 如-"` } ValCol struct { SplitHalfVal bool `description:"是否取折中值: 如8-10天, 9-12天"` } } // TableRow 读取Table的行信息 type TableRow struct { Product string Market string DateData map[string]string Unit string } // analysisNoneMergeTable 解析无合并单元格的简单表格 func analysisNoneMergeTable(params AnalysisNoneMergeTablePars) (items []TableRow) { if params.DocTable != nil && params.DocTable.Length() <= 0 { return } attemptDates := []string{"2006/1/2", "2006/01/02", "2006/01/2", "2006/1/02", "2006-1-2", "2006-01-02", "2006-01-2", "2006-1-02", "2006.01.02", "2006.1.2", "2006.1.02", "2006.01.2", "2006年01月02日", "2006年1月2日", "2006年1月02日", "2006年01月2日"} colDate := make(map[int]string) params.DocTable.Find("tbody").Children().Each(func(i int, s *goquery.Selection) { cells := s.Find("td") // 表头取出日期 if i == 0 { cells.Each(func(ii int, ss *goquery.Selection) { cellTxt := strings.TrimSpace(ss.Text()) //fmt.Println("cellTxt", cellTxt) if ii >= params.DateCol.StartIndex && ii <= params.DateCol.EndIndex { //strTime := fmt.Sprintf("%d.%s", publishYear, cellTxt) //var strTimeFormat string completeTime := cellTxt // 是否需要拼接年份 if params.DateCol.StrTimeFormat != "" { strDate := cellTxt // 是否取分隔日期的后一个日期 if params.DateCol.SplitLast && params.DateCol.SplitFlag != "" { dateArr := strings.Split(cellTxt, params.DateCol.SplitFlag) if len(dateArr) > 1 { strDate = dateArr[len(dateArr)-1] } } completeTime = fmt.Sprintf(params.DateCol.StrTimeFormat, params.DateCol.PublishTime.Year(), strDate) } //fmt.Println("completeTime: ", completeTime) // 遍历多种可能的日期格式 var colTime time.Time for _, f := range params.DateCol.TimeFormat { t, e := time.ParseInLocation(f, completeTime, time.Local) if e != nil { continue } colTime = t break } // 统一判断一次, 入参的日期格式可能不全 if colTime.IsZero() { utils.FileLog.Info(fmt.Sprintf("日期格式异常: cellTxt-%s; completeTime-%s", cellTxt, completeTime)) for _, f := range attemptDates { t, e := time.ParseInLocation(f, completeTime, time.Local) if e != nil { continue } colTime = t break } } // 判断报告是否跨年 if colTime.AddDate(0, -6, 0).After(params.DateCol.PublishTime) { utils.FileLog.Info(fmt.Sprintf("跨年判断: ColTime-%v; PublishTime-%v", colTime, params.DateCol.PublishTime)) colTime = colTime.AddDate(-1, 0, 0) } if !colTime.IsZero() { colDate[ii] = colTime.Format(utils.FormatDate) } fmt.Println("日期:", colTime.Format(utils.FormatDate)) } }) } // 取指标 if i > 0 { row := TableRow{ DateData: make(map[string]string), } cells.Each(func(ii int, ss *goquery.Selection) { cellTxt := filterInvalidVal(ss.Text()) //fmt.Println("cellTxt", cellTxt) if ii == 0 { row.Product = cellTxt } if params.MarketCol.HasCol && ii == params.MarketCol.ColIndex { row.Market = cellTxt } if ii >= params.DateCol.StartIndex && ii <= params.DateCol.EndIndex { d, ok := colDate[ii] if !ok { return } // 是否取折中值 if params.ValCol.SplitHalfVal { val, e := calculateDataHalfVal(cellTxt) if e != nil { fmt.Printf("calculateDataHalfVal err: %v\n", e) return } cellTxt = val } if cellTxt != "" { row.DateData[d] = cellTxt } } }) //fmt.Println(row) items = append(items, row) } }) return } // formatTableRow2ValidEdb 表格行转换为有效指标 func formatTableRow2ValidEdb(rows []TableRow, edbMatch []DataRuleEdbMatch) (indexes []*HandleIndexData) { indexes = make([]*HandleIndexData, 0) for _, m := range edbMatch { for _, v := range rows { fmt.Printf("产品: %s, 市场: %s, 日期数据: %v, 单位: %s\n", v.Product, v.Market, v.DateData, v.Unit) var productOk, marketOk, unitOk bool if (m.Product == "" && v.Product == "") || (m.Product != "" && strings.Contains(v.Product, m.Product)) { productOk = true } if (m.Market == "" && v.Market == "") || (m.Market != "" && strings.Contains(v.Market, m.Market)) { marketOk = true } if (m.MatchUnit == "" && v.Unit == "") || (m.MatchUnit != "" && strings.Contains(v.Unit, m.MatchUnit)) { unitOk = true } if productOk && marketOk && unitOk { edb := new(HandleIndexData) edb.IndexCode = m.IndexCode edb.IndexName = m.IndexName edb.ClassifyId = m.ClassifyId edb.Frequency = m.Frequency edb.Unit = m.Unit edb.DateData = v.DateData edb.TerminalCode = utils.TerminalCode indexes = append(indexes, edb) } } } return } // listFiles 列出目录下所有文件名 func listFiles(dirPath string) ([]string, error) { var files []string err := filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error { if err != nil { return err } if !info.IsDir() { files = append(files, info.Name()) } return nil }) if err != nil { return nil, err } return files, nil } // filterInvalidVal 过滤无效值 func filterInvalidVal(cellTxt string) string { cellTxt = strings.TrimSpace(cellTxt) if cellTxt == "休市" || cellTxt == "/" || cellTxt == "-" || cellTxt == "—" { return "" } return cellTxt } // formatIntervalData 格式化区间值 func formatIntervalData(cellTxt, flag string) string { cellTxt = filterInvalidVal(cellTxt) if flag == "" { flag = "-" } matches := strings.Split(cellTxt, flag) if len(matches) < 2 { return cellTxt } if len(matches) != 2 { return "" } // 转换不了直接返回空值 a, e := strconv.ParseFloat(matches[0], 64) if e != nil { return "" } b, e := strconv.ParseFloat(matches[1], 64) if e != nil { return "" } average := (a + b) / 2 return fmt.Sprint(average) } // getCookie // @Description: 获取cookie // @author: Roc // @datetime 2024-07-09 14:00:53 // @return cookieStr string // @return err error func getCookie() (cookieStr string, err error) { // 读取Cookie if utils.CCFCookieFile == "" { err = fmt.Errorf("cookie文件未配置") return } cookieByte, e := os.ReadFile(utils.CCFCookieFile) if e != nil { err = fmt.Errorf("读取cookie文件失败, err: %s", e.Error()) return } cookieStr = strings.TrimSpace(string(cookieByte)) //if cookieStr == "" { // err = fmt.Errorf("cookie为空") // return //} return } // getCookieByChrome // @Description: 获取cookie // @author: Roc // @datetime 2024-07-09 14:00:53 // @return cookieStr string // @return err error func getCookieByChrome() (cookieStr string, err error) { // 读取Cookie if utils.CCFUseName == "" { err = fmt.Errorf("CCF账号未设置") return } if utils.CCFPassword == "" { err = fmt.Errorf("CCF密码未设置") return } opts := append( chromedp.DefaultExecAllocatorOptions[:], chromedp.Flag("headless", false), ) allocCtx, cancel1 := chromedp.NewExecAllocator(context.Background(), opts...) defer cancel1() // 创建chrome实例 ctx, cancel2 := chromedp.NewContext( allocCtx, chromedp.WithLogf(log.Printf), ) defer cancel2() err = chromedp.Run(ctx, chromedp.Navigate(`https://www.ccf.com.cn/member/member.php`), chromedp.SetValue(`input[name="username"]`, utils.CCFUseName, chromedp.ByQuery), chromedp.SetValue(`input[name="password"]`, utils.CCFPassword, chromedp.ByQuery), chromedp.Sleep(2*time.Second), chromedp.Click(`input[id="imageField"]`, chromedp.ByQuery), chromedp.Sleep(5*time.Second), chromedp.Navigate(`https://www.ccf.com.cn/newscenter/detail-410000-2024070600003.shtml`), chromedp.Sleep(2*time.Second), chromedp.ActionFunc(func(ctx context.Context) error { cookies, err := network.GetCookies().Do(ctx) if err != nil { return err } //cookieJson, err := json.Marshal(cookies) //if err != nil { // return err //} //fmt.Println("cookieJson:", string(cookieJson)) //utils.FileLog.Info("cookieJson:" + string(cookieJson)) for _, v := range cookies { cookieStr = cookieStr + v.Name + "=" + v.Value + ";" } fmt.Println("header cookie:", cookieStr) utils.FileLog.Info("header cookie:" + cookieStr) tmpFile, tmpErr := os.Create(utils.CCFCookieFile) if tmpErr != nil { fmt.Println("创建cookie文件失败:", tmpErr.Error()) return nil } if _, err := tmpFile.WriteString(cookieStr); err != nil { fmt.Println("写入cookie到文件失败:", err.Error()) return nil } return nil }), ) //if err != nil { // fmt.Println(err) //} return } // checkIsLoginPage // @Description: 校验是否是登录页 // @author: Roc // @datetime 2024-07-09 16:34:17 // @param bodyStr string // @return isLoginPage bool func checkIsLoginPage(bodyStr string) (isLoginPage bool) { // 初始化goquery.Document doc, err := goquery.NewDocumentFromReader(strings.NewReader(bodyStr)) if err != nil { log.Fatal(err) } // 查找name为LoginForm的表单 doc.Find("form[name=LoginForm]").Each(func(i int, s *goquery.Selection) { // 如果找到了,打印信息表示这是登录页 //fmt.Println("这是一个登录页面") isLoginPage = true return }) // 如果没有找到,打印信息表示这不是登录页 //if doc.Find("form[name=LoginForm]").Length() == 0 { // fmt.Println("这不是一个登录页面") //} return }