Browse Source

广期所爬虫

xyxie 1 year ago
parent
commit
a11c988980
5 changed files with 159 additions and 259 deletions
  1. 0 70
      models/base_from_yongyi.go
  2. 0 3
      models/db.go
  3. 57 0
      services/trade_guangzhou.go
  4. 0 186
      services/yongyi_pig.go
  5. 102 0
      utils/common.go

+ 0 - 70
models/base_from_yongyi.go

@@ -1,70 +0,0 @@
-package models
-
-import (
-	"github.com/beego/beego/v2/client/orm"
-	"time"
-)
-
-type BaseFromYongyiIndex struct {
-	YongyiIndexId int `orm:"column(yongyi_index_id);pk"`
-	ClassifyId    int
-	IndexCode     string
-	IndexName     string
-	Frequency     string
-	Unit          string
-	Sort          int
-	CreateTime    time.Time
-	ModifyTime    time.Time
-}
-
-// Add 新增
-func (y *BaseFromYongyiIndex) Add(list []*BaseFromYongyiIndex) (err error) {
-	o := orm.NewOrmUsingDB("data")
-	_, err = o.InsertMulti(len(list), list)
-	return
-}
-
-func GetBaseFromYongyiIndexByClassifyId(classifyId int) (list []*BaseFromYongyiIndex, err error) {
-	o := orm.NewOrmUsingDB("data")
-	sql := `SELECT * FROM base_from_yongyi_index where classify_id=?`
-	_, err = o.Raw(sql, classifyId).QueryRows(&list)
-	return
-}
-
-type BaseFromYongyiData struct {
-	YongyiDataId  int `orm:"column(yongyi_data_id);pk"`
-	YongyiIndexId int
-	IndexCode     string
-	DataTime      string
-	Value         string
-	CreateTime    time.Time
-	ModifyTime    time.Time
-	DataTimestamp int64
-}
-
-// Add 新增
-func (y *BaseFromYongyiData) Add(list []*BaseFromYongyiData) (err error) {
-	o := orm.NewOrmUsingDB("data")
-	_, err = o.InsertMulti(len(list), list)
-	return
-}
-
-// BaseFromYongyiClassify 涌益咨询原始数据分类表
-type BaseFromYongyiClassify struct {
-	ClassifyId      int       `orm:"column(classify_id);pk"`
-	ClassifyName    string    `description:"分类名称"`
-	ParentId        int       `description:"父级id"`
-	SysUserId       int       `description:"创建人id"`
-	SysUserRealName string    `description:"创建人姓名"`
-	Level           int       `description:"层级"`
-	Sort            int       `description:"排序字段,越小越靠前,默认值:10"`
-	ModifyTime      time.Time `description:"修改时间"`
-	CreateTime      time.Time `description:"创建时间"`
-}
-
-// Add 新增
-func (y *BaseFromYongyiClassify) Add(list []*BaseFromYongyiClassify) (err error) {
-	o := orm.NewOrmUsingDB("data")
-	_, err = o.InsertMulti(len(list), list)
-	return
-}

+ 0 - 3
models/db.go

@@ -56,8 +56,5 @@ func init() {
 		new(BaseFromNationalStatisticsIndex),
 		new(BaseFromNationalStatisticsData),
 		new(MeetingProbabilities),
-		new(BaseFromYongyiClassify),
-		new(BaseFromYongyiIndex),
-		new(BaseFromYongyiData),
 	)
 }

+ 57 - 0
services/trade_guangzhou.go

@@ -0,0 +1,57 @@
+package services
+
+import (
+	"encoding/json"
+	"eta/eta_crawler/services/alarm_msg"
+	"eta/eta_crawler/utils"
+	"fmt"
+	"time"
+)
+
+// SyncRankingFromGuangzhou requests the daily quotes list from the Guangzhou
+// Futures Exchange site (www.gfex.com.cn) for today's trade date and decodes
+// the JSON response into a Message.
+//
+// The decoded Message is not stored or returned by this function. When the
+// request or the decoding fails, the error is printed, written to
+// utils.FileLog and sent through alarm_msg.SendAlarmMsg.
+func SyncRankingFromGuangzhou() {
+	var err error
+	defer func() {
+		// Report only real failures: calling err.Error() on a nil error would
+		// panic, and a successful run must not raise an alarm.
+		if err == nil {
+			return
+		}
+		msg := "失败提醒" + "SyncRankingFromGuangzhou ErrMsg:" + err.Error()
+		fmt.Println("msg:", msg)
+		utils.FileLog.Info(msg)
+		go alarm_msg.SendAlarmMsg(msg, 3)
+	}()
+	var message Message
+	// i is the number of days to look back from today. With these bounds the
+	// loop body runs exactly once, for today.
+	for i := 0; i >= 0; i-- {
+		zzUrl := `http://www.gfex.com.cn/u/interfacesWebTiDayQuotes/loadList`
+		dateStr := time.Now().AddDate(0, 0, -i).Format(utils.FormatDateUnSpace)
+		fmt.Println(zzUrl)
+
+		// The endpoint is called with a form-encoded body and browser-like headers.
+		params := fmt.Sprintf("trade_date=%s&trade_type=0", dateStr)
+		headerParams := map[string]string{
+			"User-Agent":   "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
+			"Content-Type": "application/x-www-form-urlencoded;charset=utf-8",
+		}
+		body, e := utils.HttpPost(zzUrl, params, headerParams)
+		if e != nil {
+			err = e
+			fmt.Println("err:", err)
+			return
+		}
+		err = json.Unmarshal(body, &message)
+		if err != nil {
+			// err stays set, so the deferred handler still reports the failure.
+			fmt.Println("Unmarshal Err:", err)
+			continue
+		}
+	}
+}

+ 0 - 186
services/yongyi_pig.go

@@ -1,186 +0,0 @@
-package services
-
-import (
-	"eta/eta_crawler/models"
-	"eta/eta_crawler/utils"
-	"fmt"
-	"github.com/tealeg/xlsx"
-	"strconv"
-	"time"
-)
-
-func HandleYongyiExcelDaily(uploadPath string) {
-	var err error
-	xlFile, err := xlsx.OpenFile(uploadPath)
-	if err != nil {
-		err = fmt.Errorf("打开文件失败, Err: %s", err)
-		return
-	}
-
-	dateMap := make(map[int]string)
-	nameMap := make(map[int]string)
-
-	indexDataList := make([]*models.BaseFromYongyiData, 0)
-	indexList := make([]*models.BaseFromYongyiIndex, 0)
-	dailyPriceSheet, ok := xlFile.Sheet["出栏价"]
-	if ok {
-		// todo 处理分类信息,一个sheet表示一个分类
-		//classifyName := "日度-商品猪出栏价"
-		classifyId := 1
-		now := time.Now()
-		frequency := "日度"
-		unit := "元/公斤"
-		namePrefix := "商品猪出栏价"
-
-		sheet := dailyPriceSheet
-		// 遍历行读取
-		maxRow := sheet.MaxRow
-		fmt.Println("最大行")
-		fmt.Println(maxRow)
-		// 指标名称
-		indexMap := make(map[string]*models.BaseFromYongyiIndex)
-		for i := 0; i < maxRow; i++ {
-			fmt.Printf("当前第%d行 \n", i)
-			if i == 0 { // 首行,表示时间
-				row := sheet.Row(i)
-				cells := row.Cells
-				for k, cell := range cells {
-					text := cell.String()
-					if k > 1 && text != "" {
-						if cell.IsTime() {
-							dateText, _ := cell.GetTime(false)
-							text = dateText.Format(utils.FormatDate)
-						}
-						// 检查单元格是否为合并单元格
-						if cell.HMerge > 0 {
-							for j := 1; j <= cell.HMerge; j++ {
-								dateMap[k+j] = text
-							}
-						}
-						fmt.Printf("合并单元格开始列:%d \n", k)
-						dateMap[k] = text
-					}
-				}
-			} else if i == 1 { //表示表头
-				// 处理 index指标表
-				row := sheet.Row(i)
-				cells := row.Cells
-				for k, cell := range cells {
-					text := cell.String()
-					nameMap[k] = text
-				}
-			} else { //数据列
-				// 新增指标记录,并返回对应的ID
-				existIndexMap := make(map[string]*models.BaseFromYongyiIndex)
-
-				//获取所有指标信息
-				allIndex, err := models.GetBaseFromYongyiIndexByClassifyId(classifyId)
-				if err != nil {
-					return
-				}
-				for _, v := range allIndex {
-					indexKey := v.IndexName
-					existIndexMap[indexKey] = v
-				}
-				row := sheet.Row(i)
-				cells := row.Cells
-				province := ""
-				for k, cell := range cells {
-					fmt.Printf("当前第%d列 \n", k)
-					text := cell.String()
-					if k == 0 {
-						province = text
-						continue
-					} else if k == 1 {
-						continue
-					}
-
-					date, ok1 := dateMap[k]
-					if !ok1 {
-						err = fmt.Errorf("找不到对应的日期,第%d行,第%d列", i, k)
-						return
-					}
-					name, ok2 := nameMap[k]
-					if !ok2 {
-						err = fmt.Errorf("找不到对应的列名,第%d行,第%d列", i, k)
-						return
-					}
-					// todo 放到对应的指标名称的下方
-					if name != "规模场" && name != "小散户" && name != "均价" {
-						// 只处理以上三个类型,其余过滤
-						continue
-					}
-					fmt.Printf("当前第%d行第%d列, 当前省份%s \n", i, k, province)
-					// 处理指标名称
-					fullIndexName := fmt.Sprintf("%s/%s/%s", namePrefix, province, name)
-					indexItem, okIndex := indexMap[fullIndexName]
-
-					if !okIndex {
-						// 新增指标
-						indexItem = new(models.BaseFromYongyiIndex)
-						indexItem.IndexName = fullIndexName
-						indexItem.ClassifyId = classifyId
-						// todo 处理indexCode
-						indexItem.IndexCode = fullIndexName
-						indexItem.Frequency = frequency
-						indexItem.Unit = unit
-						indexItem.ModifyTime = now
-						indexItem.CreateTime = now
-						// todo 查询是否已存在,如果已存在,则获取已存在的指标ID
-						if exist, ok3 := existIndexMap[fullIndexName]; ok3 {
-							indexItem.YongyiIndexId = exist.YongyiIndexId
-						} else {
-							indexList = append(indexList, indexItem)
-						}
-						indexMap[fullIndexName] = indexItem
-					} else {
-						// todo 查询是否已存在,如果已存在,则获取已存在的指标ID
-						if exist, ok3 := existIndexMap[fullIndexName]; ok3 {
-							indexItem.YongyiIndexId = exist.YongyiIndexId
-						}
-					}
-					fmt.Printf("indexItem%s", indexItem.IndexCode)
-
-					dataItem := new(models.BaseFromYongyiData)
-					dataItem.IndexCode = indexItem.IndexCode
-					dataItem.YongyiIndexId = indexItem.YongyiIndexId
-					dataItem.DataTime = date
-					dataItem.Value = text
-					dataItem.CreateTime = now
-					dataItem.ModifyTime = now
-					// 判断指标是否已存在
-					indexDataList = append(indexDataList, dataItem)
-					continue
-
-				}
-			}
-		}
-
-		if len(indexList) > 0 {
-			index := new(models.BaseFromYongyiIndex)
-			err = index.Add(indexList)
-			if err != nil {
-				fmt.Println(err)
-				return
-			}
-		}
-		// 处理最终的数据并入库
-		if len(indexDataList) > 0 {
-			pigData := new(models.BaseFromYongyiData)
-			err = pigData.Add(indexDataList)
-			if err != nil {
-				fmt.Println(err)
-				return
-			}
-		}
-	} else {
-		fmt.Println("该文件不存在")
-	}
-
-}
-
-func excelDateToDate(excelDate string) time.Time {
-	excelTime := time.Date(1899, time.December, 30, 0, 0, 0, 0, time.UTC)
-	var days, _ = strconv.Atoi(excelDate)
-	return excelTime.Add(time.Second * time.Duration(days*86400))
-}

+ 102 - 0
utils/common.go

@@ -13,11 +13,13 @@ import (
 	"image"
 	"image/png"
 	"io"
+	"io/ioutil"
 	"math"
 	"math/big"
 	"math/rand"
 	"net"
 	"net/http"
+	"net/http/cookiejar"
 	"os"
 	"os/exec"
 	"path"
@@ -986,3 +988,103 @@ func GetLocalIP() (ip string, err error) {
 	}
 	return
 }
+
+// RequestParam is a sample POST request payload, for example:
+//
+//	{
+//		"A" : 10086,
+//		"B" : "request info"
+//	}
+type RequestParam struct {
+	A int    `json:"A"`
+	B string `json:"B"`
+}
+
+// HTTPRspBody is the envelope of a JSON response shaped like:
+//
+//	{"Result":{"RequestId":"12131","HasError":true,"ResponseItems":{"ErrorMsg":"error text"}}}
+type HTTPRspBody struct {
+	Result Results `json:"Result"`
+}
+
+// Results is the object found under the "Result" key of HTTPRspBody.
+type Results struct {
+	// RequestID maps to the "RequestId" key of the response; tagging it as
+	// "Result" would leave it empty after decoding.
+	RequestID     string   `json:"RequestId"`
+	HasError      bool     `json:"HasError"`
+	ResponseItems ErrorMsg `json:"ResponseItems"`
+}
+
+// ErrorMsg carries the error text found under the "ResponseItems" key.
+type ErrorMsg struct {
+	ErrorMsg string `json:"ErrorMsg"`
+}
+
+// HttpPost sends reqParam as the body of a POST request to url, sets every
+// entry of headersParams as a request header, and returns the raw response
+// body.
+//
+// A new cookie jar is attached to the client for each call. The HTTP status
+// code is not inspected: the body is returned whatever the status. On any
+// failure the returned slice is nil and the error is non-nil.
+func HttpPost(url string, reqParam string, headersParams map[string]string) ([]byte, error) {
+	// Build the request.
+	httpReq, err := http.NewRequest("POST", url, strings.NewReader(reqParam))
+	if err != nil {
+		fmt.Printf("NewRequest fail, url: %s, reqBody: %s, err: %v\n", url, reqParam, err)
+		return nil, err
+	}
+	for k, v := range headersParams {
+		httpReq.Header.Set(k, v)
+	}
+
+	jar, err := cookiejar.New(nil)
+	if err != nil {
+		// Hand the failure to the caller instead of crashing the process.
+		fmt.Printf("cookiejar fail, url: %s, err: %v\n", url, err)
+		return nil, err
+	}
+	client := &http.Client{Jar: jar}
+
+	// Send the request.
+	httpRsp, err := client.Do(httpReq)
+	if err != nil {
+		fmt.Printf("do http fail, url: %s, reqBody: %s, err:%v\n", url, reqParam, err)
+		return nil, err
+	}
+	defer httpRsp.Body.Close()
+
+	// Read the whole response body; decoding is left to the caller.
+	b, err := ioutil.ReadAll(httpRsp.Body)
+	if err != nil {
+		fmt.Printf("ReadAll failed, url: %s, reqBody: %s, err: %v\n", url, reqParam, err)
+		return nil, err
+	}
+
+	return b, nil
+}
+
+// HttpGet issues a GET request to reqUrl with payload as the request body,
+// reads the response to the end and prints cookie diagnostics to stdout: the
+// raw Set-Cookie response header, the request's Cookie header, and every
+// cookie parsed from the response.
+//
+// The response body is discarded; only an error, if any, is returned.
+func HttpGet(reqUrl, payload string) (err error) {
+	client := &http.Client{}
+	req, err := http.NewRequest("GET", reqUrl, strings.NewReader(payload))
+	if err != nil {
+		return err
+	}
+	res, err := client.Do(req)
+	if err != nil {
+		return err
+	}
+	defer res.Body.Close()
+	if _, err = ioutil.ReadAll(res.Body); err != nil {
+		return err
+	}
+
+	// Servers deliver cookies in Set-Cookie; a response carries no Cookie
+	// header, so reading "Cookie" here would always print an empty line.
+	setCookie := res.Header.Get("Set-Cookie")
+	fmt.Println(setCookie)
+	rcookie := req.Header.Get("Cookie")
+	fmt.Println("rcookie")
+	fmt.Println(rcookie)
+	cookiesArr := res.Cookies()
+	fmt.Println("cookiesArrLen:", len(cookiesArr))
+	for k, v := range cookiesArr {
+		fmt.Println(k, v)
+	}
+	return nil
+}