package services

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"log"
	"mime/multipart"
	"net/http"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"

	"hongze/hongze_data_crawler/models"
	"hongze/hongze_data_crawler/utils"
)

const (
	// dalianRankingURL is the member deal/position ranking page of the Dalian
	// Commodity Exchange that every request in this file posts to.
	dalianRankingURL = "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"

	// dalianSessionCookie is sent with every request.
	// NOTE(review): this is a hard-coded session id; presumably the site does
	// not validate it — confirm, or obtain a fresh session before crawling.
	dalianSessionCookie = "JSESSIONID=36ACF02A59227A3854F9D5D5E2FB5F2E; WMONID=R5ojcAIIcx-"
)

// dalianHTTPClient is shared by all requests so connections are reused, and
// carries a timeout so a stalled response cannot block the crawler forever.
var dalianHTTPClient = &http.Client{Timeout: 60 * time.Second}

// SearchList describes one variety (product) and the contracts found for it.
type SearchList struct {
	VarietyName string `description:"商品名称"`      // product name as shown on the page
	CarietyCode string `description:"商品名称对应的编码"` // code the page uses for that product
	List        []SearchContractId              // not read by the code in this file
	ListSearch  []*SearchContractId             // contracts scraped for this variety
}

// SearchContractId holds a single contract identifier scraped from the page.
type SearchContractId struct {
	ContractId string `description:"商品类型"`
}

// SyncRankingFromDalianDo synchronises the last 200 days of data, oldest day
// first, ending with today (offset 0).
func SyncRankingFromDalianDo() {
	for i := 200; i >= 0; i-- {
		// A failed day is reported by SyncRankingFromDalianSearch itself;
		// carry on with the remaining days.
		_ = SyncRankingFromDalianSearch(i)
	}
}

// SyncRankingFromDalianSearch crawls the ranking data for the day dayNum days
// before today: it loads the search page to discover the varieties, loads the
// page of each variety to discover its contracts, then stores the ranking of
// every contract.
//
// Any error is printed and e-mailed once by the deferred handler and is also
// returned to the caller.
func SyncRankingFromDalianSearch(dayNum int) (err error) {
	fmt.Println("start")
	defer func() {
		if err != nil {
			reportDalianFailure("RefreshDataFromDalian", err)
		}
	}()

	// Resolve the target date once so every request of this run uses the same day.
	timeDate := dalianTargetDate(dayNum)
	currDate := timeDate.Format(utils.FormatDateUnSpace)

	body, err := fetchDalianRankingHTML(timeDate, "", "", "")
	if err != nil {
		return err
	}

	exitProductMaps, _, varietyArrMaps := DoSearch(body)
	items := make([]*SearchList, 0, len(exitProductMaps))
	// DoSearch numbers the entries 0..n-1; walking the indexes (instead of
	// ranging over the map) keeps the processing order stable.
	for k := 0; k < len(exitProductMaps); k++ {
		name, ok := exitProductMaps[k]
		if !ok {
			continue
		}
		item := &SearchList{
			VarietyName: name,
			CarietyCode: varietyArrMaps[k],
		}
		htmlBody, fetchErr := fetchDalianRankingHTML(timeDate, "", item.CarietyCode, item.VarietyName)
		if fetchErr != nil {
			return fetchErr
		}
		item.ListSearch = DoSearchMap(htmlBody)
		items = append(items, item)
	}

	for k, v := range items {
		for _, v2 := range v.ListSearch {
			fmt.Println(currDate, v.VarietyName, v2.ContractId, v.CarietyCode, k)
		}
	}

	return syncDalianRanking(timeDate, items)
}

// DoSearch extracts the initial search conditions from the ranking page.
//
// It returns three maps keyed by a running index starting at 0:
//   - exitProductMaps: product names, taken from the 1st and 3rd
//     ".selBox ul" lists (one name per line);
//   - exitContractIdMaps: contract ids, taken from the 4th ".selBox ul" list;
//   - varietyArrMaps: variety codes, taken from the argument of every
//     setVariety('...') onclick handler in the raw HTML.
//
// If the HTML cannot be parsed the error is logged and empty maps are returned.
func DoSearch(body string) (exitProductMaps, exitContractIdMaps, varietyArrMaps map[int]string) {
	exitProductMaps = make(map[int]string)
	exitContractIdMaps = make(map[int]string)
	varietyArrMaps = make(map[int]string)

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	if err != nil {
		// A bad page must not terminate the whole process.
		log.Println("DoSearch parse html err:", err)
		return exitProductMaps, exitContractIdMaps, varietyArrMaps
	}

	var pNum, cidNum, vNum int
	doc.Find(".selBox ul").Each(func(i int, s *goquery.Selection) {
		ulTxt := s.Text()
		if ulTxt == "" {
			return
		}
		switch i {
		case 0, 2:
			for _, line := range strings.Split(ulTxt, "\n") {
				if name := stripDalianWhitespace(line); name != "" {
					exitProductMaps[pNum] = name
					pNum++
				}
			}
		case 3:
			for _, id := range strings.Fields(ulTxt) {
				exitContractIdMaps[cidNum] = id
				cidNum++
			}
		}
	})

	for _, chunk := range strings.Split(body, "onclick=\"javascript:setVariety('") {
		end := strings.Index(chunk, "');")
		if end <= 0 {
			continue
		}
		// Longer matches are not variety codes (e.g. the text before the
		// first handler that happens to contain "');").
		if code := chunk[:end]; len(code) < 10 {
			varietyArrMaps[vNum] = code
			vNum++
		}
	}
	return exitProductMaps, exitContractIdMaps, varietyArrMaps
}

// DoSearchMap extracts the contract ids listed in the 4th ".selBox ul" of the
// page, in document order. If the HTML cannot be parsed the error is logged
// and nil is returned.
func DoSearchMap(body string) (items []*SearchContractId) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	if err != nil {
		log.Println("DoSearchMap parse html err:", err)
		return nil
	}

	doc.Find(".selBox ul").Each(func(i int, s *goquery.Selection) {
		if i != 3 {
			return
		}
		for _, id := range strings.Fields(s.Text()) {
			items = append(items, &SearchContractId{ContractId: id})
		}
	})
	return items
}

// DoHtml parses the ranking table in body and stores it.
//
// name is the variety name, contractId the contract the page belongs to and
// dateTime the trading day. listDataMap maps
// DealShortName+ClassifyType+DataTime of rows that already exist to their id:
// rows found in it are updated, all others are inserted in one statement.
//
// Any error is printed and e-mailed by the deferred handler and returned.
func DoHtml(body, name, contractId string, dateTime time.Time, listDataMap map[string]int) (err error) {
	defer func() {
		if err != nil {
			reportDalianFailure("RefreshDataFromDaLian", err)
		}
	}()
	return storeDalianRanking(body, name, contractId, dateTime, listDataMap)
}

// DlIndexCodeGenerator returns the index code registered for indexName. When
// none is cached in ineIndexCodeMap it builds a new one ("DL" + timestamp +
// random number in the range passed to utils.GetRandInt + suffix), caches it
// and records the mapping through models.AddBaseFromTradeMapping; a failure to
// record is only printed.
//
// NOTE(review): ineIndexCodeMap is declared outside this file; it is assumed
// to be an initialised map[string]string — confirm.
func DlIndexCodeGenerator(indexName, suffix string) string {
	if code := ineIndexCodeMap[indexName]; code != "" {
		return code
	}

	code := fmt.Sprintf("DL%s", time.Now().Format(utils.FormatDateTimeUnSpace)+strconv.Itoa(utils.GetRandInt(1, 100))+suffix)
	ineIndexCodeMap[indexName] = code
	if err := models.AddBaseFromTradeMapping(indexName, code, "DL"); err != nil {
		fmt.Println("add Code err:", err)
	}
	return code
}

// GetDalianHtmlBody downloads the ranking page for the day dayNum days before
// today, filtered by the given variety code and contract id (either may be
// empty). varietyName is only used in failure messages.
//
// Any error is printed and e-mailed by the deferred handler and returned; body
// is empty in that case.
func GetDalianHtmlBody(dayNum int, contractId, carietyCode, varietyName string) (body string, err error) {
	defer func() {
		if err != nil {
			reportDalianFailure("GetDalianHtmlBody", err)
		}
	}()
	return fetchDalianRankingHTML(dalianTargetDate(dayNum), contractId, carietyCode, varietyName)
}

// SyncRankingFromDalian stores the ranking of every contract in searchList for
// the day dayNum days before today.
//
// searchList used to be a hard-coded table of varieties and contracts; it is
// now produced by SyncRankingFromDalianSearch.
//
// Any error is printed and e-mailed by the deferred handler and returned.
func SyncRankingFromDalian(dayNum int, searchList []*SearchList) (err error) {
	fmt.Println("start")
	defer func() {
		if err != nil {
			reportDalianFailure("RefreshDataFromDalian", err)
		}
	}()
	return syncDalianRanking(dalianTargetDate(dayNum), searchList)
}

// dalianTargetDate returns the moment dayNum days before now, passed through
// the project's own formatting/parsing helpers exactly as the crawler has
// always done.
func dalianTargetDate(dayNum int) time.Time {
	endDate := time.Now().AddDate(0, 0, -dayNum).Format(utils.FormatDateTime)
	return utils.StrTimeToTime(endDate)
}

// reportDalianFailure prints err and sends the failure e-mail in the
// background. tag names the operation that failed.
func reportDalianFailure(tag string, err error) {
	fmt.Println(tag + " Err:" + err.Error())
	go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", tag+" ErrMsg:"+err.Error(), utils.EmailSendToUsers)
}

// stripDalianWhitespace removes every whitespace character (spaces, tabs,
// line breaks, non-breaking spaces) from s.
func stripDalianWhitespace(s string) string {
	return strings.Join(strings.Fields(s), "")
}

// postDalianRankingForm submits the multipart search form for the given day,
// contract and variety and returns the raw response body. The response body
// is closed before returning.
func postDalianRankingForm(timeDate time.Time, contractID, varietyCode string) ([]byte, error) {
	payload := &bytes.Buffer{}
	writer := multipart.NewWriter(payload)
	fields := [][2]string{
		{"memberDealPosiQuotes.variety", varietyCode},
		{"memberDealPosiQuotes.trade_type", "0"},
		{"year", strconv.Itoa(timeDate.Year())},
		// The form expects a zero-based month.
		{"month", strconv.Itoa(int(timeDate.Month()) - 1)},
		{"day", timeDate.Format("02")},
		{"contract.contract_id", contractID},
		{"contract.variety_id", varietyCode},
		{"currDate", timeDate.Format(utils.FormatDateUnSpace)},
	}
	for _, f := range fields {
		if err := writer.WriteField(f[0], f[1]); err != nil {
			return nil, err
		}
	}
	if err := writer.Close(); err != nil {
		return nil, err
	}

	req, err := http.NewRequest(http.MethodPost, dalianRankingURL, payload)
	if err != nil {
		return nil, err
	}
	req.Header.Add("Cookie", dalianSessionCookie)
	req.Header.Set("Content-Type", writer.FormDataContentType())

	res, err := dalianHTTPClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	return ioutil.ReadAll(res.Body)
}

// fetchDalianRankingHTML downloads the ranking page as a string. On failure it
// writes the date, variety and contract to the file log and returns an error
// carrying the same context. It does not send any e-mail; that is left to the
// exported entry points.
func fetchDalianRankingHTML(timeDate time.Time, contractID, varietyCode, varietyName string) (string, error) {
	raw, err := postDalianRankingForm(timeDate, contractID, varietyCode)
	if err != nil {
		detail := timeDate.Format(utils.FormatDateUnSpace) + varietyName + contractID
		utils.FileLog.Info("获取指标失败:" + detail)
		return "", fmt.Errorf("获取指标失败:%s: %w", detail, err)
	}
	return string(raw), nil
}

// syncDalianRanking loads the rows already stored for timeDate, then downloads
// and stores the ranking page of every contract in searchList. It stops at the
// first error.
func syncDalianRanking(timeDate time.Time, searchList []*SearchList) error {
	list, err := models.GetBaseFromTradeDalianDataList(timeDate.Format(utils.FormatDate))
	if err != nil {
		return err
	}
	listDataMap := make(map[string]int, len(list))
	for _, v := range list {
		listDataMap[v.DealShortName+v.ClassifyType+v.DataTime] = v.BaseFromTradeDalianIndexId
	}

	currDate := timeDate.Format(utils.FormatDateUnSpace)
	for _, v := range searchList {
		for _, v2 := range v.ListSearch {
			fmt.Println(currDate, v.VarietyName, v2.ContractId)
			body, err := fetchDalianRankingHTML(timeDate, v2.ContractId, v.CarietyCode, v.VarietyName)
			if err != nil {
				return err
			}
			if err := storeDalianRanking(body, v.VarietyName, v2.ContractId, timeDate, listDataMap); err != nil {
				return err
			}
		}
	}
	return nil
}

// storeDalianRanking is the core of DoHtml without the failure notification:
// it walks every table row of body, updates rows already present in
// listDataMap and inserts the rest with a single statement.
func storeDalianRanking(body, name, contractId string, dateTime time.Time, listDataMap map[string]int) error {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	if err != nil {
		return err
	}

	var addSQL strings.Builder
	addSQL.WriteString(` INSERT INTO base_from_trade_dalian_index(rank,deal_short_name,deal_name,deal_code,deal_value,buy_short_name,deal_change,buy_name,buy_code,buy_value,buy_change,sold_short_name,sold_name,sold_code,sold_value,sold_change,frequency,classify_name,classify_type,create_time,modify_time,data_time) values `)
	var isAdd bool

	// NOTE(review): these are declared outside the row loop, so a row lacking
	// a column keeps the previous row's value for it — confirm that this is
	// intended.
	var rank, shortName, dealValue, dealChange, buyName, buyValue, buyChange, soldName, soldValue, soldChange string

	dataTime := dateTime.Format(utils.FormatDate)
	doc.Find("table").Find("tr").Each(func(i int, tr *goquery.Selection) {
		tds := tr.Find("td")
		// Rows with no cells, with exactly 7 cells, and row index 23 are not
		// data rows; they are only written to the file log.
		if n := tds.Length(); n == 0 || n == 7 || i == 23 {
			utils.FileLog.Info(tds.Text())
			return
		}

		tds.Each(func(tk int, td *goquery.Selection) {
			tdText := td.Text()
			switch tk {
			case 0: // rank
				rank = tdText
			case 1: // member short name (volume column group)
				shortName = tdText
			case 2: // traded volume
				dealValue = strings.Replace(tdText, ",", "", -1)
			case 3: // change
				dealChange = strings.Replace(tdText, ",", "", -1)
			case 5: // member short name (long position column group)
				buyName = tdText
			case 6: // long position
				buyValue = strings.Replace(tdText, ",", "", -1)
			case 7: // change
				buyChange = strings.Replace(tdText, ",", "", -1)
			case 9: // member short name (short position column group)
				soldName = tdText
			case 10: // short position
				soldValue = strings.Replace(tdText, ",", "", -1)
			case 11: // change
				soldChange = strings.Replace(tdText, ",", "", -1)
			}
		})

		now := time.Now().Format(utils.FormatDateTime)
		item := new(models.BaseFromTradeDalianIndex)
		item.Rank = rank
		item.DealShortName = shortName
		item.DealName = shortName + "_成交量_" + contractId
		item.DealCode = DlIndexCodeGenerator(item.DealName, "deal")
		item.DealValue = dealValue
		item.DealChange = dealChange
		item.BuyShortName = buyName
		item.BuyName = buyName + "_持买单量_" + contractId
		item.BuyCode = DlIndexCodeGenerator(item.BuyName, "buy")
		item.BuyValue = buyValue
		item.BuyChange = buyChange
		item.SoldShortName = soldName
		item.SoldName = soldName + "_持卖单量_" + contractId
		item.SoldCode = DlIndexCodeGenerator(item.SoldName, "sold")
		item.SoldValue = soldValue
		item.SoldChange = soldChange
		item.Frequency = "日度"
		item.ClassifyName = name
		item.ClassifyType = contractId
		item.CreateTime = now
		item.ModifyTime = now
		item.DataTime = dataTime

		id, exists := listDataMap[item.DealShortName+item.ClassifyType+item.DataTime]
		if !exists {
			addSQL.WriteString(models.GetAddSql(item))
			isAdd = true
			return
		}
		// Updates are best effort: a failed update is printed and the
		// remaining rows are still processed.
		if err := models.UpdateBaseFromTradeDalianIndex(item, id); err != nil {
			fmt.Println("UpdateBaseFromTradeDalianIndex err:", err)
		}
	})

	if !isAdd {
		return nil
	}
	// Drop the trailing comma left after the last value tuple.
	return models.RefreshEdbDataByDaLian(strings.TrimRight(addSQL.String(), ","))
}