commodity_trade_dalian.go 16 KB


  1. package services
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/PuerkitoBio/goquery"
  6. "hongze/hongze_data_crawler/models"
  7. "hongze/hongze_data_crawler/utils"
  8. "io/ioutil"
  9. "log"
  10. "mime/multipart"
  11. "net/http"
  12. "strconv"
  13. "strings"
  14. "time"
  15. )
  // Search criteria scraped from the Dalian Commodity Exchange ranking page.
  type (
  	// SearchList pairs one traded variety with the contracts found for it.
  	SearchList struct {
  		// VarietyName is the display name of the variety.
  		VarietyName string `description:"商品名称"`
  		// CarietyCode is the code that goes with the variety name.
  		CarietyCode string `description:"商品名称对应的编码"`
  		// List holds contracts by value; nothing in the code visible here fills it.
  		List []SearchContractId
  		// ListSearch holds contracts by pointer, as returned by DoSearchMap.
  		ListSearch []*SearchContractId
  	}

  	// SearchContractId wraps a single contract identifier.
  	SearchContractId struct {
  		ContractId string `description:"商品类型"`
  	}
  )
  25. //同步 N天 之内的数据
  26. func SyncRankingFromDalianDo() {
  27. for i := 200; i >= 0; i-- {
  28. SyncRankingFromDalianSearch(i)
  29. }
  30. }
  31. //大连交易所持单排名
  32. func SyncRankingFromDalianSearch(dayNum int) (err error) {
  33. fmt.Println("start")
  34. defer func() {
  35. if err != nil {
  36. fmt.Println("RefreshDataFromDalian Err:" + err.Error())
  37. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDalian ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  38. }
  39. }()
  40. //定义爬取时间
  41. endDate := time.Now().AddDate(0, 0, -dayNum).Format(utils.FormatDateTime)
  42. //endDate := time.Now().Format(utils.FormatDateTime)
  43. timeDate := utils.StrTimeToTime(endDate)
  44. currDate := timeDate.Format(utils.FormatDateUnSpace)
  45. year := timeDate.Year()
  46. month := timeDate.Format("01")
  47. var dayStr string
  48. day := timeDate.Day()
  49. if day < 10 {
  50. dayStr = "0" + strconv.Itoa(day)
  51. } else {
  52. dayStr = strconv.Itoa(day)
  53. }
  54. monthNum, _ := strconv.Atoi(month)
  55. month = strconv.Itoa(monthNum - 1) //获取时月份需要减一
  56. list, err := models.GetBaseFromTradeDalianDataList(timeDate.Format(utils.FormatDate))
  57. if err != nil {
  58. fmt.Println(err)
  59. return err
  60. }
  61. listDataMap := make(map[string]int)
  62. for _, v := range list {
  63. listDataMap[v.DealShortName+v.ClassifyType+v.DataTime] = v.BaseFromTradeDalianIndexId
  64. }
  65. var ContractId string
  66. var CarietyCode string
  67. var VarietyName string
  68. //模拟form表单请求
  69. url := "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
  70. method := "POST"
  71. payload := &bytes.Buffer{}
  72. writer := multipart.NewWriter(payload)
  73. _ = writer.WriteField("memberDealPosiQuotes.variety", CarietyCode)
  74. _ = writer.WriteField("memberDealPosiQuotes.trade_type", "0")
  75. _ = writer.WriteField("year", strconv.Itoa(year))
  76. _ = writer.WriteField("month", month)
  77. _ = writer.WriteField("day", dayStr)
  78. _ = writer.WriteField("contract.contract_id", ContractId)
  79. _ = writer.WriteField("contract.variety_id", CarietyCode)
  80. _ = writer.WriteField("currDate", currDate)
  81. err = writer.Close()
  82. if err != nil {
  83. utils.FileLog.Info("获取指标失败:" + currDate + VarietyName + ContractId)
  84. return err
  85. }
  86. client := &http.Client{}
  87. req, err := http.NewRequest(method, url, payload)
  88. if err != nil {
  89. return err
  90. }
  91. req.Header.Add("Cookie", "JSESSIONID=36ACF02A59227A3854F9D5D5E2FB5F2E; WMONID=R5ojcAIIcx-")
  92. req.Header.Set("Content-Type", writer.FormDataContentType())
  93. res, err := client.Do(req)
  94. if err != nil {
  95. return err
  96. }
  97. defer res.Body.Close()
  98. body, err := ioutil.ReadAll(res.Body)
  99. if err != nil {
  100. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDalian ErrMsg:"+err.Error()+"获取指标失败:"+currDate+VarietyName+ContractId, utils.EmailSendToUsers)
  101. return err
  102. }
  103. exitProductMaps, _, varietyArrMaps := DoSearch(string(body))
  104. var items []*SearchList
  105. for k, v := range exitProductMaps {
  106. item := new(SearchList)
  107. item.VarietyName = v
  108. item.CarietyCode = varietyArrMaps[k]
  109. htmlBody, err := GetDalianHtmlBody(dayNum, "", item.CarietyCode, item.VarietyName)
  110. if err != nil {
  111. return err
  112. }
  113. listContractId := DoSearchMap(htmlBody)
  114. item.ListSearch = listContractId
  115. items = append(items, item)
  116. }
  117. for k, v := range items {
  118. for _, v2 := range v.ListSearch {
  119. fmt.Println(currDate, v.VarietyName, v2.ContractId, v.CarietyCode, k)
  120. }
  121. }
  122. SyncRankingFromDalian(dayNum, items)
  123. return err
  124. }
  125. //处理搜索条件初始
  126. func DoSearch(body string) (exitProductMaps, exitContractIdMaps, varietyArrMaps map[int]string) {
  127. var str string
  128. str = body
  129. doc, err := goquery.NewDocumentFromReader(strings.NewReader(str))
  130. if err != nil {
  131. log.Fatal(err)
  132. }
  133. exitProductMap := make(map[int]string)
  134. exitContractIdMap := make(map[int]string)
  135. varietyArrMap := make(map[int]string)
  136. //var productName string
  137. ul := doc.Find(".selBox ul")
  138. var pNum int
  139. var cidNum int
  140. var vNum int
  141. ul.Each(func(i int, s *goquery.Selection) {
  142. //解析标签
  143. //fmt.Println(i, s.Text())
  144. ulTxt := s.Text()
  145. //fmt.Println(ulTxt)
  146. if ulTxt != "" && (i == 0 || i == 2) {
  147. ulTxtArr := strings.Split(ulTxt, "\n")
  148. for _, v := range ulTxtArr {
  149. v = strings.Replace(v, " ", "", -1)
  150. v = strings.Replace(v, "\n", "", -1)
  151. v = strings.Replace(v, " ", "", -1)
  152. if v != "" && len(v) > 0 {
  153. exitProductMap[pNum] = v
  154. pNum++
  155. }
  156. }
  157. }
  158. if ulTxt != "" && i == 3 {
  159. //fmt.Println(ulTxt)
  160. cidTxtArr := strings.Split(ulTxt, " ")
  161. for _, v := range cidTxtArr {
  162. v = strings.Replace(v, "\n", "", -1)
  163. v = strings.Replace(v, " ", "", -1)
  164. v = strings.Replace(v, " ", "", -1)
  165. if v != "" {
  166. exitContractIdMap[cidNum] = v
  167. cidNum++
  168. }
  169. }
  170. }
  171. })
  172. varietyArr := strings.Split(str, "onclick=\"javascript:setVariety('")
  173. for _, v := range varietyArr {
  174. strnum := strings.Index(v, "');")
  175. if strnum > 0 {
  176. varietyStr := v[0:strnum]
  177. if len(varietyStr) < 10 {
  178. //fmt.Println(strnum, varietyStr)
  179. varietyArrMap[vNum] = varietyStr
  180. vNum++
  181. }
  182. }
  183. }
  184. exitProductMaps = exitProductMap
  185. exitContractIdMaps = exitContractIdMap
  186. varietyArrMaps = varietyArrMap
  187. return
  188. }
  189. //处理搜索条件
  190. func DoSearchMap(body string) (items []*SearchContractId) {
  191. var str string
  192. str = body
  193. doc, err := goquery.NewDocumentFromReader(strings.NewReader(str))
  194. if err != nil {
  195. log.Fatal(err)
  196. }
  197. exitContractIdMap := make(map[int]string)
  198. ul := doc.Find(".selBox ul")
  199. var cidNum int
  200. ul.Each(func(i int, s *goquery.Selection) {
  201. //解析标签
  202. ulTxt := s.Text()
  203. if ulTxt != "" && i == 3 {
  204. cidTxtArr := strings.Split(ulTxt, " ")
  205. for _, v := range cidTxtArr {
  206. v = strings.Replace(v, "\n", "", -1)
  207. v = strings.Replace(v, " ", "", -1)
  208. v = strings.Replace(v, " ", "", -1)
  209. if v != "" {
  210. exitContractIdMap[cidNum] = v
  211. cidNum++
  212. }
  213. }
  214. }
  215. })
  216. for _, v := range exitContractIdMap {
  217. item := new(SearchContractId)
  218. item.ContractId = v
  219. items = append(items, item)
  220. }
  221. return items
  222. }
  223. //处理解析Html
  224. func DoHtml(body, name, contractId string, dateTime time.Time, listDataMap map[string]int, listIndexCodeMap map[string]string) (err error) {
  225. defer func() {
  226. if err != nil {
  227. fmt.Println("RefreshDataFromDaLian Err:" + err.Error())
  228. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDaLian ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  229. }
  230. }()
  231. str := body
  232. doc, err := goquery.NewDocumentFromReader(strings.NewReader(str))
  233. if err != nil {
  234. log.Fatal(err)
  235. }
  236. var isAdd bool
  237. addSql := ` INSERT INTO base_from_trade_dalian_index(rank,deal_short_name,deal_name,deal_code,deal_value,buy_short_name,deal_change,buy_name,buy_code,buy_value,buy_change,sold_short_name,sold_name,sold_code,sold_value,sold_change,frequency,classify_name,classify_type,create_time,modify_time,data_time) values `
  238. table := doc.Find("table")
  239. var rank, shortName, dealValue, dealChange, buyName, buyValue, buyChange, soldName, soldValue, soldChange string
  240. table.Find("tr").Each(func(i int, tr *goquery.Selection) {
  241. tds := tr.Find("td")
  242. //fmt.Println(tds.Length(), "长度:", i)
  243. if tds.Length() == 0 || tds.Length() == 7 || i == 23 {
  244. tdText := tds.Text()
  245. utils.FileLog.Info(tdText)
  246. } else {
  247. item := new(models.BaseFromTradeDalianIndex)
  248. tds.Each(func(tk int, td *goquery.Selection) {
  249. tdText := td.Text()
  250. if tk == 0 { //名次
  251. rank = tdText
  252. }
  253. if tk == 1 { //会员简称
  254. shortName = tdText
  255. }
  256. if tk == 2 { //成交量
  257. dealValue = strings.Replace(tdText, ",", "", -1)
  258. }
  259. if tk == 3 { //增减
  260. dealChange = strings.Replace(tdText, ",", "", -1)
  261. }
  262. if tk == 5 { //会员简称
  263. buyName = tdText
  264. }
  265. if tk == 6 { //持买单量
  266. buyValue = strings.Replace(tdText, ",", "", -1)
  267. }
  268. if tk == 7 { //增减
  269. buyChange = strings.Replace(tdText, ",", "", -1)
  270. }
  271. if tk == 9 { //会员简称
  272. soldName = tdText
  273. }
  274. if tk == 10 { //持卖单量
  275. soldValue = strings.Replace(tdText, ",", "", -1)
  276. }
  277. if tk == 11 { //增减
  278. soldChange = strings.Replace(tdText, ",", "", -1)
  279. }
  280. })
  281. item.Rank = rank
  282. item.DealShortName = shortName
  283. item.DealName = shortName + "_成交量_" + contractId
  284. if val, ok := listIndexCodeMap[item.DealName]; ok {
  285. item.DealCode = val
  286. } else {
  287. item.DealCode = DlIndexCodeGenerator(item.DealName, "deal")
  288. }
  289. item.DealValue = dealValue
  290. item.BuyShortName = buyName
  291. item.DealChange = dealChange
  292. item.BuyName = buyName + "_持买单量_" + contractId
  293. if val, ok := listIndexCodeMap[item.BuyName]; ok {
  294. item.BuyCode = val
  295. } else {
  296. item.BuyCode = DlIndexCodeGenerator(item.BuyName, "buy")
  297. }
  298. item.BuyValue = buyValue
  299. item.BuyChange = buyChange
  300. item.SoldShortName = soldName
  301. item.SoldName = soldName + "_持卖单量_" + contractId
  302. if val, ok := listIndexCodeMap[item.SoldName]; ok {
  303. item.SoldCode = val
  304. } else {
  305. item.SoldCode = DlIndexCodeGenerator(item.SoldName, "sold")
  306. }
  307. item.SoldValue = soldValue
  308. item.SoldChange = soldChange
  309. item.Frequency = "日度"
  310. item.ClassifyName = name
  311. item.ClassifyType = contractId
  312. item.CreateTime = time.Now().Format(utils.FormatDateTime)
  313. item.ModifyTime = time.Now().Format(utils.FormatDateTime)
  314. item.DataTime = dateTime.Format(utils.FormatDate)
  315. if val, ok := listDataMap[item.DealShortName+item.ClassifyType+item.DataTime]; !ok {
  316. addSql += models.GetAddSql(item)
  317. isAdd = true
  318. } else {
  319. //更新
  320. err := models.UpdateBaseFromTradeDalianIndex(item, val)
  321. if err != nil {
  322. fmt.Println("UpdateBaseFromTradeDalianIndex err:", err)
  323. }
  324. }
  325. }
  326. })
  327. addSql = strings.TrimRight(addSql, ",")
  328. if isAdd {
  329. err = models.RefreshEdbDataByDaLian(addSql)
  330. if err != nil {
  331. return err
  332. }
  333. }
  334. return
  335. }
  336. func DlIndexCodeGenerator(indexName, suffix string) (ineIndexCode string) {
  337. ineIndexCode = fmt.Sprintf("DL%s", strconv.FormatInt(time.Now().UnixNano(), 10)+suffix)
  338. err := models.AddBaseFromTradeMapping(indexName, ineIndexCode, "DL")
  339. if err != nil {
  340. fmt.Println("add Code err:", err)
  341. }
  342. return ineIndexCode
  343. }
  344. //大连交易所持单排名
  345. func GetDalianHtmlBody(dayNum int, contractId, carietyCode, varietyName string) (body string, err error) {
  346. defer func() {
  347. if err != nil {
  348. fmt.Println("GetDalianHtmlBody Err:" + err.Error())
  349. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "GetDalianHtmlBody ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  350. }
  351. }()
  352. //定义爬取时间
  353. endDate := time.Now().AddDate(0, 0, -dayNum).Format(utils.FormatDateTime)
  354. //endDate := time.Now().Format(utils.FormatDateTime)
  355. timeDate := utils.StrTimeToTime(endDate)
  356. currDate := timeDate.Format(utils.FormatDateUnSpace)
  357. year := timeDate.Year()
  358. month := timeDate.Format("01")
  359. var dayStr string
  360. day := timeDate.Day()
  361. if day < 10 {
  362. dayStr = "0" + strconv.Itoa(day)
  363. } else {
  364. dayStr = strconv.Itoa(day)
  365. }
  366. monthNum, _ := strconv.Atoi(month)
  367. month = strconv.Itoa(monthNum - 1) //获取时月份需要减一
  368. list, err := models.GetBaseFromTradeDalianDataList(timeDate.Format(utils.FormatDate))
  369. listDataMap := make(map[string]int)
  370. for _, v := range list {
  371. listDataMap[v.DealShortName+v.ClassifyType+v.DataTime] = v.BaseFromTradeDalianIndexId
  372. }
  373. if err != nil {
  374. fmt.Println(err)
  375. return
  376. }
  377. //模拟form表单请求
  378. url := "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
  379. method := "POST"
  380. payload := &bytes.Buffer{}
  381. writer := multipart.NewWriter(payload)
  382. _ = writer.WriteField("memberDealPosiQuotes.variety", carietyCode)
  383. _ = writer.WriteField("memberDealPosiQuotes.trade_type", "0")
  384. _ = writer.WriteField("year", strconv.Itoa(year))
  385. _ = writer.WriteField("month", month)
  386. _ = writer.WriteField("day", dayStr)
  387. _ = writer.WriteField("contract.contract_id", contractId)
  388. _ = writer.WriteField("contract.variety_id", carietyCode)
  389. _ = writer.WriteField("currDate", currDate)
  390. err = writer.Close()
  391. if err != nil {
  392. utils.FileLog.Info("获取指标失败:" + currDate + varietyName + contractId)
  393. return
  394. }
  395. client := &http.Client{}
  396. req, err := http.NewRequest(method, url, payload)
  397. if err != nil {
  398. return
  399. }
  400. req.Header.Add("Cookie", "JSESSIONID=36ACF02A59227A3854F9D5D5E2FB5F2E; WMONID=R5ojcAIIcx-")
  401. req.Header.Set("Content-Type", writer.FormDataContentType())
  402. res, err := client.Do(req)
  403. if err != nil {
  404. return
  405. }
  406. defer res.Body.Close()
  407. htmlBody, err := ioutil.ReadAll(res.Body)
  408. if err != nil {
  409. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "GetDalianHtmlBody ErrMsg:"+err.Error()+"获取指标失败:"+currDate+varietyName+contractId, utils.EmailSendToUsers)
  410. return
  411. }
  412. body = string(htmlBody)
  413. return
  414. }
  415. //大连交易所持单排名
  416. func SyncRankingFromDalian(dayNum int, searchList []*SearchList) (err error) {
  417. fmt.Println("start")
  418. defer func() {
  419. if err != nil {
  420. fmt.Println("RefreshDataFromDalian Err:" + err.Error())
  421. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDalian ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  422. }
  423. }()
  424. //n := utils.GetRandInt(10, 120)
  425. //time.Sleep(time.Duration(n) * time.Second)
  426. //定义爬取时间
  427. endDate := time.Now().AddDate(0, 0, -dayNum).Format(utils.FormatDateTime)
  428. //endDate := time.Now().Format(utils.FormatDateTime)
  429. timeDate := utils.StrTimeToTime(endDate)
  430. currDate := timeDate.Format(utils.FormatDateUnSpace)
  431. year := timeDate.Year()
  432. month := timeDate.Format("01")
  433. var dayStr string
  434. day := timeDate.Day()
  435. if day < 10 {
  436. dayStr = "0" + strconv.Itoa(day)
  437. } else {
  438. dayStr = strconv.Itoa(day)
  439. }
  440. monthNum, _ := strconv.Atoi(month)
  441. month = strconv.Itoa(monthNum - 1) //获取时月份需要减一
  442. list, err := models.GetBaseFromTradeDalianDataList(timeDate.Format(utils.FormatDate))
  443. listDataMap := make(map[string]int)
  444. for _, v := range list {
  445. listDataMap[v.DealShortName+v.ClassifyType+v.DataTime] = v.BaseFromTradeDalianIndexId
  446. }
  447. if err != nil {
  448. fmt.Println(err)
  449. return err
  450. }
  451. listIndexCode, err := models.GetIndexCodeMapList("DL") //获取往期指标
  452. if err != nil {
  453. fmt.Println(err)
  454. return err
  455. }
  456. listIndexCodeMap := make(map[string]string)
  457. for _, v := range listIndexCode {
  458. listIndexCodeMap[v.IndexName] = v.IndexCode
  459. }
  460. for _, v := range searchList {
  461. for _, v2 := range v.ListSearch {
  462. //模拟form表单请求
  463. url := "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
  464. method := "POST"
  465. payload := &bytes.Buffer{}
  466. writer := multipart.NewWriter(payload)
  467. _ = writer.WriteField("memberDealPosiQuotes.variety", v.CarietyCode)
  468. _ = writer.WriteField("memberDealPosiQuotes.trade_type", "0")
  469. _ = writer.WriteField("year", strconv.Itoa(year))
  470. _ = writer.WriteField("month", month)
  471. _ = writer.WriteField("day", dayStr)
  472. _ = writer.WriteField("contract.contract_id", v2.ContractId)
  473. _ = writer.WriteField("contract.variety_id", v.CarietyCode)
  474. _ = writer.WriteField("currDate", currDate)
  475. err := writer.Close()
  476. fmt.Println(currDate, v.VarietyName, v2.ContractId)
  477. if err != nil {
  478. utils.FileLog.Info("获取指标失败:" + currDate + v.VarietyName + v2.ContractId)
  479. return err
  480. }
  481. client := &http.Client{}
  482. req, err := http.NewRequest(method, url, payload)
  483. if err != nil {
  484. return err
  485. }
  486. req.Header.Add("Cookie", "JSESSIONID=36ACF02A59227A3854F9D5D5E2FB5F2E; WMONID=R5ojcAIIcx-")
  487. req.Header.Set("Content-Type", writer.FormDataContentType())
  488. res, err := client.Do(req)
  489. if err != nil {
  490. return err
  491. }
  492. defer res.Body.Close()
  493. body, err := ioutil.ReadAll(res.Body)
  494. if err != nil {
  495. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDalian ErrMsg:"+err.Error()+"获取指标失败:"+currDate+v.VarietyName+v2.ContractId, utils.EmailSendToUsers)
  496. return err
  497. }
  498. err = DoHtml(string(body), v.VarietyName, v2.ContractId, timeDate, listDataMap, listIndexCodeMap)
  499. if err != nil {
  500. return err
  501. }
  502. }
  503. }
  504. return err
  505. }