commodity_trade_dalian.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. package services
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/PuerkitoBio/goquery"
  6. "hongze/hongze_data_crawler/models"
  7. "hongze/hongze_data_crawler/utils"
  8. "io/ioutil"
  9. "log"
  10. "mime/multipart"
  11. "net/http"
  12. "strconv"
  13. "strings"
  14. "time"
  15. )
  // Search-filter models for the Dalian exchange ranking page.
  type (
  	// SearchList groups one commodity variety with the contracts found for it.
  	SearchList struct {
  		// VarietyName is the commodity's display name.
  		VarietyName string `description:"商品名称"`
  		// CarietyCode is the code that corresponds to the commodity name
  		// (the field spelling is part of the public interface and is kept).
  		CarietyCode string `description:"商品名称对应的编码"`
  		// List holds contract entries by value.
  		List []SearchContractId
  		// ListSearch holds contract entries by pointer.
  		ListSearch []*SearchContractId
  	}

  	// SearchContractId wraps a single contract identifier.
  	SearchContractId struct {
  		// ContractId is tagged as the commodity type.
  		ContractId string `description:"商品类型"`
  	}
  )
  25. //同步 N天 之内的数据
  26. func SyncRankingFromDalianDo() {
  27. for i := 200; i >= 0; i-- {
  28. SyncRankingFromDalianSearch(i)
  29. }
  30. }
  31. //大连交易所持单排名
  32. func SyncRankingFromDalianSearch(dayNum int) (err error) {
  33. fmt.Println("start")
  34. defer func() {
  35. if err != nil {
  36. fmt.Println("RefreshDataFromDalian Err:" + err.Error())
  37. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDalian ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  38. }
  39. }()
  40. //定义爬取时间
  41. endDate := time.Now().AddDate(0, 0, -dayNum).Format(utils.FormatDateTime)
  42. //endDate := time.Now().Format(utils.FormatDateTime)
  43. timeDate := utils.StrTimeToTime(endDate)
  44. currDate := timeDate.Format(utils.FormatDateUnSpace)
  45. year := timeDate.Year()
  46. month := timeDate.Format("01")
  47. var dayStr string
  48. day := timeDate.Day()
  49. if day < 10 {
  50. dayStr = "0" + strconv.Itoa(day)
  51. } else {
  52. dayStr = strconv.Itoa(day)
  53. }
  54. monthNum, _ := strconv.Atoi(month)
  55. month = strconv.Itoa(monthNum - 1) //获取时月份需要减一
  56. list, err := models.GetBaseFromTradeDalianDataList(timeDate.Format(utils.FormatDate))
  57. if err != nil {
  58. fmt.Println(err)
  59. return err
  60. }
  61. listDataMap := make(map[string]int)
  62. for _, v := range list {
  63. listDataMap[v.DealShortName+v.ClassifyType+v.DataTime] = v.BaseFromTradeDalianIndexId
  64. }
  65. n := utils.GetRandInt(10, 120)
  66. time.Sleep(time.Duration(n) * time.Second)
  67. var ContractId string
  68. var CarietyCode string
  69. var VarietyName string
  70. //模拟form表单请求
  71. url := "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
  72. method := "POST"
  73. payload := &bytes.Buffer{}
  74. writer := multipart.NewWriter(payload)
  75. _ = writer.WriteField("memberDealPosiQuotes.variety", CarietyCode)
  76. _ = writer.WriteField("memberDealPosiQuotes.trade_type", "0")
  77. _ = writer.WriteField("year", strconv.Itoa(year))
  78. _ = writer.WriteField("month", month)
  79. _ = writer.WriteField("day", dayStr)
  80. _ = writer.WriteField("contract.contract_id", ContractId)
  81. _ = writer.WriteField("contract.variety_id", CarietyCode)
  82. _ = writer.WriteField("currDate", currDate)
  83. err = writer.Close()
  84. if err != nil {
  85. utils.FileLog.Info("获取指标失败:" + currDate + VarietyName + ContractId)
  86. return err
  87. }
  88. client := &http.Client{}
  89. req, err := http.NewRequest(method, url, payload)
  90. if err != nil {
  91. return err
  92. }
  93. req.Header.Add("Cookie", "JSESSIONID=36ACF02A59227A3854F9D5D5E2FB5F2E; WMONID=R5ojcAIIcx-")
  94. req.Header.Set("Content-Type", writer.FormDataContentType())
  95. res, err := client.Do(req)
  96. if err != nil {
  97. return err
  98. }
  99. defer res.Body.Close()
  100. body, err := ioutil.ReadAll(res.Body)
  101. if err != nil {
  102. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDalian ErrMsg:"+err.Error()+"获取指标失败:"+currDate+VarietyName+ContractId, utils.EmailSendToUsers)
  103. return err
  104. }
  105. exitProductMaps, _, varietyArrMaps := DoSearch(string(body))
  106. var items []*SearchList
  107. for k, v := range exitProductMaps {
  108. item := new(SearchList)
  109. item.VarietyName = v
  110. item.CarietyCode = varietyArrMaps[k]
  111. htmlBody, err := GetDalianHtmlBody(dayNum, "", item.CarietyCode, item.VarietyName)
  112. if err != nil {
  113. return err
  114. }
  115. listContractId := DoSearchMap(htmlBody)
  116. item.ListSearch = listContractId
  117. items = append(items, item)
  118. }
  119. for k, v := range items {
  120. for _, v2 := range v.ListSearch {
  121. fmt.Println(currDate, v.VarietyName, v2.ContractId, v.CarietyCode, k)
  122. }
  123. }
  124. SyncRankingFromDalian(dayNum, items)
  125. return err
  126. }
  127. //处理搜索条件初始
  128. func DoSearch(body string) (exitProductMaps, exitContractIdMaps, varietyArrMaps map[int]string) {
  129. var str string
  130. str = body
  131. doc, err := goquery.NewDocumentFromReader(strings.NewReader(str))
  132. if err != nil {
  133. log.Fatal(err)
  134. }
  135. exitProductMap := make(map[int]string)
  136. exitContractIdMap := make(map[int]string)
  137. varietyArrMap := make(map[int]string)
  138. //var productName string
  139. ul := doc.Find(".selBox ul")
  140. var pNum int
  141. var cidNum int
  142. var vNum int
  143. ul.Each(func(i int, s *goquery.Selection) {
  144. //解析标签
  145. //fmt.Println(i, s.Text())
  146. ulTxt := s.Text()
  147. //fmt.Println(ulTxt)
  148. if ulTxt != "" && (i == 0 || i == 2) {
  149. ulTxtArr := strings.Split(ulTxt, "\n")
  150. for _, v := range ulTxtArr {
  151. v = strings.Replace(v, " ", "", -1)
  152. v = strings.Replace(v, "\n", "", -1)
  153. v = strings.Replace(v, " ", "", -1)
  154. if v != "" && len(v) > 0 {
  155. exitProductMap[pNum] = v
  156. pNum++
  157. }
  158. }
  159. }
  160. if ulTxt != "" && i == 3 {
  161. //fmt.Println(ulTxt)
  162. cidTxtArr := strings.Split(ulTxt, " ")
  163. for _, v := range cidTxtArr {
  164. v = strings.Replace(v, "\n", "", -1)
  165. v = strings.Replace(v, " ", "", -1)
  166. v = strings.Replace(v, " ", "", -1)
  167. if v != "" {
  168. exitContractIdMap[cidNum] = v
  169. cidNum++
  170. }
  171. }
  172. }
  173. })
  174. varietyArr := strings.Split(str, "onclick=\"javascript:setVariety('")
  175. for _, v := range varietyArr {
  176. strnum := strings.Index(v, "');")
  177. if strnum > 0 {
  178. varietyStr := v[0:strnum]
  179. if len(varietyStr) < 10 {
  180. //fmt.Println(strnum, varietyStr)
  181. varietyArrMap[vNum] = varietyStr
  182. vNum++
  183. }
  184. }
  185. }
  186. exitProductMaps = exitProductMap
  187. exitContractIdMaps = exitContractIdMap
  188. varietyArrMaps = varietyArrMap
  189. return
  190. }
  191. //处理搜索条件
  192. func DoSearchMap(body string) (items []*SearchContractId) {
  193. var str string
  194. str = body
  195. doc, err := goquery.NewDocumentFromReader(strings.NewReader(str))
  196. if err != nil {
  197. log.Fatal(err)
  198. }
  199. exitContractIdMap := make(map[int]string)
  200. ul := doc.Find(".selBox ul")
  201. var cidNum int
  202. ul.Each(func(i int, s *goquery.Selection) {
  203. //解析标签
  204. ulTxt := s.Text()
  205. if ulTxt != "" && i == 3 {
  206. cidTxtArr := strings.Split(ulTxt, " ")
  207. for _, v := range cidTxtArr {
  208. v = strings.Replace(v, "\n", "", -1)
  209. v = strings.Replace(v, " ", "", -1)
  210. v = strings.Replace(v, " ", "", -1)
  211. if v != "" {
  212. exitContractIdMap[cidNum] = v
  213. cidNum++
  214. }
  215. }
  216. }
  217. })
  218. for _, v := range exitContractIdMap {
  219. item := new(SearchContractId)
  220. item.ContractId = v
  221. items = append(items, item)
  222. }
  223. return items
  224. }
  225. //处理解析Html
  226. func DoHtml(body, name, contractId string, dateTime time.Time, listDataMap map[string]int, listIndexCodeMap map[string]string, listDataMapVal map[string]int) (err error) {
  227. defer func() {
  228. if err != nil {
  229. fmt.Println("RefreshDataFromDaLian Err:" + err.Error())
  230. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDaLian ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  231. }
  232. }()
  233. str := body
  234. doc, err := goquery.NewDocumentFromReader(strings.NewReader(str))
  235. if err != nil {
  236. log.Fatal(err)
  237. }
  238. var isAdd bool
  239. addSql := ` INSERT INTO base_from_trade_dalian_index(rank,deal_short_name,deal_name,deal_code,deal_value,buy_short_name,deal_change,buy_name,buy_code,buy_value,buy_change,sold_short_name,sold_name,sold_code,sold_value,sold_change,frequency,classify_name,classify_type,create_time,modify_time,data_time) values `
  240. table := doc.Find("table")
  241. var rank, shortName, dealValue, dealChange, buyName, buyValue, buyChange, soldName, soldValue, soldChange string
  242. table.Find("tr").Each(func(i int, tr *goquery.Selection) {
  243. tds := tr.Find("td")
  244. //fmt.Println(tds.Length(), "长度:", i)
  245. if tds.Length() == 0 || tds.Length() == 7 || i == 23 {
  246. tdText := tds.Text()
  247. utils.FileLog.Info(tdText)
  248. } else {
  249. item := new(models.BaseFromTradeDalianIndex)
  250. tds.Each(func(tk int, td *goquery.Selection) {
  251. tdText := td.Text()
  252. if tk == 0 { //名次
  253. rank = tdText
  254. }
  255. if tk == 1 { //会员简称
  256. shortName = tdText
  257. }
  258. if tk == 2 { //成交量
  259. dealValue = strings.Replace(tdText, ",", "", -1)
  260. }
  261. if tk == 3 { //增减
  262. dealChange = strings.Replace(tdText, ",", "", -1)
  263. }
  264. if tk == 5 { //会员简称
  265. buyName = tdText
  266. }
  267. if tk == 6 { //持买单量
  268. buyValue = strings.Replace(tdText, ",", "", -1)
  269. }
  270. if tk == 7 { //增减
  271. buyChange = strings.Replace(tdText, ",", "", -1)
  272. }
  273. if tk == 9 { //会员简称
  274. soldName = tdText
  275. }
  276. if tk == 10 { //持卖单量
  277. soldValue = strings.Replace(tdText, ",", "", -1)
  278. }
  279. if tk == 11 { //增减
  280. soldChange = strings.Replace(tdText, ",", "", -1)
  281. }
  282. })
  283. item.Rank = rank
  284. item.DealShortName = shortName
  285. item.DealName = shortName + "_成交量_" + contractId
  286. if val, ok := listIndexCodeMap[item.DealName]; ok {
  287. item.DealCode = val
  288. } else {
  289. item.DealCode = DlIndexCodeGenerator(item.DealName, "deal")
  290. }
  291. item.DealValue = dealValue
  292. item.BuyShortName = buyName
  293. item.DealChange = dealChange
  294. item.BuyName = buyName + "_持买单量_" + contractId
  295. if val, ok := listIndexCodeMap[item.BuyName]; ok {
  296. item.BuyCode = val
  297. } else {
  298. item.BuyCode = DlIndexCodeGenerator(item.BuyName, "buy")
  299. }
  300. item.BuyValue = buyValue
  301. item.BuyChange = buyChange
  302. item.SoldShortName = soldName
  303. item.SoldName = soldName + "_持卖单量_" + contractId
  304. if val, ok := listIndexCodeMap[item.SoldName]; ok {
  305. item.SoldCode = val
  306. } else {
  307. item.SoldCode = DlIndexCodeGenerator(item.SoldName, "sold")
  308. }
  309. item.SoldValue = soldValue
  310. item.SoldChange = soldChange
  311. item.Frequency = "日度"
  312. item.ClassifyName = name
  313. item.ClassifyType = contractId
  314. item.CreateTime = time.Now().Format(utils.FormatDateTime)
  315. item.ModifyTime = time.Now().Format(utils.FormatDateTime)
  316. item.DataTime = dateTime.Format(utils.FormatDate)
  317. if val, ok := listDataMap[item.DealShortName+item.ClassifyType+item.DataTime]; !ok {
  318. addSql += models.GetAddSql(item)
  319. isAdd = true
  320. } else {
  321. //更新
  322. if listDataMapVal[item.DealValue+item.BuyValue+item.SoldValue] != val {
  323. err := models.UpdateBaseFromTradeDalianIndex(item, val)
  324. if err != nil {
  325. fmt.Println("UpdateBaseFromTradeDalianIndex err:", err)
  326. }
  327. }
  328. }
  329. }
  330. })
  331. addSql = strings.TrimRight(addSql, ",")
  332. if isAdd {
  333. err = models.RefreshEdbDataByDaLian(addSql)
  334. if err != nil {
  335. return err
  336. }
  337. }
  338. return
  339. }
  340. func DlIndexCodeGenerator(indexName, suffix string) (ineIndexCode string) {
  341. ineIndexCode = fmt.Sprintf("DL%s", strconv.FormatInt(time.Now().UnixNano(), 10)+suffix)
  342. err := models.AddBaseFromTradeMapping(indexName, ineIndexCode, "DL")
  343. if err != nil {
  344. fmt.Println("add Code err:", err)
  345. }
  346. return ineIndexCode
  347. }
  348. //大连交易所持单排名
  349. func GetDalianHtmlBody(dayNum int, contractId, carietyCode, varietyName string) (body string, err error) {
  350. defer func() {
  351. if err != nil {
  352. fmt.Println("GetDalianHtmlBody Err:" + err.Error())
  353. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "GetDalianHtmlBody ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  354. }
  355. }()
  356. //定义爬取时间
  357. endDate := time.Now().AddDate(0, 0, -dayNum).Format(utils.FormatDateTime)
  358. //endDate := time.Now().Format(utils.FormatDateTime)
  359. timeDate := utils.StrTimeToTime(endDate)
  360. currDate := timeDate.Format(utils.FormatDateUnSpace)
  361. year := timeDate.Year()
  362. month := timeDate.Format("01")
  363. var dayStr string
  364. day := timeDate.Day()
  365. if day < 10 {
  366. dayStr = "0" + strconv.Itoa(day)
  367. } else {
  368. dayStr = strconv.Itoa(day)
  369. }
  370. monthNum, _ := strconv.Atoi(month)
  371. month = strconv.Itoa(monthNum - 1) //获取时月份需要减一
  372. list, err := models.GetBaseFromTradeDalianDataList(timeDate.Format(utils.FormatDate))
  373. listDataMap := make(map[string]int)
  374. for _, v := range list {
  375. listDataMap[v.DealShortName+v.ClassifyType+v.DataTime] = v.BaseFromTradeDalianIndexId
  376. }
  377. if err != nil {
  378. fmt.Println(err)
  379. return
  380. }
  381. //模拟form表单请求
  382. url := "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
  383. method := "POST"
  384. payload := &bytes.Buffer{}
  385. writer := multipart.NewWriter(payload)
  386. _ = writer.WriteField("memberDealPosiQuotes.variety", carietyCode)
  387. _ = writer.WriteField("memberDealPosiQuotes.trade_type", "0")
  388. _ = writer.WriteField("year", strconv.Itoa(year))
  389. _ = writer.WriteField("month", month)
  390. _ = writer.WriteField("day", dayStr)
  391. _ = writer.WriteField("contract.contract_id", contractId)
  392. _ = writer.WriteField("contract.variety_id", carietyCode)
  393. _ = writer.WriteField("currDate", currDate)
  394. err = writer.Close()
  395. if err != nil {
  396. utils.FileLog.Info("获取指标失败:" + currDate + varietyName + contractId)
  397. return
  398. }
  399. client := &http.Client{}
  400. req, err := http.NewRequest(method, url, payload)
  401. if err != nil {
  402. return
  403. }
  404. req.Header.Add("Cookie", "JSESSIONID=36ACF02A59227A3854F9D5D5E2FB5F2E; WMONID=R5ojcAIIcx-")
  405. req.Header.Set("Content-Type", writer.FormDataContentType())
  406. res, err := client.Do(req)
  407. if err != nil {
  408. return
  409. }
  410. defer res.Body.Close()
  411. htmlBody, err := ioutil.ReadAll(res.Body)
  412. if err != nil {
  413. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "GetDalianHtmlBody ErrMsg:"+err.Error()+"获取指标失败:"+currDate+varietyName+contractId, utils.EmailSendToUsers)
  414. return
  415. }
  416. body = string(htmlBody)
  417. return
  418. }
  419. //大连交易所持单排名
  420. func SyncRankingFromDalian(dayNum int, searchList []*SearchList) (err error) {
  421. fmt.Println("start")
  422. defer func() {
  423. if err != nil {
  424. fmt.Println("RefreshDataFromDalian Err:" + err.Error())
  425. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDalian ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  426. }
  427. }()
  428. //n := utils.GetRandInt(10, 120)
  429. //time.Sleep(time.Duration(n) * time.Second)
  430. //定义爬取时间
  431. endDate := time.Now().AddDate(0, 0, -dayNum).Format(utils.FormatDateTime)
  432. //endDate := time.Now().Format(utils.FormatDateTime)
  433. timeDate := utils.StrTimeToTime(endDate)
  434. currDate := timeDate.Format(utils.FormatDateUnSpace)
  435. year := timeDate.Year()
  436. month := timeDate.Format("01")
  437. var dayStr string
  438. day := timeDate.Day()
  439. if day < 10 {
  440. dayStr = "0" + strconv.Itoa(day)
  441. } else {
  442. dayStr = strconv.Itoa(day)
  443. }
  444. monthNum, _ := strconv.Atoi(month)
  445. month = strconv.Itoa(monthNum - 1) //获取时月份需要减一
  446. list, err := models.GetBaseFromTradeDalianDataList(timeDate.Format(utils.FormatDate))
  447. listDataMap := make(map[string]int)
  448. listDataMapVal := make(map[string]int)
  449. for _, v := range list {
  450. listDataMap[v.DealShortName+v.ClassifyType+v.DataTime] = v.BaseFromTradeDalianIndexId
  451. }
  452. for _, v := range list {
  453. listDataMapVal[v.DealValue+v.BuyValue+v.SoldValue] = v.BaseFromTradeDalianIndexId
  454. }
  455. if err != nil {
  456. fmt.Println(err)
  457. return err
  458. }
  459. listIndexCode, err := models.GetIndexCodeMapList("DL") //获取往期指标
  460. if err != nil {
  461. fmt.Println(err)
  462. return err
  463. }
  464. listIndexCodeMap := make(map[string]string)
  465. for _, v := range listIndexCode {
  466. listIndexCodeMap[v.IndexName] = v.IndexCode
  467. }
  468. for _, v := range searchList {
  469. for _, v2 := range v.ListSearch {
  470. //模拟form表单请求
  471. url := "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
  472. method := "POST"
  473. payload := &bytes.Buffer{}
  474. writer := multipart.NewWriter(payload)
  475. _ = writer.WriteField("memberDealPosiQuotes.variety", v.CarietyCode)
  476. _ = writer.WriteField("memberDealPosiQuotes.trade_type", "0")
  477. _ = writer.WriteField("year", strconv.Itoa(year))
  478. _ = writer.WriteField("month", month)
  479. _ = writer.WriteField("day", dayStr)
  480. _ = writer.WriteField("contract.contract_id", v2.ContractId)
  481. _ = writer.WriteField("contract.variety_id", v.CarietyCode)
  482. _ = writer.WriteField("currDate", currDate)
  483. err := writer.Close()
  484. fmt.Println(currDate, v.VarietyName, v2.ContractId)
  485. if err != nil {
  486. utils.FileLog.Info("获取指标失败:" + currDate + v.VarietyName + v2.ContractId)
  487. return err
  488. }
  489. client := &http.Client{}
  490. req, err := http.NewRequest(method, url, payload)
  491. if err != nil {
  492. return err
  493. }
  494. req.Header.Add("Cookie", "JSESSIONID=36ACF02A59227A3854F9D5D5E2FB5F2E; WMONID=R5ojcAIIcx-")
  495. req.Header.Set("Content-Type", writer.FormDataContentType())
  496. res, err := client.Do(req)
  497. if err != nil {
  498. return err
  499. }
  500. defer res.Body.Close()
  501. body, err := ioutil.ReadAll(res.Body)
  502. if err != nil {
  503. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDalian ErrMsg:"+err.Error()+"获取指标失败:"+currDate+v.VarietyName+v2.ContractId, utils.EmailSendToUsers)
  504. return err
  505. }
  506. err = DoHtml(string(body), v.VarietyName, v2.ContractId, timeDate, listDataMap, listIndexCodeMap, listDataMapVal)
  507. if err != nil {
  508. return err
  509. }
  510. }
  511. }
  512. return err
  513. }