commodity_trade_dalian.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. package services
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/PuerkitoBio/goquery"
  6. "hongze/hongze_data_crawler/models"
  7. "hongze/hongze_data_crawler/utils"
  8. "io/ioutil"
  9. "log"
  10. "mime/multipart"
  11. "net/http"
  12. "strconv"
  13. "strings"
  14. "time"
  15. )
  16. //大连商品交易所持单排名
  17. func SyncRankingFromDalianOld() {
  18. fmt.Println("start")
  19. var str string
  20. exitProductMap, exitContractIdMap, varietyArrMap := DoSearch(str)
  21. //fmt.Println(exitProductMap)
  22. //fmt.Println(exitContractIdMap)
  23. //fmt.Println(varietyArrMap)
  24. var varietyNum int
  25. for k, v := range varietyArrMap {
  26. if k < 3 {
  27. if k-1 > varietyNum {
  28. url := "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
  29. method := "POST"
  30. payload := &bytes.Buffer{}
  31. writer := multipart.NewWriter(payload)
  32. _ = writer.WriteField("memberDealPosiQuotes.variety", v)
  33. _ = writer.WriteField("memberDealPosiQuotes.trade_type", "0")
  34. _ = writer.WriteField("year", "2021")
  35. _ = writer.WriteField("month", "9")
  36. _ = writer.WriteField("day", "19")
  37. _ = writer.WriteField("contract.variety_id", v)
  38. _ = writer.WriteField("currDate", "20211019")
  39. err := writer.Close()
  40. if err != nil {
  41. fmt.Println(err)
  42. return
  43. }
  44. client := &http.Client{}
  45. req, err := http.NewRequest(method, url, payload)
  46. if err != nil {
  47. fmt.Println(err)
  48. return
  49. }
  50. req.Header.Add("Cookie", "JSESSIONID=36ACF02A59227A3854F9D5D5E2FB5F2E; WMONID=R5ojcAIIcx-")
  51. req.Header.Set("Content-Type", writer.FormDataContentType())
  52. res, err := client.Do(req)
  53. if err != nil {
  54. fmt.Println(err)
  55. return
  56. }
  57. defer res.Body.Close()
  58. body, err := ioutil.ReadAll(res.Body)
  59. if err != nil {
  60. fmt.Println(err)
  61. return
  62. }
  63. _, exitContractIdMap, _ = DoSearch(string(body))
  64. //fmt.Println(exitContractIdMap)
  65. }
  66. }
  67. if k < -1 {
  68. //fmt.Println(v)
  69. for _, v2 := range exitContractIdMap {
  70. //模拟form表单请求
  71. url := "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
  72. method := "POST"
  73. payload := &bytes.Buffer{}
  74. writer := multipart.NewWriter(payload)
  75. _ = writer.WriteField("memberDealPosiQuotes.variety", v)
  76. _ = writer.WriteField("memberDealPosiQuotes.trade_type", "0")
  77. _ = writer.WriteField("year", "2021")
  78. _ = writer.WriteField("month", "9")
  79. _ = writer.WriteField("day", "19")
  80. _ = writer.WriteField("contract.contract_id", v2)
  81. _ = writer.WriteField("variety_id", v)
  82. _ = writer.WriteField("currDate", "20211019")
  83. err := writer.Close()
  84. if err != nil {
  85. fmt.Println(err)
  86. return
  87. }
  88. client := &http.Client{}
  89. req, err := http.NewRequest(method, url, payload)
  90. if err != nil {
  91. fmt.Println(err)
  92. return
  93. }
  94. req.Header.Add("Cookie", "JSESSIONID=36ACF02A59227A3854F9D5D5E2FB5F2E; WMONID=R5ojcAIIcx-")
  95. req.Header.Set("Content-Type", writer.FormDataContentType())
  96. res, err := client.Do(req)
  97. if err != nil {
  98. fmt.Println(err)
  99. return
  100. }
  101. defer res.Body.Close()
  102. body, err := ioutil.ReadAll(res.Body)
  103. if err != nil {
  104. fmt.Println(err)
  105. return
  106. }
  107. err = DoHtml(string(body), v2, exitProductMap[k], time.Now())
  108. //fmt.Println(err)
  109. //fmt.Println("解析:", v2, exitProductMap[k])
  110. }
  111. varietyNum++
  112. }
  113. }
  114. fmt.Println("end")
  115. }
  116. //处理搜索条件
  117. func DoSearch(body string) (exitProductMaps, exitContractIdMaps, varietyArrMaps map[int]string) {
  118. var str string
  119. if body != "" {
  120. str = body
  121. } else {
  122. str = "HTML文本"
  123. }
  124. doc, err := goquery.NewDocumentFromReader(strings.NewReader(str))
  125. if err != nil {
  126. log.Fatal(err)
  127. }
  128. exitProductMap := make(map[int]string)
  129. exitContractIdMap := make(map[int]string)
  130. varietyArrMap := make(map[int]string)
  131. //var productName string
  132. ul := doc.Find(".selBox ul")
  133. var pNum int
  134. var cidNum int
  135. var vNum int
  136. ul.Each(func(i int, s *goquery.Selection) {
  137. //解析标签
  138. //fmt.Println(i, s.Text())
  139. ulTxt := s.Text()
  140. if ulTxt != "" && (i == 0 || i == 2) {
  141. ulTxtArr := strings.Split(ulTxt, " ")
  142. for _, v := range ulTxtArr {
  143. v = strings.Replace(v, "\n", "", -1)
  144. v = strings.Replace(v, " ", "", -1)
  145. if v != "" {
  146. exitProductMap[pNum] = v
  147. pNum++
  148. }
  149. }
  150. }
  151. if ulTxt != "" && i == 3 {
  152. cidTxtArr := strings.Split(ulTxt, " ")
  153. for _, v := range cidTxtArr {
  154. v = strings.Replace(v, "\n", "", -1)
  155. v = strings.Replace(v, " ", "", -1)
  156. if v != "" {
  157. exitContractIdMap[cidNum] = v
  158. cidNum++
  159. }
  160. }
  161. }
  162. })
  163. varietyArr := strings.Split(str, "onclick=\"javascript:setVariety('")
  164. for _, v := range varietyArr {
  165. strnum := strings.Index(v, "');")
  166. if strnum > 0 {
  167. varietyStr := v[0:strnum]
  168. if len(varietyStr) < 10 {
  169. //fmt.Println(strnum, varietyStr)
  170. varietyArrMap[vNum] = varietyStr
  171. vNum++
  172. }
  173. }
  174. }
  175. exitProductMaps = exitProductMap
  176. exitContractIdMaps = exitContractIdMap
  177. varietyArrMaps = varietyArrMap
  178. fmt.Println(exitProductMaps)
  179. fmt.Println(exitContractIdMaps)
  180. fmt.Println(varietyArrMaps)
  181. return
  182. }
  183. //处理解析Html
  184. func DoHtml(body, name, contractId string, dateTime time.Time) (err error) {
  185. defer func() {
  186. if err != nil {
  187. fmt.Println("RefreshDataFromDaLian Err:" + err.Error())
  188. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDaLian ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  189. }
  190. }()
  191. str := body
  192. doc, err := goquery.NewDocumentFromReader(strings.NewReader(str))
  193. if err != nil {
  194. log.Fatal(err)
  195. }
  196. var isAdd bool
  197. addSql := ` INSERT INTO base_from_trade_dalian_index(rank,deal_short_name,deal_name,deal_code,deal_value,buy_short_name,deal_change,buy_name,buy_code,buy_value,buy_change,sold_short_name,sold_name,sold_code,sold_value,sold_change,frequency,classify_name,classify_type,create_time,modify_time,data_time) values `
  198. table := doc.Find("table")
  199. var rank, shortName, dealValue, dealChange, buyName, buyValue, buyChange, soldName, soldValue, soldChange string
  200. table.Find("tr").Each(func(i int, tr *goquery.Selection) {
  201. tds := tr.Find("td")
  202. //fmt.Println(tds.Length(), "长度:", i)
  203. if tds.Length() == 0 || tds.Length() == 7 || i == 23 {
  204. tdText := tds.Text()
  205. utils.FileLog.Info(tdText)
  206. } else {
  207. item := new(models.BaseFromTradeDalianIndex)
  208. tds.Each(func(tk int, td *goquery.Selection) {
  209. tdText := td.Text()
  210. if tk == 0 { //名次
  211. rank = tdText
  212. }
  213. if tk == 1 { //会员简称
  214. shortName = tdText
  215. }
  216. if tk == 2 { //成交量
  217. dealValue = strings.Replace(tdText, ",", "", -1)
  218. }
  219. if tk == 3 { //增减
  220. dealChange = strings.Replace(tdText, ",", "", -1)
  221. }
  222. if tk == 5 { //会员简称
  223. buyName = tdText
  224. }
  225. if tk == 6 { //持买单量
  226. buyValue = strings.Replace(tdText, ",", "", -1)
  227. }
  228. if tk == 7 { //增减
  229. buyChange = strings.Replace(tdText, ",", "", -1)
  230. }
  231. if tk == 9 { //会员简称
  232. soldName = tdText
  233. }
  234. if tk == 10 { //持卖单量
  235. soldValue = strings.Replace(tdText, ",", "", -1)
  236. }
  237. if tk == 11 { //增减
  238. soldChange = strings.Replace(tdText, ",", "", -1)
  239. }
  240. })
  241. item.Rank = rank
  242. item.DealShortName = shortName
  243. item.DealName = shortName + "_成交量_" + contractId
  244. item.DealCode = DlIndexCodeGenerator(item.DealName, "deal")
  245. item.DealValue = dealValue
  246. item.BuyShortName = buyName
  247. item.DealChange = dealChange
  248. item.BuyName = buyName + "_持买单量_" + contractId
  249. item.BuyCode = DlIndexCodeGenerator(item.BuyName, "buy")
  250. item.BuyValue = buyValue
  251. item.BuyChange = buyChange
  252. item.SoldShortName = soldName
  253. item.SoldName = soldName + "_持卖单量_" + contractId
  254. item.SoldCode = DlIndexCodeGenerator(item.SoldName, "sold")
  255. item.SoldValue = soldValue
  256. item.SoldChange = soldChange
  257. item.Frequency = "日度"
  258. item.ClassifyName = name
  259. item.ClassifyType = contractId
  260. item.CreateTime = time.Now().Format(utils.FormatDateTime)
  261. item.ModifyTime = time.Now().Format(utils.FormatDateTime)
  262. item.DataTime = dateTime.Format(utils.FormatDate)
  263. addSql += models.GetAddSql(item)
  264. isAdd = true
  265. }
  266. })
  267. addSql = strings.TrimRight(addSql, ",")
  268. if isAdd {
  269. err = models.RefreshEdbDataByDaLian(addSql)
  270. if err != nil {
  271. return err
  272. }
  273. }
  274. return
  275. }
  276. type SearchList struct {
  277. VarietyName string `description:"商品名称"`
  278. CarietyCode string `description:"商品名称对应的编码"`
  279. List []SearchContractId
  280. }
  281. type SearchContractId struct {
  282. ContractId string `description:"商品类型"`
  283. }
  284. func SyncRankingFromDalianDo() {
  285. for i := 10; i >= 0; i-- {
  286. fmt.Println(i)
  287. //SyncRankingFromDalian2(i)
  288. }
  289. }
  290. func SyncRankingFromDalianDos() (err error) {
  291. fmt.Println(66666)
  292. defer func() {
  293. if err != nil {
  294. fmt.Println("RefreshDataFromWind Err:" + err.Error())
  295. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromWind ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  296. }
  297. }()
  298. //SyncRankingFromDalian2(1)
  299. return err
  300. }
  301. //大连交易所持单排名
  302. func SyncRankingFromDalian(dayNum int) (err error) {
  303. fmt.Println("start")
  304. defer func() {
  305. if err != nil {
  306. fmt.Println("RefreshDataFromDalian Err:" + err.Error())
  307. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDalian ErrMsg:"+err.Error(), utils.EmailSendToUsers)
  308. }
  309. }()
  310. n := utils.GetRandInt(10, 120)
  311. time.Sleep(time.Duration(n) * time.Second)
  312. searchList := []SearchList{
  313. SearchList{VarietyName: "豆一", CarietyCode: "a", List: []SearchContractId{SearchContractId{ContractId: "a2111"}, {ContractId: "a2201"}, {ContractId: "a2203"}}},
  314. SearchList{VarietyName: "豆二", CarietyCode: "b", List: []SearchContractId{SearchContractId{ContractId: "b2112"}, {ContractId: "b2201"}}},
  315. SearchList{VarietyName: "豆粕", CarietyCode: "m", List: []SearchContractId{SearchContractId{ContractId: "m2111"}, {ContractId: "m2112"}, {ContractId: "m2201"}, {ContractId: "m2203"}, {ContractId: "m2205"}, {ContractId: "m2207"}, {ContractId: "m2208"}, {ContractId: "m2209"}}},
  316. SearchList{VarietyName: "豆油", CarietyCode: "y", List: []SearchContractId{SearchContractId{ContractId: "y2112"}, {ContractId: "y2201"}, {ContractId: "y2203"}, {ContractId: "y2205"}, {ContractId: "y2207"}, {ContractId: "y2208"}}},
  317. SearchList{VarietyName: "棕榈油", CarietyCode: "p", List: []SearchContractId{SearchContractId{ContractId: "p2112"}, {ContractId: "p2201"}, {ContractId: "p2202"}, {ContractId: "p2203"}, {ContractId: "p2204"}, {ContractId: "p2205"}}},
  318. SearchList{VarietyName: "玉米", CarietyCode: "c", List: []SearchContractId{SearchContractId{ContractId: "c2111"}, {ContractId: "c2201"}, {ContractId: "c2203"}, {ContractId: "c2205"}, {ContractId: "c2207"}}},
  319. SearchList{VarietyName: "玉米淀粉", CarietyCode: "cs", List: []SearchContractId{SearchContractId{ContractId: "cs2111"}, {ContractId: "cs2201"}, {ContractId: "cs2203"}}},
  320. SearchList{VarietyName: "鸡蛋", CarietyCode: "jd", List: []SearchContractId{SearchContractId{ContractId: "jd2201"}, {ContractId: "jd2202"}, {ContractId: "jd2203"}, {ContractId: "jd2204"}, {ContractId: "jd2205"}}},
  321. SearchList{VarietyName: "粳米", CarietyCode: "rr", List: []SearchContractId{SearchContractId{ContractId: "rr2112"}}},
  322. //SearchList{VarietyName: "纤维板", CarietyCode: "fb", List:[]SearchContractId{}}, //无数据
  323. //SearchList{VarietyName: "胶合板", CarietyCode: "bb", List:[]SearchContractId{}}, //无数据
  324. SearchList{VarietyName: "生猪", CarietyCode: "lh", List: []SearchContractId{SearchContractId{ContractId: "lh2201"}, {ContractId: "lh2203"}, {ContractId: "lh2205"}}},
  325. SearchList{VarietyName: "聚乙烯", CarietyCode: "l", List: []SearchContractId{SearchContractId{ContractId: "l2112"}, {ContractId: "l2201"}, {ContractId: "l2202"}, {ContractId: "l2203"}, {ContractId: "l2204"}, {ContractId: "l2205"}}},
  326. SearchList{VarietyName: "聚氯乙烯", CarietyCode: "v", List: []SearchContractId{SearchContractId{ContractId: "v2111"}, {ContractId: "v2112"}, {ContractId: "v2201"}, {ContractId: "v2202"}, {ContractId: "v2203"}, {ContractId: "v2204"}, {ContractId: "v2205"}}},
  327. SearchList{VarietyName: "聚丙烯", CarietyCode: "pp", List: []SearchContractId{SearchContractId{ContractId: "pp2112"}, {ContractId: "pp2201"}, {ContractId: "pp2202"}, {ContractId: "pp2203"}, {ContractId: "pp2204"}, {ContractId: "pp2205"}}},
  328. SearchList{VarietyName: "苯乙烯", CarietyCode: "eb", List: []SearchContractId{SearchContractId{ContractId: "eb2111"}, {ContractId: "eb2112"}, {ContractId: "eb2201"}, {ContractId: "eb2202"}}},
  329. SearchList{VarietyName: "焦炭", CarietyCode: "j", List: []SearchContractId{SearchContractId{ContractId: "j2201"}, {ContractId: "j2205"}}},
  330. SearchList{VarietyName: "焦煤", CarietyCode: "jm", List: []SearchContractId{SearchContractId{ContractId: "jm2201"}, {ContractId: "jm2205"}}},
  331. SearchList{VarietyName: "铁矿石", CarietyCode: "i", List: []SearchContractId{SearchContractId{ContractId: "i2112"}, {ContractId: "i2201"}, {ContractId: "i2202"}, {ContractId: "i2203"}, {ContractId: "i2204"}, {ContractId: "i2205"}, {ContractId: "i2206"}, {ContractId: "i2207"}, {ContractId: "i2208"}, {ContractId: "i2209"}}},
  332. SearchList{VarietyName: "乙二醇", CarietyCode: "eg", List: []SearchContractId{SearchContractId{ContractId: "eg2112"}, {ContractId: "eg2201"}, {ContractId: "eg2202"}, {ContractId: "eg2203"}, {ContractId: "eg2205"}}},
  333. SearchList{VarietyName: "液化石油气", CarietyCode: "pg", List: []SearchContractId{SearchContractId{ContractId: "pg2111"}, {ContractId: "pg2112"}, {ContractId: "pg2201"}, {ContractId: "pg2202"}}},
  334. }
  335. //定义爬取时间
  336. endDate := time.Now().AddDate(0, 0, -dayNum).Format(utils.FormatDateTime)
  337. //endDate := time.Now().Format(utils.FormatDateTime)
  338. timeDate := utils.StrTimeToTime(endDate)
  339. currDate := timeDate.Format(utils.FormatDateUnSpace)
  340. year := timeDate.Year()
  341. month := timeDate.Format("01")
  342. var dayStr string
  343. day := timeDate.Day()
  344. if day < 10 {
  345. dayStr = "0" + strconv.Itoa(day)
  346. } else {
  347. dayStr = strconv.Itoa(day)
  348. }
  349. monthNum, _ := strconv.Atoi(month)
  350. month = strconv.Itoa(monthNum - 1) //获取时月份需要减一
  351. for _, v := range searchList {
  352. for _, v2 := range v.List {
  353. //模拟form表单请求
  354. url := "http://www.dce.com.cn/publicweb/quotesdata/memberDealPosiQuotes.html"
  355. method := "POST"
  356. payload := &bytes.Buffer{}
  357. writer := multipart.NewWriter(payload)
  358. _ = writer.WriteField("memberDealPosiQuotes.variety", v.CarietyCode)
  359. _ = writer.WriteField("memberDealPosiQuotes.trade_type", "0")
  360. _ = writer.WriteField("year", strconv.Itoa(year))
  361. _ = writer.WriteField("month", month)
  362. _ = writer.WriteField("day", dayStr)
  363. _ = writer.WriteField("contract.contract_id", v2.ContractId)
  364. _ = writer.WriteField("variety_id", v.CarietyCode)
  365. _ = writer.WriteField("currDate", currDate)
  366. err := writer.Close()
  367. fmt.Println(currDate, v.VarietyName, v2.ContractId)
  368. if err != nil {
  369. utils.FileLog.Info("获取指标失败:" + currDate + v.VarietyName + v2.ContractId)
  370. return err
  371. }
  372. client := &http.Client{}
  373. req, err := http.NewRequest(method, url, payload)
  374. if err != nil {
  375. return err
  376. }
  377. req.Header.Add("Cookie", "JSESSIONID=36ACF02A59227A3854F9D5D5E2FB5F2E; WMONID=R5ojcAIIcx-")
  378. req.Header.Set("Content-Type", writer.FormDataContentType())
  379. res, err := client.Do(req)
  380. if err != nil {
  381. return err
  382. }
  383. defer res.Body.Close()
  384. body, err := ioutil.ReadAll(res.Body)
  385. if err != nil {
  386. go utils.SendEmail(utils.APPNAME+"【"+utils.RunMode+"】"+"失败提醒", "RefreshDataFromDalian ErrMsg:"+err.Error()+"获取指标失败:"+currDate+v.VarietyName+v2.ContractId, utils.EmailSendToUsers)
  387. return err
  388. }
  389. err = DoHtml(string(body), v.VarietyName, v2.ContractId, timeDate)
  390. if err != nil {
  391. return err
  392. }
  393. }
  394. }
  395. return err
  396. }
  397. func DlIndexCodeGenerator(indexName, suffix string) string {
  398. ineIndexCode, _ := ineIndexCodeMap[indexName]
  399. if ineIndexCode == "" {
  400. ineIndexCode = fmt.Sprintf("DL%s", time.Now().Format(utils.FormatDateTimeUnSpace)+strconv.Itoa(utils.GetRandInt(1, 100))+suffix)
  401. ineIndexCodeMap[indexName] = ineIndexCode
  402. err := models.AddBaseFromTradeMapping(indexName, ineIndexCode, "DL")
  403. if err != nil {
  404. fmt.Println("add Code err:", err)
  405. }
  406. }
  407. return ineIndexCode
  408. }
  409. //rank,deal_short_name,deal_name,deal_code,deal_value,buy_short_name,deal_change,buy_name,buy_code,buy_value,buy_change,sold_short_name,sold_name,sold_code,sold_value,sold_change,frequency,classify_name,classify_type,create_time,modify_time,data_time