yongyi_pig_manual.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. # coding:utf-8
  2. import os
  3. import shutil
  4. from time import sleep
  5. import datetime
  6. import openpyxl
  7. import rarfile as rarfile
  8. import requests
  9. from selenium import webdriver
  10. # 设置Chrome浏览器选项
  11. from selenium.common import exceptions
  12. from selenium.webdriver.chrome.service import Service
  13. from selenium.webdriver.common.by import By
  14. from selenium.webdriver.support import expected_conditions as EC
  15. from selenium.webdriver.support.wait import WebDriverWait
  16. from imgcode_ak import image_code
  17. def rename_week_file(new_dir, current_time, rar_name):
  18. files = os.listdir(rar_name)
  19. for dir in files:
  20. file_path = os.path.join(rar_name, dir)
  21. if os.path.isdir(file_path):
  22. for f in os.listdir(file_path):
  23. print("f.title()"+f.title())
  24. if f.title().find("周度数据") != -1:
  25. new_name = f'{new_dir}/{current_time}_week.xlsx'
  26. old_name = os.path.join(file_path, f)
  27. # os.rename(old_name, new_name)
  28. shutil.copy(old_name, new_name)
  29. print(f'周度文件重命名成功, 旧文件名{old_name} 新文件名{new_name}')
  30. return
  31. else:
  32. print(dir.title()+"不是一个文件夹")
  33. return
  34. return
  35. if __name__ == "__main__":
  36. # python+selunium定位已打开的浏览器
  37. # 创建一个 Chrome WebDriver 实例
  38. options = webdriver.ChromeOptions()
  39. # options.add_argument("headless")
  40. # options.add_argument('--headless')
  41. options.add_argument('--disable-gpu')
  42. options.add_argument('--no-sandbox')
  43. # 谷歌浏览器运行的默认调试端口:先用以下命令启动浏览器
  44. # 找到谷歌浏览器的程序地址,开启一个新的端口,并设置一个文件夹来保存浏览器的数据
  45. # /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 --user-data-dir=/Users/xiexiaoyuan/data/selenium/automationProfile
  46. # --remote-debugging-port值,可以指定任何打开的端口
  47. # --user-data-dir标记,指定创建新Chrome配置文件的目录。它是为了确保在单独的配置文件中启动chrome,不会污染你的默认配置文件。
  48. #
  49. # 此时会打开一个浏览器页面,我们输入目标网址,输入账号密码,登录成功。
  50. # 登录之后,以后都不需要登录,它会把你这次登录的信息记入到 --user-data-dir指定的目录下
  51. # 后面你只需要python + selenium + webdriver定位到这个已经登录的浏览器进行操作就可以啦
  52. options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
  53. # 修改下载地址
  54. # save_to_dir = '/Users/xiexiaoyuan/Downloads'
  55. save_to_dir = r'D:/eta/yongyi_data/download'
  56. options.add_argument("--download.default_directory=" + save_to_dir)
  57. options.add_argument('--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, '
  58. 'like Gecko) Chrome/118.0.5993.70 Safari/537.36')
  59. options.add_argument(" window-size=1920,1080")
  60. # s = Service(executable_path='/Users/xiexiaoyuan/chromedriver_mac64_114/chromedriver')
  61. # s = Service(executable_path='D:/download/chromedriver119-win64/chromedriver.exe')
  62. s = Service(executable_path='E:/chromedriver-win64/chromedriver.exe')
  63. driver = webdriver.Chrome(service=s, options=options)
  64. # driver.maximize_window()
  65. driver.get('https://data.yongyizixun888.com/')
  66. sleep(3)
  67. account = driver.find_element(By.XPATH, '//*[@id="dr_member_info"]/a[1]').text
  68. print(account)
  69. # 下载涌溢日度数据库
  70. sleep(1)
  71. a = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[2]/a')
  72. print(a.get_attribute("href"))
  73. a.click()
  74. # 下载涌溢完整数据库
  75. sleep(2)
  76. b = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[3]/a')
  77. print(b.get_attribute("href"))
  78. b.click()
  79. sleep(30)
  80. # 获取当前时间,并将其格式化为指定的形式
  81. # new_dir = '/Users/xiexiaoyuan/Downloads/yongyi'
  82. new_dir = r'D:\eta\yongyi_data\yongyi'
  83. current_time = datetime.datetime.now().strftime("%Y-%m-%d")
  84. # 查找文件并重命名
  85. os.chdir(save_to_dir)
  86. files = filter(os.path.isfile, os.listdir(save_to_dir))
  87. files = [os.path.join(save_to_dir, f) for f in files] # add path to each file
  88. files.sort(key=lambda x: os.path.getmtime(x))
  89. day_file = ""
  90. week_file = ""
  91. for file in files:
  92. if file.title().find("涌益咨询日度数据") != -1:
  93. if day_file == "":
  94. day_file = file
  95. if file.title().find("涌益咨询周度数据") != -1:
  96. if week_file == "":
  97. week_file = file
  98. if day_file != "" and week_file != "":
  99. break
  100. if day_file != "":
  101. print(day_file.title())
  102. new_name = f'{new_dir}/{current_time}_day.xlsx'
  103. # os.rename(day_file.title(), new_name)
  104. shutil.copy(day_file.title(), new_name)
  105. print(f'日度文件重命名成功, 旧文件名{day_file.title()} 新文件名{new_name}')
  106. else:
  107. print("未找到日度下载文件")
  108. if week_file != "":
  109. print(week_file.title())
  110. filename = week_file.title()
  111. index = filename.find(".Rar")
  112. rar_name = filename[:index]
  113. # 解压缩
  114. rar_file = rarfile.RarFile(filename, 'r')
  115. rar_file.extractall(rar_name)
  116. rar_file.close()
  117. rename_week_file(new_dir, current_time, rar_name)
  118. else:
  119. print("未找到周度下载文件")