# coding:utf-8
"""HTTP endpoint that downloads Yongyi data files through an already-open Chrome.

The service attaches Selenium to a Chrome instance listening on
``chrome_address``, clicks the download links on the Yongyi data site, then
moves the downloaded files from ``save_to_dir`` into ``new_dir``.
"""
import datetime
import os
import shutil
import subprocess
from time import sleep

import hug
import rarfile as rarfile
from bottle import route, run
from selenium import webdriver
# Chrome browser option helpers
from selenium.common import exceptions
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

chrome_driver_path = r"D:/eta/chromedriver-win64/chromedriver.exe"
chrome_address = r"127.0.0.1:9222"
save_to_dir = r'D:/eta/yongyi_data/excel'
new_dir = r'D:\eta\yongyi_data\excel\unread'
rarfile.UNRAR_TOOL = r"D:\Program Files\WinRAR\UnRAR.exe"

# Substrings that identify the downloaded daily / weekly files by name.
_DAY_FILE_MARKER = "涌益咨询日度数据"
_WEEK_FILE_MARKER = "涌益咨询周度数据"
# Substrings that identify the workbooks of interest inside the weekly archive.
_WEEK_MEMBER_MARKERS = ("周度数据", "周度图表版图")


def rename_week_file(new_dir, rar_name):
    """Move the weekly workbooks from an extracted archive into ``new_dir``.

    Every sub-directory of ``rar_name`` is scanned; each file whose name
    contains "周度数据" or "周度图表版图" is moved into ``new_dir`` under its
    lower-cased file name.

    Args:
        new_dir: Destination directory for the moved files.
        rar_name: Directory the weekly archive was extracted into.

    Returns:
        None.
    """
    for entry in os.listdir(rar_name):
        sub_dir = os.path.join(rar_name, entry)
        if not os.path.isdir(sub_dir):
            print(entry.title() + "不是一个文件夹")
            # Skip stray top-level files instead of aborting: os.listdir order
            # is arbitrary, so returning here could skip real sub-directories.
            continue
        for f in os.listdir(sub_dir):
            print("f.title()" + f.title())
            # First matching marker wins, mirroring the original if/elif order.
            label = next((m for m in _WEEK_MEMBER_MARKERS if m in f), None)
            if label is None:
                continue
            old_name = os.path.join(sub_dir, f)
            # Destination name is lower-cased, consistent with the daily file.
            new_name = os.path.join(new_dir, f.lower())
            shutil.move(old_name, new_name)
            print(f'{label}文件重命名成功, 旧文件名{old_name} 新文件名{new_name}')


def get_yongyi_driver():
    """Attach Selenium to the running Chrome and open the Yongyi data site.

    Chrome must already be running with remote debugging enabled, e.g.::

        /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome
            --remote-debugging-port=9222
            --user-data-dir=/Users/xiexiaoyuan/data/selenium/automationProfile

    ``--remote-debugging-port`` may be any free port. ``--user-data-dir``
    names the directory for a separate Chrome profile so the default profile
    is not polluted. Log in to the target site once in that browser; the
    login is stored under ``--user-data-dir`` and reused afterwards, so this
    function only needs to attach to the already logged-in browser.

    Returns:
        A ``webdriver.Chrome`` attached to ``chrome_address`` with
        https://data.yongyizixun888.com/ loaded.
    """
    options = webdriver.ChromeOptions()
    # options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    # Attach to the existing browser rather than launching a new one.
    options.add_experimental_option("debuggerAddress", chrome_address)
    print("重新打开")
    options.add_argument('--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, '
                         'like Gecko) Chrome/118.0.5993.70 Safari/537.36')
    # Was " window-size=1920,1080" (leading space, no "--" switch prefix).
    # NOTE(review): launch switches presumably have no effect on a browser
    # that is attached to via debuggerAddress -- confirm.
    options.add_argument("--window-size=1920,1080")
    # s = Service(executable_path='/Users/xiexiaoyuan/chromedriver_mac64_114/chromedriver')
    s = Service(executable_path=chrome_driver_path)
    driver = webdriver.Chrome(service=s, options=options)
    driver.get('https://data.yongyizixun888.com/')
    return driver


def _find_recent_downloads(directory, limit=4):
    """Return ``(day_file, week_file)`` among the newest files in ``directory``.

    Only the ``limit`` most recently modified regular files are inspected.
    Each result is a full path, or ``""`` when no matching file was found.
    """
    candidates = [os.path.join(directory, name) for name in os.listdir(directory)]
    # Test the joined paths so no os.chdir() is needed to resolve bare names.
    files = sorted(
        (path for path in candidates if os.path.isfile(path)),
        key=os.path.getmtime,
        reverse=True,
    )
    day_file = ""
    week_file = ""
    for path in files[:limit]:
        print(path)
        name = os.path.basename(path)
        if not day_file and _DAY_FILE_MARKER in name:
            day_file = path
        if not week_file and _WEEK_FILE_MARKER in name:
            week_file = path
        if day_file and week_file:
            break
    return day_file, week_file


def _click_download_link(driver, xpath):
    """Log the link's href, click it, then wait 30 seconds."""
    link = driver.find_element(By.XPATH, xpath)
    print(link.get_attribute("href"))
    link.click()
    # Presumably gives the browser time to finish the download -- confirm.
    sleep(30)


@hug.get('/yongyi/download')
def yongyi_download(dayFlag, weekFlag):
    """Download the requested Yongyi files and move them into ``new_dir``.

    Args:
        dayFlag: ``'1'`` to download and move the daily data file.
        weekFlag: ``'1'`` to download, extract and move the weekly archive.

    Returns:
        ``True`` when the run completed, ``False`` when the browser could not
        be attached or any step raised an exception.
    """
    # f-strings so that a non-string flag cannot raise before the try block.
    print(f"dayFlag:{dayFlag}")
    print(f"weekFlag:{weekFlag}")
    try:
        try:
            driver = get_yongyi_driver()
        except Exception as e:
            print("Exception:")
            print(str(e))
            if "chrome not reachable" in str(e):
                print("浏览器未开启")
            return False

        sleep(3)
        # Raises (and so returns False below) when the member-info element is
        # missing from the page.
        account = driver.find_element(By.XPATH, '//*[@id="dr_member_info"]/a[1]').text
        print(account)

        # Download the Yongyi daily database.
        if dayFlag == '1':
            sleep(1)
            _click_download_link(driver, '/html/body/div[4]/div[1]/div[2]/div[2]/a')

        # Download the Yongyi complete database.
        if weekFlag == '1':
            sleep(2)
            _click_download_link(driver, '/html/body/div[4]/div[1]/div[2]/div[3]/a')

        # Locate the freshly downloaded files and move them.
        day_file, week_file = _find_recent_downloads(save_to_dir)

        if dayFlag == '1':
            if day_file:
                print(day_file)
                file_name = os.path.basename(day_file).lower()
                new_name = os.path.join(new_dir, file_name)
                print(f'新文件名{new_name}')
                print(f'旧文件名{day_file}')
                # Move the real path; a title-cased copy of it only resolves
                # on a case-insensitive filesystem.
                shutil.move(day_file, new_name)
                print(f'日度文件重命名成功, 旧文件名{day_file} 新文件名{new_name}')
            else:
                print("未找到日度下载文件")

        if weekFlag == '1':
            if week_file:
                print(week_file)
                # Extract next to the archive, into a directory named after it
                # without the extension.
                rar_name = os.path.splitext(week_file)[0]
                # The context manager closes the archive even if extraction fails.
                with rarfile.RarFile(week_file, 'r') as rar_file:
                    rar_file.extractall(rar_name)
                rename_week_file(new_dir, rar_name)
            else:
                print("未找到周度下载文件")
        return True
    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as False.
        print("Exception:")
        print(str(e))
        return False


if __name__ == "__main__":
    # Serve the hug API's WSGI app with bottle's development server.
    app = __hug__.http.server()
    run(app=app, reloader=True, host='127.0.0.1', port=7010)