|
@@ -1,152 +1,178 @@
|
|
|
-# coding:utf-8
|
|
|
-import os
|
|
|
-import shutil
|
|
|
-from time import sleep
|
|
|
-from bottle import route, run
|
|
|
-import datetime
|
|
|
-
|
|
|
-import hug
|
|
|
-import rarfile as rarfile
|
|
|
-from selenium import webdriver
|
|
|
-
|
|
|
-# 设置Chrome浏览器选项
|
|
|
-from selenium.common import exceptions
|
|
|
-from selenium.webdriver.chrome.service import Service
|
|
|
-from selenium.webdriver.common.by import By
|
|
|
-from selenium.webdriver.support import expected_conditions as EC
|
|
|
-from selenium.webdriver.support.wait import WebDriverWait
|
|
|
-
|
|
|
-
|
|
|
-def rename_week_file(new_dir, current_time, rar_name):
|
|
|
- files = os.listdir(rar_name)
|
|
|
- for dir in files:
|
|
|
- file_path = os.path.join(rar_name, dir)
|
|
|
- if os.path.isdir(file_path):
|
|
|
- for f in os.listdir(file_path):
|
|
|
- print("f.title()" + f.title())
|
|
|
- if f.title().find("周度数据") != -1:
|
|
|
- new_name = f'{new_dir}/{current_time}_week.xlsx'
|
|
|
- old_name = os.path.join(file_path, f)
|
|
|
- # os.rename(old_name, new_name)
|
|
|
- shutil.copy(old_name, new_name)
|
|
|
- print(f'周度文件重命名成功, 旧文件名{old_name} 新文件名{new_name}')
|
|
|
- return
|
|
|
- else:
|
|
|
- print(dir.title() + "不是一个文件夹")
|
|
|
- return
|
|
|
- return
|
|
|
-
|
|
|
-@hug.get('/yongyi/download')
|
|
|
-def yongyi_download(dayFlag, weekFlag):
|
|
|
- print("dayFlag:"+dayFlag)
|
|
|
- print("weekFlag:"+weekFlag)
|
|
|
-
|
|
|
- try:
|
|
|
- # python+selunium定位已打开的浏览器
|
|
|
- # 创建一个 Chrome WebDriver 实例
|
|
|
- options = webdriver.ChromeOptions()
|
|
|
- # options.add_argument("headless")
|
|
|
- # options.add_argument('--headless')
|
|
|
- options.add_argument('--disable-gpu')
|
|
|
- options.add_argument('--no-sandbox')
|
|
|
- # 谷歌浏览器运行的默认调试端口:先用以下命令启动浏览器
|
|
|
- # 找到谷歌浏览器的程序地址,开启一个新的端口,并设置一个文件夹来保存浏览器的数据
|
|
|
- # /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 --user-data-dir=/Users/xiexiaoyuan/data/selenium/automationProfile
|
|
|
- # --remote-debugging-port值,可以指定任何打开的端口
|
|
|
- # --user-data-dir标记,指定创建新Chrome配置文件的目录。它是为了确保在单独的配置文件中启动chrome,不会污染你的默认配置文件。
|
|
|
- #
|
|
|
- # 此时会打开一个浏览器页面,我们输入目标网址,输入账号密码,登录成功。
|
|
|
- # 登录之后,以后都不需要登录,它会把你这次登录的信息记入到 --user-data-dir指定的目录下
|
|
|
- # 后面你只需要python + selenium + webdriver定位到这个已经登录的浏览器进行操作就可以啦
|
|
|
- options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
|
|
|
- # 修改下载地址
|
|
|
- # save_to_dir = '/Users/xiexiaoyuan/Downloads'
|
|
|
- save_to_dir = r'D:/eta/yongyi_data/download'
|
|
|
- options.add_argument("--download.default_directory=" + save_to_dir)
|
|
|
- options.add_argument('--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, '
|
|
|
- 'like Gecko) Chrome/118.0.5993.70 Safari/537.36')
|
|
|
- options.add_argument(" window-size=1920,1080")
|
|
|
-
|
|
|
- # s = Service(executable_path='/Users/xiexiaoyuan/chromedriver_mac64_114/chromedriver')
|
|
|
- # s = Service(executable_path='D:/download/chromedriver119-win64/chromedriver.exe')
|
|
|
- s = Service(executable_path='D:/eta/chromedriver-win64/chromedriver.exe')
|
|
|
- driver = webdriver.Chrome(service=s, options=options)
|
|
|
- driver.get('https://data.yongyizixun888.com/')
|
|
|
- sleep(3)
|
|
|
-
|
|
|
- account = driver.find_element(By.XPATH, '//*[@id="dr_member_info"]/a[1]').text
|
|
|
- print(account)
|
|
|
-
|
|
|
- # 下载涌溢日度数据库
|
|
|
- if dayFlag == '1':
|
|
|
- sleep(1)
|
|
|
- a = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[2]/a')
|
|
|
- print(a.get_attribute("href"))
|
|
|
- a.click()
|
|
|
- sleep(30)
|
|
|
-
|
|
|
- # 下载涌溢完整数据库
|
|
|
- if weekFlag == '1':
|
|
|
- sleep(2)
|
|
|
- b = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[3]/a')
|
|
|
- print(b.get_attribute("href"))
|
|
|
- b.click()
|
|
|
- sleep(30)
|
|
|
-
|
|
|
- # 获取当前时间,并将其格式化为指定的形式
|
|
|
- # new_dir = '/Users/xiexiaoyuan/Downloads/yongyi'
|
|
|
- new_dir = r'D:/eta/yongyi_data/yongyi'
|
|
|
- current_time = datetime.datetime.now().strftime("%Y-%m-%d")
|
|
|
- current_time_full = datetime.datetime.now().strftime('%Y{y}%m{m}%d{d}').format(y='年', m='月', d='日')
|
|
|
- print(current_time_full)
|
|
|
-
|
|
|
- # 查找文件并重命名
|
|
|
- os.chdir(save_to_dir)
|
|
|
- files = filter(os.path.isfile, os.listdir(save_to_dir))
|
|
|
- files = [os.path.join(save_to_dir, f) for f in files] # add path to each file
|
|
|
- files.sort(key=lambda x: os.path.getmtime(x), reverse=True)
|
|
|
- day_file = ""
|
|
|
- week_file = ""
|
|
|
- for file in files:
|
|
|
- day_name = f'{current_time_full}涌益咨询日度数据'
|
|
|
- if file.title().find(day_name) != -1:
|
|
|
- if day_file == "":
|
|
|
- day_file = file
|
|
|
- if file.title().find("涌益咨询周度数据") != -1:
|
|
|
- if week_file == "":
|
|
|
- week_file = file
|
|
|
- if day_file != "" and week_file != "":
|
|
|
- break
|
|
|
-
|
|
|
- if dayFlag == '1':
|
|
|
- if day_file != "":
|
|
|
- print(day_file.title())
|
|
|
- new_name = f'{new_dir}/{current_time}_day.xlsx'
|
|
|
- # os.rename(day_file.title(), new_name)
|
|
|
- shutil.copy(day_file.title(), new_name)
|
|
|
- print(f'日度文件重命名成功, 旧文件名{day_file.title()} 新文件名{new_name}')
|
|
|
- else:
|
|
|
- print("未找到日度下载文件")
|
|
|
-
|
|
|
- if weekFlag == '1':
|
|
|
- if week_file != "":
|
|
|
- print(week_file.title())
|
|
|
- filename = week_file.title()
|
|
|
- index = filename.find(".Rar")
|
|
|
- rar_name = filename[:index]
|
|
|
- # 解压缩
|
|
|
- rar_file = rarfile.RarFile(filename, 'r')
|
|
|
- rar_file.extractall(rar_name)
|
|
|
- rar_file.close()
|
|
|
- rename_week_file(new_dir, current_time, rar_name)
|
|
|
- else:
|
|
|
- print("未找到周度下载文件")
|
|
|
- return True
|
|
|
- except Exception as e:
|
|
|
- print("Exception:")
|
|
|
- print(str(e))
|
|
|
- return False
|
|
|
-
|
|
|
-if __name__ == "__main__":
|
|
|
- app = __hug__.http.server()
|
|
|
- run(app=app, reloader=True, host='127.0.0.1', port=7010)
|
|
|
+# coding:utf-8
|
|
|
+import os
|
|
|
+import shutil
|
|
|
+import subprocess
|
|
|
+from time import sleep
|
|
|
+from bottle import route, run
|
|
|
+import datetime
|
|
|
+
|
|
|
+import hug
|
|
|
+import rarfile as rarfile
|
|
|
+from selenium import webdriver
|
|
|
+
|
|
|
+# 设置Chrome浏览器选项
|
|
|
+from selenium.common import exceptions
|
|
|
+from selenium.webdriver.chrome.service import Service
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
+from selenium.webdriver.support import expected_conditions as EC
|
|
|
+from selenium.webdriver.support.wait import WebDriverWait
|
|
|
+
|
|
|
+
|
|
|
def rename_week_file(new_dir, rar_name):
    """Copy the weekly files of an extracted archive into ``new_dir``.

    ``rar_name`` is the directory an archive was extracted into.  Every
    immediate sub-directory of it is scanned, and each file whose name
    contains "周度数据" (checked first) or "周度图表版图" is copied into
    ``new_dir`` under its own, lower-cased, file name.  Source files are
    copied, not moved.

    Behaviour fixed compared with the previous revision:

    * An entry of ``rar_name`` that is not a directory is reported and
      skipped.  It used to end the whole scan, so the outcome depended on
      the arbitrary order returned by ``os.listdir``.
    * Only the file name is lower-cased.  The complete destination path,
      ``new_dir`` included, used to be lower-cased, which points at a
      different directory on case-sensitive filesystems.

    Args:
        new_dir: Existing directory that receives the copies.
        rar_name: Directory that holds the extracted archive contents.

    Returns:
        None.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``shutil.copy`` when a
            directory cannot be listed or a file cannot be copied.
    """
    # Markers are tested in this order; the first one found in the file name
    # wins and is also used as the prefix of the log line.
    markers = ("周度数据", "周度图表版图")

    for entry in os.listdir(rar_name):
        entry_path = os.path.join(rar_name, entry)
        if not os.path.isdir(entry_path):
            print(entry.title() + "不是一个文件夹")
            continue  # keep scanning the remaining entries

        for f in os.listdir(entry_path):
            print("f.title()" + f.title())
            label = next((marker for marker in markers if marker in f), None)
            if label is None:
                continue
            old_name = os.path.join(entry_path, f)
            # Lower-case the file name only, never the directory part.
            new_name = os.path.join(new_dir, f.lower())
            shutil.copy(old_name, new_name)
            print(f'{label}文件重命名成功, 旧文件名{old_name} 新文件名{new_name}')
|
|
|
+
|
|
|
+
|
|
|
def get_yongyi_driver(debugger_address="127.0.0.1:9222",
                      chromedriver_path='D:/eta/chromedriver-win64/chromedriver.exe',
                      url='https://data.yongyizixun888.com/'):
    """Attach Selenium to an already running Chrome and open the data site.

    The browser is expected to have been started by hand with remote
    debugging enabled and a dedicated profile directory, for example::

        chrome --remote-debugging-port=9222 --user-data-dir=<profile dir>

    ``--remote-debugging-port`` may be any free port and ``--user-data-dir``
    keeps the automation profile apart from the default one.  After logging
    in to the site once in that browser, the login is stored in the profile
    directory, so this function only has to attach to the browser.

    Args:
        debugger_address: ``host:port`` of the Chrome remote-debugging
            endpoint to attach to.
        chromedriver_path: Path of the chromedriver executable.
        url: Page loaded right after attaching.

    Returns:
        The ``webdriver.Chrome`` instance, with ``url`` already requested.

    Raises:
        Exception: Whatever Selenium raises when the driver cannot be
            started, the browser cannot be reached, or the page cannot be
            loaded; nothing is caught here.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    # Attach to the running browser instead of launching a new one.
    # NOTE(review): the command-line arguments set on ``options`` presumably
    # have no effect on a browser that is already running - confirm.
    options.add_experimental_option("debuggerAddress", debugger_address)

    print("重新打开")
    options.add_argument('--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, '
                         'like Gecko) Chrome/118.0.5993.70 Safari/537.36')
    # Was " window-size=1920,1080": the leading space and the missing "--"
    # made it an unrecognised argument.
    options.add_argument("--window-size=1920,1080")

    service = Service(executable_path=chromedriver_path)
    driver = webdriver.Chrome(service=service, options=options)
    driver.get(url)

    return driver
|
|
|
+
|
|
|
def _click_download_link(driver, xpath):
    """Log the href of the link at ``xpath``, click it and wait 30 seconds."""
    link = driver.find_element(By.XPATH, xpath)
    print(link.get_attribute("href"))
    link.click()
    # Fixed wait; presumably long enough for the browser to finish the download.
    sleep(30)


def _find_recent_downloads(download_dir, scan_limit=4):
    """Return ``(day_file, week_file)`` found among the newest downloads.

    Only the ``scan_limit`` most recently modified regular files of
    ``download_dir`` are examined.  Each element of the result is a full path,
    or ``""`` when no examined file name contains the corresponding marker.
    """
    paths = [os.path.join(download_dir, name) for name in os.listdir(download_dir)]
    # Test the joined path so the result does not depend on the process
    # working directory (no os.chdir needed).
    paths = [path for path in paths if os.path.isfile(path)]
    paths.sort(key=os.path.getmtime, reverse=True)

    day_file = ""
    week_file = ""
    for path in paths[:scan_limit]:
        print(path)
        name = os.path.basename(path)
        if not day_file and "涌益咨询日度数据" in name:
            day_file = path
        if not week_file and "涌益咨询周度数据" in name:
            week_file = path
        if day_file and week_file:
            break
    return day_file, week_file


def _copy_day_file(day_file, new_dir):
    """Copy the daily file into ``new_dir`` under its lower-cased file name."""
    # os.path.basename handles both "/" and "\\" on Windows; splitting on "/"
    # alone left "download\\<name>" in the destination path there.
    new_name = os.path.join(new_dir, os.path.basename(day_file).lower())
    shutil.copy(day_file, new_name)
    print(f'日度文件重命名成功, 旧文件名{day_file} 新文件名{new_name}')


def _extract_week_archive(week_file, new_dir):
    """Extract the weekly .rar next to itself and copy its weekly files."""
    extract_dir, extension = os.path.splitext(week_file)
    if extension.lower() != ".rar":
        # The old ``find(".Rar")`` returned -1 here and silently dropped the
        # last character of the path instead.
        raise ValueError(f"not a .rar archive: {week_file}")
    # The context manager closes the archive even when extraction fails.
    with rarfile.RarFile(week_file, 'r') as archive:
        archive.extractall(extract_dir)
    rename_week_file(new_dir, extract_dir)


@hug.get('/yongyi/download')
def yongyi_download(dayFlag, weekFlag):
    """Download the Yongyi daily and/or weekly data and stage it as unread.

    Steps:

    1. Attach to the running browser through ``get_yongyi_driver``.
    2. For each flag equal to the string ``'1'``, click the matching
       download link on the page and wait for a fixed time.
    3. Look at the newest files of the download directory for a daily file
       and a weekly archive.
    4. Copy the daily file into the "unread" directory; extract the weekly
       archive and hand it to ``rename_week_file``.

    Fixes compared with the previous revision: real paths are used for all
    file operations (not their ``str.title()`` form), the file name is taken
    with ``os.path.basename`` so the daily copy lands directly in the
    "unread" directory on Windows, the archive is always closed, and the
    process working directory is no longer changed.

    NOTE(review): the webdriver session is never released; whether
    ``driver.quit()`` would also close the attached browser needs to be
    confirmed before adding it.

    Args:
        dayFlag: ``'1'`` to download and stage the daily file.
        weekFlag: ``'1'`` to download and stage the weekly archive.

    Returns:
        ``True`` when the run reaches the end (a requested file that was not
        found is only logged), ``False`` when any exception occurred.
    """
    print("dayFlag:"+dayFlag)
    print("weekFlag:"+weekFlag)
    # Directory the browser downloads into.
    save_to_dir = r'D:/eta/yongyi_data/download'
    # Directory that receives the staged files.
    new_dir = r'D:\eta\yongyi_data\unread'
    try:
        try:
            driver = get_yongyi_driver()
        except Exception as e:
            print("Exception:")
            print(str(e))
            if "chrome not reachable" in str(e):
                print("浏览器未开启")
            return False

        sleep(3)
        # Raises when the member-info element is absent; presumably that
        # means the browser is not logged in.
        account = driver.find_element(By.XPATH, '//*[@id="dr_member_info"]/a[1]').text
        print(account)

        # Daily database download link.
        if dayFlag == '1':
            sleep(1)
            _click_download_link(driver, '/html/body/div[4]/div[1]/div[2]/div[2]/a')

        # Complete (weekly) database download link.
        if weekFlag == '1':
            sleep(2)
            _click_download_link(driver, '/html/body/div[4]/div[1]/div[2]/div[3]/a')

        day_file, week_file = _find_recent_downloads(save_to_dir)

        if dayFlag == '1':
            if day_file:
                print(day_file)
                _copy_day_file(day_file, new_dir)
            else:
                print("未找到日度下载文件")

        if weekFlag == '1':
            if week_file:
                print(week_file)
                _extract_week_archive(week_file, new_dir)
            else:
                print("未找到周度下载文件")
        return True
    except Exception as e:
        print("Exception:")
        print(str(e))
        return False
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Script entry point: take the HTTP application that hug assembled for
    # this module and serve it with bottle's runner on 127.0.0.1:7010,
    # with the reloader switched on.
    wsgi_app = __hug__.http.server()
    run(
        app=wsgi_app,
        host='127.0.0.1',
        port=7010,
        reloader=True,
    )
|