|
@@ -1,7 +1,9 @@
|
|
|
# coding:utf-8
|
|
|
+import os
|
|
|
from time import sleep
|
|
|
import datetime
|
|
|
import openpyxl
|
|
|
+import rarfile as rarfile
|
|
|
import requests
|
|
|
from selenium import webdriver
|
|
|
|
|
@@ -14,74 +16,19 @@ from selenium.webdriver.support.wait import WebDriverWait
|
|
|
|
|
|
from imgcode_ak import image_code
|
|
|
|
|
|
-"""
|
|
|
-根据table的id属性和table中的某一个元素定位其在table中的位置
|
|
|
-table包括表头,位置坐标都是从1开始算
|
|
|
-tableId:table的id属性
|
|
|
-queryContent:需要确定位置的内容
|
|
|
-"""
|
|
|
-
|
|
|
-
|
|
|
-def get_table_content(driver, tableId, queryContent):
|
|
|
- # 按行查询表格的数据,取出的数据是一整行,按空格分隔每一列的数据
|
|
|
- table_tr_list = driver.find_element(By.ID, tableId).find_elements(By.TAG_NAME, "tr")
|
|
|
- table_list = [] # 存放table数据
|
|
|
- for tr in table_tr_list: # 遍历每一个tr
|
|
|
- # 将每一个tr的数据根据td查询出来,返回结果为list对象
|
|
|
- table_td_list = tr.find_elements(By.TAG_NAME, "td")
|
|
|
- row_list = []
|
|
|
- print(table_td_list)
|
|
|
- for td in table_td_list: # 遍历每一个td
|
|
|
- row_list.append(td.text) # 取出表格的数据,并放入行列表里
|
|
|
- table_list.append(row_list)
|
|
|
-
|
|
|
- # 循环遍历table数据,确定查询数据的位置
|
|
|
- # for i in range(len(table_list)):
|
|
|
- # for j in range(len(table_list[i])):
|
|
|
- # if queryContent == table_list[i][j]:
|
|
|
- # print("%r坐标为(%r,%r)" % (queryContent, i + 1, j + 1))
|
|
|
-
|
|
|
-
|
|
|
-# 写入文件
|
|
|
-def write_excel_xlsx(path, sheet_name, value):
|
|
|
- index = len(value) # 列表中所含元组的个数,从而确定写入Excel的行数
|
|
|
- # 打开Excel
|
|
|
- wb = openpyxl.Workbook()
|
|
|
- # wb = load_workbook(path)
|
|
|
- sheet = wb.active # 获得一个的工作表
|
|
|
- sheet.title = sheet_name
|
|
|
- # 设置格式
|
|
|
- sheet.column_dimensions['B'].width = 115
|
|
|
- # 按行加入
|
|
|
- for i in range(index):
|
|
|
- sheet.append(value[i])
|
|
|
- # 保存文件
|
|
|
- print(sheet.values)
|
|
|
- wb.save(path)
|
|
|
- print("题目写入数据成功!")
|
|
|
-
|
|
|
-
|
|
|
-def send_file(url, file_path):
|
|
|
- with open(file_path, 'rb') as file:
|
|
|
- files = {'file': file}
|
|
|
- response2 = requests.post(url, files=files)
|
|
|
- return response2
|
|
|
-
|
|
|
-
|
|
|
-def get_element(my_driver, xpaths):
|
|
|
- """
|
|
|
- 判断是否存在元素并获取元素对象
|
|
|
- :param my_driver:
|
|
|
- :param xpaths: xpaths表达式
|
|
|
- :return: 元素对象或为空
|
|
|
- """
|
|
|
- try:
|
|
|
- target = my_driver.find_element(By.XPATH, xpaths)
|
|
|
- except exceptions.NoSuchElementException:
|
|
|
- return False
|
|
|
- else:
|
|
|
- return target
|
|
|
-
|
|
|
def rename_week_file(new_dir, current_time, rar_name):
    """Move the extracted weekly-data workbook into ``new_dir``.

    The search goes exactly one level down: each sub-directory of
    ``rar_name`` is listed, and the first file whose name contains
    "周度数据" is renamed to ``{new_dir}/{current_time}_week.xlsx``.

    :param new_dir: destination directory; it must already exist, because
        ``os.rename`` does not create missing directories.
    :param current_time: string used as the prefix of the new file name.
    :param rar_name: directory whose sub-directories are searched.
    :return: the new path when a file was renamed, otherwise ``None``.
    :raises OSError: propagated from ``os.listdir`` / ``os.rename``.
    """
    marker = "周度数据"
    # Sorted so the chosen file does not depend on os.listdir()'s arbitrary order.
    for entry in sorted(os.listdir(rar_name)):
        # Resolve the entry against rar_name; a bare name would be checked
        # against the current working directory instead.
        sub_dir = os.path.join(rar_name, entry)
        if not os.path.isdir(sub_dir):
            continue
        for file_name in sorted(os.listdir(sub_dir)):
            # Names are used exactly as listed: str.title() rewrites letter
            # case and would point at a non-existent path on a
            # case-sensitive filesystem.
            if marker in file_name:
                new_name = f'{new_dir}/{current_time}_week.xlsx'
                os.rename(os.path.join(sub_dir, file_name), new_name)
                return new_name
    return None
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
# python+selunium定位已打开的浏览器
|
|
@@ -102,13 +49,15 @@ if __name__ == "__main__":
|
|
|
# 后面你只需要python + selenium + webdriver定位到这个已经登录的浏览器进行操作就可以啦
|
|
|
options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
|
|
|
# 修改下载地址
|
|
|
- options.add_argument("--download.default_directory=/Users/xiexiaoyuan/Downloads/")
|
|
|
+ # save_to_dir = '/Users/xiexiaoyuan/Downloads'
|
|
|
+ save_to_dir = r'D:\download\excel'
|
|
|
+ options.add_argument("--download.default_directory="+save_to_dir)
|
|
|
options.add_argument('--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, '
|
|
|
'like Gecko) Chrome/118.0.5993.70 Safari/537.36')
|
|
|
options.add_argument(" window-size=1920,1080")
|
|
|
|
|
|
- s = Service(executable_path='/Users/xiexiaoyuan/chromedriver_mac64_114/chromedriver')
|
|
|
- # s = Service(executable_path='/Users/xi/Desktop/chromedriver')
|
|
|
+ # s = Service(executable_path='/Users/xiexiaoyuan/chromedriver_mac64_114/chromedriver')
|
|
|
+ s = Service(executable_path='D:\download\chromedriver119-win64\chromedriver.exe')
|
|
|
driver = webdriver.Chrome(service=s, options=options)
|
|
|
# driver.maximize_window()
|
|
|
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
|
|
@@ -131,10 +80,44 @@ if __name__ == "__main__":
|
|
|
a.click()
|
|
|
|
|
|
# 下载涌溢完整数据库
|
|
|
- sleep(1)
|
|
|
+ sleep(2)
|
|
|
b = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[3]/a')
|
|
|
print(b.get_attribute("href"))
|
|
|
b.click()
|
|
|
- sleep(10)
|
|
|
- # WebDriverWait(driver, 10).until(
|
|
|
- # EC.element_to_be_clickable((By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[2]/a'))).click()
|
|
|
+ sleep(30)
|
|
|
+
|
|
|
+ # 获取当前时间,并将其格式化为指定的形式
|
|
|
+ current_time = datetime.datetime.now().strftime("%Y-%m-%d")
|
|
|
+ # 查找文件并重命名
|
|
|
+ os.chdir(save_to_dir)
|
|
|
+ files = filter(os.path.isfile, os.listdir(save_to_dir))
|
|
|
+ files = [os.path.join(save_to_dir, f) for f in files] # add path to each file
|
|
|
+ files.sort(key=lambda x: os.path.getmtime(x))
|
|
|
+ day_file = files[-1]
|
|
|
+ # new_dir = '/Users/xiexiaoyuan/Downloads/yongyi'
|
|
|
+ new_dir = r'D:\data\yongyi'
|
|
|
+ if day_file.title().find("日度") == -1:
|
|
|
+ day_file = files[-2]
|
|
|
+
|
|
|
+ if day_file.title().find("日度") != -1:
|
|
|
+ new_name = f'{new_dir}/{current_time}_day.xlsx'
|
|
|
+ os.rename(day_file.title(), new_name)
|
|
|
+ else:
|
|
|
+ print("未找到日度下载文件")
|
|
|
+
|
|
|
+ week_file = files[-2]
|
|
|
+ if week_file.title().find("周度") == -1:
|
|
|
+ week_file = files[-1]
|
|
|
+ print(week_file.title())
|
|
|
+ if week_file.title().find("周度") != -1:
|
|
|
+ filename = week_file.title()
|
|
|
+ index = filename.find(".Rar")
|
|
|
+ rar_name = filename[:index]
|
|
|
+ # 解压缩
|
|
|
+ rar_file = rarfile.RarFile(filename, 'r')
|
|
|
+ rar_file.extractall(rar_name)
|
|
|
+ rar_file.close()
|
|
|
+
|
|
|
+ rename_week_file(new_dir, current_time, rar_name)
|
|
|
+ else:
|
|
|
+ print("未找到周度下载文件")
|