فهرست منبع

Merge remote-tracking branch 'origin/master'

Roc 1 سال پیش
والد
کامیت
9a60206424
6 فایل‌های تغییر یافته به همراه 228 افزوده شده و 165 حذف شده
  1. 79 8
      eta_report2img/html2img_ping.py
  2. 2 5
      eta_report2img/img_pdf_png.py
  3. 2 1
      html2img/main.py
  4. 9 27
      wind_api.py
  5. 2 2
      yongyi_pig/yongyi_pig.py
  6. 134 122
      yongyi_pig/yongyi_pig_manual.py

+ 79 - 8
eta_report2img/html2img_ping.py

@@ -3,6 +3,8 @@ from time import sleep
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
 import img_pdf_png
+import os
+from PIL import Image
 
 
 def scroll_page(driver):
@@ -19,7 +21,7 @@ def scroll_page(driver):
         for j in range(n1):
             # print(r"j:{}".format(j))
             if j == n1 - 1 and last_height > 0:  # 截取尾部
-                driver.execute_script("window.scrollBy(0,{})".format(page_height))  # 滚动条滚到底
+                driver.execute_script("window.scrollBy(0,{})".format(last_height))  # 滚动条滚到底
                 print("滚到底了")
             else:
                 driver.execute_script("window.scrollBy(0,{})".format(web_height))
@@ -45,26 +47,95 @@ def html2img(driver, image_folder):
     print("当前窗口网页高度{}".format(web_height))
     last_height = page_height % web_height
     print("底部多出来的部分{}".format(last_height))
-    margin_flag = False
+
+    is_end = 0
     if n1 == 0 or (n1 == 1 and last_height == 0):  # 判断是否需要滚动
-        driver.get_screenshot_as_file(r'{}/{}.png'.format(image_folder, '0'))  # 指定截图保存位置
+        # 去除留白部分, 一般情况下也不会只写一页报告且留那么多空白
+        crop_height = web_height - page_height
+        if crop_height > 0:
+            origin_last_img = r'{}/{}.png'.format(image_folder, 1000)
+            new_last_img = r'{}/{}.png'.format(image_folder, 0)
+            driver.save_screenshot(origin_last_img)
+            crop_img_botton(origin_last_img, new_last_img, page_height)
+        else:
+            driver.get_screenshot_as_file(r'{}/{}.png'.format(image_folder, '0'))
     else:
         j = 0
         while j <= n1:
             if j == 0:
                 driver.execute_script("window.scrollBy(0,0)")
                 # print(0)
-            elif j == n1 and last_height > 0:  # 截取尾部
-                driver.execute_script("$('#app #resetcss').css('margin-bottom', '{}px')".format(web_height))
-                driver.execute_script("window.scrollBy(0,{})".format(web_height))  # 滚动条滚到底
+            elif j == n1:
+                # print(f'n1 is {n1}')
+                if last_height > 0:
+                    # driver.execute_script("$('#app #resetcss').css('margin-bottom', '{}px')".format(web_height))
+                    driver.execute_script("window.scrollBy(0,{})".format(last_height))  # 滚动条滚到底
+
+                    # 最后一屏先进行一次保存
+                    sleep(1)
+                    origin_last_img = r'{}/{}.png'.format(image_folder, j + 1000)
+                    driver.save_screenshot(origin_last_img)
+
+                    # 截取上面重叠的一部分并保存截取后的图片
+                    new_last_img = r'{}/{}.png'.format(image_folder, j)
+                    crop_height = web_height - last_height
+                    crop_img_top(origin_last_img, new_last_img, crop_height)
+
+                is_end = 1
                 print("拉到底拉")
             else:
                 driver.execute_script("window.scrollBy(0,{})".format(web_height))
-            sleep(1)
-            driver.save_screenshot(r'{}/{}.png'.format(image_folder, j))  # 截屏
+
+            if is_end == 0:
+                sleep(1)
+                driver.save_screenshot(r'{}/{}.png'.format(image_folder, j))  # 截屏
             j = j + 1
 
 
+# crop_img_top 裁掉图片上面的部分保留剩下的部分
+def crop_img_top(image_path, output_path, crop_height):
+    # 打开图像文件
+    img = Image.open(image_path)
+
+    # 获取图像的宽度和高度
+    width, height = img.size
+
+    # 计算裁剪的起始坐标
+    start_x = 0
+    start_y = min(height, crop_height)  # 保证不超出图像下边界
+
+    # 裁剪图像,仅保留上半部分
+    cropped_img = img.crop((start_x, start_y, start_x + width, start_y + (height - crop_height)))
+
+    # 保存裁剪后的图像
+    cropped_img.save(output_path)
+
+    # 删除原始图像
+    os.remove(image_path)
+
+
+# crop_img_botton 裁掉图片下面的部分保留上面的部分
+def crop_img_botton(image_path, output_path, keep_height):
+    # 打开图像文件
+    img = Image.open(image_path)
+
+    # 获取图像的宽度和高度
+    width, height = img.size
+
+    # 计算保留部分的起始坐标
+    start_x = 0
+    start_y = 0
+
+    # 裁剪图像,保留从左上角开始指定高度的部分
+    cropped_img = img.crop((start_x, start_y, start_x + width, start_y + keep_height))
+
+    # 保存裁剪后的图像
+    cropped_img.save(output_path)
+
+    # 删除原始图像
+    os.remove(image_path)
+
+
 # 调用截图函数
 if __name__ == "__main__":
     # 创建一个 Chrome WebDriver 实例

+ 2 - 5
eta_report2img/img_pdf_png.py

@@ -35,13 +35,10 @@ def merge_images(image_folder, output_file, file_name, output_type, n=1):
     now_height = 0
     for i in range(image_count):
         img = Image.open(os.path.join(image_folder, target_img[i]))
-        img_size0 = int(img.size[0] / n)
-        img_size1 = int(img.size[1] / n)
-        img = img.resize((img_size0, img_size1))
-        if i != 0:
-            now_height += img.size[1]
         # 纵向拼接图片
         new_img.paste(img, (0, now_height))
+        # 当前粘贴高度自增已拼接的图片高度
+        now_height += img.size[1]
 
     # 默认两种类型都生成
     if output_type == "":

+ 2 - 1
html2img/main.py

@@ -16,7 +16,8 @@ host = '127.0.0.1'
 debug = True
 
 # 工具配置
-tool_path = r'E:\wkhtmltopdf\bin\wkhtmltoimage.exe'
+tool_path = r'/usr/local/bin/wkhtmltoimage'
+# tool_path = r'E:\wkhtmltopdf\bin\wkhtmltoimage.exe'
 
 # OSS配置
 oss_end_point = 'oss-cn-shanghai.aliyuncs.com'

+ 9 - 27
wind_api.py

@@ -49,9 +49,9 @@ def GetEdbDataByWind(EdbCode, StartDate, EndDate):
     return result
 
 
-@hug.get('/edbInfo/wind/future_good')
-def GetFutureGoodEdbDataByWind(FutureGoodEdbCode, StartDate, EndDate):
-    print("GetFutureGoodEdbDataByWind:", FutureGoodEdbCode)
+@hug.get('/edbInfo/wind/wsd')
+def GetEdbDataWindWsd(StockCode,EdbCode, StartDate, EndDate):
+    print("GetEdbDataByWind:", EdbCode)
     isConnected = w.isconnected()
     print("isconnected")
     print(isConnected)
@@ -62,38 +62,20 @@ def GetFutureGoodEdbDataByWind(FutureGoodEdbCode, StartDate, EndDate):
         if isConnected == False:
             return "{'ErrMsg':'启动Wind接口失败'}"
 
-    print("getdata")
     option = "Fill=Previous"
-    data = w.edb(FutureGoodEdbCode, StartDate, EndDate, option)
-    data = w.wsd(FutureGoodEdbCode, "trade_code,open,high,low,close,volume,amt,oi,settle", StartDate, EndDate, option)
-    print("wind data")
-    print(data)
-    df = pd.DataFrame()
+    wsd_data = w.wsd(StockCode,EdbCode, StartDate, EndDate, option)
 
-    if data.ErrorCode == -40521010: # Internet Timeout 超时退出
+    if wsd_data.ErrorCode == -40521010: # Internet Timeout 超时退出
         os._exit(0)
         return "a"
-
-    df['DT'] = data.Times
-    df['TRADE_CODE'] = data.Data[0]
-    df['OPEN'] = data.Data[1]
-    df['HIGH'] = data.Data[2]
-    df['LOW'] = data.Data[3]
-    df['CLOSE'] = data.Data[4]
-    df['VOLUME'] = data.Data[5]
-    df['AMT'] = data.Data[6]
-    df['OI'] = data.Data[7]
-    df['SETTLE'] = data.Data[8]
-    df['ErrorCode'] = data.ErrorCode
-    df = df[['DT', 'TRADE_CODE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME', 'AMT', 'OI', 'SETTLE', 'ErrorCode']]
-    df = df.dropna()
-    json_data = df.to_json()
-    # w.stop()
-    print("wind data end")
+   
+    fm=pd.DataFrame(wsd_data.Data,index=wsd_data.Fields,columns=wsd_data.Times)
+    json_data=fm.to_json()
     result = json.loads(json_data)
     return result
 
 
+
 if __name__ == "__main__":
     # wind 登录
     wStart = w.start()

+ 2 - 2
yongyi_pig/yongyi_pig.py

@@ -109,8 +109,8 @@ if __name__ == "__main__":
     driver.get('https://data.yongyizixun888.com/index.php?s=member&c=login&m=index')
     sleep(2)
 
-    admin_name = 'huawen001'
-    admin_pwd = 'abcd1234'
+    admin_name = ''
+    admin_pwd = ''
     table = driver.find_element(By.XPATH, '//*[@id="myform"]/div[3]/div/div[2]/img')
     table.screenshot(r'pig.png')
 

+ 134 - 122
yongyi_pig/yongyi_pig_manual.py

@@ -1,8 +1,12 @@
 # coding:utf-8
+import os
+import shutil
 from time import sleep
+from bottle import route, run
 import datetime
-import openpyxl
-import requests
+
+import hug
+import rarfile as rarfile
 from selenium import webdriver
 
 # 设置Chrome浏览器选项
@@ -12,129 +16,137 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
 
-from imgcode_ak import image_code
-
-"""
-根据table的id属性和table中的某一个元素定位其在table中的位置
-table包括表头,位置坐标都是从1开始算
-tableId:table的id属性
-queryContent:需要确定位置的内容
-"""
-
-
-def get_table_content(driver, tableId, queryContent):
-    # 按行查询表格的数据,取出的数据是一整行,按空格分隔每一列的数据
-    table_tr_list = driver.find_element(By.ID, tableId).find_elements(By.TAG_NAME, "tr")
-    table_list = []  # 存放table数据
-    for tr in table_tr_list:  # 遍历每一个tr
-        # 将每一个tr的数据根据td查询出来,返回结果为list对象
-        table_td_list = tr.find_elements(By.TAG_NAME, "td")
-        row_list = []
-        print(table_td_list)
-        for td in table_td_list:  # 遍历每一个td
-            row_list.append(td.text)  # 取出表格的数据,并放入行列表里
-        table_list.append(row_list)
-
-    # 循环遍历table数据,确定查询数据的位置
-    # for i in range(len(table_list)):
-    #     for j in range(len(table_list[i])):
-    #         if queryContent == table_list[i][j]:
-    #             print("%r坐标为(%r,%r)" % (queryContent, i + 1, j + 1))
-
-
-# 写入文件
-def write_excel_xlsx(path, sheet_name, value):
-    index = len(value)  # 列表中所含元组的个数,从而确定写入Excel的行数
-    # 打开Excel
-    wb = openpyxl.Workbook()
-    # wb = load_workbook(path)
-    sheet = wb.active  # 获得一个的工作表
-    sheet.title = sheet_name
-    # 设置格式
-    sheet.column_dimensions['B'].width = 115
-    # 按行加入
-    for i in range(index):
-        sheet.append(value[i])
-    # 保存文件
-    print(sheet.values)
-    wb.save(path)
-    print("题目写入数据成功!")
 
+def rename_week_file(new_dir, current_time, rar_name):
+    files = os.listdir(rar_name)
+    for dir in files:
+        file_path = os.path.join(rar_name, dir)
+        if os.path.isdir(file_path):
+            for f in os.listdir(file_path):
+                print("f.title()" + f.title())
+                if f.title().find("周度数据") != -1:
+                    new_name = f'{new_dir}/{current_time}_week.xlsx'
+                    old_name = os.path.join(file_path, f)
+                    # os.rename(old_name, new_name)
+                    shutil.copy(old_name, new_name)
+                    print(f'周度文件重命名成功, 旧文件名{old_name} 新文件名{new_name}')
+                    return
+        else:
+            print(dir.title() + "不是一个文件夹")
+            return
+    return
+
+@hug.get('/yongyi/download')
+def yongyi_download(dayFlag, weekFlag):
+    print("dayFlag:"+dayFlag)
+    print("weekFlag:"+weekFlag)
 
-def send_file(url, file_path):
-    with open(file_path, 'rb') as file:
-        files = {'file': file}
-        response2 = requests.post(url, files=files)
-    return response2
-
-
-def get_element(my_driver, xpaths):
-    """
-    判断是否存在元素并获取元素对象
-    :param my_driver:
-    :param xpaths: xpaths表达式
-    :return: 元素对象或为空
-    """
     try:
-        target = my_driver.find_element(By.XPATH, xpaths)
-    except exceptions.NoSuchElementException:
+        # python+selunium定位已打开的浏览器
+        # 创建一个 Chrome WebDriver 实例
+        options = webdriver.ChromeOptions()
+        # options.add_argument("headless")
+        # options.add_argument('--headless')
+        options.add_argument('--disable-gpu')
+        options.add_argument('--no-sandbox')
+        # 谷歌浏览器运行的默认调试端口:先用以下命令启动浏览器
+        # 找到谷歌浏览器的程序地址,开启一个新的端口,并设置一个文件夹来保存浏览器的数据
+        # /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222  --user-data-dir=/Users/xiexiaoyuan/data/selenium/automationProfile
+        # --remote-debugging-port值,可以指定任何打开的端口
+        # --user-data-dir标记,指定创建新Chrome配置文件的目录。它是为了确保在单独的配置文件中启动chrome,不会污染你的默认配置文件。
+        #
+        # 此时会打开一个浏览器页面,我们输入目标网址,输入账号密码,登录成功。
+        # 登录之后,以后都不需要登录,它会把你这次登录的信息记入到 --user-data-dir指定的目录下
+        # 后面你只需要python + selenium + webdriver定位到这个已经登录的浏览器进行操作就可以啦
+        options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
+        # 修改下载地址
+        # save_to_dir = '/Users/xiexiaoyuan/Downloads'
+        save_to_dir = r'D:/eta/yongyi_data/download'
+        options.add_argument("--download.default_directory=" + save_to_dir)
+        options.add_argument('--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, '
+                             'like Gecko) Chrome/118.0.5993.70 Safari/537.36')
+        options.add_argument(" window-size=1920,1080")
+
+        # s = Service(executable_path='/Users/xiexiaoyuan/chromedriver_mac64_114/chromedriver')
+        # s = Service(executable_path='D:/download/chromedriver119-win64/chromedriver.exe')
+        s = Service(executable_path='D:/eta/chromedriver-win64/chromedriver.exe')
+        driver = webdriver.Chrome(service=s, options=options)
+        driver.get('https://data.yongyizixun888.com/')
+        sleep(3)
+
+        account = driver.find_element(By.XPATH, '//*[@id="dr_member_info"]/a[1]').text
+        print(account)
+
+        # 下载涌溢日度数据库
+        if dayFlag == '1':
+            sleep(1)
+            a = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[2]/a')
+            print(a.get_attribute("href"))
+            a.click()
+            sleep(30)
+
+        # 下载涌溢完整数据库
+        if weekFlag == '1':
+            sleep(2)
+            b = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[3]/a')
+            print(b.get_attribute("href"))
+            b.click()
+            sleep(30)
+
+        # 获取当前时间,并将其格式化为指定的形式
+        # new_dir = '/Users/xiexiaoyuan/Downloads/yongyi'
+        new_dir = r'D:/eta/yongyi_data/yongyi'
+        current_time = datetime.datetime.now().strftime("%Y-%m-%d")
+        current_time_full = datetime.datetime.now().strftime('%Y{y}%m{m}%d{d}').format(y='年', m='月', d='日')
+        print(current_time_full)
+
+        # 查找文件并重命名
+        os.chdir(save_to_dir)
+        files = filter(os.path.isfile, os.listdir(save_to_dir))
+        files = [os.path.join(save_to_dir, f) for f in files]  # add path to each file
+        files.sort(key=lambda x: os.path.getmtime(x), reverse=True)
+        day_file = ""
+        week_file = ""
+        for file in files:
+            day_name = f'{current_time_full}涌益咨询日度数据'
+            if file.title().find(day_name) != -1:
+                if day_file == "":
+                    day_file = file
+            if file.title().find("涌益咨询周度数据") != -1:
+                if week_file == "":
+                    week_file = file
+            if day_file != "" and week_file != "":
+                break
+
+        if dayFlag == '1':
+            if day_file != "":
+                print(day_file.title())
+                new_name = f'{new_dir}/{current_time}_day.xlsx'
+                # os.rename(day_file.title(), new_name)
+                shutil.copy(day_file.title(), new_name)
+                print(f'日度文件重命名成功, 旧文件名{day_file.title()} 新文件名{new_name}')
+            else:
+                print("未找到日度下载文件")
+
+        if weekFlag == '1':
+            if week_file != "":
+                print(week_file.title())
+                filename = week_file.title()
+                index = filename.find(".Rar")
+                rar_name = filename[:index]
+                # 解压缩
+                rar_file = rarfile.RarFile(filename, 'r')
+                rar_file.extractall(rar_name)
+                rar_file.close()
+                rename_week_file(new_dir, current_time, rar_name)
+            else:
+                print("未找到周度下载文件")
+        return True
+    except Exception as e:
+        print("Exception:")
+        print(str(e))
         return False
-    else:
-        return target
-
 
 if __name__ == "__main__":
-    # python+selunium定位已打开的浏览器
-    # 创建一个 Chrome WebDriver 实例
-    options = webdriver.ChromeOptions()
-    # options.add_argument("headless")
-    # options.add_argument('--headless')
-    options.add_argument('--disable-gpu')
-    options.add_argument('--no-sandbox')
-    # 谷歌浏览器运行的默认调试端口:先用以下命令启动浏览器
-    # 找到谷歌浏览器的程序地址,开启一个新的端口,并设置一个文件夹来保存浏览器的数据
-    # /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222  --user-data-dir=/Users/xiexiaoyuan/data/selenium/automationProfile
-    # --remote-debugging-port值,可以指定任何打开的端口
-    # --user-data-dir标记,指定创建新Chrome配置文件的目录。它是为了确保在单独的配置文件中启动chrome,不会污染你的默认配置文件。
-    #
-    # 此时会打开一个浏览器页面,我们输入目标网址,输入账号密码,登录成功。
-    # 登录之后,以后都不需要登录,它会把你这次登录的信息记入到 --user-data-dir指定的目录下
-    # 后面你只需要python + selenium + webdriver定位到这个已经登录的浏览器进行操作就可以啦
-    options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
-    # 修改下载地址
-    options.add_argument("--download.default_directory=/Users/xiexiaoyuan/Downloads/")
-    options.add_argument('--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, '
-                          'like Gecko) Chrome/118.0.5993.70 Safari/537.36')
-    options.add_argument(" window-size=1920,1080")
-
-    s = Service(executable_path='/Users/xiexiaoyuan/chromedriver_mac64_114/chromedriver')
-    # s = Service(executable_path='/Users/xi/Desktop/chromedriver')
-    driver = webdriver.Chrome(service=s, options=options)
-    # driver.maximize_window()
-    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
-        "source": """
-                            Object.defineProperty(navigator, 'webdriver', {
-                              get: () => undefined
-                            })
-                          """
-    })
-    driver.get('https://data.yongyizixun888.com/')
-    sleep(3)
-
-    account = driver.find_element(By.XPATH, '//*[@id="dr_member_info"]/a[1]').text
-    print(account)
-
-    # 下载涌溢日度数据库
-    sleep(1)
-    a = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[2]/a')
-    print(a.get_attribute("href"))
-    a.click()
-
-    # 下载涌溢完整数据库
-    sleep(1)
-    b = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[3]/a')
-    print(b.get_attribute("href"))
-    b.click()
-    sleep(10)
-    # WebDriverWait(driver, 10).until(
-    #     EC.element_to_be_clickable((By.XPATH, '/html/body/div[4]/div[1]/div[2]/div[2]/a'))).click()
+    app = __hug__.http.server()
+    run(app=app, reloader=True, host='127.0.0.1', port=7010)