Browse Source

报告转长图服务

hsun 1 year ago
parent
commit
a2076d1e29
5 changed files with 256 additions and 0 deletions
  1. 1 0
      .gitignore
  2. 4 0
      eta_report2img/.gitignore
  3. 84 0
      eta_report2img/html2img_ping.py
  4. 61 0
      eta_report2img/img_pdf_png.py
  5. 106 0
      eta_report2img/main.py

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+/.idea

+ 4 - 0
eta_report2img/.gitignore

@@ -0,0 +1,4 @@
+/*.exe
+/*.tmp
+/.idea
+/__pycache__

+ 84 - 0
eta_report2img/html2img_ping.py

@@ -0,0 +1,84 @@
+from time import sleep
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+import img_pdf_png
+
+
+def scroll_page(driver):
+    # 设置滚动条缓慢滚动
+    page_height = driver.execute_script('return document.body.scrollHeight')  # 页面高度
+    print(page_height)
+    # web_height = driver.execute_script("return window.innerHeight")  # 网页可见内容的高 原先用页面高度作为滚动条的滑动高度,会导致页面错乱
+    web_height = 300  # 网页可见内容的高 原先用页面高度作为滚动条的滑动高度,会导致页面错乱
+    n1 = page_height // web_height
+    last_height = page_height % web_height
+    print(last_height)
+    driver.execute_script("document.documentElement.style.overflowY = 'hidden'")
+    if n1 > 0:  # 判断是否需要滚动
+        for j in range(n1):
+            # print(r"j:{}".format(j))
+            if j == n1 - 1 and last_height > 0:  # 截取尾部
+                driver.execute_script("window.scrollBy(0,{})".format(page_height))  # 滚动条滚到底
+                print("滚到底了")
+            else:
+                driver.execute_script("window.scrollBy(0,{})".format(web_height))
+                # print(web_height)
+            sleep(1)
+    new_page_height = driver.execute_script('return document.body.scrollHeight')  # 页面高度
+    if new_page_height > page_height:
+        scroll_page(driver)
+
+
+def html2img(driver, image_folder):
+    page_width = driver.execute_script('return document.documentElement.scrollWidth')
+    # page_width = 800
+    driver.execute_script("$('#app #tipsAlert').next().css('display', 'none')")  # 隐藏置顶图标
+    window_height = driver.execute_script("return window.screen.height")  # 屏幕高度
+    driver.set_window_size(page_width, window_height)  # 设置浏览器宽高
+    scroll_page(driver)
+    driver.execute_script("document.body.scrollTop = document.documentElement.scrollTop = 0")  # 滚动条滚到底
+    page_height = driver.execute_script('return document.body.scrollHeight')  # 页面高度
+    print("浏览器宽{} 浏览器高度{} 网页实际总高度{}".format(page_width, window_height, page_height))
+    web_height = driver.execute_script("return window.innerHeight")  # 网页可见内容的高 原先用页面高度作为滚动条的滑动高度,会导致页面错乱
+    n1 = page_height // web_height
+    print("当前窗口网页高度{}".format(web_height))
+    last_height = page_height % web_height
+    print("底部多出来的部分{}".format(last_height))
+    margin_flag = False
+    if n1 == 0 or (n1 == 1 and last_height == 0):  # 判断是否需要滚动
+        driver.get_screenshot_as_file(r'{}/{}.png'.format(image_folder, '0'))  # 指定截图保存位置
+    else:
+        j = 0
+        while j <= n1:
+            if j == 0:
+                driver.execute_script("window.scrollBy(0,0)")
+                # print(0)
+            elif j == n1 and last_height > 0:  # 截取尾部
+                driver.execute_script("$('#app #resetcss').css('margin-bottom', '{}px')".format(web_height))
+                driver.execute_script("window.scrollBy(0,{})".format(web_height))  # 滚动条滚到底
+                print("拉到底拉")
+            else:
+                driver.execute_script("window.scrollBy(0,{})".format(web_height))
+            sleep(1)
+            driver.save_screenshot(r'{}/{}.png'.format(image_folder, j))  # 截屏
+            j = j + 1
+
+
+# 调用截图函数
+if __name__ == "__main__":
+    # 创建一个 Chrome WebDriver 实例
+    options = webdriver.ChromeOptions()
+    options.add_argument("headless")
+    # options.add_argument(" window-size=1920,1080")
+    # s = Service(executable_path='/Users/xiexiaoyuan/Downloads/chromedriver_mac64_111/chromedriver')
+    s = Service(executable_path='E:/chromedriver-win64/chromedriver.exe')
+    driver = webdriver.Chrome(service=s, options=options)
+    # driver.maximize_window()
+    driver.get('https://ficc.hzinsights.com/reportshare_crm_report?code=4e38d30e656da5ae9d3a425109ce9e04')
+    sleep(5)
+    image_folder = './imgs'
+    output_file = './screenshot'
+    file_name = 'output1'
+    html2img(driver, image_folder)
+    img_pdf_png.merge_images(image_folder, output_file, file_name)
+    driver.quit()

+ 61 - 0
eta_report2img/img_pdf_png.py

@@ -0,0 +1,61 @@
+import os
+from datetime import datetime
+from PIL import Image
+
+
+def merge_images(image_folder, output_file, file_name, output_type, n=1):
+    # 获取所有图像文件的列表
+    image_files = [f for f in os.listdir(image_folder) if f.endswith('.png')]
+
+    # 计算每个小图像的大小和大图像的大小
+    image_count = len(image_files)
+    if image_count == 0:
+        print('No image files found in the directory:', image_folder)
+        return
+
+    # 将所有小图像粘贴到新图像的正确位置
+    target_img = {}
+    new_img_size1 = 0
+    new_img_size0 = 0
+    for i, f in enumerate(image_files):
+        idx = f.strip('.png')
+        target_img[int(idx)] = f
+        # 计算小图像的大小以及大图像的大小
+        img = Image.open(os.path.join(image_folder, image_files[i]))
+        img_size1 = int(img.size[1] / n)
+        if i == 0:
+            new_img_size0 = int(img.size[0] / n)
+        new_img_size1 += img_size1
+
+    # 创建一个新的大图像
+    new_img = Image.new('RGB', (new_img_size0, new_img_size1), 'white')
+
+    # 将所有小图像粘贴到新图像的正确位置
+    now_height = 0
+    for i in range(image_count):
+        img = Image.open(os.path.join(image_folder, target_img[i]))
+        img_size0 = int(img.size[0] / n)
+        img_size1 = int(img.size[1] / n)
+        img = img.resize((img_size0, img_size1))
+        if i != 0:
+            now_height += img.size[1]
+        # 纵向拼接图片
+        new_img.paste(img, (0, now_height))
+
+    # 默认两种类型都生成
+    if output_type == "":
+        new_img.save(r'{}/{}.png'.format(output_file, file_name))
+        new_img.save(r'{}/{}.pdf'.format(output_file, file_name), 'PDF', resolution=100.0, save_all=True)
+    elif output_type == "img":
+        new_img.save(r'{}/{}.png'.format(output_file, file_name))
+    elif output_type == "pdf":
+        new_img.save(r'{}/{}.pdf'.format(output_file, file_name), 'PDF', resolution=100.0, save_all=True)
+
+
+# 调用截图函数
+if __name__ == "__main__":
+    # 用法示例
+    image_folder = './imgs'
+    output_file = './screenshot'
+    file_name = 'output1'
+    merge_images(image_folder, output_file, file_name=file_name)

+ 106 - 0
eta_report2img/main.py

@@ -0,0 +1,106 @@
+import hug
+import os
+from bottle import run
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from time import sleep
+
+import html2img_ping
+import img_pdf_png
+
+# Configuration
+server_port = 7109  # port passed to the HTTP server at startup
+# chrome_drive_path = "E:/chromedriver-win64/chromedriver.exe"  # chromedriver location (Windows example)
+chrome_drive_path = "/usr/local/bin/chromedriver"  # location of the chromedriver executable
+image_folder = './imgs'  # screenshots produced during conversion; they are merged into one long image
+output_file = './screenshot'  # directory receiving the final output
+
+# Make the hug endpoints respond with JSON
+hug.API(__name__).http.output_format = hug.output_format.json
+
+
+@hug.get('/api/report/server')
+def report_server():
+    """Return the constant 1; presumably lets callers check that the service is up."""
+    return 1
+
+
+@hug.post('/api/report/html2img')
+def report_detail2img(report_url: hug.types.text, file_name: hug.types.text, output_type: hug.types.text):
+    # report_url参数, 报告的分享地址
+    if report_url is None or report_url == "":
+        return {"code": 403, "data": "", "error": "report_url parameter is missing"}
+    print("report_url: ", report_url)
+
+    # file_name参数, 生成的文件名
+    if file_name is None or file_name == "":
+        return {"code": 403, "data": "", "error": "file_name parameter is missing"}
+    print("file_name: ", file_name)
+
+    # output_type参数, 非必填, img或pdf, 为空则都生成
+
+    try:
+        # 生成图片
+        create_img_and_pdf(report_url, file_name, output_type)
+
+        # 清空imgs临时图片文件夹
+        for fn in os.listdir(image_folder):
+            fp = os.path.join(image_folder, fn)
+            if os.path.isfile(fp):
+                os.remove(fp)
+
+        # 获取根目录
+        current_file_path = os.path.abspath(__file__)
+        project_root = os.path.dirname(current_file_path)
+
+        # 生成的文件目录
+        img_path = project_root + "/screenshot/" + file_name + ".png"
+        pdf_path = project_root + "/screenshot/" + file_name + ".pdf"
+        print(img_path)
+        print(pdf_path)
+        return {"code": 200, "data": [img_path, pdf_path], "error": ""}
+    except Exception as e:
+        err_msg = str(e)
+        print(err_msg)
+        return {"code": 403, "data": "", "error": err_msg}
+
+
+@hug.post('/api/report/clear_local_file')
+def clear_local_file(file_name: hug.types.text):
+    if file_name is None or file_name == "":
+        return {"code": 403, "data": "", "error": "file_name parameter is missing"}
+
+    file_path = output_file + "/" + file_name
+    if os.path.exists(file_path) is True:
+        os.remove(file_path)
+    print("clear success: ", file_path)
+    return {"code": 200, "data": "", "error": ""}
+
+
+def create_img_and_pdf(report_url, file_name, output_type=""):
+    # 加载驱动
+    options = webdriver.ChromeOptions()  # 创建一个谷歌WebDriver实例
+    options.add_argument("headless")  # 无头浏览器模式
+    s = Service(executable_path=chrome_drive_path)
+    driver = webdriver.Chrome(service=s, options=options)
+
+    # 加载页面
+    print("加载页面")
+    driver.get(report_url)
+    sleep(5)
+    print("加载完成")
+
+    # html页面转换为图片
+    html2img_ping.html2img(driver, image_folder)
+
+    # 合并图片为长图/PDF
+    img_pdf_png.merge_images(image_folder, output_file, file_name, output_type)
+    driver.quit()
+
+    # 退出驱动
+    driver.quit()
+    return
+
+
+# Script entry point: serve the hug API through bottle's run()
+if __name__ == "__main__":
+    app = __hug__.http.server()
+    run(app=app, reloader=True, port=server_port)