# coding=utf-8
# eta_server / eta_python
from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import img_pdf_png
import os
from PIL import Image


def scroll_page(driver):
    """Scroll down the page in small steps until its height stops growing.

    Each pass reads ``document.body.scrollHeight``, hides the vertical
    scrollbar, then scrolls by a fixed 300px step with a one second pause
    after every step. If the page is taller after a pass than it was before,
    another pass is made starting from the current scroll position.

    Args:
        driver: object exposing ``execute_script(script)``; in this file it
            is the Selenium WebDriver passed in by the caller.
    """
    # Fixed step size. The original note says that using the page height as
    # the scroll distance garbled the page, hence a small constant step.
    step = 300

    while True:
        height_before = driver.execute_script('return document.body.scrollHeight')
        print(height_before)
        full_steps, remainder = divmod(height_before, step)
        print(remainder)

        # Hide the vertical scrollbar.
        driver.execute_script("document.documentElement.style.overflowY = 'hidden'")

        for idx in range(full_steps):
            is_tail_step = idx == full_steps - 1 and remainder > 0
            if is_tail_step:
                # The final step only moves by the leftover distance.
                driver.execute_script(f"window.scrollBy(0,{remainder})")
                print("滚到底了")
            else:
                driver.execute_script(f"window.scrollBy(0,{step})")
            sleep(1)

        height_after = driver.execute_script('return document.body.scrollHeight')
        if not height_after > height_before:
            # Page height did not grow during this pass; done.
            break


def html2img(driver, image_folder):
    """Capture the whole page as a sequence of viewport-sized PNG tiles.

    The browser window is resized to the page's scroll width and the screen
    height, the page is pre-scrolled with ``scroll_page`` and returned to the
    top, and one screenshot is saved per viewport as
    ``<image_folder>/<index>.png`` (index starting at 0). When the page
    height is not a multiple of the viewport height, the last tile is
    captured to a temporary ``<index + 1000>.png`` and the rows overlapping
    the previous tile are cut off its top. A page shorter than the viewport
    yields a single ``0.png`` cropped to the page height.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        image_folder: directory that receives the PNG tiles; it is created
            if it does not exist.
    """
    # Selenium's screenshot helpers report I/O failures through their return
    # value instead of raising, so a missing folder would silently lose
    # every tile. Create it up front.
    if image_folder:
        os.makedirs(image_folder, exist_ok=True)

    page_width = driver.execute_script('return document.documentElement.scrollWidth')
    # Hide the pinned icon (element following #tipsAlert) so it is not
    # repeated on every tile. NOTE(review): relies on the page exposing
    # jQuery as `$`.
    driver.execute_script("$('#app #tipsAlert').next().css('display', 'none')")
    window_height = driver.execute_script("return window.screen.height")  # screen height
    driver.set_window_size(page_width, window_height)

    scroll_page(driver)
    # Return to the top of the page before capturing.
    driver.execute_script("document.body.scrollTop = document.documentElement.scrollTop = 0")

    page_height = driver.execute_script('return document.body.scrollHeight')
    print("浏览器宽{} 浏览器高度{} 网页实际总高度{}".format(page_width, window_height, page_height))
    # Height of the visible viewport; this is the scroll distance per tile.
    web_height = driver.execute_script("return window.innerHeight")
    full_screens = page_height // web_height
    print("当前窗口网页高度{}".format(web_height))
    last_height = page_height % web_height
    print("底部多出来的部分{}".format(last_height))

    # The page fits in a single viewport: no scrolling needed.
    if full_screens == 0 or (full_screens == 1 and last_height == 0):
        if web_height > page_height:
            # Trim the blank area below the page content.
            origin_img = r'{}/{}.png'.format(image_folder, 1000)
            final_img = r'{}/{}.png'.format(image_folder, 0)
            driver.save_screenshot(origin_img)
            crop_img_botton(origin_img, final_img, page_height)
        else:
            driver.get_screenshot_as_file(r'{}/{}.png'.format(image_folder, '0'))
        return

    # One tile per full viewport; the first tile is taken without scrolling.
    for j in range(full_screens):
        if j > 0:
            driver.execute_script("window.scrollBy(0,{})".format(web_height))
        sleep(1)  # pause after scrolling before the capture
        driver.save_screenshot(r'{}/{}.png'.format(image_folder, j))

    if last_height > 0:
        # Scroll the remaining distance; the viewport now overlaps the
        # previous tile by (web_height - last_height) rows at its top.
        driver.execute_script("window.scrollBy(0,{})".format(last_height))
        sleep(1)
        origin_last_img = r'{}/{}.png'.format(image_folder, full_screens + 1000)
        driver.save_screenshot(origin_last_img)

        # Cut the overlapping rows off the top and store the final tile.
        new_last_img = r'{}/{}.png'.format(image_folder, full_screens)
        crop_img_top(origin_last_img, new_last_img, web_height - last_height)

    print("拉到底拉")


# crop_img_top: cut off the top of an image and keep the remainder
def crop_img_top(image_path, output_path, crop_height):
    """Remove the top ``crop_height`` rows of an image and save the rest.

    The result is written to ``output_path`` and the source file at
    ``image_path`` is deleted afterwards.

    Args:
        image_path: path of the image to crop; removed on success.
        output_path: path the cropped image is saved to.
        crop_height: number of pixel rows to drop from the top. Values
            outside ``[0, image height]`` are clamped to that range.
    """
    # The context manager guarantees the source file handle is closed
    # before the file is deleted below.
    with Image.open(image_path) as img:
        width, height = img.size

        # Clamp so the crop box never starts outside the image.
        top = max(0, min(height, crop_height))

        # Keep everything from `top` down to the bottom edge.
        cropped_img = img.crop((0, top, width, height))
        cropped_img.save(output_path)

    # Delete the original image.
    os.remove(image_path)


# crop_img_botton: cut off the bottom of an image and keep the top part
def crop_img_botton(image_path, output_path, keep_height):
    """Keep the top ``keep_height`` rows of an image and discard the rest.

    The result is written to ``output_path`` and the source file at
    ``image_path`` is deleted afterwards.

    Args:
        image_path: path of the image to crop; removed on success.
        output_path: path the cropped image is saved to.
        keep_height: number of pixel rows to keep, counted from the top.
            Values outside ``[0, image height]`` are clamped to that range
            so the output is never padded with blank rows.
    """
    # The context manager guarantees the source file handle is closed
    # before the file is deleted below.
    with Image.open(image_path) as img:
        width, height = img.size

        # Clamp so the crop box never extends past the bottom edge.
        bottom = max(0, min(height, keep_height))

        # Keep the region from the top-left corner down to `bottom`.
        cropped_img = img.crop((0, 0, width, bottom))
        cropped_img.save(output_path)

    # Delete the original image.
    os.remove(image_path)


# Script entry point: capture one report page as image tiles and merge them.
if __name__ == "__main__":
    # Create a headless Chrome WebDriver instance.
    options = webdriver.ChromeOptions()
    options.add_argument("headless")
    # macOS alternative for the chromedriver location:
    # s = Service(executable_path='/Users/xiexiaoyuan/Downloads/chromedriver_mac64_111/chromedriver')
    s = Service(executable_path='E:/chromedriver-win64/chromedriver.exe')
    driver = webdriver.Chrome(service=s, options=options)
    try:
        driver.get('https://ficc.hzinsights.com/reportshare_crm_report?code=4e38d30e656da5ae9d3a425109ce9e04')
        sleep(5)  # pause before capturing so the page has time to render
        image_folder = './imgs'
        output_file = './screenshot'
        file_name = 'output1'
        html2img(driver, image_folder)
        # NOTE(review): presumably stitches the tiles in image_folder into
        # output_file/file_name -- confirm against img_pdf_png.merge_images.
        img_pdf_png.merge_images(image_folder, output_file, file_name)
    finally:
        # Always shut the browser down, even when capture or merge fails,
        # so no orphaned headless Chrome process is left behind.
        driver.quit()