# coding=utf-8
"""Capture a full web page as a series of viewport-sized PNG tiles.

The page is scrolled one viewport at a time with Selenium, each screen is
saved as ``<image_folder>/<index>.png``, and the tiles are then handed to
``img_pdf_png.merge_images`` when the module is run as a script.
"""
import os
from time import sleep

from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

import img_pdf_png


def scroll_page(driver):
    """Scroll slowly through the whole page, repeating while the page grows.

    The page is walked in fixed 300 px steps with a one second pause after
    each step. When ``document.body.scrollHeight`` is larger after a pass
    than before it, another pass is made from the current scroll position.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
    """
    # Fixed step. The original author noted that using the page/viewport
    # height as the scroll distance left the page layout in disorder.
    step_height = 300

    while True:
        page_height = driver.execute_script('return document.body.scrollHeight')
        print(page_height)
        full_steps, last_height = divmod(page_height, step_height)
        print(last_height)

        # Hide the vertical scrollbar of the document element.
        driver.execute_script("document.documentElement.style.overflowY = 'hidden'")

        for j in range(full_steps):
            if j == full_steps - 1 and last_height > 0:
                # Final step: only the remainder is scrolled.
                driver.execute_script("window.scrollBy(0,{})".format(last_height))
                print("滚到底了")
            else:
                driver.execute_script("window.scrollBy(0,{})".format(step_height))
            sleep(1)

        new_page_height = driver.execute_script('return document.body.scrollHeight')
        if new_page_height <= page_height:
            break


def html2img(driver, image_folder):
    """Save the loaded page as numbered PNG tiles in ``image_folder``.

    Tiles are named ``0.png``, ``1.png``, ... from top to bottom. A page that
    fits in one viewport produces a single ``0.png`` with the blank area
    below the content cropped off. For longer pages the final partial
    screen is cropped so that it does not repeat content already present in
    the previous tile.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        image_folder: Directory that receives the PNG tiles; created if it
            does not exist.
    """
    # save_screenshot() reports a missing directory by returning False
    # rather than raising, so make sure the target exists up front.
    os.makedirs(image_folder, exist_ok=True)

    page_width = driver.execute_script('return document.documentElement.scrollWidth')
    # Hide the element that follows #tipsAlert (the back-to-top icon per the
    # original author). NOTE(review): this relies on the page exposing
    # jQuery as `$` — confirm for other target pages.
    driver.execute_script("$('#app #tipsAlert').next().css('display', 'none')")
    window_height = driver.execute_script("return window.screen.height")
    driver.set_window_size(page_width, window_height)

    scroll_page(driver)
    # Return to the top of the page before capturing.
    driver.execute_script("document.body.scrollTop = document.documentElement.scrollTop = 0")

    page_height = driver.execute_script('return document.body.scrollHeight')
    print("浏览器宽{} 浏览器高度{} 网页实际总高度{}".format(page_width, window_height, page_height))
    # Height of the visible viewport; used as the scroll distance per tile.
    web_height = driver.execute_script("return window.innerHeight")
    n1 = page_height // web_height
    print("当前窗口网页高度{}".format(web_height))
    last_height = page_height % web_height
    print("底部多出来的部分{}".format(last_height))

    if n1 == 0 or (n1 == 1 and last_height == 0):
        # The page fits in one screen: no scrolling needed.
        blank_height = web_height - page_height
        if blank_height > 0:
            # Trim the empty area below the content.
            origin_img = r'{}/{}.png'.format(image_folder, 1000)
            new_img = r'{}/{}.png'.format(image_folder, 0)
            driver.save_screenshot(origin_img)
            crop_img_botton(origin_img, new_img, page_height)
        else:
            driver.get_screenshot_as_file(r'{}/{}.png'.format(image_folder, '0'))
        return

    # Full screens: exactly n1 tiles. The first is taken at the top, each
    # following one after scrolling a whole viewport. (Previously an extra,
    # duplicate tile was saved when page_height was an exact multiple of the
    # viewport height.)
    for j in range(n1):
        if j > 0:
            driver.execute_script("window.scrollBy(0,{})".format(web_height))
        sleep(1)
        driver.save_screenshot(r'{}/{}.png'.format(image_folder, j))

    if last_height > 0:
        # Partial last screen: scroll by the remainder, capture to a
        # temporary file, then drop the top part that overlaps the
        # previous tile.
        driver.execute_script("window.scrollBy(0,{})".format(last_height))
        sleep(1)
        origin_last_img = r'{}/{}.png'.format(image_folder, n1 + 1000)
        driver.save_screenshot(origin_last_img)
        new_last_img = r'{}/{}.png'.format(image_folder, n1)
        crop_img_top(origin_last_img, new_last_img, web_height - last_height)
        print("拉到底拉")


def crop_img_top(image_path, output_path, crop_height):
    """Remove ``crop_height`` pixel rows from the top of an image.

    The remaining lower part is written to ``output_path`` and the source
    file is then deleted (unless both paths refer to the same file).

    Args:
        image_path: Path of the image to crop; removed afterwards.
        output_path: Path the cropped image is saved to.
        crop_height: Number of pixel rows to cut from the top. Values
            outside ``0..image height`` are clamped to that range.
    """
    with Image.open(image_path) as img:
        width, height = img.size
        # Clamp so the top edge can never fall outside the image.
        top = max(0, min(height, crop_height))
        cropped_img = img.crop((0, top, width, height))
        cropped_img.save(output_path)

    if os.path.abspath(image_path) != os.path.abspath(output_path):
        os.remove(image_path)


def crop_img_botton(image_path, output_path, keep_height):
    """Keep the top ``keep_height`` pixel rows of an image.

    The kept upper part is written to ``output_path`` and the source file
    is then deleted (unless both paths refer to the same file).

    Args:
        image_path: Path of the image to crop; removed afterwards.
        output_path: Path the cropped image is saved to.
        keep_height: Number of pixel rows to keep, counted from the top.
            Limited to the image height so the result is never padded.
    """
    with Image.open(image_path) as img:
        width, height = img.size
        bottom = min(height, keep_height)
        cropped_img = img.crop((0, 0, width, bottom))
        cropped_img.save(output_path)

    if os.path.abspath(image_path) != os.path.abspath(output_path):
        os.remove(image_path)


def main():
    """Capture the report page headlessly and merge the resulting tiles."""
    options = webdriver.ChromeOptions()
    options.add_argument("headless")
    # options.add_argument(" window-size=1920,1080")
    # s = Service(executable_path='/Users/xiexiaoyuan/Downloads/chromedriver_mac64_111/chromedriver')
    s = Service(executable_path='E:/chromedriver-win64/chromedriver.exe')
    driver = webdriver.Chrome(service=s, options=options)
    try:
        # driver.maximize_window()
        driver.get('https://ficc.hzinsights.com/reportshare_crm_report?code=4e38d30e656da5ae9d3a425109ce9e04')
        sleep(5)  # Fixed wait after loading the page.
        image_folder = './imgs'
        output_file = './screenshot'
        file_name = 'output1'
        html2img(driver, image_folder)
        # NOTE(review): merge_images is defined in img_pdf_png, not shown
        # here — presumably it combines the tiles into one output file.
        img_pdf_png.merge_images(image_folder, output_file, file_name)
    finally:
        # Always shut the browser down, even when capture or merge fails.
        driver.quit()


if __name__ == "__main__":
    main()