# html2img_ping.py
# coding=utf-8
import os
from time import sleep

from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

import img_pdf_png
  8. def scroll_page(driver):
  9. # 设置滚动条缓慢滚动
  10. page_height = driver.execute_script('return document.body.scrollHeight') # 页面高度
  11. print(page_height)
  12. # web_height = driver.execute_script("return window.innerHeight") # 网页可见内容的高 原先用页面高度作为滚动条的滑动高度,会导致页面错乱
  13. web_height = 300 # 网页可见内容的高 原先用页面高度作为滚动条的滑动高度,会导致页面错乱
  14. n1 = page_height // web_height
  15. last_height = page_height % web_height
  16. print(last_height)
  17. driver.execute_script("document.documentElement.style.overflowY = 'hidden'")
  18. if n1 > 0: # 判断是否需要滚动
  19. for j in range(n1):
  20. # print(r"j:{}".format(j))
  21. if j == n1 - 1 and last_height > 0: # 截取尾部
  22. driver.execute_script("window.scrollBy(0,{})".format(last_height)) # 滚动条滚到底
  23. print("滚到底了")
  24. else:
  25. driver.execute_script("window.scrollBy(0,{})".format(web_height))
  26. # print(web_height)
  27. sleep(1)
  28. new_page_height = driver.execute_script('return document.body.scrollHeight') # 页面高度
  29. if new_page_height > page_height:
  30. scroll_page(driver)
  31. def html2img(driver, image_folder):
  32. page_width = driver.execute_script('return document.documentElement.scrollWidth')
  33. # page_width = 800
  34. driver.execute_script("$('#app #tipsAlert').next().css('display', 'none')") # 隐藏置顶图标
  35. window_height = driver.execute_script("return window.screen.height") # 屏幕高度
  36. driver.set_window_size(page_width, window_height) # 设置浏览器宽高
  37. scroll_page(driver)
  38. driver.execute_script("document.body.scrollTop = document.documentElement.scrollTop = 0") # 滚动条滚到底
  39. page_height = driver.execute_script('return document.body.scrollHeight') # 页面高度
  40. print("浏览器宽{} 浏览器高度{} 网页实际总高度{}".format(page_width, window_height, page_height))
  41. web_height = driver.execute_script("return window.innerHeight") # 网页可见内容的高 原先用页面高度作为滚动条的滑动高度,会导致页面错乱
  42. n1 = page_height // web_height
  43. print("当前窗口网页高度{}".format(web_height))
  44. last_height = page_height % web_height
  45. print("底部多出来的部分{}".format(last_height))
  46. is_end = 0
  47. if n1 == 0 or (n1 == 1 and last_height == 0): # 判断是否需要滚动
  48. # 去除留白部分, 一般情况下也不会只写一页报告且留那么多空白
  49. crop_height = web_height - page_height
  50. if crop_height > 0:
  51. origin_last_img = r'{}/{}.png'.format(image_folder, 1000)
  52. new_last_img = r'{}/{}.png'.format(image_folder, 0)
  53. driver.save_screenshot(origin_last_img)
  54. crop_img_botton(origin_last_img, new_last_img, page_height)
  55. else:
  56. driver.get_screenshot_as_file(r'{}/{}.png'.format(image_folder, '0'))
  57. else:
  58. j = 0
  59. while j <= n1:
  60. if j == 0:
  61. driver.execute_script("window.scrollBy(0,0)")
  62. # print(0)
  63. elif j == n1:
  64. # print(f'n1 is {n1}')
  65. if last_height > 0:
  66. # driver.execute_script("$('#app #resetcss').css('margin-bottom', '{}px')".format(web_height))
  67. driver.execute_script("window.scrollBy(0,{})".format(last_height)) # 滚动条滚到底
  68. # 最后一屏先进行一次保存
  69. sleep(1)
  70. origin_last_img = r'{}/{}.png'.format(image_folder, j + 1000)
  71. driver.save_screenshot(origin_last_img)
  72. # 截取上面重叠的一部分并保存截取后的图片
  73. new_last_img = r'{}/{}.png'.format(image_folder, j)
  74. crop_height = web_height - last_height
  75. crop_img_top(origin_last_img, new_last_img, crop_height)
  76. is_end = 1
  77. print("拉到底拉")
  78. else:
  79. driver.execute_script("window.scrollBy(0,{})".format(web_height))
  80. if is_end == 0:
  81. sleep(1)
  82. driver.save_screenshot(r'{}/{}.png'.format(image_folder, j)) # 截屏
  83. j = j + 1
  84. # crop_img_top 裁掉图片上面的部分保留剩下的部分
  85. def crop_img_top(image_path, output_path, crop_height):
  86. # 打开图像文件
  87. img = Image.open(image_path)
  88. # 获取图像的宽度和高度
  89. width, height = img.size
  90. # 计算裁剪的起始坐标
  91. start_x = 0
  92. start_y = min(height, crop_height) # 保证不超出图像下边界
  93. # 裁剪图像,仅保留上半部分
  94. cropped_img = img.crop((start_x, start_y, start_x + width, start_y + (height - crop_height)))
  95. # 保存裁剪后的图像
  96. cropped_img.save(output_path)
  97. # 删除原始图像
  98. os.remove(image_path)
  99. # crop_img_botton 裁掉图片下面的部分保留上面的部分
  100. def crop_img_botton(image_path, output_path, keep_height):
  101. # 打开图像文件
  102. img = Image.open(image_path)
  103. # 获取图像的宽度和高度
  104. width, height = img.size
  105. # 计算保留部分的起始坐标
  106. start_x = 0
  107. start_y = 0
  108. # 裁剪图像,保留从左上角开始指定高度的部分
  109. cropped_img = img.crop((start_x, start_y, start_x + width, start_y + keep_height))
  110. # 保存裁剪后的图像
  111. cropped_img.save(output_path)
  112. # 删除原始图像
  113. os.remove(image_path)
  114. # 调用截图函数
  115. if __name__ == "__main__":
  116. # 创建一个 Chrome WebDriver 实例
  117. options = webdriver.ChromeOptions()
  118. options.add_argument("headless")
  119. # options.add_argument(" window-size=1920,1080")
  120. # s = Service(executable_path='/Users/xiexiaoyuan/Downloads/chromedriver_mac64_111/chromedriver')
  121. s = Service(executable_path='E:/chromedriver-win64/chromedriver.exe')
  122. driver = webdriver.Chrome(service=s, options=options)
  123. # driver.maximize_window()
  124. driver.get('https://ficc.hzinsights.com/reportshare_crm_report?code=4e38d30e656da5ae9d3a425109ce9e04')
  125. sleep(5)
  126. image_folder = './imgs'
  127. output_file = './screenshot'
  128. file_name = 'output1'
  129. html2img(driver, image_folder)
  130. img_pdf_png.merge_images(image_folder, output_file, file_name)
  131. driver.quit()