admin管理员组

文章数量:1401602

I'm trying to use Selenium to scroll down a lazy-loading Facebook page so that all the comments get loaded onto the page, but scrolling does not seem to be working. I only ever get 10 comments, and the debugging output shows a constant scrollHeight. I've seen this question (page down with Keys), this question (scroll down with scrollBy), and this question (suggests scrollTo, scrollBy and Keys), but none of the answers work for me.

Here's the code:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import sys

# Seconds to pause after loading the page and after each scroll step, giving
# the page time to load more comments.
SLEEP_TIME = 5

# NOTE(review): the post URLs were stripped from this listing (the original
# `driver.get(...)` calls were left as unterminated string literals), so the
# target page is read from the command line instead of being hard-coded.
# Posts used in the original runs: 2/26 (953 comments), 2/27 (259 comments).
# Checked before the browser is started so a missing URL does not leave a
# Chrome process behind.
if len(sys.argv) < 2:
    sys.exit("usage: python <script> <facebook-post-url>")
PAGE_URL = sys.argv[1]

# Set up Selenium WebDriver
options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Run in background (optional)
driver = webdriver.Chrome(options=options)

driver.get(PAGE_URL)

# Wait for comments to load
time.sleep(SLEEP_TIME)

# Attempt 1: send PAGE_DOWN keystrokes to <body>, printing the document
# height before and after so any growth from lazy loading is visible.
lastHeight = driver.execute_script("return document.body.scrollHeight")
print("paging down, lastHeight is ", lastHeight)

PAGE_DOWN_PRESSES = 5  # Adjust as needed
presses_sent = 0
while presses_sent < PAGE_DOWN_PRESSES:
    # Look <body> up again on every pass, as the original loop did.
    page_body = driver.find_element(By.TAG_NAME, "body")
    page_body.send_keys(Keys.PAGE_DOWN)
    print("paging...")
    time.sleep(SLEEP_TIME)
    presses_sent += 1

lastHeight = driver.execute_script("return document.body.scrollHeight")
print("finished paging down, lastHeight is ", lastHeight)

# Attempt 2: jump to the bottom with window.scrollTo, repeating until the
# document height reported after a pause matches the previous height.
keep_scrolling = True
while keep_scrolling:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(SLEEP_TIME)
    current_height = driver.execute_script("return document.body.scrollHeight")
    print("newHeight is ", current_height)

    if current_height != lastHeight:
        # The page grew: remember the new height and scroll again.
        lastHeight = current_height
        print("lastHeight is ", lastHeight)
    else:
        keep_scrolling = False

print("finished scrolling, lastHeight is ", lastHeight)

# Attempt 3: scroll in fixed increments with window.scrollBy, stopping as
# soon as the vertical scroll offset no longer advances.
# The try/finally guarantees the browser session is shut down even if one of
# the script calls raises; the original exited without ever calling quit().
try:
    scroll_pos_init = driver.execute_script("return window.pageYOffset;")
    print("scroll_pos_init is ", scroll_pos_init)

    stepScroll = 300  # pixels per scrollBy step

    while True:
        driver.execute_script(f"window.scrollBy(0, {stepScroll});")
        scroll_pos_end = driver.execute_script("return window.pageYOffset;")
        print("scroll_pos_end is ", scroll_pos_end)

        time.sleep(SLEEP_TIME)
        # No forward movement means the bottom (or a non-scrolling page) was hit.
        if scroll_pos_init >= scroll_pos_end:
            break
        scroll_pos_init = scroll_pos_end

    print("finished scrolling")
finally:
    # Close the browser window(s) and end the WebDriver session.
    driver.quit()

sys.exit()

And here's the result:

paging down, lastHeight is 715

paging...

paging...

paging...

paging...

paging...

finished paging down, lastHeight is 715

newHeight is 715

finished scrolling, lastHeight is 715

scroll_pos_init is 56

scroll_pos_end is 56

finished scrolling

本文标签: python · Unable to use selenium to scroll down a lazy loading facebook page · Stack Overflow