admin管理员组

文章数量:1379654

I’m new to web scraping with Selenium, and I’m trying to scrape property listings from Booking.com. My code (included below) successfully scrapes 25 results, but I suspect the issue is that more results only become available once I scroll and click the "Load more results" button.

I've tried using execute_script to scroll and find_element to locate the button, but I’m not sure how to implement a loop that continues loading results until the button disappears (or no more results are available).

Here's my code so far:

# Relevant imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# `By` and the exception classes live in the `common` subpackages.
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, NoSuchElementException

# WebDriver setup
# Start a Chrome session and open the Booking.com search results page.
# NOTE: the trailing "..." stands in for the rest of the query string
# (presumably elided by the author) -- supply the full URL before running.
driver = webdriver.Chrome(service=Service())
driver.get("https://www.booking.com/searchresults.en-gb.html?ss=cornwall...")

def handle_no_such_element_exception(data_extraction_task):
    """Run a zero-argument extraction callable, tolerating a missing element.

    Args:
        data_extraction_task: Callable taking no arguments; it is invoked
            exactly once.

    Returns:
        Whatever ``data_extraction_task`` returns, or ``None`` when it
        raises ``NoSuchElementException``. Any other exception propagates.
    """
    try:
        extracted = data_extraction_task()
    except NoSuchElementException:
        extracted = None
    return extracted

# Collected listings: one dict per property card.
items = []

# Load more results logic (This part is where I’m struggling)
# Each pass waits up to 5 seconds for the button to become clickable and
# clicks it; a timeout (or a missing element) ends the loop.
load_more_locator = (By.CSS_SELECTOR, "[data-testid='load-more-button']")
while True:
    try:
        WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable(load_more_locator)
        ).click()
    except (TimeoutException, NoSuchElementException):
        print("No more results to load.")
        break
    print("Clicked load more button...")


# Scraping logic (This part works fine)
def _card_text(card, selector):
    """Return the text of ``selector`` inside ``card``, or None if absent."""
    return handle_no_such_element_exception(
        lambda: card.find_element(By.CSS_SELECTOR, selector).text
    )


def _card_href(card, selector):
    """Return the href of ``selector`` inside ``card``, or None if absent."""
    return handle_no_such_element_exception(
        lambda: card.find_element(By.CSS_SELECTOR, selector).get_attribute("href")
    )


# Build one record per card currently present in the page.
items.extend(
    {
        "title": _card_text(card, "[data-testid=\"title\"]"),
        "address": _card_text(card, "[data-testid=\"address\"]"),
        "review_score": _card_text(card, "[data-testid=\"review-score\"]"),
        "link": _card_href(card, "[data-testid=\"title-link\"]"),
    }
    for card in driver.find_elements(By.CSS_SELECTOR, "[data-testid=\"property-card\"]")
)

print(items)
driver.quit()

What I’m asking:

  1. How can I properly scroll to load more results?
  2. How do I make sure that the "Load more results" button is clicked until no more results are available? Any guidance would be much appreciated!

I’m new to web scraping with Selenium, and I’m trying to scrape property listings from Booking.com. My code (included below) successfully scrapes 25 results, but I suspect the issue is that more results only become available once I scroll and click the "Load more results" button.

I've tried using execute_script to scroll and find_element to locate the button, but I’m not sure how to implement a loop that continues loading results until the button disappears (or no more results are available).

Here's my code so far:

# Relevant imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# `By` and the exception classes live in the `common` subpackages.
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, NoSuchElementException

# WebDriver setup
# Start a Chrome session and open the Booking.com search results page.
# NOTE: the trailing "..." stands in for the rest of the query string
# (presumably elided by the author) -- supply the full URL before running.
driver = webdriver.Chrome(service=Service())
driver.get("https://www.booking.com/searchresults.en-gb.html?ss=cornwall...")

def handle_no_such_element_exception(data_extraction_task):
    """Run a zero-argument extraction callable, tolerating a missing element.

    Args:
        data_extraction_task: Callable taking no arguments; it is invoked
            exactly once.

    Returns:
        Whatever ``data_extraction_task`` returns, or ``None`` when it
        raises ``NoSuchElementException``. Any other exception propagates.
    """
    try:
        extracted = data_extraction_task()
    except NoSuchElementException:
        extracted = None
    return extracted

# Collected listings: one dict per property card.
items = []

# Load more results logic (This part is where I’m struggling)
# Each pass waits up to 5 seconds for the button to become clickable and
# clicks it; a timeout (or a missing element) ends the loop.
load_more_locator = (By.CSS_SELECTOR, "[data-testid='load-more-button']")
while True:
    try:
        WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable(load_more_locator)
        ).click()
    except (TimeoutException, NoSuchElementException):
        print("No more results to load.")
        break
    print("Clicked load more button...")


# Scraping logic (This part works fine)
def _card_text(card, selector):
    """Return the text of ``selector`` inside ``card``, or None if absent."""
    return handle_no_such_element_exception(
        lambda: card.find_element(By.CSS_SELECTOR, selector).text
    )


def _card_href(card, selector):
    """Return the href of ``selector`` inside ``card``, or None if absent."""
    return handle_no_such_element_exception(
        lambda: card.find_element(By.CSS_SELECTOR, selector).get_attribute("href")
    )


# Build one record per card currently present in the page.
items.extend(
    {
        "title": _card_text(card, "[data-testid=\"title\"]"),
        "address": _card_text(card, "[data-testid=\"address\"]"),
        "review_score": _card_text(card, "[data-testid=\"review-score\"]"),
        "link": _card_href(card, "[data-testid=\"title-link\"]"),
    }
    for card in driver.find_elements(By.CSS_SELECTOR, "[data-testid=\"property-card\"]")
)

print(items)
driver.quit()

What I’m asking:

  1. How can I properly scroll to load more results?
  2. How do I make sure that the "Load more results" button is clicked until no more results are available? Any guidance would be much appreciated!
Share Improve this question asked Mar 18 at 15:01 ElisJDElisJD 471 silver badge8 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 2

I did a few changes to the code to make it work for your case:

  • the first results are loaded automatically when the user scrolls so first we need to scroll to the bottom of the page a few times
  • only then does the "Load more results" button appear, and we need to locate it properly and click it
  • I also closed the cookie banner as it was interfering with clicking the button

Here is the relevant part:

# `time.sleep` is used below for the pause between scrolls; the import is
# placed here so this snippet is self-contained.
import time

# get rid of the cookie banner
try:
    cookie_button = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
    )
except TimeoutException:
    # No clickable banner showed up within 5 seconds, so there is nothing
    # to dismiss; carry on instead of aborting the whole scrape.
    print("No cookie banner to dismiss.")
else:
    cookie_button.click()

# Scroll to load more results using JavaScript on the client
# Keep jumping to the bottom until the page height stops growing, with a
# hard cap of `max_scrolls` passes so the loop always terminates.
prev_height = -1
max_scrolls = 100
for _ in range(max_scrolls):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1.5)  # give some time for new results to load
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == prev_height:
        # no more elements were loaded
        break
    prev_height = new_height

# Now click the load more button while there are more results
while True:
    try:
        # choosing a good selector here is a bit tricky as there's
        # nothing reliable but this works at the moment
        load_more_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "[data-results-container=\"1\"] button.af7297d90d.c0e0affd09"))
        )
        load_more_button.click()
    except (TimeoutException, NoSuchElementException):
        # The button did not become clickable within 5 seconds: treat that
        # as "everything has been loaded".
        print("No more results to load.")
        break
    # Jump back to the bottom after each click before waiting for the
    # button again.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    print("Clicked load more button...")

Using the code above I was able to extract 981 items for your search query.
The code can be improved, but it works and shows the idea and I think you can improve it further as needed. Hope this helps!

本文标签: How to scroll and click "Load more results" using Selenium in Python (booking.com) - Stack Overflow