admin管理员组

文章数量:1332361

I created a data-scraping script in Python and it works fine on its own. My problem is that when I run it in multiple instances, it stops after about 25 minutes. How do I handle multiple instances of the Chromium browser and make sure they don't crash? Also, how should I handle slow responses from the site? I've noticed the failure usually happens when the site is performing slowly. My code is below (note: the indentation may be off due to copy/paste).

# Function to open the browser and process each parcel id. dirs are array of parcel ids

def executeTask(dirs, search_url="https://example.com/PropertySearch"):
    """Drive one Chrome instance and look up each parcel id in *dirs*.

    Parameters
    ----------
    dirs : iterable of str
        Parcel ids to search for. Ids of 8 characters or fewer get a
        trailing "." appended before searching.
    search_url : str, optional
        Address of the property-search page. The original snippet's URL was
        lost in the paste (`driver.get(f";)`), so this is a placeholder —
        TODO: restore the real URL.

    Returns
    -------
    0 if the search page never loaded, "Ok" otherwise.
    """
    chrome_options = webdriver.ChromeOptions()
    # Stealth/headless options were disabled in the original; kept for reference.
    # chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    # chrome_options.add_argument('--headless')
    # chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    # chrome_options.add_experimental_option("useAutomationExtension", False)
    driver = webdriver.Chrome(options=chrome_options)
    try:
        print("Step success. if a new Chrome Browser Pop Up, you can proceed to next step")
        driver.get(search_url)

        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, 'PropertySearch'))
            )
            print("page loaded successfully")
        except TimeoutException:
            print("Page was not able to load.")
            return 0

        for index, parcel_id in enumerate(dirs):
            parcel_id = parcel_id.strip()
            if len(parcel_id) <= 8:
                parcel_id = parcel_id + "."

            if index == 0:
                # First iteration only: click twice at the current position,
                # presumably to dismiss an overlay or grab focus before
                # filling the form — TODO confirm against the target site.
                time.sleep(2)
                actions = ActionChains(driver)
                actions.move_by_offset(0, 0).click().perform()
                actions.move_by_offset(0, 0).click().perform()

            # NOTE(review): `formula` comes from module scope and is executed
            # as raw JS on every iteration — confirm what it does.
            driver.execute_script(formula)

            # Fill the parcel-id field and submit the search via jQuery.
            driver.execute_script(f"$('#xxxPARCEL_ID').val('{parcel_id}');")
            driver.execute_script("$('#btParcelSearches').click();")

            try:
                # Give slow pages up to 60s for the results marker to appear;
                # the original 10s wait is what made slow site responses fail.
                WebDriverWait(driver, 60).until(
                    EC.presence_of_element_located((By.ID, 'back2Top'))
                )
                soup = BeautifulSoup(driver.page_source, 'lxml')
                getPageDetails(soup, parcel_id)
            except TimeoutException:
                print("Loading took too much time! The body element was not found.")

        return "Ok"
    finally:
        # Always release the browser, even on an unexpected exception.
        # Leaked Chrome processes are a common cause of multi-instance
        # crashes; quit() (not close()) ends the whole WebDriver session.
        driver.quit()

Then this is the code for multithreading:

# Number of parallel Chrome instances; also caps the thread pool so exactly
# one thread drives each browser.
instanceCount = 3

parcelIds = getAllParcelToFind()
print(f"Parcel ID Count: {len(parcelIds)}")

# Split the parcel ids into `instanceCount` chunks, one per browser instance.
arr = dataSplit(parcelIds, instanceCount)

# max_workers must match the number of chunks: the executor default could
# otherwise queue chunks or oversubscribe the machine with extra browsers.
with concurrent.futures.ThreadPoolExecutor(max_workers=instanceCount) as executor:
    # Each worker thread runs executeTask on one chunk of parcel ids.
    results = list(executor.map(executeTask, arr))

print("Results:", results)

Thanks in advance.

Ant

本文标签: Selenium Chromium browser stop on multi instance using PythonStack Overflow