admin管理员组

文章数量:1208155

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time

def checkLinkedIn(command):
    """Report whether a LinkedIn profile page exists for ``command``.

    Opens ``https://www.linkedin.com/in/<command>`` in headless Chrome,
    parses the rendered HTML and looks for an ``<h1>`` carrying the
    ``top-card-layout__title`` class. The outcome is printed (together with
    the URL when a profile is found); nothing is returned.

    Args:
        command: Profile slug appended to the LinkedIn ``/in/`` URL.
    """
    url = f"https://www.linkedin.com/in/{command}"
    # Raw string: "\P" and "\c" are not valid escape sequences in a normal
    # literal; the resulting path value is unchanged.
    path = r"C:\Program Files (x86)\chromedriver.exe"
    options = Options()
    options.add_argument("--headless")
    # NOTE(review): passing the driver path positionally is the Selenium 3
    # call style; newer Selenium releases expect a Service object instead --
    # confirm against the installed version.
    driver = webdriver.Chrome(path, options=options)
    try:
        driver.get(url)
        # Wait *before* taking the snapshot so the pause can actually give
        # the page time to render; sleeping afterwards had no effect on it.
        time.sleep(2)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()
    name = soup.find("h1", attrs={"class": "top-card-layout__title"})
    if name:
        print("LinkedIn profile found")
        print(url)
    else:
        print("No LinkedIn profile found")

def checkTwitter(command):
    """Report whether a Twitter profile page exists for ``command``.

    Opens ``https://www.twitter.com/<command>`` in headless Chrome, parses
    the rendered HTML and looks for a ``<div dir="ltr">`` element. The page
    text is printed for inspection, followed by the outcome (and the URL
    when a profile is found); nothing is returned.

    Args:
        command: Account handle appended to the Twitter URL.
    """
    url = f"https://www.twitter.com/{command}"
    # Raw string: "\P" and "\c" are not valid escape sequences in a normal
    # literal; the resulting path value is unchanged.
    path = r"C:\Program Files (x86)\chromedriver.exe"
    options = Options()
    options.add_argument("--headless")
    # NOTE(review): passing the driver path positionally is the Selenium 3
    # call style; newer Selenium releases expect a Service object instead --
    # confirm against the installed version.
    driver = webdriver.Chrome(path, options=options)
    try:
        driver.get(url)
        # Wait *before* taking the snapshot so the pause can actually give
        # the page time to render; sleeping afterwards had no effect on it.
        time.sleep(2)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()
    at_tag = soup.find("div", attrs={"dir": "ltr"})
    # Dump the page text so the served content can be inspected.
    print(soup.text)
    if at_tag:
        print("Twitter profile found")
        print(url)
    else:
        print("No Twitter profile found")

# Read the handle to look up from standard input.
usrname = input("--> ")

# Run the Twitter check for that handle; the function prints its own result.
checkTwitter(usrname)

The LinkedIn function works. However, the Twitter one comes up with this:

JavaScript is not available. We’ve detected that JavaScript is disabled in this browser. Please enable JavaScript or switch to a supported browser to continue using twitter.com. You can see a list of supported browsers in our Help Centre.

How do I enable JavaScript in headless Chrome? Thanks in advance.

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time

def checkLinkedIn(command):
    """Report whether a LinkedIn profile page exists for ``command``.

    Opens ``https://www.linkedin.com/in/<command>`` in headless Chrome,
    parses the rendered HTML and looks for an ``<h1>`` carrying the
    ``top-card-layout__title`` class. The outcome is printed (together with
    the URL when a profile is found); nothing is returned.

    Args:
        command: Profile slug appended to the LinkedIn ``/in/`` URL.
    """
    url = f"https://www.linkedin.com/in/{command}"
    # Raw string: "\P" and "\c" are not valid escape sequences in a normal
    # literal; the resulting path value is unchanged.
    path = r"C:\Program Files (x86)\chromedriver.exe"
    options = Options()
    options.add_argument("--headless")
    # NOTE(review): passing the driver path positionally is the Selenium 3
    # call style; newer Selenium releases expect a Service object instead --
    # confirm against the installed version.
    driver = webdriver.Chrome(path, options=options)
    try:
        driver.get(url)
        # Wait *before* taking the snapshot so the pause can actually give
        # the page time to render; sleeping afterwards had no effect on it.
        time.sleep(2)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()
    name = soup.find("h1", attrs={"class": "top-card-layout__title"})
    if name:
        print("LinkedIn profile found")
        print(url)
    else:
        print("No LinkedIn profile found")

def checkTwitter(command):
    """Report whether a Twitter profile page exists for ``command``.

    Opens ``https://www.twitter.com/<command>`` in headless Chrome, parses
    the rendered HTML and looks for a ``<div dir="ltr">`` element. The page
    text is printed for inspection, followed by the outcome (and the URL
    when a profile is found); nothing is returned.

    Args:
        command: Account handle appended to the Twitter URL.
    """
    url = f"https://www.twitter.com/{command}"
    # Raw string: "\P" and "\c" are not valid escape sequences in a normal
    # literal; the resulting path value is unchanged.
    path = r"C:\Program Files (x86)\chromedriver.exe"
    options = Options()
    options.add_argument("--headless")
    # NOTE(review): passing the driver path positionally is the Selenium 3
    # call style; newer Selenium releases expect a Service object instead --
    # confirm against the installed version.
    driver = webdriver.Chrome(path, options=options)
    try:
        driver.get(url)
        # Wait *before* taking the snapshot so the pause can actually give
        # the page time to render; sleeping afterwards had no effect on it.
        time.sleep(2)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()
    at_tag = soup.find("div", attrs={"dir": "ltr"})
    # Dump the page text so the served content can be inspected.
    print(soup.text)
    if at_tag:
        print("Twitter profile found")
        print(url)
    else:
        print("No Twitter profile found")

# Read the handle to look up from standard input.
usrname = input("--> ")

# Run the Twitter check for that handle; the function prints its own result.
checkTwitter(usrname)

The LinkedIn function works. However, the Twitter one comes up with this:

JavaScript is not available. We’ve detected that JavaScript is disabled in this browser. Please enable JavaScript or switch to a supported browser to continue using twitter.com. You can see a list of supported browsers in our Help Centre.

How do I enable JavaScript in headless Chrome? Thanks in advance.

Share Improve this question asked May 20, 2021 at 8:54 TomTom 4501 gold badge4 silver badges11 bronze badges
Add a comment  | 

2 Answers 2

Reset to default 21

This may be because the website detects that it is being loaded by a headless browser and disables some features.

To get around it you can spoof (as much as possible) the identity of the headless browser to trick the website.

Try the following options:

from fake_useragent import UserAgent

# Build Chrome options for a headless session with the automation-related
# switches and features turned off.
options = webdriver.ChromeOptions()

options.add_argument('--headless')
options.add_argument("--incognito")
options.add_argument("--nogpu")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1280,1280")
options.add_argument("--no-sandbox")
options.add_argument("--enable-javascript")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')

# Pick a random user-agent string to present instead of the default one.
ua = UserAgent()
userAgent = ua.random

driver = webdriver.Chrome(options=options)
# Register the navigator.webdriver override for every new document. A script
# run through execute_script() only touches the page that is loaded at that
# moment, so the override would be gone after the next driver.get().
driver.execute_cdp_cmd(
    'Page.addScriptToEvaluateOnNewDocument',
    {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
)
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": userAgent})

This worked for me with a particularly stubborn website. I gathered the options from many SO answers, but particularly this one: https://stackoverflow.com/a/53040904/5339857

Use:

# NOTE(review): `options` is presumably the Chrome options object that is
# later passed to webdriver.Chrome -- it is not defined in this fragment.
options.add_argument("--enable-javascript")

本文标签: python - How to enable JavaScript with headless Chrome in selenium - Stack Overflow