admin管理员组

文章数量:1205584

I tried taking a proxy from this site: https://hidemy.name/en/proxy-list/?type=4#list

Here is my Puppeteer scraping code (deployed to Heroku), which is returning the error in the title on the .goto() line:

/**
 * Applies a set of headless-detection evasions to a Puppeteer page.
 *
 * Sets a desktop Chrome user agent, then registers five scripts through
 * `page.evaluateOnNewDocument` that override `navigator.webdriver`, mock a
 * `chrome` object, patch `navigator.permissions.query`, and fake
 * `navigator.plugins` and `navigator.languages`.
 *
 * @param {object} page - Object exposing async `setUserAgent(string)` and
 *   `evaluateOnNewDocument(function)` (a Puppeteer `Page`).
 * @returns {Promise<void>} Resolves once every override has been registered.
 */
const preparePageForTests = async (page) => {
  const userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36';

  await page.setUserAgent(userAgent);

  // Report `navigator.webdriver` as false.
  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'webdriver', {
      get: () => false,
    });
  });

  // Pass the Chrome Test.
  // NOTE(review): the mock is attached to `window.navigator.chrome`; detection
  // scripts may look at `window.chrome` instead — confirm which one is needed.
  await page.evaluateOnNewDocument(() => {
    // We can mock this in as much depth as we need for the test.
    window.navigator.chrome = {
      app: {
        isInstalled: false,
      },
      webstore: {
        onInstallStageChanged: {},
        onDownloadProgress: {},
      },
      runtime: {
        PlatformOs: {
          MAC: 'mac',
          WIN: 'win',
          ANDROID: 'android',
          CROS: 'cros',
          LINUX: 'linux',
          OPENBSD: 'openbsd',
        },
        PlatformArch: {
          ARM: 'arm',
          X86_32: 'x86-32',
          X86_64: 'x86-64',
        },
        PlatformNaclArch: {
          ARM: 'arm',
          X86_32: 'x86-32',
          X86_64: 'x86-64',
        },
        RequestUpdateCheckStatus: {
          THROTTLED: 'throttled',
          NO_UPDATE: 'no_update',
          UPDATE_AVAILABLE: 'update_available',
        },
        OnInstalledReason: {
          INSTALL: 'install',
          UPDATE: 'update',
          CHROME_UPDATE: 'chrome_update',
          SHARED_MODULE_UPDATE: 'shared_module_update',
        },
        OnRestartRequiredReason: {
          APP_UPDATE: 'app_update',
          OS_UPDATE: 'os_update',
          PERIODIC: 'periodic',
        },
      },
    };
  });

  // Answer 'notifications' permission queries from `Notification.permission`
  // and delegate every other query to the original implementation.
  await page.evaluateOnNewDocument(() => {
    const { permissions } = window.navigator;
    // Keep the method bound to its Permissions object: invoking it detached
    // loses `this`, which makes the delegated call fail.
    const originalQuery = permissions.query.bind(permissions);
    permissions.query = (parameters) => (
      parameters.name === 'notifications'
        ? Promise.resolve({ state: Notification.permission })
        : originalQuery(parameters)
    );
  });

  await page.evaluateOnNewDocument(() => {
    // Overwrite the `plugins` property to use a custom getter.
    Object.defineProperty(navigator, 'plugins', {
      // This just needs to have `length > 0` for the current test,
      // but we could mock the plugins too if necessary.
      get: () => [1, 2, 3, 4, 5],
    });
  });

  await page.evaluateOnNewDocument(() => {
    // Overwrite the `languages` property to use a custom getter.
    Object.defineProperty(navigator, 'languages', {
      get: () => ['en-US', 'en'],
    });
  });
};

// Launch headless Chrome with the sandbox flags disabled and all traffic
// routed through the SOCKS4 proxy given in --proxy-server.
const browser = await puppeteerExtra.launch({
  headless: true,
  args: [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--proxy-server=socks4://109.94.182.128:4145',
  ],
});

const page = await browser.newPage();

// Register the detection evasions before the first navigation.
await preparePageForTests(page);

// page.goto needs an absolute URL: the scheme, host and start of the query
// string are restored here so the navigation target is the Google events search.
await page.goto('https://www.google.com/search?q=concerts+near+new+york&client=safari&rls=en&uact=5&ibp=htl;events&rciv=evn&sa=X&fpstate=tldetail#htivrt=events&htidocid=L2F1dGhvcml0eS9ob3Jpem9uL2NsdXN0ZXJlZF9ldmVudC8yMDIxLTA2LTA0fDIxMjMzMzg4NTU2Nzc1NDk%3D&fpstate=tldetail');

I also sometimes get an "ERR_CONNECTION_CLOSED" or "ERR_CONNECTION_FAILED" instead of ERR_CONNECTION_RESET.

Any help in getting rid of this error (presumably by adding more ways to pass the google tests in the preparePageForTests function) would be amazing, thank you!

I tried taking a proxy from this site: https://hidemy.name/en/proxy-list/?type=4#list

Here is my Puppeteer scraping code (deployed to Heroku), which is returning the error in the title on the .goto() line:

/**
 * Applies a set of headless-detection evasions to a Puppeteer page.
 *
 * Sets a desktop Chrome user agent, then registers five scripts through
 * `page.evaluateOnNewDocument` that override `navigator.webdriver`, mock a
 * `chrome` object, patch `navigator.permissions.query`, and fake
 * `navigator.plugins` and `navigator.languages`.
 *
 * @param {object} page - Object exposing async `setUserAgent(string)` and
 *   `evaluateOnNewDocument(function)` (a Puppeteer `Page`).
 * @returns {Promise<void>} Resolves once every override has been registered.
 */
const preparePageForTests = async (page) => {
  const userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36';

  await page.setUserAgent(userAgent);

  // Report `navigator.webdriver` as false.
  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'webdriver', {
      get: () => false,
    });
  });

  // Pass the Chrome Test.
  // NOTE(review): the mock is attached to `window.navigator.chrome`; detection
  // scripts may look at `window.chrome` instead — confirm which one is needed.
  await page.evaluateOnNewDocument(() => {
    // We can mock this in as much depth as we need for the test.
    window.navigator.chrome = {
      app: {
        isInstalled: false,
      },
      webstore: {
        onInstallStageChanged: {},
        onDownloadProgress: {},
      },
      runtime: {
        PlatformOs: {
          MAC: 'mac',
          WIN: 'win',
          ANDROID: 'android',
          CROS: 'cros',
          LINUX: 'linux',
          OPENBSD: 'openbsd',
        },
        PlatformArch: {
          ARM: 'arm',
          X86_32: 'x86-32',
          X86_64: 'x86-64',
        },
        PlatformNaclArch: {
          ARM: 'arm',
          X86_32: 'x86-32',
          X86_64: 'x86-64',
        },
        RequestUpdateCheckStatus: {
          THROTTLED: 'throttled',
          NO_UPDATE: 'no_update',
          UPDATE_AVAILABLE: 'update_available',
        },
        OnInstalledReason: {
          INSTALL: 'install',
          UPDATE: 'update',
          CHROME_UPDATE: 'chrome_update',
          SHARED_MODULE_UPDATE: 'shared_module_update',
        },
        OnRestartRequiredReason: {
          APP_UPDATE: 'app_update',
          OS_UPDATE: 'os_update',
          PERIODIC: 'periodic',
        },
      },
    };
  });

  // Answer 'notifications' permission queries from `Notification.permission`
  // and delegate every other query to the original implementation.
  await page.evaluateOnNewDocument(() => {
    const { permissions } = window.navigator;
    // Keep the method bound to its Permissions object: invoking it detached
    // loses `this`, which makes the delegated call fail.
    const originalQuery = permissions.query.bind(permissions);
    permissions.query = (parameters) => (
      parameters.name === 'notifications'
        ? Promise.resolve({ state: Notification.permission })
        : originalQuery(parameters)
    );
  });

  await page.evaluateOnNewDocument(() => {
    // Overwrite the `plugins` property to use a custom getter.
    Object.defineProperty(navigator, 'plugins', {
      // This just needs to have `length > 0` for the current test,
      // but we could mock the plugins too if necessary.
      get: () => [1, 2, 3, 4, 5],
    });
  });

  await page.evaluateOnNewDocument(() => {
    // Overwrite the `languages` property to use a custom getter.
    Object.defineProperty(navigator, 'languages', {
      get: () => ['en-US', 'en'],
    });
  });
};

// Chromium command-line flags: both sandbox switches plus the SOCKS4 proxy
// that all browser traffic is sent through.
const chromiumFlags = [
  '--no-sandbox',
  '--disable-setuid-sandbox',
  '--proxy-server=socks4://109.94.182.128:4145',
];

const browser = await puppeteerExtra.launch({
  args: chromiumFlags,
  headless: true,
});
const page = await browser.newPage();

// Install the detection evasions before navigating anywhere.
await preparePageForTests(page);

// Google events search for concerts near New York.
const eventsSearchUrl = 'https://www.google.com/search?q=concerts+near+new+york&client=safari&rls=en&uact=5&ibp=htl;events&rciv=evn&sa=X&fpstate=tldetail#htivrt=events&htidocid=L2F1dGhvcml0eS9ob3Jpem9uL2NsdXN0ZXJlZF9ldmVudC8yMDIxLTA2LTA0fDIxMjMzMzg4NTU2Nzc1NDk%3D&fpstate=tldetail';
await page.goto(eventsSearchUrl);

I also sometimes get an "ERR_CONNECTION_CLOSED" or "ERR_CONNECTION_FAILED" instead of ERR_CONNECTION_RESET.

Any help in getting rid of this error (presumably by adding more ways to pass the google tests in the preparePageForTests function) would be amazing, thank you!

Share Improve this question edited Jun 4, 2021 at 13:05 nickcoding2 asked May 30, 2021 at 15:16 nickcoding2nickcoding2 2841 gold badge17 silver badges47 bronze badges 2
  • In my case this is happening randomly using a local nodejs server with puppeteer. – Neil Gaetano Lindberg Commented Jun 2, 2021 at 14:32
  • 1 @NeilGuyLindberg Not sure why that is, and I'm still not sure how to fix mine which isn't a local server... – nickcoding2 Commented Jun 2, 2021 at 20:46
Add a comment  | 

2 Answers 2

Reset to default 12 +150

You're using low-quality public proxies and it's only natural that they will generate network errors and/or be blocked by Google. The simplest solution here is to go for paid ones.

But it's also possible to catch the error and retry the request if page.goto fails:

/**
 * Navigates to the Google search page and returns the document title.
 *
 * Any error from navigation or from evaluating the title is logged with
 * `console.error` and reported as `false`, so callers can retry.
 *
 * @param {object} page - Object exposing async `goto(url)` and
 *   `evaluate(fn)` (a Puppeteer `Page`).
 * @returns {Promise<string|false>} The page title, or `false` on failure.
 */
const collectData = async (page) => {
  try {
    await page.goto('https://www.google.com/search?q=concerts+near+new+york');
    // `return await` is required inside `try`: a bare `return` would hand the
    // pending promise to the caller and a rejection would skip the catch below.
    return await page.evaluate(() => document.title);
  } catch (err) {
    console.error(err.message);
    return false;
  }
};

// Upper bound on page loads and the pause between two consecutive tries.
const MAX_ATTEMPTS = 5;
const RETRY_DELAY_MS = 3000;

let data = false;
let attempts = 0;

// Retry request until it gets data or tries MAX_ATTEMPTS times
while (data === false && attempts < MAX_ATTEMPTS) {
  data = await collectData(page);
  attempts += 1;
  // Only pause when another attempt will actually follow; sleeping after the
  // final failure would just delay the caller for no benefit.
  if (data === false && attempts < MAX_ATTEMPTS) {
    // Wait a few seconds, also a good idea to swap proxy here*
    await new Promise((resolve) => setTimeout(resolve, RETRY_DELAY_MS));
  }
}


* Modules for changing proxies programmatically:

  • https://www.npmjs.com/package/puppeteer-page-proxy
  • https://www.npmjs.com/package/proxy-chain

You need to await the page.goto("...")

// Await the navigation; the `waitUntil` option selects the load condition
// that goto waits for before resolving.
const navigationOptions = { waitUntil: 'networkidle2' };
await page.goto('https://google.com', navigationOptions);

本文标签: javascriptHow to get around Error netERRCONNECTION in PuppeteerStack Overflow