admin管理员组

文章数量:1295912

I'm using the following code to try to get the response body from a website using Puppeteer.

#!/usr/bin/env node

// Script overview: launches Chrome through Puppeteer, fills in a login form
// with credentials read from process.env, opens a second page, asks on the
// terminal for a track name, types it into the search field and clicks a
// play button. While this runs, a 'requestfinished' listener prints the body
// of every response whose URL matches a prefix.

// dotenv's config() is expected to populate process.env (EMAIL and PASSWORD
// are read further down) -- presumably from a local .env file.
require('dotenv').config();
const puppeteer = require('puppeteer');
// Terminal prompt interface bound to stdin/stdout; used for the track question.
const readline = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
});
// NOTE(review): path and fs are required but not referenced anywhere in this script.
const path = require('path');
const fs = require('fs');

// Progress message printed before the browser is launched.
console.log('Starting Puppeteer...');

// Intended accumulator for response bodies; the only push into it is
// commented out below, so it stays empty.
let responseBody = [];

// NOTE(review): the async IIFE below has no .catch(), so any rejection inside
// it is an unhandled promise rejection.
(async () => {
    // Headful launch against a specific local Chrome binary (macOS path).
    const browser = await puppeteer.launch({
        headless: false,
        executablePath: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
    });
    const page = await browser.newPage();
    
    // Interception is switched on and the 'request' listener simply lets
    // every request through unmodified.
    await page.setRequestInterception(true);

    page.on('request', (request) => {
        request.continue();
    });

    // Inspect each finished request and log the body of matching responses.
    // NOTE(review): there is no try/catch in this handler, so a rejection from
    // response.buffer() surfaces as an unhandled rejection.
    page.on('requestfinished', async (request) => {
        const response =  await request.response();
        const url = response.url();
        // Only responses whose URL starts with this prefix are logged.
        // NOTE(review): the prefix here is just '/' -- presumably a redacted URL; confirm.
        if( url.startsWith('/') ){
            console.log(await response.buffer());
            //responseBody.push(response.buffer());
        }
    });

    // First navigation; waits for both 'load' and 'networkidle2'.
    // NOTE(review): the URL literal is empty here -- presumably redacted; confirm.
    await page.goto('', {
        waitUntil: ['load', 'networkidle2']
    });

    // Fill in the login form; each selector lookup gives up after 3000 ms and
    // keystrokes are typed with a 100 ms delay.
    const emailField = await page.waitForSelector('#login-username', {timeout: 3000});
    await emailField.type(process.env.EMAIL, {delay: 100});

    const passwordField = await page.waitForSelector('#login-password', {timeout: 3000});
    await passwordField.type(process.env.PASSWORD, {delay: 100});

    const submitButton = await page.waitForSelector('#login-button', {timeout: 3000});
    await submitButton.click();
    
    // Presumably waits for the post-login navigation. The wait is only started
    // after click() has resolved -- TODO confirm it cannot miss the navigation.
    // `navigation` is referenced only by the commented-out check below.
    const navigation = await page.waitForNavigation({ waitUntil: ['load', 'networkidle2'] });
    
    //if( navigation.url().endsWith('status') ){
    // Second navigation (URL literal is empty here as well); the remainder of
    // the flow runs inside the .then callback.
    await page.goto('', { 
        waitUntil: ['load', 'networkidle2'] 
    }).then( async (response) => {
        //console.log(response);
        // NOTE(review): page.$ results are used without a null check; if nothing
        // matches, the click()/type() calls below would presumably throw.
        const cookieButton = await page.$('#onetrust-accept-btn-handler');
        await cookieButton.click();
        const searchField = await page.$('[data-testid="search-input"]');
        // NOTE(review): question() is called in its callback form; its return
        // value is presumably not a promise, so this await would not wait for
        // the user's answer -- confirm against the readline docs.
        await readline.question('What track do you want to search for?', (input) => {
            console.log('answer:', input);
            // The promise chain started here is neither returned nor awaited
            // by this callback.
            searchField.type(input).then( async () => {
                await page.waitForXPath('//*[@id="searchPage"]/div/div/section[1]/div[2]/div/div/div/div[4]').then( async (element) => {
                    // The focus() chain is neither returned nor awaited, so the
                    // enclosing async callback resolves without waiting for the click.
                    element.focus().then( async () => {
                        // //*[@id="searchPage"]/div/div/section[1]/div[2]/div/div/div/div[3]/button
                        const playButton = await page.waitForXPath('//*[@id="searchPage"]/div/div/section[1]/div[2]/div/div/div/div[3]/button');
                        await playButton.click();
                    });
                });
            });
        });
    });
    
    
    //}

})();

I'm having a problem with it: the following error is logged and the script terminates.

/Users/dev/Desktop/test/node_modules/puppeteer/lib/cjs/puppeteer/common/Connection.js:208
            this._callbacks.set(id, { resolve, reject, error: new Error(), method });
                                                              ^

Error: Protocol error (Network.getResponseBody): No resource with given identifier found
    at /Users/dev/Desktop/test/node_modules/puppeteer/lib/cjs/puppeteer/common/Connection.js:208:63
    at new Promise (<anonymous>)
    at CDPSession.send (/Users/dev/Desktop/test/node_modules/puppeteer/lib/cjs/puppeteer/common/Connection.js:207:16)
    at /Users/dev/Desktop/test/node_modules/puppeteer/lib/cjs/puppeteer/common/HTTPResponse.js:99:53
    at runMicrotasks (<anonymous>)
    at processTicksAndRejections (node:internal/process/task_queues:93:5)
    at async /Users/dev/Desktop/test/index.js:40:25

I need to collect the response body content of every request made to a certain URL, and then use ffmpeg to convert it back into a full-length track. How can I solve the problem? Is it possible to get the response body of each request and then join them all together?

I'm using the following code to try to get the response body from a website using Puppeteer.

#!/usr/bin/env node

// Script overview: launches Chrome through Puppeteer, logs in on an accounts
// page with credentials read from process.env, opens a search page, asks on
// the terminal for a track name, types it into the search field and clicks a
// play button. While this runs, a 'requestfinished' listener prints the body
// of every response whose URL starts with an audio path prefix.

// dotenv's config() is expected to populate process.env (EMAIL and PASSWORD
// are read further down) -- presumably from a local .env file.
require('dotenv').config();
const puppeteer = require('puppeteer');
// Terminal prompt interface bound to stdin/stdout; used for the track question.
const readline = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
});
// NOTE(review): path and fs are required but not referenced anywhere in this script.
const path = require('path');
const fs = require('fs');

// Progress message printed before the browser is launched.
console.log('Starting Puppeteer...');

// Intended accumulator for response bodies; the only push into it is
// commented out below, so it stays empty.
let responseBody = [];

// NOTE(review): the async IIFE below has no .catch(), so any rejection inside
// it is an unhandled promise rejection.
(async () => {
    // Headful launch against a specific local Chrome binary (macOS path).
    const browser = await puppeteer.launch({
        headless: false,
        executablePath: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
    });
    const page = await browser.newPage();
    
    // Interception is switched on and the 'request' listener simply lets
    // every request through unmodified.
    await page.setRequestInterception(true);

    page.on('request', (request) => {
        request.continue();
    });

    // Inspect each finished request and log the body of matching responses.
    // NOTE(review): there is no try/catch in this handler, so a rejection from
    // response.buffer() surfaces as an unhandled rejection.
    page.on('requestfinished', async (request) => {
        const response =  await request.response();
        const url = response.url();
        // Only responses served from this audio path prefix are logged.
        // NOTE(review): the host in this literal looks truncated -- confirm.
        if( url.startsWith('https://audio-akp-quic-control-examplecdn-.akamaized/audio/') ){
            console.log(await response.buffer());
            //responseBody.push(response.buffer());
        }
    });

    // Open the login page; waits for both 'load' and 'networkidle2'.
    // NOTE(review): the host ends in a bare '.' -- looks truncated; confirm.
    await page.goto('https://accounts.examplecdn./login', {
        waitUntil: ['load', 'networkidle2']
    });

    // Fill in the login form; each selector lookup gives up after 3000 ms and
    // keystrokes are typed with a 100 ms delay.
    const emailField = await page.waitForSelector('#login-username', {timeout: 3000});
    await emailField.type(process.env.EMAIL, {delay: 100});

    const passwordField = await page.waitForSelector('#login-password', {timeout: 3000});
    await passwordField.type(process.env.PASSWORD, {delay: 100});

    const submitButton = await page.waitForSelector('#login-button', {timeout: 3000});
    await submitButton.click();
    
    // Presumably waits for the post-login navigation. The wait is only started
    // after click() has resolved -- TODO confirm it cannot miss the navigation.
    // `navigation` is referenced only by the commented-out check below.
    const navigation = await page.waitForNavigation({ waitUntil: ['load', 'networkidle2'] });
    
    //if( navigation.url().endsWith('status') ){
    // Open the search page; the remainder of the flow runs inside the .then callback.
    await page.goto('https://example.cdn./search', { 
        waitUntil: ['load', 'networkidle2'] 
    }).then( async (response) => {
        //console.log(response);
        // NOTE(review): page.$ results are used without a null check; if nothing
        // matches, the click()/type() calls below would presumably throw.
        const cookieButton = await page.$('#onetrust-accept-btn-handler');
        await cookieButton.click();
        const searchField = await page.$('[data-testid="search-input"]');
        // NOTE(review): question() is called in its callback form; its return
        // value is presumably not a promise, so this await would not wait for
        // the user's answer -- confirm against the readline docs.
        await readline.question('What track do you want to search for?', (input) => {
            console.log('answer:', input);
            // The promise chain started here is neither returned nor awaited
            // by this callback.
            searchField.type(input).then( async () => {
                await page.waitForXPath('//*[@id="searchPage"]/div/div/section[1]/div[2]/div/div/div/div[4]').then( async (element) => {
                    // The focus() chain is neither returned nor awaited, so the
                    // enclosing async callback resolves without waiting for the click.
                    element.focus().then( async () => {
                        // //*[@id="searchPage"]/div/div/section[1]/div[2]/div/div/div/div[3]/button
                        const playButton = await page.waitForXPath('//*[@id="searchPage"]/div/div/section[1]/div[2]/div/div/div/div[3]/button');
                        await playButton.click();
                    });
                });
            });
        });
    });
    
    
    //}

})();

I'm having a problem with it: the following error is logged and the script terminates.

/Users/dev/Desktop/test/node_modules/puppeteer/lib/cjs/puppeteer/common/Connection.js:208
            this._callbacks.set(id, { resolve, reject, error: new Error(), method });
                                                              ^

Error: Protocol error (Network.getResponseBody): No resource with given identifier found
    at /Users/dev/Desktop/test/node_modules/puppeteer/lib/cjs/puppeteer/common/Connection.js:208:63
    at new Promise (<anonymous>)
    at CDPSession.send (/Users/dev/Desktop/test/node_modules/puppeteer/lib/cjs/puppeteer/common/Connection.js:207:16)
    at /Users/dev/Desktop/test/node_modules/puppeteer/lib/cjs/puppeteer/common/HTTPResponse.js:99:53
    at runMicrotasks (<anonymous>)
    at processTicksAndRejections (node:internal/process/task_queues:93:5)
    at async /Users/dev/Desktop/test/index.js:40:25

I need to collect the response body content of every request made to a certain URL, and then use ffmpeg to convert it back into a full-length track. How can I solve the problem? Is it possible to get the response body of each request and then join them all together?

Share Improve this question edited Oct 23, 2023 at 15:51 ggorlen 57.3k8 gold badges110 silver badges154 bronze badges asked Feb 26, 2021 at 17:45 newbiedevnewbiedev 3,5964 gold badges28 silver badges82 bronze badges 0
Add a comment  | 

2 Answers 2

Reset to default 2

The error "No resource with given identifier found" happens when the page navigates to another URL before you have finished reading the content of the network response. It can be caused by redirects, the JS History API, and so on.

Thus, you can do either:

  • Prevent the browser from navigating to other pages before the response has been processed.
  • Use Firefox. Firefox doesn't have this issue, and the Chrome team won't fix it.

ref (in Japanese): https://happy-nap.hatenablog.com/entry/2023/04/15/081747

"No resource with given identifier found" (and in recent Puppeteer versions, including ^21.2.1, "ProtocolError: Could not load body for this request. This might happen if the request is a preflight request.") is caused by a race condition, which typically occurs when you forget to await a promise, resulting in a navigation interleaving with response handling.

There are many issues and antipatterns here, some of which cause race conditions. A couple of your .then callbacks never return anything. For example:

element.focus().then(...

should be

return element.focus().then(...

The following pattern is incorrect:

await readline.question('What track do you want to search for?', (input) => {

Asynchronous functions typically either return a promise or accept a callback, not both. The await tricks you into thinking you're keeping this in the promise chain, when you're actually awaiting undefined. The actual "promise" is the callback.

As a rule, don't mix await and then. The point of promises is to flatten out code so you can write it in a synchronous style. If you find you have many layers of nested callbacks or .then(async () => ..., a red flag should go off: the chances that you've failed to handle an error or abandoned a promise chain increase.

If you need to promisify a callback, you can:

/**
 * Promise wrapper around the callback-based `readline.question`.
 *
 * @param {string} prompt - Text handed to `readline.question`.
 * @returns {Promise<*>} Resolves with whatever value `readline.question`
 *   passes to its callback.
 */
function question(prompt) {
  return new Promise((resolve) => {
    readline.question(prompt, (answer) => {
      resolve(answer);
    });
  });
}

Now you can use it in your code "synchronously" like:

const input = await question("What track do you want to search for?");

There's also Node's util.promisify, which does more or less the same operation mechanically.

I can't run your code without the username and password, but if you remove all thens (yes, every last one!), await everything in a single promise chain and promisify any callback-based asynchronous functions, you should be able to avoid this error.

I also suggest avoiding those long, rigid, browser-generated XPaths. They make too many assumptions about the structure that can easily fail, and there are almost always more robust selectors or paths you can use.

Taking a step back, I suggest coding slowly and running the code at each step so you can verify each assumption along the way. In doing so, you can minimize problems, tackle them immediately, and avoid a chaotic, complex situation with multiple issues that are difficult to debug all at once.

See this answer for a minimal, reproducible example of working code that avoids this error (the question it's attached to is also non-reproducible, unfortunately). The linked answer is in Playwright, but the same promise issue and solution applies equally to Puppeteer.

See also:

  • Network.getResponseBody fails for some pages (No resource with given identifier found undefined) #2258, which has a minimal reproduction of the error
  • Could not get response body by using response.text(), response.buffer() or response.json() methods #2176
  • Error: Protocol error (Network.getResponseBody): No resource with given identifier found #4992

Disclosure: I'm the author of the linked blog post.

本文标签: