I'm using puppeteer-extra
and Node.js to iterate across multiple URLs.
I'm trying to intercept requests so that certain resource types are not loaded on each iteration, but I'm getting the following error.
PS C:\Users\someuser\Desktop\Project> node temp.js
-- running
C:\Users\someuser\node_modules\puppeteer\lib\cjs\puppeteer\common\assert.js:26
throw new Error(message);
^
Error: Request is already handled!
at Object.exports.assert (C:\Users\someuser\node_modules\puppeteer\lib\cjs\puppeteer\common\assert.js:26:15)
at HTTPRequest.continue (C:\Users\someuser\node_modules\puppeteer\lib\cjs\puppeteer\common\HTTPRequest.js:217:21)
at PuppeteerBlocker.onRequest (C:\Users\someuser\node_modules\@cliqz\adblocker-puppeteer\dist\cjs\adblocker.js:225:33)
at BlockingContext.onRequest (C:\Users\someuser\node_modules\@cliqz\adblocker-puppeteer\dist\cjs\adblocker.js:64:47)
at C:\Users\someuser\node_modules\puppeteer\lib\cjs\vendor\mitt\src\index.js:51:62
at Array.map (<anonymous>)
at Object.emit (C:\Users\someuser\node_modules\puppeteer\lib\cjs\vendor\mitt\src\index.js:51:43)
at Page.emit (C:\Users\someuser\node_modules\puppeteer\lib\cjs\puppeteer\common\EventEmitter.js:72:22)
at C:\Users\someuser\node_modules\puppeteer\lib\cjs\puppeteer\common\Page.js:143:100
at C:\Users\someuser\node_modules\puppeteer\lib\cjs\vendor\mitt\src\index.js:51:62
I'm having trouble understanding why the request would already be handled, since the actual request (page.goto)
is made inside the for
loop. Would anyone have any hints?
Here is the full project:
// Reproduction script for the "Request is already handled!" error shown above.
// It launches a headless browser through puppeteer-extra with the stealth and
// adblocker plugins, installs a 'request' listener that aborts or continues each
// intercepted request, then visits 20 shop pages in sequence and logs their titles.
const puppeteer = require( 'puppeteer-extra' );
const StealthPlugin = require( 'puppeteer-extra-plugin-stealth' );
puppeteer.use( StealthPlugin() );
const AdblockerPlugin = require( 'puppeteer-extra-plugin-adblocker' );
// NOTE(review): the stack trace above shows the adblocker's own onRequest handler
// calling request.continue(); presumably it and the listener below both try to
// resolve the same request — confirm against the Puppeteer version in use.
puppeteer.use( AdblockerPlugin( { blockTrackers: true } ) );
puppeteer.launch( { headless: true } ).then( async browser => {
console.log( '--\xa0running' );
// Overall timer; stopped after the browser is closed.
console.time( '--\xa0process' );
const page = await browser.newPage();
// Enable interception so the listener below decides, per request, whether to abort or continue.
await page.setRequestInterception( true );
page.on( 'request', ( request ) => {
// NOTE(review): indexOf() yields -1 (truthy) when the type is NOT in the list and
// 0 (falsy) for 'image', so as written this aborts every request except images.
// An explicit comparison with -1 (or includes()) is needed to test membership.
if ( [ 'image', 'stylesheet', 'font', 'script' ].indexOf( request.resourceType() ) ) {
request.abort();
} else {
request.continue();
};
} );
// Visit pages s-1 … s-20 one after another on the same page object.
for ( var i = 1; i <= 20; i++ ) {
console.time( '--\xa0iteration\xa0' + i ); // start the per-iteration timer
// '%2D' is the URL-encoded '-'; wait for the page's load event before reading the title.
await page.goto( 'https://www.someurl.it/shop/s%2D' + i, { waitUntil: 'load' } );
const title = await page.title();
// Log false when the title contains '404', otherwise log the title itself.
console.log( title.includes( '404' ) ? false : title );
console.timeEnd( '--\xa0iteration\xa0' + i ); // stop the per-iteration timer
};
await browser.close();
console.timeEnd( '--\xa0process' );
console.log( '--\xa0ending' );
} );
Adding a return statement when aborting, and making the resource-type check an explicit test for membership in the list, solved the issue on my end.
// Request listener: abort requests whose resource type is image, stylesheet,
// font or script; continue every other request.
page.on( 'request', ( interceptedRequest ) => {
  const blockedTypes = [ 'image', 'stylesheet', 'font', 'script' ];
  const isBlocked = blockedTypes.includes( interceptedRequest.resourceType() );
  // Return immediately after aborting so continue() is never called on the same request.
  if ( isBlocked ) {
    return interceptedRequest.abort();
  }
  interceptedRequest.continue();
} );
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate With