I'm developing a web screen-capture app with Node.js and Google Puppeteer. I have to capture 38,000 pages; most of the functions work fine, but errors occur at some points and I don't know where they are coming from.
I have two hypotheses. First, I used the `headless` option to investigate the problem and found that some pages contain many GIF files, so they take too long to load and a timeout error is shown. Second, the website sometimes fails to load, which also shows an error.
Here's my full code
const puppeteer = require("puppeteer");
const fs = require('fs');
const galleryName = "frozen"; // Enter gallery name

/**
 * Entry point: reads post ids (one per line) from `db.txt`, opens each post
 * of the configured gallery in a single headless page, and saves a PNG of
 * the `.view_content_wrap` element to `./result/<id>.png`.
 *
 * A page that fails to load or times out is logged and skipped so that one
 * bad page does not abort the whole run; the failed ids are reported at the
 * end. The browser is always closed, even when an error occurs.
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // Adjustments particular to this page to ensure we hit desktop breakpoint.
    // setViewport returns a promise: await it so the size is applied before
    // the first navigation.
    await page.setViewport({
      width: 1000,
      height: 10000000,
      deviceScaleFactor: 1
    });

    /* ScreenShot Functions */
    /**
     * Screenshots the first element matching a CSS selector.
     *
     * @param {object} [opts]
     * @param {string} opts.selector - CSS selector of the element (required).
     * @param {number} [opts.padding=0] - Extra pixels captured on every side.
     * @param {?string} [opts.path=null] - Output file path; null skips writing.
     * @returns {Promise<*>} Whatever `page.screenshot` resolves to.
     * @throws {Error} If no selector is given or no element matches it.
     */
    const screenshotDOMElement = async (opts = {}) => {
      const { padding = 0, path = null, selector } = opts;
      if (!selector) throw new Error("Please provide a selector.");

      const rect = await page.evaluate(sel => {
        const element = document.querySelector(sel);
        if (!element) return null;
        const { x, y, width, height } = element.getBoundingClientRect();
        return { left: x, top: y, width, height, id: element.id };
      }, selector);

      if (!rect) {
        throw new Error(
          `Could not find element that matches selector: ${selector}.`
        );
      }

      return page.screenshot({
        path,
        clip: {
          x: rect.left - padding,
          y: rect.top - padding,
          // The origin is shifted by `padding`, so both dimensions must grow
          // by twice the padding to keep the element centred in the clip.
          width: rect.width + padding * 2,
          height: rect.height + padding * 2
        }
      });
    };

    // page.screenshot does not create missing directories.
    await fs.promises.mkdir("./result", { recursive: true });

    const data = await fs.promises.readFile("db.txt");
    // Drop blank lines (e.g. a trailing newline) and stray "\r" from CRLF
    // files; they would otherwise become bogus URLs.
    const postIds = data
      .toString()
      .split("\n")
      .map(line => line.trim())
      .filter(line => line !== "");

    const failedIds = [];
    for (const [index, postId] of postIds.entries()) {
      console.log(`Now Processing : ${postId} | ${postIds.length - index - 1} items left`);
      try {
        // The options object must be the SECOND ARGUMENT of goto(); placed
        // after the closing parenthesis it is silently discarded.
        // Add `timeout: 0` here to disable the 30 s navigation timeout for
        // very slow pages.
        await page.goto(`https://gall.dcinside.com/${galleryName}/${postId}`, {
          waitUntil: "networkidle2"
        });
        // waitForSelector has no `waitUntil` option, so none is passed.
        await page.waitForSelector(".view_content_wrap");
        await screenshotDOMElement({
          path: `./result/${postId}.png`,
          selector: ".view_content_wrap",
          padding: 10
        });
      } catch (err) {
        // Log and continue: a single timeout or load failure should not
        // stop the remaining pages from being captured.
        console.error(`Failed to capture ${postId}: ${err.message}`);
        failedIds.push(postId);
      }
    }

    if (failedIds.length > 0) {
      console.error(`${failedIds.length} page(s) failed: ${failedIds.join(", ")}`);
    }
  } finally {
    await browser.close();
  }
})().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
A simple workaround is to override the default timeout by setting it to `0`, and to pass `waitUntil: "load"` in the options object of Puppeteer's `goto()` method.
To set the navigation timeout in Node.js with Puppeteer, call `goto` with the `timeout` option: `await page.goto(url, { waitUntil: 'load', timeout: 0 });`
The default timeout in Puppeteer is 30 seconds. To use custom timeouts, you can use the `setDefaultNavigationTimeout` and `setDefaultTimeout` methods or the `timeout` property in the options parameter. The wait time in all cases is specified in milliseconds.
The `waitForNavigation()` method waits for the page to navigate to a new URL or to reload. It is useful when you run code that will indirectly cause the page to navigate.
Before `await page.goto(...)`, try `await page.setDefaultNavigationTimeout(0)`,
or put `{ waitUntil: 'load', timeout: 0 }`
inside the `.goto` options.
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With