I am in a situation where new content is created when I scroll down. The new content has a specific class name.
How can I keep scrolling down until all the elements have loaded?
In other words, I want to reach the stage where if I keep scrolling down, nothing new will load.
I was using code to scroll down, coupled with an
await page.waitForSelector('.class_name');
The problem with this approach is that after all the elements have loaded, the code keeps on scrolling down, no new elements are created and eventually I get a timeout error.
This is the code:
// Scroll down by one viewport height inside the page, then wait until an
// element matching '.class_name' is present.
await page.evaluate(() => window.scrollBy(0, window.innerHeight));
await page.waitForSelector('.class_name');
Give this a shot:
const puppeteer = require('puppeteer');
// Opens the target page in a visible browser, scrolls it to the bottom with
// autoScroll(), and saves a full-page screenshot to 'yoursite.png'.
(async () => {
  const browser = await puppeteer.launch({
    headless: false
  });
  try {
    const page = await browser.newPage();
    // Set the viewport before navigating: resizing an already loaded page
    // can make it re-layout (Puppeteer's docs recommend this order).
    await page.setViewport({
      width: 1200,
      height: 800
    });
    await page.goto('https://www.yoursite.com');
    await autoScroll(page);
    await page.screenshot({
      path: 'yoursite.png',
      fullPage: true
    });
  } finally {
    // Close the browser even when a step above throws, so no browser
    // process is left running.
    await browser.close();
  }
})();
/**
 * Scrolls the page down in 100px steps, one step every 100ms, and resolves
 * once the accumulated distance reaches the scrollable height of the body
 * (body scrollHeight minus the viewport height).
 *
 * @param {object} page - Object exposing `evaluate(fn)`, e.g. a Puppeteer Page.
 * @returns {Promise<void>} Resolves when scrolling has finished.
 */
async function autoScroll(page) {
  // The callback is handed to page.evaluate(), so it is kept self-contained:
  // it only uses globals (document, window, timers) and its own locals.
  const scrollInsidePage = () =>
    new Promise((done) => {
      const step = 100;
      const intervalMs = 100;
      let scrolled = 0;
      const ticker = setInterval(() => {
        // Height is re-read on every tick, before scrolling.
        const pageHeight = document.body.scrollHeight;
        window.scrollBy(0, step);
        scrolled += step;
        const reachedEnd = scrolled >= pageHeight - window.innerHeight;
        if (reachedEnd) {
          clearInterval(ticker);
          done();
        }
      }, intervalMs);
    });

  await page.evaluate(scrollInsidePage);
}
Source: https://github.com/chenxiaochun/blog/issues/38
EDIT
added window.innerHeight
to the calculation because the available scrolling distance is body height minus viewport height, not the entire body height.
Scrolling down to the bottom of the page can be accomplished in 2 ways:
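1. use scrollIntoView together with a selector count (document.querySelectorAll('.class_name').length
to check whether more content has been generated)
2. use scrollBy together with setTimeout or setInterval (to check whether we have reached the bottom of the page)
Here is an implementation using scrollIntoView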
and selector (assuming .class_name
is the selector that we scroll into for more content) in plain JavaScript that we can run in the browser:
Method 1: use scrollIntoView and selectors
// Browser-console script: keep scrolling the matched '.class_name' element
// into view until a scroll no longer increases the number of '.class_name'
// elements on the page.
const delay = 3000; // ms to wait after each scroll so new content can load
const wait = (ms) => new Promise(res => setTimeout(res, ms));
// Number of elements currently matching the selector.
const count = async () => document.querySelectorAll('.class_name').length;
// Scrolls the matched element into view. querySelector returns null when
// nothing matches; in that case do nothing so the loop below simply ends
// instead of throwing a TypeError.
const scrollDown = async () => {
  const target = document.querySelector('.class_name:last-child');
  if (target !== null) {
    target.scrollIntoView({ behavior: 'smooth', block: 'end', inline: 'end' });
  }
};
let preCount = 0;
let postCount = 0;
do {
  preCount = await count();
  await scrollDown();
  await wait(delay);
  postCount = await count();
} while (postCount > preCount); // stop once a scroll produced no new elements
await wait(delay);
In this method, we compare the number of elements matching .class_name
before scrolling (preCount
) vs after scrolling (postCount
) to check whether we are at bottom of page:
// preCount / postCount: number of .class_name elements before / after scrolling.
if (postCount > preCount) {
  // NOT bottom of page
} else {
  // bottom of page
}
And here are 2 possible implementations using either setTimeout
or setInterval
with scrollBy
in plain JavaScript that we can run in the browser console:
Method 2a: use setTimeout with scrollBy
// Browser-console script: scroll down step by step until the bottom of the
// scrolling element is reached.
const distance = 100; // pixels scrolled per step
const delay = 100; // ms to wait between steps
// scrollTop may be fractional while scrollHeight is a rounded integer, so an
// exact comparison can stay true forever; treat "within 1px" as the bottom.
while (document.scrollingElement.scrollTop + window.innerHeight < document.scrollingElement.scrollHeight - 1) {
  document.scrollingElement.scrollBy(0, distance);
  await new Promise(resolve => { setTimeout(resolve, delay); });
}
Method 2b: use setInterval with scrollBy
// Browser-console script: scroll down on a fixed interval and stop the timer
// once the bottom of the scrolling element is reached.
const distance = 100; // pixels scrolled per tick
const delay = 100; // ms between ticks
const timer = setInterval(() => {
  document.scrollingElement.scrollBy(0, distance);
  // scrollTop may be fractional while scrollHeight is a rounded integer, so
  // allow a 1px tolerance; otherwise the interval might never be cleared.
  if (document.scrollingElement.scrollTop + window.innerHeight >= document.scrollingElement.scrollHeight - 1) {
    clearInterval(timer);
  }
}, delay);
In this method, we are comparing document.scrollingElement.scrollTop + window.innerHeight
with document.scrollingElement.scrollHeight
to check whether we are at the bottom of the page:
// The 1px tolerance covers a fractional scrollTop (scrollHeight is rounded).
if (document.scrollingElement.scrollTop + window.innerHeight < document.scrollingElement.scrollHeight - 1) {
  // NOT bottom of page
} else {
  // bottom of page
}
If any of the JavaScript snippets above scrolls the page all the way down to the bottom, then we know it is working and we can automate it using Puppeteer.
Here are the sample Puppeteer Node.js scripts that will scroll down to the bottom of the page and wait a few seconds before closing the browser.
Puppeteer Method 1: use scrollIntoView with selector (.class_name
)
const puppeteer = require('puppeteer');
// Opens the page, then keeps scrolling the matched '.class_name' element into
// view until a scroll no longer increases the number of '.class_name'
// elements; waits once more and closes the browser.
(async () => {
  // page.waitFor() is deprecated and not available in current Puppeteer
  // releases; a plain timer-based pause works with every version.
  const pause = (ms) => new Promise((resolve) => { setTimeout(resolve, ms); });
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: null,
    args: ['--window-size=800,600']
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://example.com');
    const delay = 3000; // ms to wait after each scroll so new content can load
    let preCount = 0;
    let postCount = 0;
    do {
      preCount = await getCount(page);
      await scrollDown(page);
      await pause(delay);
      postCount = await getCount(page);
    } while (postCount > preCount); // stop once a scroll produced no new elements
    await pause(delay);
  } finally {
    // Close the browser even when a step above throws.
    await browser.close();
  }
})();
/**
 * Counts the elements on the page that match '.class_name'.
 *
 * @param {object} page - Object exposing `$$eval(selector, fn)`, e.g. a Puppeteer Page.
 * @returns {Promise<number>} Number of matching elements.
 */
async function getCount(page) {
  const total = await page.$$eval('.class_name', (matches) => matches.length);
  return total;
}
/**
 * Smoothly scrolls the element matched by '.class_name:last-child' into view.
 *
 * @param {object} page - Object exposing `$eval(selector, fn, ...args)`, e.g. a Puppeteer Page.
 * @returns {Promise<void>}
 */
async function scrollDown(page) {
  // The options are passed as an argument instead of being captured from the
  // enclosing scope, so the callback itself stays self-contained.
  const scrollOptions = { behavior: 'smooth', block: 'end', inline: 'end' };
  const bringIntoView = (element, options) => {
    element.scrollIntoView(options);
  };
  await page.$eval('.class_name:last-child', bringIntoView, scrollOptions);
}
Puppeteer Method 2a: use setTimeout with scrollBy
const puppeteer = require('puppeteer');
// Opens the page, scrolls it to the bottom with scrollToBottom(page), waits
// three seconds and closes the browser.
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: null,
    args: ['--window-size=800,600']
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://example.com');
    await scrollToBottom(page);
    // page.waitFor() is deprecated and not available in current Puppeteer
    // releases; a plain timer-based pause works with every version.
    await new Promise((resolve) => { setTimeout(resolve, 3000); });
  } finally {
    // Close the browser even when a step above throws.
    await browser.close();
  }
})();
/**
 * Scrolls the page down in fixed steps, pausing between steps, until the
 * bottom of the scrolling element is reached.
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)`, e.g. a Puppeteer Page.
 * @returns {Promise<void>} Resolves once the bottom has been reached.
 */
async function scrollToBottom(page) {
  const distance = 100; // should be less than or equal to window.innerHeight
  const delay = 100; // ms to wait between steps
  // Self-contained so it can be handed to page.evaluate(). scrollTop may be
  // fractional while scrollHeight is a rounded integer, so "within 1px" is
  // treated as the bottom; an exact comparison could loop forever.
  const hasMoreToScroll = () => {
    const scroller = document.scrollingElement;
    return scroller.scrollTop + window.innerHeight < scroller.scrollHeight - 1;
  };
  while (await page.evaluate(hasMoreToScroll)) {
    await page.evaluate((y) => { document.scrollingElement.scrollBy(0, y); }, distance);
    // page.waitFor() is deprecated and not available in current Puppeteer
    // releases; a plain timer-based pause works with every version.
    await new Promise((resolve) => { setTimeout(resolve, delay); });
  }
}
Puppeteer Method 2b: use setInterval with scrollBy
const puppeteer = require('puppeteer');
// Opens the page, runs scrollToBottom inside the page via page.evaluate(),
// waits three seconds and closes the browser.
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: null,
    args: ['--window-size=800,600']
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://example.com');
    await page.evaluate(scrollToBottom);
    // page.waitFor() is deprecated and not available in current Puppeteer
    // releases; a plain timer-based pause works with every version.
    await new Promise((resolve) => { setTimeout(resolve, 3000); });
  } finally {
    // Close the browser even when a step above throws.
    await browser.close();
  }
})();
/**
 * Scrolls the current document down on a fixed interval and resolves once
 * the bottom of the scrolling element is reached. It only uses globals
 * (document, window, timers), so it can be passed to page.evaluate().
 *
 * @returns {Promise<void>} Resolves once the bottom has been reached.
 */
async function scrollToBottom() {
  await new Promise(resolve => {
    const distance = 100; // should be less than or equal to window.innerHeight
    const delay = 100; // ms between ticks
    const timer = setInterval(() => {
      document.scrollingElement.scrollBy(0, distance);
      // scrollTop may be fractional while scrollHeight is a rounded integer,
      // so allow a 1px tolerance; otherwise this might never resolve.
      if (document.scrollingElement.scrollTop + window.innerHeight >= document.scrollingElement.scrollHeight - 1) {
        clearInterval(timer);
        resolve();
      }
    }, delay);
  });
}
Many solutions here assume that the page height is constant. This implementation works even if the page height changes (e.g. when new content is loaded as the user scrolls down).
// Scroll down 100px every 10ms inside the page; finish as soon as a step
// leaves document.documentElement.scrollTop unchanged.
await page.evaluate(() => new Promise((done) => {
  let lastTop = -1; // scroll position seen on the previous tick
  const ticker = setInterval(() => {
    window.scrollBy(0, 100);
    const currentTop = document.documentElement.scrollTop;
    if (currentTop === lastTop) {
      // The position did not move, so there is nothing left to scroll.
      clearInterval(ticker);
      done();
      return;
    }
    lastTop = currentTop;
  }, 10);
}));
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With