How can I download images on a page using puppeteer?

Tags:

I'm new to web scraping and want to download all images on a webpage using puppeteer:

const puppeteer = require('puppeteer');

let scrape = async () => {
  // Actual Scraping goes Here...

  const browser = await puppeteer.launch({headless: false});
  const page = await browser.newPage();
  await page.goto('https://memeculture69.tumblr.com/');

  //   Right click and save images

};

scrape().then((value) => {
    console.log(value); // Success!
});

I have looked at the API‌ docs but could not figure out how to acheive this. So appreciate your help.

1000

asked Sep 27 '18 17:09

4 Answers

If you want to skip the manual dom traversal you can write the images to disk directly from the page response.

Example:

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

(async () => {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();
    page.on('response', async response => {
        const url = response.url();
        if (response.request().resourceType() === 'image') {
            response.buffer().then(file => {
                const fileName = url.split('/').pop();
                const filePath = path.resolve(__dirname, fileName);
                const writeStream = fs.createWriteStream(filePath);
                writeStream.write(file);
            });
        }
    });
    await page.goto('https://memeculture69.tumblr.com/');
    await browser.close();
})();

See the documentation for page.on and for the HTTPResponse object that you get from page.on('response', ...).

answered Oct 16 '22 15:10

Ben Adam

Here is another example. It goes to a generic search in google and downloads the google image at the top left.

const puppeteer = require('puppeteer');
const fs = require('fs');

async function run() {
    const browser = await puppeteer.launch({
        headless: false
    });
    const page = await browser.newPage();
    await page.setViewport({ width: 1200, height: 1200 });
    await page.goto('https://www.google.com/search?q=.net+core&rlz=1C1GGRV_enUS785US785&oq=.net+core&aqs=chrome..69i57j69i60l3j69i65j69i60.999j0j7&sourceid=chrome&ie=UTF-8');

    const IMAGE_SELECTOR = '#tsf > div:nth-child(2) > div > div.logo > a > img';
    let imageHref = await page.evaluate((sel) => {
        return document.querySelector(sel).getAttribute('src').replace('/', '');
    }, IMAGE_SELECTOR);

    console.log("https://www.google.com/" + imageHref);
    var viewSource = await page.goto("https://www.google.com/" + imageHref);
    fs.writeFile(".googles-20th-birthday-us-5142672481189888-s.png", await viewSource.buffer(), function (err) {
    if (err) {
        return console.log(err);
    }

    console.log("The file was saved!");
});

    browser.close();
}

run();

If you have a list of images you want to download then you could change the selector to programatically change as needed and go down the list of images downloading them one at a time.

answered Oct 16 '22 14:10

The logic is simple i think. You just need to make a function which will take url of image and save it to your directory. The puppeteer will just scrape the image url and pass it to downloader function. Here is an example:

const puppeteer = require('puppeteer');
const fs = require('fs');
const request = require('request');

//  This is main download function which takes the url of your image
function download(uri, filename) {
  return new Promise((resolve, reject) => {
    request.head(uri, function (err, res, body) {
      request(uri).pipe(fs.createWriteStream(filename)).on('close', resolve);
    });
  });
}

let main = async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto('https://memeculture69.tumblr.com/');
  await page.waitFor(1000);
  const imageUrl = await page.evaluate(
    // here we got the image url from the selector.
    () => document.querySelector('img.image')
  );
  // Now just simply pass the image url
  // to the downloader function to download  the image.
  await download(imageUrl, 'image.png');
};

main();

answered Oct 16 '22 15:10

Naimur Rahman

Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!

Donate Us With

How can I download images on a page using puppeteer?

Tags:

javascript

web-scraping

puppeteer

google-chrome-headless

supermario

People also ask

4 Answers

Ben Adam

Braden Brown

Grant Miller

Naimur Rahman

Recent Activity

Donate For Us

How can I download images on a page using puppeteer?

Tags:

javascript

web-scraping

puppeteer

google-chrome-headless

supermario

People also ask

4 Answers

Ben Adam

Braden Brown

Grant Miller

Naimur Rahman

Related questions

Recent Activity

Donate For Us