Puppeteer makes it straightforward to download images from web pages. Whether you're building a scraper, archiving content, or collecting data for analysis, this guide covers everything you need to know.
Basic setup
First, install Puppeteer in your project:
```bash
npm install puppeteer
```
Here's the minimal code to get started:
```js
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

async function downloadImages(url) {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  await page.goto(url, { waitUntil: 'networkidle0' });

  // Get all image URLs from the page
  const imageUrls = await page.evaluate(() => {
    const images = document.querySelectorAll('img');
    return Array.from(images)
      .map(img => img.src)
      .filter(src => src && src.startsWith('http'));
  });

  console.log(`Found ${imageUrls.length} images`);

  await browser.close();
  return imageUrls;
}

downloadImages('https://example.com');
```
Downloading the images
Finding image URLs is only half the battle: you still need to fetch and save the files.
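A common approach, matching the behavior described below, is to listen for network responses and save any image bodies as they arrive. The following is a minimal sketch, assuming Node's built-in fs and path modules; the outputDir parameter and the image-N file naming are illustrative choices rather than fixed conventions:

```js
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

async function downloadImages(url, outputDir = './images') {
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  const browser = await puppeteer.launch();
  const page = await browser.newPage();

  let counter = 0;
  // Save every network response whose Content-Type marks it as an image
  page.on('response', async (response) => {
    const contentType = response.headers()['content-type'] || '';
    if (!contentType.startsWith('image/')) return;

    try {
      const buffer = await response.buffer();
      const ext = contentType.split('/')[1].split(';')[0] || 'bin';
      counter += 1;
      const filepath = path.join(outputDir, `image-${counter}.${ext}`);
      fs.writeFileSync(filepath, buffer);
      console.log(`Saved ${filepath}`);
    } catch (err) {
      // Cached or aborted responses have no body to read
      console.error(`Could not save ${response.url()}: ${err.message}`);
    }
  });

  await page.goto(url, { waitUntil: 'networkidle0' });
  await browser.close();
}

downloadImages('https://example.com');
```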
This method captures images as they're loaded by the browser, including those loaded via JavaScript.
Always respect robots.txt and terms of service when scraping. Some websites explicitly prohibit automated downloading of their content.
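If you want to check robots.txt before scraping, here is a deliberately naive sketch using Node 18+'s global fetch. The isPathAllowed helper is hypothetical, and a dedicated robots.txt parser library is a better fit for real projects:

```js
// Naive, illustrative robots.txt check: only honors "User-agent: *" groups
// and plain path-prefix Disallow rules. Assumes Node 18+ (global fetch).
async function isPathAllowed(siteUrl, pathToCheck) {
  const robotsUrl = new URL('/robots.txt', siteUrl).href;
  const response = await fetch(robotsUrl);
  if (!response.ok) return true; // no robots.txt found: assume allowed

  const disallowed = [];
  let appliesToEveryone = false;
  for (const raw of (await response.text()).split('\n')) {
    const line = raw.trim();
    if (/^user-agent:/i.test(line)) {
      appliesToEveryone = line.endsWith('*');
    } else if (appliesToEveryone && /^disallow:/i.test(line)) {
      const rule = line.slice(line.indexOf(':') + 1).trim();
      if (rule) disallowed.push(rule);
    }
  }
  return !disallowed.some(rule => pathToCheck.startsWith(rule));
}

// Usage:
// if (await isPathAllowed('https://example.com', '/gallery')) { ... }
```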
Handling authentication
For pages behind a login:
```js
async function scrapeAuthenticatedPage(url) {
  // headless: false lets you watch the login while debugging
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();

  // Navigate to the login page
  await page.goto('https://example.com/login');

  // Fill in credentials
  await page.type('#username', 'your-username');
  await page.type('#password', 'your-password');

  // Start waiting for navigation before clicking, so the post-login
  // navigation isn't missed if it begins immediately
  await Promise.all([
    page.waitForNavigation(),
    page.click('#login-button'),
  ]);

  // Now scrape the authenticated page
  await page.goto(url);
  // ... extract images
}
```
Rate limiting
Be respectful to the servers you're scraping:
```js
function delay(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

// Add delays between downloads
for (const imageUrl of imageUrls) {
  await downloadImage(imageUrl, filepath);
  await delay(500); // Wait 500ms between downloads
}
```
Complete example
Here's a script that combines everything above: URL extraction, downloading, rate limiting, and basic error handling.
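Since the details vary by project, treat the following as a sketch of how those pieces fit together rather than a drop-in script. It assumes Node 18+ (the global fetch is used to download files), and the scrapeImages and downloadImage names are illustrative:

```js
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');

function delay(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

async function downloadImage(imageUrl, filepath) {
  const response = await fetch(imageUrl);
  if (!response.ok) {
    throw new Error(`HTTP ${response.status} for ${imageUrl}`);
  }
  fs.writeFileSync(filepath, Buffer.from(await response.arrayBuffer()));
}

async function scrapeImages(url, outputDir = './images') {
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle0' });

    // Collect absolute image URLs from the rendered page
    const imageUrls = await page.evaluate(() =>
      Array.from(document.querySelectorAll('img'))
        .map(img => img.src)
        .filter(src => src && src.startsWith('http'))
    );
    console.log(`Found ${imageUrls.length} images`);

    for (let i = 0; i < imageUrls.length; i++) {
      const ext = path.extname(new URL(imageUrls[i]).pathname) || '.jpg';
      const filepath = path.join(outputDir, `image-${i + 1}${ext}`);
      try {
        await downloadImage(imageUrls[i], filepath);
        console.log(`Saved ${filepath}`);
      } catch (err) {
        console.error(`Failed to download ${imageUrls[i]}: ${err.message}`);
      }
      await delay(500); // be polite between downloads
    }
  } finally {
    await browser.close(); // always release the browser, even on errors
  }
}

scrapeImages('https://example.com');
```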
Taking element screenshots
Sometimes you want to capture images as they appear on the page rather than downloading the source file. This is useful when images have CSS effects or overlays, or when the source file is protected. Puppeteer lets you take screenshots of specific elements:
```js
async function screenshotImages(url, outputDir = './screenshots') {
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  await page.goto(url, { waitUntil: 'networkidle0' });

  // Wait for images to be present
  await page.waitForSelector('img');

  // Get all image elements
  const images = await page.$$('img');
  console.log(`Found ${images.length} image elements`);

  for (let i = 0; i < images.length; i++) {
    const image = images[i];

    // Check if the image is visible and has dimensions
    const isVisible = await image.evaluate(el => {
      const rect = el.getBoundingClientRect();
      return rect.width > 0 && rect.height > 0;
    });

    if (isVisible) {
      try {
        await image.screenshot({
          path: path.join(outputDir, `element-${i + 1}.png`),
        });
        console.log(`Screenshot saved: element-${i + 1}.png`);
      } catch (err) {
        console.error(`Failed to screenshot element ${i + 1}: ${err.message}`);
      }
    }
  }

  await browser.close();
  console.log('Done!');
}

screenshotImages('https://example.com');
```
You can also filter by size and capture only larger images:
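One way to do this is to swap the isVisible check in the loop above for a size check. In this sketch, MIN_WIDTH and MIN_HEIGHT are assumed thresholds, not values from any particular standard:

```js
// Skip thumbnails and icons by requiring minimum rendered dimensions
const MIN_WIDTH = 200;  // assumed threshold
const MIN_HEIGHT = 200; // assumed threshold

// Inside the for loop from the previous example, replacing isVisible:
const isLargeEnough = await image.evaluate(
  (el, minW, minH) => {
    const rect = el.getBoundingClientRect();
    return rect.width >= minW && rect.height >= minH;
  },
  MIN_WIDTH,
  MIN_HEIGHT
);

if (isLargeEnough) {
  await image.screenshot({
    path: path.join(outputDir, `large-${i + 1}.png`),
  });
}
```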
Element screenshots capture exactly what's rendered, including any CSS filters, borders, shadows, or transformations applied to the image.
When to use a screenshot API instead
Puppeteer is powerful but requires you to manage browser instances, handle edge cases, and maintain infrastructure. For simpler use cases, or when you need reliability at scale, a screenshot API might be a better choice.
With allscreenshots, you can capture full-page screenshots without managing browsers:
```bash
curl -X POST 'https://api.allscreenshots.com/v1/screenshots' \
  -H 'X-API-Key: your-api-key' \
  -H 'Content-Type: application/json' \
  -d '{"url": "https://example.com", "fullPage": true}'
```
This handles browser management, ad blocking, and edge cases automatically, and is useful when you need screenshots rather than individual images.