My HTML code looks like this:
<div class="class1" data-id="id1">
  <span class="class2">
  "text1"
  </span>
</div>
<div class="class1" data-id="id2">
  <span class="class2">
  "text2"
  </span>
</div>
<div class="class1" data-id="id1">
  <span class="class2">
  "text1"
  </span>
</div>
<div class="class1" data-id="id3">
  <span class="class2">
  "text3"
  </span>
</div>
I'm trying to write Puppeteer code to get the data-id and span inner text pairs, which would result in something like:
  id1: text1,
  id2: text2,
  id3: text3
What I tried:
 // Asker's incomplete attempt: collect a handle for every `.class1` element,
 // then try to print a `data-id: text` pair for the `.class2` span inside each.
 const allClass1InPage = await this.page.$$(".class1");
 for (const class1El of allClass1InPage) {
      // NOTE(review): `elem` is not declared anywhere in this snippet; the loop
      // variable is `class1El`, which is presumably what was intended.
      // The `???` placeholders mark the two values the question asks how to obtain.
      await elem.$eval(".class2", (class2El) =>
        console.debug(`${???}: ${???}`)
      );
 }
What I don't know is:
How do I get the data-id out of an ElementHandle (such as class1El)? Normally I would use .dataset.id if it were an Element instead.
There are several ways to do this.
Way 1 — the closest to your attempt — mixes the Puppeteer and browser APIs and is a bit entangled.
Way 2 — uses the pure Puppeteer (JSHandle/ElementHandle) API. It is more consistent, but very verbose.
Way 3 — uses the pure browser (Web) API. It seems to be the simplest way if you just need some serializable data.
'use strict';
// Markup of the test page used by main() below.
// It holds four `.class1` <div> elements whose data-id values are id1, id2,
// id1 (a deliberate duplicate of the first) and id3; each wraps a single
// `.class2` <span> containing the quoted text that belongs to that id.
const html = `
  <!doctype html>
  <html>
    <head><meta charset='UTF-8'><title>Test</title></head>
    <body>
      <div class="class1" data-id="id1">
        <span class="class2">
        "text1"
        </span>
      </div>
      <div class="class1" data-id="id2">
        <span class="class2">
        "text2"
        </span>
      </div>
      <div class="class1" data-id="id1">
        <span class="class2">
        "text1"
        </span>
      </div>
      <div class="class1" data-id="id3">
        <span class="class2">
        "text3"
        </span>
      </div>
    </body>
  </html>`;
const puppeteer = require('puppeteer');
/**
 * Demo entry point.
 *
 * Starts a browser with `puppeteer.launch()`, loads the `html` test page and
 * prints one `data-id: span text` line per `.class1` element, three times
 * over — once for each of the three approaches ("Way 1" to "Way 3").
 *
 * Any error is reported with `console.error`; the browser is closed whether
 * or not the demo succeeds, so a failure does not leave a browser process
 * running in the background.
 */
(async function main() {
  try {
    const browser = await puppeteer.launch();
    try {
      const [page] = await browser.pages();

      // Percent-encode the markup: in a raw data: URL, newlines are stripped
      // and characters such as `#` or `%` would be read as URL syntax instead
      // of page content.
      await page.goto(`data:text/html;charset=utf-8,${encodeURIComponent(html)}`);

      // Way 1: Puppeteer handle for the container, browser-side callback for
      // the span. The callback runs on the span, so the id is read from its
      // parent element.
      {
        const allClass1InPage = await page.$$('.class1');
        for (const class1El of allClass1InPage) {
          console.debug(await class1El.$eval('.class2', class2El =>
            `${class2El.parentNode.dataset.id}: ${class2El.innerText}`
          ));
        }
      }

      console.log();

      // Way 2: handles only — every property is fetched through getProperty()
      // and converted to a plain value with jsonValue().
      {
        const allClass1InPage = await page.$$('.class1');
        for (const class1El of allClass1InPage) {
          const datasetHandle = await class1El.getProperty('dataset');
          const idHandle = await datasetHandle.getProperty('id');
          const id = await idHandle.jsonValue();

          const spanHandle = await class1El.$('.class2');
          const textHandle = await spanHandle.getProperty('innerText');
          const text = await textHandle.jsonValue();

          console.log(`${id}: ${text}`);
        }
      }

      console.log();

      // Way 3: a single page.evaluate() call builds all strings with plain
      // DOM APIs and returns them as an array.
      {
        const data = await page.evaluate(
          () => [...document.querySelectorAll('.class1')].map(element =>
            `${element.dataset.id}: ${element.querySelector('.class2').innerText}`)
        );
        console.log(data.join('\n'));
      }
    } finally {
      // Runs on both success and failure; a rejection from close() itself is
      // still reported by the outer catch.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With