Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to access meta OpenGraph with cheerio?

I'm trying to do some HTML scraping with cheerio (I can't use jsdom because of a dependency problem: the bug with contextify, etc.), but I can't get the meta tags "og:type", "og:title", ...

// Fetch the page at `Url`, parse the body with cheerio and print the
// `content` attribute while iterating over the <meta> tags.
request(Url, function(error, response, body) {
var $ = cheerio.load(body);
    $('meta').each(function() {
        // NOTE: $('meta') re-selects ALL <meta> tags on every iteration, and
        // .attr() reads from the first element of a selection, so the same
        // first tag's content is printed each time instead of the current one.
        console.log(  $('meta').attr('content'));
    });
});

I only get the content of the first meta tag: "text/html; charset=UTF-8". Do you know how to access the og: tags?

like image 496
MkM Avatar asked Oct 05 '12 07:10

MkM


3 Answers

A simpler solution, if you know which property you want to get, would be:

// Parse the document, pick the og:title <meta> tag with an attribute
// selector, then read its `content` value.
var $ = cheerio.load(html);
var ogTitleTag = $('meta[property="og:title"]');
var result = ogTitleTag.attr('content');

(assuming you want to get the title.)

like image 147
Ivor Zhou Avatar answered Nov 16 '22 22:11

Ivor Zhou


Expanding on Herman's answer:

I found the combination of node-crawler + cheerio to be a bit more manageable. The code below makes it a bit easier to track which tag attributes you're searching for, and can easily be tweaked to include other tags. Here's how I did it:

  var crawler = require('crawler'),
  url = require('url');

  // Crawl every queued page and log the OpenGraph / Twitter Card meta tags
  // found on it. To track another tag, add its name as a key of `data`.
  var c = new crawler({
    maxConnections:10,
    // Invoked once per fetched page; `$` is used here as a cheerio-style
    // selector function over the fetched document.
    callback:function(error,response,$) {
      // A failed fetch leaves nothing to query, so report it and stop
      // instead of throwing on `$('meta')`.
      if (error || !$) {
        console.error(error || 'No parsed document available for this page');
        return;
      }
      var data = {
        'og:type':null,
        'og:title':null,
        'og:description':null,
        'og:image':null,
        'twitter:title':null,
        'twitter:image':null,
        'twitter:description':null,
        'twitter:site':null,
        'twitter:creator':null,
      };
      // Single pass over the tags; when a tag is repeated the last
      // occurrence wins.
      $('meta').each(function(index, element) {
        var attribs = element.attribs;
        if (!attribs) {
          return;
        }
        // OpenGraph tags are declared with `property`, while Twitter Card
        // tags are usually declared with `name`, so accept either attribute.
        var tag = attribs.property || attribs.name;
        if (tag && Object.prototype.hasOwnProperty.call(data, tag)) {
          data[tag] = attribs.content;
        }
      });
      console.log(data);
    }
  });
  // Replace the placeholder with the URL(s) of the page(s) to crawl.
  c.queue(['YOUR URL HERE']);
like image 33
Noel Baron Avatar answered Nov 16 '22 23:11

Noel Baron


You'll have to play a bit with the keys of the object $('meta') and check whether the required keys exist or not to obtain your result.

Try this code:

var cheerio = require('cheerio')
var request = require('request')

// Fetch the page at `Url` and log its og:type and og:title meta values
// (each is `undefined` when the page does not declare the tag).
request(Url, function(error, response, body) {
  // Without a body there is nothing to parse, so report the failure and stop.
  if (error) {
    console.error(error);
    return;
  }

  var $ = cheerio.load(body);

  var ogType;
  var ogTitle;

  // One pass over the <meta> elements only; when a tag is repeated the last
  // occurrence wins.
  $('meta').each(function(index, element) {
    var attribs = element.attribs;
    if (!attribs || !attribs.property) {
      return;
    }
    if (attribs.property === 'og:type') {
      ogType = attribs.content;
    } else if (attribs.property === 'og:title') {
      ogTitle = attribs.content;
    }
  });

  console.log(ogType);
  console.log(ogTitle);
});
like image 3
Herman Junge Avatar answered Nov 16 '22 22:11

Herman Junge