I'm trying to do some HTML scraping with cheerio (I can't use jsdom because of a dependency problem: the bug with contextify, etc.), but I can't get the meta tags "og:type", "og:title", ...
// Fetches the page at Url, parses the response body with cheerio, and logs a
// `content` attribute once for every <meta> element found.
// `error` and `response` are received but never inspected here.
request(Url, function(error, response, body) {
var $ = cheerio.load(body);
$('meta').each(function() {
// NOTE(review): $('meta') re-selects ALL <meta> elements on every iteration,
// and .attr() on a multi-element selection reads only the first element, so
// the same first value is printed each time. Presumably the element currently
// being visited (`this`) was meant to be used instead.
console.log( $('meta').attr('content'));
});
});
I get only the content of the first meta tag, "text/html; charset=UTF-8". Do you know how to access the og tags?
A simpler solution, if you know which property you want to get, would be:
// Parse the document, pick out the Open Graph title tag with an attribute
// selector, then read the value stored in its `content` attribute.
var $ = cheerio.load(html);
var ogTitleTag = $('meta[property="og:title"]');
var result = ogTitleTag.attr('content');
(assuming you want to get the title.)
Expanding on Herman's answer:
I found the combination of node-crawler + cheerio to be a bit more manageable. The code below makes it a bit easier to track which tag attributes you're searching for, and can easily be tweaked to include other tags. Here's how I did it:
var crawler = require('crawler'),
url = require('url');
// Crawler that collects the Open Graph / Twitter Card meta tags of every page
// it is given and logs them as a single object per page.
var c = new crawler({
  maxConnections: 10,
  // Called once per crawled page. `$` is the query handle for the fetched
  // document; `error` is set when the request failed.
  callback: function(error, response, $) {
    // A failed request has no document to query, so report it and stop
    // instead of crashing on `$`.
    if (error) {
      console.error(error);
      return;
    }

    // Tags of interest. A tag that is absent from the page stays null.
    var data = {
      'og:type': null,
      'og:title': null,
      'og:description': null,
      'og:image': null,
      'twitter:title': null,
      'twitter:image': null,
      'twitter:description': null,
      'twitter:site': null,
      'twitter:creator': null
    };

    // Visit each <meta> element exactly once through the public .each() API
    // rather than enumerating the selection object's own keys, which also
    // exposes non-element internals.
    $('meta').each(function(index, element) {
      var tag = $(element);
      // Open Graph tags are keyed by `property`; Twitter Card tags are
      // normally keyed by `name`, so accept either attribute.
      var key = tag.attr('property') || tag.attr('name');
      if (Object.prototype.hasOwnProperty.call(data, key)) {
        // When a tag is repeated, the last occurrence wins.
        data[key] = tag.attr('content');
      }
    });

    console.log(data);
  }
});

// Replace the placeholder with the URL (or URLs) you want to crawl.
c.queue(['YOUR URL HERE']);
You'll have to play a bit with the keys of the object $('meta')
and check whether the required keys exist in order to obtain your result.
Try this code:
var cheerio = require('cheerio')
var request = require('request')
// Fetch the page at Url and log the values of its og:type and og:title
// meta tags (undefined is logged for a tag that is not present).
request(Url, function(error, response, body) {
  // A failed request yields no body to parse, so report it and stop.
  if (error) {
    console.error(error);
    return;
  }

  var $ = cheerio.load(body);

  // Returns the `content` attribute of the last <meta> element whose
  // `property` attribute equals the given name, or undefined when no such
  // element exists. Using a selector avoids enumerating the selection
  // object's own keys, which include non-element internals.
  function metaContent(property) {
    return $('meta[property="' + property + '"]').last().attr('content');
  }

  var ogType = metaContent('og:type');
  var ogTitle = metaContent('og:title');

  console.log(ogType);
  console.log(ogTitle);
});
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With