Right now our app writes the source code of nodejs.org to the console. We'd like it to write all hyperlinks of nodejs.org instead. Maybe we need just one line of code to get the links from the body.
app.js:
// app.js: starts a small local HTTP server, then fetches nodejs.org and
// prints the response body (or the request error) to the console.
const http = require('http');

/**
 * Replies to every incoming request with a plain-text greeting.
 * @param {object} req - incoming request (unused).
 * @param {object} res - response used to send the status, header and body.
 */
function handleRequest(req, res) {
  res.writeHead(200, { 'Content-Type': 'text/plain' });
  res.end('Hello World\n');
}

http.createServer(handleRequest).listen(1337, '127.0.0.1');
console.log('Server running at http://127.0.0.1:1337/');

// Loaded here, after the server has started, to keep the original statement order.
const request = require('request');

request('http://nodejs.org/', (error, response, body) => {
  if (error) {
    console.log(error);
    return;
  }
  console.log(body);
});
You are probably looking for jsdom, jQuery, or cheerio. What you are doing is called screen scraping: extracting data from a site. jsdom and jQuery offer a complete set of tools, but cheerio is much faster.
Here is a cheerio example:
// Screen-scraping example: fetch a Bing results page and print the text
// and href of every hyperlink found in it.
const request = require('request');
const cheerio = require('cheerio');

const searchTerm = 'screen+scraping';
const url = 'http://www.bing.com/search?q=' + searchTerm;

request(url, (err, resp, body) => {
  // On a failed request there is no body to parse; report the error and stop.
  if (err) {
    console.error(err);
    return;
  }

  // Declared locally: the original assigned `$` and `links` without a
  // declaration, which leaks globals and throws in strict mode / ES modules.
  const $ = cheerio.load(body);

  // jQuery-style selector: every <a> element in the document.
  $('a').each((i, link) => {
    const anchor = $(link);
    console.log(anchor.text() + ':\n ' + anchor.attr('href'));
  });
});
You choose whatever is best for you.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With