Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Requesting RSS feeds from two web sites in Node.JS

I have got a Node.JS server that requests data from two web servers: bbc.co.uk and sky.com. Then the RSS feeds are parsed, and a user sees two lists: from BBC and from sky.

Here is the code.

var feed = require('feed-read');
var http = require('http');
var async = require('async');
var request = require('request');

// RSS endpoints that are fetched for every incoming request.
var BBC_URL = 'http://feeds.bbci.co.uk/news/rss.xml';
var SKY_URL = 'http://news.sky.com/feeds/rss/home.xml';

// Upper bound on the number of articles listed per feed.
var LIMIT = 10;
// Text rendered in place of a feed that could not be fetched.
var UNABLE_TO_CONNECT = "Unable to connect.";

// Create the HTTP server and start listening on port 9000 right away;
// listen() hands back the same server instance.
var server = http.createServer(onRequest).listen(9000);

/**
 * HTTP request handler: sends the response headers, then asks both RSS
 * feeds for their articles in parallel via async.parallel.
 *
 * @param {Object} req - Incoming request (not inspected).
 * @param {Object} res - Response; only the status line and headers are written here.
 */
function onRequest(req, res) {
    var headers = {
        'Content-Type' : 'text/html; charset=utf-8'
    };
    res.writeHead(200, headers);

    // One task per feed. Neither task signals completion yet, which is the
    // open question below.
    var tasks = [
        function fetchBbc(callback) {
            feed(BBC_URL, onRssFetched);
            // TODO: where to call callback()?
        },
        function fetchSky(callback) {
            feed(SKY_URL, onRssFetched);
            // TODO: where to call callback()?
        }
    ];

    async.parallel(tasks, function done(err, results) {
        console.log("Done");
        if (err) {
            throw err;
        }
    });
}

/**
 * Callback for `feed`: renders the outcome of one RSS fetch as HTML and logs it.
 *
 * On failure the markup is a paragraph containing UNABLE_TO_CONNECT; on
 * success it is an ordered list with one linked entry per article, capped at
 * LIMIT entries.
 *
 * @param {*} err - Truthy when the feed could not be fetched.
 * @param {Array} articles - Entries exposing `link` and `title`; only read
 *     when `err` is falsy.
 * @returns {string} The generated markup (the same string that is logged).
 */
function onRssFetched(err, articles) {
    console.log("RSS fetched");
    var html = [];
    if (err) {
        // The message must be passed as its own argument: writing
        // `UNABLE_TO_CONNECT = "</p>"` here would overwrite the constant and
        // drop the message from the output.
        html.push("<p>", UNABLE_TO_CONNECT, "</p>");
    } else {
        html.push("<ol>");
        // slice() caps the list at LIMIT without walking the remaining entries.
        articles.slice(0, LIMIT).forEach(function(entry) {
            html.push("<li><a href='" + entry.link + "'>" + entry.title
                    + "</a></li>");
        });
        // Close the list so the markup is well-formed.
        html.push("</ol>");
    }
    var markup = html.join("");
    console.log(markup);
    return markup;
}

Now I don't know how to add the result to the web page. If I call callback() right after calling the feed method, callback() will be executed without waiting until feed has completed its job. On the other hand, I can't pass callback to feed. Maybe the approach is wrong, and I need some other module for RSS parsing.

like image 338
Maksim Dmitriev Avatar asked Dec 07 '22 04:12

Maksim Dmitriev


1 Answer

@Maksim I know your original question included the async module, but I'd like to propose an alternative:

why not stream each article to the client as it comes in rather than waiting for all RSS feeds to return before sending a response...?

By using async.parallel you are telling node:

"wait until we have a response from all these news services
and only
then (combine the articles into) a single response to the client ..."

This uses up memory for each connected client while you wait for all responses (from the RSS news services) ... wasteful.

So I've written my answer without resorting to async.
And, instead of waiting for ages (while async combines all the feeds into one),
the client sees news as soon as the first rss feed returns!

var feed = require('feed-read'),  // require the feed-read module
    http = require("http"),
    urls = [
        "http://feeds.bbci.co.uk/news/rss.xml",
        "http://news.sky.com/feeds/rss/home.xml",
        "http://www.techmeme.com/feed.xml"
    ]; // Example RSS Feeds

// Serve a page that streams article titles to the client as each RSS feed
// responds, and closes the page once every feed has reported back.
http.createServer(function (req, res) {
    // send basic http headers to client
    res.writeHead(200, {
        "Content-Type": "text/html",
        "Transfer-Encoding": "chunked"
    });

    // setup simple html page:
    res.write("<html>\n<head>\n<title>RSS Feeds</title>\n</head>\n<body>");

    // Number of feeds that have not called back yet. The feed callbacks run
    // after the loop below has finished, so a loop index cannot be used to
    // detect the last feed; count completions instead.
    var pending = urls.length;

    // Called once per feed, whether it succeeded or failed.
    function feedDone() {
        pending--;
        if (pending === 0) {
            res.end("</body>\n</html>"); // end http response
        }
    }

    // nothing to fetch: close the page immediately
    if (pending === 0) {
        res.end("</body>\n</html>");
        return;
    }

    // loop through our list of RSS feed urls
    urls.forEach(function (url) {

        // fetch rss feed for the url:
        feed(url, function (err, articles) {
            if (err) {
                // a failing feed must not crash the server or stall the page
                console.error("Unable to fetch " + url, err);
            } else {
                var list = articles || [];

                // stream article title (and what ever else you want) to client
                for (var i = 0; i < list.length; i++) {
                    res.write("<h3>" + list[i].title + "</h3>");
                }
            }
            feedDone();
        }); // end call to feed (feed-read) method
    }); // end urls loop
}).listen(9000);

Key Advantages:

  • People connecting to your app will see news/results much faster (almost instantly!)
  • Your app uses much less memory
  • You don't have to edit/update any code when you add new RSS news feeds!

For even more detail/notes on this solution
see: https://github.com/nelsonic/node-parse-rss

like image 50
nelsonic Avatar answered Dec 19 '22 11:12

nelsonic