Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Async parallel HTTP request

I'm having a control flow problem with an application loading a large array of URLs. I'm using Caolan Async and the NPM request module.

My problem is that the HTTP request starts as soon as the function is added to the queue. Ideally I want to build my queue and only start making the HTTP requests when the queue starts. Otherwise the callbacks start firing before the queue starts, causing the queue to finish prematurely.

var request = require('request') // https://www.npmjs.com/package/request
    , async = require('async'); // https://www.npmjs.com/package/async

var myLoaderQueue = []; // passed to async.parallel
var myUrls = ['http://...', 'http://...', 'http://...'] // 1000+ urls here

// Build one task function per URL. Pushing a task does not issue a request:
// request() is only called once the task itself is invoked with a callback.
for(var i = 0; i < myUrls.length; i++){
    myLoaderQueue.push(function(callback){

        // Async http request
        // NOTE(review): `i` is declared with `var`, so every task shares the
        // same binding. A task invoked after the loop has finished sees
        // i === myUrls.length, which makes myUrls[i] undefined.
        request(myUrls[i], function(error, response, html) {

            // Some processing is happening here before the callback is invoked
            callback(error, html);
        });
    });
}

// The loader queue has been made, now start to process the queue
// NOTE(review): `queue` is not defined anywhere in this snippet; the array
// built above is `myLoaderQueue` - presumably that is what was meant.
async.parallel(queue, function(err, results){
    // Done
});

Is there a better way of attacking this?

like image 360
ChrisRich Avatar asked Aug 01 '15 12:08

ChrisRich


2 Answers

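Using for loops combined with asynchronous calls is problematic (with ES5) and may yield unexpected results. In your case the wrong URL is retrieved: every queued function shares the same `var i`, and by the time the functions actually run the loop has finished, so `myUrls[i]` is `undefined`.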

Instead, consider using async.map():

// Fetch a single URL and hand the error (if any) and the HTML body to `done`.
function fetchHtml(url, done) {
  request(url, function(error, response, html) {
    // Some processing is happening here before the callback is invoked
    done(error, html);
  });
}

// Apply fetchHtml to every entry of myUrls; async.map calls the final
// function with (err, results) when the work is done.
async.map(myUrls, fetchHtml, function(err, results) {
  // ... handle `err` / use `results` here
});

Given that you have 1000+ URLs to retrieve, async.mapLimit() may also be worth considering, since it caps how many requests are in flight at the same time.

like image 112
robertklep Avatar answered Oct 21 '22 22:10

robertklep


If you're willing to start using Bluebird and Babel to utilize promises and async / await (proposed for ES7, standardized in ES2017) you can do the following:

// Bluebird is bound to the name `Promise` for the rest of this snippet.
let Promise = require('bluebird');
// Wrap the callback-style `request` function so that it returns a promise.
let request = Promise.promisify(require('request'));

let myUrls = ['http://...', 'http://...', 'http://...'] // 1000+ urls here

/**
 * Requests every URL in `myUrls` concurrently and logs the collected results.
 *
 * Any rejection is caught and logged, so the returned promise always
 * fulfills with `undefined`.
 *
 * @returns {Promise<void>}
 */
async function load() {
  try {
    // Map myUrls into an array of request promises and wait until all of
    // them resolve.
    // Each URL is passed explicitly: Array#map also supplies the index and
    // the array itself, and a promisified function forwards every argument
    // it receives ahead of its own callback, so `myUrls.map(request)` would
    // hand `request` those extras as well.
    // (The `await*` shorthand from early async-function proposals is not
    // standard syntax; Promise.all is the supported form.)
    const results = await Promise.all(myUrls.map((url) => request(url)));
    // print array of results or use forEach
    // to process / collect them in any other way
    console.log(results);
  } catch (e) {
    console.log(e);
  }
}
like image 45
krl Avatar answered Oct 21 '22 23:10

krl