Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

nodejs async nested calls

I would like to scrape a URL:

1 request to get a list of elements

1 request on each result to get details

Here is what I have:

// Dependencies: HTTP client, HTML parser/selector engine, and async control-flow helpers.
// NOTE: `format` is required here but is not used anywhere in the visible script.
var request = require('request')
    , cheerio = require('cheerio')
    , async = require('async')
    , format = require('util').format;

// Root of the site being scraped; every request path below is appended to it.
var baseurl = 'http://magiccards.info';
// Two-step pipeline: (1) fetch the sitemap and collect the sets,
// (2) fetch each set's page in turn and log the links found on it.
async.waterfall([
    // Step 1: download the sitemap and build the `sets` array.
    function (callback) {
        request(baseurl + '/sitemap.html', function (err, response, body) {
            // NOTE: `err` is not checked before `body` is parsed.
            var sets = [];
            var $ = cheerio.load(body);
            // Every anchor whose href ends in "/en.html" is treated as one set.
            $('a[href$="/en.html"]').each(function () {
                // `code` is the first capture group of the href, i.e. the text between
                // slashes (presumably "abc" for an href like "/abc/en.html" — confirm).
                sets.push({"name": $(this).text(), "code":$(this).attr('href').match(/\/([^)]+)\//)[1], "path": $(this).attr('href'), "translations":[]});
            });
            // Hand the collected sets to the next waterfall step.
            callback(null, sets);
        });
    },
    // Step 2: request each set's page and log every matching link.
    function (sets, callback) {
        console.log(sets);
        // The iterator's `callback` parameter shadows the waterfall step's `callback`.
        async.eachSeries(sets, function (set, callback) {
            console.log('SET ' + set.code.toUpperCase());
            request(baseurl + set.path, function (err, response, body) {
                // NOTE: `err` is not checked before `body` is parsed.
                var $ = cheerio.load(body);
                // Direct children of <body> that link to a path under "/<set code>/".
                $('body > a[href^="/' + set.code + '/"]').each(function () {
                    console.log('   %s (%s)', $(this).text(), $(this).attr('href'));
                });
                // NOTE: the iterator's `callback` is never invoked here, so eachSeries
                // is never told that this item has finished.
            });
        });
        // NOTE: no completion callback is passed to eachSeries, and this step's own
        // `callback` is never invoked, so the waterfall is never told the step is done.
    }
], function (err, result) {
    // Final waterfall callback: logs 'ERR' unconditionally; `err` and `result`
    // are not inspected.
    console.log('ERR');
});

The problem is that the 2nd waterfall function runs only once. If I replace the eachSeries with an each, the loop does run X times (but I need to wait for the result).

What am I missing?

like image 691
kitensei Avatar asked Mar 19 '23 20:03

kitensei


1 Answer

You need to call the eachSeries callback function. Otherwise async won't know that you are done. (1)

You also need to tell the waterfall function that you are done with that step, also by calling the callback function. (2)

function (sets, waterfallCallback) {
    async.eachSeries(sets, function (set, seriesCallback) {
        console.log('SET ' + set.code.toUpperCase());
        request(baseurl + set.path, function (err, response, body) {
            var $ = cheerio.load(body);
            $('body > a[href^="/' + set.code + '/"]').each(function () {
                console.log('   %s (%s)', $(this).text(), $(this).attr('href'));
            });

            seriesCallback(null); /* 1 */

        });
    }, waterfallCallback /* 2 */);
}
like image 71
Linus Unnebäck Avatar answered Mar 28 '23 07:03

Linus Unnebäck