I am trying to wrap my head around Node.js and some async operations. In the following code, I fetch some RSS feeds and store the articles found if I haven't stored them before. The code works and stores new articles. However, I'm not sure how to alter this so that I know when all the articles are done being parsed. For example, the callback on each async.eachLimit
gets called each time the limit is met (after 10 articles or 5 feeds). So how do I know when they're done?
var FeedParser = require('feedparser');
var request = require('request');
var mysql = require('mysql');
var async = require('async');
var connection = mysql.createConnection({
host : 'localhost',
user : 'someuser',
password : 'somepass',
database : 'somedb'
});
connection.connect();
connection.query('SELECT * FROM rssfeed', function(err, rows, fields) {
if(err == null){
async.eachLimit(rows, 5, parseFeed, function(err) {
if(! err) {
//horray
} else {
console.log(err);
}
});
}
});
function parseFeed(feed, callback) {
var articles = [];
request(feed.link)
.pipe(new FeedParser())
.on('error', function(error) {
callback(error);
})
.on('meta', function(meta) {
})
.on('readable', function() {
var stream = this, item;
item = stream.read();
if(item != null) {
item.rssfeed_id = feed.id;
articles.push(item);
}
})
.on('finish', function() {
async.eachLimit(articles, 10, parseArticle, function(err) {
if(! err) {
console.log('article each callback');
} else {
callback(error);
}
});
});
callback();
}
function parseArticle(item, callback) {
if(item.hasOwnProperty('rssfeed_id') && item.hasOwnProperty('guid') && item.hasOwnProperty('link') && item.hasOwnProperty('title')){
connection.query('SELECT * FROM rssarticle WHERE rssfeed_id = ? AND guid = ?', [item.rssfeed_id, item.guid], function(err, rows, fields) {
if(rows.length == 0){
connection.query('INSERT INTO rssarticle SET ?', {
rssfeed_id: item.rssfeed_id,
link: item.link,
title: item.title,
description: item.description,
publish_date: item.pubDate,
guid: item.guid
}, function(err, result){
if(err != null){
console.log(err);
}
});
}
});
}
callback();
}
Callbacks are not asynchronous by nature, but can be used for asynchronous purposes. In this code, you define a function fn , define a function higherOrderFunction that takes a function callback as an argument, and pass fn as a callback to higherOrderFunction .
In order to run multiple async/await calls in parallel, all we need to do is add the calls to an array, and then pass that array as an argument to Promise. all() . Promise. all() will wait for all the provided async calls to be resolved before it carries on(see Conclusion for caveat).
Asynchronous callbacks are functions passed to another function that starts executing code in the background. Typically, when the code in the background finishes, the async callback function is called as a way of notifying and passing on data to the callback function that the background task is finished.
The await keyword is used in an async function to ensure that all promises returned in the async function are synchronized, ie. they wait for each other. Await eliminates the use of callbacks in . then() and .
For one, you're preemptively calling your callbacks way too early.
function parseFeed(feed, callback) {
request
.streamStuff()
.streamStuff()
.streamStuff();
callback();
}
You shouldn't be calling callback
before you're done. Otherwise your "finished" method will be called but your async code will actually still be running.
So instead of doing:
.on('finish', function() {
async.eachLimit(articles, 10, parseArticle, function(err) {
if(! err) {
console.log('article each callback');
} else {
callback(error);
}
});
});
callback();
Just do
.on('finish', function() {
async.eachLimit(articles, 10, parseArticle, function(err) {
if(! err) {
// assuming this is a stub and really ends up doing `callback();`
console.log('article each callback');
} else {
callback(error);
}
});
});
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With