I'm learning node, express, mongo, and, in the process, javascript. I'm trying to build a feature that uses rssparser to fetch a list of stories and saves them to a mongo database with mongoose.
I've got the RSS pull working, and I'm iterating through the stories, it's the saving that I'm having problems with. I want to 1) check that the story doesn't already exist in the database, and 2) if not, save it. I think I'm getting lost in the way callbacks are handled. Here's my current code, with comments.
// Fetch the feed at `url` and, for each item, save it as a story unless a
// story with the same title and url is already found in the database.
// NOTE: `err` from parseURL is never checked; `out.items` is read unconditionally.
rssparser.parseURL(url, options, function(err,out){
// out.items is an array of the items pulled
var items = out.items;
// NOTE: `story` is declared once, outside the loop, so every callback created
// in the loop shares this single variable instead of getting its own copy.
var story;
for (var i=0; i<items.length; i++){
//create a mongoose story
story = new schemas.Stories({
title: items[i].title,
url: items[i].url,
summary: items[i].summary,
published: items[i].published_at
});
//TODO: for testing - these show up correctly.
//If I pull 10 stories, I get 10 entries from here that match
//So "story" is holding the current story
console.log("items[i] is :" + items[i].title);
console.log("story title is : " + story.title);
// setup query to see if it's already in db
var query = schemas.Stories.findOne({
"title" : story.title,
"url" : story.url
});
//execute the query
// The callback reads the shared `story` variable at the moment it runs, so it
// sees whatever the loop assigned most recently, not this iteration's value.
query.exec( function(err, row){
// A query error is only logged; execution still continues to the !row check.
if(err) console.log("error-query: " + err);
console.log("row: "+ row);
if(!row) {
// not there, so save
console.log('about to save story.title: ' + story.title);
story.save(function (err){
// Runs on every save callback, whether or not `err` is set.
console.log("error in save: " + err);
});
}
});
}
});
When this runs, what I see is a lot of console output.
It starts by showing all the stories (many omitted):
items[i] is :TSA Drops Plan to Let Passengers Carry Small Knives on Planes
story title is : TSA Drops Plan to Let Passengers Carry Small Knives on Planes
items[i] is :BUILDING COLLAPSE:1 Reportedly Dead, 13 Pulled From Philly Rubble
story title is : BUILDING COLLAPSE:1 Reportedly Dead, 13 Pulled From Philly Rubble
items[i] is :CONTROVERSIAL PAST: Obama's UN Nominee Once Likened US 'Sins' to Nazis'
story title is : CONTROVERSIAL PAST: Obama's UN Nominee Once Likened US 'Sins' to Nazis'
items[i] is :WRITING OUT WRIGHTS: Bill Gives First Powered Flight Nod to Whitehead
story title is : WRITING OUT WRIGHTS: Bill Gives First Powered Flight Nod to Whitehead
items[i] is :BREAKING NEWS: Rice Named to Top Security Post Despite Libya Fallout
story title is : BREAKING NEWS: Rice Named to Top Security Post Despite Libya Fallout
Then continues like (many omitted):
row: null
about to save story.title: Best Ribs in America
row: null
about to save story.title: Best Ribs in America
row: null
about to save story.title: Best Ribs in America
row: null
about to save story.title: Best Ribs in America
row: null
about to save story.title: Best Ribs in America
row: null
about to save story.title: Best Ribs in America
row: { title: 'Best Ribs in America',
url: 'http://www.foxnews.com/leisure/2013/06/05/10-best-ribs-in-america/',
published: 1370463800000,
_id: 51af9f881995d40425000023,
__v: 0 }
It repeats the "about to save" line with the same title (which is the last story in the feed), and it saves that story only once, as the last row shows.
The console.log output appears just as I've shown it: all the story title output at the top, then everything from inside the query.exec() call at the bottom.
Any help is appreciated...
The problem is that every exec callback references the same instance of the story variable. By the time the callbacks are executed, the for loop has already finished, so story has been set to the last item iterated over, and every callback sees that one value.
The easiest way to fix this is to simply wrap each thing in the for loop in a function that you execute right away with parameters, as in:
// Fetch the feed at `url` and save every item that is not already stored
// (an existing story is matched on title + url).
rssparser.parseURL(url, options, function(err,out){
    // If the feed could not be fetched or parsed there is nothing to iterate.
    if (err || !out || !out.items) {
        console.log("error-parse: " + (err || "no items returned"));
        return;
    }
    // out.items is an array of the items pulled
    var items = out.items;
    for (var i=0; i<items.length; i++){
        // The immediately-invoked function gives each iteration its own
        // `item` and `story`, so the asynchronous callbacks below refer to
        // the story created in that iteration rather than a shared variable.
        (function(item) {
            //create a mongoose story
            var story = new schemas.Stories({
                title: item.title,
                url: item.url,
                summary: item.summary,
                published: item.published_at
            });
            // setup query to see if it's already in db
            var query = schemas.Stories.findOne({
                "title" : story.title,
                "url" : story.url
            });
            //execute the query
            query.exec( function(queryErr, row){
                if (queryErr) {
                    // The lookup failed, so we cannot tell whether the story
                    // exists; do not fall through and save a possible duplicate.
                    console.log("error-query: " + queryErr);
                    return;
                }
                console.log("row: "+ row);
                if (row) {
                    // already stored
                    return;
                }
                // not there, so save
                console.log('about to save story.title: ' + story.title);
                story.save(function (saveErr){
                    // Only report when the save actually failed.
                    if (saveErr) console.log("error in save: " + saveErr);
                });
            });
        })(items[i]);
    }
});
I haven't tested this, but I'm sure you'll find that it will fix your issue
Another even easier and cleaner way would be to iterate over the items with a forEach loop on the array, if your platform supports that (which node.js does) - this version is even prettier:
// Fetch the feed at `url` and save every item that is not already stored
// (an existing story is matched on title + url).
rssparser.parseURL(url, options, function(err,out){
    // If the feed could not be fetched or parsed there is nothing to iterate.
    if (err || !out || !out.items) {
        console.log("error-parse: " + (err || "no items returned"));
        return;
    }
    // out.items is an array of the items pulled.
    // forEach calls the function once per item, so `item` and `story` are
    // separate for each element and the callbacks below use the right one.
    out.items.forEach(function(item) {
        //create a mongoose story
        var story = new schemas.Stories({
            title: item.title,
            url: item.url,
            summary: item.summary,
            published: item.published_at
        });
        // setup query to see if it's already in db
        var query = schemas.Stories.findOne({
            "title" : story.title,
            "url" : story.url
        });
        //execute the query
        query.exec( function(queryErr, row){
            if (queryErr) {
                // The lookup failed, so we cannot tell whether the story
                // exists; do not fall through and save a possible duplicate.
                console.log("error-query: " + queryErr);
                return;
            }
            console.log("row: "+ row);
            if (row) {
                // already stored
                return;
            }
            // not there, so save
            console.log('about to save story.title: ' + story.title);
            story.save(function (saveErr){
                // Only report when the save actually failed.
                if (saveErr) console.log("error in save: " + saveErr);
            });
        });
    });
});
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With