I want to bulk insert an array of objects into my collection, but I want to prevent duplicate records. I couldn't find a way to do it with insertMany.
const Song = require('../models/song');

// Bulk-insert two documents and send whatever insertMany() resolves with back as JSON.
// NOTE(review): `res` is not defined in this snippet; presumably it is the response
// object of an enclosing route handler that is not shown - confirm against the caller.
Song.insertMany([
  { id: 1, name: "something" },
  { id: 2, name: "something else" } // closing brace was missing in the original snippet
])
  .then((result) => {
    res.json({
      result
    });
  });
Above code worked but if the record is the same it will still get inserted.
Well in actual fact, MongoDB by "default" will not create duplicate data where there is a "unique key" involved, of which _id is one
( aliased by mongoose as id
, but ignored by insertMany()
so you need to be careful ), but there is a much larger story to this that you really need to be aware of.
The basic problem here is that both the "mongoose" implementation of insertMany()
as well as the underlying driver are currently a bit "borked" to put it mildly. That is, there is a bit of an inconsistency in how the driver passes the error response in "Bulk" operations, and this is actually compounded by "mongoose" not really "looking in the right place" for the actual error information.
The "quick" part you are missing is the addition of { ordered: false }
to the "Bulk" operation of which .insertMany()
simply wraps a call to. Setting this ensures that the "batch" of requests is actually submitted "completely" and does not stop execution when an error occurs.
But since "mongoose" does not handle this very well ( nor does the driver "consistently" ) we actually need to look for possible "errors" in the "response" rather than the "error" result of the underlying callback.
As a demonstration:
// Load mongoose and pull the Schema constructor off it.
const mongoose = require('mongoose');
const { Schema } = mongoose;

// Use the global Promise implementation and turn on mongoose's debug output.
mongoose.Promise = global.Promise;
mongoose.set('debug', true);

// Connection string and connect() options used by this demonstration.
const uri = 'mongodb://localhost/test';
const options = { useMongoClient: true };

// A song has a caller-supplied numeric _id and a name.
const songSchema = new Schema({ _id: Number, name: String });
const Song = mongoose.model('Song', songSchema);
/**
 * Print a value to the console as JSON indented by two spaces.
 * @param {*} data - Value to serialize and print.
 */
function log(data) {
  const pretty = JSON.stringify(data, undefined, 2);
  console.log(pretty);
}
// Sample batch: the third entry reuses _id 2, so it triggers a duplicate key error.
let docs = [
  { _id: 1, name: "something" },
  { _id: 2, name: "something else" },
  { _id: 2, name: "something else entirely" },
  { _id: 3, name: "another thing" }
];

// Connect, empty the collection, run the unordered bulk insert, then list what was stored.
mongoose.connect(uri, options)
  .then(() => Song.remove())
  .then(() =>
    // Wrap the driver callback so both `err` and `result` can be inspected.
    new Promise((resolve, reject) =>
      Song.collection.insertMany(docs, { ordered: false }, function(err, result) {
        // Without a result there is nothing to inspect; reject instead of
        // throwing a TypeError inside the callback and leaving the promise pending.
        if (!result) {
          return reject(err || new Error('insertMany returned neither an error nor a result'));
        }

        if (result.hasWriteErrors()) {
          // Log something just for the sake of it
          console.log('Has Write Errors:');
          log(result.getWriteErrors());

          // Anything other than a duplicate key (code 11000) is a real failure
          if (result.getWriteErrors().some(error => error.code !== 11000)) {
            return reject(err);
          }
        }

        resolve(result); // No write errors, or duplicate keys only
      })
    )
  )
  .then(results => { log(results); return true; })
  .then(() => Song.find())
  .then(songs => { log(songs); mongoose.disconnect(); })
  .catch(err => { console.error(err); mongoose.disconnect(); });
Or perhaps a bit nicer since current LTS node.js has async/await
:
// Load mongoose and pull the Schema constructor off it.
const mongoose = require('mongoose');
const { Schema } = mongoose;

// Use the global Promise implementation and turn on mongoose's debug output.
mongoose.Promise = global.Promise;
mongoose.set('debug', true);

// Connection string and connect() options used by this demonstration.
const uri = 'mongodb://localhost/test';
const options = { useMongoClient: true };

// A song has a caller-supplied numeric _id and a name.
const songSchema = new Schema({ _id: Number, name: String });
const Song = mongoose.model('Song', songSchema);
/**
 * Write a value to the console as two-space-indented JSON.
 * @param {*} data - Value to serialize and print.
 */
function log(data) {
  const serialized = JSON.stringify(data, undefined, 2);
  console.log(serialized);
}
// Sample batch: the third entry reuses _id 2, so it triggers a duplicate key error.
let docs = [
  { _id: 1, name: "something" },
  { _id: 2, name: "something else" },
  { _id: 2, name: "something else entirely" },
  { _id: 3, name: "another thing" }
];

// Connect, empty the collection, run the unordered bulk insert, then list what was stored.
(async function() {
  try {
    await mongoose.connect(uri, options);
    await Song.remove();

    // Wrap the driver callback so both `err` and `result` can be inspected.
    let results = await new Promise((resolve, reject) => {
      Song.collection.insertMany(docs, { ordered: false }, function(err, result) {
        // Without a result there is nothing to inspect; reject instead of
        // throwing a TypeError inside the callback and leaving the promise pending.
        if (!result) {
          return reject(err || new Error('insertMany returned neither an error nor a result'));
        }

        if (result.hasWriteErrors()) {
          // Log something just for the sake of it
          console.log('Has Write Errors:');
          log(result.getWriteErrors());

          // Anything other than a duplicate key (code 11000) is a real failure
          if (result.getWriteErrors().some(error => error.code !== 11000)) {
            return reject(err);
          }
        }

        resolve(result); // No write errors, or duplicate keys only
      });
    });

    log(results);

    let songs = await Song.find();
    log(songs);
  } catch(e) {
    console.error(e);
  } finally {
    // Always close the connection, whether the run succeeded or failed.
    mongoose.disconnect();
  }
})();
At any rate, you get the same result showing that writes are both continued and that we respectfully "ignore" errors that are related to a "duplicate key" or otherwise known as error code 11000
. The "safe handling" is that we expect such errors and discard them whilst looking for the presence of "other errors" that we might just want to pay attention to. We also see the rest of the code continues and lists all documents actually inserted by executing a subsequent .find()
call:
Mongoose: songs.remove({}, {})
Mongoose: songs.insertMany([ { _id: 1, name: 'something' }, { _id: 2, name: 'something else' }, { _id: 2, name: 'something else entirely' }, { _id: 3, name: 'another thing' } ], { ordered: false })
Has Write Errors:
[
{
"code": 11000,
"index": 2,
"errmsg": "E11000 duplicate key error collection: test.songs index: _id_ dup key: { : 2 }",
"op": {
"_id": 2,
"name": "something else entirely"
}
}
]
{
"ok": 1,
"writeErrors": [
{
"code": 11000,
"index": 2,
"errmsg": "E11000 duplicate key error collection: test.songs index: _id_ dup key: { : 2 }",
"op": {
"_id": 2,
"name": "something else entirely"
}
}
],
"writeConcernErrors": [],
"insertedIds": [
{
"index": 0,
"_id": 1
},
{
"index": 1,
"_id": 2
},
{
"index": 2,
"_id": 2
},
{
"index": 3,
"_id": 3
}
],
"nInserted": 3,
"nUpserted": 0,
"nMatched": 0,
"nModified": 0,
"nRemoved": 0,
"upserted": [],
"lastOp": {
"ts": "6485492726828630028",
"t": 23
}
}
Mongoose: songs.find({}, { fields: {} })
[
{
"_id": 1,
"name": "something"
},
{
"_id": 2,
"name": "something else"
},
{
"_id": 3,
"name": "another thing"
}
]
So why this process? The reason being that the underlying call actually returns both the err
and result
as shown in the callback implementation but there is an inconsistency in what is returned. The main reason to do this is so you actually see the "result", which not only has the result of the successful operation, but also the error message.
Along with the error information is the nInserted: 3
indicating how many out of the "batch" actually were written. You can pretty much ignore the insertedIds
here since this particular test involved actually supplying _id
values. In the event where a different property had the "unique" constraint that caused the error, then the only values here would be those from actual successful writes. A bit misleading, but easy to test and see for yourself.
As stated, the catch is the "inconsistency" which can be demonstrated with another example ( async/await
only for brevity of listing):
// Load mongoose and pull the Schema constructor off it.
const mongoose = require('mongoose');
const { Schema } = mongoose;

// Use the global Promise implementation and turn on mongoose's debug output.
mongoose.Promise = global.Promise;
mongoose.set('debug', true);

// Connection string and connect() options used by this demonstration.
const uri = 'mongodb://localhost/test';
const options = { useMongoClient: true };

// A song has a caller-supplied numeric _id and a name.
const songSchema = new Schema({ _id: Number, name: String });
const Song = mongoose.model('Song', songSchema);
/**
 * Dump a value to the console as JSON with a two-space indent.
 * @param {*} data - Value to serialize and print.
 */
function log(data) {
  const output = JSON.stringify(data, undefined, 2);
  console.log(output);
}
// Sample batch: the third entry reuses _id 2, so it triggers a duplicate key error.
let docs = [
  { _id: 1, name: "something" },
  { _id: 2, name: "something else" },
  { _id: 2, name: "something else entirely" },
  { _id: 3, name: "another thing" },
  { _id: 4, name: "different thing" },
  // Uncomment the next entry to produce a second duplicate key error in the same batch.
  //{ _id: 4, name: "different thing again" }
];

// Connect, empty the collection, call mongoose's own insertMany(), then list what was stored.
(async function() {
  try {
    await mongoose.connect(uri, options);
    await Song.remove();

    try {
      let results = await Song.insertMany(docs, { ordered: false });
      console.log('what? no result!');
      log(results); // not going to get here
    } catch(e) {
      // Log something for the sake of it
      console.log('Has write Errors:');

      // Check to see if something else other than a duplicate key, then throw
      // Branching because MongoError is not consistent: the error carries a
      // `writeErrors` list in one case and only a top-level `code` in the other.
      if (Array.isArray(e.writeErrors)) {
        log(e.writeErrors);
        if (e.writeErrors.some(error => error.code !== 11000)) {
          throw e;
        }
      } else if (e.code !== 11000) {
        throw e;
      } else {
        log(e);
      }
    }

    let songs = await Song.find();
    log(songs);
  } catch(e) {
    console.error(e);
  } finally {
    // Always close the connection, whether the run succeeded or failed.
    mongoose.disconnect();
  }
})();
All much the same thing, but pay attention to how the error logs here:
Has write Errors:
{
"code": 11000,
"index": 2,
"errmsg": "E11000 duplicate key error collection: test.songs index: _id_ dup key: { : 2 }",
"op": {
"__v": 0,
"_id": 2,
"name": "something else entirely"
}
}
Note that there is no "success" information, even though we get the same continuation of the listing by doing the subsequent .find()
and getting the output. This is because the implementation only acts on the "thrown error" in rejection and never passes through the actual result
part. So even though we asked for ordered: false
, we don't get the information about what was completed unless we wrap the callback and implement the logic ourselves, as is shown in the initial listings.
The other important "inconsistency" happens when there is "more than one error". So uncommenting the additional value for _id: 4
gives us:
Has write Errors:
[
{
"code": 11000,
"index": 2,
"errmsg": "E11000 duplicate key error collection: test.songs index: _id_ dup key: { : 2 }",
"op": {
"__v": 0,
"_id": 2,
"name": "something else entirely"
}
},
{
"code": 11000,
"index": 5,
"errmsg": "E11000 duplicate key error collection: test.songs index: _id_ dup key: { : 4 }",
"op": {
"__v": 0,
"_id": 4,
"name": "different thing again"
}
}
]
Here you can see the code "branched" on the presence of e.writeErrors
, which does not exist when there is one error. By contrast the earlier response
object has both the hasWriteErrors()
and getWriteErrors()
methods, regardless of any error being present at all. So that is the more consistent interface and the reason why you should use it instead of inspecting the err
response alone.
This behavior is actually fixed in the upcoming 3.x release of the driver which is meant to coincide with the MongoDB 3.6 server release. The behavior changes in that the err
response is more akin to the standard result
, but of course classed as a BulkWriteError
response instead of MongoError
which it presently is.
Until that is released ( and of course until that dependency and changes are propagated to the "mongoose" implementation ), then the recommended course of action is to be aware that the useful information is in the result
and not the err
. In fact your code probably should look for hasWriteErrors()
in the result
and then fall back to checking err
as well, in order to cater for the change to be implemented in the driver.
Author's Note: Much of this content and related reading is actually already answered here on Function insertMany() unordered: proper way to get both the errors and the result? and MongoDB Node.js native driver silently swallows
bulkWrite
exception. But repeating and elaborating here until it finally sinks in to people that this is the way you handle exceptions in the current driver implementation. And it does actually work, when you look in the correct place and write your code to handle it accordingly.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With