Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Bulk upsert in MongoDB using mongoose

Is there any option to perform bulk upserts with mongoose? So basically having an array and insert each element if it not exists or update it if it exists? (I am using customs _ids)

When I do use .insert MongoDB returns an error E11000 for duplicate keys (which should be updated). Inserting multiple new document works fine though:

var Users = self.db.collection('Users');

Users.insert(data, function(err){
            if (err) {
                callback(err);
            }
            else {
                callback(null);
            }
        });

Using .save returns an error that the parameter must be a single document:

Users.save(data, function(err){
   ...
}

This answer suggest there is no such option, however it is specific for C# and also already 3 years old. So I was wondering if there is any option to do that using mongoose?

Thank you!

like image 514
user3122267 Avatar asked Aug 13 '14 11:08

user3122267


People also ask

How do you Upsert a mongoose?

Upsert. Using the upsert option, you can use findOneAndUpdate() as a find-and-upsert operation. An upsert behaves like a normal findOneAndUpdate() if it finds a document that matches filter . But, if no document matches filter , MongoDB will insert one by combining filter and update as shown below.

What does Upsert mean mongoose?

May 20, 2019. In MongoDB, an upsert means an update that inserts a new document if no document matches the filter . To upsert a document in Mongoose, you should set the upsert option to the Model.

Does MongoDB have Upsert?

Here in MongoDB, the upsert option is a Boolean value. Suppose the value is true and the documents match the specified query filter. In that case, the applied update operation will update the documents. If the value is true and no documents match the condition, this option inserts a new document into the collection.

What is bulk operation in MongoDB?

MongoDB provides clients the ability to perform write operations in bulk. Bulk write operations affect a single collection. MongoDB allows applications to determine the acceptable level of acknowledgement required for bulk write operations.


2 Answers

Not in "mongoose" specifically, or at least not yet as of writing. The MongoDB shell as of the 2.6 release actually uses the "Bulk operations API" "under the hood" as it were for all of the general helper methods. In it's implementation, it tries to do this first, and if an older version server is detected then there is a "fallback" to the legacy implementation.

All of the mongoose methods "currently" use the "legacy" implementation or the write concern response and the basic legacy methods. But there is a .collection accessor from any given mongoose model that essentially accesses the "collection object" from the underlying "node native driver" on which mongoose is implemented itself:

 var mongoose = require('mongoose'),
     Schema = mongoose.Schema;

 mongoose.connect('mongodb://localhost/test');

 var sampleSchema  = new Schema({},{ "strict": false });

 var Sample = mongoose.model( "Sample", sampleSchema, "sample" );

 mongoose.connection.on("open", function(err,conn) { 

    var bulk = Sample.collection.initializeOrderedBulkOp();
    var counter = 0;

    // representing a long loop
    for ( var x = 0; x < 100000; x++ ) {

        bulk.find(/* some search */).upsert().updateOne(
            /* update conditions */
        });
        counter++;

        if ( counter % 1000 == 0 )
            bulk.execute(function(err,result) {             
                bulk = Sample.collection.initializeOrderedBulkOp();
            });
    }

    if ( counter % 1000 != 0 )
        bulk.execute(function(err,result) {
           // maybe do something with result
        });

 });

The main catch there being that "mongoose methods" are actually aware that a connection may not actually be made yet and "queue" until this is complete. The native driver you are "digging into" does not make this distinction.

So you really have to be aware that the connection is established in some way or form. But you can use the native driver methods as long as you are careful with what you are doing.

like image 89
Neil Lunn Avatar answered Oct 20 '22 14:10

Neil Lunn


You don't need to manage limit (1000) as @neil-lunn suggested. Mongoose does this already. I used his great answer as a basis for this complete Promise-based implementation & example:

var Promise = require('bluebird');
var mongoose = require('mongoose');

var Show = mongoose.model('Show', {
  "id": Number,
  "title": String,
  "provider":  {'type':String, 'default':'eztv'}
});

/**
 * Atomic connect Promise - not sure if I need this, might be in mongoose already..
 * @return {Priomise}
 */
function connect(uri, options){
  return new Promise(function(resolve, reject){
    mongoose.connect(uri, options, function(err){
      if (err) return reject(err);
      resolve(mongoose.connection);
    });
  });
}

/**
 * Bulk-upsert an array of records
 * @param  {Array}    records  List of records to update
 * @param  {Model}    Model    Mongoose model to update
 * @param  {Object}   match    Database field to match
 * @return {Promise}  always resolves a BulkWriteResult
 */
function save(records, Model, match){
  match = match || 'id';
  return new Promise(function(resolve, reject){
    var bulk = Model.collection.initializeUnorderedBulkOp();
    records.forEach(function(record){
      var query = {};
      query[match] = record[match];
      bulk.find(query).upsert().updateOne( record );
    });
    bulk.execute(function(err, bulkres){
        if (err) return reject(err);
        resolve(bulkres);
    });
  });
}

/**
 * Map function for EZTV-to-Show
 * @param  {Object} show EZTV show
 * @return {Object}      Mongoose Show object
 */
function mapEZ(show){
  return {
    title: show.title,
    id: Number(show.id),
    provider: 'eztv'
  };
}

// if you are  not using EZTV, put shows in here
var shows = []; // giant array of {id: X, title: "X"}

// var eztv = require('eztv');
// eztv.getShows({}, function(err, shows){
//   if(err) return console.log('EZ Error:', err);

//   var shows = shows.map(mapEZ);
  console.log('found', shows.length, 'shows.');
  connect('mongodb://localhost/tv', {}).then(function(db){
    save(shows, Show).then(function(bulkRes){
      console.log('Bulk complete.', bulkRes);
      db.close();
    }, function(err){
        console.log('Bulk Error:', err);
        db.close();
    });
  }, function(err){
    console.log('DB Error:', err);
  });

// });

This has the bonus of closing the connection when it's done, displaying any errors if you care, but ignoring them if not (error callbacks in Promises are optional.) It's also very fast. Just leaving this here to share my findings. You can uncomment the eztv stuff if you want to save all eztv shows to a database, as an example.

like image 19
konsumer Avatar answered Oct 20 '22 13:10

konsumer