Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

MongoDB Bulk Insert Ignore Duplicate

I've Googled around and can't find any solid information on how to ignore duplicate key errors when using a bulk insert.

Here's the code I'm currently using:

// Connects, queues one insert per id in an unordered bulk operation on the
// `user_ids` collection, executes it, and closes the connection exactly once.
MongoClient.connect(mongoURL, function(err, db) {
  if (err) {
    // No usable `db` handle on a failed connect, so report and stop here.
    console.error(err)
    return
  }

  let col = db.collection('user_ids')
  let batch = col.initializeUnorderedBulkOp()

  ids.forEach(function(id) {
    batch.insert({ userid: id, used: false, group: argv.groupID })
  })

  batch.execute(function(err, result) {
    if (err) {
      // Log the original error object so its stack and details are kept.
      console.error(err)
      db.close()
      // Return so the success path below does not run and close `db` twice.
      return
    }

    // Do some work

    db.close()
  })
})

Is it possible? I've tried adding {continueOnError: true, safe: true} to batch.insert(...), but that didn't work.

Any ideas?

like image 984
BugHunterUK Avatar asked Aug 20 '16 13:08

BugHunterUK


1 Answer

An alternative is to use bulk.find().upsert().replaceOne() instead:

// Connects, queues one upsert-by-userid replaceOne per id in an unordered bulk
// operation on `user_ids`, executes it, and closes the connection exactly once.
MongoClient.connect(mongoURL, function(err, db) {
    if (err) {
        // No usable `db` handle on a failed connect, so report and stop here.
        console.error(err)
        return
    }

    let col = db.collection('user_ids')
    let batch = col.initializeUnorderedBulkOp()

    ids.forEach(function(id) {
        // Match on userid; replace the matching document or insert a new one.
        batch.find({ userid: id }).upsert().replaceOne({
            userid: id,
            used: false,
            group: argv.groupID
        })
    })

    batch.execute(function(err, result) {
        if (err) {
            // Log the original error object so its stack and details are kept.
            console.error(err)
            db.close()
            // Return so the success path below does not run and close `db` twice.
            return
        }

        // Do some work

        db.close()
    })
})

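With the above, if a document matches the query { userid: id } it is replaced with the new document; otherwise it is created, so no duplicate key errors are thrown. Note that replacing also overwrites the fields of an existing document (e.g. used is reset to false); to leave existing documents untouched, use .upsert().updateOne({ $setOnInsert: { ... } }) instead.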


For MongoDB server versions 3.2+, use bulkWrite as follows:

// Connects, upserts one document per id into `user_ids` via bulkWrite in
// batches of up to 500 operations, and closes the connection once after every
// batch has settled.
MongoClient.connect(mongoURL, function(err, db) {
    if (err) {
        // No usable `db` handle on a failed connect, so report and stop here.
        console.error(err)
        return
    }

    const BATCH_SIZE = 500
    let col = db.collection('user_ids')
    let ops = []
    let pending = []

    // Sends the queued operations as one bulkWrite and starts a fresh queue.
    function flush() {
        if (ops.length === 0) return
        pending.push(
            // Called without a callback so the driver returns a promise.
            // Failures are logged per batch and mapped to null so that one
            // failed batch does not stop us waiting for the others.
            col.bulkWrite(ops).catch(function(err) {
                console.error(err)
                return null
            })
        )
        ops = []
    }

    ids.forEach(function(id) {
        ops.push({
            "replaceOne": {
                "filter": { "userid": id },
                "replacement": {
                    userid: id,
                    used: false,
                    group: argv.groupID
                },
                "upsert": true
            }
        })

        if (ops.length === BATCH_SIZE) flush()
    })

    // Send whatever is left over (fewer than BATCH_SIZE operations).
    flush()

    Promise.all(pending).then(function(results) {
        // do something with results (one entry per batch; null = failed batch)

        // Close only after every batch has finished, including when there
        // were no ids at all and nothing was sent.
        db.close()
    })
})
like image 87
chridam Avatar answered Sep 19 '22 16:09

chridam