I've used aggregation in mongo a lot, I know performance benefits on the grouped counts and etc. But, do mongo have any difference in performance on those two ways to count all documents in a collection?:
collection.aggregate([
{
$match: {}
},{
$group: {
_id: null,
count: {$sum: 1}
}
}]);
and
collection.find({}).count()
Update: Second case: Let's say we have this sample data:
{_id: 1, type: 'one', value: true}
{_id: 2, type: 'two', value: false}
{_id: 4, type: 'five', value: false}
With aggregate()
:
var _ids = ['id1', 'id2', 'id3'];
var counted = Collections.mail.aggregate([
{
'$match': {
_id: {
'$in': _ids
},
value: false
}
}, {
'$group': {
_id: "$type",
count: {
'$sum': 1
}
}
}
]);
With count()
:
var counted = {};
var type = 'two';
for (i = 0, len = _ids.length; i < len; i++) {
counted[_ids[i]] = Collections.mail.find({
_id: _ids[i], value: false, type: type
}).count();
}
.count()
is by far faster. You can see the implementation by calling
// Note the missing parentheses at the end
db.collection.count
which returns the length of the cursor. of the default query (if count()
is called with no query document), which in turn is implemented as returning the length of the _id_
index, iirc.
An aggregation, however, reads each and every document and processes it. This can only be halfway in the same order of magnitude with .count()
when doing it over only some 100k of documents (give and take according to your RAM).
Below function was applied to a collection with some 12M entries:
function checkSpeed(col,iterations){
// Get the collection
var collectionUnderTest = db[col];
// The collection we are writing our stats to
var stats = db[col+'STATS']
// remove old stats
stats.remove({})
// Prevent allocation in loop
var start = new Date().getTime()
var duration = new Date().getTime()
print("Counting with count()")
for (var i = 1; i <= iterations; i++){
start = new Date().getTime();
var result = collectionUnderTest.count()
duration = new Date().getTime() - start
stats.insert({"type":"count","pass":i,"duration":duration,"count":result})
}
print("Counting with aggregation")
for(var j = 1; j <= iterations; j++){
start = new Date().getTime()
var doc = collectionUnderTest.aggregate([{ $group:{_id: null, count:{ $sum: 1 } } }])
duration = new Date().getTime() - start
stats.insert({"type":"aggregation", "pass":j, "duration": duration,"count":doc.count})
}
var averages = stats.aggregate([
{$group:{_id:"$type","average":{"$avg":"$duration"}}}
])
return averages
}
And returned:
{ "_id" : "aggregation", "average" : 43828.8 }
{ "_id" : "count", "average" : 0.6 }
The unit is milliseconds.
hth
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With