I have a product collection with the following documents:
{ "_id" : 1, "item" : "abc", created: ISODate("2014-10-01T08:12:00Z") }
{ "_id" : 2, "item" : "jkl", created: ISODate("2014-10-02T09:13:00Z") }
{ "_id" : 3, "item" : "hjk", created: ISODate("2014-10-02T09:18:00Z") }
{ "_id" : 4, "item" : "sdf", created: ISODate("2014-10-07T09:14:00Z") }
{ "_id" : 5, "item" : "xyz", created: ISODate("2014-10-15T09:15:00Z") }
{ "_id" : 6, "item" : "iop", created: ISODate("2014-10-16T09:15:00Z") }
I want to draw a chart describing product count by day, so I use mongodb aggregation framework to count product group by day:
var proj1 = {
"$project": {
"created": 1,
"_id": 0,
"h": {"$hour": "$created"},
"m": {"$minute": "$created"},
"s": {"$second": "$created"},
"ml": {"$millisecond": "$created"}
}
};
var proj2 = {
"$project": {
"created": {
"$subtract": [
"$created", {
"$add": [
"$ml",
{"$multiply": ["$s", 1000]},
{"$multiply": ["$m", 60, 1000]},
{"$multiply": ["$h", 60, 60, 1000]}
]
}]
}
}
};
db.product.aggregate([
proj1,
proj2,
{$group: {
_id: "$created",
count: {$sum: 1}
}},
{$sort: {_id: 1}}
])
The result in mongo shell is:
{
"result" : [
{
"_id" : ISODate("2014-10-01T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-10-02T00:00:00.000Z"),
"count" : 2
},
{
"_id" : ISODate("2014-10-07T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-10-15T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-10-16T00:00:00.000Z"),
"count" : 1
}
],
"ok" : 1
}
Of course, there is no product some days and the chart using the result set above looks like this:
But the desired chart should look like this:
So the question is: How can I add missing days (of the last 30 days, for example) to the result set with count = 0
? That means, the desired result set should looks like this:
{
"result" : [
{
"_id" : ISODate("2014-09-16T00:00:00.000Z"),
"count" : 0
},
{
"_id" : ISODate("2014-09-17T00:00:00.000Z"),
"count" : 0
},
...
{
"_id" : ISODate("2014-10-01T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-10-02T00:00:00.000Z"),
"count" : 2
},
{
"_id" : ISODate("2014-10-03T00:00:00.000Z"),
"count" : 0
},
...
{
"_id" : ISODate("2014-10-07T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-09-08T00:00:00.000Z"),
"count" : 0
},
...
{
"_id" : ISODate("2014-10-15T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-10-16T00:00:00.000Z"),
"count" : 1
},
// also, add some extra days
{
"_id" : ISODate("2014-10-17T00:00:00.000Z"),
"count" : 0
},
{
"_id" : ISODate("2014-10-10T00:00:00.000Z"),
"count" : 0
}
],
"ok" : 1
}
In MongoDB, you can use the $subtract aggregation pipeline operator to subtract numbers and/or dates. Specifically, $subtract can do the following three things: Subtract two numbers to return the difference. Subtract a number (in milliseconds) from a date and return the resulting date.
With aggregate + $match, you get a big monolithic BSON containing all matching documents. With find, you get a cursor to all matching documents. Then you can get each document one by one.
On large collections of millions of documents, MongoDB's aggregation was shown to be much worse than Elasticsearch. Performance worsens with collection size when MongoDB starts using the disk due to limited system RAM. The $lookup stage used without indexes can be very slow.
Accumulators are operators that maintain their state (e.g. totals, maximums, minimums, and related data) as documents progress through the pipeline. Use the $accumulator operator to execute your own JavaScript functions to implement behavior not supported by the MongoDB Query Language.
Using aggregate to handle this question completely is a pain.
But it can be reached.
(MongoDB V2.6+ required)
var proj1 = {
"$project" : {
"created" : 1,
"_id" : 0,
"h" : {
"$hour" : "$created"
},
"m" : {
"$minute" : "$created"
},
"s" : {
"$second" : "$created"
},
"ml" : {
"$millisecond" : "$created"
}
}
};
var proj2 = {
"$project" : {
"created" : {
"$subtract" : [ "$created", {
"$add" : [ "$ml", {
"$multiply" : [ "$s", 1000 ]
}, {
"$multiply" : [ "$m", 60, 1000 ]
}, {
"$multiply" : [ "$h", 60, 60, 1000 ]
} ]
} ]
}
}
};
var group1 = {
$group : {
_id : "$created",
count : {
$sum : 1
}
}
};
var group2 = {
$group : {
_id : 0,
origin : {
$push : "$$ROOT"
},
maxDate : {
$max : "$_id"
}
}
};
var step = 24 * 60 * 60 * 1000; // milliseconds of one day
var project3 = {
$project : {
origin : 1,
extents : {
$map : {
"input" : [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29],
"as" : "e",
"in" : {
_id : {
$subtract : [ "$maxDate", {
$multiply : [ step, "$$e"]
}]
},
count : {
$add : [0]
}
}
}
}
}
};
var project4 = {
$project : {
_id : 0,
values : {
$setUnion : [ "$origin", "$extents"]
}
}
};
var unwind1 = {
$unwind : "$values"
};
var group3 = {
$group : {
_id : "$values._id",
count : {
$max : "$values.count"
}
}
};
db.product.aggregate([ proj1, proj2, group1, group2, project3, project4,
unwind1, group3, {
$sort : {
_id : 1
}
} ]);
I would like to fill the missing part at application end something like this for your reference:
function sortResult(x, y) {
var t1 = x._id.getTime();
var t2 = y._id.getTime();
if (t1 < t2) {
return -1;
} else if (t1 == t2) {
return 0;
} else {
return 1;
}
}
var result = db.product.aggregate();
var endDateMilliseconds = result[result.length - 1]._id.getTime();
var step = 24 * 60 * 60 * 1000; // milliseconds of one day
var map = {};
for (var i in result) {
map[ result[i]._id.getTime() ] = result[i];
}
for (var ms = endDateMilliseconds, x = 1; x < 30; x++) {
ms -= step;
if ( ! ( ms in map ) ) {
map[ms] = {_id : new Date(ms), count : 0};
}
}
var finalResult = [];
for (var x in map) {
finalResult.push(map[x]);
}
finalResult.sort(sortResult);
printjson(finalResult);
Ok, first of all: Non-existing values are evaluated to null (roughly translates to "nada", "nothing", "not there"), which isn't equal to 0, which is a well defined value.
MongoDB has no semantical understanding of the difference between 0 and 42, for example. So how should MongoDB decide which value to assume for a day in the time (of which mongo has no semantical understanding, too)?
Basically, you have two choices: save a 0 for each day when no value is to record or you iterate in your app over the days in the time you want to create a chart for and issue 0 for each day no value exists as a substitute. Id' suggest doing the former, since that would make it possible using the aggregation framework.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With