Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

MongoDB aggreagte fill missing days [duplicate]

I have a product collection with the following documents:

{ "_id" : 1, "item" : "abc", created: ISODate("2014-10-01T08:12:00Z") }
{ "_id" : 2, "item" : "jkl", created: ISODate("2014-10-02T09:13:00Z") }
{ "_id" : 3, "item" : "hjk", created: ISODate("2014-10-02T09:18:00Z") }
{ "_id" : 4, "item" : "sdf", created: ISODate("2014-10-07T09:14:00Z") }
{ "_id" : 5, "item" : "xyz", created: ISODate("2014-10-15T09:15:00Z") }
{ "_id" : 6, "item" : "iop", created: ISODate("2014-10-16T09:15:00Z") }

I want to draw a chart describing product count by day, so I use mongodb aggregation framework to count product group by day:

  var proj1 = {
      "$project": {
          "created": 1,
          "_id": 0,
          "h": {"$hour": "$created"},
          "m": {"$minute": "$created"},
          "s": {"$second": "$created"},
          "ml": {"$millisecond": "$created"}
      }
  };

  var proj2 = {
      "$project": {
          "created": {
              "$subtract": [
                  "$created", {
                      "$add": [
                          "$ml",
                          {"$multiply": ["$s", 1000]},
                          {"$multiply": ["$m", 60, 1000]},
                          {"$multiply": ["$h", 60, 60, 1000]}
                      ]
                  }]
          }
      }
  };

  db.product.aggregate([
          proj1,
          proj2,
          {$group: {
              _id: "$created",
              count: {$sum: 1}
          }},
          {$sort: {_id: 1}}
      ])

The result in mongo shell is:

{
    "result" : [ 
        {
            "_id" : ISODate("2014-10-01T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-02T00:00:00.000Z"),
            "count" : 2
        }, 
        {
            "_id" : ISODate("2014-10-07T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-15T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-16T00:00:00.000Z"),
            "count" : 1
        }
    ],
    "ok" : 1
}

Of course, there is no product some days and the chart using the result set above looks like this:

enter image description here

But the desired chart should look like this:

desired output

So the question is: How can I add missing days (of the last 30 days, for example) to the result set with count = 0? That means, the desired result set should looks like this:

{
    "result" : [
        {
            "_id" : ISODate("2014-09-16T00:00:00.000Z"),
            "count" : 0
        }, 
        {
            "_id" : ISODate("2014-09-17T00:00:00.000Z"),
            "count" : 0
        }, 
        ...            
        {
            "_id" : ISODate("2014-10-01T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-02T00:00:00.000Z"),
            "count" : 2
        }, 
        {
            "_id" : ISODate("2014-10-03T00:00:00.000Z"),
            "count" : 0
        },
        ...
        {
            "_id" : ISODate("2014-10-07T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-09-08T00:00:00.000Z"),
            "count" : 0
        },
        ...
        {
            "_id" : ISODate("2014-10-15T00:00:00.000Z"),
            "count" : 1
        }, 
        {
            "_id" : ISODate("2014-10-16T00:00:00.000Z"),
            "count" : 1
        },
        // also, add some extra days 
        {
            "_id" : ISODate("2014-10-17T00:00:00.000Z"),
            "count" : 0
        },
        {
            "_id" : ISODate("2014-10-10T00:00:00.000Z"),
            "count" : 0
        }
    ],
    "ok" : 1
}
like image 823
Đức Nguyễn Avatar asked Oct 16 '14 14:10

Đức Nguyễn


People also ask

How do I subtract two dates in MongoDB?

In MongoDB, you can use the $subtract aggregation pipeline operator to subtract numbers and/or dates. Specifically, $subtract can do the following three things: Subtract two numbers to return the difference. Subtract a number (in milliseconds) from a date and return the resulting date.

What are the differences between using aggregate () and find () in MongoDB?

With aggregate + $match, you get a big monolithic BSON containing all matching documents. With find, you get a cursor to all matching documents. Then you can get each document one by one.

Is MongoDB aggregation fast?

On large collections of millions of documents, MongoDB's aggregation was shown to be much worse than Elasticsearch. Performance worsens with collection size when MongoDB starts using the disk due to limited system RAM. The $lookup stage used without indexes can be very slow.

What is accumulator in MongoDB?

Accumulators are operators that maintain their state (e.g. totals, maximums, minimums, and related data) as documents progress through the pipeline. Use the $accumulator operator to execute your own JavaScript functions to implement behavior not supported by the MongoDB Query Language.


2 Answers

Using aggregate to handle this question completely is a pain.
But it can be reached.
(MongoDB V2.6+ required)

var proj1 = {
    "$project" : {
        "created" : 1,
        "_id" : 0,
        "h" : {
            "$hour" : "$created"
        },
        "m" : {
            "$minute" : "$created"
        },
        "s" : {
            "$second" : "$created"
        },
        "ml" : {
            "$millisecond" : "$created"
        }
    }
};

var proj2 = {
    "$project" : {
        "created" : {
            "$subtract" : [ "$created", {
                "$add" : [ "$ml", {
                    "$multiply" : [ "$s", 1000 ]
                }, {
                    "$multiply" : [ "$m", 60, 1000 ]
                }, {
                    "$multiply" : [ "$h", 60, 60, 1000 ]
                } ]
            } ]
        }
    }
};

var group1 = {
        $group : {
            _id : "$created",
            count : {
                $sum : 1
            }
        }
    };

var group2 = {
        $group : {
            _id : 0,
            origin : {
                $push : "$$ROOT"
            },
            maxDate : {
                $max : "$_id"
            }
        }
};

var step = 24 * 60 * 60 * 1000; // milliseconds of one day

var project3 = {
    $project : {
        origin : 1,
        extents : {
            $map : {
                "input" : [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29],
                "as" : "e",
                "in" : {
                    _id : {
                        $subtract : [ "$maxDate", {
                            $multiply : [ step, "$$e"]
                        }]
                    },
                    count : {
                        $add : [0]
                    }
                }
            }
        }
    }
};

var project4 = {
        $project : {
            _id : 0,
            values : {
                $setUnion : [ "$origin", "$extents"]
            }
        }
};

var unwind1 = {
        $unwind : "$values"
};

var group3 = {
        $group : {
            _id : "$values._id",
            count : {
                $max : "$values.count"
            }
        }
};

db.product.aggregate([ proj1, proj2, group1, group2, project3, project4,
        unwind1, group3, {
            $sort : {
                _id : 1
            }
        } ]);

I would like to fill the missing part at application end something like this for your reference:

function sortResult(x, y) {
    var t1 = x._id.getTime();
    var t2 = y._id.getTime();
    if (t1 < t2) {
        return -1;
    } else if (t1 == t2) {
        return 0;
    } else {
        return 1;
    }
}

var result = db.product.aggregate();

var endDateMilliseconds = result[result.length - 1]._id.getTime();
var step = 24 * 60 * 60 * 1000; // milliseconds of one day

var map = {};
for (var i in result) {
    map[ result[i]._id.getTime() ] = result[i];
}

for (var ms = endDateMilliseconds, x = 1; x < 30; x++) {
    ms -= step;
    if ( ! ( ms in map ) ) {
        map[ms] = {_id : new Date(ms), count : 0};
    }
}

var finalResult = [];
for (var x in map) {
    finalResult.push(map[x]);
}
finalResult.sort(sortResult);
printjson(finalResult);
like image 103
Wizard Avatar answered Oct 21 '22 17:10

Wizard


Ok, first of all: Non-existing values are evaluated to null (roughly translates to "nada", "nothing", "not there"), which isn't equal to 0, which is a well defined value.

MongoDB has no semantical understanding of the difference between 0 and 42, for example. So how should MongoDB decide which value to assume for a day in the time (of which mongo has no semantical understanding, too)?

Basically, you have two choices: save a 0 for each day when no value is to record or you iterate in your app over the days in the time you want to create a chart for and issue 0 for each day no value exists as a substitute. Id' suggest doing the former, since that would make it possible using the aggregation framework.

like image 23
Markus W Mahlberg Avatar answered Oct 21 '22 15:10

Markus W Mahlberg