I'm attempting to write a query to return the top X terms across each category - e.g. top 5, top 10 etc. Each term has an associated category, and based up on some help from another stackoverflow question I've managed to get this:
db.collection.aggregate([
{
$group : {
_id : {
category: "$uri.category",
term: "$uri.term",
},
total: { $sum : 1 }
}
},
{ $sort : { total : -1 } },
{
$group : {
_id : "$_id.category",
terms: {
$push: {
term: "$_id.term",
total: "$total"
}
}
}
}
]);
The above query does work, and returns data that looks something like this:
[
{ category: "movies",
terms: [ { term: "movie 1", total: 5000 }, { term: "movie 2", total: 200 } ... ]
},
{ category: "sports",
terms: [ { term: "football 1", total: 4000 }, { term: "tennis 2", total: 250 } ... ]
},
]
However I'm trying to limit the terms array to a fixed number i.e. 5 or 10 - this will correspond to the X number of searches per category. I've been trying various options such as adding $slice within the $push to reduce the terms array down with no success.
Can this be achieved using the aggregate framework, or should I look at another approach?
As of MongoDb version 3.1.6 you can now slice on the $project stage:
{
$project: {
terms: {
$slice: ["$terms", 0, 10]
}
}
}
If you wanted to limit the number of items $pushed to 10.
Here's the issue: https://jira.mongodb.org/browse/SERVER-6074
It seems as of Mongodb 2.6, the ability to limit the size of an array using $slice or $push with the .aggregate() function/command is unsupported.
Here's the feature request on the MongoDb issue tracker.
What I would do is output the aggregated result to an collection. Then update the collection.
use test;
var rInt = function(x) {
return 1 + ~~(Math.random() * x);
};
var rObj = function() {
return {
"timestamp": new Date(),
"category": "movies" + rInt(5),
"term": "my movie" + rInt(20)
}
};
for (var i = 0, l = 100; i < l; i++) {
db.al.insert(rObj());
}
db.al_out.drop();
db.al.aggregate([
{
$group : {
_id : {
category: "$category",
term: "$term",
},
total: { $sum : 1 }
}
},
{ $sort : { total : -1 } },
{
$group : {
_id : "$_id.category",
terms: {
$push: {
term: "$_id.term",
total: "$total"
}
}
}
}
,{ $out : "al_out" } // output the documents to `db.al_out`
]);
// limit the size of terms to 3 elements.
db.al_out.update( {}, {
$push : {
terms : {
$each : [],
$slice : 3
}
}
}, {
multi:true
});
db.al_out.find();
{ "_id" : "movies1", "terms" : [ { "term" : "my movie7", "total" : 3 }, { "term" : "my movie6", "total" : 3 }, { "term" : "my movie17", "total" : 2 } ] }
{ "_id" : "movies2", "terms" : [ { "term" : "my movie3", "total" : 4 }, { "term" : "my movie11", "total" : 2 }, { "term" : "my movie2", "total" : 2 } ] }
{ "_id" : "movies4", "terms" : [ { "term" : "my movie9", "total" : 3 }, { "term" : "my movie1", "total" : 3 }, { "term" : "my movie7", "total" : 2 } ] }
{ "_id" : "movies3", "terms" : [ { "term" : "my movie19", "total" : 5 }, { "term" : "my movie8", "total" : 4 }, { "term" : "my movie14", "total" : 4 } ] }
{ "_id" : "movies5", "terms" : [ { "term" : "my movie7", "total" : 6 }, { "term" : "my movie17", "total" : 4 }, { "term" : "my movie3", "total" : 2 } ] }
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With