I have the following document structure
{
"app_id": "DHJFK67JDSJjdasj909",
"date": ISODate("2014-08-07T00:00:00.000Z"),
"event_count": 100,
"events": [
{ "type": 0, "value": 12 },
{ "type": 10, "value": 24 },
{ "type": 20, "value": 36 },
{ "type": 30, "value": 43 }
],
"unique_events": [
{ "type": 0, "value": 5 },
{ "type": 10, "value": 8 },
{ "type": 20, "value": 12 },
{ "type": 30, "value": 56 }
]
}
I am trying to get a sum of event_counts and also the values for unique_events and events per type. This is the kind of output I am expecting where the event_count and each events and unique_events value has been summed per type.
{
"app_id": "DHJFK67JDSJjdasj909",
"date": ISODate("2014-08-07T00:00:00.000Z"),
"event_count": 4345,
"events": [
{ "type": 0, "value": 624 },
{ "type": 10, "value": 234 },
{ "type": 20, "value": 353 },
{ "type": 30, "value": 472 }
],
"unique_events": [
{ "type": 0, "value": 433 },
{ "type": 10, "value": 554 },
{ "type": 20, "value": 645 },
{ "type": 30, "value": 732 }
]
}
This is my query
db.events.aggregate([
{ "$unwind": "$events" },
{ "$group": {
"_id": {
"app_id": "$app_id",
"type": "$events.type"
"unique_type": "$unique_events.type"
},
"event_count": { "$sum": "$event_count" },
"event_value": { "$sum": "$events.value" },
"unique_event_value": { "$sum": "$unique_events.value" }
}},
{ "$group": {
"_id": "$_id.app_id",
"event_count": { "$sum": "$event_count" },
"events": { "$push": { "type": "$_id.type", "value": "$event_value" } }
"unique_events": { "$push": { "type": "$_id.unique_type", "value": "$unique_event_value" } }
}}
])
The problem is that using two $unwinds and then grouping by both events and unique_events is causing the $sum to be compounded and much too large. Is there some way I can fix this using mongo or do I have to run two queries and then merge the two result sets in code.
Thanks
Irfan
To unwind, use $unwind. The $unwind deconstructs an array field from the input documents to output a document for each element.
Db. collection. aggregate () can use several channels at the same time for data processing.
_id: This field is mandatory for grouping. If you specify the value of the _id field as null or a constant, the $group operator counts the accumulated values for all input documents as a whole.
This is simple enough really, to sum the results for each array it's just a matter of discerning between which is which and "combining the elements". In short you should probably be doing this in you documents anyway as should be evident from the first pipeline stage(s).
So for MongoDB 2.6 and greater there are some helper methods:
db.events.aggregate([
{ "$project": {
"app_id": 1,
"event_count": 1,
"all_events": {
"$setUnion": [
{ "$map": {
"input": "$events",
"as": "el",
"in": {
"type": "$$el.type",
"value": "$$el.value",
"class": { "$literal": "A" }
}
}},
{ "$map": {
"input": "$unique_events",
"as": "el",
"in": {
"type": "$$el.type",
"value": "$$el.value",
"class": { "$literal": "B" }
}
}}
]
}
}},
{ "$unwind": "$all_events" },
{ "$group": {
"_id": {
"app_id": "$app_id",
"class": "$all_events.class",
"type": "$all_events.type"
},
"event_count": { "$sum": "$event_count" },
"value": { "$sum": "$all_events.value" }
}},
{ "$group": {
"_id": "$_id.app_id",
"event_count": { "$sum": "$event_count" },
"events": {
"$push": {
"$cond": [
{ "$eq": [ "$_id.class", "A" ] },
{ "type": "$_id.type", "value": "$value" },
false
]
}
},
"unique_events": {
"$push": {
"$cond": [
{ "$eq": [ "$_id.class", "B" ] },
{ "type": "$_id.type", "value": "$value" },
false
]
}
}
}},
{ "$project": {
"event_count": 1,
"events": { "$setDifference": [ "$events", [false] ] },
"unique_events": {
"$setDifference": [ "$unique_events", [false] ]
}
}}
])
Mostly in the $setUnion
and $setDifference
operators. The other ccase is $map
, which processes arrays in place. The whole thing there is doing operations on arrays without the use of $unwind
. But those can of course be done in prior versions, it just takes a bit more work:
db.events.aggregate([
{ "$unwind": "$events" },
{ "$group": {
"_id": "$_id",
"app_id": { "$first": "$app_id" },
"event_count": { "$first": "$event_count" },
"events": {
"$push": {
"type": "$events.type",
"value": "$events.value",
"class": { "$const": "A" }
}
},
"unique_events": { "$first": "$unique_events" }
}},
{ "$unwind": "$unique_events" },
{ "$group": {
"_id": "$_id",
"app_id": { "$first": "$app_id" },
"event_count": { "$first": "$event_count" },
"events": { "$first": "$events" },
"unique_events": {
"$push": {
"type": "$unique_events.type",
"value": "$unique_events.value",
"class": { "$const": "B" }
}
}
}},
{ "$project": {
"app_id": 1,
"event_count": 1,
"events": 1,
"unique_events": 1,
"type": { "$const": [ "A","B" ] }
}},
{ "$unwind": "$type" },
{ "$unwind": "$events" },
{ "$unwind": "$unique_events" },
{ "$group": {
"_id": "$_id",
"app_id": { "$first": "$app_id" },
"event_count": { "$first": "$event_count" },
"all_events": {
"$addToSet": {
"$cond": [
{ "$eq": [ "$events.class", "$type" ] },
{
"type": "$events.type",
"value": "$events.value",
"class": "$events.class"
},
{
"type": "$unique_events.type",
"value": "$unique_events.value",
"class": "$unique_events.class"
}
]
}
}
}},
{ "$unwind": "$all_events" },
{ "$group": {
"_id": {
"app_id": "$app_id",
"class": "$all_events.class",
"type": "$all_events.type"
},
"event_count": { "$sum": "$event_count" },
"value": { "$sum": "$all_events.value" }
}},
{ "$group": {
"_id": "$_id.app_id",
"event_count": { "$sum": "$event_count" },
"events": {
"$push": {
"$cond": [
{ "$eq": [ "$_id.class", "A" ] },
{ "type": "$_id.type", "value": "$value" },
false
]
}
},
"unique_events": {
"$push": {
"$cond": [
{ "$eq": [ "$_id.class", "B" ] },
{ "type": "$_id.type", "value": "$value" },
false
]
}
}
}},
{ "$unwind": "$events" },
{ "$match": { "events": { "$ne": false } } },
{ "$group": {
"_id": "$_id",
"event_count": { "$first": "$event_count" },
"events": { "$push": "$events" },
"unique_events": { "$first": "$unique_events" }
}},
{ "$unwind": "$unique_events" },
{ "$match": { "unique_events": { "$ne": false } } },
{ "$group": {
"_id": "$_id",
"event_count": { "$first": "$event_count" },
"events": { "$first": "$events" },
"unique_events": { "$push": "$unique_events" }
}}
])
That gets you the results you want with each array being "summed" together as well as the master "event_count" with the correct result.
You probably should consider combining both of those arrays with a similar identifier to what has been used in the pipelines as demonstrated. This part is half of the work. The other half is considering that you probably should store pre-aggregated results in a collection somewhere for the best application performance.
You can perform Following Map reduce:
Its not a dynamic solution, i have created variable
for each events
and unique_events
.
I have saved output in different collection
using out: "session_stat"
in mapReduce
Function.
var mapFunction = function() {
var key = this.app_id;
var value = {
event_count: this.event_count,
events: this.events,
unique_events: this.unique_events
};
emit( key, value );
};
var reduceFunction = function(key, values) {
var reducedObject = {
app_id: key,
events_wise_total: 0,
unique_events_wise_total:0,
total_event_count:0
};
var events_wise_total = [];
var events_0_total = { type:0, value :0};
var events_10_total = { type:10, value :0};
var events_20_total = { type:20, value :0};
var events_30_total = { type:30, value :0};
var unique_events_wise_total = [];
var unique_events_0_total = { type:0, value :0};
var unique_events_10_total = { type:10, value :0};
var unique_events_20_total = { type:20, value :0};
var unique_events_30_total = { type:30, value :0};
var total_event_count = 0;
values.forEach( function(value) {
total_event_count += value.event_count;
var events = value.events;
events.forEach(function(event){
if(event.type == 0){events_0_total.value += event.value;}
if(event.type == 10){events_10_total.value += event.value;}
if(event.type == 20){events_20_total.value += event.value;}
if(event.type == 30){events_30_total.value += event.value;}
});
var unique_events = value.unique_events;
unique_events.forEach(function(unique_event){
if(unique_event.type == 0){unique_events_0_total.value += unique_event.value;}
if(unique_event.type == 10){unique_events_10_total.value += unique_event.value;}
if(unique_event.type == 20){unique_events_20_total.value += unique_event.value;}
if(unique_event.type == 30){unique_events_30_total.value += unique_event.value;}
});
}
);
events_wise_total.push(events_0_total);
events_wise_total.push(events_10_total);
events_wise_total.push(events_20_total);
events_wise_total.push(events_30_total);
unique_events_wise_total.push(unique_events_0_total);
unique_events_wise_total.push(unique_events_10_total);
unique_events_wise_total.push(unique_events_20_total);
unique_events_wise_total.push(unique_events_30_total);
reducedObject.events_wise_total = events_wise_total;
reducedObject.unique_events_wise_total = unique_events_wise_total;
reducedObject.total_event_count = total_event_count;
return reducedObject;
};
var finalizeFunction = function (key, reducedValue) {
return reducedValue;
};
db.GroupBy.mapReduce(
mapFunction,
reduceFunction,
{
out: "session_stat",
finalize: finalizeFunction
});
Hope it may be useful
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With