MongoDB - $unwind - how to unwind more than one array in a $group aggregation

Tags:

(4) However, I need to perform an unwind on 'pathStats_xxx_api_get_response' (from the collection) in the same aggregation above so that I can have totalResponses, totalResponsesCount, avgResponse, minResponse and maxResponse output in the same result above. Hence, my result should look something like this:

{
"result" : [
    {
        "_id" : "07-04-2012:10AM",
        "totalHits" : 54,
        "totalHitsCount" : 20,
        "avgHit" : 2.7,
        "minHit" : 1,
        "maxHit" : 5,
                    "totalResponses" : ??
                    "totalResponsesCount": ??
        "avgResponse" : 2.7,
        "minResponse" : 1,
        "maxResponse" : 5
    }
],
"ok" : 1

}

Not sure exactly how to add more $unwind in the same aggregation as i am almost there!

668

asked Apr 16 '12 00:04

sam0673

2 Answers

How to $unwind more than one array? Have you tried $unwinding multiple times? :)

// Aggregation pipeline over newStats: unwinds both arrays, then groups the
// unwound documents by startTimeStr and computes summary statistics.
// NOTE(review): two consecutive $unwind stages appear to produce one document
// per (hit, response) pair, which would inflate the sums and counts below —
// verify against real data before relying on the totals.
db.newStats.aggregate([
    {$unwind: "$hitsPerOneSecond"},
    {$unwind: "$pathStats_xxx_api_get_response"},

    {$group:{
        // One output document per distinct startTimeStr value.
        _id:"$startTimeStr", 
        // Statistics over the unwound hitsPerOneSecond values.
        totalHits: {$sum: "$hitsPerOneSecond"}, 
        totalHitsCount: {$sum: 1}, 
        avgHit: {$avg: "$hitsPerOneSecond"}, 
        minHit: {$min:"$hitsPerOneSecond"}, 
        maxHit:{$max: "$hitsPerOneSecond"},

        // Statistics over the unwound response values; the remaining fields
        // are elided in this snippet (the ". . ." placeholder below).
        totalResponses: {$sum: "$pathStats_xxx_api_get_response"},
        . . .
     }}
]);

Remember that the aggregation framework takes an array as input (note that I added the [ ] brackets). In that array you can add as many pipeline stages as you want (citation needed), and the output of each stage will be the input of the next one!

NOTE:

Don't forget that if you $unwind a non-existent key or an empty array, you end up with no document at all! It's like multiplying by 0, I guess... So with multiple (possibly many) $unwind stages, the chances of dealing with ghosts increase: if any of the involved arrays is empty, the whole document gets lost and you get nothing from it for any of your $group aggregations...

answered Sep 28 '22 11:09

Marsellus Wallace

Possibly the easiest solution is to do this with two separate aggregation operations and combine the results in your application.

Alternatively, you could do this with a Map Reduce operation:

The following map and reduce functions should provide the results you are looking for:

/**
 * Map function for db.newStats.mapReduce().
 *
 * Called with `this` set to a single document. Summarises the document's
 * `hitsPerOneSecond` and `pathStats_xxx_api_get_response` arrays (total,
 * count, average, minimum, maximum) and emits that summary keyed by the
 * document's `startTimeStr`.
 *
 * Emits exactly one value per document, in the same shape that the reduce
 * function returns.
 */
var map = function() {
  // Summing requires reduce(); Array.prototype.map would instead return a new
  // array of (element + index), which turns the totals and averages into NaN.
  var add = function(sum, value) { return sum + value; };

  var hits = this.hitsPerOneSecond;
  // Must be read from `this` — a bare identifier would be a ReferenceError.
  var responses = this.pathStats_xxx_api_get_response;

  var totalHits = hits.reduce(add, 0);
  var totalHitsCount = hits.length;
  var avgHit = totalHits / totalHitsCount;
  var minHit = Math.min.apply(Math, hits);
  var maxHit = Math.max.apply(Math, hits);

  var totalResponses = responses.reduce(add, 0);
  var totalResponsesCount = responses.length;
  var avgResponse = totalResponses / totalResponsesCount;
  var minResponse = Math.min.apply(Math, responses);
  var maxResponse = Math.max.apply(Math, responses);

  emit(this.startTimeStr, {
    "totalHits": totalHits,
    "totalHitsCount": totalHitsCount,
    "avgHit": avgHit,
    "minHit": minHit,
    "maxHit": maxHit,
    "totalResponses": totalResponses,
    "totalResponsesCount": totalResponsesCount,
    "avgResponse": avgResponse,
    "maxResponse": maxResponse,
    "minResponse": minResponse
  });
};

/**
 * Reduce function for db.newStats.mapReduce().
 *
 * Combines the partial summaries in `values` into a single summary of the
 * same shape: totals and counts are added, averages are recomputed from the
 * running total and count, and minima/maxima keep the smallest/largest value
 * seen so far. `key` is not used.
 */
var reduce = function(key, values) {
  // Merges one metric family (total/count/avg/min/max) of `part` into `acc`.
  var mergeFamily = function(acc, part, total, count, avg, min, max) {
    acc[total] += part[total];
    acc[count] += part[count];
    acc[avg] = acc[total] / acc[count];
    // A null slot means "nothing merged yet": take the first value as-is.
    if (acc[min] == null || part[min] < acc[min]) {
      acc[min] = part[min];
    }
    if (acc[max] == null || part[max] > acc[max]) {
      acc[max] = part[max];
    }
  };

  return values.reduce(function(acc, part) {
    mergeFamily(acc, part, "totalHits", "totalHitsCount", "avgHit", "minHit", "maxHit");
    mergeFamily(acc, part, "totalResponses", "totalResponsesCount", "avgResponse", "minResponse", "maxResponse");
    return acc;
  }, {
    "totalHits": 0,
    "totalHitsCount": 0,
    "avgHit": 0,
    "minHit": null,
    "maxHit": null,
    "totalResponses": 0,
    "totalResponsesCount": 0,
    "avgResponse": 0,
    "maxResponse": null,
    "minResponse": null
  });
};

> db.newStats.mapReduce(map, reduce, {out:{inline:1}})
{
    "results" : [
        {
            "_id" : "07-04-2012:10AM",
            "value" : {
                "totalHits" : 54,
                "totalHitsCount" : 20,
                "avgHit" : 2.7,
                "minHit" : 1,
                "maxHit" : 5,
                "totalResponses" : 7.523893102462698,
                "totalResponsesCount" : 6,
                "avgResponse" : 1.253982183743783,
                "maxResponse" : 1.4853219936411421,
                "minResponse" : 1.0602539963494662
            }
        }
    ],
    "timeMillis" : 0,
    "counts" : {
        "input" : 2,
        "emit" : 2,
        "reduce" : 1,
        "output" : 1
    },
    "ok" : 1,
}
>

If you are unfamiliar with Map Reduce, the documentation may be found here: http://www.mongodb.org/display/DOCS/MapReduce

Additionally, there are some good Map Reduce examples in the MongoDB Cookbook: http://cookbook.mongodb.org/

The "Extras" section of the cookbook article "Finding Max And Min Values with Versioned Documents" http://cookbook.mongodb.org/patterns/finding_max_and_min/ contains a good step-by-step walkthrough of a Map Reduce operation, explaining how the functions are executed.

Hopefully this will help you to achieve your desired results. If you are able to figure out a way to do this with a single aggregation operation, please share your solution, so that the Community may gain the benefit of your experience. Thanks.

Here are a few notes on Map Reduce, in response to your comment:

MapReduce executes JavaScript on the server. As a result, you may find that performance suffers for other operations. Map Reduce is good for once-in-a-while operations that may be done at a time when the server is not at its peak traffic. You may find that using Map Reduce for on-the-fly stats from a large collection is not optimal.

The aggregation framework, on the other hand, relies on native code and does not execute server side JavaScript, making it faster than Map Reduce.

If possible, the best option is to add fields to each document that can be queried on. This adds a little extra overhead to each insert or update, but the results will be returned much more quickly if a Map Reduce operation can be avoided. Unfortunately, this is difficult with Maximum and Minimum values and averages.

If a Map Reduce operation is the only option, there are a few things that can be done to mitigate its impact on the server. Firstly, it is possible to run a Map Reduce on a secondary with SlaveOk. However, because data cannot be written to a secondary, the output must be returned inline, and is therefore limited to 16MB. Some users will take a secondary out of the replica set, restart it as a stand-alone mongod process, run the map-reduce operation on it, copy the output collection wherever it needs to go, and rejoin the secondary to the replica set.

One final thing to consider is incremental Map Reduce: http://www.mongodb.org/display/DOCS/MapReduce#MapReduce-IncrementalMapreduce You can pass a query to the map reduce command that will only match documents that have been modified since the last map reduce, and run the map reduce operation with the reduce output option.

Hopefully the above will give you some food for thought concerning the best way to calculate your statistics. Including the desired information in the documents is preferable, but if that is not possible, using the Aggregation Framework will be more efficient than Map Reduce.

Here is a note on the Aggregation Framework and pymongo, in response to the second comment:

The aggregation framework may be used in pymongo with the command method of the database object.
The documentation on the command method may be found here: http://api.mongodb.org/python/current/api/pymongo/database.html#pymongo.database.Database.command

To perform an aggregation operation, pass a document to the command method with two keys; "aggregate" and "pipeline". The value of "aggregate" is the name of the collection that the operation will be performed on, and the value of "pipeline" will be an array of the aggregation operations to be performed. Pipelines are explained in the "Aggregation Framework" documentation: http://www.mongodb.org/display/DOCS/Aggregation+Framework#AggregationFramework-Pipelines

Here is an example of how you may perform the $unwind operation in pymongo:

In [1]: import pymongo

In [2]: conn = pymongo.Connection()

In [3]: db = conn.test

In [4]: result = db.command({"aggregate":"newStats", "pipeline":
                            [{"$unwind": "$hitsPerOneSecond"},
                             {"$group": {"_id":"$startTimeStr", 
                                          "totalHits": {"$sum": 
                                          "$hitsPerOneSecond"}, 
                              "totalHitsCount": {"$sum": 1}, 
                              "avgHit": {"$avg": "$hitsPerOneSecond"}, 
                              "minHit": {"$min":"$hitsPerOneSecond"}, 
                              "maxHit":{"$max": "$hitsPerOneSecond"}}}]})

In [5]: result
Out[5]: 
{u'ok': 1.0,
 u'result': [{u'_id': u'07-04-2012:10AM',
   u'avgHit': 2.7,
   u'maxHit': 5.0,
   u'minHit': 1.0,
   u'totalHits': 54.0,
   u'totalHitsCount': 20}]}

answered Sep 28 '22 11:09

Marc

Related questions
                            
                                setting up fake data in mongodb for testing
                            
                                Query by relevance with different weight
                            
                                Mongodb Aggregate a $slice to get an element in exact position from a nested array
                            
                                mongodb - mongodump is not defined
                            
                                Query MongoDB search only in specific hours [duplicate]
                            
                                Retrieve last document in a MongoDB using Pymongo and Flask
                            
                                Class 'MongoId' not found in Laradock application
                            
                                Cannot connect to a mongodb service in a Kubernetes cluster
                            
                                Java Future - Spring Authentication is null into AuditorAware
                            
                                Number of collections in mongodb
                            
                                express middleware to modify requests
                            
                                C# MongoDb running an aggregate query directly from JSON
                            
                                how to fix: 'MongoError: authentication fail' @MongoDB Atlas
                            
                                mongoexport - issue with JSON query (extended JSON - Invalid JSON input)
                            
                                Does the mongocli support a formula URL for Apple M1 chips?
                            
                                MongoDb via jndi
                            
                                MongoDB query based on count of embedded document
                            
                                How can you tell if a collection is capped?
                            
                                Bulk insert from Array in mongodb JavaScript console
                            
                                MongoDB in PHP - How do I insert items into an array in a collection?

Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!

Donate Us With

MongoDB - $unwind - how to unwind more than one array in a $group aggregation

Tags:

mongodb

pymongo

sam0673

People also ask

2 Answers

Marsellus Wallace

Marc

Recent Activity

Donate For Us