MongoDB aggregate framework very slow when have group

Tags:

I am trying to make an aggregate query with "group" to get the total of results.

The total of "requested_items" (my results) are +- 1.900.000.

If I execute with "group", the query is very slow (+- 300sec).

If I execute without "group", the query is very fast (+- 1sec).

What am I doing wrong?

The sample code is below.

SLOW QUERY

db.minute.aggregate([
    { $match: {
        $and: [
            { "status": "Homologado" },
            { "requested_items.status": /aceito/i },
        ]
    } },
    { $sort: {'_id': 1}},
    { $unwind: "$requested_items" },
    { $unwind: "$requested_items.winner" },
    { $match: {
        $and: [
            { "status": "Homologado" },
            { "requested_items.status": /aceito/i },
        ]
    } },
    { $project: {
        "_id": 1
    } },
    { $group: {
        "_id" : null,
        "total" : {$sum: 1},
    } },
], {allowDiskUse: true});

FAST QUERY

db.minute.aggregate([
    { $match: {
        $and: [
            { "status": "Homologado" },
            { "requested_items.status": /aceito/i },
        ]
    } },
    { $sort: {'_id': 1}},
    { $unwind: "$requested_items" },
    { $unwind: "$requested_items.winner" },
    { $match: {
        $and: [
            { "status": "Homologado" },
            { "requested_items.status": /aceito/i },
        ]
    } },
    { $project: {
        "_id": 1
    } },
], {allowDiskUse: true});

DB STRUCTURE

{
    "_id" : "12345678ABCD",
    "field_1" : [ 
        {
            "a" : null,
            "b" : "ABC"
        }, 
        {
            "code" : null,
            "b" : "ABCD"
        }
    ],
    "status" : "Homologado",
    "initial_date" : ISODate("2016-05-24T11:31:00.000Z"),
    "field_2" : [ 
        {
            "a" : "ABC",
            "b" : "ABCDE"
        }, 
        {
            "a" : "ABCF",
            "b" : "ABCDEF"
        }
    ],
    "field_3" : "Lorem ipsum dolor sit amet...",
    "field_4" : [ 
        {
            "date" : ISODate("2016-05-24T13:54:48.000Z"),
            "a" : "Text",
            "b" : "More text..."
        }
    ],
    "field_4" : 12312321,
    "field_5" : ISODate("2016-05-24T13:55:00.000Z"),
    "field_6" : "ABCD",
    "requested_items" : [ 
        {
            "status" : " Aceito e Habilitado",
            "field_a" : "Text...",
            "winner" : [ 
                {
                    "a" : "23213.213213.23/232-23",
                    "b" : 130446,
                    "c" : 543223,
                    "d" : NumberLong(2),
                    "e" : "ABC 123 FULANO",
                    "f" : "text",
                    "g" : {
                        "description" : "TEXT TEXT TEXT"
                    }
                },
                {
                    "a" : "23213.213213.23/232-23",
                    "b" : 130446,
                    "c" : 543223,
                    "d" : NumberLong(2),
                    "e" : "ABC 123 FULANO",
                    "f" : "text",
                    "g" : {
                        "description" : "TEXT TEXT TEXT"
                    }
                }
            ],
            "field_c" : {
                "_id" : ObjectId("5744dd3271af88052f0cc343"),
                "a" : "TEXT",
                "b" : "TEXT"
            },
            "field_d" : NumberLong(2),
            "field_e" : 5223,
            "field_f" : "Não",
            "field_g" : "-",
            "field_h" : {
                "field_a1" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a2" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a3" : {},
                "field_a4" : [ 
                    {
                        "date" : ISODate("2016-05-24T11:34:32.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:12:54.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:48:21.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:38.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:47.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:01:36.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:15:02.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }
                ]
            },
            "field_i" : "Não",
            "field_j" : 1
        }, 
        {
            "status" : " Aceito e Habilitado",
            "field_a" : "Text...",
            "winner" : [ 
                {
                    "a" : "23213.213213.23/232-23",
                    "b" : 130446,
                    "c" : 543223,
                    "d" : NumberLong(2),
                    "e" : "ABC 123 FULANO",
                    "f" : "text",
                    "g" : {
                        "description" : "TEXT TEXT TEXT"
                    }
                }
            ],
            "field_c" : {
                "_id" : ObjectId("5744dd3271af88052f0cc343"),
                "a" : "TEXT",
                "b" : "TEXT"
            },
            "field_d" : NumberLong(2),
            "field_e" : 5223,
            "field_f" : "Não",
            "field_g" : "-",
            "field_h" : {
                "field_a1" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a2" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a3" : {},
                "field_a4" : [ 
                    {
                        "date" : ISODate("2016-05-24T11:34:32.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:12:54.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:48:21.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:38.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:47.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:01:36.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:15:02.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }
                ]
            },
            "field_i" : "Não",
            "field_j" : 2
        }, 
        {
            "status" : " Aceito e Habilitado",
            "field_a" : "Text...",
            "winner" : [ 
                {
                    "a" : "23213.213213.23/232-23",
                    "b" : 130446,
                    "c" : 543223,
                    "d" : NumberLong(2),
                    "e" : "ABC 123 FULANO",
                    "f" : "text",
                    "g" : {
                        "description" : "TEXT TEXT TEXT"
                    }
                }
            ],
            "field_c" : {
                "_id" : ObjectId("5744dd3271af88052f0cc343"),
                "a" : "TEXT",
                "b" : "TEXT"
            },
            "field_d" : NumberLong(2),
            "field_e" : 5223,
            "field_f" : "Não",
            "field_g" : "-",
            "field_h" : {
                "field_a1" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a2" : [ 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }, 
                    {
                        "a" : "23213.213213.23/232-23",
                        "b" : ISODate("2016-05-23T23:54:21.000Z"),
                        "c" : 103432446,
                        "d" : 522343,
                        "e" : "Sim",
                        "f" : NumberLong(2),
                        "g" : "TEXT TEXT TEXT",
                        "h" : "Sim",
                        "i" : {
                            "a" : "TEXT TEXT TEXT"
                        }
                    }
                ],
                "field_a3" : {},
                "field_a4" : [ 
                    {
                        "date" : ISODate("2016-05-24T11:34:32.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:12:54.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:48:21.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:38.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T12:55:47.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:01:36.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }, 
                    {
                        "date" : ISODate("2016-05-24T13:15:02.000Z"),
                        "A" : "TEXT",
                        "B" : "TEXT"
                    }
                ]
            },
            "field_i" : "Não",
            "field_j" : 3
        }, 
    ],
    "field_7" : "TEXT",
    "field_8" : {
        "a" : "TEXT",
        "b" : "TEXT",
        "c" : "324234",
        "d" : "TEXT TEXT TEXT TEXT"
    },
    "field_9" : 43234
}

EXPLAIN

{
    "waitedMS" : NumberLong(0),
    "stages" : [ 
        {
            "$cursor" : {
                "query" : {
                    "$and" : [ 
                        {
                            "status" : "Homologado"
                        }, 
                        {
                            "requested_items.status" : /aceito/i
                        }
                    ]
                },
                "queryPlanner" : {
                    "plannerVersion" : 1,
                    "namespace" : "module_database.minute",
                    "indexFilterSet" : false,
                    "parsedQuery" : {
                        "$and" : [ 
                            {
                                "status" : {
                                    "$eq" : "Homologado"
                                }
                            }, 
                            {
                                "requested_items.status" : /aceito/i
                            }
                        ]
                    },
                    "winningPlan" : {
                        "stage" : "COLLSCAN",
                        "filter" : {
                            "$and" : [ 
                                {
                                    "status" : {
                                        "$eq" : "Homologado"
                                    }
                                }, 
                                {
                                    "requested_items.status" : /aceito/i
                                }
                            ]
                        },
                        "direction" : "forward"
                    },
                    "rejectedPlans" : []
                }
            }
        }, 
        {
            "$unwind" : {
                "path" : "$requested_items"
            }
        }, 
        {
            "$unwind" : {
                "path" : "$requested_items.winner"
            }
        }, 
        {
            "$match" : {
                "$and" : [ 
                    {
                        "status" : "Homologado"
                    }, 
                    {
                        "requested_items.status" : /aceito/i
                    }
                ]
            }
        }, 
        {
            "$group" : {
                "_id" : {
                    "$const" : null
                },
                "numberOfdocs" : {
                    "$sum" : {
                        "$const" : 1
                    }
                }
            }
        }
    ],
    "ok" : 1
}

My server is:
OS: UBUNTU14/64
CPU: 6
RAM: 16 GB
Total Storage: 80 GB
Running just the tests of my question.

962

asked Jun 09 '16 08:06

Gabriel Cunha

2 Answers

Finally solved the problem on my query with group. It was an error of design patterns. Thinking in SQL world, I designed the collections before thinking about my app. As a result, slow queries.

To resolve it I had to redesign my collections and put the relevant data in first level of my docs. In my searches, I found that on Aggregation, index needs to be in first stage of pipeline. If I use a field with index after the stage $unwind, it is not considered.

Besides that, I created an int hash for text fields using the package https://github.com/darkskyapp/string-hash. So, my text fields can be indexed.

So my queries changed from 300s to 5s.

155

answered Oct 19 '22 17:10

Gabriel Cunha

it is hard to determine speed as we don't have environment details. What you could try to see how explain is predicting you query by adding:

{
   explain:true
}

to your aggregation query db.coll.aggregate([pipeline], {explain:true},{allowDiskUse: true}). What also need to be considered that $unwind doubles amount of documents to process.

As you re going to count amount of documents -> it could be faster just take size of an array (after first unwind) and sum it later

db.inventory.aggregate(
   [
      {
         $group: {
            _id: null,
            numberOfdocs: { $sum:{$size: "$requested_items.winner" }}
         }
      }
   ]
)

EDIT

after playing with this query I was able to reduce it execution time circa 45%. The main point is to skip second $match as this scans full result set , so last $group contains all data and we can filter out what's needed at the end as this operation is done on a small result set.

db.coll.aggregate([{
            $match : {
                "status" : "Homologado"
            }
        }, {
            $unwind : "$requested_items"
        }, {
            $unwind : "$requested_items.winner"
        }, {
            $project : {
                x : "$requested_items.status",
            }
        }, {
            $group : {
                _id : "$x",
                numberOfdocs : {
                    $sum : 1
                }
            }
        }, {
            $match : {
                "_id" : /acesssito/i
            }
        }
    ], {
        allowDiskUse: true
});

answered Oct 19 '22 16:10

profesor79

Related questions
                            
                                MongoDB: How to merge two collections/databases together into one?
                            
                                Tool to create a visualization from existing MongoDB [closed]
                            
                                Ideal way of doing infinite scroll and real time updates in Meteor
                            
                                spring data mongodb _id mapping preference
                            
                                Make mongo query in spring where Document contain array
                            
                                Check if document exists in mongodb using es7 async/await
                            
                                Can MongoDB be packaged in an Electron app?
                            
                                brew services start [email protected] fails - uninitialized constant
                            
                                Can a MongoDB collection have inside it another collection?
                            
                                When should i be opening and closing MongoDB connections?
                            
                                How to compare 2 mongodb collections?
                            
                                Can a native iOS client connect to a meteor.js server?
                            
                                In Mongoose, how to filter an array of objects
                            
                                Is it possible to store multiple types of Objects into 1 mongodb collection?
                            
                                How do I iterate over an entire MongoDB collection using mongojs?
                            
                                C# MongoDB Driver Ignores timeout options
                            
                                How to query to mongo using spark?
                            
                                Docker mongodb share volume with mac os x
                            
                                How to set default dbpath for mongoDB in windows 7?
                            
                                How can we ensure Data integrity in mongoDb?

Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!

Donate Us With

MongoDB aggregate framework very slow when have group

Tags:

performance

mongodb

aggregation-framework

Gabriel Cunha

People also ask

2 Answers

Gabriel Cunha

EDIT

profesor79

Recent Activity

Donate For Us