Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Slow query behaviour using $exists with mongodb on fields with an index

Tags:

mongodb

I've been doing some live data investigations using a mongo 3.2.9 installation. The main crux was to find out some details around records that had missing data within the documents. But the queries I was running were timing out in robomongo and compass.

I have a collection (foo) containing just over 3 million records. I'm searching for all the records that do not have a barId, this is the query that I am firing at mongo:

db.foo.find({barId:{$exists:true}}).explain(true)

From the mongo shell this is the execution plan (it times out in robomongo or compass)

MongoDB Enterprise > db.foo.find({barId:{$exists:true}}).explain(true)
{
  "queryPlanner" : {
    "plannerVersion" : 1,
    "namespace" : "myDatabase01.foo",
    "indexFilterSet" : false,
    "parsedQuery" : {
      "barId" : {
        "$exists" : true
      }
    },
    "winningPlan" : {
      "stage" : "FETCH",
      "filter" : {
        "barId" : {
          "$exists" : true
        }
      },
      "inputStage" : {
        "stage" : "IXSCAN",
        "keyPattern" : {
          "barId" : 1
        },
        "indexName" : "barId_1",
        "isMultiKey" : false,
        "isUnique" : false,
        "isSparse" : false,
        "isPartial" : false,
        "indexVersion" : 1,
        "direction" : "forward",
        "indexBounds" : {
          "barId" : [
            "[MinKey, MaxKey]"
          ]
        }
      }
    },
    "rejectedPlans" : [ ]
  },
  "executionStats" : {
    "executionSuccess" : true,
    "nReturned" : 2,
    "executionTimeMillis" : 154716,
    "totalKeysExamined" : 3361040,
    "totalDocsExamined" : 3361040,
    "executionStages" : {
      "stage" : "FETCH",
      "filter" : {
        "barId" : {
          "$exists" : true
        }
      },
      "nReturned" : 2,
      "executionTimeMillisEstimate" : 152060,
      "works" : 3361041,
      "advanced" : 2,
      "needTime" : 3361038,
      "needYield" : 0,
      "saveState" : 27619,
      "restoreState" : 27619,
      "isEOF" : 1,
      "invalidates" : 0,
      "docsExamined" : 3361040,
      "alreadyHasObj" : 0,
      "inputStage" : {
        "stage" : "IXSCAN",
        "nReturned" : 3361040,
        "executionTimeMillisEstimate" : 1260,
        "works" : 3361041,
        "advanced" : 3361040,
        "needTime" : 0,
        "needYield" : 0,
        "saveState" : 27619,
        "restoreState" : 27619,
        "isEOF" : 1,
        "invalidates" : 0,
        "keyPattern" : {
          "barId" : 1
        },
        "indexName" : "barId_1",
        "isMultiKey" : false,
        "isUnique" : false,
        "isSparse" : false,
        "isPartial" : false,
        "indexVersion" : 1,
        "direction" : "forward",
        "indexBounds" : {
          "barId" : [
            "[MinKey, MaxKey]"
          ]
        },
        "keysExamined" : 3361040,
        "dupsTested" : 0,
        "dupsDropped" : 0,
        "seenInvalidated" : 0
      }
    },
    "allPlansExecution" : [ ]
  },
  "serverInfo" : {
    "host" : "myLinuxMachine",
    "port" : 8080,
    "version" : "3.2.9",
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
  },
  "ok" : 1
}

It looks like its using my barId_1 index but at the same time its scanning all 3 million records only to return 2.

I ran a similar query but rather than looking for the existence of fields I looked for ids greater than 0 (all of them)

MongoDB Enterprise > db.foo.find({barId:{$gt:"0"}}).explain(true)
{
  "queryPlanner" : {
    "plannerVersion" : 1,
    "namespace" : "myDatabase01.foo",
    "indexFilterSet" : false,
    "parsedQuery" : {
      "barId" : {
        "$gt" : "0"
      }
    },
    "winningPlan" : {
      "stage" : "FETCH",
      "inputStage" : {
        "stage" : "IXSCAN",
        "keyPattern" : {
          "barId" : 1
        },
        "indexName" : "barId_1",
        "isMultiKey" : false,
        "isUnique" : false,
        "isSparse" : false,
        "isPartial" : false,
        "indexVersion" : 1,
        "direction" : "forward",
        "indexBounds" : {
          "barId" : [
            "(\"0\", {})"
          ]
        }
      }
    },
    "rejectedPlans" : [ ]
  },
  "executionStats" : {
    "executionSuccess" : true,
    "nReturned" : 2,
    "executionTimeMillis" : 54,
    "totalKeysExamined" : 2,
    "totalDocsExamined" : 2,
    "executionStages" : {
      "stage" : "FETCH",
      "nReturned" : 2,
      "executionTimeMillisEstimate" : 10,
      "works" : 3,
      "advanced" : 2,
      "needTime" : 0,
      "needYield" : 0,
      "saveState" : 0,
      "restoreState" : 0,
      "isEOF" : 1,
      "invalidates" : 0,
      "docsExamined" : 2,
      "alreadyHasObj" : 0,
      "inputStage" : {
        "stage" : "IXSCAN",
        "nReturned" : 2,
        "executionTimeMillisEstimate" : 10,
        "works" : 3,
        "advanced" : 2,
        "needTime" : 0,
        "needYield" : 0,
        "saveState" : 0,
        "restoreState" : 0,
        "isEOF" : 1,
        "invalidates" : 0,
        "keyPattern" : {
          "barId" : 1
        },
        "indexName" : "barId_1",
        "isMultiKey" : false,
        "isUnique" : false,
        "isSparse" : false,
        "isPartial" : false,
        "indexVersion" : 1,
        "direction" : "forward",
        "indexBounds" : {
          "barId" : [
            "(\"1\", {})"
          ]
        },
        "keysExamined" : 2,
        "dupsTested" : 0,
        "dupsDropped" : 0,
        "seenInvalidated" : 0
      }
    },
    "allPlansExecution" : [ ]
  },
  "serverInfo" : {
    "host" : "myLinuxMachine",
    "port" : 8080,
    "version" : "3.2.9",
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
  },
  "ok" : 1
}

This again did an index scan of barId_1. It scanned 2 records returning 2.

For completeness here are the 2 records, the other 3 million are very similar in size and composition.

MongoDB Enterprise > db.foo.find({barId:{$gt:"0"}})
{ 
  "_id" : "00002f5d-ee4a-4996-bb27-b54ea84df777", "createdDate" : ISODate("2016-11-16T02:26:48.500Z"), "createdBy" : "Exporter", "lastModifiedDate" : ISODate("2016-11-16T02:26:48.500Z"), "lastModifiedBy" : "Exporter", "rolePlayed" : "LA", "roleType" : "T", "oId" : [ "d7316944-62ed-48dc-8ee4-e3bad8c58b10" ], "barId" : "e45b3160-bbb4-24e5-82b3-ad0c28329555", "cId" : "dcc29053-7a1f-439e-9536-fb4e44ff8a51", "timestamp" : "2017-02-20T16:23:15.795Z" 
}
{ 
  "_id" : "00002f5d-ee4a-4996-bb27-b54ea84df888", "createdDate" : ISODate("2016-11-16T02:26:48.500Z"), "createdBy" : "Exporter", "lastModifiedDate" : ISODate("2016-11-16T02:26:48.500Z"), "lastModifiedBy" : "Exporter", "rolePlayed" : "LA", "roleType" : "T", "oId" : [ "d7316944-62ed-48dc-8ee4-e3bad8c58b10" ], "barId" : "e45b3160-bbb4-24e5-82b3-ad0c28329555", "cId" : "dcc29053-7a1f-439e-9536-fb4e44ff8a51", "timestamp" : "2017-02-20T16:23:15.795Z" 
}

Of course I've done some googling around and found that there used to be a problem using an index along with the exists clause, but in many threads I've read this is fixed. Is it? Also I've found the following Hack that you can use rather than the $exists clause to force the 'correct' use of indexes when looking for the existence of fields.

MongoDB Enterprise > db.foo.find({barId:{$ne:null}}).explain(true)
{
  "queryPlanner" : {
    "plannerVersion" : 1,
    "namespace" : "myDatabase01.foo",
    "indexFilterSet" : false,
    "parsedQuery" : {
      "$not" : {
        "barId" : {
          "$eq" : null
        }
      }
    },
    "winningPlan" : {
      "stage" : "FETCH",
      "filter" : {
        "$not" : {
          "barId" : {
            "$eq" : null
          }
        }
      },
      "inputStage" : {
        "stage" : "IXSCAN",
        "keyPattern" : {
          "barId" : 1
        },
        "indexName" : "barId_1",
        "isMultiKey" : false,
        "isUnique" : false,
        "isSparse" : false,
        "isPartial" : false,
        "indexVersion" : 1,
        "direction" : "forward",
        "indexBounds" : {
          "barId" : [
            "[MinKey, null)",
            "(null, MaxKey]"
          ]
        }
      }
    },
    "rejectedPlans" : [ ]
  },
  "executionStats" : {
    "executionSuccess" : true,
    "nReturned" : 2,
    "executionTimeMillis" : 57,
    "totalKeysExamined" : 3,
    "totalDocsExamined" : 2,
    "executionStages" : {
      "stage" : "FETCH",
      "filter" : {
        "$not" : {
          "barId" : {
            "$eq" : null
          }
        }
      },
      "nReturned" : 2,
      "executionTimeMillisEstimate" : 10,
      "works" : 4,
      "advanced" : 2,
      "needTime" : 1,
      "needYield" : 0,
      "saveState" : 0,
      "restoreState" : 0,
      "isEOF" : 1,
      "invalidates" : 0,
      "docsExamined" : 2,
      "alreadyHasObj" : 0,
      "inputStage" : {
        "stage" : "IXSCAN",
        "nReturned" : 2,
        "executionTimeMillisEstimate" : 10,
        "works" : 4,
        "advanced" : 2,
        "needTime" : 1,
        "needYield" : 0,
        "saveState" : 0,
        "restoreState" : 0,
        "isEOF" : 1,
        "invalidates" : 0,
        "keyPattern" : {
          "barId" : 1
        },
        "indexName" : "barId_1",
        "isMultiKey" : false,
        "isUnique" : false,
        "isSparse" : false,
        "isPartial" : false,
        "indexVersion" : 1,
        "direction" : "forward",
        "indexBounds" : {
          "barId" : [
            "[MinKey, null)",
            "(null, MaxKey]"
          ]
        },
        "keysExamined" : 3,
        "dupsTested" : 0,
        "dupsDropped" : 0,
        "seenInvalidated" : 0
      }
    },
    "allPlansExecution" : [ ]
  },
  "serverInfo" : {
    "host" : "myLinuxMachine",
    "port" : 8080,
    "version" : "3.2.9",
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
  },
  "ok" : 1
}

This works, only 2 documents scanned, only 2 documents returned.

My question is thus. Should I ever use $exists in a query? is it ever suited to use in the live production application? and if the answer is no why does the $exist clause even exist in the first place?

There is always the possibility that its the install of mongo that is at fault, or maybe the indexes are somehow ill conceived. Any light would be very welcome, but for now I'm sticking with the $ne:null hack.

like image 374
Damo Avatar asked Feb 21 '17 21:02

Damo


People also ask

How do I find slow queries in MongoDB?

Finding slow queries in MongoDB. or slow queries and operations. Off. & No profiling By default, mongod records slow queries to its log, as defined by slowOpThresholdMs. Enabling database profiler puts negative impact on MongoDB’s performance. It’s better to enable it for specific intervals & minimal on Production Servers.

What is a MongoDB index and how does it work?

Just like relational databases, NoSQL databases like MongoDB also utilize indexes to speed up queries. Indexes store a small portion of each collection’s data set into separate traversable data structures. These indexes then enable your queries to perform at faster speeds by minimizing the number of disk accesses required with each request.

What is slowopthresholdms in MongoDB?

or slow queries and operations. Off. & No profiling By default, mongod records slow queries to its log, as defined by slowOpThresholdMs. Enabling database profiler puts negative impact on MongoDB’s performance.

Does database profiler affect mongod performance?

By default, mongod records slow queries to its log, as defined by slowOpThresholdMs. Enabling database profiler puts negative impact on MongoDB’s performance. It’s better to enable it for specific intervals & minimal on Production Servers.


1 Answers

You should use partial index (preferred) or sparse index for barId field:

db.foo.createIndex(
   { barId: 1 },
   { partialFilterExpression: { barId: { $exists: true } } }
)
like image 197
Sergey Berezovskiy Avatar answered Nov 15 '22 08:11

Sergey Berezovskiy