We have large MongoDB database (about 1,4mln collections), MongoDB 3.0, engine rocksDB, operating system Ubuntu 14.04.
This DB is located on virtual machine (VmWare vCloud) with 16 cores and 108 GB RAM (currently mongoDB used 70GB of memory without swap).
Production setup options:
DB stats:
{
"db" : "ctp",
"collections" : 1369486,
"objects" : 20566852,
"avgObjSize" : 1126.82749999854,
"dataSize" : 23175294422,
"storageSize" : 23231888384,
"numExtents" : 0,
"indexes" : 6686175,
"indexSize" : 685981393,
"ok" : 1
}
Sample collections sizes:
{
"ns" : "ctp._cf123_ct49_dfc-r_dtc-r_tof2_groupat",
"count" : 33,
"size" : 38172,
"avgObjSize" : 1156,
"storageSize" : 38144,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 6312,
"indexSizes" : {
"_id_" : 18,
"exAt" : 16,
"unique" : 6246,
"_smp" : 10,
"_smpdf" : 22
},
"ok" : 1
}
{
"ns" : "ctp._afpoznan123_atlondyn49_df2016-09_dt2016-09_tof2_groupdfdt",
"count" : 188,
"size" : 208677,
"avgObjSize" : 1109,
"storageSize" : 208640,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 7945,
"indexSizes" : {
"_id_" : 2845,
"exAt" : 256,
"_smp" : 160,
"_smpdf" : 352,
"unique" : 4332
},
"ok" : 1
}
{
"ns" : "ctp._cf123_ct42_dfc-r_dtc-r_tof2_groupat",
"count" : 27,
"size" : 30400,
"avgObjSize" : 1125,
"storageSize" : 30208,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 84,
"indexSizes" : {
"_id_" : 18,
"exAt" : 16,
"unique" : 18,
"_smp" : 10,
"_smpdf" : 22
},
"ok" : 1
}
Periodically every 5 minutes is running script which writes to these collections and creates new one if this collection doesn't exists (collections names are based on data which are inside these collections) and creating indexes.
We've noticed that this server has some freezes during writing data to collections. This kind of freeze can take from 5 to 60 seconds.
Has anyone experienced this issue and could help us?
Here are some logs from moment of freeze:
db.currentOP();
"opid" : 22717868,
"active" : false,
"op" : "query",
"ns" : "ctp._cf115_atboma25_dfc-r_dtc-r_tof2_groupdfym",
"query" : {
"$query" : {
"T#df" : {
"$lt" : "2017-02-28"
}
},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33832",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(22004831)
}
}
}
},
{
"desc" : "conn135907",
"threadId" : "0xc3e5d64e0",
"connectionId" : 135907,
"opid" : 22719375,
"active" : true,
"secs_running" : 0,
"microsecs_running" : NumberLong(223601),
"op" : "query",
"ns" : "top_search.top_searches",
"query" : {
"$msg" : "query not recording (too large)"
},
"planSummary" : "IXSCAN { T#df: 1, T#dt: 1 }",
"client" : "192.168.1.33:33648",
"numYields" : 170,
"locks" : {
"Global" : "r",
"Database" : "r",
"Collection" : "r"
},
"waitingForLock" : false,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(342)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(171)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(171)
}
}
}
},
{
"desc" : "conn135959",
"threadId" : "0x10d4445260",
"connectionId" : 135959,
"opid" : 22718533,
"active" : false,
"op" : "query",
"ns" : "ctp._afoxford-house23_attamarindo32_dfc-r_dtc-r_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:34022",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15003580)
}
}
}
},
{
"desc" : "conn135829",
"threadId" : "0x10d4445740",
"connectionId" : 135829,
"opid" : 22717923,
"active" : false,
"op" : "query",
"ns" : "ctp._ct123_dfc-r_dtc-r_tof2_groupdfym",
"query" : {
"$query" : {
"T#df" : {
"$lt" : "2017-02-28"
}
},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33026",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(21004810)
}
}
}
},
{
"desc" : "conn135781",
"threadId" : "0x2d678e0",
"connectionId" : 135781,
"opid" : 22718920,
"active" : false,
"op" : "query",
"ns" : "ctp._cf1147_atrostock36_df2016-06_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60874",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(12002770)
}
}
}
},
{
"desc" : "conn135870",
"threadId" : "0xd04ed5d40",
"connectionId" : 135870,
"opid" : 22719172,
"active" : false,
"op" : "query",
"ns" : "ctp._cf61_atpristina131_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33369",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(7001590)
}
}
}
},
{
"desc" : "conn135687",
"threadId" : "0xc3e5d7380",
"connectionId" : 135687,
"opid" : 22717925,
"active" : false,
"op" : "query",
"ns" : "ctp._cf105_athana156_df2016-06_dt2016-06_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60022",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(21003871)
}
}
}
},
{
"desc" : "conn135754",
"threadId" : "0xd04ed5860",
"connectionId" : 135754,
"opid" : 22718485,
"active" : false,
"op" : "query",
"ns" : "ctp._cf5_atdhaka1113_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60603",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15503084)
}
}
}
},
{
"desc" : "conn135644",
"threadId" : "0xc3e5d9c20",
"connectionId" : 135644,
"opid" : 22719073,
"active" : false,
"op" : "query",
"ns" : "ctp._afgenua71_ataarhus37_dfc-r_dtc-r_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:59698",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(7501602)
}
}
}
},
{
"desc" : "conn135891",
"threadId" : "0xd04ed7a80",
"connectionId" : 135891,
"opid" : 22719284,
"active" : false,
"op" : "query",
"ns" : "ctp._attianjin30_dfc-r_dtc-r_tof2_groupcf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33530",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(3000658)
}
}
}
},
{
"desc" : "conn135673",
"threadId" : "0xd04ed6220",
"connectionId" : 135673,
"opid" : 22718185,
"active" : false,
"op" : "query",
"ns" : "ctp._afwroclaw123_atlondyn49_df2016-06_dt2016-06_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:59925",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(16503737)
}
}
}
},
{
"desc" : "conn135989",
"threadId" : "0x10d44443c0",
"connectionId" : 135989,
"opid" : 22719240,
"active" : false,
"op" : "query",
"ns" : "ctp._cf28_atmarakesz93_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:34367",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(4500947)
}
}
}
},
{
"desc" : "conn135410",
"threadId" : "0x2d66220",
"connectionId" : 135410,
"opid" : 22717853,
"active" : true,
"secs_running" : 22,
"microsecs_running" : NumberLong(22406019),
"op" : "query",
"ns" : "ctp.$cmd",
"query" : {
"createIndexes" : "_cf71_df2016-07_dt2016-11_tof2_groupct",
"indexes" : [
{
"key" : {
"expireAt" : 1
},
"name" : "exAt",
"background" : true,
"expireAfterSeconds" : 0
}
]
},
"client" : "0.0.0.0:0",
"numYields" : 0,
"locks" : {
"Global" : "w",
"Database" : "W"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(376227),
"w" : NumberLong(15477)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(180375),
"w" : NumberLong(15476),
"W" : NumberLong(1)
},
"acquireWaitCount" : {
"W" : NumberLong(1)
},
"timeAcquiringMicros" : {
"W" : NumberLong(22004935)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(180375),
"w" : NumberLong(15476)
}
}
}
},
{
"desc" : "conn135961",
"threadId" : "0x10d4442b60",
"connectionId" : 135961,
"opid" : 22718537,
"active" : false,
"op" : "query",
"ns" : "ctp._cf5_atattawapiskat23_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:34029",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15002978)
}
}
}
},
{
"desc" : "conn135905",
"threadId" : "0xc3e5d6000",
"connectionId" : 135905,
"opid" : 22718186,
"active" : false,
"op" : "query",
"ns" : "ctp._afwarszawa123_atdubrownik61_df2016-08_dt2016-08_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:33638",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(16503305)
}
}
}
}
]
db.serverStatus()["rocksdb"];
{
"stats" : [
"",
"** Compaction Stats [default] **",
"Level Files Size(MB) Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) Comp(cnt) Avg(sec) Stall(cnt) KeyIn KeyDrop",
"---------------------------------------------------------------------------------------------------------------------------------------------------------------------",
" L0 0/0 0.00 0.0 0.0 0.0 0.0 1.4 1.4 0.0 0.0 0.0 120.1 12 39 0.312 0 0 0",
" L4 0/0 0.00 0.0 1.8 1.8 0.0 1.7 1.7 0.0 1.0 102.0 99.7 18 11 1.606 7 21M 153K",
" L5 15/0 620.47 1.0 6.6 1.4 5.2 5.5 0.3 0.0 3.9 44.4 37.0 152 25 6.086 0 110M 840K",
" L6 106/0 6401.43 0.0 3.5 0.3 3.3 3.3 -0.0 0.0 12.6 25.9 23.7 140 7 20.057 0 162M 14M",
" Sum 121/0 7021.90 0.0 11.9 3.4 8.5 11.9 3.4 0.0 8.3 37.8 37.8 322 82 3.932 7 295M 15M",
" Int 0/0 0.00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0.000 0 0 0",
"Flush(GB): cumulative 1.429, interval 0.000",
"Stalls(count): 0 level0_slowdown, 0 level0_slowdown_with_compaction, 0 level0_numfiles, 0 level0_numfiles_with_compaction, 0 pending_compaction_bytes, 0 memtable_compaction, 7 leveln_slowdown_soft, 0 leveln_slowdown_hard",
"",
"** DB Stats **",
"Uptime(secs): 34952.0 total, 0.2 interval",
"Cumulative writes: 4990K writes, 17M keys, 4989K batches, 1.0 writes per batch, ingest: 2.02 GB, 0.06 MB/s",
"Cumulative WAL: 4990K writes, 0 syncs, 4990122.00 writes per sync, written: 2.02 GB, 0.06 MB/s",
"Cumulative compaction: 11.90 GB write, 0.35 MB/s write, 11.90 GB read, 0.35 MB/s read, 322.4 seconds",
"Cumulative stall: 00:00:3.548 H:M:S, 0.0 percent",
"Interval writes: 0 writes, 0 keys, 0 batches, 0.0 writes per batch, ingest: 0.00 MB, 0.00 MB/s",
"Interval WAL: 0 writes, 0 syncs, 0.00 writes per sync, written: 0.00 MB, 0.00 MB/s",
"Interval compaction: 0.00 GB write, 0.00 MB/s write, 0.00 GB read, 0.00 MB/s read, 0.0 seconds",
"Interval stall: 00:00:0.000 H:M:S, 0.0 percent"
],
"num-immutable-mem-table" : "0",
"mem-table-flush-pending" : "0",
"compaction-pending" : "0",
"background-errors" : "0",
"cur-size-active-mem-table" : "33MB",
"cur-size-all-mem-tables" : "33MB",
"num-entries-active-mem-table" : "185495",
"num-entries-imm-mem-tables" : "0",
"estimate-table-readers-mem" : "91MB",
"num-snapshots" : "1",
"oldest-snapshot-time" : "1465911051",
"num-live-versions" : "1",
"total-live-recovery-units" : 60,
"block-cache-usage" : "34GB",
"transaction-engine-keys" : NumberLong(4210),
"transaction-engine-snapshots" : NumberLong(1),
"thread-status" : []
}
db.serverStatus()['globalLock'];
{
"totalTime" : NumberLong(34952090000),
"currentQueue" : {
"total" : 57,
"readers" : 56,
"writers" : 1
},
"activeClients" : {
"total" : 124,
"readers" : 57,
"writers" : 1
}
}
And screen from mongostat:
Best Regards
Creation of new collections has no point. It is an engineering flaw. This amount of collections surely will freeze db server because it is forced to walk over list of available collections and check a presence of collection in the list. Also, I believe, the are some operations like metadata updates involved.
From the operations log it is getting clear that building an index takes significant time
"query" : {
"createIndexes" : "_cf71_df2016-07_dt2016-11_tof2_groupct",
"indexes" : [
{
"key" : {
"expireAt" : 1
},
"name" : "exAt",
"background" : true,
"expireAfterSeconds" : 0
}
]
}
Also, things like acquiring locks take time, which are necessary for creation new namespace and index building.
Again, parameters like "expireAfterSeconds" : 0
are not wise in such scale. Basically, you will have to run 1.4M timers every 60 seconds to find and prune expired records and ensure that index will be rebuilt (see those deletes on mongostat).
Please, consider changes toward reducing number of collections or splitting collections between deployments. Also you can drop indexes for inactive collections. Also, it would be wise to drop all empty & unused collections.
For immediate solution you have to find current bottlenecks: RAM, CPU or IOpS.
You have extra RAM, you can give it to MongoDB, it will help to cache more and avoid unnecessary reads. Measure your IOpS via iostat
to see how busy is your drive.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With