I've got some event data captured in a MongoDB database, and some of these events occur in pairs.
Eg: DOOR_OPEN and DOOR_CLOSE are two events that occur in pairs
Events collection:
{ _id: 1, name: "DOOR_OPEN", userID: "user1", timestamp: t }
{ _id: 2, name: "DOOR_OPEN", userID: "user2", timestamp: t+5 }
{ _id: 3, name: "DOOR_CLOSE", userID: "user1", timestamp:t+10 }
{ _id: 4, name: "DOOR_OPEN", userID: "user1", timestamp:t+30 }
{ _id: 5, name: "SOME_OTHER_EVENT", userID: "user3", timestamp:t+35 }
{ _id: 6, name: "DOOR_CLOSE", userID: "user2", timestamp:t+40 }
...
Assuming the records are sorted on the timestamp, the _id: 1 and _id: 3 are a "pair" for "user1. _id: 2 and _id: 6 for "user2".
I'd like to take all these DOOR_OPEN & DOOR_CLOSE pairs per user and calculate the average duration etc. the door has been opened by each user.
Can this be achieved using the aggregate framework?
You can use $lookup
and $group for achieving this.
db.getCollection('TestColl').aggregate([
{ $match: {"name": { $in: [ "DOOR_OPEN", "DOOR_CLOSE" ] } }},
{ $lookup:
{
from: "TestColl",
let: { userID_lu: "$userID", name_lu: "$name", timestamp_lu :"$timestamp" },
pipeline: [
{ $match:
{ $expr:
{ $and:
[
{ $eq: [ "$userID", "$$userID_lu" ] },
{ $eq: [ "$$name_lu", "DOOR_OPEN" ]},
{ $eq: [ "$name", "DOOR_CLOSE" ]},
{ $gt: [ "$timestamp", "$$timestamp_lu" ] }
]
}
}
},
],
as: "close_dates"
}
},
{ $addFields: { "close_time": { $arrayElemAt: [ "$close_dates.timestamp", 0 ] } } },
{ $addFields: { "time_diff": { $divide: [ { $subtract: [ "$close_time", "$timestamp" ] }, 1000 * 60 ]} } }, // Minutes
{ $group: { _id: "$userID" ,
events: { $push: { "eventId": "$_id", "name": "$name", "timestamp": "$timestamp" } },
averageTimestamp: {$avg: "$time_diff"}
}
}
])
Sample Data:
[
{ _id: 1, name: "DOOR_OPEN", userID: "user1", timestamp: ISODate("2019-10-24T08:00:00Z") },
{ _id: 2, name: "DOOR_OPEN", userID: "user2", timestamp: ISODate("2019-10-24T08:05:00Z") },
{ _id: 3, name: "DOOR_CLOSE", userID: "user1", timestamp:ISODate("2019-10-24T08:10:00Z") },
{ _id: 4, name: "DOOR_OPEN", userID: "user1", timestamp:ISODate("2019-10-24T08:30:00Z") },
{ _id: 5, name: "SOME_OTHER_EVENT", userID: "user3", timestamp:ISODate("2019-10-24T08:35:00Z") },
{ _id: 6, name: "DOOR_CLOSE", userID: "user2", timestamp:ISODate("2019-10-24T08:40:00Z") },
{ _id: 7, name: "DOOR_CLOSE", userID: "user1", timestamp:ISODate("2019-10-24T08:50:00Z") },
{ _id: 8, name: "DOOR_OPEN", userID: "user2", timestamp:ISODate("2019-10-24T08:55:00Z") }
]
Result:
/* 1 */
{
"_id" : "user2",
"events" : [
{
"eventId" : 2.0,
"name" : "DOOR_OPEN",
"timestamp" : ISODate("2019-10-24T08:05:00.000Z")
},
{
"eventId" : 6.0,
"name" : "DOOR_CLOSE",
"timestamp" : ISODate("2019-10-24T08:40:00.000Z")
},
{
"eventId" : 8.0,
"name" : "DOOR_OPEN",
"timestamp" : ISODate("2019-10-24T08:55:00.000Z")
}
],
"averageTimestamp" : 35.0
}
/* 2 */
{
"_id" : "user1",
"events" : [
{
"eventId" : 1.0,
"name" : "DOOR_OPEN",
"timestamp" : ISODate("2019-10-24T08:00:00.000Z")
},
{
"eventId" : 3.0,
"name" : "DOOR_CLOSE",
"timestamp" : ISODate("2019-10-24T08:10:00.000Z")
},
{
"eventId" : 4.0,
"name" : "DOOR_OPEN",
"timestamp" : ISODate("2019-10-24T08:30:00.000Z")
},
{
"eventId" : 7.0,
"name" : "DOOR_CLOSE",
"timestamp" : ISODate("2019-10-24T08:50:00.000Z")
}
],
"averageTimestamp" : 15.0
}
You could use the $group
operator of the aggregate framework to group by userID and calculate the averages:
db.events.aggregate([{
$group: {
_id: "$userID",
averageTimestamp: {$avg: "$timestamp"}
}
}]);
If you also want to discard any other event other than DOOR_OPEN or DOOR_CLOSED, you can add a filter adding a $match
in the aggregate pipeline:
db.events.aggregate([{
$match: {
$or: [{name: "DOOR_OPEN"},{name: "DOOR_CLOSE"}]
}
}, {
$group: {
_id: "$userID",
averageTimestamp: {$avg: "$timestamp"}
}
}]);
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With