Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

MongoDB Aggregate how to pair relevant records for processing

I've got some event data captured in a MongoDB database, and some of these events occur in pairs.

Eg: DOOR_OPEN and DOOR_CLOSE are two events that occur in pairs

Events collection:

{ _id: 1, name: "DOOR_OPEN", userID: "user1", timestamp: t }
{ _id: 2, name: "DOOR_OPEN", userID: "user2", timestamp: t+5 }
{ _id: 3, name: "DOOR_CLOSE", userID: "user1", timestamp:t+10 }
{ _id: 4, name: "DOOR_OPEN", userID: "user1", timestamp:t+30 }
{ _id: 5, name: "SOME_OTHER_EVENT", userID: "user3", timestamp:t+35 }
{ _id: 6, name: "DOOR_CLOSE", userID: "user2", timestamp:t+40 }
...

Assuming the records are sorted on the timestamp, the _id: 1 and _id: 3 are a "pair" for "user1. _id: 2 and _id: 6 for "user2".

I'd like to take all these DOOR_OPEN & DOOR_CLOSE pairs per user and calculate the average duration etc. the door has been opened by each user.

Can this be achieved using the aggregate framework?

like image 951
Anusha Dharmasena Avatar asked Oct 16 '22 10:10

Anusha Dharmasena


2 Answers

You can use $lookup and $group for achieving this.

db.getCollection('TestColl').aggregate([
{ $match: {"name": { $in: [ "DOOR_OPEN", "DOOR_CLOSE" ] } }},
{ $lookup:
       {
         from: "TestColl",
         let: { userID_lu: "$userID", name_lu: "$name", timestamp_lu :"$timestamp" },
         pipeline: [
              { $match:
                 { $expr:
                    { $and:
                       [
                         { $eq: [ "$userID",  "$$userID_lu" ] },
                         { $eq: [ "$$name_lu", "DOOR_OPEN" ]},
                         { $eq: [ "$name", "DOOR_CLOSE" ]},
                         { $gt: [ "$timestamp", "$$timestamp_lu" ] }
                       ]
                    }
                 }
              },              
           ],
         as: "close_dates"
       }
},
{ $addFields: { "close_time": { $arrayElemAt: [ "$close_dates.timestamp", 0 ] }  } },
{ $addFields: { "time_diff": { $divide: [ { $subtract: [ "$close_time", "$timestamp" ] }, 1000 * 60 ]} } }, // Minutes
{ $group: { _id: "$userID" , 
    events: { $push: { "eventId": "$_id", "name": "$name",  "timestamp": "$timestamp" } },
    averageTimestamp: {$avg: "$time_diff"}
    }
}
])

Sample Data:

[
{ _id: 1, name: "DOOR_OPEN", userID: "user1", timestamp: ISODate("2019-10-24T08:00:00Z") },
{ _id: 2, name: "DOOR_OPEN", userID: "user2", timestamp: ISODate("2019-10-24T08:05:00Z") },
{ _id: 3, name: "DOOR_CLOSE", userID: "user1", timestamp:ISODate("2019-10-24T08:10:00Z") },
{ _id: 4, name: "DOOR_OPEN", userID: "user1", timestamp:ISODate("2019-10-24T08:30:00Z") },
{ _id: 5, name: "SOME_OTHER_EVENT", userID: "user3", timestamp:ISODate("2019-10-24T08:35:00Z") },
{ _id: 6, name: "DOOR_CLOSE", userID: "user2", timestamp:ISODate("2019-10-24T08:40:00Z") },
{ _id: 7, name: "DOOR_CLOSE", userID: "user1", timestamp:ISODate("2019-10-24T08:50:00Z") },
{ _id: 8, name: "DOOR_OPEN", userID: "user2", timestamp:ISODate("2019-10-24T08:55:00Z") }
]

Result:

/* 1 */
{
    "_id" : "user2",
    "events" : [ 
        {
            "eventId" : 2.0,
            "name" : "DOOR_OPEN",
            "timestamp" : ISODate("2019-10-24T08:05:00.000Z")
        }, 
        {
            "eventId" : 6.0,
            "name" : "DOOR_CLOSE",
            "timestamp" : ISODate("2019-10-24T08:40:00.000Z")
        }, 
        {
            "eventId" : 8.0,
            "name" : "DOOR_OPEN",
            "timestamp" : ISODate("2019-10-24T08:55:00.000Z")
        }
    ],
    "averageTimestamp" : 35.0
}

/* 2 */
{
    "_id" : "user1",
    "events" : [ 
        {
            "eventId" : 1.0,
            "name" : "DOOR_OPEN",
            "timestamp" : ISODate("2019-10-24T08:00:00.000Z")
        }, 
        {
            "eventId" : 3.0,
            "name" : "DOOR_CLOSE",
            "timestamp" : ISODate("2019-10-24T08:10:00.000Z")
        }, 
        {
            "eventId" : 4.0,
            "name" : "DOOR_OPEN",
            "timestamp" : ISODate("2019-10-24T08:30:00.000Z")
        }, 
        {
            "eventId" : 7.0,
            "name" : "DOOR_CLOSE",
            "timestamp" : ISODate("2019-10-24T08:50:00.000Z")
        }
    ],
    "averageTimestamp" : 15.0
}
like image 142
Serkan Arslan Avatar answered Nov 02 '22 12:11

Serkan Arslan


You could use the $group operator of the aggregate framework to group by userID and calculate the averages:

db.events.aggregate([{
    $group: {
        _id: "$userID",
        averageTimestamp: {$avg: "$timestamp"}
    }
}]);

If you also want to discard any other event other than DOOR_OPEN or DOOR_CLOSED, you can add a filter adding a $match in the aggregate pipeline:

db.events.aggregate([{
    $match: {
        $or: [{name: "DOOR_OPEN"},{name: "DOOR_CLOSE"}]
    }
}, {
    $group: {
        _id: "$userID",
        averageTimestamp: {$avg: "$timestamp"}
    }
}]);
like image 36
David SN Avatar answered Nov 02 '22 13:11

David SN