Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Conditionally Include Aggregation Pipeline Stages

I have a function which gets me some orders based on the given parameters. However, the status parameter can be empty; in that case I want to leave the $match stage out entirely.

This is the code which I currently have:

// Derive the value used for the "status.status" condition from req.query.status.
// NOTE(review): typeof never yields 'array' (arrays report 'object'), so the
// first branch below is never taken and array input falls into the second branch.
if(req.query.status && typeof(req.query.status) == 'array'){
        // Intended for a list of statuses: match any of the listed values.
        var match = {
            $in: req.query.status
        };
    }else if(req.query.status){
        // A status value was supplied: use it directly as the condition.
        var match = req.query.status;
    }else{
        // No status supplied: the intent is to find all statuses, but null is
        // later used as a literal match value rather than disabling the filter.
        var match = null;
    }


 // Aggregate the orders of one shop, filtering the entries of each order's
 // status array by the previously computed `match` value.
 Order.aggregate(
    // Select the orders whose shop.nameSlug equals the requested nameSlug.
    {
        $match: {
            'shop.nameSlug' : req.query.nameSlug,
        }
    },
    // Emit one document per element of the status array.
    {
        $unwind: "$status"
    },
    // Keep only the unwound entries whose status.status satisfies `match`.
    // NOTE: when `match` is null this compares against null instead of
    // skipping the filter, which is the problem described in the question.
    {
        $match: {
            "status.status" : match
        }
    },
    // Reassemble one document per order _id, collecting the remaining status
    // entries and taking the first number, date and comment seen.
    {
        $group: {
            _id: "$_id",
            status: {
                $addToSet: "$status"
            },
            number: {
                $first: "$number"
            },
            date: {
                $first: "$date"
            },
            comment: {
                $first: "$comment"
            }
        }
    }
).exec(function(err, orders){
    // Result handling is omitted in this snippet.
})

When match is null, no orders are returned, because MongoDB searches for fields equal to null. How can I remove that $match when match == null?

like image 259
NVO Avatar asked Sep 22 '17 10:09

NVO


People also ask

How many stages must the aggregate function include in the aggregation pipeline?

Here, the aggregate() function is used to perform aggregation; it can have three operators: stages, expression and accumulator.

Which of these aggregation pipeline stages will allow you to process multiple aggregation?

aggregate() method. Creates new documents in a sequence of documents where certain values in a field are missing. Returns literal documents from input values. Processes multiple aggregation pipelines within a single stage on the same set of input documents.

Which of the following pipeline is used for aggregation in MongoDB?

Mongoid exposes MongoDB's aggregation pipeline, which is used to construct flows of operations that process and return results. The aggregation pipeline is a superset of the deprecated map/reduce framework functionality.


1 Answer

What you mean to do is build the whole pipeline depending on the options provided. It is just a data structure, after all.

You are also incorrectly testing for an "array" and you should be using instanceof because typeof would actually return "object" and not "array".

Moreover you really want that condition in the first pipeline stage to optimally select the documents as well, in addition to being added after $unwind where needed:

// Assemble the aggregation pipeline as a plain array, adding the status
// conditions only when req.query.status was supplied.
var requestedStatus = req.query.status;

// Returns a fresh query condition: "$in" for an array, the bare value otherwise.
function statusCondition() {
  if (requestedStatus instanceof Array) {
    return { "$in": requestedStatus };
  }
  return requestedStatus;
}

// First stage: select by shop, and by status too when one was requested, so
// documents are narrowed down before $unwind runs.
var initialMatch = { 'shop.nameSlug' : req.query.nameSlug };
if (requestedStatus) {
  initialMatch["status.status"] = statusCondition();
}

var pipeline = [
  { $match: initialMatch },
  { $unwind: "$status" }
];

// After $unwind, drop the individual status entries that do not qualify.
if (requestedStatus) {
  pipeline.push({ "$match": { "status.status": statusCondition() } });
}

// Put each order back together from its remaining status entries.
pipeline.push({
  $group: {
    _id: "$_id",
    status: { $addToSet: "$status" },
    number: { $first: "$number" },
    date: { $first: "$date" },
    comment: { $first: "$comment" }
  }
});


// Run the assembled pipeline; the callback is declared with (err, orders).
Order.aggregate(pipeline).exec((err, orders) => {
  // Result handling is intentionally left empty in this example.
});

Given a req object with something present in status you get:

// Example structure
var req = {
  query: { 
   nameSlug: "Bill", 
   status: "A"
  },
};

// Pipeline output as:

[
    {
        "$match" : {
            "shop.nameSlug" : "Bill",
            "status.status" : "A"
        }
    },
    {
        "$unwind" : "$status"
    },
    {
        "$match" : {
            "status.status" : "A"
        }
    },
    {
        "$group" : {
            "_id" : "$_id",
            "status" : {
                "$addToSet" : "$status"
            },
            "number" : {
                "$first" : "$number"
            },
            "date" : {
                "$first" : "$date"
            },
            "comment" : {
                "$first" : "$comment"
            }
        }
    }
]

With an array:

var req = {
  query: { 
   nameSlug: "Bill", 
   status: ["A","B"]
  },
};

// Pipeline output as:
[
    {
        "$match" : {
            "shop.nameSlug" : "Bill",
            "status.status" : {
                "$in" : [ 
                    "A", 
                    "B"
                ]
            }
        }
    },
    {
        "$unwind" : "$status"
    },
    {
        "$match" : {
            "status.status" : {
                "$in" : [ 
                    "A", 
                    "B"
                ]
            }
        }
    },
    {
        "$group" : {
            "_id" : "$_id",
            "status" : {
                "$addToSet" : "$status"
            },
            "number" : {
                "$first" : "$number"
            },
            "date" : {
                "$first" : "$date"
            },
            "comment" : {
                "$first" : "$comment"
            }
        }
    }
]

And with nothing:

var req = {
  query: { 
   nameSlug: "Bill", 
   //status: ["A","B"]
  },
};

// Pipeline output as:
[
    {
        "$match" : {
            "shop.nameSlug" : "Bill"
        }
    },
    {
        "$unwind" : "$status"
    },
    {
        "$group" : {
            "_id" : "$_id",
            "status" : {
                "$addToSet" : "$status"
            },
            "number" : {
                "$first" : "$number"
            },
            "date" : {
                "$first" : "$date"
            },
            "comment" : {
                "$first" : "$comment"
            }
        }
    }
]

So you can see where the parts are conditionally included depending on the values provided.


Using $filter

You really should be using $filter here instead. It's a lot more efficient than $unwind and you really are not grouping anything. All you really want is filtered arrays. So that's all you conditionally add:

// Assemble the pipeline: always match on the shop, and only when a status was
// requested also match on it and filter the status array in place.
var wantedStatus = req.query.status;
var wantsMany = wantedStatus instanceof Array;

// Document selection: shop slug, plus the status condition when present.
var selection = { 'shop.nameSlug' : req.query.nameSlug };
if (wantedStatus) {
  selection["status.status"] = wantsMany ? { "$in": wantedStatus } : wantedStatus;
}

var pipeline = [{ $match: selection }];

if (wantedStatus) {
  // "$in" tests membership in the supplied array; "$eq" compares a single value.
  var comparison = {};
  comparison[wantsMany ? "$in" : "$eq"] = [ "$$this.status", wantedStatus ];

  // Replace the status array with only the entries that pass the comparison.
  pipeline.push({
    "$addFields": {
      "status": {
        "$filter": {
          "input": "$status",
          "cond": comparison
        }
      }
    }
  });
}

The choice there is between $in and $eq for the comparison test, depending on what is supplied. You can optionally wrap the whole thing in $setUnion if you "really mean" to use a "set" in the result. But it's generally looking like you simply want to "filter" values out of the array.

With the same expectations of a single value:

var req = {
  query: { 
   nameSlug: "Bill", 
   //status: ["A","B"]
   status: "A"
  },
};

/* 1 */
[
    {
        "$match" : {
            "shop.nameSlug" : "Bill",
            "status.status" : "A"
        }
    },
    {
        "$addFields" : {
            "status" : {
                "$filter" : {
                    "input" : "$status",
                    "cond" : {
                        "$eq" : [ 
                            "$$this.status", 
                            "A"
                        ]
                    }
                }
            }
        }
    }
]

An array:

var req = {
  query: { 
   nameSlug: "Bill", 
   status: ["A","B"]
  },
};

/* 1 */
[
    {
        "$match" : {
            "shop.nameSlug" : "Bill",
            "status.status" : {
                "$in" : [ 
                    "A", 
                    "B"
                ]
            }
        }
    },
    {
        "$addFields" : {
            "status" : {
                "$filter" : {
                    "input" : "$status",
                    "cond" : {
                        "$in" : [ 
                            "$$this.status", 
                            [ 
                                "A", 
                                "B"
                            ]
                        ]
                    }
                }
            }
        }
    }
]

Or nothing:

var req = {
  query: { 
   nameSlug: "Bill", 
   //status: ["A","B"]
  },
};

/* 1 */
[
    {
        "$match" : {
            "shop.nameSlug" : "Bill"
        }
    }
]

Where if you don't need to filter, then you simply drop the additional pipeline stage.

like image 81
Neil Lunn Avatar answered Oct 13 '22 16:10

Neil Lunn