Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Regex inside array in mongoDB

I want to query inside an array in MongoDB using a regex. The collection has documents like this:

{
"_id" : ObjectId("53340d07d6429d27e1284c77"),
"company" : "New Company",
"worktypes" : [ 
    {
        "name" : "Pompas",
        "works" : [ 
            {
                "name" : "name 2",
                "code" : "A00011",
                "price" : "22,22"
            }, 
            {
                "name" : "name 3",
                "code" : "A00011",
                "price" : "22,22"
            }, 
            {
                "name" : "name 4",
                "code" : "A00011",
                "price" : "22,22"
            }, 
            {
                "code" : "asdasd",
                "name" : "asdads",
                "price" : "22"
            }, 
            {
                "code" : "yy",
                "name" : "yy",
                "price" : "11"
            }
        ]
    }, 
    {
        "name" : "name 4",
        "works" : [ 
            {
                "code" : "A112",
                "name" : "Nombre",
                "price" : "11,2"
            }
        ]
    },          
    {
        "name" : "ee",
        "works" : [

            {
                "code" : "aa",
                "name" : "aa",
                "price" : "11"
            }, 
            {
                "code" : "A00112",
                "name" : "Nombre",
                "price" : "12,22"
            }
              ]
    }
]

}

Then I need to find a document by its company name where any work inside it has a code or name that matches a regex. I have this:

var companyquery = { "company": "New Company"};
var regQuery = new RegExp('^A0011.*$', 'i');

db.categories.find({$and: [companyquery,
            {$or: [
                {"worktypes.works.$.name": regQuery},
                {"worktypes.works.$.code": regQuery}
            ]}]})

But it doesn't return any results. I think the error is in trying to search inside the array using the dot and $. Any ideas?

Edit:

With this:

db.categories.find({$and: [{"company":"New Company"},
            {$or: [
                {"worktypes.works.name": {"$regex": "^A00011$|^a00011$"}},
                {"worktypes.works.code": {"$regex": "^A00011$|^a00011$"}}
            ]}]})

This is the result:

{
    "_id" : ObjectId("53340d07d6429d27e1284c77"),
    "company" : "New Company",
    "worktypes" : [ 
        {
            "name" : "Pompas",
            "works" : [ 
                {
                    "name" : "name 2",
                    "code" : "A00011",
                    "price" : "22,22"
                }, 
                {
                    "code" : "aa",
                    "name" : "aa",
                    "price" : "11"
                }, 
                {
                    "code" : "A00112",
                    "name" : "Nombre",
                    "price" : "12,22"
                }, 
                {
                    "code" : "asdasd",
                    "name" : "asdads",
                    "price" : "22"
                }, 
                {
                    "code" : "yy",
                    "name" : "yy",
                    "price" : "11"
                }
            ]
        }, 
        {
            "name" : "name 4",
            "works" : [ 
                {
                    "code" : "A112",
                    "name" : "Nombre",
                    "price" : "11,2"
                }
            ]
        }, 
        {
            "name" : "Bombillos"
        }, 
        {
            "name" : "Pompas"
        }, 
        {
            "name" : "Bombillos 2"
        }, 
        {
            "name" : "Other type"
        }, 
        {
            "name" : "Other new type"
        }
    ]
}

The regex doesn't filter the results correctly.

like image 998
colymore Avatar asked Apr 07 '14 09:04

colymore


2 Answers

You are using a JavaScript native RegExp object for the regular expression, however for mongo to process the regular expression it needs to be sent as part of the query document, and this is not the same thing.

Also, the regex will not match the values that you want. It could actually be ^A00011$ for an exact match, but your case-insensitive match causes a problem, forcing a larger scan of any possible index. So there is a better way to write that. Also see the documentation link for the problems with case-insensitive matches.

Use the $regex operator instead:

db.categories.find({
    "$and": [
        {"company":"New Company"},
        { "$or": [
            { "worktypes.works.name": { "$regex": "^A00011$|^a00011$" }},
            { "worktypes.works.code": { "$regex": "^A00011$|^a00011$" }}
        ]}
    ]
})

Also, the positional $ placeholder is not valid in a query; it is only used in a projection or an update, where it refers to the first matching element found by the query.

But your actual problem seems to be that you are trying to only get the elements of an array that "match" your conditions. You cannot do this with .find() and for that you need to use .aggregate() instead:

db.categories.aggregate([

    // Always makes sense to match the actual documents
    { "$match": {
        "$and": [
            {"company":"New Company"},
            { "$or": [
                { "worktypes.works.name": { "$regex": "^A00011$|^a00011$" }},
                { "worktypes.works.code": { "$regex": "^A00011$|^a00011$" }}
            ]}
        ]
    }},

    // Unwind the worktypes array
    { "$unwind": "$worktypes" },

    // Unwind the works array
    { "$unwind": "$worktypes.works" },

    // Then use match to filter only the matching entries
    { "$match": {
       "$or": [
            { "worktypes.works.name": { "$regex": "^A00011$|^a00011$" } },
            { "worktypes.works.code": { "$regex": "^A00011$|^a00011$" } }
        ]
    }},

    /* Stop */
    // If you "really" need the arrays back then include all the following
    // Otherwise the steps up to here actually got you your results

    // First put the "works" array back together
    { "$group": {
        "_id": {
            "_id": "$_id",
            "company": "$company",
            "workname": "$worktypes.name"
        },
        "works": { "$push": "$worktypes.works" }
    }},

    // Then put the "worktypes" array back
    { "$group": {
        "_id": "$_id._id",
        "company": { "$first": "$_id.company" },
        "worktypes": {
            "$push": {
                "name": "$_id.workname",
                "works": "$works"
            } 
        } 
    }}
])

So what .aggregate() does with all of these stages is it breaks the array elements into normal document form so they can be filtered using the $match operator. In that way, only the elements that "match" are returned.

What "find" is correctly doing is matching the "document" that meets the conditions. Since the document contains elements that match, the whole document is returned. The two principles are very different things.

When you mean to "filter" use aggregate.

like image 109
Neil Lunn Avatar answered Oct 01 '22 19:10

Neil Lunn


I think there is a typo:

The regex should be: ^A00011.*$

That is, a triple 0 instead of a double 0.

like image 20
aelor Avatar answered Oct 01 '22 20:10

aelor