Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to perform a $text search on a 'joined' collection via $lookup?

i’m new at Mongo, using v3.2. I have 2 collections Parent & Child. I’d like to use Parent.aggregate and use $lookup to “join” Child then perform $text $search on a field in Child and a date-range seach on the parent. Is this possible...?

like image 434
Cory Robinson Avatar asked Mar 31 '16 23:03

Cory Robinson


1 Answers

In line with the comments already given, it is true that you cannot perform a $text search on the results of a $lookupsince there would not be an available index at any stage other than the very first pipeline stage. And it is true that especially considering that you really want the "join" to occur based on the results from the "child" collection, then it would indeed be better to search on the "child" instead.

Which brings the obvious conclusion that in order to do this you perform the aggregation on the "child" collection with the initial $text query and then $lookup the "parent" instead of the other way around.

As a working example, and just using the core driver for demonstration purposes:

MongoClient.connect('mongodb://localhost/rlookup',function(err,db) {
  if (err) throw err;

  var Parent = db.collection('parents');
  var Child = db.collection('children');

  async.series(
    [
      // Cleanup
      function(callback) {
        async.each([Parent,Child],function(coll,callback) {
          coll.deleteMany({},callback);
        },callback);
      },
      // Create Index
      function(callback) {
        Child.createIndex({ "text": "text" },callback);
      },
      // Create Documents
      function(callback) {
        async.parallel(
          [
            function(callback) {
              Parent.insertMany(
                [
                  { "_id": 1, "name": "Parent 1" },
                  { "_id": 2, "name": "Parent 2" },
                  { "_id": 3, "name": "Parent 3" }
                ],
                callback
              );
            },
            function(callback) {
              Child.insertMany(
                [
                  {
                    "_id": 1,
                    "parent": 1,
                    "text": "The little dog laughed to see such fun"
                  },
                  {
                    "_id": 2,
                    "parent": 1,
                    "text": "The quick brown fox jumped over the lazy dog"
                  },
                  {
                    "_id": 3,
                    "parent": 1,
                    "text": "The dish ran away with the spoon"
                  },
                  {
                    "_id": 4,
                    "parent": 2,
                    "text": "Miss muffet on here tuffet"
                  },
                  {
                    "_id": 5,
                    "parent": 3,
                    "text": "Lady is a fox"
                  },
                  {
                    "_id": 6,
                    "parent": 3,
                    "text": "Every dog has it's day"
                  }
                ],
                callback
              )
            }
          ],
          callback
        );
      },
      // Aggregate with $text and $lookup
      function(callback) {
        Child.aggregate(
          [
            { "$match": {
              "$text": { "$search": "fox dog" }
            }},
            { "$project": {
              "parent": 1,
              "text": 1,
              "score": { "$meta": "textScore" }
            }},
            { "$sort": { "score": { "$meta": "textScore" } } },
            { "$lookup": {
              "from": "parents",
              "localField": "parent",
              "foreignField": "_id",
              "as": "parent"
            }},
            { "$unwind": "$parent" },
            { "$group": {
              "_id": "$parent._id",
              "name": { "$first": "$parent.name" },
              "children": {
                "$push": {
                  "_id": "$_id",
                  "text": "$text",
                  "score": "$score"
                }
              },
              "score": { "$sum": "$score" }
            }},
            { "$sort": { "score": -1 } }
          ],
          function(err,result) {
            console.log(JSON.stringify(result,undefined,2));
            callback(err);
          }
        )
      }
    ],
    function(err) {
      if (err) throw err;
      db.close();
    }
  );

});

This results in the $text matches from the query on the Child populated within each Parent, as well as being ordered by "score":

[
  {
    "_id": 1,
    "name": "Parent 1",
    "children": [
      {
        "_id": 2,
        "text": "The quick brown fox jumped over the lazy dog",
        "score": 1.1666666666666667
      },
      {
        "_id": 1,
        "text": "The little dog laughed to see such fun",
        "score": 0.6
      }
    ],
    "score": 1.7666666666666666
  },
  {
    "_id": 3,
    "name": "Parent 3",
    "children": [
      {
        "_id": 5,
        "text": "Lady is a fox",
        "score": 0.75
      },
      {
        "_id": 6,
        "text": "Every dog has it's day",
        "score": 0.6666666666666666
      }
    ],
    "score": 1.4166666666666665
  }
]

This ultimately makes sense and will be a lot more efficient than querying from the "parent" to find all "children" in a $lookup and then "post filtering" with $match to remove any "children" that did not meet criteria, and then subsequently discarding the "parents" without any match.

The same case is true for mongoose style "referencing" where you included an "array" of "children" within the "parent" instead of recording on the child. So as long as the "localField" on the child ( _id in that case ) is the same type as defined within the array on the parent as "foriegnField" ( which is will be if it was working with .populate() anyway ) then you are still getting the matched "parent(s)" for each "child" in the $lookup result.

This all comes down to reversing your thinking and realizing that the $text results are the most important thing, and therefore "that" is the collection on which the operation needs to be initiated.

It's possible, but just do it the other way around.


Using mongoose style with list of referenced children in the parent

Just showing the reverse case for references on the parent as well as date filtering:

var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

mongoose.connect('mongodb://localhost/rlookup');

var parentSchema = new Schema({
  "_id": Number,
  "name": String,
  "date": Date,
  "children": [{ "type": Number, "ref": "Child" }]
});

var childSchema = new Schema({
  "_id": Number,
  "text": { "type": String, "index": "text" }
},{ "autoIndex": false });

var Parent = mongoose.model("Parent",parentSchema),
    Child = mongoose.model("Child",childSchema);

async.series(
  [
    function(callback) {
      async.each([Parent,Child],function(model,callback) {
        model.remove({},callback);
      },callback);
    },
    function(callback) {
      Child.ensureIndexes({ "background": false },callback);
    },
    function(callback) {
      async.parallel(
        [
          function(callback) {
            Parent.create([
              {
                "_id": 1,
                "name": "Parent 1",
                "date": new Date("2016-02-01"),
                "children": [1,2]
              },
              {
                "_id": 2,
                "name": "Parent 2",
                "date": new Date("2016-02-02"),
                "children": [3,4]
              },
              {
                "_id": 3,
                "name": "Parent 3",
                "date": new Date("2016-02-03"),
                "children": [5,6]
              },
              {
                "_id": 4,
                "name": "Parent 4",
                "date": new Date("2016-01-15"),
                "children": [1,2,6]
              }
            ],callback)
          },
          function(callback) {
            Child.create([
              {
                "_id": 1,
                "text": "The little dog laughed to see such fun"
              },
              {
                "_id": 2,
                "text": "The quick brown fox jumped over the lazy dog"
              },
              {
                "_id": 3,
                "text": "The dish ran awy with the spoon"
              },
              {
                "_id": 4,
                "text": "Miss muffet on her tuffet"
              },
              {
                "_id": 5,
                "text": "Lady is a fox"
              },
              {
                "_id": 6,
                "text": "Every dog has it's day"
              }
            ],callback);
          }
        ],
        callback
      );
    },
    function(callback) {
      Child.aggregate(
        [
          { "$match": {
            "$text": { "$search": "fox dog" }
          }},
          { "$project": {
            "text": 1,
            "score": { "$meta": "textScore" }
          }},
          { "$sort": { "score": { "$meta": "textScore" } } },
          { "$lookup": {
            "from": "parents",
            "localField": "_id",
            "foreignField": "children",
            "as": "parent"
          }},
          { "$project": {
            "text": 1,
            "score": 1,
            "parent": {
              "$filter": {
                "input": "$parent",
                "as": "parent",
                "cond": {
                  "$and": [
                    { "$gte": [ "$$parent.date", new Date("2016-02-01") ] },
                    { "$lt": [ "$$parent.date", new Date("2016-03-01") ] }
                  ]
                }
              }
            }
          }},
          { "$unwind": "$parent" },
          { "$group": {
            "_id": "$parent._id",
            "name": { "$first": "$parent.name" },
            "date": { "$first": "$parent.date" },
            "children": {
              "$push": {
                "_id": "$_id",
                "text": "$text",
                "score": "$score"
              }
            },
            "score": { "$sum": "$score" }
          }},
          { "$sort": { "score": -1 } }
        ],
        function(err,result) {
          console.log(JSON.stringify(result,undefined,2));
          callback(err);
        }
      )
    }
  ],
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);

With the output:

[
  {
    "_id": 1,
    "name": "Parent 1",
    "date": "2016-02-01T00:00:00.000Z",
    "children": [
      {
        "_id": 2,
        "text": "The quick brown fox jumped over the lazy dog",
        "score": 1.1666666666666667
      },
      {
        "_id": 1,
        "text": "The little dog laughed to see such fun",
        "score": 0.6
      }
    ],
    "score": 1.7666666666666666
  },
  {
    "_id": 3,
    "name": "Parent 3",
    "date": "2016-02-03T00:00:00.000Z",
    "children": [
      {
        "_id": 5,
        "text": "Lady is a fox",
        "score": 0.75
      },
      {
        "_id": 6,
        "text": "Every dog has it's day",
        "score": 0.6666666666666666
      }
    ],
    "score": 1.4166666666666665
  }
]

Noting that the "Parent 4" which would otherwise of had the largest ranking is removed since the date does not fall in the query range applied with $filter.

like image 190
Neil Lunn Avatar answered Sep 29 '22 01:09

Neil Lunn