i’m new at Mongo, using v3.2. I have 2 collections Parent & Child. I’d like to use Parent.aggregate and use $lookup to “join” Child then perform $text $search on a field in Child and a date-range seach on the parent. Is this possible...?
In line with the comments already given, it is true that you cannot perform a $text
search on the results of a $lookup
since there would not be an available index at any stage other than the very first pipeline stage. And it is true that especially considering that you really want the "join" to occur based on the results from the "child" collection, then it would indeed be better to search on the "child" instead.
Which brings the obvious conclusion that in order to do this you perform the aggregation on the "child" collection with the initial $text
query and then $lookup
the "parent" instead of the other way around.
As a working example, and just using the core driver for demonstration purposes:
MongoClient.connect('mongodb://localhost/rlookup',function(err,db) {
if (err) throw err;
var Parent = db.collection('parents');
var Child = db.collection('children');
async.series(
[
// Cleanup
function(callback) {
async.each([Parent,Child],function(coll,callback) {
coll.deleteMany({},callback);
},callback);
},
// Create Index
function(callback) {
Child.createIndex({ "text": "text" },callback);
},
// Create Documents
function(callback) {
async.parallel(
[
function(callback) {
Parent.insertMany(
[
{ "_id": 1, "name": "Parent 1" },
{ "_id": 2, "name": "Parent 2" },
{ "_id": 3, "name": "Parent 3" }
],
callback
);
},
function(callback) {
Child.insertMany(
[
{
"_id": 1,
"parent": 1,
"text": "The little dog laughed to see such fun"
},
{
"_id": 2,
"parent": 1,
"text": "The quick brown fox jumped over the lazy dog"
},
{
"_id": 3,
"parent": 1,
"text": "The dish ran away with the spoon"
},
{
"_id": 4,
"parent": 2,
"text": "Miss muffet on here tuffet"
},
{
"_id": 5,
"parent": 3,
"text": "Lady is a fox"
},
{
"_id": 6,
"parent": 3,
"text": "Every dog has it's day"
}
],
callback
)
}
],
callback
);
},
// Aggregate with $text and $lookup
function(callback) {
Child.aggregate(
[
{ "$match": {
"$text": { "$search": "fox dog" }
}},
{ "$project": {
"parent": 1,
"text": 1,
"score": { "$meta": "textScore" }
}},
{ "$sort": { "score": { "$meta": "textScore" } } },
{ "$lookup": {
"from": "parents",
"localField": "parent",
"foreignField": "_id",
"as": "parent"
}},
{ "$unwind": "$parent" },
{ "$group": {
"_id": "$parent._id",
"name": { "$first": "$parent.name" },
"children": {
"$push": {
"_id": "$_id",
"text": "$text",
"score": "$score"
}
},
"score": { "$sum": "$score" }
}},
{ "$sort": { "score": -1 } }
],
function(err,result) {
console.log(JSON.stringify(result,undefined,2));
callback(err);
}
)
}
],
function(err) {
if (err) throw err;
db.close();
}
);
});
This results in the $text
matches from the query on the Child
populated within each Parent
, as well as being ordered by "score"
:
[
{
"_id": 1,
"name": "Parent 1",
"children": [
{
"_id": 2,
"text": "The quick brown fox jumped over the lazy dog",
"score": 1.1666666666666667
},
{
"_id": 1,
"text": "The little dog laughed to see such fun",
"score": 0.6
}
],
"score": 1.7666666666666666
},
{
"_id": 3,
"name": "Parent 3",
"children": [
{
"_id": 5,
"text": "Lady is a fox",
"score": 0.75
},
{
"_id": 6,
"text": "Every dog has it's day",
"score": 0.6666666666666666
}
],
"score": 1.4166666666666665
}
]
This ultimately makes sense and will be a lot more efficient than querying from the "parent" to find all "children" in a $lookup
and then "post filtering" with $match
to remove any "children" that did not meet criteria, and then subsequently discarding the "parents" without any match.
The same case is true for mongoose style "referencing" where you included an "array" of "children" within the "parent" instead of recording on the child. So as long as the "localField"
on the child ( _id
in that case ) is the same type as defined within the array on the parent as "foriegnField"
( which is will be if it was working with .populate()
anyway ) then you are still getting the matched "parent(s)" for each "child" in the $lookup
result.
This all comes down to reversing your thinking and realizing that the $text
results are the most important thing, and therefore "that" is the collection on which the operation needs to be initiated.
It's possible, but just do it the other way around.
Just showing the reverse case for references on the parent as well as date filtering:
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/rlookup');
var parentSchema = new Schema({
"_id": Number,
"name": String,
"date": Date,
"children": [{ "type": Number, "ref": "Child" }]
});
var childSchema = new Schema({
"_id": Number,
"text": { "type": String, "index": "text" }
},{ "autoIndex": false });
var Parent = mongoose.model("Parent",parentSchema),
Child = mongoose.model("Child",childSchema);
async.series(
[
function(callback) {
async.each([Parent,Child],function(model,callback) {
model.remove({},callback);
},callback);
},
function(callback) {
Child.ensureIndexes({ "background": false },callback);
},
function(callback) {
async.parallel(
[
function(callback) {
Parent.create([
{
"_id": 1,
"name": "Parent 1",
"date": new Date("2016-02-01"),
"children": [1,2]
},
{
"_id": 2,
"name": "Parent 2",
"date": new Date("2016-02-02"),
"children": [3,4]
},
{
"_id": 3,
"name": "Parent 3",
"date": new Date("2016-02-03"),
"children": [5,6]
},
{
"_id": 4,
"name": "Parent 4",
"date": new Date("2016-01-15"),
"children": [1,2,6]
}
],callback)
},
function(callback) {
Child.create([
{
"_id": 1,
"text": "The little dog laughed to see such fun"
},
{
"_id": 2,
"text": "The quick brown fox jumped over the lazy dog"
},
{
"_id": 3,
"text": "The dish ran awy with the spoon"
},
{
"_id": 4,
"text": "Miss muffet on her tuffet"
},
{
"_id": 5,
"text": "Lady is a fox"
},
{
"_id": 6,
"text": "Every dog has it's day"
}
],callback);
}
],
callback
);
},
function(callback) {
Child.aggregate(
[
{ "$match": {
"$text": { "$search": "fox dog" }
}},
{ "$project": {
"text": 1,
"score": { "$meta": "textScore" }
}},
{ "$sort": { "score": { "$meta": "textScore" } } },
{ "$lookup": {
"from": "parents",
"localField": "_id",
"foreignField": "children",
"as": "parent"
}},
{ "$project": {
"text": 1,
"score": 1,
"parent": {
"$filter": {
"input": "$parent",
"as": "parent",
"cond": {
"$and": [
{ "$gte": [ "$$parent.date", new Date("2016-02-01") ] },
{ "$lt": [ "$$parent.date", new Date("2016-03-01") ] }
]
}
}
}
}},
{ "$unwind": "$parent" },
{ "$group": {
"_id": "$parent._id",
"name": { "$first": "$parent.name" },
"date": { "$first": "$parent.date" },
"children": {
"$push": {
"_id": "$_id",
"text": "$text",
"score": "$score"
}
},
"score": { "$sum": "$score" }
}},
{ "$sort": { "score": -1 } }
],
function(err,result) {
console.log(JSON.stringify(result,undefined,2));
callback(err);
}
)
}
],
function(err) {
if (err) throw err;
mongoose.disconnect();
}
);
With the output:
[
{
"_id": 1,
"name": "Parent 1",
"date": "2016-02-01T00:00:00.000Z",
"children": [
{
"_id": 2,
"text": "The quick brown fox jumped over the lazy dog",
"score": 1.1666666666666667
},
{
"_id": 1,
"text": "The little dog laughed to see such fun",
"score": 0.6
}
],
"score": 1.7666666666666666
},
{
"_id": 3,
"name": "Parent 3",
"date": "2016-02-03T00:00:00.000Z",
"children": [
{
"_id": 5,
"text": "Lady is a fox",
"score": 0.75
},
{
"_id": 6,
"text": "Every dog has it's day",
"score": 0.6666666666666666
}
],
"score": 1.4166666666666665
}
]
Noting that the "Parent 4"
which would otherwise of had the largest ranking is removed since the date does not fall in the query range applied with $filter
.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With