I have a collection which looks like below:
{
"_id": 1,
"user": "xyz",
"sentence": "I watch movies and web series.",
"nouns": [
"movies",
"web series"
],
"verbs": [
"watch"
]
},
{
"_id": 2,
"user": "xyz",
"sentence": "movies are good way to relax",
"nouns": [
"movies"
],
"verbs": [
"relax"
]
}
There are two array fields, nouns
and verbs
for each user's sentences. I want to group the documents by user
field and separately count the number of each distinct elements in nouns
and verbs
arrays. I have tried the following query (if you wan't you can skip to the last stage of this aggregation):
db.collection.aggregate([
{
$group: {
_id: "$user",
sentence: {
$push: "$sentence"
},
verbs: {
$push: "$verbs"
},
nouns: {
$push: "$nouns"
}
}
},
{
$project: {
verbs: {
$reduce: {
input: "$verbs",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
},
nouns: {
$reduce: {
input: "$nouns",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
},
sentence: 1
}
},
{
$project: {
nouns_count_temp: {
$map: {
input: "$nouns",
as: "c",
in: {
k: "$$c",
v: 1
}
}
},
verbs_count_temp: {
$map: {
input: "$verbs",
as: "c",
in: {
k: "$$c",
v: 1
}
}
},
sentence: 1
}
},
{
$project: {
sentence: 1,
noun_count: {
$reduce: {
input: "$nouns_count_temp",
initialValue: [],
in: {
$cond: [
{
$in: [
{
k: "$$this.k",
v: "$$this.v"
},
"$$value"
]
},
{
$add: [
"$$value.$.v",
1
]
},
{
$concatArrays: [
"$$value",
[
{
k: "$$this.k",
v: "$$this.v"
}
]
]
}
]
}
}
},
verb_count: {
$reduce: {
input: "$verbs_count_temp",
initialValue: [],
in: {
$cond: [
{
$in: [
{
k: "$$this.k",
v: "$$this.v"
},
"$$value"
]
},
{
$add: [
"$$value.$.v",
1
]
},
{
$concatArrays: [
"$$value",
[
{
k: "$$this.k",
v: "$$this.v"
}
]
]
}
]
}
}
}
}
}
])
I am facing problem in the last state of the aggregation. I want to know, if there is any better way to use $cond
in $reduce
, so that I can conditionally reduce the arrays.
My expected output is like below:
{
"_id": "xyz",
"noun_count": {
"movies": 2,
"web series": 1
},
"sentence": [
"I watch movies and web series.",
"movies are good way to relax"
],
"verb_count": {
"relax": 1,
"watch": 1
}
}
Here is the MongoPlayGroundLink, that I have tried.
Unfortunately, we can't build dynamic key:value
object within $reduce
operator.
Workaround: We combine nouns
and verbs
in a single array and count how many times they are repeated.
db.collection.aggregate([
{
$group: {
_id: "$user",
sentence: {
$push: "$sentence"
},
verbs: {
$push: "$verbs"
},
nouns: {
$push: "$nouns"
}
}
},
{
$project: {
sentence: 1,
verbs: {
$reduce: {
input: "$verbs",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
},
nouns: {
$reduce: {
input: "$nouns",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
}
}
},
{
$addFields: {
mix: {
$concatArrays: [
"$verbs",
"$nouns"
]
}
}
},
{
$unwind: "$mix"
},
{
$group: {
_id: {
user: "$_id",
word: "$mix"
},
count: {
$sum: 1
},
sentence: {
$first: "$sentence"
},
verbs: {
$first: "$verbs"
},
nouns: {
$first: "$nouns"
}
}
},
{
$group: {
_id: "$_id.user",
data: {
$push: {
k: "$_id.word",
v: "$count"
}
},
verbs: {
$first: "$verbs"
},
nouns: {
$first: "$nouns"
},
sentence: {
$first: "$sentence"
}
}
},
{
$project: {
_id: 1,
sentence: 1,
noun_count: {
$arrayToObject: {
$filter: {
input: "$data",
as: "data",
cond: {
$in: [
"$$data.k",
"$nouns"
]
}
}
}
},
verb_count: {
$arrayToObject: {
$filter: {
input: "$data",
as: "data",
cond: {
$in: [
"$$data.k",
"$verbs"
]
}
}
}
}
}
}
])
MongoPlayground | Alternative solution
Note: MapReduce solution is slower then aggregation
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With