Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

ElasticSearch - Filter Nested Aggregation

I have a problem with aggregating results after filtering them. I think I'm on the right track but I feel like I'm chasing my tail.

Here is how it looks:

PUT /my_index
{
  "mappings": {
    "reporting": {
      "properties": {
        "events": {
          "type": "nested", 
          "properties": {
            "name":    { "type": "string", "index" : "not_analyzed"  },
            "date":    { "type": "date"    }
          }
        }
      }
    }
  }
}

So, my document looks like:

{
  "events": [
    { "name": "INSTALL", "date": "2014-11-01" },
    { "name": "UNINSTALL", "date": "2014-11-03" },
    { "name": "INSTALL", "date": "2014-11-04" },
    ...
  ]
}

Now, when I index some data, for example:

PUT /my_index/reporting/1
{
  "events": [
    {
       "name": "INSTALL",
       "date": "2014-11-01"
    },
    {
       "name": "UNINSTALL",
       "date": "2014-11-05"
    }
 ]
}

PUT /my_index/reporting/2
{
  "events": [
    {
       "name": "INSTALL",
       "date": "2014-11-01"
    },
    {
       "name": "UNINSTALL",
       "date": "2014-11-03"
    }
 ]
}

PUT /my_index/reporting/3
{
  "events": [
    {
       "name": "INSTALL",
       "date": "2014-11-01"
    },
    {
       "name": "UNINSTALL",
       "date": "2014-11-02"
    }
 ]
}

PUT /my_index/reporting/4
{
  "events": [
    {
       "name": "INSTALL",
       "date": "2014-11-01"
    },
    {
       "name": "UNINSTALL",
       "date": "2014-11-02"
    },
    {
       "name": "INSTALL",
       "date": "2014-11-03"
    }
 ]
}

PUT /my_index/reporting/5
{
  "events": [
    {
       "name": "INSTALL",
       "date": "2014-11-01"
    },
    {
       "name": "UNINSTALL",
       "date": "2014-11-03"
    },
    {
       "name": "INSTALL",
       "date": "2014-11-03"
    }
 ]
}

PUT /my_index/reporting/6
{
  "events": [
    {
       "name": "INSTALL",
       "date": "2014-11-03"
    },
    {
       "name": "UNINSTALL",
       "date": "2014-11-03"
    },
    {
       "name": "INSTALL",
       "date": "2014-11-05"
    }
 ]
}

PUT /my_index/reporting/7
{
  "events": [
    {
       "name": "INSTALL",
       "date": "2014-11-02"
    },
    {
       "name": "UNINSTALL",
       "date": "2014-11-03"
    },
    {
       "name": "INSTALL",
       "date": "2014-11-05"
    }
 ]
}

PUT /my_index/reporting/8
{
  "events": [
    {
       "name": "INSTALL",
       "date": "2014-11-01"
    }
 ]
}

I want to get number of those who INSTALLED after (including) 2014-11-02 and didn't uninstall (so, UNINSTALL was before 2014-11-02 or there is no UNINSTALL event), and to group them in date_histogram meaning (to have bucket with "date"->"count" data).

I managed to write filter on this nested data, so I can get that filtered result, but I keep chasing my tail when it comes to that histogram aggregation.

This is where I've stuck.

GET /my_index/reporting/_search
{
    "query": {
    "filtered": {
      "query": {
        "match_all": {}
      },
      "filter": {
        "bool": {
          "must": [
            {
              "nested": {
                "path": "events",
                "filter": {
                  "bool": {
                    "must": [
                      {
                        "term": {
                          "name": "INSTALL"
                        }
                      },
                      {
                        "range": {
                          "date": {
                            "gte": "2014-11-02"
                          }
                        }
                      }
                    ]
                  }
                }
              }
            },
            {
              "nested": {
                "path": "events",
                "filter": {
                  "bool": {
                    "should": [
                      {
                        "bool": {
                          "must_not": [
                            {
                              "term": {
                                "name": "UNINSTALL"
                              }
                            }
                          ]
                        }
                      },
                      {
                        "bool": {
                          "must": [
                            {
                              "term": {
                                "name": "UNINSTALL"
                              }
                            },
                            {
                              "range": {
                                "date": {
                                  "lt": "2014-11-02"
                                }
                              }
                            }
                          ]
                        }
                      }
                    ]
                  }
                }
              }
            }
          ]
        }
      }
    }
  },
  "aggregations": {
    "filtered_result": {
      "filter": {
        "bool": {
          "must": [
            {
              "nested": {
                "path": "events",
                "filter": {
                  "bool": {
                    "must": [
                      {
                        "term": {
                          "name": "INSTALL"
                        }
                      },
                      {
                        "range": {
                          "date": {
                            "gte": "2014-11-02"
                          }
                        }
                      }
                    ]
                  }
                }
              }
            },
            {
              "nested": {
                "path": "events",
                "filter": {
                  "bool": {
                    "should": [
                      {
                        "bool": {
                          "must_not": [
                            {
                              "term": {
                                "name": "UNINSTALL"
                              }
                            }
                          ]
                        }
                      },
                      {
                        "bool": {
                          "must": [
                            {
                              "term": {
                                "name": "UNINSTALL"
                              }
                            },
                            {
                              "range": {
                                "date": {
                                  "lt": "2014-11-02"
                                }
                              }
                            }
                          ]
                        }
                      }
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      "aggs": {
        "result": {
          "nested": {
            "path": "events"
          },
          "aggs": {
            "NAME": {
              "terms": {
                "field": "events.date",
                "format": "yyyy-MM-dd",
                "order": {
                  "_term": "asc"
                }
              }
            }
          }
        }
      }
    }
  }
}

And my result looks like:

... omitted 4 documents that match filter criteria ...
   "aggregations": {
      "filtered_result": {
         "doc_count": 4, <---- this is ok, I really have 4 docs that match criteria
         "result": {
            "doc_count": 12, <---- those 4 documents really have 12 events (together)
            "NAME": {
               "buckets": [
                  {
                     "key": 1414800000000,
                     "key_as_string": "2014-11-01",
                     "doc_count": 2
                  },
                  {
                     "key": 1414886400000,
                     "key_as_string": "2014-11-02",
                     "doc_count": 2
                  },
                  {
                     "key": 1414972800000,
                     "key_as_string": "2014-11-03",
                     "doc_count": 6
                  },
                  {
                     "key": 1415145600000,
                     "key_as_string": "2014-11-05",
                     "doc_count": 2
                  }
               ]
            }
         }
      }
   }

And I wanted to get something like:

"buckets": [
 {
   "key_as_string": "2014-11-02",
   "doc_count": 0
 },
 {
   "key_as_string": "2014-11-03",
   "doc_count": 2
 },
 {
   "key_as_string": "2014-11-04",
   "doc_count": 0
 },
 {
   "key_as_string": "2014-11-05",
   "doc_count": 2
 } 
]

Basically, 4 documents that matched criteria are distributed by dates when that criteria occurred, 2 docs on "2011-11-03" and two docs on "2014-11-05" (4 documents that have event "install" after 2014-11-02 and didn't have uninstall event after that (they are still installed).

like image 775
tomdzon Avatar asked Nov 04 '14 09:11

tomdzon


1 Answers

This is a partial answer.

There's one main problem: according to your data, there is actually NO document that would match your requirements, so I added some:

curl -XPUT 'localhost:9200/my_index/reporting/9' -d '{
  "events": [
    {
       "name": "INSTALL",
       "date": "2014-11-03"
    }
 ]
}'

curl -XPUT 'localhost:9200/my_index/reporting/10' -d '{
  "events": [
    {
       "name": "INSTALL",
       "date": "2014-11-03"
    },
    {
       "name": "UNINSTALL",
       "date": "2014-11-01"
    }
  ]
}'

To be able to apply the logic, I changed the Schema so that the events are also included in the parent - that way you can search for "doesn't have any UNINSTALL events". Because the thing is, in a nested search, you're always just looking at ONE single event, so you can't do any kind of "reporting-wide" searches.

curl -XPUT 'localhost:9200/my_index' -d '{
  "mappings": {
    "reporting": {
      "properties": {
        "events": {
          "type": "nested", "include_in_root": true,
          "properties": {
            "name":    { "type": "string", "index" : "not_analyzed"  },
            "date":    { "type": "date"    }
          }
        }
      }
    }
  }
}'

And now to the query itself. It seems that when using a nested filter, you can't go directly to the "filter". You must first do the "query > filtered > filter" thing.

One tip for writing long elasticsearch queries in general - remembering that you have "and" and "or" operators aside from "must" and "must_not" - is to just write it out code-like. In your case:

has_one(event.name == 'INSTALL' && event.date >= '2014-11-02')
&& has_none(event.name == 'UNINSTALL') 
&& has_none(event.name == 'UNINSTALL' && event.date >= '2014-11-02')

Or:

has_one(event.name == 'INSTALL' && event.date >= '2014-11-02')
&& ( has_none(event.name == 'UNINSTALL') 
     || has_only(event.name == 'UNINSTALL' && event.date >= '2014-11-02') )

I was able to apply all but the last has_only / has_none. For that, you might want to try using child-documents. There you can at least use the has_child filter under a must_not bool.

The current query:

GET /my_index/reporting/_search
{
  "query": {
    "filtered": {
      "query": {
        "match_all": {}
      },
      "filter": {
        "and": {
          "filters": [
            {
              "or": {
                "filters": [
                  {
                    "bool": {
                      "must_not": [
                        {
                          "term": {
                            "events.name": "UNINSTALL"
                          }
                        }
                      ]
                    }
                  },
                  {
                    "nested": {
                      "path": "events",
                      "query": {
                        "filtered": {
                          "filter": {
                            "bool": {
                              "must": [
                                {
                                  "term": {
                                    "name": "UNINSTALL"
                                  }
                                },
                                {
                                  "range": {
                                    "date": {
                                      "lt": "2014-11-02"
                                    }
                                  }
                                }
                              ]
                            }
                          }
                        }
                      }
                    }
                  }
                ]
              }
            },
            {
              "nested": {
                "path": "events",
                "query": {
                  "filtered": {
                    "filter": {
                      "bool": {
                        "must": [
                          {
                            "term": {
                              "name": "INSTALL"
                            }
                          },
                          {
                            "range": {
                              "date": {
                                "gte": "2014-11-02"
                              }
                            }
                          }
                        ]
                      }
                    }
                  }
                }
              }
            }
          ]
        }
      }
    }
  },
  "aggregations": {
    "filtered_result": {
      "filter": {
        "and": {
          "filters": [
            {
              "or": {
                "filters": [
                  {
                    "bool": {
                      "must_not": [
                        {
                          "term": {
                            "events.name": "UNINSTALL"
                          }
                        }
                      ]
                    }
                  },
                  {
                    "nested": {
                      "path": "events",
                      "query": {
                        "filtered": {
                          "filter": {
                            "bool": {
                              "must": [
                                {
                                  "term": {
                                    "name": "UNINSTALL"
                                  }
                                },
                                {
                                  "range": {
                                    "date": {
                                      "lt": "2014-11-02"
                                    }
                                  }
                                }
                              ]
                            }
                          }
                        }
                      }
                    }
                  }
                ]
              }
            },
            {
              "nested": {
                "path": "events",
                "query": {
                  "filtered": {
                    "filter": {
                      "bool": {
                        "must": [
                          {
                            "term": {
                              "name": "INSTALL"
                            }
                          },
                          {
                            "range": {
                              "date": {
                                "gte": "2014-11-02"
                              }
                            }
                          }
                        ]
                      }
                    }
                  }
                }
              }
            }
          ]
        }
      },
      "aggs": {
        "result": {
          "nested": {
            "path": "events"
          },
          "aggs": {
            "NAME": {
              "terms": {
                "field": "date",
                "format": "yyyy-MM-dd",
                "order": {
                  "_term": "asc"
                }
              }
            }
          }
        }
      }
    }
  }
}
like image 163
whythecode Avatar answered Sep 19 '22 15:09

whythecode