Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

ElasticSearch First and Last Value within Date Range along with other aggregations

I have data indexed in Elasticsearch as shown under "Input" below, and I expect the output shown under "Expected Output". The data should be grouped by sku_id. For each group I need the average rank over the entire date range, plus the first value and the last value of last_7days_avg_rank within that date range (ordered by date), returned as two separate fields.

Can someone please let me know whether this is possible in Elasticsearch? Right now I am doing this computation in the service layer, but since the response times have become unacceptable, I want to move this logic into Elasticsearch itself, and I have not been able to figure out how to achieve it.

Input:

 date     sku_id last_7days_avg_rank rank 
 20180101  S1      200                200
 20180102  S1      210                200
 20180105  S1      220                200
 20180108  S1      230                200

 20180101  S2      180                300
 20180103  S2      200                300
 20180106  S2      250                300
 20180107  S2      300                300

Expected Output:

sku  first_val_last7day_avg  last_val_last7days_avg  avg(rank)   
S1    200                       230                  200
S2    180                       300                  300

Thanks!

like image 510
Rajiv Avatar asked Mar 07 '23 16:03

Rajiv


1 Answer

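You can get the desired result using a terms aggregation on sku_id with an avg sub-aggregation and two top_hits sub-aggregations sorted by date, ascending and descending (the date field is named my_date in this example). To restrict the results to a date range, add a range query on the date field to the same request.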

{

   "size": 0,
   "aggs": {
      "GROUP": {
         "terms": {
            "field": "sku_id"
         },
         "aggs": {
            "AVG_RANK": {
               "avg": {
                  "field": "rank"
               }
            },
            "FIRST_7_RANK": {
               "top_hits": {
                  "size": 1,
                  "sort": [
                     {
                        "my_date": {
                           "order": "asc"
                        }
                     }
                  ]
               }
            },
            "LAST_7_RANK": {
               "top_hits": {
                  "size": 1,
                  "sort": [
                     {
                        "my_date": {
                           "order": "desc"
                        }
                     }
                  ]
               }
            }
         }
      }
   }
}

You will get the following result as output:

 "aggregations": {
      "GROUP": {
         "doc_count_error_upper_bound": 0,
         "sum_other_doc_count": 0,
         "buckets": [
            {
               "key": "S1",
               "doc_count": 40,
               "LAST_7_RANK": {
                  "hits": {
                     "total": 40,
                     "max_score": null,
                     "hits": [
                        {
                           "_index": "index_name",
                           "_type": "type_name",
                           "_id": "AWI9MU6JeKRzn3ttxGOr",
                           "_score": null,
                           "_source": {
                              "my_date": "2018-01-08",
                              "sku_id": "S1",
                              "last_7days_avg_rank": 230,
                              "rank": 200
                           },
                           "sort": [
                              1515369600000
                           ]
                        }
                     ]
                  }
               },
               "AVG_RANK": {
                  "value": 200
               },
               "FIRST_7_RANK": {
                  "hits": {
                     "total": 40,
                     "max_score": null,
                     "hits": [
                        {
                           "_index": "index_name",
                           "_type": "type_name",
                           "_id": "AWI9LYVpeKRzn3ttxGOQ",
                           "_score": null,
                           "_source": {
                              "my_date": "20180101",
                              "sku_id": "S1",
                              "last_7days_avg_rank": 200,
                              "rank": 200
                           },
                           "sort": [
                              20180101
                           ]
                        }
                     ]
                  }
               }
            },
            {
               "key": "S2",
               "doc_count": 40,
               "LAST_7_RANK": {
                  "hits": {
                     "total": 40,
                     "max_score": null,
                     "hits": [
                        {
                           "_index": "index_name",
                           "_type": "type_name",
                           "_id": "AWI9MU6JeKRzn3ttxGOv",
                           "_score": null,
                           "_source": {
                              "my_date": "2018-01-07",
                              "sku_id": "S2",
                              "last_7days_avg_rank": 300,
                              "rank": 300
                           },
                           "sort": [
                              1515283200000
                           ]
                        }
                     ]
                  }
               },
               "AVG_RANK": {
                  "value": 300
               },
               "FIRST_7_RANK": {
                  "hits": {
                     "total": 40,
                     "max_score": null,
                     "hits": [
                        {
                           "_index": "index_name",
                           "_type": "type_name",
                           "_id": "AWI9LYVpeKRzn3ttxGOU",
                           "_score": null,
                           "_source": {
                              "my_date": "20180101",
                              "sku_id": "S2",
                              "last_7days_avg_rank": 180,
                              "rank": 300
                           },
                           "sort": [
                              20180101
                           ]
                        }
                     ]
                  }
               }
            }
         ]
      }
   }

The result above contains two buckets (groups), one for S1 and one for S2. Within each bucket, the average rank of that group is in "AVG_RANK" -> "value". For first_val_last7day_avg, read the value at "FIRST_7_RANK" -> "hits" -> "hits" (first element) -> "_source" -> "last_7days_avg_rank"; similarly, for last_val_last7days_avg, read the value at "LAST_7_RANK" -> "hits" -> "hits" (first element) -> "_source" -> "last_7days_avg_rank". I hope this helps.

like image 143
bornTalented Avatar answered Apr 28 '23 04:04

bornTalented