Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Highlight term issue in elastic search

  1. We created an index with the settings and mapping below.
 PUT http://localhost:9200/essearch
{ 
"mappings": {
        "object": {
            "_all": {
                "enabled": false
            },
            "properties": {
               "content": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "similarity": "classic",
                    "analyzer": "content_standard"
                },
                "content_phonic": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "similarity": "classic",
                    "analyzer": "content_phonetic"
                },
                "content_stemming": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "similarity": "classic",
                    "analyzer": "content_stemming"
                }
            }
        }
    },
 "settings": {
        "index": {
            "number_of_shards": "1",
            "similarity": {
                "default": {
                    "type": "classic"
                }
            },
            "max_result_window": "50000",
            "mapper": {
                "dynamic": "false"
            },
            "analysis": {
                "filter": {
                    "content_phonetic": {
                        "type": "phonetic",
                        "encoder": "doublemetaphone"
                    },
                    "StopWords": {
                        "type": "stop",
                        "stopwords": [
                            "after",
                            "all",
                            "under",
                            "very",
                            "well"]
                    }
                },
                "analyzer": {
                    "content_phonetic": {
                        "filter": [
                            "content_phonetic"
                        ],
                        "char_filter": [
                            "CharFilters"
                        ],
                        "type": "custom",
                        "tokenizer": "standard"
                    },
                    "content_stemming": {
                        "filter": [
                            "lowercase",
                            "porter_stem"
                        ],
                        "char_filter": [
                            "CharFilters"
                        ],
                        "type": "custom",
                        "tokenizer": "standard"
                    },
                    "content_standard": {
                        "filter": [
                            "lowercase",
                            "StopWords"
                        ],
                        "char_filter": [
                            "CharFilters"
                        ],
                        "type": "custom",
                        "tokenizer": "standard"
                    }
                },
                "char_filter": {
                    "CharFilters": {
                        "type": "mapping",
                        "mappings": [
                            ". => ' '",
                            "' => ' '",
                            "_ => ' '",
                            ": => ' '"
                        ]
                    }
                }
            },
            "number_of_replicas": "0"
        }
    }}

2: Indexed a document

 http://localhost:9200/essearch/object/1
{ "content" : "beginning thirty days after the anticipated COD. 
             Buyer shall be responsible for all natural gas and electrical imbalance charges.
             All prices shall be at the Reference Conditions.
             Buyer’s performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights under said requests to Buyer.  Buyer shall have full dispatch rights subject to operational parameters  (including ramp rates. buyer said to me..."   }

3: Performed Highlight query

    http://localhost:9200/essearch/_search
 {
 "highlight": {
"pre_tags": [ "<term0 style='background-color:Lime'>", "<term1 style='background-color:Chocolate'>", "<term2 style='background-color:Pink'>"
],"post_tags": [ "</term0>", "</term1>", "</term2>" ],
"encoder": "html",
"fields": { "content": { "fragment_size": 50, "number_of_fragments": 0, "type": "fvh" } } },
"_source": false,
"query": {
"bool": {
  "must": [
    {
      "query_string": {
        "query": "(\"under said\") OR (said) OR (buyer)",
        "default_field": "content"}} ],
  "filter": [
    {
      "ids": {
        "values": [ "1" ] } } ] } } }

4: Highlight Query Output

    {
"took": 0,
"timed_out": false,
"_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
},
"hits": {
    "total": 1,
    "max_score": 0.30490398,
    "hits": [
        {
            "_index": "essearch",
            "_type": "object",
            "_id": "1",
            "_score": 0.30490398,
            "highlight": {
                "content": [
                    "beginning thirty days after the anticipated COD.
                    <term1 style='background-color:Chocolate'>Buyer</term1> 
                    shall be responsible for all natural gas and electrical imbalance charges.
                    All prices shall be at the Reference Conditions.Buyer’s performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights under <term0 style='background-color:Lime'>said</term0> requests    to <term1 style='background-color:Chocolate'>Buyer</term1>. <term1 style='background-color:Chocolate'>Buyer</term1> shall have full dispatch rights subject to operational parameters (including ramp rates. <term1 style='background-color:Chocolate'>buyer</term1> <term0 style='background-color:Lime'>said</term0> to me..."
                ]    }  } ] } }

As you can see, we supplied pre and post tags according to the number of query terms. Here we have three terms joined with the OR operator, so three pairs of pre/post tags are supplied. After running the highlight query, Elasticsearch should apply the term1 tag to the term "said", following the order of the terms in the query, but instead it applies the term0 tag to "said" and the term1 tag to "buyer".

like image 778
Hardik Dobariya Avatar asked Nov 07 '22 01:11

Hardik Dobariya


1 Answer

I'm not sure this is a problem with your mappings, because I ran the same text and query and got the expected result. Perhaps it's something to do with the other fields you have in your query. I have found that using the highlight query to isolate the parts you want highlighted helps make the order of the tags easier to reason about.

"beginning thirty days after the anticipated COD. \n             <term2 style='background-color:Pink'>Buyer</term2> shall be responsible for all natural gas and electrical imbalance charges.\n             All prices shall be at the Reference Conditions.\n             <term2 style='background-color:Pink'>Buyer’s</term2> performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights <term0 style='background-color:Lime'>under said</term0> requests to <term2 style='background-color:Pink'>Buyer</term2>.  <term2 style='background-color:Pink'>Buyer</term2> shall have full dispatch rights subject to operational parameters  (including ramp rates. <term2 style='background-color:Pink'>buyer</term2> <term1 style='background-color:Chocolate'>said</term1> to me..."

My mappings:

{
        "mappings": {
            "properties": {
                "text": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "analyzer": "english",
                }                
            }
        }
    }

My document:

{"text": """beginning thirty days after the anticipated COD. 
             Buyer shall be responsible for all natural gas and electrical imbalance charges.
             All prices shall be at the Reference Conditions.
             Buyer’s performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights under said requests to Buyer.  Buyer shall have full dispatch rights subject to operational parameters  (including ramp rates. buyer said to me..."""}

My query:

{
"bool": {
  "must": [
    {
      "query_string": {
        "query": "(\"under said\") OR (said) OR (buyer)",
        "default_field": "text"}} ],
   } } }
like image 77
Lochlan Avatar answered Nov 15 '22 08:11

Lochlan