I have hashtags written in camelCase, like #teamIndia. When such a hashtag is clicked, it should fetch all results that contain "#teamIndia". It should first show results with "#teamIndia", then results with "teamIndia", then "team India", then "team" or "India", and so on.
What I am doing:
Search text: "#teamIndia", "#NEWYORK", "#profession", "#2016"
POST /clip
{
"settings": {
"analysis": {
"char_filter" : {
"space_hashtags" : {
"type" : "mapping",
"mappings" : ["#=>|#"]
}
},
"filter": {
"substring": {
"max_gram": "20",
"type": "nGram",
"min_gram": "1",
"token_chars": [
"whitespace"
]
},
"camelcase": {
"type": "word_delimiter",
"type_table": ["# => ALPHANUM", "@ => ALPHANUM"]
},
"stopword": {
"type": "stop",
"stopwords": ["and", "is", "the"]
}
},
"analyzer": {
"substring_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "standard"
},
"camelcase_analyzer": {
"type" : "custom",
"char_filter" : "space_hashtags",
"tokenizer" : "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"description": {
"type": "string",
"analyzer": "substring_analyzer",
"search_analyzer": "standard"
},
"raw": {
"type": "string",
"index": "not_analyzed"
},
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer"
}
}
},
....
}
}
}
}
Docs example :-
POST /clip/Clip/2 {"id" : 1, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/3 {"id" : 2, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/3 {"id" : 2, "description" : "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"}
** Search Query **
GET clip/_search
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must":
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "teamIndia"
}
},
"should": {
"match":
{ "description.raw": "#teamIndia"}
}
}
}
}
}
}
Expected result order: "#teamIndia", "teamIndia", "team India", "team", "India",
and similarly for the other test keywords.
One of the reasons the query in the original post does not work as intended is that description.raw is not_analyzed. As a result, #teamIndia would never match a document with description: "Animals and Pets and #teamIndia", since description.raw would contain the single non-analyzed term Animals and Pets and #teamIndia and not the term #teamIndia.
This assumes that the documents you have are like the second example in the OP.
Example:
{"id" : 2, "description" : "Animals and Pets and #teamIndia"}
OR
{"id":7,"description":"This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"}
Then you should be able to rank documents in the following order :
1) description containing "#teamIndia",
2) description containing "teamIndia"
3) description containing "team India"
4) description containing "India"
by enabling preserve_original and catenate_words in the word_delimiter filter, as shown in the example below (the example uses catenate_all, which joins all sub-word parts, letters and digits alike).
Example:
Index Documents
PUT clip
{
"settings": {
"analysis": {
"char_filter": {
"zwsp_normalize": {
"type": "mapping",
"mappings": [
"\\u200B=>",
"\\u200C=>",
"\\u200D=>"
]
},
"html_decoder": {
"type": "mapping",
"mappings": [
"&lt;=> <",
"&gt;=> >"
]
}
},
"filter": {
"camelcase": {
"type": "word_delimiter",
"preserve_original": "true",
"catenate_all": "true"
},
"stopword": {
"type": "stop",
"stopwords": [
"and",
"is",
"the"
]
}
},
"analyzer": {
"camelcase_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
],
"char_filter": [
"zwsp_normalize",
"html_decoder",
"html_strip"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer",
"norms": {
"enabled": false
}
}
}
}
}
}
}
}
POST /clip/Clip/1
{
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
POST /clip/Clip/2
{
"id": 2,
"description": "Animals and Pets and teamIndia"
}
POST /clip/Clip/3
{
"id": 3,
"description": "Animals and Pets and team India"
}
POST /clip/Clip/4
{
"id": 4,
"description": "Animals and Pets and India"
}
POST /clip/Clip/7
{
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
Query Result:
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#teamIndia"
}
}
]
}
}
}
}
}
Results:
"hits": {
"total": 5,
"max_score": 1.4969246,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 1.4969246,
"_source": {
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "1",
"_score": 1.4969246,
"_source": {
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "2",
"_score": 1.0952718,
"_source": {
"id": 2,
"description": "Animals and Pets and teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "3",
"_score": 0.5207714,
"_source": {
"id": 3,
"description": "Animals and Pets and team India"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "4",
"_score": 0.11123338,
"_source": {
"id": 4,
"description": "Animals and Pets and India"
}
}
]
}
Example #dubai:
POST /clip/Clip/5
{
"id": 5,
"description": "#dubai is hot"
}
POST /clip/Clip/6
{
"id": 6,
"description": "dubai airport is huge"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#dubai"
}
}
]
}
}
}
}
}
"hits": {
"total": 2,
"max_score": 1.820827,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "5",
"_score": 1.820827,
"_source": {
"id": 5,
"description": "#dubai is hot"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "6",
"_score": 0.5856731,
"_source": {
"id": 6,
"description": "dubai airport is huge"
}
}
]
}
Example #professionalAndPunctual :
POST /clip/Clip/7
{
"id": 7,
"description": "professionalAndPunctual"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#professionalAndPunctual"
}
}
]
}
}
}
}
}
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 2.2149992,
"_source": {
"id": 7,
"description": "professionalAndPunctual"
}
}
]
Example: #TheBestAndTheBeast
POST /clip/Clip/10
{"id" : 10, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/11
{"id" :11, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/12
{"id" : 12, "description" : "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#TheBestAndTheBeast"
}
}
]
}
}
}
}
}
#Results
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "12",
"_score": 1.8701664,
"_source": {
"id": 12,
"description": "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "10",
"_score": 0.9263139,
"_source": {
"id": 10,
"description": "TheBestAndTheBeast"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "11",
"_score": 0.9263139,
"_source": {
"id": 11,
"description": "bikes in DUBAI TheBestAndTheBeast profession"
}
}
]
Analyzer Example :
get clip/_analyze?analyzer=camelcase_analyzer&text=%23DUBAI
{
"tokens": [
{
"token": "#dubai",
"start_offset": 0,
"end_offset": 6,
"type": "word",
"position": 0
},
{
"token": "dubai",
"start_offset": 1,
"end_offset": 6,
"type": "word",
"position": 0
}
]
}
get clip/_analyze?analyzer=camelcase_analyzer&text=This%20%26lt%3Ba%20href%3D%26quot%3Bsearch%2Fclips%3Fq%3D%2523teamIndia%26amp%3Bsource%3Dhashtag%26quot%3B%26gt%3B%23teamIndia%26lt%3B%2Fa%26gt%3B
{
"tokens": [
{
"token": "this",
"start_offset": 0,
"end_offset": 4,
"type": "word",
"position": 0
},
{
"token": "#teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 1
},
{
"token": "india",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "team",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
}
]
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With