I have a search page which contains two search result types: summary result and concrete result.
To obtain the Summary page I use the request:
// Summary search request: queries "index_name" with a fuzzy multi_match over
// Content1/Content2/Content3 (field boosts 3/2/1), requires LanguageId to be 1 or 0,
// and buckets the matches by Category with the top 3 hits per bucket.
var searchDescriptor = new SearchDescriptor<ElasticType>();
searchDescriptor.Index("index_name")
.Query(q =>
q.MultiMatch(m => m
.Fields(fs => fs
.Field(f => f.Content1, 3)
.Field(f => f.Content2, 2)
.Field(f => f.Content3, 1))
.Fuzziness(Fuzziness.EditDistance(1))
.Query(query)
.Boost(1.1)
.Slop(2)
.PrefixLength(1)
.MaxExpansions(100)
.Operator(Operator.Or)
.MinimumShouldMatch(2)
.FuzzyRewrite(RewriteMultiTerm.ConstantScoreBoolean)
.TieBreaker(1.0)
.CutoffFrequency(0.5)
.Lenient()
.ZeroTermsQuery(ZeroTermsQuery.All))
// "&&" places the multi_match and the language filter under bool.must; the "||" pair
// becomes a nested bool.should (LanguageId == 1 or LanguageId == 0).
&& (q.Terms(t => t.Field(f => f.LanguageId).Terms(1)) || q.Terms(t => t.Field(f => f.LanguageId).Terms(0))))
// Terms aggregation named "category": up to 10 buckets, each carrying a "top_tag_hits"
// top_hits sub-aggregation limited to 3 documents.
.Aggregations(a => a
.Terms("category", tagd => tagd
.Field(f => f.Category)
.Size(10)
.Aggregations(aggs => aggs.TopHits("top_tag_hits", t => t.Size(3)))))
// NOTE(review): the boosts here are serialized into the field names ("content1^3", ...);
// presumably a boost has no meaning for fielddata retrieval - confirm whether these are intended.
.FielddataFields(fs => fs
.Field(p => p.Content1, 3)
.Field(p => p.Content2, 2)
.Field(p => p.Content3, 1));
// The pre-built descriptor is returned from the selector; the lambda argument is ignored.
var elasticResult = _elasticClient.Search<ElasticType>(_ => searchDescriptor);
And I get a result like this, for example:
{
"aggregations": {
"category": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "category1",
"doc_count": 40,
"top_tag_hits": {
"hits": {
"total": 40,
"max_score": 5.4,
"hits": [{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 5.4,
"_source": {
"id": 1
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 4.3,
"_source": {
"id": 3 // FAIL!
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 4.3,
"_source": {
"id": 2
}
}]
}
}
}]
}
}
}
So I get a few hits with the same `_score`.
To obtain the concrete result (by category) page I use the request:
// Concrete (single-category) search request: returns one page of hits from "index_name"
// for a fuzzy multi_match over Content1/Content2/Content3 (field boosts 3/2/1) and Category,
// restricted to searchRequest.Category and to LanguageId 1 or 0.

// Resolve the effective page size once so that Size and From always agree. Using the raw
// perPage for the offset would pin every page to offset 0 when perPage == 0 and would
// produce a negative offset when perPage < 0, even though Size falls back to 100.
var pageSize = perPage <= 0 ? 100 : perPage;

var searchDescriptor = new SearchDescriptor<ElasticType>();
searchDescriptor.Index("index_name")
    .Size(pageSize)
    .From(page * pageSize)
    .Query(q => q
        .MultiMatch(m => m
            .Fields(fs => fs
                .Field(f => f.Content1, 3)
                .Field(f => f.Content2, 2)
                .Field(f => f.Content3, 1)
                .Field(f => f.Category))
            .Fuzziness(Fuzziness.EditDistance(1))
            .Query(searchRequest.Query)
            .Boost(1.1)
            .Slop(2)
            .PrefixLength(1)
            .MaxExpansions(100)
            .Operator(Operator.Or)
            .MinimumShouldMatch(2)
            .FuzzyRewrite(RewriteMultiTerm.ConstantScoreBoolean)
            .TieBreaker(1.0)
            .CutoffFrequency(0.5)
            .Lenient()
            .ZeroTermsQuery(ZeroTermsQuery.All))
        // "&&" places each clause under bool.must; the "||" pair becomes a nested
        // bool.should (LanguageId == 1 or LanguageId == 0).
        && q.Term(t => t.Field(f => f.Category).Value(searchRequest.Category))
        && (q.Terms(t => t.Field(f => f.LanguageId).Terms(1)) || q.Terms(t => t.Field(f => f.LanguageId).Terms(0))))
    // NOTE(review): the boosts here are serialized into the field names ("content1^3", ...);
    // presumably a boost has no meaning for fielddata retrieval - confirm whether these are intended.
    .FielddataFields(fs => fs
        .Field(p => p.Content1, 3)
        .Field(p => p.Content2, 2)
        .Field(p => p.Content3, 1))
    // Terms aggregation named "category" over the Category field (default bucket count).
    .Aggregations(a => a
        .Terms("category", tagd => tagd
            .Field(f => f.Category)));
And the result something like this:
{
"hits": {
"total": 40,
"max_score": 7.816723,
"hits": [{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 7.816723,
"_source": {
"id": 1
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 6.514713,
"_source": {
"id": 2
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 6.514709,
"_source": {
"id": 3
}
}]
}
}
So in the second case, for a specific category, I get the `_score` with high precision and Elasticsearch can easily sort the results correctly. But in the aggregation case there are results with the same `_score`, and it is not clear how the sorting works in that case.
Can someone direct me to the right path how to solve this problem? or how can I achieve the same order in the results? Maybe I can increase the accuracy for the aggregated results?
I use elasticsearch server version "5.3.0" and NEST library version "5.0.0".
Update: Native query for aggregation request:
{
"fielddata_fields": [
"content1^3",
"content2^2",
"content3^1"
],
"aggs": {
"category": {
"terms": {
"field": "category",
"size": 10
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"size": 3
}
}
}
}
},
"query": {
"bool": {
"must": [
{
"multi_match": {
"boost": 1.1,
"query": "sparta",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1.0,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1"
],
"zero_terms_query": "all"
}
},
{
"bool": {
"should": [
{
"terms": {
"languageId": [
1
]
}
},
{
"terms": {
"languageId": [
0
]
}
}
]
}
}
]
}
}
}
Native query for concrete request:
{
"from": 0,
"size": 100,
"fielddata_fields": [
"content1^3",
"content2^2",
"content3^1"
],
"aggs": {
"category": {
"terms": {
"field": "category"
}
}
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"multi_match": {
"boost": 1.1,
"query": ".....",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1.0,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1",
"category"
],
"zero_terms_query": "all"
}
},
{
"term": {
"category": {
"value": "category1"
}
}
}
]
}
},
{
"bool": {
"should": [
{
"terms": {
"languageId": [
1
]
}
},
{
"terms": {
"languageId": [
0
]
}
}
]
}
}
]
}
}
}
I also use the following mapping to create the index:
// Index creation request: defines the mapping for ElasticType on the index named by indexName.
var descriptor = new CreateIndexDescriptor(indexName)
.Mappings(ms => ms
.Map<ElasticType>(m => m
.Properties(ps => ps
// Title and Category are mapped as keyword fields; Content1-3 are mapped as text fields.
.Keyword(s => s.Name(ecp => ecp.Title))
.Text(s => s.Name(ecp => ecp.Content1))
.Text(s => s.Name(ecp => ecp.Content2))
.Text(s => s.Name(ecp => ecp.Content3))
.Date(s => s.Name(ecp => ecp.Date))
.Number(s => s.Name(ecp => ecp.LanguageId).Type(NumberType.Integer))
.Keyword(s => s.Name(ecp => ecp.Category))
// The remaining text fields are declared with Index(false) - presumably stored for
// display only and not searchable; confirm against the Elasticsearch mapping docs.
.Text(s => s.Name(ecp => ecp.PreviewImageUrl).Index(false))
.Text(s => s.Name(ecp => ecp.OptionalContent).Index(false))
.Text(s => s.Name(ecp => ecp.Url).Index(false)))));
// The pre-built descriptor is returned from the selector; the lambda argument is ignored.
_elasticClient.CreateIndex(indexName, _ => descriptor);
Your query has problems.
What you are using is a combination of `must` and `should` inside a `must`, as part of a `bool` query.
If you read more in this link, you can see that for `must`:
"The clause (query) must appear in matching documents and will contribute to the score."
So it will give equal scoring to all of your documents that matched the condition. Any document that didn't match the condition won't even be in the results to be scored.
What you should do is use a `should` query, but outside of the `must` query, so that Elasticsearch will be able to score your documents correctly.
For more info, regarding this part of the question:
"Can someone direct me to the right path how to solve this problem?"
you should pass `"explain": true` in the query. You can read more about the explain query and how to interpret its results in this link.
The answer to this question:
"how can I achieve the same order in the results?"
is that, because every score is the same, Elasticsearch can return the results in whatever order it receives the responses from its nodes.
Possible Solution:
You should reorganize your query to make real use of the `should` query and its boosting capabilities. You can read more about boosting here.
I tried two queries similar to yours, but with correct usage of `should`, and they gave me the same order, as expected. Both of your queries should be constructed as below:
{
"from": 0,
"size": 10,
"_source": [
"content1^3",
"content2^2",
"content3^1"
],
"query": {
"bool": {
"should": [
{
"match": {
"languageId": 1
}
},
{
"match": {
"languageId": 0
}
}
],
"must": [
{
"multi_match": {
"boost": 1.1,
"query": ".....",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1",
"category"
],
"zero_terms_query": "all"
}
}
]
}
}
}
and the second query as:
{
"size": 0,
"query": {
"bool": {
"should": [
{
"match": {
"languageId": 1
}
},
{
"match": {
"languageId": 0
}
}
],
"must": [
{
"multi_match": {
"boost": 1.1,
"query": ".....",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1",
"category"
],
"zero_terms_query": "all"
}
}
]
}
},
"aggs": {
"categories": {
"terms": {
"field": "category",
"size": 10
},
"aggs": {
"produdtcs": {
"top_hits": {
"_source": [
"content1^3",
"content2^2",
"content3^1"
],
"size": 3
}
}
}
}
}
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With