I have a problem sorting my Lucene.NET index in .NET. I have tried almost every solution on Stack Overflow and searched Google for answers. I'm using Lucene.NET 2.9.2 and ASP.NET 2.0. I want to sort on a string field, the way you can write 'order by Title desc [asc]' in SQL.
I will show you my code and I hope someone can help me.
// Build the index document: every product column is copied into a stored,
// analyzed (tokenized) field that carries the same name as the column.
string[] productColumns = new string[]
{
    "prod_id",
    "prod_title",
    "prod_desc",
    "prod_author",
    "prod_publisher",
    "prod_price"
};
foreach (string column in productColumns)
{
    doc.Add(new Field(column, row[column].ToString(), Field.Store.YES, Field.Index.ANALYZED));
}
//Then next I try to do search with sort option:
// Returns the Lucene Sort object that matches the sort key sent by the caller.
//
// _sort   - sort key: "UnitPriceGorss", "UnitPriceGorssDESC", "Title" or "TitleDESC".
//           Null, empty and unrecognised keys fall back to ascending title order.
// returns - a Sort built from a single SortField.
//
// The third SortField argument is "reverse": false = ascending, true = descending.
//
// NOTE: "prod_title" is added to the index as an ANALYZED (tokenized) field in the
// indexing snippet of this post. Lucene requires sort fields to be indexed but not
// tokenized, which is why the title sorts below were reported as not working.
private static Sort SetSortForLucene(string _sort)
{
    switch (_sort)
    {
        case "UnitPriceGorss":
            return new Sort(new SortField("prod_price", SortField.DOUBLE, false));
        case "UnitPriceGorssDESC":
            return new Sort(new SortField("prod_price", SortField.DOUBLE, true));
        case "Title":
            // Ascending: reverse must be false (it was true, i.e. descending).
            return new Sort(new SortField("prod_title", SortField.STRING, false));
        case "TitleDESC":
            // Descending: reverse must be true (it was false, i.e. ascending).
            return new Sort(new SortField("prod_title", SortField.STRING, true));
        default:
            // Covers "", null and any unknown key.
            return new Sort(new SortField("prod_title", SortField.STRING, false));
    }
}
//Inside my Lucene query method:
StandardAnalyzer analizer = new StandardAnalyzer(Version.LUCENE_29);
// NOTE(review): reader and searcher are not closed anywhere in this excerpt -
// confirm that the enclosing method closes them once the hits have been read.
IndexReader reader = IndexReader.Open(IndexPath);
Searcher searcher = new IndexSearcher(reader);

// Build the Sort requested by the caller and a collector limited to one page of hits.
Sort sort = SetSortForLucene(_sort);
TopFieldDocCollector collector = new TopFieldDocCollector(reader, sort, pageSize);

// Decide which document field the QueryParser searches by default.
// A name containing "_" is taken to be a raw index field name already;
// otherwise the friendly name is mapped to its index field.
string _luceneField;
if (luceneField.Contains("_"))
{
    _luceneField = luceneField;
}
else
{
    switch (luceneField)
    {
        case "Title": _luceneField = "prod_title"; break;
        case "Description": _luceneField = "prod_desc"; break;
        case "Author": _luceneField = "prod_author"; break;
        case "Publisher": _luceneField = "prod_publisher"; break;
        default: _luceneField = "prod_title"; break;
    }
}

QueryParser parser = new QueryParser(_luceneField, analizer);
Query query = parser.Parse(luceneQuery);
searcher.Search(query, collector);

// Take the top hits from the collector. (As posted, they came back without any sort.)
ScoreDoc[] hits = collector.TopDocs().scoreDocs;
foreach (ScoreDoc hit in hits)
{
    // Only hits scoring above the threshold are kept, so the stored document
    // is loaded only for those hits.
    if (hit.score > score)
    {
        Document doc = searcher.Doc(hit.doc);
        int id;
        if (int.TryParse(doc.Get("prod_id"), out id))
        {
            tmpId.Add(id);
        }
    }
}
// Stop words used for full-text searching.
// (The poster suspected these were the real cause of the sorting problem.)
string[] stopWords =
{
    "a", "że", "w", "przy", "o", "bo", "co",
    "z", "za", "ze", "ta", "i", "no", "do"
};
I used this link on Stack Overflow and this other helpful link to try to solve my problem, but sorting still fails and I don't know what is wrong with my code.
After a few days I finally found a solution. The field I want to sort on shouldn't be tokenized when it represents a string value.
For example when I want to sort products by Title (ASCENDING/DESCENDING) you should put something like this:
// Dedicated sort field: the title with spaces replaced by underscores, followed by
// "_" and the product id. It is indexed as a single untokenized term and not stored.
string titleSortKey = row["prod_title"].ToString().Replace(" ", "_") + "_" + row[Product.PROD_ID].ToString();
doc.Add(new Field(Product.PROD_TITLE_SORT, titleSortKey, Field.Store.NO, Field.Index.NOT_ANALYZED));
What I don't understand is why Lucene.NET can sort by this added field even though it is neither stored nor analyzed. The sort field does not even appear in the index! I checked with the lukeall-1.0.1.jar index browser.
Secondly you need to create a proper sort method:
// Returns the Lucene Sort object that matches the sort key sent by the caller.
//
// _sort   - sort key: "UnitPriceGorss", "UnitPriceGorssDESC", "Title" or "TitleDESC".
//           Null, empty and unrecognised keys fall back to relevance (score) order.
// returns - a Sort built from a single SortField.
//
// The third SortField argument is "reverse": false = ascending, true = descending.
private static Sort SetSortForLucene(string _sort)
{
    // A null switch value reaches "default", so no null/empty normalisation is needed.
    switch (_sort)
    {
        case "UnitPriceGorss":
            return new Sort(new SortField(PROD_PRICE, SortField.DOUBLE, false));
        case "UnitPriceGorssDESC":
            return new Sort(new SortField(PROD_PRICE, SortField.DOUBLE, true));
        case "Title":
            // Sorts on the untokenized title field.
            // Ascending: reverse must be false (it was true, i.e. descending).
            return new Sort(new SortField(PROD_TITLE_SORT, SortField.STRING, false));
        case "TitleDESC":
            // Descending: reverse must be true (it was false, i.e. ascending).
            return new Sort(new SortField(PROD_TITLE_SORT, SortField.STRING, true));
        default:
            // Default behaviour ("", null, unknown key): order results by the
            // Lucene.NET search score.
            return new Sort(SortField.FIELD_SCORE);
    }
}
What makes me really suspicious is that sorting works with SortField.DOUBLE even though that field is indexed as an analyzed field in the Lucene full-text index.
I Hope this post will help anyone who has similar problem with sorting.
The field does not need to be stored unless you are returning the data in your query. It is however still added to the index.
The reason you do not analyze a field you wish to sort on is that an analyzer breaks the field into separate terms, which makes sorting very difficult, as the index for the document will have multiple words in it, which obviously cannot be sorted against the whole index. This applies to all field types, whether they are a single term or not.
I believe you can store the field, but unless you want to return it in your query there is no need.
One important thing you should know about sorting:
It doesn't work on tokenized (analyzed) data.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With