Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to create my own analyzer using WhitespaceAnalyzer and LowerCaseFilter in Lucene.Net?

In my case I need to search for keywords like C#, .Net, C++, etc. The StandardAnalyzer strips out special characters, so I used WhitespaceAnalyzer instead, but it doesn't work for me. Indexing:

/// <summary>
/// Adds one Lucene document per row of the first table in <paramref name="ds"/>,
/// storing and analyzing the "Skill_Summary" and "Title" columns, and writes the
/// documents to the index at D:\Lucene.Net\Data.
/// </summary>
/// <param name="ds">Data set whose first table supplies the rows to index.
/// A null data set or one without tables results in no documents being added.</param>
public void Indexing(DataSet ds)
{
    string indexFileLocation = @"D:\Lucene.Net\Data";
    Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, true);
    IndexWriter indexWriter = new IndexWriter(dir, new WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
    try
    {
        // Tables[0] throws on an empty collection, so test the count rather than comparing to null.
        if (ds != null && ds.Tables.Count > 0)
        {
            DataTable dt = ds.Tables[0];
            foreach (DataRow dr in dt.Rows)
            {
                // One Lucene document per data row.
                Document doc = new Document();

                foreach (DataColumn dc in dt.Columns)
                {
                    // Only these two columns are indexed; both are stored and analyzed the same way.
                    string columnName = dc.ColumnName;
                    if (columnName.Equals("Skill_Summary") || columnName.Equals("Title"))
                    {
                        doc.Add(new Field(columnName, dr[columnName].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    }
                }

                // Write the document to the index.
                indexWriter.AddDocument(doc);
            }
        }
    }
    finally
    {
        // Close the writer even when a row fails, so the index is not left open.
        indexWriter.Close();
    }
}

And searching the fields like this:

// Build one required prefix query per whitespace-separated search term and run the
// combined query against the "Skill_Summary" and "Title" fields.
string[] searchfields = new string[] { "Skill_Summary", "Title" };
var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, searchfields, new WhitespaceAnalyzer());
// Prefix/wildcard terms are not analyzed and are lowercased by default. The index is
// built with WhitespaceAnalyzer (case preserved), so keep the term's casing or "C#*"
// would be looked up as "c#*" and never match.
parser.SetLowercaseExpandedTerms(false);
string searchText = "C#";

//Split the search string into separate search terms by word
string[] terms = searchText.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
foreach (string term in terms)
{
    // Drop user-supplied wildcards, then escape query-syntax characters so the term is taken literally.
    string literal = QueryParser.Escape(term.Replace("*", ""));
    if (literal.Length == 0)
    {
        // The term consisted only of '*'; a bare "*" is not a usable prefix query.
        continue;
    }

    finalQuery.Add(parser.Parse(literal + "*"), BooleanClause.Occur.MUST);
}
hits = searcher.Search(finalQuery);

How can I build my own analyzer using WhitespaceAnalyzer and LowerCaseFilter in my case?

like image 264
Dinesh_Dini Avatar asked Feb 03 '26 01:02

Dinesh_Dini


1 Answers

How can I build my own analyzer using WhitespaceAnalyzer and LowerCaseFilter in my case?

/// <summary>
/// Analyzer whose token stream is a <see cref="WhitespaceTokenizer"/> wrapped in a
/// <see cref="LowerCaseFilter"/>.
/// </summary>
public class CaseInsensitiveWhitespaceAnalyzer : Analyzer
{
    /// <summary>
    /// Creates the token stream for a field by tokenizing <paramref name="reader"/> with a
    /// <see cref="WhitespaceTokenizer"/> and passing the result through a <see cref="LowerCaseFilter"/>.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed; not used by this analyzer.</param>
    /// <param name="reader">Reader supplying the text to tokenize.</param>
    /// <returns>The lowercase filter wrapping the whitespace tokenizer.</returns>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(reader);
        return new LowerCaseFilter(tokenizer);
    }
}

PS: When you use wildcards (?, *), the query parser does not run the term through any analyzer; it only lowercases the term (depending on the value of QueryParser.LowercaseExpandedTerms).

like image 95
I4V Avatar answered Feb 05 '26 14:02

I4V