Using Apache Lucene to search

I've been trying to implement Lucene to make the searching on my website faster.

My code currently works; however, I don't think I am using Lucene correctly. Right now, my search query is productName:*(input)*, i.e. the input wrapped in a leading and a trailing wildcard - I can't imagine this is what you're supposed to do to find all products where productName contains input. I think it has something to do with the way I save the fields to a document.

My code:


using System;
using System.Collections;
using System.Collections.Generic;
using System.Data.Entity.Migrations.Model;
using System.Linq;
using System.Threading.Tasks;
using Lucene.Net;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Rentpro.Models;
using RentPro.Models.Tables;
using RentProModels.Models;

namespace RentPro.Helpers
    /// <summary>
    /// Builds one Lucene index directory per language and runs product searches against
    /// the index of the current language.
    /// </summary>
    public class LuceneHelper
        // Lucene compatibility version handed to the analyzers and the query parser.
        private const Lucene.Net.Util.Version Version = Lucene.Net.Util.Version.LUCENE_30;
        // Set to true at the end of BuildIndices.
        // NOTE(review): this is per-instance state, so every freshly constructed helper starts
        // "uninitialized" and Search will call BuildIndices again - confirm that is intended.
        private bool IndicesInitialized;
        // Languages loaded by BuildIndices; used to map a language id to its index folder.
        private List<Language> Languages = new List<Language>();

        /// <summary>
        /// Loads the languages from <paramref name="db"/> and builds the product index of each
        /// one, then marks this helper as initialized.
        /// </summary>
        /// <param name="db">Database used to load the languages and the products.</param>
        public void BuildIndices(DB db)
        {
            Languages = GetLanguages(db);

            Analyzer standardAnalyzer = new StandardAnalyzer(Version);
            List<Product> catalog = db.GetAllProducts(true, false);

            // The same product list and analyzer are reused for every language.
            Languages.ForEach(language => BuildIndicesForLanguage(catalog, standardAnalyzer, language.ID));

            IndicesInitialized = true;
        }

        // Builds the index for one language: opens an IndexWriter on that language's directory
        // and, for every product, creates a Document together with its fields.
        // NOTE(review): in this listing the IndexWriter constructor call is cut off and no
        // document.Add(...) / indexWriter.AddDocument(...) calls are visible - presumably lost
        // when the snippet was pasted; confirm against the real file.
        private void BuildIndicesForLanguage(List<Product> products, Analyzer analyzer, int id = 0)
            using (
                IndexWriter indexWriter = new IndexWriter(GetDirectory(id), analyzer,
                // NOTE(review): x is never read in the visible code.
                var x = products.Count;
                foreach (Product p in products)
                    // Flatten the product into language-specific strings before indexing.
                    SearchProduct product = SearchProduct.FromProduct(p, id);
                    Document document = new Document();
                    // Field.Index.NO fields are stored so they can be read back from a hit,
                    // but they are not searchable.
                    Field productIdField = new Field("productId", product.ID.ToString(), Field.Store.YES, Field.Index.NO);
                    // Field.Index.ANALYZED fields are tokenized by the analyzer; these four are
                    // the fields the query parser in Search looks at.
                    Field productTitleField = new Field("productName", product.Name, Field.Store.YES, Field.Index.ANALYZED);
                    Field productDescriptionField = new Field("productDescription", product.Description, Field.Store.YES, Field.Index.ANALYZED);
                    Field productCategoryField = new Field("productCategory", product.Category, Field.Store.YES, Field.Index.ANALYZED);
                    Field productCategorySynonymField = new Field("productCategorySynonym", product.CategorySynonym, Field.Store.YES, Field.Index.ANALYZED);
                    // Display-only data carried along with each hit.
                    Field productImageUrlField = new Field("productImageUrl", product.ImageUrl, Field.Store.YES, Field.Index.NO);
                    Field productTypeField = new Field("productType", product.Type, Field.Store.YES, Field.Index.NO);
                    Field productDescriptionShortField = new Field("productDescriptionShort", product.DescriptionShort, Field.Store.YES, Field.Index.NO);
                    Field productPriceField = new Field("productPrice", product.Price, Field.Store.YES, Field.Index.NO);


        /// <summary>
        /// Searches the index of the current language for products whose description, category,
        /// category synonym or name match <paramref name="input"/> wrapped in wildcards.
        /// At most 100 hits are collected.
        /// </summary>
        /// <param name="input">Raw search text entered by the user.</param>
        /// <returns>The matching products, rebuilt from the stored index fields.</returns>
        public List<SearchProduct> Search(string input)
            // Build the indices on first use, then run the search again on the initialized helper.
            // NOTE(review): the DB created here is not disposed in the visible code.
            if (!IndicesInitialized)
                BuildIndices(new DB());
                return Search(input);

            // Second argument 'true' opens the reader read-only.
            // NOTE(review): reader, searcher and analyzer are never disposed in the visible code.
            IndexReader reader = IndexReader.Open(GetCurrentDirectory(), true);
            Searcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(Version);
            TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);
            MultiFieldQueryParser parser = new MultiFieldQueryParser(Version,
                new[] { "productDescription", "productCategory", "productCategorySynonym", "productName" }, analyzer)
                // Required because the query built below starts with '*'.
                AllowLeadingWildcard = true

            // NOTE(review): input is handed to the parser unescaped, so query-syntax characters
            // typed by the user are interpreted by the parser - confirm this is acceptable.
            searcher.Search(parser.Parse("*" + input + "*"), collector);

            ScoreDoc[] hits = collector.TopDocs().ScoreDocs;

            List<int> productIds = new List<int>();
            List<SearchProduct> results = new List<SearchProduct>();

            foreach (ScoreDoc scoreDoc in hits)
                Document document = searcher.Doc(scoreDoc.Doc);
                int productId = int.Parse(document.Get("productId"));
                // Only build a result for product ids not seen before.
                // NOTE(review): no statement adding to productIds or results is visible in this
                // listing - presumably lost when the snippet was pasted.
                if (!productIds.Contains(productId))
                    SearchProduct result = new SearchProduct
                        ID = productId,
                        Description = document.Get("productDescription"),
                        Name = document.Get("productName"),
                        Category = document.Get("productCategory"),
                        CategorySynonym = document.Get("productCategorySynonym"),
                        ImageUrl = document.Get("productImageUrl"),
                        Type = document.Get("productType"),
                        DescriptionShort = document.Get("productDescriptionShort"),
                        Price = document.Get("productPrice")
            return results;

        /// <summary>
        /// Resolves the index folder of the language with the given id.
        /// </summary>
        /// <param name="languageId">ID of the language whose index folder is wanted.</param>
        /// <returns>Path of that language's index folder.</returns>
        /// <exception cref="InvalidOperationException">
        /// No loaded language has that id, or more than one does.
        /// </exception>
        private string GetDirectoryPath(int languageId = 1)
        {
            Language language = Languages.SingleOrDefault(x => x.ID == languageId);
            if (language == null)
            {
                // Fail with a message that names the problem instead of a bare
                // NullReferenceException on .UriPart.
                throw new InvalidOperationException(
                    "No language with ID " + languageId + " is loaded; call BuildIndices first.");
            }

            return GetDirectoryPath(language.UriPart);
        }

        /// <summary>
        /// Returns the index folder for a language: the application's base directory followed by
        /// \App_Data\LuceneIndices\ and the language's URI part.
        /// </summary>
        /// <param name="languageUri">URI part of the language; used as the folder name.</param>
        private string GetDirectoryPath(string languageUri)
        {
            string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
            return string.Concat(baseDirectory, @"\App_Data\LuceneIndices\", languageUri);
        }

        /// <summary>
        /// Materializes every language exposed by <paramref name="db"/> into a list.
        /// </summary>
        private List<Language> GetLanguages(DB db)
        {
            List<Language> loadedLanguages = db.Languages.ToList();
            return loadedLanguages;
        }

        /// <summary>
        /// Returns the language id reported by <c>Translator.GetCurrentLanguageID()</c>.
        /// </summary>
        private int GetCurrentLanguageId()
        {
            int currentLanguageId = Translator.GetCurrentLanguageID();
            return currentLanguageId;
        }

        /// <summary>
        /// Opens the Lucene directory that holds the index of the current language.
        /// </summary>
        private FSDirectory GetCurrentDirectory()
        {
            int languageId = GetCurrentLanguageId();
            string indexPath = GetDirectoryPath(languageId);
            return FSDirectory.Open(indexPath);
        }

        /// <summary>
        /// Opens the Lucene directory that holds the index of the given language.
        /// </summary>
        /// <param name="languageId">ID of the language whose index directory is opened.</param>
        private FSDirectory GetDirectory(int languageId)
        {
            string indexPath = GetDirectoryPath(languageId);
            return FSDirectory.Open(indexPath);
        }

    /// <summary>
    /// Flat, string-based projection of a product, holding exactly the values that are written
    /// to and read back from the search index.
    /// </summary>
    public class SearchProduct
    {
        public int ID { get; set; }
        public string Description { get; set; }
        public string Name { get; set; }
        public string ImageUrl { get; set; }
        public string Type { get; set; }
        public string DescriptionShort { get; set; }
        public string Price { get; set; }
        public string Category { get; set; }
        public string CategorySynonym { get; set; }

        /// <summary>
        /// Creates the search projection of <paramref name="p"/> using the texts of the given language.
        /// </summary>
        /// <param name="p">Product to project.</param>
        /// <param name="languageId">Language whose name and description texts are used.</param>
        /// <returns>
        /// A new <see cref="SearchProduct"/>. <c>ImageUrl</c> is empty when the product has no
        /// image of type "Main"; <c>Price</c> is empty for anything that is not a HuurProduct.
        /// </returns>
        public static SearchProduct FromProduct(Product p, int languageId)
        {
            // FirstOrDefault instead of Single: a product that has images but none (or several)
            // flagged "Main" must not throw and abort the whole index build.
            var mainImage = p.Images.FirstOrDefault(x => x.Type == "Main");
            string imageUrl = mainImage != null
                ? "/Company/" + Settings.Get("FolderName") + "/Pictures/Products/100x100/" + mainImage.Url
                : "";

            // One type test instead of an 'is' check followed by a cast.
            HuurProduct rentalProduct = p as HuurProduct;

            return new SearchProduct
            {
                ID = p.ID,
                Description = p.GetText(languageId, ProductLanguageType.Description),
                Name = p.GetText(languageId),
                ImageUrl = imageUrl,
                Type = rentalProduct != null ? "HuurProduct" : "KoopProduct",
                DescriptionShort = p.GetText(languageId, ProductLanguageType.DescriptionShort),
                Price = rentalProduct != null
                    ? rentalProduct.CalculatedPrice(1, !Settings.GetBool("BTWExLeading")).ToString("0.00")
                    : "",
                Category = p.Category.Name,
                CategorySynonym = p.Category.Synonym
            };
        }
    }



How I call the LuceneHelper:

        /// <summary>
        /// Runs a Lucene search for <paramref name="SearchString"/> and renders the results view.
        /// A blank search string yields an empty result list without touching the index.
        /// </summary>
        /// <param name="SearchString">Text to search for.</param>
        /// <param name="SearchOrderBy">Requested sort order; passed through to the view model.</param>
        /// <param name="page">Requested page; 1 when omitted.</param>
        /// <param name="amount">Requested page size; 10 when omitted.</param>
        public ActionResult Lucene(string SearchString, string SearchOrderBy, int? page, int? amount)
        {
            List<SearchProduct> searchResults = SearchString.IsNullOrWhiteSpace()
                ? new List<SearchProduct>()
                : new LuceneHelper().Search(SearchString);

            int requestedPage = page.GetValueOrDefault(1);
            int requestedAmount = amount.GetValueOrDefault(10);

            LuceneSearchResultsVM viewModel = new LuceneSearchResultsVM(
                db, SearchString, searchResults, SearchOrderBy, requestedPage, requestedAmount);
            return View(viewModel);
        }


using System;
using System.Collections.Generic;
using System.Linq;
using System.Linq.Dynamic;
using System.Web;
using RentPro.Models.Tables;
using System.Linq.Expressions;
using System.Reflection;
using Microsoft.Ajax.Utilities;
using Rentpro.Models;
using RentPro.Helpers;
using RentProModels.Models;

namespace RentPro.ViewModels
    /// <summary>
    /// View model for the Lucene search results page: holds the full result list plus the
    /// paging and ordering state used to render one page of it.
    /// </summary>
    public class LuceneSearchResultsVM
        // Complete, unpaged result list as handed to the constructor.
        public List<SearchProduct> SearchProducts { get; set; }
        // The four BTW* flags and FolderName are copied by the constructor from the
        // application settings of the same name.
        public bool BTWActive { get; set; }
        public bool BTWEXInput { get; set; }
        public bool BTWShow { get; set; }
        public bool BTWExLeading { get; set; }
        public string FolderName { get; set; }
        // The search text the results belong to.
        public string CurrentSearchString { get; set; }
        // Key understood by GetOrderedList ("Price", "DateCreated", anything else = by name).
        public string SearchOrderBy { get; set; }
        // 1-based page number; GetOrderedList skips (Page - 1) * Amount items.
        public int Page;
        // Number of items per page.
        public int Amount;
        // Translated summary line: search text, total result count, current page and page count.
        // The setter is empty, so assigned values are ignored.
        // NOTE(review): the 'get' accessor keyword is not visible in this listing - presumably
        // lost when the snippet was pasted.
        public String SearchQueryString {
                return Translator.Translate("Zoekresultaten voor") + ": " + CurrentSearchString + " (" +
                       SearchProducts.Count + " " + Translator.Translate("resultaten") + " - " +
                       Translator.Translate("pagina") + " " + Page + " " + Translator.Translate("van") + " " +
                       CalculateAmountOfPages() + ")";
            set { }

        /// <summary>
        /// Initializes the view model from the application settings and the given search
        /// results, normalizes the paging arguments, and logs the search.
        /// </summary>
        /// <param name="db">Database handed to the search log.</param>
        /// <param name="queryString">Search text the results belong to.</param>
        /// <param name="results">Complete, unpaged result list.</param>
        /// <param name="searchOrderBy">Requested sort key; "Name" when blank.</param>
        /// <param name="page">Requested 1-based page; clamped to the available pages.</param>
        /// <param name="amt">Requested page size; 10 when zero or negative.</param>
        public LuceneSearchResultsVM(DB db, string queryString, List<SearchProduct> results, string searchOrderBy, int page, int amt)
        {
            BTWActive = Settings.GetBool("BTWActive");
            BTWEXInput = Settings.GetBool("BTWEXInput");
            BTWShow = Settings.GetBool("BTWShow");
            BTWExLeading = Settings.GetBool("BTWExLeading");
            FolderName = Settings.Get("FolderName");

            SearchProducts = results;
            CurrentSearchString = queryString;
            SearchOrderBy = searchOrderBy.IsNullOrWhiteSpace() ? "Name" : searchOrderBy;

            // A negative page size would make the page count and Skip/Take arithmetic negative,
            // so treat it like the "not specified" value 0.
            Amount = amt <= 0 ? 10 : amt;

            int maxPages = CalculateAmountOfPages();
            // Clamp into [1, maxPages]. With no results the page stays 0, as before.
            int firstPage = maxPages > 0 ? 1 : 0;
            Page = Math.Max(firstPage, Math.Min(page, maxPages));

            SearchLog.MakeEntry(queryString, SearchProducts.Count, db, HttpContext.Current.Request.UserHostAddress);
        }

        /// <summary>
        /// Returns the products of the current page, ordered according to <c>SearchOrderBy</c>.
        /// </summary>
        /// <returns>
        /// A new list with at most <c>Amount</c> items. "Price" orders with
        /// <see cref="PriceSorter"/>, "DateCreated" keeps the order in which the results were
        /// supplied, and any other value orders by name.
        /// </returns>
        public List<SearchProduct> GetOrderedList()
        {
            IEnumerable<SearchProduct> ordered;
            switch (SearchOrderBy)
            {
                case "Price":
                    ordered = SearchProducts.OrderBy(product => product, new PriceSorter());
                    break;
                case "DateCreated":
                    // TODO: no creation date is available on SearchProduct yet, so the
                    // supplied order is kept.
                    ordered = SearchProducts;
                    break;
                default:
                    ordered = SearchProducts.OrderBy(n => n.Name);
                    break;
            }

            // Order the complete result set first and cut out the page afterwards; paging
            // first would only sort the items that happen to fall on the current page.
            return ordered.Skip((Page - 1) * Amount).Take(Amount).ToList();
        }

        /// <summary>
        /// Returns how many pages of <c>Amount</c> items are needed to show every search
        /// result; a partially filled last page counts as a page.
        /// </summary>
        public int CalculateAmountOfPages()
        {
            int leftover;
            int fullPages = Math.DivRem(SearchProducts.Count, Amount, out leftover);
            return leftover > 0 ? fullPages + 1 : fullPages;
        }


    /// <summary>
    /// Orders products by ascending price. Products without a price (null product, or a null
    /// or empty <c>Price</c>) sort after every priced product and compare equal to each other.
    /// </summary>
    public class PriceSorter : IComparer<SearchProduct>
    {
        /// <summary>
        /// Compares two products by price.
        /// </summary>
        /// <returns>
        /// A negative value when <paramref name="x"/> sorts first, zero when both are
        /// equivalent, a positive value when <paramref name="y"/> sorts first.
        /// </returns>
        /// <exception cref="FormatException">A non-empty price is not a valid decimal.</exception>
        public int Compare(SearchProduct x, SearchProduct y)
        {
            bool xHasNoPrice = x == null || string.IsNullOrEmpty(x.Price);
            bool yHasNoPrice = y == null || string.IsNullOrEmpty(y.Price);

            // Two unpriced products must compare equal; answering "greater" in both
            // directions breaks the comparer contract that sorting relies on.
            if (xHasNoPrice && yHasNoPrice) return 0;
            if (xHasNoPrice) return 1;
            if (yHasNoPrice) return -1;

            // Parsed with the current culture, matching how the price string is produced
            // by ToString("0.00") when the product is indexed.
            decimal priceX = decimal.Parse(x.Price);
            decimal priceY = decimal.Parse(y.Price);
            return priceX.CompareTo(priceY);
        }
    }


Any help would be greatly appreciated.

Example input list of products: ProductList

Query: SELECT Product.ID, Product.Decription, Product.Name FROM Product

Desired results: DesiredResults

SQL Server query equivalent: SELECT Product.ID, Product.Decription, Product.Name FROM Product WHERE Product.Name LIKE '%Zelf%' OR Product.Decription LIKE '%Zelf%'

Basically, Zelf is the input. I want to find all matches with product descriptions or product names that contain the input string.

1 Answer

By default, Lucene does not allow ? or * as the first character of a search term. To overcome this, store in your index the substrings that run from every position in a word to its end. E.g. for the word test you should put the following terms into the index: test, est, st, t.


I recommend using a separate field for that. Here is a Java example for the case where you have a short, single-word field such as a product name.

// Create one "productNameSearch" field per suffix of the product name, so that a prefix
// query on that field can match text in the middle of the name.
// NOTE(review): the loop stops at length()-1, so the final one-character suffix is not
// produced, and adding the field to the document is not shown here.
for(int i = 0; i <  product.SafeName.length()-1; i++){
   Field productTitleSearchField = new Field("productNameSearch", product.SafeName.substring(i, product.SafeName.length()), Field.Store.NO, Field.Index.ANALYZED);

After this you can use the query string productNameSearch:(input)* or use a PrefixQuery to search for product names that contain the input.

If your field contains several words and it is acceptable to put a reasonable bound on the length of the input, it is better to add an NGramTokenFilter for this field. If the input string is limited to between n and m characters, create an NGramTokenFilter with minGram = n and maxGram = m. For example, for the word test with limits 2 to 3, your index will contain the terms te, es, st, tes and est.


After this you can search via string


