I have added the code mention above for IndexWriter
.
I have set
writer.SetRAMBufferSizeMB(32);
writer.MergeFactor = 1000;
writer.SetMaxFieldLength(Int32.MaxValue);
writer.UseCompoundFile = false;
all the property for avoiding OutOfMemoryException(OOMException)
.
Here in this code on line writer.AddDocument(document);
shows OOM exception.
Can you guide me why I am this error?
Can anyone help me out to solve this?
My machine's configuration:
System type : 64-bit operating system.
RAM : 4 GB (3.86 GB usable)
Processor : Intel i5 - 3230M CPU @ 2.60GHz
using System;
using System.Data.SqlClient;
using Lucene.Net.Documents;
using System.Data;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.QueryParsers;
namespace ConsoleApplication1
{
class Program
{
static String searchTerm = "";
static void Main(string[] args) {
/**
* This will create dataset according to
* connectingString and query
**/
Console.WriteLine("Connecting to Sql database server.");
String connectionString = "Data Source=proxy-pc;Initial Catalog=Snomed; User ID=SA;password=admin";
String query = "SELECT * FROM DESCRIPTION";
String INDEX_DIRECTORY = "c:\\DatabaseIndex";
Console.WriteLine("Creating dataset.");
DataSet dataSet = createDataset(connectionString, query);
Console.WriteLine("Created dataset successfully.");
Console.WriteLine("Creating document.");
Document document = createDocument(dataSet);
Console.WriteLine("Created document successfully.");
var version = Lucene.Net.Util.Version.LUCENE_30;
var length = Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED;
Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(version);
Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(@INDEX_DIRECTORY));
Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(directory, analyzer, length);
writer.SetMergeScheduler(new Lucene.Net.Index.SerialMergeScheduler());
writer.SetRAMBufferSizeMB(32);
writer.MergeFactor = 1000;
writer.SetMaxFieldLength(Int32.MaxValue);
writer.UseCompoundFile = false;
Console.WriteLine("Before Adding document");
**writer.AddDocument(document); **
Console.WriteLine("Indexing...");
writer.Optimize();
writer.Dispose();
Console.WriteLine("Indexing finished");
if (searchTerm == "")
{
searchTerm = "(keyword)";
}
Console.WriteLine("Searching '" + searchTerm + "'...");
var occurance = searchKeyword(INDEX_DIRECTORY, version, searchTerm);
if (occurance != -1)
{
Console.WriteLine("Your search found : " + occurance);
}
else
{
Console.WriteLine("Invalid index directory.");
}
Console.Read();
}
/**
* Method works as a searcher
**/
private static int searchKeyword(String index_Directory_Path, Lucene.Net.Util.Version version, String searchWord) {
if (index_Directory_Path != null)
{
var standAnalyzer = new StandardAnalyzer(version);
IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(index_Directory_Path));
// parse the query, "term" is the default field to search
var parser = new QueryParser(version, "term", standAnalyzer);
Query searchQuery = parser.Parse(searchWord);
// search
TopDocs hits = searcher.Search(searchQuery, 100);
var total = hits.TotalHits;
return total;
}
else
{
return -1;
}
}
static DataSet createDataset(String connectionString, String query) {
DataSet ds = new DataSet();
using (SqlConnection connection = new SqlConnection(connectionString))
using (SqlCommand command = new SqlCommand(query, connection))
using (SqlDataAdapter adapter = new SqlDataAdapter(command))
{
adapter.Fill(ds);
}
return ds;
}
static Lucene.Net.Documents.Document createDocument(DataSet dataSet) {
Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
using (dataSet)
{
foreach (DataTable table in dataSet.Tables)
{
foreach (DataRow row in table.Rows)
{
String id = row["id"].ToString();
String rTime = row["rTime"].ToString();
String active = row["active"].ToString();
String mId = row["mId"].ToString();
String cId = row["cId"].ToString();
String lCode = row["lCode"].ToString();
String tId = row["tId"].ToString();
String detail = row["detail"].ToString();
String sId = row["sId"].ToString();
Field idField = new Field("id", id, Field.Store.YES, Field.Index.ANALYZED);
Field rTimeField = new Field("rTime", rTime, Field.Store.YES, Field.Index.ANALYZED);
Field activeField = new Field("active", active, Field.Store.YES, Field.Index.ANALYZED);
Field mIdField = new Field("mId", mId, Field.Store.YES, Field.Index.ANALYZED);
Field cIdField = new Field("cId", cId, Field.Store.YES, Field.Index.ANALYZED);
Field lCodeField = new Field("lCode", lCode, Field.Store.YES, Field.Index.ANALYZED);
Field tIdField = new Field("tId", tId, Field.Store.YES, Field.Index.ANALYZED);
Field detailField = new Field("detail", detail, Field.Store.YES, Field.Index.ANALYZED);
Field sIdField = new Field("sId", sId, Field.Store.YES, Field.Index.ANALYZED);
doc.Add(idField);
doc.Add(rTimeField);
doc.Add(activeField);
doc.Add(mIdField);
doc.Add(cIdField);
doc.Add(lCodeField);
doc.Add(tIdField);
doc.Add(detailField);
doc.Add(sIdField);
}
}
}
return doc;
}
}
}
It looks like you're adding the entire database as a single document.
Have you tried adding each row as a separate document? You could perhaps change "createDocument" to "createDocuments" and yield a single Lucene.Net document per row. That'd leave most of your current code unchanged...
Hope this helps,
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With