The release notes for Lucene-Core 4.0 mention as a noteworthy change:
• A new "Block" PostingsFormat offering improved search performance and index compression. This will likely become the default format in a future release.
Per this blog post, the BlockPostingsFormat results in smaller indexes and is faster (for most queries) than the prior format.
But, I can't find a mention anywhere of how to opt for this format in 4.0. Where can the new BlockPostingsFormat be specified in preference to the old default?
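Several steps: (1) create a codec that extends FilterCodec and returns a BlockPostingsFormat from postingsFormat(); (2) register that codec class in META-INF/services/org.apache.lucene.codecs.Codec on your classpath; (3) pass an instance of the codec to IndexWriterConfig.setCodec(...) before creating the IndexWriter.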
According to the Javadoc, BlockPostingsFormat creates .doc and .pos files in the index directory, while Lucene40PostingsFormat creates .frq and .prx files. So checking which of these files exist is one way of telling whether Lucene is really using the block postings format.
I modified the example in the Lucene core Javadoc to test the block postings format. Here is the code (I hope it helps):
org.apache.lucene.codecs.Codec
# See http://www.romseysoftware.co.uk/2012/07/04/writing-a-new-lucene-codec/
# This file should be in /somewhere_in_your_classpath/META-INF/services/org.apache.lucene.codecs.Codec
#
# List of codecs
lucene4examples.Lucene40WithBlockCodec
Lucene40WithBlockCodec.java
package lucene4examples;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.block.BlockPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
// Lucene 4.0 codec with block posting format
/**
 * Codec that wraps {@link Lucene40Codec} through {@link FilterCodec} and
 * overrides only the postings format, returning a {@link BlockPostingsFormat}.
 *
 * <p>The codec is created under the name {@code "Lucene40WithBlock"}.
 */
public class Lucene40WithBlockCodec extends FilterCodec {

    // Created once and reused, so postingsFormat() does not allocate a new
    // format object on every call.
    private final PostingsFormat blockPostingsFormat = new BlockPostingsFormat();

    /** Creates the codec on top of a fresh {@link Lucene40Codec} delegate. */
    public Lucene40WithBlockCodec() {
        super("Lucene40WithBlock", new Lucene40Codec());
    }

    /**
     * Returns the block postings format used by this codec.
     *
     * @return the {@link BlockPostingsFormat} instance held by this codec
     */
    @Override
    public PostingsFormat postingsFormat() {
        return blockPostingsFormat;
    }
}
BlockPostingsFormatExample.java
package lucene4examples;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
// This example is based on the one that comes with Lucene 4.0.0 core API Javadoc
// (http://lucene.apache.org/core/4_0_0/core/overview-summary.html)
/**
 * Indexes one document with {@link Lucene40WithBlockCodec} and then searches
 * the resulting on-disk index, printing the hits to standard output.
 */
public class BlockPostingsFormatExample {

    /** Index location used when no path is supplied on the command line. */
    private static final String DEFAULT_INDEX_DIR = "/index_dir";

    /** Name of the single field that is indexed and searched. */
    private static final String FIELD_NAME = "fieldname";

    /**
     * Runs the example.
     *
     * @param args optional; {@code args[0]} is the index directory path,
     *             otherwise {@code /index_dir} is used
     * @throws IOException    if the index cannot be written or read
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String indexPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INDEX_DIR;
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        // Store the index on disk:
        Directory directory = FSDirectory.open(new File(indexPath));
        try {
            indexSampleDocument(directory, analyzer);
            searchAndPrint(directory, analyzer);
        } finally {
            // Close the directory even when indexing or searching fails.
            directory.close();
        }
    }

    /** Writes one sample document to the index using the block postings codec. */
    private static void indexSampleDocument(Directory directory, Analyzer analyzer)
            throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        // If the following line of code is commented out, the original Lucene
        // 4.0 codec will be used.
        // Else, the Lucene 4.0 codec with block posting format
        // (http://blog.mikemccandless.com/2012/08/lucenes-new-blockpostingsformat-thanks.html)
        // will be used.
        config.setCodec(new Lucene40WithBlockCodec());
        IndexWriter iwriter = new IndexWriter(directory, config);
        try {
            Document doc = new Document();
            String text = "This is the text to be indexed.";
            doc.add(new Field(FIELD_NAME, text, TextField.TYPE_STORED));
            iwriter.addDocument(doc);
        } finally {
            // The writer must be closed before the index is opened for
            // reading, and must not leak if addDocument throws.
            iwriter.close();
        }
    }

    /** Searches the index for "text" and prints every hit's stored field value. */
    private static void searchAndPrint(Directory directory, Analyzer analyzer)
            throws IOException, ParseException {
        DirectoryReader ireader = DirectoryReader.open(directory);
        try {
            IndexSearcher isearcher = new IndexSearcher(ireader);
            // Parse a simple query that searches for "text":
            QueryParser parser = new QueryParser(Version.LUCENE_40, FIELD_NAME, analyzer);
            Query query = parser.parse("text");
            ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
            System.out.println("hits.length = " + hits.length);
            // Iterate through the results:
            for (ScoreDoc hit : hits) {
                Document hitDoc = isearcher.doc(hit.doc);
                System.out.println("text: " + hitDoc.get(FIELD_NAME));
            }
        } finally {
            ireader.close();
        }
    }
}
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With