We are currently on a very old version of Lucene (4.x) and are now migrating to Solr 7.4.0 in cloud mode. We have a custom Similarity class that we use to influence the score using an indexed field ("RANK") in our documents.
Here is what the classes look like:
CustomSimilarity.java
public class CustomSimilarity extends Similarity {

    private final Similarity sim;
    private final double coefficiency;
    private String popularityRank;
    static InfoStream infoStream;

    public CustomSimilarity() {
        this.sim = new CustomPayloadSimilarity();
        this.coefficiency = 0.1;
        this.popularityRank = "RANK";
        infoStream = new LoggingInfoStream();
    }

    @Override
    public long computeNorm(FieldInvertState state) {
        return sim.computeNorm(state);
    }

    @Override
    public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
        final Explanation idf = termStats.length == 1
                ? ((CustomPayloadSimilarity) sim).idfExplain(collectionStats, termStats[0])
                : ((CustomPayloadSimilarity) sim).idfExplain(collectionStats, termStats);
        float[] normTable = new float[256];
        for (int i = 1; i < 256; ++i) {
            int length = SmallFloat.byte4ToInt((byte) i);
            float norm = ((CustomPayloadSimilarity) sim).lengthNorm(length);
            normTable[i] = norm;
        }
        normTable[0] = 1f / normTable[255];
        return new IDFStats(collectionStats.field(), queryBoost, idf, normTable);
    }

    public float sloppyFreq(int distance) {
        return 1.0f / (distance + 1);
    }

    public float scorePayload(int doc, int start, int end, BytesRef payload) {
        return 1;
    }

    @Override
    public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
        final IDFStats idfstats = (IDFStats) weight;
        final NumericDocValues rank1Value = context.reader().getNumericDocValues(popularityRank);
        infoStream.message("PCLNSimilarity", "NumericDocValues-1 >> rank1Value = " + rank1Value);
        System.out.println("NumericDocValues-1 >> rank1Value = " + rank1Value);
        return new SimScorer() {
            @Override
            public Explanation explain(int doc, Explanation freq) throws IOException {
                return super.explain(doc, freq);
            }

            @Override
            public float score(int doc, float freq) throws IOException {
                // float weightValue = idfstats.queryWeight;
                // // logger.trace("weight " + weightValue + "freq " + freq);
                //
                // float score = 0.0f;
                // if (rank1Value != null) {
                //     score = (float) rank1Value.longValue() + score;
                // }
                //
                // if (coefficiency > 0) {
                //     score = score + (float) coefficiency * weightValue;
                // }
                // return score;
                return (float) rank1Value.longValue();
            }

            @Override
            public float computeSlopFactor(int distance) {
                return sloppyFreq(distance);
            }

            @Override
            public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
                return scorePayload(doc, start, end, payload);
            }
        };
    }

    static class IDFStats extends SimWeight {
        private final String field;
        /** The idf and its explanation */
        private final Explanation idf;
        private final float boost;
        private final float queryWeight;
        final float[] normTable;

        public IDFStats(String field, float boost, Explanation idf, float[] normTable) {
            // TODO: Validate?
            this.field = field;
            this.idf = idf;
            this.boost = boost;
            this.queryWeight = boost * idf.getValue();
            this.normTable = normTable;
        }
    }
}
CustomPayloadSimilarity.java
public class CustomPayloadSimilarity extends ClassicSimilarity {

    @Override
    public float tf(float freq) {
        return 1;
    }

    @Override
    public float scorePayload(int doc, int start, int end, BytesRef payload) {
        if (payload != null) {
            return PayloadHelper.decodeFloat(payload.bytes, payload.offset);
        } else {
            return 1.0F;
        }
    }

    @Override
    public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
        final long df = termStats.docFreq();
        final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
        final float idf = idf(df, docCount);
        return Explanation.match(idf, "idf(docFreq=" + df + ", docCount=" + docCount + ")");
    }
}
As you can notice, since we want to retain (rough) parity between the old and new TF-IDF scoring, we are still using the older ClassicSimilarity-based algorithm and have not switched to BM25Similarity.
With the above code, I am unable to retrieve the value of the RANK field from the documents. Essentially, the following line returns a value that I am unable to log to the solr.log file:
final NumericDocValues rank1Value = context.reader().getNumericDocValues(popularityRank);
but return (float) rank1Value.longValue() throws the following exception:
"java.lang.IndexOutOfBoundsException
at java.nio.Buffer.checkIndex(Buffer.java:546)
at java.nio.DirectByteBuffer.getInt(DirectByteBuffer.java:685)
at org.apache.lucene.store.ByteBufferGuard.getInt(ByteBufferGuard.java:128)
at org.apache.lucene.store.ByteBufferIndexInput$SingleBufferImpl.readInt(ByteBufferIndexInput.java:415)
at org.apache.lucene.util.packed.DirectReader$DirectPackedReader28.get(DirectReader.java:248)
at org.apache.lucene.codecs.lucene70.Lucene70DocValuesProducer$4.longValue(Lucene70DocValuesProducer.java:490)
at com.priceline.rc.solr.similarity.CustomSimilarity$1.score(CustomSimilarity.java:117)
at org.apache.lucene.search.TermScorer.score(TermScorer.java:65)
at org.apache.lucene.search.TopScoreDocCollector$SimpleTopScoreDocCollector$1.collect(TopScoreDocCollector.java:64)
at org.apache.lucene.search.Weight$DefaultBulkScorer.scoreAll(Weight.java:263)
at org.apache.lucene.search.Weight$DefaultBulkScorer.score(Weight.java:214)
at org.apache.lucene.search.BulkScorer.score(BulkScorer.java:39)
at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:662)
at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:463)
at org.apache.solr.search.SolrIndexSearcher.buildAndRunCollectorChain(SolrIndexSearcher.java:217)
at org.apache.solr.search.SolrIndexSearcher.getDocListNC(SolrIndexSearcher.java:1622)
at org.apache.solr.search.SolrIndexSearcher.getDocListC(SolrIndexSearcher.java:1439)
at org.apache.solr.search.SolrIndexSearcher.search(SolrIndexSearcher.java:586)
at org.apache.solr.handler.component.QueryComponent.doProcessUngroupedSearch(QueryComponent.java:1435)
at org.apache.solr.handler.component.QueryComponent.process(QueryComponent.java:375)
at org.apache.solr.handler.component.SearchHandler.handleRequestBody(SearchHandler.java:298)
at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:199)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:2539)
at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:709)
at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:515)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:377)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:323)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1634)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:146)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:257)
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.server.Server.handle(Server.java:531)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:352)
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:260)
at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:281)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:118)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126)
at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:760)
at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:678)
at java.lang.Thread.run(Thread.java:745)
Any advice?
You are trying to get a value from NumericDocValues without setting the current document with advanceExact(). Remember that in Lucene 7 there is a single NumericDocValues iterator that accounts for every document in the segment; you still need to tell it which document you are referring to before requesting a value. In your score function, try calling rank1Value.advanceExact(doc) before calling rank1Value.longValue().
It should be like this:
if (rank1Value.advanceExact(doc))
    return (float) rank1Value.longValue();
else
    return 0; // or whatever value you want as default
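For reference, here is a minimal sketch of how the corrected score() method inside simScorer() could look. The null check and the fallback to the IDF-based query weight are assumptions about what you might want when a document has no RANK value (roughly following your commented-out code), not something taken verbatim from your class:

@Override
public float score(int doc, float freq) throws IOException {
    // NumericDocValues in Lucene 7 is an iterator: position it on the
    // current document before reading a value.
    if (rank1Value != null && rank1Value.advanceExact(doc)) {
        return (float) rank1Value.longValue();
    }
    // Assumed fallback: the document has no RANK doc value (or the field has
    // no doc values at all), so fall back to the plain weighted query score.
    return (float) coefficiency * idfstats.queryWeight;
}

advanceExact() is called with non-decreasing document IDs within a segment, which is the order in which SimScorer.score() is invoked during normal collection, so this pattern is safe here. Also make sure the RANK field is indexed with doc values enabled; otherwise getNumericDocValues() will return null.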