Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SOLR-10320: Perform secondary sort using both values in and outside S… #179

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
498 changes: 498 additions & 0 deletions 0001-SOLR-10320-Perform-secondary-sort-using-both-values-.patch

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions lucene/core/ivy.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,8 @@
-->
<ivy-module version="2.0">
<info organisation="org.apache.lucene" module="core"/>
<dependencies>
<dependency org="com.google.guava" name="guava" rev="21.0"/>
</dependencies>

</ivy-module>
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ final class HitQueue extends PriorityQueue<ScoreDoc> {
* specifies whether to pre-populate the queue with sentinel values.
* @see #getSentinelObject()
*/
HitQueue(int size, boolean prePopulate) {
public HitQueue(int size, boolean prePopulate) {
// NOTE(review): the class header visible in this hunk is "final class HitQueue" (no public
// modifier). If the class stays package-private, a public constructor is still unreachable
// from other packages -- confirm whether the class visibility is widened elsewhere.
super(size, prePopulate);
}

Expand Down
31 changes: 31 additions & 0 deletions solr/core/src/java/org/apache/solr/schema/IndexSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.search.sorting.TBGAwareCollector;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException;
Expand Down Expand Up @@ -100,6 +103,7 @@ public class IndexSchema {
public static final String DEFAULT_SEARCH_FIELD = "defaultSearchField";
public static final String DESTINATION = "dest";
public static final String DYNAMIC_FIELD = "dynamicField";
public static final String DYNAMIC_VALUE_SORT = "dynamicValueSort";
public static final String DYNAMIC_FIELDS = DYNAMIC_FIELD + "s";
public static final String FIELD = "field";
public static final String FIELDS = FIELD + "s";
Expand Down Expand Up @@ -159,6 +163,9 @@ public class IndexSchema {
protected DynamicCopy[] dynamicCopyFields;
public DynamicCopy[] getDynamicCopyFields() { return dynamicCopyFields; }

/**
 * Secondary-sort collectors keyed by the <code>name</code> attribute of each
 * <code>dynamicValueSort</code> schema element. Assigned in readSchema() from
 * createSecondarySortCollectorMap().
 */
protected Map<String, TBGAwareCollector> secondarySortCollectorMap;
/**
 * Returns the map of secondary-sort collectors exactly as stored on this schema.
 * NOTE(review): the field itself is returned (no copy, no unmodifiable wrapper), so callers
 * share both the map and the collector instances it holds -- confirm this is intended.
 */
public Map<String, TBGAwareCollector> getSecondarySortCollectorMap() { return secondarySortCollectorMap; }

/**
* keys are all fields copied to, count is num of copyField
* directives that target them.
Expand Down Expand Up @@ -493,6 +500,8 @@ protected void readSchema(InputSource is) {
// load the fields
Map<String,Boolean> explicitRequiredProp = loadFields(document, xpath);

secondarySortCollectorMap = createSecondarySortCollectorMap(document, xpath);

expression = stepsToPath(SCHEMA, SIMILARITY); // /schema/similarity
Node node = (Node) xpath.evaluate(expression, document, XPathConstants.NODE);
similarityFactory = readSimilarity(loader, node);
Expand Down Expand Up @@ -699,6 +708,28 @@ protected synchronized Map<String,Boolean> loadFields(Document document, XPath x

return explicitRequiredProp;
}

/**
 * Builds the lookup table of secondary-sort collectors declared in the schema.
 *
 * <p>Every <code>dynamicValueSort</code> element under the schema root contributes one entry:
 * its <code>name</code> attribute is the key and an instance of the class named by its
 * <code>collector</code> attribute is the value.
 *
 * @param document the parsed schema document
 * @param xpath the XPath evaluator used to locate the <code>dynamicValueSort</code> elements
 * @return a mutable map from sort name to collector instance; empty when none are declared
 * @throws XPathExpressionException if the lookup expression cannot be evaluated
 * @throws SolrException if a configured class is not a {@link TBGAwareCollector}
 */
protected Map<String, TBGAwareCollector> createSecondarySortCollectorMap(Document document, XPath xpath) throws XPathExpressionException {
  Map<String, TBGAwareCollector> collectorMap = new HashMap<>();
  String expression = stepsToPath(SCHEMA, DYNAMIC_VALUE_SORT);
  NodeList nodes = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET);

  for (int i = 0; i < nodes.getLength(); i++) {
    NamedNodeMap attrs = nodes.item(i).getAttributes();

    // The third argument labels the element in the "missing attribute" error, so name the
    // element actually being parsed rather than "field definition".
    final String name = DOMUtil.getAttr(attrs, NAME, DYNAMIC_VALUE_SORT + " definition");
    final String collector = DOMUtil.getAttr(attrs, "collector", DYNAMIC_VALUE_SORT + " definition");

    // Instantiate as Object first so a wrong class yields the descriptive error below
    // instead of whatever the loader reports for a type mismatch.
    final Object obj = loader.newInstance(collector, Object.class);
    if (!(obj instanceof TBGAwareCollector)) {
      String msg = "Dynamic sorting value: " + name + " must have a TBGAwareCollector";
      throw new SolrException(ErrorCode.BAD_REQUEST, msg);
    }
    collectorMap.put(name, (TBGAwareCollector) obj);
  }
  return collectorMap;
}

/**
* Sort the dynamic fields and stuff them in a normal array for faster access.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
map.put(IGainTermsQParserPlugin.NAME, IGainTermsQParserPlugin.class);
map.put(TextLogisticRegressionQParserPlugin.NAME, TextLogisticRegressionQParserPlugin.class);
map.put(SignificantTermsQParserPlugin.NAME, SignificantTermsQParserPlugin.class);
map.put(SecondarySortQParserPlugin.NAME, SecondarySortQParserPlugin.class);

standardPlugins = Collections.unmodifiableMap(map);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.apache.solr.search;

import org.apache.lucene.search.*;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.MergeStrategy;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.sorting.SecondarySortCollector;
import org.apache.solr.search.sorting.TBGAwareCollector;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class SecondarySortQParserPlugin extends QParserPlugin {
public static final String NAME = "sesort";

@Override
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
return new SecondarySortQParser(qstr, localParams, params, req);
}

private class SecondarySortQParser extends QParser {

String qstr;
SolrParams localParams;
SolrParams params;
SolrQueryRequest req;

private SecondarySortQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
super(qstr, localParams, params, req);
this.qstr = qstr;
this.localParams = localParams;
this.params = params;
this.req = req;
}

@Override
public Query parse() {

String sortStr = localParams.get(CommonParams.SORT);
String[] sortAlgs;
if(sortStr == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Need to provide sort algorithms to perform secondary sort.");
} else {
sortAlgs = sortStr.split(",");
if(sortAlgs.length == 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Need to provide valid sort algorithms to perform secondary sort.");
}
}

SolrCore core = req.getCore();
Map<String, TBGAwareCollector> secondaySortCollectorMap = core.getLatestSchema().getSecondarySortCollectorMap();
List<TBGAwareCollector> collectors = new ArrayList<>();

for(String sortAlg : sortAlgs) {
if(secondaySortCollectorMap.containsKey(sortAlg)) {
collectors.add(secondaySortCollectorMap.get(sortAlg));
}
}

return new RankQuery() {
Query mainQuery;

@Override
public TopDocsCollector getTopDocsCollector(int len, QueryCommand cmd, IndexSearcher searcher) throws IOException {
return SecondarySortCollector.create(len, collectors, null ,false, false, false);
}

@Override
public MergeStrategy getMergeStrategy() {
return null;
}

@Override
public RankQuery wrap(Query mainQuery) {
this.mainQuery = mainQuery;
return this;
}

@Override
public boolean equals(Object o) {
return false;
}

@Override
public int hashCode() {
return 0;
}
};
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
package org.apache.solr.search.sorting;


import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.*;
import org.apache.lucene.util.PriorityQueue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.ListIterator;

import com.google.common.collect.RangeMap;
import com.google.common.collect.Range;
import com.google.common.collect.BoundType;
import com.google.common.collect.ImmutableRangeMap;

/**
* This collector will be used to chain together a dynamic list of collectors,
* to perform sorting and ranking on the result set. The collectors passed in are
* expected to implement the TieBreakerGroupAware interface, which requires the collector
* to return any
*/
public class SecondarySortCollector extends TopDocsCollector {

private boolean fillFields;
private boolean trackDocScores;
private boolean trackMaxScore;
private int numHits;
private FieldDoc after;
private TBGAwareCollector initCollector;
private ListIterator<TBGAwareCollector> collectorsIterator;
private LeafReaderContext singleLeafReaderContext;
private RangeMap<Integer, LeafReaderContext> contextRangeMap;

public SecondarySortCollector(PriorityQueue<?> priorityQueue,
List<TBGAwareCollector> collectors,
int numHits, FieldDoc after,
boolean fillFields,
boolean trackDocScores,
boolean trackMaxScore) {
super(priorityQueue);
this.collectorsIterator = collectors.listIterator();
this.fillFields = fillFields;
this.trackDocScores = trackMaxScore;
this.trackMaxScore = trackMaxScore;
this.numHits = numHits;
this.after = after;
this.initCollector = this.collectorsIterator.next();
}

public static TopDocsCollector<?> create(int numHits, List<TBGAwareCollector> collectors, FieldDoc after, boolean fillFields, boolean trackDocScores, boolean trackMaxScore) {
return new SecondarySortCollector(null, collectors, numHits, after, fillFields, trackDocScores, trackMaxScore);
}


public TopDocs topDocs(int start, int howMany) {
TopDocs topDocs = this.initCollector.topDocs(start, howMany);
return rankDocs(topDocs, start, howMany, 1, this.initCollector);
}

protected TopDocs rankDocs(TopDocs topDocs, int start, int pageSize, int factoryCount, TBGAwareCollector currentCollector) {

List<TieBreakerGroup> tbGroups = currentCollector.getTieBreakerGroups();

if (null == tbGroups || tbGroups.isEmpty()) {
return topDocs;
}

for (TieBreakerGroup tbGroup : tbGroups) {
ScoreDoc[] scoreDocs = rankScoreDocs(tbGroup.getDocs(), start, pageSize, factoryCount, currentCollector);
// set the correctly sorted docs back on the tieBreakerGroup object
tbGroup.setDocs(Arrays.asList(scoreDocs));
}

// After the groups have been sorted, add them back to the original
// TopDocs in sorted order
mergeRankedTieBreakerGroups(topDocs, tbGroups);
return topDocs;
}

public ScoreDoc[] rankScoreDocs(List<ScoreDoc> scoreDocs, int start, int pageSize, int factoryCount, TBGAwareCollector currentCollector) {
// if there are tie breaker groups, set currentCollector to the next
// collector that can be created from the list of collectors
// and used that collector to break the ties
currentCollector = getNextCollector();

if (currentCollector == null) {
return scoreDocs.toArray(new ScoreDoc[]{});
}

for (ScoreDoc scoreDoc : scoreDocs) {
// use currCollector to collect the docs in the tieBreakerGroup
int doc = scoreDoc.doc;
try {
LeafCollector currLeafCollector = currentCollector.getLeafCollector((this.singleLeafReaderContext != null) ? this.singleLeafReaderContext : this.contextRangeMap.get(doc));
currLeafCollector.collect(doc);
} catch (IOException e) {
}
}

TopDocs tbTopDocs = currentCollector.topDocs(start, pageSize);
TopDocs sortedDocs = rankDocs(tbTopDocs, start, pageSize, factoryCount + 1, currentCollector);
return sortedDocs.scoreDocs;
}

/**
* This method loops through each tieBreakerGroup and adds the hits in the
* group back to the scoreDocs in the initial {@link TopDocs} in the
* correctly sorted order. Sometimes the tieBreakerGroup may be larger than
* the size of the documents that need to be replaced in the scoreDocs. This
* will occur if a collector produced a TieBreakerGroup that falls at the
* top or at the bottom of that page, and multiple documents outside the
* paging had the same score. In this case a secondary sort was needed to
* determine what actually falls within the page and the postion or start
* valued in {@link TieBreakerGroup} is used to determine which documents
* should be added back to the original topDocs.
*/
protected void mergeRankedTieBreakerGroups(TopDocs topDocs, List<TieBreakerGroup> tbGroups) {
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
int start = 0;
int end = scoreDocs.length - 1;

for (TieBreakerGroup group : tbGroups) {
int tbStart = group.getStart();
int tbEnd = group.getDocs().size();
int grpCounter = 0;

// Make sure if start position is negative skip negative indexes and
// increment the counter until you reach zero,
// and, if the tieBreakerGroup size is larger the the size of the
// scoreDocs, only add the values that will fit in the scoreDocs
// appropriately
while ((tbStart <= end) && (grpCounter < tbEnd)) {
if ((tbStart >= start) && (tbStart <= end)) {
scoreDocs[tbStart] = group.getDocs().get(grpCounter);
}
grpCounter++;
tbStart++;
}
}
}

@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
if (this.singleLeafReaderContext != null && this.contextRangeMap == null) {
if (context.reader().leaves().size() == 1 && this.singleLeafReaderContext == null) {
this.singleLeafReaderContext = context;
} else if (context.reader().leaves().size() > 1) {
ImmutableRangeMap.Builder<Integer, LeafReaderContext> builder = ImmutableRangeMap.builder();
for (LeafReaderContext ctx : context.reader().leaves()) {
int lowerBound = ctx.docBase;
int upperBound = ctx.docBase + ctx.reader().maxDoc();
Range<Integer> range = Range.range(lowerBound, BoundType.CLOSED, upperBound, BoundType.OPEN);
builder.put(range, ctx);
}
this.contextRangeMap = builder.build();
}
}
return initCollector.getLeafCollector(context);
}

@Override
public boolean needsScores() {
return false;
}

private TBGAwareCollector getNextCollector() {
if (!collectorsIterator.hasNext()) {
return null;
}

TBGAwareCollector nextCollector = collectorsIterator.next();
return nextCollector.create(this.numHits, this.fillFields, this.trackDocScores, this.trackMaxScore);
}
}

Loading