Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-7434, first draft #75

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
*/
public class SpanNearQuery extends SpanQuery implements Cloneable {

/** Sentinel for {@code minShouldMatch} meaning that every clause is required to match. */
private static final int REQUIRE_ALL = -1;
/**
* A builder for SpanNearQueries
*/
Expand All @@ -48,17 +49,28 @@ public static class Builder {
private final String field;
private final List<SpanQuery> clauses = new LinkedList<>();
private int slop;
private final int minShouldMatch;

/**
* Construct a new builder whose query requires every clause to match
* (delegates to the three-argument constructor with {@code REQUIRE_ALL}).
* @param field the field to search in
* @param ordered whether or not clauses must be in-order to match
*/
public Builder(String field, boolean ordered) {
this(field, ordered, REQUIRE_ALL);
}

/**
* Construct a new builder with an explicit minimum number of matching clauses.
* The value is only stored here; it is handed to the {@code SpanNearQuery}
* constructor by {@code build()}, which is where it gets validated.
* @param field the field to search in
* @param ordered whether or not clauses must be in-order to match
* @param minShouldMatch the minimum number of spans that need to match
*/
public Builder(String field, boolean ordered, int minShouldMatch) {
this.field = field;
this.ordered = ordered;
this.minShouldMatch = minShouldMatch;
}

/**
* Add a new clause
*/
Expand Down Expand Up @@ -91,7 +103,7 @@ public Builder setSlop(int slop) {
* Build the query
*/
public SpanNearQuery build() {
return new SpanNearQuery(clauses.toArray(new SpanQuery[clauses.size()]), slop, ordered);
return new SpanNearQuery(clauses.toArray(new SpanQuery[clauses.size()]), slop, ordered, minShouldMatch);
}

}
Expand All @@ -113,6 +125,7 @@ public static Builder newUnorderedNearQuery(String field) {
protected List<SpanQuery> clauses;
protected int slop;
protected boolean inOrder;
protected int minShouldMatch;

protected String field;

Expand All @@ -128,6 +141,24 @@ public static Builder newUnorderedNearQuery(String field) {
* @param inOrder true if order is important
*/
public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder) {
// No minimum supplied: REQUIRE_ALL makes every clause mandatory.
this(clausesIn, slop, inOrder, REQUIRE_ALL);
}

/** Construct a SpanNearQuery. Matches spans matching a span from each
* clause, with up to <code>slop</code> total unmatched positions between
* them.
* <br>When <code>inOrder</code> is true, the spans from each clause
* must be in the same order as in <code>clauses</code> and must be non-overlapping.
* <br>When <code>inOrder</code> is false, the spans from each clause
* need not be ordered and may overlap.
* <br>Must match at least <code>minShouldMatch</code> within the
* allowable <code>slop</code>.
* @param clausesIn the clauses to find near each other, in the same field, at least 2.
* @param slop The slop value
* @param inOrder true if order is important
* @param minShouldMatch minimum number of clauses that should match: at least 2 and <code>&lt;= clausesIn.length</code>, or -1 to require every clause
*/
public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder, int minShouldMatch) {
this.clauses = new ArrayList<>(clausesIn.length);
for (SpanQuery clause : clausesIn) {
if (this.field == null) { // check field
Expand All @@ -137,10 +168,21 @@ public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder) {
}
this.clauses.add(clause);
}

if (minShouldMatch > REQUIRE_ALL) {
if (minShouldMatch == 1) {
throw new IllegalArgumentException("Minimum must be > 1. Consider using a SpanOrQuery if you only require one match.");
} else if (minShouldMatch < 2) {
throw new IllegalArgumentException("Minimum must be > 1");
} else if (minShouldMatch > clauses.size()) {
throw new IllegalArgumentException("Minimum should be <= the number of clauses");
}
}

this.slop = slop;
this.inOrder = inOrder;
this.minShouldMatch = minShouldMatch;
}

/** Return the clauses whose spans are matched. */
public SpanQuery[] getClauses() {
return clauses.toArray(new SpanQuery[clauses.size()]);
Expand All @@ -152,6 +194,9 @@ public SpanQuery[] getClauses() {
/** Return true if matches are required to be in-order. */
public boolean isInOrder() { return inOrder; }

/**
* Return the minimum number of clauses that must match, exactly as it was
* passed to the constructor. The three-argument constructor stores -1
* ({@code REQUIRE_ALL}), meaning every clause is required.
*/
public int getMinShouldMatch() { return minShouldMatch; }

/** Return the field stored on this query. */
@Override
public String getField() { return field; }

Expand All @@ -171,6 +216,10 @@ public String toString(String field) {
buffer.append(slop);
buffer.append(", ");
buffer.append(inOrder);
if (minShouldMatch > REQUIRE_ALL) {
buffer.append(", ");
buffer.append(minShouldMatch);
}
buffer.append(")");
return buffer.toString();
}
Expand All @@ -181,16 +230,18 @@ public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, floa
for (SpanQuery q : clauses) {
subWeights.add(q.createWeight(searcher, false, boost));
}
return new SpanNearWeight(subWeights, searcher, needsScores ? getTermContexts(subWeights) : null, boost);
return new SpanNearWeight(subWeights, searcher, needsScores ? getTermContexts(subWeights) : null, boost, minShouldMatch);
}

public class SpanNearWeight extends SpanWeight {

final List<SpanWeight> subWeights;
final int minShouldMatch;

public SpanNearWeight(List<SpanWeight> subWeights, IndexSearcher searcher, Map<Term, TermContext> terms, float boost) throws IOException {
public SpanNearWeight(List<SpanWeight> subWeights, IndexSearcher searcher, Map<Term, TermContext> terms, float boost, int minShouldMatch) throws IOException {
super(SpanNearQuery.this, searcher, terms, boost);
this.subWeights = subWeights;
this.minShouldMatch = minShouldMatch;
}

@Override
Expand All @@ -213,11 +264,15 @@ public Spans getSpans(final LeafReaderContext context, Postings requiredPostings
Spans subSpan = w.getSpans(context, requiredPostings);
if (subSpan != null) {
subSpans.add(subSpan);
} else {
return null; // all required
} else if (minShouldMatch == REQUIRE_ALL) {
return null;
}
}

if (minShouldMatch > REQUIRE_ALL && subSpans.size() < minShouldMatch) {
return null;
}

// all NearSpans require at least two subSpans
return (!inOrder) ? new NearSpansUnordered(slop, subSpans)
: new NearSpansOrdered(slop, subSpans);
Expand Down Expand Up @@ -262,14 +317,20 @@ public boolean equals(Object other) {
private boolean equalsTo(SpanNearQuery other) {
return inOrder == other.inOrder &&
slop == other.slop &&
clauses.equals(other.clauses);
clauses.equals(other.clauses) &&
minShouldMatch == other.minShouldMatch;
}

@Override
public int hashCode() {
  // Mix in one component per step, rotating by one bit between steps so that
  // each component lands on a different bit alignment.
  int hash = Integer.rotateLeft(classHash(), 1) ^ minShouldMatch;
  hash = Integer.rotateLeft(hash, 1) ^ clauses.hashCode();
  hash = Integer.rotateLeft(hash, 1) + slop;
  hash = Integer.rotateLeft(hash, 1);
  // Ordered and unordered queries are scaled by different odd factors (9 vs 5).
  final int orderFactor = inOrder ? 9 : 5;
  return orderFactor * hash;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ public void setUp() throws Exception {
"w1 w3 xx w2 yy w3 zz",
"t1 t2 t2 t1",
"g x x g g x x x g g x x g",
"go to webpage"
"go to webpage",
"x x a x a x",
"x x a x b x c x x x x a x b"
};

protected SpanNearQuery makeQuery(String s1, String s2, String s3,
Expand Down Expand Up @@ -245,6 +247,68 @@ public void testOrderedSpanIterationSameTerms2() throws Exception {
assertFinished(spans);
}

public void testMinShouldMatch1() throws Exception {
  // Repeated token: both clauses are the same term "a", ordered, slop 1, minimum 2.
  SpanQuery[] clauses = {
    new SpanTermQuery(new Term(FIELD, "a")),
    new SpanTermQuery(new Term(FIELD, "a"))
  };
  SpanNearQuery query = new SpanNearQuery(clauses, 1, true, 2);
  SpanWeight weight = query.createWeight(searcher, false, 1f);
  Spans spans = weight.getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
  // Exactly one match is expected (presumably doc 7, positions 2..5 -- confirm against assertNext).
  assertNext(spans, 7, 2, 5);
  assertFinished(spans);
}

public void testMinShouldMatch3() throws Exception {
  // Three clauses with a minimum of 2: matching only two of them must still produce hits.
  // NOTE(review): presumably "d" does not occur in the indexed docs -- confirm against setUp.
  SpanQuery[] clauses = {
    new SpanTermQuery(new Term(FIELD, "a")),
    new SpanTermQuery(new Term(FIELD, "b")),
    new SpanTermQuery(new Term(FIELD, "d"))
  };
  SpanNearQuery query = new SpanNearQuery(clauses, 1, true, 2);
  SpanWeight weight = query.createWeight(searcher, false, 1f);
  Spans spans = weight.getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
  // Two matches are expected, both reported for doc 8.
  assertNext(spans, 8, 2, 5);
  assertNext(spans, 8, 11, 14);
  assertFinished(spans);
}

public void testMinShouldMatch4() throws Exception {
  // Four clauses with a minimum of 3, but only two of the terms are in the docs: no hits.
  SpanQuery[] clauses = {
    new SpanTermQuery(new Term(FIELD, "a")),
    new SpanTermQuery(new Term(FIELD, "b")),
    new SpanTermQuery(new Term(FIELD, "d")),
    new SpanTermQuery(new Term(FIELD, "e"))
  };
  SpanNearQuery query = new SpanNearQuery(clauses, 1, true, 3);
  SpanWeight weight = query.createWeight(searcher, false, 1f);
  Spans spans = weight.getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
  assertFinished(spans);
}

/** A minimum of 1 must be rejected by the constructor with an IllegalArgumentException. */
public void testMinShouldMatchEx1() {
  try {
    // Only the constructor's validation is under test, so the instance is not kept.
    new SpanNearQuery(new SpanQuery[]{
      new SpanTermQuery(new Term(FIELD, "t1")), new SpanTermQuery(new Term(FIELD, "t2"))
    }, 0, true, 1);
    fail("Can't have value < 2");
  } catch (IllegalArgumentException expected) {
    // expected: reaching this handler is the passing outcome
  }
}

/** A minimum of 0 must be rejected by the constructor with an IllegalArgumentException. */
public void testMinShouldMatchEx2() {
  try {
    // Only the constructor's validation is under test, so the instance is not kept.
    new SpanNearQuery(new SpanQuery[]{
      new SpanTermQuery(new Term(FIELD, "t1")), new SpanTermQuery(new Term(FIELD, "t2"))
    }, 0, true, 0);
    fail("Can't have value < 2");
  } catch (IllegalArgumentException expected) {
    // expected: reaching this handler is the passing outcome
  }
}

/** A minimum larger than the number of clauses (5 for 2 clauses) must be rejected. */
public void testMinShouldMatchEx3() {
  try {
    // Only the constructor's validation is under test, so the instance is not kept.
    new SpanNearQuery(new SpanQuery[]{
      new SpanTermQuery(new Term(FIELD, "t1")), new SpanTermQuery(new Term(FIELD, "t2"))
    }, 0, true, 5);
    fail("MinNumberShouldMatch can't be > length of SpanQuery[]");
  } catch (IllegalArgumentException expected) {
    // expected: reaching this handler is the passing outcome
  }
}

/**
* not a direct test of NearSpans, but a demonstration of how/when
* this causes problems
Expand Down