From c37f1e0d66f1f28a5c83033d9496cc33c55f265e Mon Sep 17 00:00:00 2001 From: tballison Date: Thu, 1 Sep 2016 15:33:55 -0400 Subject: [PATCH] LUCENE-7434, first draft --- .../lucene/search/spans/SpanNearQuery.java | 77 +++++++++++++++++-- .../search/spans/TestNearSpansOrdered.java | 66 +++++++++++++++- 2 files changed, 134 insertions(+), 9 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 7958f4758b0b..ea772c4a5a1b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -40,6 +40,7 @@ */ public class SpanNearQuery extends SpanQuery implements Cloneable { + private final static int REQUIRE_ALL = -1; /** * A builder for SpanNearQueries */ @@ -48,6 +49,7 @@ public static class Builder { private final String field; private final List clauses = new LinkedList<>(); private int slop; + private final int minShouldMatch; /** * Construct a new builder @@ -55,10 +57,20 @@ public static class Builder { * @param ordered whether or not clauses must be in-order to match */ public Builder(String field, boolean ordered) { + this(field, ordered, REQUIRE_ALL); + } + + /** + * Construct a new builder + * @param field the field to search in + * @param ordered whether or not clauses must be in-order to match + * @param minShouldMatch the minimum number of spans that need to match + */ + public Builder(String field, boolean ordered, int minShouldMatch) { this.field = field; this.ordered = ordered; + this.minShouldMatch = minShouldMatch; } - /** * Add a new clause */ @@ -91,7 +103,7 @@ public Builder setSlop(int slop) { * Build the query */ public SpanNearQuery build() { - return new SpanNearQuery(clauses.toArray(new SpanQuery[clauses.size()]), slop, ordered); + return new SpanNearQuery(clauses.toArray(new SpanQuery[clauses.size()]), slop, ordered, 
minShouldMatch); } } @@ -113,6 +125,7 @@ public static Builder newUnorderedNearQuery(String field) { protected List clauses; protected int slop; protected boolean inOrder; + protected int minShouldMatch; protected String field; @@ -128,6 +141,24 @@ public static Builder newUnorderedNearQuery(String field) { * @param inOrder true if order is important */ public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder) { + this(clausesIn, slop, inOrder, REQUIRE_ALL); + } + + /** Construct a SpanNearQuery. Matches spans matching a span from each + * clause, with up to slop total unmatched positions between + * them. + *
When inOrder is true, the spans from each clause + * must be in the same order as in clauses and must be non-overlapping. + *
When inOrder is false, the spans from each clause + * need not be ordered and may overlap. + *
At least minShouldMatch clauses must match within the + * allowable slop. + * @param clausesIn the clauses to find near each other, in the same field, at least 2. + * @param slop The slop value + * @param inOrder true if order is important + * @param minShouldMatch minimum number of clauses that must match: at least 2 and at most clausesIn.length, or -1 to require all clauses + */ + public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder, int minShouldMatch) { this.clauses = new ArrayList<>(clausesIn.length); for (SpanQuery clause : clausesIn) { if (this.field == null) { // check field @@ -137,10 +168,21 @@ public SpanNearQuery(SpanQuery[] clausesIn, int slop, boolean inOrder) { } this.clauses.add(clause); } + + if (minShouldMatch > REQUIRE_ALL) { + if (minShouldMatch == 1) { + throw new IllegalArgumentException("Minimum must be > 1. Consider using a SpanOrQuery if you only require one match."); + } else if (minShouldMatch < 2) { + throw new IllegalArgumentException("Minimum must be > 1"); + } else if (minShouldMatch > clauses.size()) { + throw new IllegalArgumentException("Minimum should be <= the number of clauses"); + } + } + this.slop = slop; this.inOrder = inOrder; + this.minShouldMatch = minShouldMatch; } - /** Return the clauses whose spans are matched. 
*/ public SpanQuery[] getClauses() { return clauses.toArray(new SpanQuery[clauses.size()]); @@ -152,6 +194,9 @@ public SpanQuery[] getClauses() { /** Return true if matches are required to be in-order.*/ public boolean isInOrder() { return inOrder; } + /** Return minimum number of clauses that must match.*/ + public int getMinShouldMatch() { return minShouldMatch; } + @Override public String getField() { return field; } @@ -171,6 +216,10 @@ public String toString(String field) { buffer.append(slop); buffer.append(", "); buffer.append(inOrder); + if (minShouldMatch > REQUIRE_ALL) { + buffer.append(", "); + buffer.append(minShouldMatch); + } buffer.append(")"); return buffer.toString(); } @@ -181,16 +230,18 @@ public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, floa for (SpanQuery q : clauses) { subWeights.add(q.createWeight(searcher, false, boost)); } - return new SpanNearWeight(subWeights, searcher, needsScores ? getTermContexts(subWeights) : null, boost); + return new SpanNearWeight(subWeights, searcher, needsScores ? 
getTermContexts(subWeights) : null, boost, minShouldMatch); } public class SpanNearWeight extends SpanWeight { final List subWeights; + final int minShouldMatch; - public SpanNearWeight(List subWeights, IndexSearcher searcher, Map terms, float boost) throws IOException { + public SpanNearWeight(List subWeights, IndexSearcher searcher, Map terms, float boost, int minShouldMatch) throws IOException { super(SpanNearQuery.this, searcher, terms, boost); this.subWeights = subWeights; + this.minShouldMatch = minShouldMatch; } @Override @@ -213,11 +264,15 @@ public Spans getSpans(final LeafReaderContext context, Postings requiredPostings Spans subSpan = w.getSpans(context, requiredPostings); if (subSpan != null) { subSpans.add(subSpan); - } else { - return null; // all required + } else if (minShouldMatch == REQUIRE_ALL) { + return null; } } + if (minShouldMatch > REQUIRE_ALL && subSpans.size() < minShouldMatch) { + return null; + } + // all NearSpans require at least two subSpans return (!inOrder) ? new NearSpansUnordered(slop, subSpans) : new NearSpansOrdered(slop, subSpans); @@ -262,14 +317,20 @@ public boolean equals(Object other) { private boolean equalsTo(SpanNearQuery other) { return inOrder == other.inOrder && slop == other.slop && - clauses.equals(other.clauses); + clauses.equals(other.clauses) && + minShouldMatch == other.minShouldMatch; } @Override public int hashCode() { int result = classHash(); + result = Integer.rotateLeft(result, 1); + result ^= minShouldMatch; + result = Integer.rotateLeft(result, 1); result ^= clauses.hashCode(); + result = Integer.rotateLeft(result, 1); result += slop; + result = Integer.rotateLeft(result, 1); int fac = 1 + (inOrder ? 
8 : 4); return fac * result; } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index 6b491fe78501..1cd4ad321bc6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -74,7 +74,9 @@ public void setUp() throws Exception { "w1 w3 xx w2 yy w3 zz", "t1 t2 t2 t1", "g x x g g x x x g g x x g", - "go to webpage" + "go to webpage", + "x x a x a x", + "x x a x b x c x x x x a x b" }; protected SpanNearQuery makeQuery(String s1, String s2, String s3, @@ -245,6 +247,68 @@ public void testOrderedSpanIterationSameTerms2() throws Exception { assertFinished(spans); } + public void testMinShouldMatch1() throws Exception { + //test repeated token + SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ + new SpanTermQuery(new Term(FIELD, "a")), new SpanTermQuery(new Term(FIELD, "a")) + }, 1, true, 2); + Spans spans = q.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + assertNext(spans,7,2,5); + assertFinished(spans); + } + + public void testMinShouldMatch3() throws Exception { + //test that 2 work + SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ + new SpanTermQuery(new Term(FIELD, "a")), new SpanTermQuery(new Term(FIELD, "b")), + new SpanTermQuery(new Term(FIELD, "d")) + }, 1, true, 2); + Spans spans = q.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + assertNext(spans,8,2,5); + assertNext(spans,8,11,14); + assertFinished(spans); + } + + public void testMinShouldMatch4() throws Exception { + //requires 3, only 2 in docs: no hits + SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ + new SpanTermQuery(new Term(FIELD, "a")), new SpanTermQuery(new Term(FIELD, "b")), + new SpanTermQuery(new 
Term(FIELD, "d")), new SpanTermQuery(new Term(FIELD, "e")) + }, 1, true, 3); + Spans spans = q.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); + assertFinished(spans); + } + + public void testMinShouldMatchEx1(){ + try { + SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ + new SpanTermQuery(new Term(FIELD, "t1")), new SpanTermQuery(new Term(FIELD, "t2")) + }, 0, true, 1); + fail("Can't have value < 2"); + } catch (IllegalArgumentException e) { + } + } + + public void testMinShouldMatchEx2(){ + try { + SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ + new SpanTermQuery(new Term(FIELD, "t1")), new SpanTermQuery(new Term(FIELD, "t2")) + }, 0, true, 0); + fail("Can't have value < 2"); + } catch (IllegalArgumentException e) { + } + } + + public void testMinShouldMatchEx3(){ + try { + SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ + new SpanTermQuery(new Term(FIELD, "t1")), new SpanTermQuery(new Term(FIELD, "t2")) + }, 0, true, 5); + fail("MinNumberShouldMatch can't be > length of SpanQuery[]"); + } catch (IllegalArgumentException e) { + } + } + /** * not a direct test of NearSpans, but a demonstration of how/when * this causes problems