Skip to content

Commit

Permalink
SOLR-15449: edismax sow and mm (apache#158)
Browse files Browse the repository at this point in the history
  • Loading branch information
alessandrobenedetti authored Jun 9, 2021
1 parent 96d0718 commit 9791057
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 15 deletions.
2 changes: 2 additions & 0 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,8 @@ Bug Fixes

* SOLR-15334: Return error response when failing auth in PKIAuthPlugin (Mike Drob)

* SOLR-15449: Edismax sow incorrectly affects mm parameter in multi field search (Alessandro Benedetti, Michael Gibney, David Smiley)

================== 8.10.0 ==================

Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
Expand Down Expand Up @@ -1160,7 +1161,11 @@ protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw
try {
subqs.add(ft.getFieldQuery(parser, sf, queryTerm));
} catch (Exception e) { // assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery()
// for edismax: ignore parsing failures
// ExtendedDismaxQueryParser is a lenient query parser
// This happens when a field tries to parse a query term that has a type incompatible with the field
// e.g.
// a numerical field trying to parse a textual query term
subqs.add(new MatchNoDocsQuery());
}
}
if (subqs.size() == 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,19 +415,7 @@ public void testFocusQueryParser() {
assertQ(req("defType","edismax", "mm","0", "q","Terminator: 100", "qf","movies_t foo_i"),
twor);

assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i", "sow","true"),
nor);
// When sow=false, the per-field query structures differ (no "Terminator" query on integer field foo_i),
// so a dismax-per-field is constructed. As a result, mm=100% is applied per-field instead of per-term;
// since there is only one term (100) required in the foo_i field's dismax, the query can match docs that
// only have the 100 term in the foo_i field, and don't necessarily have "Terminator" in any field.
assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i", "sow","false"),
oner);
assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i"), // default sow=false
oner);

assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 8", "qf","movies_t foo_i"),
oner);


assertQ(req("defType","edismax", "mm","0", "q","movies_t:Terminator 100", "qf","movies_t foo_i"),
twor);
Expand Down Expand Up @@ -1775,7 +1763,34 @@ private static String getParsedQuery(SolrQueryRequest request) throws Exception
String resp = h.query(request);
return (String) BaseTestHarness.evaluateXPath(resp, "//str[@name='parsedquery']/text()", XPathConstants.STRING);
}


/**
 * Checks how edismax combines {@code sow} with {@code mm=100%} when searching
 * across the two fields {@code movies_t} and {@code foo_i}.
 *
 * <p>With fields that are analysed differently:
 * <ul>
 *   <li>{@code sow=true} makes minimum-should-match "per document": a matching document
 *       must contain every mm query term at least once, in any of the fields;</li>
 *   <li>{@code sow=false} makes minimum-should-match "per field": a matching document
 *       must contain every mm query term at least once within one single field.</li>
 * </ul>
 * See https://issues.apache.org/jira/browse/SOLR-12779 for additional details.
 */
public void testSplitOnWhitespace_shouldRespectMinimumShouldMatch() {
  final String singleHit = "*[count(//doc)=1]";
  final String noHits = "*[count(//doc)=0]";

  // "Terminator: 100" -- no document holds both terms, neither spread over fields nor in one field.
  assertEdismaxAllTermsRequired("Terminator: 100", "true", noHits);
  assertEdismaxAllTermsRequired("Terminator: 100", "false", noHits);

  // "Terminator: 8" -- document 46 has Terminator in movies_t and 8 in foo_i,
  // so it matches per-document (sow=true) but not per-field (sow=false).
  assertEdismaxAllTermsRequired("Terminator: 8", "true", singleHit);
  assertEdismaxAllTermsRequired("Terminator: 8", "false", noHits);

  // "mission impossible Terminator: 8" -- document 46 has mission, impossible and Terminator
  // in movies_t and 8 in foo_i; again a match only when terms may come from different fields.
  assertEdismaxAllTermsRequired("mission impossible Terminator: 8", "true", singleHit);
  assertEdismaxAllTermsRequired("mission impossible Terminator: 8", "false", noHits);
}

/**
 * Runs an edismax query with {@code mm=100%} over {@code qf="movies_t foo_i"} and
 * asserts the given XPath expression against the response.
 *
 * @param userQuery     value of the {@code q} parameter
 * @param sow           value of the {@code sow} parameter ("true" or "false")
 * @param expectedXpath XPath test the response must satisfy
 */
private void assertEdismaxAllTermsRequired(String userQuery, String sow, String expectedXpath) {
  assertQ(
      req("defType", "edismax", "mm", "100%", "q", userQuery, "qf", "movies_t foo_i", "sow", sow),
      expectedXpath);
}

public void testSplitOnWhitespace_Different_Field_Analysis() throws Exception {
// When the *structure* of produced queries is different in each field,
// sow=true produces boolean-of-dismax query structure,
Expand Down

0 comments on commit 9791057

Please sign in to comment.