Skip to content

Commit

Permalink
LUCENE-9953: Make FacetResult#value accurate for LongValueFacetCounts…
Browse files Browse the repository at this point in the history
… multi-value doc cases (apache#2491)

Co-authored-by: Greg Miller <[email protected]>
  • Loading branch information
gsmiller and Greg Miller authored May 18, 2021
1 parent 3d2e5f8 commit 3a7387d
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 9 deletions.
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ Bug Fixes

* LUCENE-9958: Fixed performance regression for boolean queries that configure a
minimum number of matching clauses. (Adrien Grand, Matt Weber)

* LUCENE-9953: LongValueFacetCounts should count each document at most once when determining
the total count for a dimension. Prior to this fix, multi-value docs could contribute a > 1
count to the dimension count. (Greg Miller)

Other
---------------------
Expand Down
10 changes: 7 additions & 3 deletions lucene/facet/src/java/org/apache/lucene/facet/FacetResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,13 @@ public final class FacetResult {
/** Path whose children were requested. */
public final String[] path;

/** Total value for this path (sum of all child counts, or
* sum of all child values), even those not included in
* the topN. */
/**
* Total number of documents containing a value for this path, even those not included in the
* topN. If a document contains multiple values for the same path, it will only be counted once in
* this value.
*/
// TODO: This may not hold true for SSDV faceting, where docs can be counted more than
// once. We should fix this. See LUCENE-9952
public final Number value;

/** How many child labels were encountered. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,9 @@ private void countMultiValued(String field, List<MatchingDocs> matchingDocs) thr

for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
int limit = values.docValueCount();
totCount += limit;
if (limit > 0) {
totCount++;
}
for (int i = 0; i < limit; i++) {
increment(values.nextValue());
}
Expand Down Expand Up @@ -239,7 +241,9 @@ private void countAllMultiValued(IndexReader reader, String field) throws IOExce
int doc;
while ((doc = values.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
int limit = values.docValueCount();
totCount += limit;
if (limit > 0) {
totCount++;
}
for (int i = 0; i < limit; i++) {
increment(values.nextValue());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -390,15 +390,15 @@ public void testRandomMultiValued() throws Exception {
int expectedChildCount = 0;
int expectedTotalCount = 0;
for (int i = 0; i < valueCount; i++) {
if (values[i] != null) {
if (values[i] != null && values[i].length > 0) {
expectedTotalCount++;
for (long value : values[i]) {
Integer curCount = expected.get(value);
if (curCount == null) {
curCount = 0;
expectedChildCount++;
}
expected.put(value, curCount + 1);
expectedTotalCount++;
}
}
}
Expand Down Expand Up @@ -471,9 +471,9 @@ public void testRandomMultiValued() throws Exception {
expectedChildCount = 0;
int totCount = 0;
for (int i = minId; i <= maxId; i++) {
if (values[i] != null) {
if (values[i] != null && values[i].length > 0) {
totCount++;
for (long value : values[i]) {
totCount++;
Integer curCount = expected.get(value);
if (curCount == null) {
expectedChildCount++;
Expand Down

0 comments on commit 3a7387d

Please sign in to comment.