diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsField.java b/solr/core/src/java/org/apache/solr/handler/component/StatsField.java index 4c2a2b6a5abd..0a9bc6cb4c02 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsField.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsField.java @@ -208,7 +208,9 @@ boolean parseParams(StatsField sf) { private final EnumSet<Stat> statsInResponse = EnumSet.noneOf(Stat.class); private final List<Double> percentilesList= new ArrayList<Double>(); private final boolean isShard; - + + private String floor; + private String ceil; private double tdigestCompression = 100.0D; private HllOptions hllOpts; @@ -301,6 +303,11 @@ public StatsField(ResponseBuilder rb, String statsParam) { ? Collections.<String>emptyList() : StrUtils.splitSmart(excludeStr,','); + // schema-type specific, so these are pulled out as strings and + // dealt with by the stats values + this.floor = localParams.get("floor"); + this.ceil = localParams.get("ceil"); + assert ( (null == this.valueSource) ^ (null == this.schemaField) ) : "exactly one of valueSource & schemaField must be null"; } @@ -512,6 +519,10 @@ public List<String> getTagList() { return tagList; } + public String getFloor() { return floor; } + + public String getCeil() { return ceil; } + public String toString() { return "StatsField<" + originalParam + ">"; } diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsValues.java b/solr/core/src/java/org/apache/solr/handler/component/StatsValues.java index 8c85fda7a71b..f32c30d276a2 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsValues.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsValues.java @@ -54,6 +54,13 @@ public interface StatsValues { */ void missing(); + /** + * Updates the statistics for a document that was excluded because + * its value fell outside the configured floor/ceil bounds + */ + + void outOfBounds(); + /** * Updates the statistics when multiple documents are missing a value * diff --git 
a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java index a2e4a4e78fd8..17132d6ea164 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java @@ -33,6 +33,7 @@ import com.tdunning.math.stats.AVLTreeDigest; import com.google.common.hash.HashFunction; +import org.apache.solr.util.DateMathParser; import org.apache.solr.util.hll.HLL; import org.apache.solr.util.hll.HLLType; @@ -89,11 +90,15 @@ public static StatsValues createStatsValues(StatsField statsField) { */ abstract class AbstractStatsValues<T> implements StatsValues { private static final String FACETS = "facets"; - - /** Tracks all data about tthe stats we need to collect */ + + /** + * Tracks all data about the stats we need to collect + */ final protected StatsField statsField; - /** may be null if we are collecting stats directly from a function ValueSource */ + /** + * may be null if we are collecting stats directly from a function ValueSource + */ final protected SchemaField sf; /** * may be null if we are collecting stats directly from a function ValueSource @@ -107,11 +112,16 @@ abstract class AbstractStatsValues<T> implements StatsValues { final protected boolean computeMin; final protected boolean computeMax; final protected boolean computeMinOrMax; - final protected boolean computeCardinality; + final protected boolean computeCardinality; - /** - * Either a function value source to collect from, or the ValueSource associated - * with a single valued field we are collecting from. Will be null until/unless + interface Boundable<T> { + boolean isWithinBounds(T value); + } + + protected Boundable<T> bounds = value -> true; // accepts every value until a subclass installs floor/ceil bounds + /** + * Either a function value source to collect from, or the ValueSource associated + * with a single valued field we are collecting from. 
Will be null until/unless * {@link #setNextReader} is called at least once */ private ValueSource valueSource; @@ -125,10 +135,11 @@ abstract class AbstractStatsValues<T> implements StatsValues { * called at least once */ protected FunctionValues values; - + protected T max; protected T min; protected long missing; + protected long outOfBounds; protected long count; protected long countDistinct; protected final Set<T> distinctValues; @@ -136,27 +147,27 @@ abstract class AbstractStatsValues<T> implements StatsValues { /** * Hash function that must be used by implementations of {@link #hash} */ - protected final HashFunction hasher; + protected final HashFunction hasher; // if null, no HLL logic can be computed; not final because of "union" optimization (see below) - private HLL hll; + private HLL hll; // facetField facetValue - protected Map<String, Map<String, StatsValues>> facets = new HashMap<>(); - + protected Map<String, Map<String, StatsValues>> facets = new HashMap<>(); + protected AbstractStatsValues(StatsField statsField) { this.statsField = statsField; this.computeCount = statsField.calculateStats(Stat.count); this.computeMissing = statsField.calculateStats(Stat.missing); - this.computeCalcDistinct = statsField.calculateStats(Stat.countDistinct) - || statsField.calculateStats(Stat.distinctValues); + this.computeCalcDistinct = statsField.calculateStats(Stat.countDistinct) + || statsField.calculateStats(Stat.distinctValues); this.computeMin = statsField.calculateStats(Stat.min); this.computeMax = statsField.calculateStats(Stat.max); this.computeMinOrMax = computeMin || computeMax; - + this.distinctValues = computeCalcDistinct ? 
new TreeSet<>() : null; this.computeCardinality = statsField.calculateStats(Stat.cardinality); - if ( computeCardinality ) { + if (computeCardinality) { hasher = statsField.getHllOptions().getHasher(); hll = statsField.getHllOptions().newHLL(); @@ -188,7 +199,7 @@ protected AbstractStatsValues(StatsField statsField) { this.ft = null; } } - + /** * {@inheritDoc} */ @@ -204,7 +215,7 @@ public void accumulate(NamedList stv) { distinctValues.addAll((Collection<T>) stv.get("distinctValues")); countDistinct = distinctValues.size(); } - + if (computeMinOrMax) { updateMinMax((T) stv.get("min"), (T) stv.get("max")); } @@ -225,12 +236,12 @@ public void accumulate(NamedList stv) { } updateTypeSpecificStats(stv); - + NamedList f = (NamedList) stv.get(FACETS); if (f == null) { return; } - + for (int i = 0; i < f.size(); i++) { String field = f.getName(i); NamedList vals = (NamedList) f.getVal(i); @@ -250,7 +261,7 @@ public void accumulate(NamedList stv) { } } } - + /** * {@inheritDoc} */ @@ -265,9 +276,14 @@ public void accumulate(BytesRef value, int count) { accumulate(typedValue, count); } - public void accumulate(T value, int count) { + public void accumulate(T value, int count) { assert null != value : "Can't accumulate null"; + if (!bounds.isWithinBounds(value)) { + outOfBounds += count; // the value would have been accumulated count times, so count occurrences are excluded + return; + } + if (computeCount) { this.count += count; } @@ -281,14 +297,14 @@ public void accumulate(T value, int count) { if (computeCardinality) { if (null == hasher) { assert value instanceof Number : "pre-hashed value support only works with numeric longs"; - hll.addRaw(((Number)value).longValue()); + hll.addRaw(((Number) value).longValue()); } else { hll.addRaw(hash(value)); } } updateTypeSpecificStats(value, count); } - + /** * {@inheritDoc} */ @@ -298,6 +314,11 @@ public void missing() { missing++; } } + + public void outOfBounds() { + outOfBounds++; + } + /** * {@inheritDoc} @@ -347,6 +368,9 @@ public NamedList<?> getStatsValues() { res.add("cardinality", hll.cardinality()); } } + if 
(outOfBounds > 0) { + res.add("outOfBounds", outOfBounds); + } addTypeSpecificStats(res); @@ -444,7 +468,7 @@ class NumericStatsValues extends AbstractStatsValues<Number> { double minD; // perf optimization, only valid if (null != this.min) double maxD; // perf optimization, only valid if (null != this.max) - + final protected boolean computeSum; final protected boolean computeSumOfSquares; final protected boolean computePercentiles; @@ -460,6 +484,22 @@ public NumericStatsValues(StatsField statsField) { tdigest = new AVLTreeDigest(statsField.getTdigestCompression()); } + + if (statsField.getCeil() != null || statsField.getFloor() != null) { + double floor = statsField.getFloor() != null ? Double.parseDouble(statsField.getFloor()) : Double.NEGATIVE_INFINITY; + double ceil = statsField.getCeil() != null ? Double.parseDouble(statsField.getCeil()) : Double.POSITIVE_INFINITY; + + bounds = (Number value) -> { + // really we should think harder about when things are longs, but in accumulate + // stats component only cares about double, so we do here as well + if (value.doubleValue() >= floor && value.doubleValue() <= ceil) { + return true; + } + return false; + }; + } + + } @Override @@ -704,7 +744,8 @@ class DateStatsValues extends AbstractStatsValues<Date> { private double sum = 0.0; double sumOfSquares = 0; - + + final protected boolean computeSum; final protected boolean computeSumOfSquares; @@ -712,6 +753,24 @@ public DateStatsValues(StatsField statsField) { super(statsField); this.computeSum = statsField.calculateStats(Stat.sum); this.computeSumOfSquares = statsField.calculateStats(Stat.sumOfSquares); + + if (statsField.getCeil() != null || statsField.getFloor() != null) { + Date floor = statsField.getFloor() != null ? 
DateMathParser.parseMath(null, statsField.getFloor()) : null; + Date ceil = statsField.getCeil() != null ? DateMathParser.parseMath(null, statsField.getCeil()) : null; + + bounds = (Date value) -> { + // NOTE: unlike the numeric bounds, date bounds are exclusive: a value + // equal to floor or ceil is treated as out of bounds + if (floor != null && (value.compareTo(floor) <= 0)) { + return false; + } + + if (ceil != null && value.compareTo(ceil) >= 0) { + return false; + } + return true; + }; + } } @Override diff --git a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java index c0e88fc23ab5..049de7a5b1e2 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java @@ -863,7 +863,69 @@ public void testStatsFacetMultivaluedErrorHandling() throws Exception { } - //SOLR-3177 + public void testStatsFencing() throws Exception { + SolrCore core = h.getCore(); + + assertU(adoc("id", "1", "foo_i", "4")); + assertU(adoc("id", "2", "foo_i", "5")); + assertU(commit()); + assertU(adoc("id", "3", "foo_i", "6")); + assertU(adoc("id", "4", "foo_i", "7")); + assertU(adoc("id", "5")); + assertU(commit()); + + assertQ("min fence" + , req("q","*:*", "stats", "true", "stats.field", "{!floor=5}foo_i") + ,"//lst[@name='foo_i']/double[@name='min'][.='5.0']" + ,"//lst[@name='foo_i']/double[@name='max'][.='7.0']" + ,"//lst[@name='foo_i']/long[@name='outOfBounds'][.='1']" + ,"//lst[@name='foo_i']/long[@name='missing'][.='1']" + ); + + + assertQ("max fence" + , req("q","*:*", "stats", "true", "stats.field", "{!ceil=6}foo_i") + ,"//lst[@name='foo_i']/double[@name='min'][.='4.0']" + ,"//lst[@name='foo_i']/double[@name='max'][.='6.0']" + ,"//lst[@name='foo_i']/long[@name='outOfBounds'][.='1']" + ,"//lst[@name='foo_i']/long[@name='missing'][.='1']" + ); + + } + + 
public void testStatsDateFencing() throws Exception { + SolrCore core = h.getCore(); + + DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + + String date1 = dateFormat.format(new Date(123456789)) + "Z"; + String date2 = dateFormat.format(new Date(987654321)) + "Z"; + + assertU(adoc("id", "1", "active_dt", date1)); + assertU(adoc("id", "2", "active_dt", date2)); + assertU(adoc("id", "3")); + assertU(commit()); + + Map<String, String> args = new HashMap<>(); + args.put(CommonParams.Q, "*:*"); + args.put(StatsParams.STATS, "true"); + args.put(StatsParams.STATS_FIELD, "{!ceil=" + dateFormat.format(new Date(987654320)) + "Z}active_dt"); // formats to the same whole second as date2, so date2 equals the ceil and is excluded + args.put("indent", "true"); + SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args)); + + assertQ("test date statistics values", req, + "//long[@name='count'][.='1']", + "//long[@name='missing'][.='1']", + "//long[@name='outOfBounds'][.='1']", + "//date[@name='min'][.='1970-01-02T10:17:36Z']", + "//date[@name='max'][.='1970-01-02T10:17:36Z']"); + + + } + + + //SOLR-3177 public void testStatsExcludeFilterQuery() throws Exception { SolrCore core = h.getCore(); assertU(adoc("id", "1"));