Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SOLR-9395: Add ceil/floor to stats component #64

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,9 @@ boolean parseParams(StatsField sf) {
private final EnumSet<Stat> statsInResponse = EnumSet.noneOf(Stat.class);
private final List<Double> percentilesList= new ArrayList<Double>();
private final boolean isShard;


// Raw "floor" and "ceil" local-param strings. They are kept unparsed here because
// their interpretation is schema-type specific and is handled by the stats values.
private String floor;
private String ceil;
private double tdigestCompression = 100.0D;
private HllOptions hllOpts;

Expand Down Expand Up @@ -301,6 +303,11 @@ public StatsField(ResponseBuilder rb, String statsParam) {
? Collections.<String>emptyList()
: StrUtils.splitSmart(excludeStr,',');

// schema-type specific, so these are pulled out as strings and
// dealt with by the stats values
this.floor = localParams.get("floor");
this.ceil = localParams.get("ceil");

assert ( (null == this.valueSource) ^ (null == this.schemaField) )
: "exactly one of valueSource & schemaField must be null";
}
Expand Down Expand Up @@ -512,6 +519,10 @@ public List<String> getTagList() {
return tagList;
}

/**
 * Returns the raw <code>floor</code> local param as a string, unparsed.
 * Consumers null-check the result before parsing it, so <code>null</code>
 * presumably means no floor was requested.
 */
public String getFloor() {
  return this.floor;
}

/**
 * Returns the raw <code>ceil</code> local param as a string, unparsed.
 * Consumers null-check the result before parsing it, so <code>null</code>
 * presumably means no ceiling was requested.
 */
public String getCeil() {
  return this.ceil;
}

/**
 * Returns a short label of the form <code>StatsField&lt;originalParam&gt;</code>.
 */
@Override
public String toString() {
  return "StatsField<" + originalParam + ">";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ public interface StatsValues {
*/
void missing();

/**
 * Updates the statistics for a single value that was excluded from
 * accumulation because it fell outside the configured bounds.
 */
void outOfBounds();

/**
* Updates the statistics when multiple documents are missing a value
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import com.tdunning.math.stats.AVLTreeDigest;
import com.google.common.hash.HashFunction;

import org.apache.solr.util.DateMathParser;
import org.apache.solr.util.hll.HLL;
import org.apache.solr.util.hll.HLLType;

Expand Down Expand Up @@ -89,11 +90,15 @@ public static StatsValues createStatsValues(StatsField statsField) {
*/
abstract class AbstractStatsValues<T> implements StatsValues {
private static final String FACETS = "facets";

/** Tracks all data about the stats we need to collect */

/**
* Tracks all data about the stats we need to collect
*/
final protected StatsField statsField;

/** may be null if we are collecting stats directly from a function ValueSource */
/**
* may be null if we are collecting stats directly from a function ValueSource
*/
final protected SchemaField sf;
/**
* may be null if we are collecting stats directly from a function ValueSource
Expand All @@ -107,11 +112,16 @@ abstract class AbstractStatsValues<T> implements StatsValues {
final protected boolean computeMin;
final protected boolean computeMax;
final protected boolean computeMinOrMax;
final protected boolean computeCardinality;
final protected boolean computeCardinality;

/**
* Either a function value source to collect from, or the ValueSource associated
* with a single valued field we are collecting from. Will be null until/unless
/**
 * Predicate deciding whether a value should be accumulated into the stats.
 *
 * @param <T> the type of value being checked
 */
@FunctionalInterface
interface Boundable<T> {
  /**
   * @param value the candidate value
   * @return <code>true</code> if the value is within bounds, <code>false</code> if it is not
   */
  boolean isWithinBounds(T value);
}

/**
 * Bounds check consulted before a value is accumulated. The default accepts
 * every value; subclasses replace it when a floor and/or ceil is supplied.
 */
protected Boundable<T> bounds = value -> true;
/**
* Either a function value source to collect from, or the ValueSource associated
* with a single valued field we are collecting from. Will be null until/unless
* {@link #setNextReader} is called at least once
*/
private ValueSource valueSource;
Expand All @@ -125,38 +135,39 @@ abstract class AbstractStatsValues<T> implements StatsValues {
* called at least once
*/
protected FunctionValues values;

protected T max;
protected T min;
protected long missing;
protected long outOfBounds;
protected long count;
protected long countDistinct;
protected final Set<T> distinctValues;

/**
* Hash function that must be used by implementations of {@link #hash}
*/
protected final HashFunction hasher;
protected final HashFunction hasher;
// if null, no HLL logic can be computed; not final because of "union" optimization (see below)
private HLL hll;
private HLL hll;

// facetField facetValue
protected Map<String,Map<String, StatsValues>> facets = new HashMap<>();
protected Map<String, Map<String, StatsValues>> facets = new HashMap<>();

protected AbstractStatsValues(StatsField statsField) {
this.statsField = statsField;
this.computeCount = statsField.calculateStats(Stat.count);
this.computeMissing = statsField.calculateStats(Stat.missing);
this.computeCalcDistinct = statsField.calculateStats(Stat.countDistinct)
|| statsField.calculateStats(Stat.distinctValues);
this.computeCalcDistinct = statsField.calculateStats(Stat.countDistinct)
|| statsField.calculateStats(Stat.distinctValues);
this.computeMin = statsField.calculateStats(Stat.min);
this.computeMax = statsField.calculateStats(Stat.max);
this.computeMinOrMax = computeMin || computeMax;

this.distinctValues = computeCalcDistinct ? new TreeSet<>() : null;

this.computeCardinality = statsField.calculateStats(Stat.cardinality);
if ( computeCardinality ) {
if (computeCardinality) {

hasher = statsField.getHllOptions().getHasher();
hll = statsField.getHllOptions().newHLL();
Expand Down Expand Up @@ -188,7 +199,7 @@ protected AbstractStatsValues(StatsField statsField) {
this.ft = null;
}
}

/**
* {@inheritDoc}
*/
Expand All @@ -204,7 +215,7 @@ public void accumulate(NamedList stv) {
distinctValues.addAll((Collection<T>) stv.get("distinctValues"));
countDistinct = distinctValues.size();
}

if (computeMinOrMax) {
updateMinMax((T) stv.get("min"), (T) stv.get("max"));
}
Expand All @@ -225,12 +236,12 @@ public void accumulate(NamedList stv) {
}

updateTypeSpecificStats(stv);

NamedList f = (NamedList) stv.get(FACETS);
if (f == null) {
return;
}

for (int i = 0; i < f.size(); i++) {
String field = f.getName(i);
NamedList vals = (NamedList) f.getVal(i);
Expand All @@ -250,7 +261,7 @@ public void accumulate(NamedList stv) {
}
}
}

/**
* {@inheritDoc}
*/
Expand All @@ -265,9 +276,14 @@ public void accumulate(BytesRef value, int count) {
accumulate(typedValue, count);
}

public void accumulate(T value, int count) {
public void accumulate(T value, int count) {
assert null != value : "Can't accumulate null";

if (!bounds.isWithinBounds(value)) {
outOfBounds();
return;
}

if (computeCount) {
this.count += count;
}
Expand All @@ -281,14 +297,14 @@ public void accumulate(T value, int count) {
if (computeCardinality) {
if (null == hasher) {
assert value instanceof Number : "pre-hashed value support only works with numeric longs";
hll.addRaw(((Number)value).longValue());
hll.addRaw(((Number) value).longValue());
} else {
hll.addRaw(hash(value));
}
}
updateTypeSpecificStats(value, count);
}

/**
* {@inheritDoc}
*/
Expand All @@ -298,6 +314,11 @@ public void missing() {
missing++;
}
}

/**
 * {@inheritDoc}
 */
@Override
public void outOfBounds() {
  // Counted unconditionally; the total is only reported when it is non-zero.
  outOfBounds++;
}


/**
* {@inheritDoc}
Expand Down Expand Up @@ -347,6 +368,9 @@ public NamedList<?> getStatsValues() {
res.add("cardinality", hll.cardinality());
}
}
if (outOfBounds > 0) {
res.add("outOfBounds", outOfBounds);
}

addTypeSpecificStats(res);

Expand Down Expand Up @@ -444,7 +468,7 @@ class NumericStatsValues extends AbstractStatsValues<Number> {

double minD; // perf optimization, only valid if (null != this.min)
double maxD; // perf optimization, only valid if (null != this.max)

final protected boolean computeSum;
final protected boolean computeSumOfSquares;
final protected boolean computePercentiles;
Expand All @@ -460,6 +484,22 @@ public NumericStatsValues(StatsField statsField) {
tdigest = new AVLTreeDigest(statsField.getTdigestCompression());
}


if (statsField.getCeil() != null || statsField.getFloor() != null) {
double floor = statsField.getFloor() != null ? Double.parseDouble(statsField.getFloor()) : -Double.MAX_VALUE;
double ceil = statsField.getCeil() != null ? Double.parseDouble(statsField.getCeil()) : +Double.MAX_VALUE;

bounds = (Number value) -> {
// really we should think harder about when things are longs, but in accumulate
// stats component only cares about double, so we do here as well
if (value.doubleValue() >= floor && value.doubleValue() <= ceil) {
return true;
}
return false;
};
}


}

@Override
Expand Down Expand Up @@ -488,6 +528,7 @@ public long hash(Number v) {
public void accumulate(int docID) {
if (values.exists(docID)) {
Number value = (Number) values.objectVal(docID);
double dVal = value.doubleValue();
accumulate(value, 1);
} else {
missing();
Expand Down Expand Up @@ -704,14 +745,33 @@ class DateStatsValues extends AbstractStatsValues<Date> {

private double sum = 0.0;
double sumOfSquares = 0;



final protected boolean computeSum;
final protected boolean computeSumOfSquares;

/**
 * Builds date stats values for the given field and, when a floor and/or ceil
 * was supplied, installs a bounds check built from the parsed date strings.
 *
 * @param statsField the stats field describing which stats to compute and the optional bounds
 */
public DateStatsValues(StatsField statsField) {
  super(statsField);
  this.computeSum = statsField.calculateStats(Stat.sum);
  this.computeSumOfSquares = statsField.calculateStats(Stat.sumOfSquares);

  final String floorParam = statsField.getFloor();
  final String ceilParam = statsField.getCeil();
  if (floorParam == null && ceilParam == null) {
    // no bounds requested: keep the inherited accept-everything default
    return;
  }

  // a missing bound stays null and is treated as "unbounded" on that side
  final Date floor = (floorParam == null) ? null : DateMathParser.parseMath(null, floorParam);
  final Date ceil = (ceilParam == null) ? null : DateMathParser.parseMath(null, ceilParam);

  // NOTE(review): both comparisons are strict, so a value exactly equal to the
  // floor or the ceil is rejected. The numeric stats values accept values equal
  // to their bounds -- confirm whether this difference is intended.
  bounds = value ->
      (floor == null || value.compareTo(floor) > 0)
          && (ceil == null || value.compareTo(ceil) < 0);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,69 @@ public void testStatsFacetMultivaluedErrorHandling() throws Exception {

}

// SOLR-9395
/**
 * Checks that numeric <code>floor</code> and <code>ceil</code> local params fence
 * values out of the stats: fenced values are reported under <code>outOfBounds</code>,
 * documents without the field still count as <code>missing</code>, and a value
 * equal to the bound is kept.
 */
public void testStatsFencing() throws Exception {
  assertU(adoc("id", "1", "foo_i", "4"));
  assertU(adoc("id", "2", "foo_i", "5"));
  assertU(commit());
  assertU(adoc("id", "3", "foo_i", "6"));
  assertU(adoc("id", "4", "foo_i", "7"));
  assertU(adoc("id", "5"));
  assertU(commit());

  // floor=5: the value 4 is fenced out, while 5, 6 and 7 are accumulated
  assertQ("min fence"
      , req("q", "*:*", "stats", "true", "stats.field", "{!floor=5}foo_i")
      , "//lst[@name='foo_i']/double[@name='min'][.='5.0']"
      , "//lst[@name='foo_i']/double[@name='max'][.='7.0']"
      , "//lst[@name='foo_i']/long[@name='count'][.='3']"
      , "//lst[@name='foo_i']/long[@name='outOfBounds'][.='1']"
      , "//lst[@name='foo_i']/long[@name='missing'][.='1']"
  );

  // ceil=6: the value 7 is fenced out, while 4, 5 and 6 are accumulated
  assertQ("max fence"
      , req("q", "*:*", "stats", "true", "stats.field", "{!ceil=6}foo_i")
      , "//lst[@name='foo_i']/double[@name='min'][.='4.0']"
      , "//lst[@name='foo_i']/double[@name='max'][.='6.0']"
      , "//lst[@name='foo_i']/long[@name='count'][.='3']"
      , "//lst[@name='foo_i']/long[@name='outOfBounds'][.='1']"
      , "//lst[@name='foo_i']/long[@name='missing'][.='1']"
  );
}

/**
 * Checks that a date <code>ceil</code> local param fences later dates out of the
 * stats: only the earlier date is counted, the later one is reported under
 * <code>outOfBounds</code>, and the document without the field is <code>missing</code>.
 */
public void testStatsDateFencing() throws Exception {
  SolrCore core = h.getCore();

  DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT);
  dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

  final long earlyMillis = 123456789L;
  final long lateMillis = 987654321L;

  String date1 = dateFormat.format(new Date(earlyMillis)) + "Z";
  String date2 = dateFormat.format(new Date(lateMillis)) + "Z";

  // The format above has one-second resolution, so a ceiling only 1 ms below
  // date2 would render to the very same string as date2. Use a full second so
  // the ceiling lies strictly between the two indexed dates.
  String ceil = dateFormat.format(new Date(lateMillis - 1000L)) + "Z";

  assertU(adoc("id", "1", "active_dt", date1));
  assertU(adoc("id", "2", "active_dt", date2));
  assertU(adoc("id", "3"));
  assertU(commit());

  Map<String, String> args = new HashMap<>();
  args.put(CommonParams.Q, "*:*");
  args.put(StatsParams.STATS, "true");
  args.put(StatsParams.STATS_FIELD, "{!ceil=" + ceil + "}active_dt");
  args.put("indent", "true");
  SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args));

  assertQ("test date statistics values", req,
      "//long[@name='count'][.='1']",
      "//long[@name='missing'][.='1']",
      "//long[@name='outOfBounds'][.='1']",
      "//date[@name='min'][.='1970-01-02T10:17:36Z']",
      "//date[@name='max'][.='1970-01-02T10:17:36Z']");
}


//SOLR-3177
public void testStatsExcludeFilterQuery() throws Exception {
SolrCore core = h.getCore();
assertU(adoc("id", "1"));
Expand Down