Skip to content

Commit

Permalink
Add an optional cohort block to science experiments
Browse files Browse the repository at this point in the history
Many experiments operate on data with a very long tail, and the most
frequent part of the distribution can wash out notable results in
sub-groups.  For example, experiment results derived from the data of
very large customers often look quite different from the much more
common results derived from small customers' data.  Even percentile
metrics can't overcome these effects, since the relevant percentiles are
often very high (above the 99th percentile).

This adds an optional block to Scientist::Experiment which should return
a "cohort" when called.  The block is passed the result of the experiment
so it can determine the cohort from the context data, whether the result
is a mismatch, or any of the observation data.

The determined cohort value is available as `Scientist::Result#cohort`
and is intended to be used by the user-defined publication mechanism.
  • Loading branch information
Carl Brasic committed Jan 3, 2022
1 parent ba65028 commit 1174575
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 5 deletions.
9 changes: 8 additions & 1 deletion lib/scientist/experiment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,13 @@ def use(&block)
try "control", &block
end

# Register the block used to work out which cohort a result of this
# experiment belongs to. The block receives the `Scientist::Result` as
# its only argument, and the value it returns is set on that result as
# its cohort. Calling this without a block clears any stored block.
def cohort(&determine_cohort)
  @_scientist_determine_cohort = determine_cohort
end

# Whether or not to raise a mismatch error when a mismatch occurs.
def raise_on_mismatches?
if raise_on_mismatches.nil?
Expand All @@ -316,7 +323,7 @@ def generate_result(name)
end

control = observations.detect { |o| o.name == name }
Scientist::Result.new(self, observations, control)
Scientist::Result.new(self, observations, control, @_scientist_determine_cohort)
end

private
Expand Down
22 changes: 18 additions & 4 deletions lib/scientist/result.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,33 @@ class Scientist::Result
# An Array of Observations in execution order.
attr_reader :observations

# If the experiment was defined with a cohort block, the cohort this
# result has been determined to belong to.
attr_reader :cohort

# Internal: Create a new result.
#
# experiment - the Experiment this result is for
# observations: - an Array of Observations, in execution order
# control: - the control Observation
# experiment - the Experiment this result is for
# observations: - an Array of Observations, in execution order
# control: - the control Observation
# determine_cohort - An optional callable that is passed the Result to
# determine its cohort
#
def initialize(experiment, observations = [], control = nil)
def initialize(experiment, observations = [], control = nil, determine_cohort = nil)
  @experiment, @observations, @control = experiment, observations, control
  @candidates = observations - [control]
  evaluate_candidates

  # The cohort callable is optional; without one the cohort is left unset.
  # It is called with this result only after the candidates have been
  # evaluated. Any StandardError it raises is handed to
  # `experiment.raised`, tagged with :cohort.
  begin
    @cohort = determine_cohort.call(self) if determine_cohort
  rescue StandardError => e
    experiment.raised :cohort, e
  end

  freeze
end

Expand Down
40 changes: 40 additions & 0 deletions test/scientist/experiment_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,46 @@ def @ex.enabled?
assert_equal "kaboom", exception.message
end

# Covers the optional cohort block on an experiment and how the value it
# returns ends up on the published result.
describe "cohorts" do
# Smoke test: configuring a cohort block must not raise.
it "accepts a cohort config block" do
@ex.cohort { "1" }
end

it "assigns a cohort to the result using the provided block" do
@ex.context(foo: "bar")
# The block reads both the context and the control value.
# log10(5670) is about 3.75, which rounds to 4, hence "foo-bar-4" below.
@ex.cohort { |res| "foo-#{res.context[:foo]}-#{Math.log10(res.control.value).round}" }
@ex.use { 5670 }
@ex.try { 5670 }

@ex.run
assert_equal "foo-bar-4", @ex.published_result.cohort
end

it "assigns no cohort if no cohort block passed" do
@ex.use { 5670 }
@ex.try { 5670 }

@ex.run
assert_nil @ex.published_result.cohort
end

it "rescues errors raised in the cohort determination block" do
@ex.use { 5670 }
@ex.try { 5670 }
@ex.cohort { |res| raise "intentional" }

@ex.run

# A failing cohort block must not prevent publication; the result is
# still published, just without a cohort.
refute_nil @ex.published_result
assert_nil @ex.published_result.cohort

# The error is recorded once, as a [code, exception] pair tagged :cohort.
assert_equal 1, @ex.exceptions.size
code, exception = @ex.exceptions[0]
assert_equal :cohort, code
assert_equal "intentional", exception.message
end
end

describe "#raise_with" do
it "raises custom error if provided" do
CustomError = Class.new(Scientist::Experiment::MismatchError)
Expand Down
11 changes: 11 additions & 0 deletions test/scientist/result_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,17 @@
assert_equal @experiment.name, result.experiment_name
end

# Result.new accepts the cohort callable as an optional fourth argument.
it "takes an optional callable to determine cohort" do
a = Scientist::Observation.new("a", @experiment) { 1 }
b = Scientist::Observation.new("b", @experiment) { 1 }

# Without a callable the cohort is nil.
result = Scientist::Result.new @experiment, [a, b], a
assert_nil result.cohort

# With a callable, its return value becomes the cohort.
result = Scientist::Result.new @experiment, [a, b], a, ->(res) { "cohort-1" }
assert_equal "cohort-1", result.cohort
end

it "has the context from an experiment" do
@experiment.context :foo => :bar
a = Scientist::Observation.new("a", @experiment) { 1 }
Expand Down

0 comments on commit 1174575

Please sign in to comment.