-
Notifications
You must be signed in to change notification settings - Fork 61
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Callback #73
Callback #73
Changes from 9 commits
2fd99a6
98aad5e
107b5c5
abcff50
4d9b669
58b9482
86fedb1
18a4436
1282569
cb8421c
3331fa0
0d5d4bb
339b699
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -396,27 +396,31 @@ class MCMCAcquistion(AcquisitionSum): | |
""" | ||
Apply MCMC over the hyperparameters of an acquisition function (= over the hyperparameters of the contained models). | ||
|
||
The models passed into an object of this class are optimized with MLE, and then further sampled with HMC. | ||
These hyperparameter samples are then set in copies of the acquisition. | ||
The models passed into an object of this class are optimized with MLE (fast burn-in), and then further sampled with | ||
HMC. These hyperparameter samples are then set in copies of the acquisition. | ||
|
||
For evaluating the underlying acquisition function, the predictions of the acquisition copies are averaged. | ||
""" | ||
def __init__(self, acquisition, n_slices, **kwargs): | ||
assert isinstance(acquisition, Acquisition) | ||
assert n_slices > 0 | ||
|
||
copies = [copy.deepcopy(acquisition) for _ in range(n_slices - 1)] | ||
for c in copies: | ||
c.optimize_restarts = 0 | ||
|
||
# the call to the constructor of the parent classes, will optimize acquisition, so it obtains the MLE solution. | ||
super(MCMCAcquistion, self).__init__([acquisition] + copies) | ||
super(MCMCAcquistion, self).__init__([acquisition]*n_slices) | ||
self._needs_new_copies = True | ||
self._sample_opt = kwargs | ||
|
||
def _optimize_models(self): | ||
# Optimize model #1 | ||
self.operands[0]._optimize_models() | ||
|
||
# Copy it again if needed due to changed free state | ||
if self._needs_new_copies: | ||
new_copies = [copy.deepcopy(self.operands[0]) for _ in range(len(self.operands) - 1)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. copy.deepcopy([self.operands[0]]*len(self.operands)) not tested, works too? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, the * syntax creates shallow copies, so the deepcopy will copy the single object they are all pointing to, and every entry of the result would still refer to that one copy. |
||
for c in new_copies: | ||
c.optimize_restarts = 0 | ||
self.operands = ParamList([self.operands[0]] + new_copies) | ||
self._needs_new_copies = False | ||
|
||
# Draw samples using HMC | ||
# Sample each model of the acquisition function - results in a list of 2D ndarrays. | ||
hypers = np.hstack([model.sample(len(self.operands), **self._sample_opt) for model in self.models]) | ||
|
@@ -440,3 +444,11 @@ def set_data(self, X, Y): | |
def build_acquisition(self, Xcand): | ||
# Average the predictions of the copies. | ||
return 1. / len(self.operands) * super(MCMCAcquistion, self).build_acquisition(Xcand) | ||
|
||
def _kill_autoflow(self): | ||
""" | ||
Following the recompilation of models, the free state might have changed. This means updating the samples can | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. """ Following the ... |
||
cause inconsistencies and errors. Flag for recreation on next optimize | ||
""" | ||
super(MCMCAcquistion, self)._kill_autoflow() | ||
self._needs_new_copies = True | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume we cant use needs_setup for this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. _needs_setup is triggered by a simple |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,14 +16,36 @@ | |
|
||
import numpy as np | ||
from scipy.optimize import OptimizeResult | ||
import tensorflow as tf | ||
from GPflow.gpr import GPR | ||
|
||
from .acquisition import Acquisition, MCMCAcquistion | ||
from .optim import Optimizer, SciPyOptimizer | ||
from .objective import ObjectiveWrapper | ||
from .design import Design, EmptyDesign | ||
from .objective import ObjectiveWrapper | ||
from .optim import Optimizer, SciPyOptimizer | ||
from .pareto import non_dominated_sort | ||
|
||
|
||
def jitchol_callback(models): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. callbacks can be in a separate callbacks.py file? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do not plan on shipping any additional callbacks (I might even get rid of this one, it got committed by accident but it might improve stability?) so that file would be quite empty. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, I'm not in favor of including jitchol. I think there are other ways users can improve stability. First and foremost putting priors and transforms on the hyps. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given #74 I think we should really consider this. For standard scenarios with GPRs (which is what most people will start with) I think this might give an additional automated stability support (which can be disabled by setting the callback to None) |
||
""" | ||
Default callback for BayesianOptimizer. For all GPR models, increase the likelihood variance in case of Cholesky | ||
failures. This is similar to the use of jitchol in GPy | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. failures """ This is similar to ... Default callback for BayesianOptimizers. Only usable with GPR models. |
||
:return: | ||
""" | ||
for m in models: | ||
if not isinstance(m, GPR): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we are sure this can never be a wrapper class? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hm it could be, good point I need to fix this |
||
continue | ||
s = m.get_free_state() | ||
eKdiag = np.mean(np.diag(m.kern.compute_K_symm(m.X.value))) | ||
for e in [0] + [10**ex for ex in range(-6,-1)]: | ||
try: | ||
m.likelihood.variance = m.likelihood.variance.value + e * eKdiag | ||
m.optimize(maxiter=5) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can be a kwarg of the callback signature. I guess it is not really feasible to pass these kind of options to Acquisition. Users should simply define a lambda There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Too much additional things, I only take a few steps to see if the starting point is valid here. No point setting the actual number of evaluations here, the model is further optimized from optimize models. |
||
break | ||
except tf.errors.InvalidArgumentError: # pragma: no cover | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doesn't this exception covers much more than a cholesky error? Is there a more specific exception we can catch? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in jitchol its the only error I care about. If anything else pops up this isn't the place to catch and handle it. |
||
m.set_state(s) | ||
|
||
|
||
class BayesianOptimizer(Optimizer): | ||
""" | ||
A traditional Bayesian optimization framework implementation. | ||
|
@@ -32,7 +54,8 @@ class BayesianOptimizer(Optimizer): | |
Additionally, it is configured with a separate optimizer for the acquisition function. | ||
""" | ||
|
||
def __init__(self, domain, acquisition, optimizer=None, initial=None, scaling=True, hyper_draws=None): | ||
def __init__(self, domain, acquisition, optimizer=None, initial=None, scaling=True, hyper_draws=None, | ||
callback=jitchol_callback): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it useful to have a list of callbacks?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Too many features in my opinion. It's possible to make a callback that runs several functions, or to call a function on each model. The only reason for this PR is to provide an entry point in GPflowOpt for users to control the models during the optimization in several ways, should that be helpful due to use of special models. |
||
""" | ||
:param Domain domain: The optimization space. | ||
:param Acquisition acquisition: The acquisition function to optimize over the domain. | ||
|
@@ -51,6 +74,12 @@ def __init__(self, domain, acquisition, optimizer=None, initial=None, scaling=Tr | |
are obtained using Hamiltonian MC. | ||
(see `GPflow documentation <https://gpflow.readthedocs.io/en/latest//>`_ for details) for each model. | ||
The acquisition score is computed for each draw, and averaged. | ||
:param callable callback: (optional) this function or object will be called after each evaluate, after the | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. think you can remove the "after each evaluate" |
||
data of all models has been updated with all models as retrieved by acquisition.models as argument without | ||
the wrapping model handling any scaling. This allows custom model optimization strategies to be implemented. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if we do a separate callbacks.py file some of the explanation can be moved there + module link There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see above |
||
All manipulations of GPflow models are permitted. Combined with the optimize_restarts parameter of | ||
:class:`~.Acquisition` this allows several scenarios: do the optimization manually from the callback | ||
(optimize_restarts equals zero), or choose the starting point + some random restarts (optimize_restarts > 0). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or choose |
||
""" | ||
assert isinstance(acquisition, Acquisition) | ||
assert hyper_draws is None or hyper_draws > 0 | ||
|
@@ -69,6 +98,8 @@ def __init__(self, domain, acquisition, optimizer=None, initial=None, scaling=Tr | |
initial = initial or EmptyDesign(domain) | ||
self.set_initial(initial.generate()) | ||
|
||
self._iter_callback = callback | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why call it iter_callback and not model_callback? |
||
|
||
@Optimizer.domain.setter | ||
def domain(self, dom): | ||
assert self.domain.size == dom.size | ||
|
@@ -86,6 +117,8 @@ def _update_model_data(self, newX, newY): | |
assert self.acquisition.data[0].shape[1] == newX.shape[-1] | ||
assert self.acquisition.data[1].shape[1] == newY.shape[-1] | ||
assert newX.shape[0] == newY.shape[0] | ||
if newX.size == 0: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will this ever happen? As far as I know we cant empty GPflow models so data[0] will never be empty. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this line avoids _needs_setup = True in case i.e. the EmptyDesign is configured as initial design (as is by default) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As a sidenote, as GPflow doesn't support models with no data I actually see no use case for BOptimizer having an initial design parameter. |
||
return | ||
X = np.vstack((self.acquisition.data[0], newX)) | ||
Y = np.vstack((self.acquisition.data[1], newY)) | ||
self.acquisition.set_data(X, Y) | ||
|
@@ -174,7 +207,6 @@ def _optimize(self, fx, n_iter): | |
:param n_iter: number of iterations to run | ||
:return: OptimizeResult object | ||
""" | ||
|
||
assert isinstance(fx, ObjectiveWrapper) | ||
|
||
# Evaluate and add the initial design (if any) | ||
|
@@ -190,6 +222,10 @@ def inverse_acquisition(x): | |
|
||
# Optimization loop | ||
for i in range(n_iter): | ||
# If callback specified, and acquisition has the setup flag enabled (indicating an upcoming compilation), | ||
# run the callback. | ||
if self._iter_callback and self.acquisition._needs_setup: | ||
self._iter_callback([m.wrapped for m in self.acquisition.models]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if there is no callback:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You confuse something here: you can optimize your model in the callback but this is one of the scenarios (which would require optimize_restarts to be 0 in order to avoid two optimizes). The primary use case is to only set the initial starting point. (The reason the jitchol callback runs the optimization for a small number of steps is to check if no cholesky error occurs, not to optimize the model. ) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, there was indeed some confusion here. I thought the callback would implement the complete model building strategy: setting hyps, running one or more optimizations, etc. This is still possible but you have to set optimize_restarts = 0 |
||
result = self.optimizer.optimize(inverse_acquisition) | ||
self._update_model_data(result.x, fx(result.x)) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -214,8 +214,8 @@ def test_optimize_multi_objective(self): | |
result = optimizer.optimize(vlmop2, n_iter=2) | ||
self.assertTrue(result.success) | ||
self.assertEqual(result.nfev, 2, "Only 2 evaluations permitted") | ||
self.assertTupleEqual(result.x.shape, (9, 2)) | ||
self.assertTupleEqual(result.fun.shape, (9, 2)) | ||
self.assertTupleEqual(result.x.shape, (7, 2)) | ||
self.assertTupleEqual(result.fun.shape, (7, 2)) | ||
_, dom = GPflowOpt.pareto.non_dominated_sort(result.fun) | ||
self.assertTrue(np.all(dom==0)) | ||
|
||
|
@@ -288,6 +288,62 @@ def test_mcmc(self): | |
self.assertTrue(np.allclose(result.x, 0), msg="Optimizer failed to find optimum") | ||
self.assertTrue(np.allclose(result.fun, 0), msg="Incorrect function value returned") | ||
|
||
def test_callback(self): | ||
class DummyCallback(object): | ||
def __init__(self): | ||
self.counter = 0 | ||
|
||
def __call__(self, models): | ||
self.counter += 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lets think about the callback signature some more. Is there any information we want to pass that might be useful for model building? For instance, to let the model building strategy depend on the iteration number (we can stop optimizing the hyps after a while like in the MES paper). Although we can also look at the data set size. What about model building strategies that changes model.X en model.Y (like replace clusters etc.). Not sure if that fits here or is even relevant (the GPflow model should be able to cope with it). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the model contains all the data you need to accomplish something. I believe X and Y can even be updated in this callback as long as the model supports it (all models in GPflow do). If at some point some information is really missing, this can be added. |
||
|
||
c = DummyCallback() | ||
optimizer = GPflowOpt.BayesianOptimizer(self.domain, self.acquisition, callback=c) | ||
result = optimizer.optimize(lambda X: parabola2d(X)[0], n_iter=2) | ||
self.assertEqual(c.counter, 2) | ||
|
||
def test_callback_recompile(self): | ||
class DummyCallback(object): | ||
def __init__(self): | ||
self.recompile = False | ||
|
||
def __call__(self, models): | ||
c = np.random.randint(2, 10) | ||
models[0].kern.variance.prior = GPflow.priors.Gamma(c, 1./c) | ||
self.recompile = models[0]._needs_recompile | ||
|
||
c = DummyCallback() | ||
optimizer = GPflowOpt.BayesianOptimizer(self.domain, self.acquisition, callback=c) | ||
self.acquisition.evaluate(np.zeros((1,2))) # Make sure its run and setup to skip | ||
result = optimizer.optimize(lambda X: parabola2d(X)[0], n_iter=1) | ||
self.assertFalse(c.recompile) | ||
result = optimizer.optimize(lambda X: parabola2d(X)[0], n_iter=1) | ||
self.assertTrue(c.recompile) | ||
self.assertFalse(self.acquisition.models[0]._needs_recompile) | ||
|
||
def test_callback_recompile_mcmc(self): | ||
class DummyCallback(object): | ||
def __init__(self): | ||
self.no_models = 0 | ||
|
||
def __call__(self, models): | ||
c = np.random.randint(2, 10) | ||
models[0].kern.variance.prior = GPflow.priors.Gamma(c, 1. / c) | ||
self.no_models = len(models) | ||
|
||
c = DummyCallback() | ||
optimizer = GPflowOpt.BayesianOptimizer(self.domain, self.acquisition, hyper_draws=5, callback=c) | ||
opers = optimizer.acquisition.operands | ||
result = optimizer.optimize(lambda X: parabola2d(X)[0], n_iter=1) | ||
self.assertEqual(c.no_models, 1) | ||
self.assertEqual(id(opers[0]), id(optimizer.acquisition.operands[0])) | ||
for op1, op2 in zip(opers[1:], optimizer.acquisition.operands[1:]): | ||
self.assertNotEqual(id(op1), id(op2)) | ||
opers = optimizer.acquisition.operands | ||
result = optimizer.optimize(lambda X: parabola2d(X)[0], n_iter=1) | ||
self.assertEqual(id(opers[0]), id(optimizer.acquisition.operands[0])) | ||
for op1, op2 in zip(opers[1:], optimizer.acquisition.operands[1:]): | ||
self.assertNotEqual(id(op1), id(op2)) | ||
|
||
|
||
class TestSilentOptimization(unittest.TestCase): | ||
@contextmanager | ||
|
@@ -323,3 +379,4 @@ def _optimize(self, objective): | |
opt.optimize(None) | ||
output = out.getvalue().strip() | ||
self.assertEqual(output, '') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this make deep copies? I assumed you used the old way to ensure that they were deep copies
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I see, need_new_copies = True makes sure deep copies are made later
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This version makes shallow copies; it's mostly to ensure the copy later on is aware of the number of copies required, without serious overhead.