From 08aaaafa73e7749a597748a4f1792bb0dad4dcaf Mon Sep 17 00:00:00 2001
From: Uri Granta
Date: Wed, 19 Jun 2024 08:52:21 +0100
Subject: [PATCH] Update optimizers too

---
 benchmarking/main.py                       | 5 +++--
 docs/index.rst                             | 2 +-
 docs/notebooks/deep_cde.ipynb              | 4 ++--
 docs/notebooks/efficient_sampling.py       | 2 +-
 docs/notebooks/gpflux_features.py          | 2 +-
 docs/notebooks/intro.py                    | 4 ++--
 docs/notebooks/keras_integration.py        | 4 ++--
 gpflux/helpers.py                          | 2 +-
 gpflux/layers/trackable_layer.py           | 1 +
 gpflux/optimization/keras_natgrad.py       | 6 ++++--
 tests/gpflux/models/test_deep_gp.py        | 3 ++-
 tests/integration/test_svgp_equivalence.py | 8 ++++----
 12 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/benchmarking/main.py b/benchmarking/main.py
index a88718c0..6f5b5712 100644
--- a/benchmarking/main.py
+++ b/benchmarking/main.py
@@ -23,11 +23,12 @@
 import tensorflow as tf
 from bayesian_benchmarks import data as uci_datasets
 from bayesian_benchmarks.data import Dataset
-from gpflow.keras import tf_keras
 from sacred import Experiment
 from scipy.stats import norm
 from utils import ExperimentName, git_version
 
+from gpflow.keras import tf_keras
+
 from gpflux.architectures import Config, build_constant_input_dim_deep_gp
 
 THIS_DIR = Path(__file__).parent
@@ -124,7 +125,7 @@ def main(_config):
     data = get_data()
 
     model = build_model(data.X_train)
-    model.compile(optimizer=tf.optimizers.Adam(0.01))
+    model.compile(optimizer=tf_keras.optimizers.Adam(0.01))
 
     train_model(model, (data.X_train, data.Y_train))
     metrics = evaluate_model(model, (data.X_test, data.Y_test))
diff --git a/docs/index.rst b/docs/index.rst
index 6e9cca44..bb101973 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -71,7 +71,7 @@ As a quick teaser, here's a snippet from the `intro notebook `
 
     # Compile and fit
     model = two_layer_dgp.as_training_model()
-    model.compile(tf.optimizers.Adam(0.01))
+    model.compile(gpflow.keras.tf_keras.optimizers.Adam(0.01))
     history = model.fit({"inputs": X, "targets": Y}, epochs=int(1e3), verbose=0)
 
 The model described above produces the fit shown in Fig 1. For comparison, in Fig. 2 we show the fit on the same dataset by a vanilla single-layer GP model.
diff --git a/docs/notebooks/deep_cde.ipynb b/docs/notebooks/deep_cde.ipynb
index eb069dbb..a555ef3a 100644
--- a/docs/notebooks/deep_cde.ipynb
+++ b/docs/notebooks/deep_cde.ipynb
@@ -124,7 +124,7 @@
     "\n",
     "single_layer_dgp = gpflux.models.DeepGP([gp_layer], likelihood_layer)\n",
     "model = single_layer_dgp.as_training_model()\n",
-    "model.compile(tf.optimizers.Adam(0.01))\n",
+    "model.compile(gpflow.keras.tf_keras.optimizers.Adam(0.01))\n",
     "\n",
     "history = model.fit({\"inputs\": X, \"targets\": Y}, epochs=int(1e3), verbose=0)\n",
     "fig, ax = plt.subplots()\n",
@@ -387,7 +387,7 @@
    "execution_count": 17,
    "source": [
     "model = dgp.as_training_model()\n",
-    "model.compile(tf.optimizers.Adam(0.005))\n",
+    "model.compile(gpflow.keras.tf_keras.optimizers.Adam(0.005))\n",
     "history = model.fit({\"inputs\": X, \"targets\": Y}, epochs=int(20e3), verbose=0, batch_size=num_data, shuffle=False)"
    ],
    "outputs": [],
diff --git a/docs/notebooks/efficient_sampling.py b/docs/notebooks/efficient_sampling.py
index a012903c..e7b8661a 100644
--- a/docs/notebooks/efficient_sampling.py
+++ b/docs/notebooks/efficient_sampling.py
@@ -100,7 +100,7 @@
 """
 
 # %%
-model.compile(tf.optimizers.Adam(learning_rate=0.1))
+model.compile(tf_keras.optimizers.Adam(learning_rate=0.1))
 
 callbacks = [
     tf_keras.callbacks.ReduceLROnPlateau(
diff --git a/docs/notebooks/gpflux_features.py b/docs/notebooks/gpflux_features.py
index 62709368..cc8813bf 100644
--- a/docs/notebooks/gpflux_features.py
+++ b/docs/notebooks/gpflux_features.py
@@ -85,7 +85,7 @@ def motorcycle_data():
 
 # Following the Keras procedure we need to compile and pass a optimizer,
 # before fitting the model to data
-training_model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01))
+training_model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01))
 
 callbacks = [
     # Create callback that reduces the learning rate every time the ELBO plateaus
diff --git a/docs/notebooks/intro.py b/docs/notebooks/intro.py
index 5f64aae5..062343c3 100644
--- a/docs/notebooks/intro.py
+++ b/docs/notebooks/intro.py
@@ -97,7 +97,7 @@ def motorcycle_data():
 # %%
 single_layer_dgp = gpflux.models.DeepGP([gp_layer], likelihood_layer)
 model = single_layer_dgp.as_training_model()
-model.compile(tf.optimizers.Adam(0.01))
+model.compile(gpflow.keras.tf_keras.optimizers.Adam(0.01))
 
 # %% [markdown]
 """
@@ -168,7 +168,7 @@ def plot(model, X, Y, ax=None):
 likelihood_layer = gpflux.layers.LikelihoodLayer(gpflow.likelihoods.Gaussian(0.1))
 two_layer_dgp = gpflux.models.DeepGP([gp_layer1, gp_layer2], likelihood_layer)
 model = two_layer_dgp.as_training_model()
-model.compile(tf.optimizers.Adam(0.01))
+model.compile(gpflow.keras.tf_keras.optimizers.Adam(0.01))
 
 # %%
 history = model.fit({"inputs": X, "targets": Y}, epochs=int(1e3), verbose=0)
diff --git a/docs/notebooks/keras_integration.py b/docs/notebooks/keras_integration.py
index 6ef111b2..b430bb37 100644
--- a/docs/notebooks/keras_integration.py
+++ b/docs/notebooks/keras_integration.py
@@ -97,7 +97,7 @@ def create_model(model_class):
 ]
 
 dgp_train = dgp.as_training_model()
-dgp_train.compile(tf.optimizers.Adam(learning_rate=0.1))
+dgp_train.compile(tf_keras.optimizers.Adam(learning_rate=0.1))
 
 history = dgp_train.fit(
     {"inputs": X, "targets": Y}, batch_size=batch_size, epochs=num_epochs, callbacks=callbacks
@@ -125,7 +125,7 @@ def create_model(model_class):
     [
         gpflow.optimizers.NaturalGradient(gamma=0.05),
         gpflow.optimizers.NaturalGradient(gamma=0.05),
-        tf.optimizers.Adam(learning_rate=0.1),
+        tf_keras.optimizers.Adam(learning_rate=0.1),
     ]
 )
 
diff --git a/gpflux/helpers.py b/gpflux/helpers.py
index 8ae31505..cc21d11d 100644
--- a/gpflux/helpers.py
+++ b/gpflux/helpers.py
@@ -22,7 +22,7 @@
 import inspect
 import warnings
 from dataclasses import fields
-from typing import List, Optional, Type, TypeVar, Union, Any
+from typing import Any, List, Optional, Type, TypeVar, Union
 
 import numpy as np
 
diff --git a/gpflux/layers/trackable_layer.py b/gpflux/layers/trackable_layer.py
index 86c8d073..27ceaa1e 100644
--- a/gpflux/layers/trackable_layer.py
+++ b/gpflux/layers/trackable_layer.py
@@ -16,6 +16,7 @@
 """Utility layer that tracks variables in :class:`tf.Module`."""
 
 from deprecated import deprecated
+
 from gpflow.keras import tf_keras
 
 
diff --git a/gpflux/optimization/keras_natgrad.py b/gpflux/optimization/keras_natgrad.py
index 4bdd5781..f968110c 100644
--- a/gpflux/optimization/keras_natgrad.py
+++ b/gpflux/optimization/keras_natgrad.py
@@ -94,7 +94,7 @@ def natgrad_optimizers(self) -> List[gpflow.optimizers.NaturalGradient]:
         return self._all_optimizers[:-1]
 
     @property
-    def optimizer(self) -> tf.optimizers.Optimizer:
+    def optimizer(self) -> tf_keras.optimizers.Optimizer:
         """
         HACK to cope with Keras's callbacks such as
         :class:`~tf.keras.callbacks.ReduceLROnPlateau`
@@ -109,7 +109,9 @@ def optimizer(self) -> tf.optimizers.Optimizer:
         return self._all_optimizers[-1]
 
     @optimizer.setter
-    def optimizer(self, optimizers: List[Union[NaturalGradient, tf.optimizers.Optimizer]]) -> None:
+    def optimizer(
+        self, optimizers: List[Union[NaturalGradient, tf_keras.optimizers.Optimizer]]
+    ) -> None:
         if optimizers is None:
             # tf.keras.Model.__init__() sets self.optimizer = None
             self._all_optimizers = None
diff --git a/tests/gpflux/models/test_deep_gp.py b/tests/gpflux/models/test_deep_gp.py
index d238088a..4ef9eb22 100644
--- a/tests/gpflux/models/test_deep_gp.py
+++ b/tests/gpflux/models/test_deep_gp.py
@@ -18,6 +18,7 @@
 import tensorflow as tf
 import tqdm
+from gpflow.keras import tf_keras
 from gpflow.kernels import RBF, Matern12
 from gpflow.likelihoods import Gaussian
 from gpflow.mean_functions import Zero
 
@@ -63,7 +64,7 @@ def build_deep_gp(input_dim, num_data):
 
 
 def train_deep_gp(deep_gp, data, maxiter=MAXITER, plotter=None, plotter_interval=PLOTTER_INTERVAL):
-    optimizer = tf.optimizers.Adam()
+    optimizer = tf_keras.optimizers.Adam()
 
     @tf.function(autograph=False)
     def objective_closure():
diff --git a/tests/integration/test_svgp_equivalence.py b/tests/integration/test_svgp_equivalence.py
index 1e8af087..9b29b23e 100644
--- a/tests/integration/test_svgp_equivalence.py
+++ b/tests/integration/test_svgp_equivalence.py
@@ -150,7 +150,7 @@ def training_loss():
         """
         return -model.elbo(data) / num_data
 
-    adam = tf.optimizers.Adam(adam_learning_rate)
+    adam = tf_keras.optimizers.Adam(adam_learning_rate)
 
     @tf.function
     def optimization_step():
@@ -161,7 +161,7 @@
 
 
 def _keras_fit_adam(model, dataset, maxiter, adam_learning_rate=0.01, loss=None):
-    model.compile(optimizer=tf.optimizers.Adam(adam_learning_rate), loss=loss)
+    model.compile(optimizer=tf_keras.optimizers.Adam(adam_learning_rate), loss=loss)
     model.fit(dataset, epochs=maxiter)
 
 
@@ -183,7 +183,7 @@ def _keras_fit_natgrad(
     model = gpflux.optimization.NatGradWrapper(base_model)
     model.natgrad_layers = True  # Shortcut to apply natural gradients to all layers
     natgrad = gpflow.optimizers.NaturalGradient(gamma=gamma)
-    adam = tf.optimizers.Adam(adam_learning_rate)
+    adam = tf_keras.optimizers.Adam(adam_learning_rate)
     model.compile(
         optimizer=[natgrad, adam],
         loss=loss,
@@ -235,7 +235,7 @@ def training_loss():
         return -model.elbo(data) / num_data
 
     natgrad = gpflow.optimizers.NaturalGradient(gamma=gamma)
-    adam = tf.optimizers.Adam(adam_learning_rate)
+    adam = tf_keras.optimizers.Adam(adam_learning_rate)
 
     @tf.function
     def optimization_step():
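
Note: every hunk above applies the same substitution: optimizers are built from gpflow.keras.tf_keras (or the tf_keras alias imported from it) rather than from tf.optimizers. The snippet below is a minimal, self-contained sketch of the resulting usage; the toy data, kernel, and inducing-point settings are illustrative and not part of this patch.

# Sketch of the post-patch usage: take the optimizer from gpflow.keras.tf_keras
# instead of tf.optimizers when compiling a GPflux training model.
# Data, kernel and inducing points below are toy values for illustration only.
import numpy as np
import gpflow
import gpflux
from gpflow.keras import tf_keras

X = np.linspace(0, 1, 100).reshape(-1, 1)
Y = np.sin(10 * X) + 0.1 * np.random.randn(*X.shape)

# Single-layer deep GP, following the intro notebook touched by this patch.
Z = X[::10].copy()
kernel = gpflow.kernels.SquaredExponential()
inducing_variable = gpflow.inducing_variables.InducingPoints(Z)
gp_layer = gpflux.layers.GPLayer(
    kernel, inducing_variable, num_data=X.shape[0], num_latent_gps=1
)
likelihood_layer = gpflux.layers.LikelihoodLayer(gpflow.likelihoods.Gaussian(0.1))
deep_gp = gpflux.models.DeepGP([gp_layer], likelihood_layer)

model = deep_gp.as_training_model()
model.compile(tf_keras.optimizers.Adam(0.01))  # previously: tf.optimizers.Adam(0.01)
model.fit({"inputs": X, "targets": Y}, epochs=100, verbose=0)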