Deeply nonstationary GP #85

Open · wants to merge 3 commits into base: develop
67 changes: 67 additions & 0 deletions docs/notebooks/deep_nonstationary_gp_samples.py
@@ -0,0 +1,67 @@
# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#     jupytext_version: 1.3.2
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---

# %%
import gpflow
import gpflux
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from gpflux.nonstationary import NonstationaryKernel
from plotting import plot_layers

# %%
Ns = 1000
D = 1
a, b = 0, 1
X = np.linspace(a, b, Ns).reshape(-1, 1)
# second column is a placeholder for the lengthscale inputs of the nonstationary layers
Xlam = np.c_[X, np.zeros_like(X)]

# %%
D_in = D1 = D2 = D_out = D

# Layer 1
Z1 = X.copy()
feat1 = gpflow.features.InducingPoints(Z1)
kern1 = gpflow.kernels.RBF(D_in, lengthscales=0.1)
layer1 = gpflux.layers.GPLayer(kern1, feat1, D1)

# Layer 2
Z2 = Xlam.copy()
feat2 = gpflow.features.InducingPoints(Z2)
kern2 = NonstationaryKernel(
    gpflow.kernels.RBF(D1),
    D_in,
    scaling_offset=-0.1,
    positivity=gpflow.transforms.positive.forward_tensor,
)
layer2 = gpflux.layers.NonstationaryGPLayer(kern2, feat2, D2)

# Layer 3
Z3 = Xlam.copy()
feat3 = gpflow.features.InducingPoints(Z3)
kern3 = NonstationaryKernel(
    gpflow.kernels.RBF(D2),
    D1,
    scaling_offset=-0.1,
    positivity=gpflow.transforms.positive.forward_tensor,
)
layer3 = gpflux.layers.NonstationaryGPLayer(kern3, feat3, D_out)

# empty placeholder data: the model is only used here to visualise samples from the prior
model = gpflux.DeepGP(np.empty((1, 1)), np.empty((1, 1)), [layer1, layer2, layer3])

# %%
plot_layers(X, model)
plt.show()
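
# %%
# A minimal sketch of drawing a prior sample from the first (stationary) layer
# directly from its kernel, assuming GPflow 1.x's autoflow method
# `compute_K_symm`; the nonstationary layers above would transform such a
# sample further.
K = kern1.compute_K_symm(X)  # [Ns, Ns] covariance at the plotting inputs
f_sample = np.linalg.cholesky(K + 1e-6 * np.eye(Ns)) @ np.random.randn(Ns, 1)
plt.plot(X, f_sample)
plt.title("sample from the layer-1 prior")
plt.show()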
118 changes: 118 additions & 0 deletions gpflux/kernels.py
@@ -0,0 +1,118 @@
#
# Copyright (c) 2021 The GPflux Contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import tensorflow as tf

import gpflow
from gpflow import Param, params_as_tensors


class NonstationaryKernel(gpflow.kernels.Kernel):
    def __init__(self, basekernel, lengthscales_dim, scaling_offset=0.0, positivity=tf.exp):
        """
        basekernel.input_dim == data_dim: dimension of the original (first-layer) input
        lengthscales_dim: output dimension of the previous layer

        For the nonstationary deep GP, the `positivity` is actually part of the *model*
        specification and does change model behaviour.
        `scaling_offset` behaves equivalently to a constant mean function in the
        previous layer; if the previous layer is given a constant mean function,
        scaling_offset should be set to non-trainable.

        When the positivity is exp(), the scaling offset acts as a multiplicative
        rescaling of the lengthscales and is hence redundant with the basekernel
        lengthscales; the default is therefore 0 and non-trainable.
        For other positivity choices, it may be useful to make scaling_offset trainable.

        scaling_offset can be a scalar or a vector of length `lengthscales_dim`.
        """
        if not isinstance(basekernel, gpflow.kernels.Stationary):
            raise TypeError("base kernel must be stationary")

        data_dim = basekernel.input_dim
        # must call super().__init__() before adding parameters:
        super().__init__(data_dim + lengthscales_dim)

        self.data_dim = data_dim
        self.lengthscales_dim = lengthscales_dim
        if lengthscales_dim not in (data_dim, 1):
            raise ValueError("lengthscales_dim must be equal to basekernel's input_dim or 1")

        self.basekernel = basekernel
        self.scaling_offset = Param(scaling_offset)
        if positivity is tf.exp:
            # with exp positivity, scaling_offset is redundant (see docstring)
            self.scaling_offset.trainable = False
        self._positivity = positivity  # XXX use softplus / log(1+exp)?

    @params_as_tensors
    def _split_scale(self, X):
        """
        Splits X (data_dim + lengthscales_dim columns) into the original input
        and the lengthscale columns, which are mapped through the positivity.
        """
        original_input = X[..., : self.data_dim]
        log_lengthscales = X[..., self.data_dim :]
        lengthscales = self._positivity(log_lengthscales + self.scaling_offset)
        return original_input, lengthscales

    @params_as_tensors
    def K(self, X, X2=None):
        base_lengthscales = self.basekernel.lengthscales
        x1, lengthscales1 = self._split_scale(X)
        if X2 is not None:
            x2, lengthscales2 = self._split_scale(X2)
        else:
            x2, lengthscales2 = x1, lengthscales1

        # d = self.lengthscales_dim, either 1 or D
        # x1: [N, D], lengthscales1: [N, d]
        # x2: [M, D], lengthscales2: [M, d]

        # negative axes keep this correct under a potential leading batch dimension;
        # for these rank-2 inputs, axis=-2 is equivalent to axis=1 and axis=-3 to axis=0
        lengthscales1 = tf.expand_dims(lengthscales1, axis=-2)  # [N, 1, d]
        lengthscales2 = tf.expand_dims(lengthscales2, axis=-3)  # [1, M, d]
        squared_lengthscales_sum = lengthscales1 ** 2 + lengthscales2 ** 2  # [N, M, d]

        q = tf.reduce_prod(
            tf.sqrt(2 * lengthscales1 * lengthscales2 / squared_lengthscales_sum), axis=-1
        )  # [N, M]

        if self.lengthscales_dim == 1:  # also used when data_dim == 1
            # The last dimension should be 1, so the reduce_prod above *should* have
            # no effect here.
            # Stationary._scaled_square_dist() handles the base lengthscale rescaling,
            # but we need to correct the `q` prefactor:
            q = q ** tf.cast(self.data_dim, q.dtype)
            r2 = self.basekernel._scaled_square_dist(x1, None if X2 is None else x2)
            r2 /= 0.5 * squared_lengthscales_sum[..., 0]  # [N, M]

        elif self.lengthscales_dim == self.data_dim:
            assert self.data_dim > 1

            # Here, we have to handle the rescaling and expanding manually, as the full
            # case cannot be implemented with a matmul.
            x1 /= base_lengthscales
            x2 /= base_lengthscales
            x1 = tf.expand_dims(x1, axis=-2)  # [N, 1, D]
            x2 = tf.expand_dims(x2, axis=-3)  # [1, M, D]

            r2 = tf.reduce_sum(2 * (x1 - x2) ** 2 / squared_lengthscales_sum, axis=-1)  # [N, M]

        else:
            assert False, "lengthscales_dim must be 1 or same as data_dim"

        return q * self.basekernel.K_r2(r2)

    @params_as_tensors
    def Kdiag(self, X):
        # on the diagonal the prefactor q equals 1 and r2 is 0, so the diagonal
        # reduces to the stationary base kernel's diagonal (its variance)
        return self.basekernel.Kdiag(X)
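
For reference, the `K` method above evaluates a Gibbs-style nonstationary covariance. A sketch of what it computes for the RBF base kernel, with base lengthscales λ_d, variance σ², and input-dependent lengthscales ℓ_d(x) produced by the positivity:

$$k(x, x') = \sigma^2 \prod_{d=1}^{D} \sqrt{\frac{2\,\ell_d(x)\,\ell_d(x')}{\ell_d(x)^2 + \ell_d(x')^2}} \;\exp\!\left(-\sum_{d=1}^{D} \frac{(x_d - x'_d)^2}{\lambda_d^2 \left(\ell_d(x)^2 + \ell_d(x')^2\right)}\right),$$

with a single ℓ(x) shared across all D dimensions in the `lengthscales_dim == 1` branch.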
54 changes: 54 additions & 0 deletions gpflux/layers/nonstationary_layer.py
@@ -0,0 +1,54 @@
#
# Copyright (c) 2021 The GPflux Contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import tensorflow as tf

import gpflow
from gpflow import params_as_tensors

from gpflux.kernels import NonstationaryKernel
from gpflux.layers.gp_layer import GPLayer


class NonstationaryGPLayer(GPLayer):
    def __init__(
        self, kernel: NonstationaryKernel, feature: gpflow.features.InducingFeature, *args, **kwargs
    ):
        assert isinstance(kernel, NonstationaryKernel)
        if isinstance(feature, gpflow.features.InducingPoints):
            assert feature.Z.shape[1] == kernel.input_dim
        super().__init__(kernel, feature, *args, **kwargs)

    @params_as_tensors
    def propagate(self, H, *, X=None, **kwargs):
        """
        Concatenates the original input X with the output H of the previous
        layer for the nonstationary kernel, which interprets the H-columns
        as (pre-positivity) lengthscales.

        :param H: input to this layer (output of the previous layer) [N, P]
        :param X: original input [N, D]
        """
        XH = tf.concat([X, H], axis=1)
        return super().propagate(XH, **kwargs)
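
As a usage sketch (hypothetical variable names; it assumes the surrounding deep GP forwards the original inputs to every nonstationary layer):

# H1 = layer1.propagate(X)               # stationary first layer:  [N, D1]
# H2 = nonstat_layer.propagate(H1, X=X)  # kernel sees tf.concat([X, H1], axis=1)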
70 changes: 70 additions & 0 deletions tests/gpflux/test_nonstationary_unit.py
@@ -0,0 +1,70 @@
#
# Copyright (c) 2021 The GPflux Contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import numpy as np
import pytest
from numpy.testing import assert_allclose

import gpflow

from gpflux.nonstationary import NonstationaryKernel


def test_positive_definite(session_tf):
    D = 2
    kern = NonstationaryKernel(gpflow.kernels.RBF(D), D)

    XL = np.random.rand(3, 2 * D)
    K1 = kern.compute_K_symm(XL)
    K2 = kern.compute_K(XL, XL)

    assert_allclose(K1, K2)

    # raises np.linalg.LinAlgError if K1 is not positive definite
    np.linalg.cholesky(K1)


@pytest.mark.parametrize("lengthscales_dim", [1, 2])
def test_shapes(session_tf, lengthscales_dim):
    D = 2
    N = 5
    M = 6
    d = lengthscales_dim
    X1 = np.random.randn(N, D)
    X2 = np.random.randn(M, D)
    L1 = np.random.randn(N, d)
    L2 = np.random.randn(M, d)
    XL1 = np.hstack([X1, L1])
    XL2 = np.hstack([X2, L2])
    basekern = gpflow.kernels.RBF(D)
    kern = NonstationaryKernel(basekern, d)
    baseK = basekern.compute_K(X1, X2)
    K = kern.compute_K(XL1, XL2)
    assert baseK.shape == K.shape


def test_1D_equivalence(session_tf):
    D = 2
    kern = NonstationaryKernel(gpflow.kernels.RBF(D), D)
    kern_1D = NonstationaryKernel(gpflow.kernels.RBF(D), 1)

    # tile the single lengthscale column D times: a lengthscale shared across
    # all dimensions must give the same covariance in both parametrisations
    XL1D = np.random.randn(3, D + 1)
    XL = np.concatenate([XL1D, np.tile(XL1D[:, -1, None], [1, D - 1])], 1)

    K = kern.compute_K_symm(XL)
    K_1D = kern_1D.compute_K_symm(XL1D)

    assert_allclose(K, K_1D)
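
A further check one could add, sketched here against a direct NumPy evaluation of the covariance (assuming the default exp positivity, zero scaling offset, and unit base-kernel variance and lengthscales; `test_numpy_reference` is a hypothetical name):

def test_numpy_reference(session_tf):
    # sketch: compare kernel output against a direct NumPy implementation
    D, N, M = 2, 4, 3
    kern = NonstationaryKernel(gpflow.kernels.RBF(D), D)

    X1, X2 = np.random.randn(N, D), np.random.randn(M, D)
    logL1, logL2 = np.random.randn(N, D), np.random.randn(M, D)
    K = kern.compute_K(np.hstack([X1, logL1]), np.hstack([X2, logL2]))

    L1, L2 = np.exp(logL1), np.exp(logL2)  # positivity = exp, offset = 0
    K_ref = np.zeros((N, M))
    for n in range(N):
        for m in range(M):
            s = L1[n] ** 2 + L2[m] ** 2  # [D]
            q = np.prod(np.sqrt(2 * L1[n] * L2[m] / s))
            K_ref[n, m] = q * np.exp(-np.sum((X1[n] - X2[m]) ** 2 / s))

    assert_allclose(K, K_ref)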
72 changes: 72 additions & 0 deletions tests/integration/test_nonstationary_integration.py
@@ -0,0 +1,72 @@
#
# Copyright (c) 2021 The GPflux Contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import numpy as np

import gpflow

import gpflux
from gpflux.models.deep_gp import DeepGP
from gpflux.nonstationary import NonstationaryKernel


def test_nonstationary_gp_1d(session_tf):
    """
    This test builds a deep nonstationary GP model consisting of three layers
    and checks that the model can be optimized.
    """
    D_in = D1 = D2 = D_out = 1
    X = np.linspace(0, 10, 500).reshape(-1, 1)
    Y = np.random.randn(500, 1)
    Xlam = np.c_[X, np.zeros_like(X)]

    # Layer 1
    Z1 = X.copy()
    feat1 = gpflow.features.InducingPoints(Z1)
    kern1 = gpflow.kernels.RBF(D_in, lengthscales=0.1)
    layer1 = gpflux.layers.GPLayer(kern1, feat1, D1)

    # Layer 2
    Z2 = Xlam.copy()
    feat2 = gpflow.features.InducingPoints(Z2)
    kern2 = NonstationaryKernel(
        gpflow.kernels.RBF(D1),
        D_in,
        scaling_offset=-0.1,
        positivity=gpflow.transforms.positive.forward_tensor,
    )
    layer2 = gpflux.layers.NonstationaryGPLayer(kern2, feat2, D2)

    # Layer 3
    Z3 = Xlam.copy()
    feat3 = gpflow.features.InducingPoints(Z3)
    kern3 = NonstationaryKernel(
        gpflow.kernels.RBF(D2),
        D1,
        scaling_offset=-0.1,
        positivity=gpflow.transforms.positive.forward_tensor,
    )
    layer3 = gpflux.layers.NonstationaryGPLayer(kern3, feat3, D_out)

    model = DeepGP(X, Y, [layer1, layer2, layer3])

    # a few Adam steps should increase the ELBO
    likelihood_before_opt = model.compute_log_likelihood()
    gpflow.train.AdamOptimizer(0.01).minimize(model, maxiter=100)
    likelihood_after_opt = model.compute_log_likelihood()

    assert likelihood_before_opt < likelihood_after_opt
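
To watch the ELBO during optimization, a rough sketch (assuming GPflow 1.x's `minimize` accepts a `step_callback` argument; `print_elbo` is a hypothetical helper):

def print_elbo(step, *args):
    # hypothetical: print the ELBO every 20 Adam steps
    if step % 20 == 0:
        print(step, model.compute_log_likelihood())

gpflow.train.AdamOptimizer(0.01).minimize(model, maxiter=100, step_callback=print_elbo)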