# Copyright 2016-2020 The GPflow Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
from typing import Any, Optional

import tensorflow as tf
from check_shapes import check_shapes

from ..base import InputData, MeanAndVariance, Module, RegressionData
from ..conditionals.util import sample_mvn
from ..kernels import Kernel, MultioutputKernel
from ..likelihoods import Likelihood, SwitchedLikelihood
from ..mean_functions import MeanFunction, Zero
from ..utilities import assert_params_false, to_default_float
class BayesianModel(Module, metaclass=abc.ABCMeta):
"""Bayesian model.
This is a base class for all GPflow models. See also :class:`GPModel`.

A Bayesian model provides methods for computing prior and posterior densities
and a maximum likelihood objective, allowing you to use generic code to
optimise model parameters to fit data.

Most Bayesian models are expected to hold their data internally, but the
methods take ``*args`` and ``**kwargs``, allowing you to write implementations
that take data as parameters.

See also :class:`gpflow.models.training_mixins.InternalDataTrainingLossMixin`,
:class:`gpflow.models.training_mixins.ExternalDataTrainingLossMixin`,
and :func:`gpflow.models.training_loss`.
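
A minimal sketch of such generic code, assuming a model that holds its data
internally (e.g. :class:`gpflow.models.GPR`; ``X`` and ``Y`` are hypothetical
data arrays):

.. code-block:: python

    import gpflow

    model = gpflow.models.GPR((X, Y), kernel=gpflow.kernels.SquaredExponential())
    opt = gpflow.optimizers.Scipy()
    opt.minimize(model.training_loss, model.trainable_variables)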
"""
@check_shapes(
"return: []",
)
def log_prior_density(self) -> tf.Tensor:
"""
Sum of the log prior probability densities of all (constrained) variables in this model.
"""
if self.trainable_parameters:
return tf.add_n([p.log_prior_density() for p in self.trainable_parameters])
else:
return to_default_float(0.0)
@check_shapes(
"return: []",
)
def log_posterior_density(self, *args: Any, **kwargs: Any) -> tf.Tensor:
"""
This may be the posterior with respect to the hyperparameters (e.g. for
GPR) or the posterior with respect to the function (e.g. for GPMC and
SGPMC). It assumes that maximum_log_likelihood_objective() is defined
sensibly.
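
A sketch of typical use in MCMC, assuming ``model`` is a model with priors set
(e.g. a :class:`GPMC` instance); :class:`gpflow.optimizers.SamplingHelper`
adapts this density for samplers that act on the unconstrained variables:

.. code-block:: python

    hmc_helper = gpflow.optimizers.SamplingHelper(
        model.log_posterior_density, model.trainable_parameters
    )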
"""
return self.maximum_log_likelihood_objective(*args, **kwargs) + self.log_prior_density()
@check_shapes(
"return: []",
)
def _training_loss(self, *args: Any, **kwargs: Any) -> tf.Tensor:
"""
Training loss definition. To allow MAP (maximum a-posteriori) estimation, the
log density of all priors is added to ``maximum_log_likelihood_objective()``;
the negative of this sum is returned, so that it can be minimised.
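
A sketch of enabling MAP estimation by assigning a prior to a parameter,
assuming ``model`` is, e.g., a :class:`GPR` instance (the choice of
distribution here is purely illustrative):

.. code-block:: python

    import tensorflow_probability as tfp
    from gpflow.utilities import to_default_float

    model.kernel.lengthscales.prior = tfp.distributions.LogNormal(
        to_default_float(0.0), to_default_float(1.0)
    )
    # the training loss now equals -(log likelihood + log prior density)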
"""
return -(self.maximum_log_likelihood_objective(*args, **kwargs) + self.log_prior_density())
@abc.abstractmethod
@check_shapes(
"return: []",
)
def maximum_log_likelihood_objective(self, *args: Any, **kwargs: Any) -> tf.Tensor:
"""
Objective for maximum likelihood estimation. Should be maximized. E.g.
log-marginal likelihood (hyperparameter likelihood) for GPR, or lower
bound to the log-marginal likelihood (ELBO) for sparse and variational
GPs.
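
A hypothetical subclass sketch (concrete models such as :class:`GPR` implement
this as their exact log marginal likelihood):

.. code-block:: python

    class MyModel(BayesianModel):
        @check_shapes("return: []")
        def maximum_log_likelihood_objective(self) -> tf.Tensor:
            return self._log_marginal_likelihood()  # hypothetical helper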
"""
raise NotImplementedError
class GPModel(BayesianModel):
r"""
A stateless base class for Gaussian process models, that is, those of the
form

.. math::
    :nowrap:

    \begin{align}
        \theta        & \sim p(\theta) \\
        f             & \sim \mathcal{GP}(m(x), k(x, x'; \theta)) \\
        f_i           & = f(x_i) \\
        y_i \,|\, f_i & \sim p(y_i|f_i)
    \end{align}

This class mostly adds functionality for predictions. To use it, inheriting
classes must define a ``predict_f`` function, which computes the means and
variances of the latent function(s).

These predictions are then pushed through the likelihood to obtain means and
variances of held-out data via ``self.predict_y``.

The predictions can also be used to compute the (log) density of held-out
data via ``self.predict_log_density``.

It is also possible to draw samples from the latent GPs using
``self.predict_f_samples``.

If you are new to GPflow, see our :doc:`../../../../getting_started` for
examples of how to use a model.
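
A minimal usage sketch (``X``, ``Y``, and ``Xnew`` are hypothetical arrays of
shapes [N, D], [N, P], and [M, D] respectively):

.. code-block:: python

    import gpflow

    model = gpflow.models.GPR((X, Y), kernel=gpflow.kernels.SquaredExponential())
    f_mean, f_var = model.predict_f(Xnew)
    y_mean, y_var = model.predict_y(Xnew)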

:param kernel: Covariance function, :math:`k` above.
:param likelihood: The likelihood of :math:`y_i`, given :math:`f_i`.
:param mean_function: Mean of :math:`f`.
:param num_latent_gps: The number of latent GPs, i.e. the output dimension of :math:`f`.
"""
def __init__(
self,
kernel: Kernel,
likelihood: Likelihood,
mean_function: Optional[MeanFunction] = None,
num_latent_gps: Optional[int] = None,
):
super().__init__()
assert num_latent_gps is not None, "GPModel requires specification of num_latent_gps"
self.num_latent_gps = num_latent_gps
if mean_function is None:
mean_function = Zero()
self.mean_function = mean_function
self.kernel = kernel
self.likelihood = likelihood
@staticmethod
@check_shapes(
"data[0]: [batch..., N, D]",
"data[1]: [batch..., N, P]",
)
def calc_num_latent_gps_from_data(
data: RegressionData, kernel: Kernel, likelihood: Likelihood
) -> int:
"""
Calculates the number of latent GPs required based on the data as well
as the type of kernel and likelihood.
"""
_, Y = data
output_dim = Y.shape[-1]
return GPModel.calc_num_latent_gps(kernel, likelihood, output_dim)
@staticmethod
def calc_num_latent_gps(kernel: Kernel, likelihood: Likelihood, output_dim: int) -> int:
"""
Calculates the number of latent GPs required given the number of
outputs `output_dim` and the type of likelihood and kernel.
Note: It's not nice for `GPModel` to need to be aware of specific likelihoods
as it is here. However, `num_latent_gps` is a bit broken in general; we should
fix this in the future. There are also some slightly problematic assumptions
regarding the output dimensions of mean_function.
See https://github.com/GPflow/GPflow/issues/1343
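
A sketch of the rules above (the kernel and likelihood constructions are
illustrative):

.. code-block:: python

    kernel = gpflow.kernels.SquaredExponential()
    likelihood = gpflow.likelihoods.Gaussian()
    GPModel.calc_num_latent_gps(kernel, likelihood, output_dim=3)  # == 3

    # With a SwitchedLikelihood, the last column of Y is a group index,
    # not an output, so one fewer latent GP is required:
    switched = gpflow.likelihoods.SwitchedLikelihood(
        [gpflow.likelihoods.Gaussian(), gpflow.likelihoods.Gaussian()]
    )
    GPModel.calc_num_latent_gps(kernel, switched, output_dim=2)  # == 1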
"""
if isinstance(kernel, MultioutputKernel):
# MultioutputKernels already have num_latent_gps attributes
num_latent_gps: int = kernel.num_latent_gps
elif isinstance(likelihood, SwitchedLikelihood):
# the SwitchedLikelihood partitions/stitches based on the last
# column in Y, but we should not add a separate latent GP for this!
# hence decrement by 1
num_latent_gps = output_dim - 1
assert num_latent_gps > 0
else:
num_latent_gps = output_dim
return num_latent_gps
@abc.abstractmethod
@check_shapes(
"Xnew: [batch..., N, D]",
"return[0]: [batch..., N, P]",
"return[1]: [batch..., N, P, N, P] if full_cov and full_output_cov",
"return[1]: [batch..., P, N, N] if full_cov and (not full_output_cov)",
"return[1]: [batch..., N, P, P] if (not full_cov) and full_output_cov",
"return[1]: [batch..., N, P] if (not full_cov) and (not full_output_cov)",
)
def predict_f(
self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False
) -> MeanAndVariance:
r"""
Compute the mean and variance of the posterior latent function(s) at the input points.

Given :math:`x_i`, this computes :math:`f_i`, for:

.. math::
    :nowrap:

    \begin{align}
        \theta & \sim p(\theta) \\
        f      & \sim \mathcal{GP}(m(x), k(x, x'; \theta)) \\
        f_i    & = f(x_i)
    \end{align}
For an example of how to use ``predict_f``, see
:doc:`../../../../notebooks/getting_started/basic_usage`.
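
A sketch of the shape conventions, assuming ``model`` is a trained model and
``Xnew`` has shape [N, D]:

.. code-block:: python

    mean, var = model.predict_f(Xnew)                 # [N, P], [N, P]
    mean, cov = model.predict_f(Xnew, full_cov=True)  # [N, P], [P, N, N]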
:param Xnew:
Input locations at which to compute mean and variance.
:param full_cov:
If ``True``, compute the full covariance between the inputs.
If ``False``, only returns the point-wise variance.
:param full_output_cov:
If ``True``, compute the full covariance between the outputs.
If ``False``, assumes outputs are independent.
"""
raise NotImplementedError
@check_shapes(
"Xnew: [batch..., N, D]",
"return: [batch..., N, P] if (num_samples is None)",
"return: [batch..., S, N, P] if (num_samples is not None)",
)
def predict_f_samples(
self,
Xnew: InputData,
num_samples: Optional[int] = None,
full_cov: bool = True,
full_output_cov: bool = False,
) -> tf.Tensor:
"""
Produce samples from the posterior latent function(s) at the input points.
Currently, the method does not support ``full_cov=True`` combined with
``full_output_cov=True``.
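
A sketch of the effect of ``num_samples``, assuming ``Xnew`` has shape [N, D]:

.. code-block:: python

    single = model.predict_f_samples(Xnew)                 # [N, P]
    batch = model.predict_f_samples(Xnew, num_samples=10)  # [10, N, P]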
:param Xnew:
Input locations at which to draw samples.
:param num_samples:
    Number of samples to draw.
    If `None`, a single sample is drawn and the return shape is [..., N, P].
    For any positive integer, the return shape gains an extra batch
    dimension, [..., S, N, P], with S = num_samples and P the number of
    outputs.
:param full_cov:
If True, draw correlated samples over the inputs. Computes the Cholesky over the
dense covariance matrix of size [num_data, num_data].
If False, draw samples that are uncorrelated over the inputs.
:param full_output_cov:
If True, draw correlated samples over the outputs.
If False, draw samples that are uncorrelated over the outputs.
"""
if full_cov and full_output_cov:
raise NotImplementedError(
"The combination of both `full_cov` and `full_output_cov` is not supported."
)
# check below for shape info
mean, cov = self.predict_f(Xnew, full_cov=full_cov, full_output_cov=full_output_cov)
if full_cov:
# mean: [..., N, P]
# cov: [..., P, N, N]
mean_for_sample = tf.linalg.adjoint(mean) # [..., P, N]
samples = sample_mvn(
mean_for_sample, cov, full_cov, num_samples=num_samples
) # [..., (S), P, N]
samples = tf.linalg.adjoint(samples) # [..., (S), N, P]
else:
# mean: [..., N, P]
# cov: [..., N, P] or [..., N, P, P]
samples = sample_mvn(
mean, cov, full_output_cov, num_samples=num_samples
) # [..., (S), N, P]
return samples # [..., (S), N, P]
@check_shapes(
"Xnew: [batch..., N, D]",
"return[0]: [batch..., N, P]",
"return[1]: [batch..., N, P, N, P] if full_cov and full_output_cov",
"return[1]: [batch..., P, N, N] if full_cov and (not full_output_cov)",
"return[1]: [batch..., N, P, P] if (not full_cov) and full_output_cov",
"return[1]: [batch..., N, P] if (not full_cov) and (not full_output_cov)",
)
def predict_y(
self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False
) -> MeanAndVariance:
r"""
Compute the mean and variance of the held-out data at the input points.

Given :math:`x_i`, this computes :math:`y_i`, for:

.. math::
    :nowrap:

    \begin{align}
        \theta        & \sim p(\theta) \\
        f             & \sim \mathcal{GP}(m(x), k(x, x'; \theta)) \\
        f_i           & = f(x_i) \\
        y_i \,|\, f_i & \sim p(y_i|f_i)
    \end{align}
For an example of how to use ``predict_y``, see
:doc:`../../../../notebooks/getting_started/basic_usage`.
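
A minimal sketch (``Xnew`` is hypothetical; note that the implementation below
currently rejects ``full_cov=True`` and ``full_output_cov=True``):

.. code-block:: python

    y_mean, y_var = model.predict_y(Xnew)  # [N, P], [N, P]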
:param Xnew:
Input locations at which to compute mean and variance.
:param full_cov:
If ``True``, compute the full covariance between the inputs.
If ``False``, only returns the point-wise variance.
:param full_output_cov:
If ``True``, compute the full covariance between the outputs.
If ``False``, assumes outputs are independent.
"""
# See https://github.com/GPflow/GPflow/issues/1461
assert_params_false(self.predict_y, full_cov=full_cov, full_output_cov=full_output_cov)
f_mean, f_var = self.predict_f(Xnew, full_cov=full_cov, full_output_cov=full_output_cov)
return self.likelihood.predict_mean_and_var(Xnew, f_mean, f_var)
@check_shapes(
"data[0]: [batch..., N, D]",
"data[1]: [batch..., N, P]",
"return: [batch..., N]",
)
def predict_log_density(
self, data: RegressionData, full_cov: bool = False, full_output_cov: bool = False
) -> tf.Tensor:
"""
Compute the log of the probability density of the data at the new data points.
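
A minimal sketch (``Xtest`` and ``Ytest`` are hypothetical held-out arrays):

.. code-block:: python

    log_density = model.predict_log_density((Xtest, Ytest))  # [N]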
"""
# See https://github.com/GPflow/GPflow/issues/1461
assert_params_false(
    self.predict_log_density, full_cov=full_cov, full_output_cov=full_output_cov
)
X, Y = data
f_mean, f_var = self.predict_f(X, full_cov=full_cov, full_output_cov=full_output_cov)
return self.likelihood.predict_log_density(X, f_mean, f_var, Y)