Source code for gpflow.models.model

# Copyright 2016-2020 The GPflow Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import abc
from typing import Any, Optional

import tensorflow as tf
from check_shapes import check_shapes

from ..base import InputData, MeanAndVariance, Module, RegressionData
from ..conditionals.util import sample_mvn
from ..kernels import Kernel, MultioutputKernel
from ..likelihoods import Likelihood, SwitchedLikelihood
from ..mean_functions import MeanFunction, Zero
from ..utilities import assert_params_false, to_default_float


class BayesianModel(Module, metaclass=abc.ABCMeta):
    """Bayesian model.

    Common base class of all GPflow models. See also :class:`GPModel`.

    A bayesian model exposes methods that compute prior- and posterior densities, together with a
    maximum likelihood objective, so that generic code can optimise model parameters to fit data.

    Most bayesian models are expected to store their data internally. The methods nevertheless
    accept ``*args`` and ``**kwargs``, so implementations that receive data as parameters are
    possible too.
    See also :class:`gpflow.models.training_mixins.InternalDataTrainingLossMixin`,
    :class:`gpflow.models.training_mixins.ExternalDataTrainingLossMixin`,
    and :func:`gpflow.models.training_loss`.
    """

    @check_shapes(
        "return: []",
    )
    def log_prior_density(self) -> tf.Tensor:
        """
        Sum of the log prior probability densities of all (constrained) variables in this model.

        :returns: A scalar; zero (in the default float type) when the model has no trainable
            parameters.
        """
        params = self.trainable_parameters
        if not params:
            # ``tf.add_n`` is only called with a non-empty list; fall back to a constant here.
            return to_default_float(0.0)
        return tf.add_n([param.log_prior_density() for param in params])

    @check_shapes(
        "return: []",
    )
    def log_posterior_density(self, *args: Any, **kwargs: Any) -> tf.Tensor:
        """
        Log posterior density: the maximum likelihood objective plus the log prior density.

        Depending on the model this is the posterior with respect to the hyperparameters
        (e.g. for GPR) or with respect to the function (e.g. for GPMC and SGPMC).
        It relies on maximum_log_likelihood_objective() being defined sensibly.

        :param args: Forwarded unchanged to maximum_log_likelihood_objective().
        :param kwargs: Forwarded unchanged to maximum_log_likelihood_objective().
        :returns: A scalar.
        """
        objective = self.maximum_log_likelihood_objective(*args, **kwargs)
        return objective + self.log_prior_density()

    @check_shapes(
        "return: []",
    )
    def _training_loss(self, *args: Any, **kwargs: Any) -> tf.Tensor:
        """
        Definition of the training loss (to be minimised).

        The log density of all priors is added to maximum_log_likelihood_objective() before
        negating, which allows MAP (maximum a-posteriori) estimation.

        :param args: Forwarded unchanged to maximum_log_likelihood_objective().
        :param kwargs: Forwarded unchanged to maximum_log_likelihood_objective().
        :returns: A scalar.
        """
        objective = self.maximum_log_likelihood_objective(*args, **kwargs)
        log_prior = self.log_prior_density()
        return -(objective + log_prior)

    @abc.abstractmethod
    @check_shapes(
        "return: []",
    )
    def maximum_log_likelihood_objective(self, *args: Any, **kwargs: Any) -> tf.Tensor:
        """
        Objective for maximum likelihood estimation; it should be maximized.

        Examples are the log-marginal likelihood (hyperparameter likelihood) for GPR, or a
        lower bound to the log-marginal likelihood (ELBO) for sparse and variational GPs.

        :returns: A scalar.
        :raises NotImplementedError: Always, in this base class; subclasses must override.
        """
        raise NotImplementedError


class GPModel(BayesianModel):
    r"""
    A stateless base class for Gaussian process models, that is, those of the
    form

    .. math::
       :nowrap:

       \begin{align}
           \theta        & \sim p(\theta) \\
           f             & \sim \mathcal{GP}(m(x), k(x, x'; \theta)) \\
           f_i           & = f(x_i) \\
           y_i \,|\, f_i & \sim p(y_i|f_i)
       \end{align}

    This class mostly adds functionality for predictions. To use it, inheriting
    classes must define a predict_f function, which computes the means and
    variances of the latent function.

    These predictions are then pushed through the likelihood to obtain means
    and variances of held out data, see self.predict_y.

    The predictions can also be used to compute the (log) density of held-out
    data via self.predict_log_density.

    It is also possible to draw samples from the latent GPs using
    self.predict_f_samples.

    If you are new to GPflow, see our :doc:`../../../../getting_started` for examples on how to use
    a model.

    :param kernel: Covariance function. $k$ above.
    :param likelihood: The likelihood of $y_i$, given $f_i$.
    :param mean_function: Mean of $f$. Defaults to :class:`Zero` when ``None``.
    :param num_latent_gps: The number of latent GPs - the output dimension of $f$.
        Although the default is ``None``, a value must be supplied; see
        :meth:`calc_num_latent_gps` for a way to derive it.
    """

    def __init__(
        self,
        kernel: Kernel,
        likelihood: Likelihood,
        mean_function: Optional[MeanFunction] = None,
        num_latent_gps: Optional[int] = None,
    ) -> None:
        super().__init__()
        assert num_latent_gps is not None, "GPModel requires specification of num_latent_gps"
        self.num_latent_gps = num_latent_gps
        if mean_function is None:
            mean_function = Zero()
        self.mean_function = mean_function
        self.kernel = kernel
        self.likelihood = likelihood

    @staticmethod
    @check_shapes(
        "data[0]: [batch..., N, D]",
        "data[1]: [batch..., N, P]",
    )
    def calc_num_latent_gps_from_data(
        data: RegressionData, kernel: Kernel, likelihood: Likelihood
    ) -> int:
        """
        Calculates the number of latent GPs required based on the data as well
        as the type of kernel and likelihood.

        :param data: Tuple ``(X, Y)``; only the size of the last axis of ``Y`` is used.
        :param kernel: Kernel of the model, forwarded to :meth:`calc_num_latent_gps`.
        :param likelihood: Likelihood of the model, forwarded to :meth:`calc_num_latent_gps`.
        :returns: The result of :meth:`calc_num_latent_gps` for the output dimension of ``Y``.
        """
        _, Y = data
        output_dim = Y.shape[-1]
        return GPModel.calc_num_latent_gps(kernel, likelihood, output_dim)

    @staticmethod
    def calc_num_latent_gps(kernel: Kernel, likelihood: Likelihood, output_dim: int) -> int:
        """
        Calculates the number of latent GPs required given the number of
        outputs `output_dim` and the type of likelihood and kernel.

        Note: It's not nice for `GPModel` to need to be aware of specific
        likelihoods as here. However, `num_latent_gps` is a bit more broken in
        general, we should fix this in the future. There are also some slightly
        problematic assumptions re the output dimensions of mean_function.
        See https://github.com/GPflow/GPflow/issues/1343

        :param kernel: If this is a :class:`MultioutputKernel`, its ``num_latent_gps``
            attribute is returned and the other arguments are ignored.
        :param likelihood: If this is a :class:`SwitchedLikelihood` (and the kernel is not
            multi-output), one column of the output is treated as an index, not as data.
        :param output_dim: Size of the last axis of the observations.
        :returns: The number of latent GPs.
        :raises AssertionError: If a :class:`SwitchedLikelihood` is used and `output_dim`
            leaves no column for the actual observations.
        """
        if isinstance(kernel, MultioutputKernel):
            # MultioutputKernels already have num_latent_gps attributes
            num_latent_gps: int = kernel.num_latent_gps
        elif isinstance(likelihood, SwitchedLikelihood):
            # the SwitchedLikelihood partitions/stitches based on the last
            # column in Y, but we should not add a separate latent GP for this!
            # hence decrement by 1
            num_latent_gps = output_dim - 1
            assert num_latent_gps > 0, (
                "SwitchedLikelihood needs at least one data column in addition to the index"
                f" column, but output_dim={output_dim}"
            )
        else:
            num_latent_gps = output_dim

        return num_latent_gps

    @abc.abstractmethod
    @check_shapes(
        "Xnew: [batch..., N, D]",
        "return[0]: [batch..., N, P]",
        "return[1]: [batch..., N, P, N, P] if full_cov and full_output_cov",
        "return[1]: [batch..., P, N, N] if full_cov and (not full_output_cov)",
        "return[1]: [batch..., N, P, P] if (not full_cov) and full_output_cov",
        "return[1]: [batch..., N, P] if (not full_cov) and (not full_output_cov)",
    )
    def predict_f(
        self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False
    ) -> MeanAndVariance:
        r"""
        Compute the mean and variance of the posterior latent function(s) at the input points.

        Given $x_i$ this computes $f_i$, for:

        .. math::
           :nowrap:

           \begin{align}
               \theta        & \sim p(\theta) \\
               f             & \sim \mathcal{GP}(m(x), k(x, x'; \theta)) \\
               f_i           & = f(x_i) \\
           \end{align}

        For an example of how to use ``predict_f``, see
        :doc:`../../../../notebooks/getting_started/basic_usage`.

        :param Xnew:
            Input locations at which to compute mean and variance.
        :param full_cov:
            If ``True``, compute the full covariance between the inputs.
            If ``False``, only returns the point-wise variance.
        :param full_output_cov:
            If ``True``, compute the full covariance between the outputs.
            If ``False``, assumes outputs are independent.
        :returns:
            Tuple of mean and (co)variance; the shape of the latter depends on `full_cov` and
            `full_output_cov` as declared in the shape annotations of this method.
        :raises NotImplementedError:
            Always, in this base class; subclasses must override.
        """
        raise NotImplementedError

    @check_shapes(
        "Xnew: [batch..., N, D]",
        "return: [batch..., N, P] if (num_samples is None)",
        "return: [batch..., S, N, P] if (num_samples is not None)",
    )
    def predict_f_samples(
        self,
        Xnew: InputData,
        num_samples: Optional[int] = None,
        full_cov: bool = True,
        full_output_cov: bool = False,
    ) -> tf.Tensor:
        """
        Produce samples from the posterior latent function(s) at the input points.

        Currently, the method does not support `full_output_cov=True` and `full_cov=True`.

        :param Xnew:
            Input locations at which to draw samples.
        :param num_samples:
            Number of samples to draw.
            If `None`, a single sample is drawn and the return shape is [..., N, P],
            for any positive integer the return shape contains an extra batch
            dimension, [..., S, N, P], with S = num_samples and P is the number of outputs.
        :param full_cov:
            If True, draw correlated samples over the inputs. Computes the Cholesky over the
            dense covariance matrix of size [num_data, num_data].
            If False, draw samples that are uncorrelated over the inputs.
        :param full_output_cov:
            If True, draw correlated samples over the outputs.
            If False, draw samples that are uncorrelated over the outputs.
        :returns:
            The samples, of shape [..., (S), N, P].
        :raises NotImplementedError:
            If both `full_cov` and `full_output_cov` are true.
        """
        if full_cov and full_output_cov:
            raise NotImplementedError(
                "The combination of both `full_cov` and `full_output_cov` is not supported."
            )

        # check below for shape info
        mean, cov = self.predict_f(Xnew, full_cov=full_cov, full_output_cov=full_output_cov)
        if full_cov:
            # mean: [..., N, P]
            # cov: [..., P, N, N]
            # The covariance has the output axis leading, so move the mean's output axis to
            # match before sampling and swap the axes back afterwards.
            mean_for_sample = tf.linalg.adjoint(mean)  # [..., P, N]
            samples = sample_mvn(
                mean_for_sample, cov, full_cov, num_samples=num_samples
            )  # [..., (S), P, N]
            samples = tf.linalg.adjoint(samples)  # [..., (S), N, P]
        else:
            # mean: [..., N, P]
            # cov: [..., N, P] or [..., N, P, P]
            samples = sample_mvn(
                mean, cov, full_output_cov, num_samples=num_samples
            )  # [..., (S), N, P]
        return samples  # [..., (S), N, P]

    @check_shapes(
        "Xnew: [batch..., N, D]",
        "return[0]: [batch..., N, P]",
        "return[1]: [batch..., N, P, N, P] if full_cov and full_output_cov",
        "return[1]: [batch..., P, N, N] if full_cov and (not full_output_cov)",
        "return[1]: [batch..., N, P, P] if (not full_cov) and full_output_cov",
        "return[1]: [batch..., N, P] if (not full_cov) and (not full_output_cov)",
    )
    def predict_y(
        self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False
    ) -> MeanAndVariance:
        r"""
        Compute the mean and variance of the held-out data at the input points.

        Given $x_i$ this computes $y_i$, for:

        .. math::
           :nowrap:

           \begin{align}
               \theta        & \sim p(\theta) \\
               f             & \sim \mathcal{GP}(m(x), k(x, x'; \theta)) \\
               f_i           & = f(x_i) \\
               y_i \,|\, f_i & \sim p(y_i|f_i)
           \end{align}


        For an example of how to use ``predict_y``, see
        :doc:`../../../../notebooks/getting_started/basic_usage`.

        :param Xnew:
            Input locations at which to compute mean and variance.
        :param full_cov:
            If ``True``, compute the full covariance between the inputs.
            If ``False``, only returns the point-wise variance.
            NOTE: this flag is checked with ``assert_params_false`` before any computation,
            so presumably only ``False`` is accepted at present (see the issue linked below).
        :param full_output_cov:
            If ``True``, compute the full covariance between the outputs.
            If ``False``, assumes outputs are independent.
            NOTE: checked with ``assert_params_false`` in the same way as `full_cov`.
        :returns:
            Result of the likelihood's ``predict_mean_and_var`` applied to the latent
            prediction from :meth:`predict_f`.
        """
        # See https://github.com/GPflow/GPflow/issues/1461
        assert_params_false(self.predict_y, full_cov=full_cov, full_output_cov=full_output_cov)

        f_mean, f_var = self.predict_f(Xnew, full_cov=full_cov, full_output_cov=full_output_cov)
        return self.likelihood.predict_mean_and_var(Xnew, f_mean, f_var)

    @check_shapes(
        "data[0]: [batch..., N, D]",
        "data[1]: [batch..., N, P]",
        "return: [batch..., N]",
    )
    def predict_log_density(
        self, data: RegressionData, full_cov: bool = False, full_output_cov: bool = False
    ) -> tf.Tensor:
        """
        Compute the log of the probability density of the data at the new data points.

        :param data:
            Tuple ``(X, Y)`` of input locations and the observations whose log density is
            evaluated.
        :param full_cov:
            Checked with ``assert_params_false`` before any computation, so presumably only
            ``False`` is accepted at present (see the issue linked below).
        :param full_output_cov:
            Checked with ``assert_params_false`` in the same way as `full_cov`.
        :returns:
            Result of the likelihood's ``predict_log_density`` applied to the latent
            prediction from :meth:`predict_f`; one value per data point.
        """
        # See https://github.com/GPflow/GPflow/issues/1461
        # Pass *this* method (not ``predict_y``) so that the check is attributed to the
        # method the caller actually invoked.
        assert_params_false(
            self.predict_log_density, full_cov=full_cov, full_output_cov=full_output_cov
        )

        X, Y = data
        f_mean, f_var = self.predict_f(X, full_cov=full_cov, full_output_cov=full_output_cov)
        return self.likelihood.predict_log_density(X, f_mean, f_var, Y)