
Commit 14bc8b0

Add batch_shape property to GP model class

Implements #2301. TODO: Verify compatibility with the botorch setup of other models.

1 parent f73fa7d

9 files changed (+97, −19 lines)

gpytorch/models/approximate_gp.py

Lines changed: 18 additions & 0 deletions
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+import torch
+
 from .gp import GP
 from .pyro import _PyroMixin  # This will only contain functions if Pyro is installed

@@ -40,6 +42,11 @@ class ApproximateGP(GP, _PyroMixin):
     >>> # test_x = ...;
     >>> model(test_x)  # Returns the approximate GP latent function at test_x
     >>> likelihood(model(test_x))  # Returns the (approximate) predictive posterior distribution at test_x
+
+    :ivar torch.Size batch_shape: The batch shape of the model. This is a batch shape from an I/O perspective,
+        independent of the internal representation of the model. For a model with `(m)` outputs, a
+        `test_batch_shape x q x d`-shaped input to the model in eval mode returns a
+        distribution of shape `broadcast(test_batch_shape, model.batch_shape) x q x (m)`.
     """

     def __init__(self, variational_strategy):
@@ -49,6 +56,17 @@ def __init__(self, variational_strategy):
     def forward(self, x):
         raise NotImplementedError

+    @property
+    def batch_shape(self) -> torch.Size:
+        r"""The batch shape of the model.
+
+        This is a batch shape from an I/O perspective, independent of the internal
+        representation of the model. For a model with `(m)` outputs, a
+        `test_batch_shape x q x d`-shaped input to the model in eval mode returns a
+        distribution of shape `broadcast(test_batch_shape, model.batch_shape) x q x (m)`.
+        """
+        return self.variational_strategy.batch_shape
+
     def pyro_guide(self, input, beta=1.0, name_prefix=""):
         r"""
         (For Pyro integration only). The component of a `pyro.guide` that
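For context, a minimal sketch of how the new property surfaces on an ApproximateGP subclass: it delegates to variational_strategy.batch_shape, which (per the _variational_strategy.py hunk below) is inducing_points.shape[:-2]. The ToyApproximateGP class and all shapes here are illustrative, not part of the commit.

# Minimal sketch: ApproximateGP.batch_shape delegates to the variational strategy.
import torch
import gpytorch
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy


class ToyApproximateGP(ApproximateGP):
    def __init__(self, inducing_points):
        batch_shape = inducing_points.shape[:-2]
        variational_distribution = CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=batch_shape
        )
        variational_strategy = VariationalStrategy(self, inducing_points, variational_distribution)
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=batch_shape)
        self.covar_module = gpytorch.kernels.RBFKernel(batch_shape=batch_shape)

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))


model = ToyApproximateGP(torch.randn(2, 3, 16, 1))  # batch shape (2, 3), 16 inducing points, d=1
print(model.batch_shape)  # torch.Size([2, 3]), read off variational_strategy.batch_shape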

gpytorch/models/exact_gp.py

Lines changed: 22 additions & 8 deletions
@@ -50,6 +50,11 @@ class ExactGP(GP):
     >>> # test_x = ...;
     >>> model(test_x)  # Returns the GP latent function at test_x
     >>> likelihood(model(test_x))  # Returns the (approximate) predictive posterior distribution at test_x
+
+    :ivar torch.Size batch_shape: The batch shape of the model. This is a batch shape from an I/O perspective,
+        independent of the internal representation of the model. For a model with `(m)` outputs, a
+        `test_batch_shape x q x d`-shaped input to the model in eval mode returns a
+        distribution of shape `broadcast(test_batch_shape, model.batch_shape) x q x (m)`.
     """

     def __init__(self, train_inputs, train_targets, likelihood):
@@ -71,6 +76,17 @@ def __init__(self, train_inputs, train_targets, likelihood):

         self.prediction_strategy = None

+    @property
+    def batch_shape(self) -> torch.Size:
+        r"""The batch shape of the model.
+
+        This is a batch shape from an I/O perspective, independent of the internal
+        representation of the model. For a model with `(m)` outputs, a
+        `test_batch_shape x q x d`-shaped input to the model in eval mode returns a
+        distribution of shape `broadcast(test_batch_shape, model.batch_shape) x q x (m)`.
+        """
+        return self.train_inputs[0].shape[:-2]
+
     @property
     def train_targets(self):
         return self._train_targets
@@ -160,8 +176,6 @@ def get_fantasy_model(self, inputs, targets, **kwargs):
                 "all test independent caches exist. Call the model on some data first!"
             )

-        model_batch_shape = self.train_inputs[0].shape[:-2]
-
         if not isinstance(inputs, list):
             inputs = [inputs]

@@ -184,17 +198,17 @@ def get_fantasy_model(self, inputs, targets, **kwargs):

         # Check whether we can properly broadcast batch dimensions
         try:
-            torch.broadcast_shapes(model_batch_shape, target_batch_shape)
+            torch.broadcast_shapes(self.batch_shape, target_batch_shape)
         except RuntimeError:
             raise RuntimeError(
-                f"Model batch shape ({model_batch_shape}) and target batch shape "
+                f"Model batch shape ({self.batch_shape}) and target batch shape "
                 f"({target_batch_shape}) are not broadcastable."
             )

-        if len(model_batch_shape) > len(input_batch_shape):
-            input_batch_shape = model_batch_shape
-        if len(model_batch_shape) > len(target_batch_shape):
-            target_batch_shape = model_batch_shape
+        if len(self.batch_shape) > len(input_batch_shape):
+            input_batch_shape = self.batch_shape
+        if len(self.batch_shape) > len(target_batch_shape):
+            target_batch_shape = self.batch_shape

         # If input has no fantasy batch dimension but target does, we can save memory and computation by not
         # computing the covariance for each element of the batch. Therefore we don't expand the inputs to the
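The get_fantasy_model changes are a pure refactor: the locally computed model_batch_shape was exactly train_inputs[0].shape[:-2], which is what the new property returns. A standalone sketch of the broadcast check, with illustrative shape values:

# Standalone sketch of the broadcast check get_fantasy_model runs against self.batch_shape.
import torch

model_batch_shape = torch.Size([2, 3])      # what ExactGP.batch_shape returns: train_inputs[0].shape[:-2]
target_batch_shape = torch.Size([5, 1, 3])  # batch shape of the fantasy targets

try:
    # Shapes are right-aligned and broadcast dimension-wise.
    print(torch.broadcast_shapes(model_batch_shape, target_batch_shape))  # torch.Size([5, 2, 3])
except RuntimeError:
    raise RuntimeError(
        f"Model batch shape ({model_batch_shape}) and target batch shape "
        f"({target_batch_shape}) are not broadcastable."
    )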

gpytorch/models/gp.py

Lines changed: 13 additions & 1 deletion
@@ -1,7 +1,19 @@
 #!/usr/bin/env python3

+import torch
+
 from ..module import Module


 class GP(Module):
-    pass
+    @property
+    def batch_shape(self) -> torch.Size:
+        r"""The batch shape of the model.
+
+        This is a batch shape from an I/O perspective, independent of the internal
+        representation of the model. For a model with `(m)` outputs, a
+        `test_batch_shape x q x d`-shaped input to the model in eval mode returns a
+        distribution of shape `broadcast(test_batch_shape, model.batch_shape) x q x (m)`.
+        """
+        cls_name = self.__class__.__name__
+        raise NotImplementedError(f"{cls_name} does not define batch_shape property")
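A short sketch of the new base-class contract: accessing batch_shape on a GP subclass that does not define it now fails with an informative NotImplementedError rather than an AttributeError. BareGP is a hypothetical subclass for illustration only.

# Sketch: subclasses without their own batch_shape hit the base-class error.
from gpytorch.models import GP


class BareGP(GP):
    pass


try:
    BareGP().batch_shape
except NotImplementedError as err:
    print(err)  # "BareGP does not define batch_shape property"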

gpytorch/models/model_list.py

Lines changed: 18 additions & 0 deletions
@@ -31,6 +31,24 @@ def __init__(self, *models):
         )
         self.likelihood = LikelihoodList(*[m.likelihood for m in models])

+    @property
+    def batch_shape(self) -> torch.Size:
+        r"""The batch shape of the model.
+
+        This is a batch shape from an I/O perspective, independent of the internal
+        representation of the model. For a model with `(m)` outputs, a
+        `test_batch_shape x q x d`-shaped input to the model in eval mode returns a
+        distribution of shape `broadcast(test_batch_shape, model.batch_shape) x q x (m)`.
+        """
+        batch_shape = self.models[0].batch_shape
+        if all(batch_shape == m.batch_shape for m in self.models[1:]):
+            return batch_shape
+        # TODO: Allow broadcasting of model batch shapes
+        raise NotImplementedError(
+            f"`{self.__class__.__name__}.batch_shape` is only supported if all "
+            "constituent models have the same `batch_shape`."
+        )
+
     def forward_i(self, i, *args, **kwargs):
         return self.models[i].forward(*args, **kwargs)
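A standalone illustration of the consistency rule above: the list-level batch_shape is defined only when every submodel reports the same batch_shape (plain torch.Size values stand in for real submodels here):

# Standalone sketch of the model-list consistency check.
import torch

submodel_batch_shapes = [torch.Size([2]), torch.Size([2]), torch.Size([3])]

batch_shape = submodel_batch_shapes[0]
if all(batch_shape == bs for bs in submodel_batch_shapes[1:]):
    print(batch_shape)
else:
    # Mirrors the NotImplementedError path; broadcasting is left as a TODO in the commit.
    print("batch_shape undefined: submodels disagree")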

gpytorch/test/model_test_case.py

Lines changed: 3 additions & 0 deletions
@@ -32,6 +32,7 @@ def test_forward_train(self):
         data = self.create_test_data()
         likelihood, labels = self.create_likelihood_and_labels()
         model = self.create_model(data, labels, likelihood)
+        self.assertEqual(model.batch_shape, data.shape[:-2])  # test batch_shape property
         model.train()
         output = model(data)
         self.assertTrue(output.lazy_covariance_matrix.dim() == 2)
@@ -42,6 +43,7 @@ def test_batch_forward_train(self):
         batch_data = self.create_batch_test_data()
         likelihood, labels = self.create_batch_likelihood_and_labels()
         model = self.create_model(batch_data, labels, likelihood)
+        self.assertEqual(model.batch_shape, batch_data.shape[:-2])  # test batch_shape property
         model.train()
         output = model(batch_data)
         self.assertTrue(output.lazy_covariance_matrix.dim() == 3)
@@ -52,6 +54,7 @@ def test_multi_batch_forward_train(self):
         batch_data = self.create_batch_test_data(batch_shape=torch.Size([2, 3]))
         likelihood, labels = self.create_batch_likelihood_and_labels(batch_shape=torch.Size([2, 3]))
         model = self.create_model(batch_data, labels, likelihood)
+        self.assertEqual(model.batch_shape, batch_data.shape[:-2])  # test batch_shape property
         model.train()
         output = model(batch_data)
         self.assertTrue(output.lazy_covariance_matrix.dim() == 4)

gpytorch/variational/_variational_strategy.py

Lines changed: 6 additions & 1 deletion
@@ -90,11 +90,16 @@ def _expand_inputs(self, x: Tensor, inducing_points: Tensor) -> Tuple[Tensor, Tensor]:
         """
         Pre-processing step in __call__ to make x the same batch_shape as the inducing points
         """
-        batch_shape = torch.broadcast_shapes(inducing_points.shape[:-2], x.shape[:-2])
+        batch_shape = torch.broadcast_shapes(self.batch_shape, x.shape[:-2])
         inducing_points = inducing_points.expand(*batch_shape, *inducing_points.shape[-2:])
         x = x.expand(*batch_shape, *x.shape[-2:])
         return x, inducing_points

+    @property
+    def batch_shape(self) -> torch.Size:
+        r"""The batch shape of the variational strategy."""
+        return self.inducing_points.shape[:-2]
+
     @property
     def jitter_val(self) -> float:
         if self._jitter_val is None:
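Since batch_shape is defined as inducing_points.shape[:-2], the _expand_inputs change is behavior-preserving. A standalone sketch of the expansion step, with illustrative shapes:

# Standalone sketch of _expand_inputs: both tensors are expanded to the broadcast
# of the strategy's batch shape (inducing_points.shape[:-2]) and the input's.
import torch

inducing_points = torch.randn(3, 16, 2)  # strategy batch shape (3,), 16 points, d=2
x = torch.randn(5, 1, 8, 2)              # test batch shape (5, 1), q=8

batch_shape = torch.broadcast_shapes(inducing_points.shape[:-2], x.shape[:-2])
inducing_points = inducing_points.expand(*batch_shape, *inducing_points.shape[-2:])
x = x.expand(*batch_shape, *x.shape[-2:])
print(batch_shape, x.shape, inducing_points.shape)
# torch.Size([5, 3]) torch.Size([5, 3, 8, 2]) torch.Size([5, 3, 16, 2])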

gpytorch/variational/lmc_variational_strategy.py

Lines changed: 10 additions & 7 deletions
@@ -116,26 +116,24 @@ def __init__(
         Module.__init__(self)
         self.base_variational_strategy = base_variational_strategy
         self.num_tasks = num_tasks
-        batch_shape = self.base_variational_strategy._variational_distribution.batch_shape
+        vdist_batch_shape = self.base_variational_strategy._variational_distribution.batch_shape

         # Check if no functions
         if latent_dim >= 0:
             raise RuntimeError(f"latent_dim must be a negative indexed batch dimension: got {latent_dim}.")
-        if not (batch_shape[latent_dim] == num_latents or batch_shape[latent_dim] == 1):
+        if not (vdist_batch_shape[latent_dim] == num_latents or vdist_batch_shape[latent_dim] == 1):
             raise RuntimeError(
-                f"Mismatch in num_latents: got a variational distribution of batch shape {batch_shape}, "
+                f"Mismatch in num_latents: got a variational distribution of batch shape {vdist_batch_shape}, "
                 f"expected the function dim {latent_dim} to be {num_latents}."
             )
         self.num_latents = num_latents
         self.latent_dim = latent_dim

         # Make the batch_shape
-        self.batch_shape = list(batch_shape)
-        del self.batch_shape[self.latent_dim]
-        self.batch_shape = torch.Size(self.batch_shape)
+        self._batch_shape = vdist_batch_shape[: self.latent_dim] + vdist_batch_shape[self.latent_dim + 1 :]

         # LCM coefficients
-        lmc_coefficients = torch.randn(*batch_shape, self.num_tasks)
+        lmc_coefficients = torch.randn(*vdist_batch_shape, self.num_tasks)
         self.register_parameter("lmc_coefficients", torch.nn.Parameter(lmc_coefficients))

         if jitter_val is None:
@@ -145,6 +143,11 @@ def __init__(
         else:
             self.jitter_val = jitter_val

+    @property
+    def batch_shape(self) -> torch.Size:
+        r"""The batch shape of the variational strategy."""
+        return self._batch_shape
+
     @property
     def prior_distribution(self) -> MultivariateNormal:
         return self.base_variational_strategy.prior_distribution
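Here the list/del dance is replaced by torch.Size slicing, which keeps the value a torch.Size throughout. One subtlety worth flagging alongside the commit's TODO: for latent_dim = -1, vdist_batch_shape[latent_dim + 1:] equals vdist_batch_shape[0:], i.e. the whole shape, so the slice only drops the dimension cleanly for latent_dim < -1. A standalone sketch that normalizes the negative index first (the drop_dim helper is illustrative, not part of the commit):

# Standalone sketch of dropping a (negative-indexed) dimension from a torch.Size.
import torch

def drop_dim(shape: torch.Size, dim: int) -> torch.Size:
    dim = dim % len(shape)  # normalize a negative index so dim + 1 never wraps to 0
    return shape[:dim] + shape[dim + 1:]

vdist_batch_shape = torch.Size([2, 4, 3])  # e.g. num_latents = 4 at latent_dim = -2
print(drop_dim(vdist_batch_shape, -2))     # torch.Size([2, 3])
print(drop_dim(vdist_batch_shape, -1))     # torch.Size([2, 4])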

test/models/test_exact_gp.py

Lines changed: 4 additions & 0 deletions
@@ -106,6 +106,10 @@ def test_batch_forward_then_nonbatch_forward_eval(self):
         batch_data = self.create_batch_test_data()
         likelihood, labels = self.create_batch_likelihood_and_labels()
         model = self.create_model(batch_data, labels, likelihood)
+
+        # test batch_shape property
+        self.assertEqual(model.batch_shape, batch_data.shape[:-2])
+
         model.eval()
         output = model(batch_data)


test/models/test_variational_gp.py

Lines changed: 3 additions & 2 deletions
@@ -12,8 +12,9 @@

 class GPClassificationModel(ApproximateGP):
     def __init__(self, train_x, use_inducing=False):
-        variational_distribution = CholeskyVariationalDistribution(train_x.size(-2), batch_shape=train_x.shape[:-2])
-        inducing_points = torch.randn(50, train_x.size(-1)) if use_inducing else train_x
+        batch_shape = train_x.shape[:-2]
+        variational_distribution = CholeskyVariationalDistribution(train_x.size(-2), batch_shape=batch_shape)
+        inducing_points = torch.randn(*batch_shape, 50, train_x.size(-1)) if use_inducing else train_x
         strategy_cls = VariationalStrategy
         variational_strategy = strategy_cls(
             self, inducing_points, variational_distribution, learn_inducing_locations=use_inducing
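A sketch of why the test now batches its inducing points: ApproximateGP.batch_shape reads inducing_points.shape[:-2] via the variational strategy, so unbatched inducing points would report an empty batch shape (shapes illustrative):

# Sketch: inducing points must carry the model's batch dimensions.
import torch

train_x = torch.randn(2, 10, 1)  # batch shape (2,)
batch_shape = train_x.shape[:-2]

old_inducing = torch.randn(50, train_x.size(-1))                # shape[:-2] == torch.Size([])
new_inducing = torch.randn(*batch_shape, 50, train_x.size(-1))  # shape[:-2] == torch.Size([2])
print(old_inducing.shape[:-2], new_inducing.shape[:-2])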
