Fixing ptz and pxz for multioutput

Christian Jorgensen · Christian Jorgensen · commit 1ff4fedfe72f · 2025-09-15T15:56:18.000-05:00
diff --git a/examples/pcovc/PCovC_multioutput.py b/examples/pcovc/PCovC_multioutput.py
@@ -22,8 +22,8 @@
 plt.rcParams["image.cmap"] = "tab10"
 plt.rcParams["scatter.edgecolors"] = "k"
 # %%
-#
-#
+# For this, we will use the `sklearn.datasets.load_digits` dataset.
+# This dataset contains 8x8 images of handwritten digits (0-9).
 X, y = load_digits(return_X_y=True)
 x_scaler = StandardScaler()
 X_scaled = StandardScaler().fit_transform(X)
@@ -127,3 +127,5 @@
 axs[1, 0].set_ylabel("PCovC")
 fig.colorbar(scat_pca, ax=axs, orientation="horizontal")
 fig.suptitle("Multiclass-Multilabel PCovC")
+
+# %%
diff --git a/src/skmatter/decomposition/_kernel_pcovc.py b/src/skmatter/decomposition/_kernel_pcovc.py
@@ -39,8 +39,8 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
 
     where :math:`\alpha` is a mixing parameter,
     :math:`\mathbf{K}` is the input kernel of shape :math:`(n_{samples}, n_{samples})`
-    and :math:`\mathbf{Z}` is a matrix of class confidence scores of shape
-    :math:`(n_{samples}, n_{classes})`
+    and :math:`\mathbf{Z}` is a tensor of class confidence scores of shape
+    :math:`(n_{samples}, n_{classes}, n_{labels})`
 
     Parameters
     ----------
@@ -82,10 +82,10 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
         - ``sklearn.linear_model.LogisticRegressionCV()``
         - ``sklearn.svm.LinearSVC()``
         - ``sklearn.discriminant_analysis.LinearDiscriminantAnalysis()``
-        - ``sklearn.multioutput.MultiOutputClassifier()``
+        - ``sklearn.linear_model.Perceptron()``
         - ``sklearn.linear_model.RidgeClassifier()``
         - ``sklearn.linear_model.RidgeClassifierCV()``
-        - ``sklearn.linear_model.Perceptron()``
+        - ``sklearn.multioutput.MultiOutputClassifier()``
 
         If a pre-fitted classifier
         is provided, it is used to compute :math:`{\mathbf{Z}}`.
@@ -167,13 +167,15 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
         the projector, or weights, from the input kernel :math:`\mathbf{K}`
         to the latent-space projection :math:`\mathbf{T}`
 
-    pkz_: numpy.ndarray of size :math:`({n_{samples}, })` or :math:`({n_{samples}, n_{classes}})`
-        the projector, or weights, from the input kernel :math:`\mathbf{K}`
-        to the class confidence scores :math:`\mathbf{Z}`
+    pkz_ : ndarray of size :math:`({n_{samples}, {n_{classes}}})`, or list of
+        ndarrays of size :math:`({n_{samples}, {n_{classes_i}}})` for a dataset
+        with :math:`i` labels.
+        the projector, or weights, from the input kernel :math:`\mathbf{K}`
+        to the class confidence scores :math:`\mathbf{Z}`.
 
-    ptz_: numpy.ndarray of size :math:`({n_{components}, })` or :math:`({n_{components}, n_{classes}})`
-        the projector, or weights, from the latent-space projection
-        :math:`\mathbf{T}` to the class confidence scores :math:`\mathbf{Z}`
+    ptz_ : ndarray of size :math:`({n_{components}, {n_{classes}}})`, or list of
+        ndarrays of size :math:`({n_{components}, {n_{classes_i}}})` for a dataset
+        with :math:`i` labels. Projector from :math:`\mathbf{T}` to :math:`\mathbf{Z}`.
 
     ptx_: numpy.ndarray of size :math:`({n_{components}, n_{features}})`
         the projector, or weights, from the latent-space projection
@@ -271,13 +273,16 @@ def fit(self, X, Y, W=None):
             scaled to have unit variance, otherwise :math:`\mathbf{X}` should
             be scaled so that each feature has a variance of 1 / n_features.
 
-        Y : numpy.ndarray, shape (n_samples,)
-            Training data, where n_samples is the number of samples.
+        Y : numpy.ndarray, shape (n_samples,) or (n_samples, n_outputs)
+            Training data, where n_samples is the number of samples and
+            n_outputs is the number of outputs.
 
-        W : numpy.ndarray, shape (n_features, n_classes)
+        W : numpy.ndarray, shape (n_features, n_classes) or (n_features, )
             Classification weights, optional when classifier = `precomputed`. If
             not passed, it is assumed that the weights will be taken from a
-            linear classifier fit between K and Y.
+            linear classifier fit between :math:`\mathbf{K}` and :math:`\mathbf{Y}`.
+            In the multioutput case, use
+            ``W = np.hstack([est_.coef_.T for est_ in classifier.estimators_])``.
 
         Returns
         -------
@@ -355,7 +360,7 @@ def fit(self, X, Y, W=None):
             else:
                 W = _.coef_.T
 
-        else:
+        elif W is None:
             self.z_classifier_ = check_cl_fit(classifier, K, Y)
             if multioutput:
                 W = np.hstack([est_.coef_.T for est_ in self.z_classifier_.estimators_])
@@ -374,10 +379,8 @@ def fit(self, X, Y, W=None):
         self.classifier_ = clone(classifier).fit(K @ self.pkt_, Y)
 
         if multioutput:
-            self.ptz_ = np.hstack(
-                [est_.coef_.T for est_ in self.classifier_.estimators_]
-            )
-            self.pkz_ = self.pkt_ @ self.ptz_
+            self.ptz_ = [est_.coef_.T for est_ in self.classifier_.estimators_]
+            self.pkz_ = [self.pkt_ @ ptz for ptz in self.ptz_]
         else:
             self.ptz_ = self.classifier_.coef_.T
             self.pkz_ = self.pkt_ @ self.ptz_
diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py
@@ -11,7 +11,6 @@
 )
 from sklearn.linear_model._base import LinearClassifierMixin
 
-from sklearn.base import MultiOutputMixin
 from sklearn.multioutput import MultiOutputClassifier
 from sklearn.svm import LinearSVC
 from sklearn.utils import check_array
@@ -36,8 +35,8 @@ class PCovC(LinearClassifierMixin, _BasePCov):
             (1 - \alpha) \mathbf{Z}\mathbf{Z}^T
 
     where :math:`\alpha` is a mixing parameter, :math:`\mathbf{X}` is an input matrix of shape
-    :math:`(n_{samples}, n_{features})`, and :math:`\mathbf{Z}` is a matrix of class confidence scores
-    of shape :math:`(n_{samples}, n_{classes})`. For :math:`(n_{samples} < n_{features})`,
+    :math:`(n_{samples}, n_{features})`, and :math:`\mathbf{Z}` is a tensor of class confidence scores
+    of shape :math:`(n_{samples}, n_{classes}, n_{labels})`. For :math:`(n_{samples} < n_{features})`,
     this can be more efficiently computed using the eigendecomposition of a modified covariance matrix
     :math:`\mathbf{\tilde{C}}`
 
@@ -112,10 +111,10 @@ class PCovC(LinearClassifierMixin, _BasePCov):
         - ``sklearn.linear_model.LogisticRegressionCV()``
         - ``sklearn.svm.LinearSVC()``
         - ``sklearn.discriminant_analysis.LinearDiscriminantAnalysis()``
-        - ``sklearn.multioutput.MultiOutputClassifier()``
+        - ``sklearn.linear_model.Perceptron()``
         - ``sklearn.linear_model.RidgeClassifier()``
         - ``sklearn.linear_model.RidgeClassifierCV()``
-        - ``sklearn.linear_model.Perceptron()``
+        - ``sklearn.multioutput.MultiOutputClassifier()``
 
         If a pre-fitted classifier
         is provided, it is used to compute :math:`{\mathbf{Z}}`.
@@ -175,11 +174,15 @@ class PCovC(LinearClassifierMixin, _BasePCov):
         the projector, or weights, from the input space :math:`\mathbf{X}`
         to the latent-space projection :math:`\mathbf{T}`
 
-    pxz_ : ndarray of size :math:`({n_{features}, })`, :math:`({n_{features}, n_{classes}})`
+    pxz_ : ndarray of size :math:`({n_{features}, {n_{classes}}})`, or list of
+        ndarrays of size :math:`({n_{features}, {n_{classes_i}}})` for a dataset
+        with :math:`i` labels.
         the projector, or weights, from the input space :math:`\mathbf{X}`
         to the class confidence scores :math:`\mathbf{Z}`.
 
-    ptz_ : ndarray of size :math:`({n_{components}, })`, :math:`({n_{components}, n_{classes}})`
+    ptz_ : ndarray of size :math:`({n_{components}, {n_{classes}}})`, or list of
+        ndarrays of size :math:`({n_{components}, {n_{classes_i}}})` for a dataset
+        with :math:`i` labels.
         the projector, or weights, from from the latent-space projection
         :math:`\mathbf{T}` to the class confidence scores :math:`\mathbf{Z}`.
 
@@ -267,7 +270,7 @@ def fit(self, X, Y, W=None):
             Classification weights, optional when classifier is ``precomputed``. If
             not passed, it is assumed that the weights will be taken from a
             linear classifier fit between :math:`\mathbf{X}` and :math:`\mathbf{Y}`.
-            In the multioutput case,
+            In the multioutput case, use
             `` W = np.hstack([est_.coef_.T for est_ in classifier.estimators_])``.
         """
         X, Y = validate_data(self, X, Y, multi_output=True, y_numeric=False)
@@ -329,15 +332,15 @@ def fit(self, X, Y, W=None):
                 W = np.hstack([_.coef_.T for _ in _.estimators_])
             else:
                 W = _.coef_.T
-        else:
+        elif W is None:
             self.z_classifier_ = check_cl_fit(classifier, X, Y)
             if multioutput:
                 W = np.hstack([est_.coef_.T for est_ in self.z_classifier_.estimators_])
             else:
                 W = self.z_classifier_.coef_.T
 
         Z = X @ W
-
+        
         if self.space_ == "feature":
             self._fit_feature_space(X, Y, Z)
         else:
@@ -348,19 +351,12 @@ def fit(self, X, Y, W=None):
         self.classifier_ = clone(classifier).fit(X @ self.pxt_, Y)
 
         if multioutput:
-            self.ptz_ = np.hstack(
-                [est_.coef_.T for est_ in self.classifier_.estimators_]
-            )
-            # print(f"pxt {self.pxt_.shape}")
-            # print(f"ptz {self.ptz_.shape}")
-            self.pxz_ = self.pxt_ @ self.ptz_
-            # print(f"pxz {self.pxz_.shape}")
+            self.ptz_ = [est_.coef_.T for est_ in self.classifier_.estimators_]
+            self.pxz_ = [self.pxt_ @ ptz for ptz in self.ptz_]
         else:
             self.ptz_ = self.classifier_.coef_.T
-            # print(self.ptz_.shape)
             self.pxz_ = self.pxt_ @ self.ptz_
 
-        # print(self.ptz_.shape)
         if not multioutput and type_of_target(Y) == "binary":
             self.pxz_ = self.pxz_.reshape(
                 X.shape[1],
@@ -531,3 +527,4 @@ def score(self, X, y, sample_weight=None):
 
     # Inherit the docstring from scikit-learn
     score.__doc__ = LinearClassifierMixin.score.__doc__
+    
diff --git a/tests/test_pcovc.py b/tests/test_pcovc.py
@@ -606,6 +606,7 @@ def test_precomputed_multioutput(self):
 
         classifier.fit(self.X, Y_double)
         W = np.hstack([est_.coef_.T for est_ in classifier.estimators_])
+        print(W.shape)
         pcovc1 = self.model(mixing=0.5, classifier="precomputed", n_components=1)
         pcovc1.fit(self.X, Y_double, W)
         t1 = pcovc1.transform(self.X)