Updating CHANGELOG, changing PCovC fit() note

rvasav26 · rvasav26 · commit ddee0d562bab · 2025-05-27T18:42:08.000-05:00
diff --git a/CHANGELOG b/CHANGELOG
@@ -13,6 +13,10 @@ The rules for CHANGELOG file:
 
 0.3.0 (XXXX/XX/XX)
 ------------------
+- Add ``_BasePCov`` class (#248)
+- Add ``PCovC`` class that inherits shared functionality from ``_BasePCov`` (#248)
+- Add ``PCovC`` testing suite and examples (#248)
+- Modify ``PCovR`` to inherit shared functionality from ``_BasePCov`` (#248)
 - Update to sklearn >= 1.6.0 and scipy >= 1.15.0 (#239)
 - Fixed moved function import from scipy and bump scipy dependency to 1.15.0 (#236)
 - Fix rendering issues for `SparseKDE` and `QuickShift` (#236)
diff --git a/docs/src/bibliography.rst b/docs/src/bibliography.rst
@@ -45,3 +45,9 @@ References
     Michele Ceriotti, "Improving Sample and Feature Selection with Principal Covariates
     Regression" 2021 Mach. Learn.: Sci. Technol. 2 035038.
     https://iopscience.iop.org/article/10.1088/2632-2153/abfe7c.
+
+.. [Jorgensen2025]
+    Christian Jorgensen, Arthur Y. Lin, and Rose K. Cersonsky,
+    "Interpretable Visualizations of Data Spaces for Classification Problems"
+    2025 arXiv:2503.05861.
+    https://doi.org/10.48550/arXiv.2503.05861.
diff --git a/docs/src/references/decomposition.rst b/docs/src/references/decomposition.rst
@@ -1,5 +1,5 @@
-Principal Covariates Regression (PCovR) and Classification (PCovC)
-==================================================================
+Hybrid Mapping Techniques (PCovR and PCovC)
+===========================================
 
 .. _PCovR-api:
 
diff --git a/src/skmatter/decomposition/__init__.py b/src/skmatter/decomposition/__init__.py
@@ -25,19 +25,16 @@
   original PCovR method, proposed in [Helfrecht2020]_.
 """
 
-from ._pcov import _BasePCov, pcovr_covariance, pcovr_kernel
+from ._pcov import _BasePCov
 
 from ._pcovr import PCovR
-from ._kernel_pcovr import KernelPCovR
-
 from ._pcovc import PCovC
 
+from ._kernel_pcovr import KernelPCovR
 
 __all__ = [
     "_BasePCov",
-    "pcovr_covariance",
-    "pcovr_kernel",
     "PCovR",
-    "KernelPCovR",
     "PCovC",
+    "KernelPCovR",
 ]
diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py
@@ -20,7 +20,8 @@
 
 
 class PCovC(LinearClassifierMixin, _BasePCov):
-    r"""Principal Covariates Classification determines a latent-space projection :math:`\mathbf{T}`
+    r"""Principal Covariates Classification, as described in [Jorgensen2025]_,
+    determines a latent-space projection :math:`\mathbf{T}`
     which minimizes a combined loss in supervised and unsupervised tasks.
 
     This projection is determined by the eigendecomposition of a modified gram
@@ -219,8 +220,16 @@ def __init__(
         self.classifier = classifier
 
     def fit(self, X, Y, W=None):
-        r"""Fit the model with X and Y. Depending on the dimensions of X,
-        calls either `_fit_feature_space` or `_fit_sample_space`.
+        r"""Fit the model with X and Y. Note that W is taken from the
+        coefficients of a linear classifier fit between X and Y to compute
+        Z:
+
+        .. math::
+            \mathbf{Z} = \mathbf{X} \mathbf{W}
+
+        We then call either `_fit_feature_space` or `_fit_sample_space`,
+        using Z as our approximation of Y. Finally, we refit a classifier on
+        T and Y to obtain :math:`\mathbf{P}_{TZ}`.
 
         Parameters
         ----------
@@ -237,24 +246,9 @@ def fit(self, X, Y, W=None):
             Training data, where n_samples is the number of samples.
 
         W : numpy.ndarray, shape (n_features, n_properties)
-            Classification weights, optional when classifier=`precomputed`. If
+            Classification weights, optional when classifier is `precomputed`. If
             not passed, it is assumed that the weights will be taken from a
             linear classifier fit between :math:`\mathbf{X}` and :math:`\mathbf{Y}`
-
-        Notes
-        -----
-        Note the relationship between :math:`\mathbf{X}`, :math:`\mathbf{Y}`,
-        :math:`\mathbf{Z}`, and :math:`\mathbf{W}`. The classification weights
-        :math:`\mathbf{W}`, obtained through a linear classifier fit between
-        :math:`\mathbf{X}` and :math:`\mathbf{Y}`, are used to compute:
-
-        .. math::
-            \mathbf{Z} = \mathbf{X} \mathbf{W}
-
-        Next, :math:`\mathbf{Z}` is used in either `_fit_feature_space` or
-        `_fit_sample_space` as our approximation of :math:`\mathbf{Y}`.
-        Finally, we refit a classifier on :math:`\mathbf{T}` and :math:`\mathbf{Y}`
-        to obtain :math:`\mathbf{P}_{XZ}` and :math:`\mathbf{P}_{TZ}`
         """
         X, Y = validate_data(self, X, Y, y_numeric=False)
         check_classification_targets(Y)
diff --git a/src/skmatter/decomposition/_pcovr.py b/src/skmatter/decomposition/_pcovr.py
@@ -9,7 +9,7 @@
 
 
 class PCovR(RegressorMixin, MultiOutputMixin, _BasePCov):
-    r"""Principal Covariates Regression, as described in [deJong1992]_
+    r"""Principal Covariates Regression, as described in [deJong1992]_,
     determines a latent-space projection :math:`\mathbf{T}` which
     minimizes a combined loss in supervised and unsupervised tasks.
 
@@ -225,7 +225,7 @@ def fit(self, X, Y, W=None):
             regressed form of the properties, :math:`{\mathbf{\hat{Y}}}`.
 
         W : numpy.ndarray, shape (n_features, n_properties)
-            Regression weights, optional when regressor=`precomputed`. If not
+            Regression weights, optional when regressor is `precomputed`. If not
             passed, it is assumed that `W = np.linalg.lstsq(X, Y, self.tol)[0]`
         """
         X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True)