From e5cd4f0c520f4558855668134ed1d656280e03f6 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Tue, 16 Sep 2025 12:50:02 -0500 Subject: [PATCH 01/11] Adding standardscaler calls --- src/skmatter/decomposition/_kernel_pcovc.py | 9 +++++++++ src/skmatter/decomposition/_pcovc.py | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/skmatter/decomposition/_kernel_pcovc.py b/src/skmatter/decomposition/_kernel_pcovc.py index e8965a223..bc46e9326 100644 --- a/src/skmatter/decomposition/_kernel_pcovc.py +++ b/src/skmatter/decomposition/_kernel_pcovc.py @@ -15,6 +15,7 @@ from sklearn.utils.validation import check_is_fitted, validate_data from sklearn.linear_model._base import LinearClassifierMixin from sklearn.utils.multiclass import check_classification_targets, type_of_target +from sklearn.preprocessing import StandardScaler from skmatter.preprocessing import KernelNormalizer from skmatter.utils import check_cl_fit @@ -85,6 +86,10 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): If a pre-fitted classifier is provided, it is used to compute :math:`{\mathbf{Z}}`. If None, ``sklearn.linear_model.LogisticRegression()`` is used as the classifier. + + scale_z: boolean, default=True + whether to scale Z to zero mean and unit variance prior to + eigendecomposition. kernel : {"linear", "poly", "rbf", "sigmoid", "precomputed"} or callable, default="linear" Kernel. @@ -200,6 +205,7 @@ def __init__( n_components=None, svd_solver="auto", classifier=None, + scale_z=True, kernel="linear", gamma=None, degree=3, @@ -229,6 +235,7 @@ def __init__( fit_inverse_transform=fit_inverse_transform, ) self.classifier = classifier + self.scale_z = scale_z def fit(self, X, Y, W=None): r"""Fit the model with X and Y. @@ -323,6 +330,8 @@ def fit(self, X, Y, W=None): W = LogisticRegression().fit(K, Y).coef_.T Z = K @ W + if self.scale_z: + Z = StandardScaler().fit_transform(Z) self._fit(K, Z, W) diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py index e0cee034e..7473b12b7 100644 --- a/src/skmatter/decomposition/_pcovc.py +++ b/src/skmatter/decomposition/_pcovc.py @@ -16,6 +16,7 @@ from sklearn.utils.validation import check_is_fitted, validate_data from skmatter.decomposition import _BasePCov from skmatter.utils import check_cl_fit +from sklearn.preprocessing import StandardScaler class PCovC(LinearClassifierMixin, _BasePCov): @@ -122,6 +123,10 @@ class PCovC(LinearClassifierMixin, _BasePCov): training data as the composite estimator. If None, ``sklearn.linear_model.LogisticRegression()`` is used as the classifier. + + scale_z: boolean, default=True + whether to scale Z to zero mean and unit variance prior to + eigendecomposition. iterated_power : int or 'auto', default='auto' Number of iterations for the power method computed by @@ -210,9 +215,11 @@ def __init__( tol=1e-12, space="auto", classifier=None, + scale_z=True, iterated_power="auto", random_state=None, whiten=False, + ): super().__init__( mixing=mixing, @@ -225,6 +232,7 @@ def __init__( whiten=whiten, ) self.classifier = classifier + self.scale_z = scale_z def fit(self, X, Y, W=None): r"""Fit the model with X and Y. @@ -300,6 +308,8 @@ def fit(self, X, Y, W=None): W = LogisticRegression().fit(X, Y).coef_.T Z = X @ W + if self.scale_z: + Z = StandardScaler().fit_transform(Z) if self.space_ == "feature": self._fit_feature_space(X, Y, Z) From 6272caee6d1a32bc15c2abd6a6ded55a0be7122e Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 18 Sep 2025 10:35:46 -0500 Subject: [PATCH 02/11] Fixing sample space pcovc scaling --- src/skmatter/decomposition/_pcovc.py | 7 +++++-- tests/test_pcovc.py | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py index 7473b12b7..551589f5d 100644 --- a/src/skmatter/decomposition/_pcovc.py +++ b/src/skmatter/decomposition/_pcovc.py @@ -299,7 +299,7 @@ def fit(self, X, Y, W=None): classifier = self.classifier self.z_classifier_ = check_cl_fit(classifier, X, Y) - W = self.z_classifier_.coef_.T + W = self.z_classifier_.coef_.T.copy() else: # If precomputed, use default classifier to predict Y from T @@ -308,8 +308,11 @@ def fit(self, X, Y, W=None): W = LogisticRegression().fit(X, Y).coef_.T Z = X @ W + if self.scale_z: - Z = StandardScaler().fit_transform(Z) + z_scaler = StandardScaler().fit(Z) + Z = z_scaler.transform(Z) + W /= np.sqrt(z_scaler.var_).reshape(1, -1) if self.space_ == "feature": self._fit_feature_space(X, Y, Z) diff --git a/tests/test_pcovc.py b/tests/test_pcovc.py index 8607a2e2a..b476e80e8 100644 --- a/tests/test_pcovc.py +++ b/tests/test_pcovc.py @@ -464,6 +464,8 @@ def test_default_ncomponents(self): self.assertEqual(pcovc.n_components_, min(self.X.shape)) def test_prefit_classifier(self): + """Check that a passed prefit classifier is not modified in + PCovC's `fit` call.""" classifier = LinearSVC() classifier.fit(self.X, self.Y) pcovc = self.model(mixing=0.5, classifier=classifier) From d0dace417676aebc183694e94ee6f66900a18925 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 18 Sep 2025 10:42:54 -0500 Subject: [PATCH 03/11] Adjusting some params in the examples --- examples/pcovc/KPCovC_Comparison.py | 2 +- examples/pcovc/KPCovC_Hyperparameters.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/pcovc/KPCovC_Comparison.py b/examples/pcovc/KPCovC_Comparison.py index 5028a9b5c..0811224a2 100644 --- a/examples/pcovc/KPCovC_Comparison.py +++ b/examples/pcovc/KPCovC_Comparison.py @@ -85,7 +85,7 @@ # Both PCA and PCovC fail to produce linearly separable latent space # maps. We will need a kernel method to effectively separate the moon classes. -mixing = 0.10 +mixing = 0.5 alpha_d = 0.5 alpha_p = 0.4 diff --git a/examples/pcovc/KPCovC_Hyperparameters.py b/examples/pcovc/KPCovC_Hyperparameters.py index ce3948e25..3b02cab62 100644 --- a/examples/pcovc/KPCovC_Hyperparameters.py +++ b/examples/pcovc/KPCovC_Hyperparameters.py @@ -65,7 +65,7 @@ fig, axs = plt.subplots(2, len(kernels), figsize=(len(kernels) * 4, 8)) center = True -mixing = 0.10 +mixing = 0.5 for i, kernel in enumerate(kernels): kpca = KernelPCA( From 858f6a2feb5ce00736a860c32bf415eba3ac4f66 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 18 Sep 2025 13:01:55 -0500 Subject: [PATCH 04/11] oops wrong scaler --- src/skmatter/decomposition/_kernel_pcovc.py | 5 ++--- src/skmatter/decomposition/_pcovc.py | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/skmatter/decomposition/_kernel_pcovc.py b/src/skmatter/decomposition/_kernel_pcovc.py index bc46e9326..46f789057 100644 --- a/src/skmatter/decomposition/_kernel_pcovc.py +++ b/src/skmatter/decomposition/_kernel_pcovc.py @@ -15,9 +15,8 @@ from sklearn.utils.validation import check_is_fitted, validate_data from sklearn.linear_model._base import LinearClassifierMixin from sklearn.utils.multiclass import check_classification_targets, type_of_target -from sklearn.preprocessing import StandardScaler -from skmatter.preprocessing import KernelNormalizer +from skmatter.preprocessing import KernelNormalizer, StandardFlexibleScaler from skmatter.utils import check_cl_fit from skmatter.decomposition import _BaseKPCov @@ -331,7 +330,7 @@ def fit(self, X, Y, W=None): Z = K @ W if self.scale_z: - Z = StandardScaler().fit_transform(Z) + Z = StandardFlexibleScaler().fit_transform(Z) self._fit(K, Z, W) diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py index 551589f5d..74197bd37 100644 --- a/src/skmatter/decomposition/_pcovc.py +++ b/src/skmatter/decomposition/_pcovc.py @@ -16,7 +16,7 @@ from sklearn.utils.validation import check_is_fitted, validate_data from skmatter.decomposition import _BasePCov from skmatter.utils import check_cl_fit -from sklearn.preprocessing import StandardScaler +from skmatter.preprocessing import StandardFlexibleScaler class PCovC(LinearClassifierMixin, _BasePCov): @@ -310,9 +310,9 @@ def fit(self, X, Y, W=None): Z = X @ W if self.scale_z: - z_scaler = StandardScaler().fit(Z) + z_scaler = StandardFlexibleScaler().fit(Z) Z = z_scaler.transform(Z) - W /= np.sqrt(z_scaler.var_).reshape(1, -1) + W /= z_scaler.scale_.reshape(1, -1) if self.space_ == "feature": self._fit_feature_space(X, Y, Z) From 03618738e3e1770cb32bdc4767a9e20ec11f1762 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 18 Sep 2025 13:19:36 -0500 Subject: [PATCH 05/11] Touch up docs --- src/skmatter/decomposition/_kernel_pcovc.py | 2 +- src/skmatter/decomposition/_pcovc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/skmatter/decomposition/_kernel_pcovc.py b/src/skmatter/decomposition/_kernel_pcovc.py index 46f789057..4e364677a 100644 --- a/src/skmatter/decomposition/_kernel_pcovc.py +++ b/src/skmatter/decomposition/_kernel_pcovc.py @@ -86,7 +86,7 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): If None, ``sklearn.linear_model.LogisticRegression()`` is used as the classifier. - scale_z: boolean, default=True + scale_z: bool, default=True whether to scale Z to zero mean and unit variance prior to eigendecomposition. diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py index 74197bd37..37ab98e09 100644 --- a/src/skmatter/decomposition/_pcovc.py +++ b/src/skmatter/decomposition/_pcovc.py @@ -124,7 +124,7 @@ class PCovC(LinearClassifierMixin, _BasePCov): If None, ``sklearn.linear_model.LogisticRegression()`` is used as the classifier. - scale_z: boolean, default=True + scale_z: bool, default=True whether to scale Z to zero mean and unit variance prior to eigendecomposition. From 3f00c562bdf208bb80c0a9831ceba675299f481d Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 18 Sep 2025 13:21:34 -0500 Subject: [PATCH 06/11] Fix docs again --- src/skmatter/decomposition/_kernel_pcovc.py | 5 ++--- src/skmatter/decomposition/_pcovc.py | 6 ++---- tests/test_pcovc.py | 3 ++- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/skmatter/decomposition/_kernel_pcovc.py b/src/skmatter/decomposition/_kernel_pcovc.py index 4e364677a..c7a95fa26 100644 --- a/src/skmatter/decomposition/_kernel_pcovc.py +++ b/src/skmatter/decomposition/_kernel_pcovc.py @@ -85,10 +85,9 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): If a pre-fitted classifier is provided, it is used to compute :math:`{\mathbf{Z}}`. If None, ``sklearn.linear_model.LogisticRegression()`` is used as the classifier. - + scale_z: bool, default=True - whether to scale Z to zero mean and unit variance prior to - eigendecomposition. + whether to scale Z prior to eigendecomposition. kernel : {"linear", "poly", "rbf", "sigmoid", "precomputed"} or callable, default="linear" Kernel. diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py index 37ab98e09..9b1660894 100644 --- a/src/skmatter/decomposition/_pcovc.py +++ b/src/skmatter/decomposition/_pcovc.py @@ -123,10 +123,9 @@ class PCovC(LinearClassifierMixin, _BasePCov): training data as the composite estimator. If None, ``sklearn.linear_model.LogisticRegression()`` is used as the classifier. - + scale_z: bool, default=True - whether to scale Z to zero mean and unit variance prior to - eigendecomposition. + whether to scale Z to zero prior to eigendecomposition. iterated_power : int or 'auto', default='auto' Number of iterations for the power method computed by @@ -219,7 +218,6 @@ def __init__( iterated_power="auto", random_state=None, whiten=False, - ): super().__init__( mixing=mixing, diff --git a/tests/test_pcovc.py b/tests/test_pcovc.py index b476e80e8..283403324 100644 --- a/tests/test_pcovc.py +++ b/tests/test_pcovc.py @@ -465,7 +465,8 @@ def test_default_ncomponents(self): def test_prefit_classifier(self): """Check that a passed prefit classifier is not modified in - PCovC's `fit` call.""" + PCovC's `fit` call. + """ classifier = LinearSVC() classifier.fit(self.X, self.Y) pcovc = self.model(mixing=0.5, classifier=classifier) From fb207629ddeb7bcf0d2f0d58fa847d8e6e96e546 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 18 Sep 2025 13:23:40 -0500 Subject: [PATCH 07/11] fix docs --- src/skmatter/decomposition/_pcovc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py index 9b1660894..091df6023 100644 --- a/src/skmatter/decomposition/_pcovc.py +++ b/src/skmatter/decomposition/_pcovc.py @@ -125,7 +125,7 @@ class PCovC(LinearClassifierMixin, _BasePCov): is used as the classifier. scale_z: bool, default=True - whether to scale Z to zero prior to eigendecomposition. + whether to scale Z prior to eigendecomposition. iterated_power : int or 'auto', default='auto' Number of iterations for the power method computed by From cd2d27ba58f6d11b29294f6b9a2c24852ebcca92 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 18 Sep 2025 13:24:57 -0500 Subject: [PATCH 08/11] One last docs fix --- src/skmatter/decomposition/_kernel_pcovc.py | 2 +- src/skmatter/decomposition/_pcovc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/skmatter/decomposition/_kernel_pcovc.py b/src/skmatter/decomposition/_kernel_pcovc.py index c7a95fa26..7ceff5983 100644 --- a/src/skmatter/decomposition/_kernel_pcovc.py +++ b/src/skmatter/decomposition/_kernel_pcovc.py @@ -87,7 +87,7 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): is used as the classifier. scale_z: bool, default=True - whether to scale Z prior to eigendecomposition. + Whether to scale Z prior to eigendecomposition. kernel : {"linear", "poly", "rbf", "sigmoid", "precomputed"} or callable, default="linear" Kernel. diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py index 091df6023..fd8abe566 100644 --- a/src/skmatter/decomposition/_pcovc.py +++ b/src/skmatter/decomposition/_pcovc.py @@ -125,7 +125,7 @@ class PCovC(LinearClassifierMixin, _BasePCov): is used as the classifier. scale_z: bool, default=True - whether to scale Z prior to eigendecomposition. + Whether to scale Z prior to eigendecomposition. iterated_power : int or 'auto', default='auto' Number of iterations for the power method computed by From 04cfab0a3642f36ad90aa33930b645109c9e7cba Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Thu, 18 Sep 2025 13:41:46 -0500 Subject: [PATCH 09/11] Adding tests --- tests/test_kernel_pcovc.py | 9 +++++++++ tests/test_pcovc.py | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/tests/test_kernel_pcovc.py b/tests/test_kernel_pcovc.py index 9b29b8437..bbfe8fbb3 100644 --- a/tests/test_kernel_pcovc.py +++ b/tests/test_kernel_pcovc.py @@ -327,6 +327,15 @@ def test_precomputed_classification(self): self.assertTrue(np.linalg.norm(t3 - t2) < self.error_tol) self.assertTrue(np.linalg.norm(t3 - t1) < self.error_tol) + def test_scale_z_parameter(self): + """Check that changing scale_z changes the eigendecomposition.""" + kpcovc_scaled = self.model(scale_z=True) + kpcovc_scaled.fit(self.X, self.Y) + + kpcovc_unscaled = self.model(scale_z=False) + kpcovc_unscaled.fit(self.X, self.Y) + assert not np.allclose(kpcovc_scaled.pkt_, kpcovc_unscaled.pkt_) + class KernelTests(KernelPCovCBaseTest): def test_kernel_types(self): diff --git a/tests/test_pcovc.py b/tests/test_pcovc.py index 283403324..883279b59 100644 --- a/tests/test_pcovc.py +++ b/tests/test_pcovc.py @@ -578,6 +578,17 @@ def test_incompatible_coef_shape(self): % (len(pcovc_multi.classes_), self.X.shape[1], cl_binary.coef_.shape), ) + def test_scale_z_parameter(self): + """Check that changing scale_z changes the eigendecomposition.""" + pcovc_scaled = self.model(scale_z=True) + pcovc_scaled.fit(self.X, self.Y) + + pcovc_unscaled = self.model(scale_z=False) + pcovc_unscaled.fit(self.X, self.Y) + assert not np.allclose( + pcovc_scaled.singular_values_, pcovc_unscaled.singular_values_ + ) + if __name__ == "__main__": unittest.main(verbosity=2) From d77d013a70388c35872d2e2e28416ae0db166b1f Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Fri, 19 Sep 2025 12:31:19 -0500 Subject: [PATCH 10/11] Fixed doctests --- src/skmatter/decomposition/_kernel_pcovc.py | 10 +++++----- src/skmatter/decomposition/_pcovc.py | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/skmatter/decomposition/_kernel_pcovc.py b/src/skmatter/decomposition/_kernel_pcovc.py index 7ceff5983..976946dd7 100644 --- a/src/skmatter/decomposition/_kernel_pcovc.py +++ b/src/skmatter/decomposition/_kernel_pcovc.py @@ -177,7 +177,7 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): >>> from sklearn.preprocessing import StandardScaler >>> X = np.array([[-2, 3, -1, 0], [2, 0, -3, 1], [3, 0, -1, 3], [2, -2, 1, 0]]) >>> X = StandardScaler().fit_transform(X) - >>> Y = np.array([[2], [0], [1], [2]]) + >>> Y = np.array([2, 0, 1, 2]) >>> kpcovc = KernelPCovC( ... mixing=0.1, ... n_components=2, @@ -187,10 +187,10 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): >>> kpcovc.fit(X, Y) KernelPCovC(gamma=1, kernel='rbf', mixing=0.1, n_components=2) >>> kpcovc.transform(X) - array([[-4.45970689e-01, 8.95327566e-06], - [ 4.52745933e-01, 5.54810948e-01], - [ 4.52881359e-01, -5.54708315e-01], - [-4.45921092e-01, -7.32157649e-05]]) + array([[-4.41692911e-01, 6.87831803e-06], + [ 4.47719340e-01, 5.47456981e-01], + [ 4.47850288e-01, -5.47360522e-01], + [-4.41645711e-01, -7.05197801e-05]]) >>> kpcovc.predict(X) array([2, 0, 1, 2]) >>> kpcovc.score(X, Y) diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py index fd8abe566..e4f6698be 100644 --- a/src/skmatter/decomposition/_pcovc.py +++ b/src/skmatter/decomposition/_pcovc.py @@ -198,10 +198,10 @@ class PCovC(LinearClassifierMixin, _BasePCov): >>> pcovc.fit(X, Y) PCovC(mixing=0.1, n_components=2) >>> pcovc.transform(X) - array([[-0.4794854 , -0.46228114], - [ 1.9416966 , 0.2532831 ], - [-1.08744947, 0.89117784], - [-0.37476173, -0.6821798 ]]) + array([[-0.38989065, -0.21368409], + [ 1.55313271, 0.20273297], + [-0.87105559, 0.68233882], + [-0.29218647, -0.6713877 ]]) >>> pcovc.predict(X) array([0, 1, 2, 0]) """ # NoQa: E501 From f3a372fc808f368e5de21f4e17a060089d68a7b3 Mon Sep 17 00:00:00 2001 From: Christian Jorgensen Date: Tue, 23 Sep 2025 15:41:35 -0500 Subject: [PATCH 11/11] Changing default and adding warnings --- src/skmatter/decomposition/_kernel_pcovc.py | 32 ++++++++++++++++++- src/skmatter/decomposition/_pcovc.py | 34 +++++++++++++++++++-- 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/skmatter/decomposition/_kernel_pcovc.py b/src/skmatter/decomposition/_kernel_pcovc.py index 976946dd7..0b1b6e55a 100644 --- a/src/skmatter/decomposition/_kernel_pcovc.py +++ b/src/skmatter/decomposition/_kernel_pcovc.py @@ -1,3 +1,4 @@ +import warnings import numpy as np from sklearn import clone @@ -119,6 +120,14 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov): and for matrix inversions. Must be of range [0.0, infinity). + z_mean_tol: float, default=1e-12 + Tolerance for the column means of Z. + Must be of range [0.0, infinity). + + z_var_tol: float, default=1.5 + Tolerance for the column variances of Z. + Must be of range [0.0, infinity). + n_jobs : int, default=None The number of parallel jobs to run. :obj:`None` means 1 unless in a :obj:`joblib.parallel_backend` context. @@ -203,7 +212,7 @@ def __init__( n_components=None, svd_solver="auto", classifier=None, - scale_z=True, + scale_z=False, kernel="linear", gamma=None, degree=3, @@ -212,6 +221,8 @@ def __init__( center=False, fit_inverse_transform=False, tol=1e-12, + z_mean_tol=1e-12, + z_var_tol=1.5, n_jobs=None, iterated_power="auto", random_state=None, @@ -234,6 +245,8 @@ def __init__( ) self.classifier = classifier self.scale_z = scale_z + self.z_mean_tol = z_mean_tol + self.z_var_tol = z_var_tol def fit(self, X, Y, W=None): r"""Fit the model with X and Y. @@ -331,6 +344,23 @@ def fit(self, X, Y, W=None): if self.scale_z: Z = StandardFlexibleScaler().fit_transform(Z) + self.z_means_ = np.mean(Z, axis=0) + self.z_vars_ = np.var(Z, axis=0) + + if np.max(np.abs(self.z_means_)) > self.z_mean_tol: + warnings.warn( + "This class does not automatically center Z, and the column means " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`." + ) + + if np.max(self.z_vars_) > self.z_var_tol: + warnings.warn( + "This class does not automatically scale Z, and the column variances " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`. " + ) + self._fit(K, Z, W) self.ptk_ = self.pt__ @ K diff --git a/src/skmatter/decomposition/_pcovc.py b/src/skmatter/decomposition/_pcovc.py index e4f6698be..8f2b93555 100644 --- a/src/skmatter/decomposition/_pcovc.py +++ b/src/skmatter/decomposition/_pcovc.py @@ -17,6 +17,7 @@ from skmatter.decomposition import _BasePCov from skmatter.utils import check_cl_fit from skmatter.preprocessing import StandardFlexibleScaler +import warnings class PCovC(LinearClassifierMixin, _BasePCov): @@ -124,7 +125,7 @@ class PCovC(LinearClassifierMixin, _BasePCov): If None, ``sklearn.linear_model.LogisticRegression()`` is used as the classifier. - scale_z: bool, default=True + scale_z: bool, default=False Whether to scale Z prior to eigendecomposition. iterated_power : int or 'auto', default='auto' @@ -147,6 +148,14 @@ class PCovC(LinearClassifierMixin, _BasePCov): Tolerance for singular values computed by svd_solver == 'arpack'. Must be of range [0.0, infinity). + z_mean_tol: float, default=1e-12 + Tolerance for the column means of Z. + Must be of range [0.0, infinity). + + z_var_tol: float, default=1.5 + Tolerance for the column variances of Z. + Must be of range [0.0, infinity). + space: {'feature', 'sample', 'auto'}, default='auto' whether to compute the PCovC in ``sample`` or ``feature`` space. The default is equal to ``sample`` when :math:`{n_{samples} < n_{features}}` @@ -212,9 +221,11 @@ def __init__( n_components=None, svd_solver="auto", tol=1e-12, + z_mean_tol=1e-12, + z_var_tol=1.5, space="auto", classifier=None, - scale_z=True, + scale_z=False, iterated_power="auto", random_state=None, whiten=False, @@ -231,6 +242,8 @@ def __init__( ) self.classifier = classifier self.scale_z = scale_z + self.z_mean_tol = z_mean_tol + self.z_var_tol = z_var_tol def fit(self, X, Y, W=None): r"""Fit the model with X and Y. @@ -312,6 +325,23 @@ def fit(self, X, Y, W=None): Z = z_scaler.transform(Z) W /= z_scaler.scale_.reshape(1, -1) + self.z_means_ = np.mean(Z, axis=0) + self.z_vars_ = np.var(Z, axis=0) + + if np.max(np.abs(self.z_means_)) > self.z_mean_tol: + warnings.warn( + "This class does not automatically center Z, and the column means " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`." + ) + + if np.max(self.z_vars_) > self.z_var_tol: + warnings.warn( + "This class does not automatically scale Z, and the column variances " + "of Z are greater than the supplied tolerance. We recommend scaling " + "Z (and the weights) by setting `scale_z=True`." + ) + if self.space_ == "feature": self._fit_feature_space(X, Y, Z) else: