Skip to content

Commit 46d2fca

Browse files
committed
Changes to PCovC tests
1 parent 82ff826 commit 46d2fca

File tree

1 file changed

+40
-45
lines changed

1 file changed

+40
-45
lines changed

tests/test_pcovc.py

Lines changed: 40 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
import numpy as np
55
from sklearn import exceptions
66
from sklearn.calibration import LinearSVC
7-
from sklearn.datasets import load_breast_cancer as get_dataset
7+
from sklearn.datasets import load_iris as get_dataset
88
from sklearn.decomposition import PCA
9-
from sklearn.linear_model import LogisticRegression
9+
from sklearn.linear_model import LogisticRegression, RidgeClassifier
1010
from sklearn.naive_bayes import GaussianNB
1111
from sklearn.preprocessing import StandardScaler
1212
from sklearn.utils.validation import check_X_y
@@ -75,11 +75,16 @@ def test_simple_reconstruction(self):
7575
def test_simple_prediction(self):
7676
"""
7777
Check that PCovC with a full eigendecomposition at mixing=0
78-
can fully reconstruct the input properties.
78+
can reproduce a linear classification result.
7979
"""
8080
for space in ["feature", "sample", "auto"]:
8181
with self.subTest(space=space):
82-
pcovc = self.model(mixing=0.0, n_components=2, space=space)
82+
pcovc = self.model(
83+
mixing=0.0,
84+
classifier=RidgeClassifier(),
85+
n_components=2,
86+
space=space,
87+
)
8388

8489
pcovc.classifier.fit(self.X, self.Y)
8590
Yhat = pcovc.classifier.predict(self.X)
@@ -172,8 +177,10 @@ def test_select_sample_space(self):
172177
"""
173178
pcovc = self.model(n_components=2, tol=1e-12)
174179

175-
n_samples = self.X.shape[1] - 1
176-
pcovc.fit(self.X[:n_samples], self.Y[:n_samples])
180+
n_samples = 2
181+
182+
# select range where there are at least 2 classes in Y
183+
pcovc.fit(self.X[49 : 49 + n_samples], self.Y[49 : 49 + n_samples])
177184

178185
self.assertTrue(pcovc.space_ == "sample")
179186

@@ -289,7 +296,8 @@ def test_bad_n_components(self):
289296
pcovc = self.model(
290297
n_components="mle", classifier=LinearSVC(), svd_solver="full"
291298
)
292-
pcovc.fit(self.X[:20], self.Y[:20])
299+
# select range where there are at least 2 classes in Y
300+
pcovc.fit(self.X[49:51], self.Y[49:51])
293301
self.assertEqual(
294302
str(cm.exception),
295303
"n_components='mle' is only supported if n_samples >= n_features",
@@ -395,7 +403,7 @@ def test_T_shape(self):
395403
"""Check that PCovC returns a latent space projection consistent with
396404
the shape of the input matrix.
397405
"""
398-
n_components = 5
406+
n_components = 4
399407
pcovc = self.model(n_components=n_components, tol=1e-12)
400408
pcovc.fit(self.X, self.Y)
401409
T = pcovc.transform(self.X)
@@ -414,27 +422,21 @@ def test_Z_shape(self):
414422
"""Check that PCovC returns an evidence matrix consistent with the
415423
number of samples and the number of classes.
416424
"""
417-
n_components = 5
425+
n_components = 2
418426
pcovc = self.model(n_components=n_components, tol=1e-12)
419-
pcovc.fit(self.X, self.Y)
427+
pcovc.fit(self.X, np.random.randint(0, 2, size=self.X.shape[0]))
420428

421429
# Shape (n_samples, ) for binary classification
422-
Z = pcovc.decision_function(self.X)
423-
424-
self.assertTrue(Z.ndim == 1)
425-
self.assertTrue(Z.shape[0] == self.X.shape[0])
426-
427-
# Modify Y so that it now contains three classes
428-
Y_multiclass = self.Y.copy()
429-
Y_multiclass[0] = 2
430-
pcovc.fit(self.X, Y_multiclass)
431-
n_classes = len(np.unique(Y_multiclass))
430+
Z_binary = pcovc.decision_function(self.X)
431+
self.assertEqual(Z_binary.ndim, 1)
432+
self.assertEqual(Z_binary.shape[0], self.X.shape[0])
432433

433434
# Shape (n_samples, n_classes) for multiclass classification
434-
Z = pcovc.decision_function(self.X)
435+
pcovc.fit(self.X, self.Y)
436+
Z_multi = pcovc.decision_function(self.X)
435437

436-
self.assertTrue(Z.ndim == 2)
437-
self.assertTrue((Z.shape[0], Z.shape[1]) == (self.X.shape[0], n_classes))
438+
self.assertEqual(Z_multi.ndim, 2)
439+
self.assertEqual(Z_multi.shape, (self.X.shape[0], len(pcovc.classes_)))
438440

439441
def test_decision_function(self):
440442
"""Check that PCovC's decision_function works when only T is
@@ -464,13 +466,11 @@ def test_prefit_classifier(self):
464466
pcovc = self.model(mixing=0.5, classifier=classifier)
465467
pcovc.fit(self.X, self.Y)
466468

467-
Z_classifier = classifier.decision_function(self.X).reshape(self.X.shape[0], -1)
468-
W_classifier = classifier.coef_.T.reshape(self.X.shape[1], -1)
469+
Z_classifier = classifier.decision_function(self.X)
470+
W_classifier = classifier.coef_.T
469471

470-
Z_pcovc = pcovc.z_classifier_.decision_function(self.X).reshape(
471-
self.X.shape[0], -1
472-
)
473-
W_pcovc = pcovc.z_classifier_.coef_.T.reshape(self.X.shape[1], -1)
472+
Z_pcovc = pcovc.z_classifier_.decision_function(self.X)
473+
W_pcovc = pcovc.z_classifier_.coef_.T
474474

475475
self.assertTrue(np.allclose(Z_classifier, Z_pcovc))
476476
self.assertTrue(np.allclose(W_classifier, W_pcovc))
@@ -479,7 +479,7 @@ def test_precomputed_classification(self):
479479
classifier = LogisticRegression()
480480
classifier.fit(self.X, self.Y)
481481

482-
W = classifier.coef_.T.reshape(self.X.shape[1], -1)
482+
W = classifier.coef_.T
483483
pcovc1 = self.model(mixing=0.5, classifier="precomputed", n_components=1)
484484
pcovc1.fit(self.X, self.Y, W)
485485
t1 = pcovc1.transform(self.X)
@@ -544,37 +544,32 @@ def test_none_classifier(self):
544544
self.assertTrue(pcovc.classifier_ is not None)
545545

546546
def test_incompatible_coef_shape(self):
547-
classifier1 = LogisticRegression()
548-
549-
# Modify Y to be multiclass
550-
Y_multiclass = self.Y.copy()
551-
Y_multiclass[0] = 2
552-
553-
classifier1.fit(self.X, Y_multiclass)
554-
pcovc1 = self.model(mixing=0.5, classifier=classifier1)
547+
cl_multi = LogisticRegression()
548+
cl_multi.fit(self.X, self.Y)
549+
pcovc_binary = self.model(mixing=0.5, classifier=cl_multi)
555550

556551
# Binary classification shape mismatch
557552
with self.assertRaises(ValueError) as cm:
558-
pcovc1.fit(self.X, self.Y)
553+
pcovc_binary.fit(self.X, np.random.randint(0, 2, size=self.X.shape[0]))
559554
self.assertEqual(
560555
str(cm.exception),
561556
"For binary classification, expected classifier coefficients "
562557
"to have shape (1, %d) but got shape %r"
563-
% (self.X.shape[1], classifier1.coef_.shape),
558+
% (self.X.shape[1], cl_multi.coef_.shape),
564559
)
565560

566-
classifier2 = LogisticRegression()
567-
classifier2.fit(self.X, self.Y)
568-
pcovc2 = self.model(mixing=0.5, classifier=classifier2)
561+
cl_binary = LogisticRegression()
562+
cl_binary.fit(self.X, np.random.randint(0, 2, size=self.X.shape[0]))
563+
pcovc_multi = self.model(mixing=0.5, classifier=cl_binary)
569564

570565
# Multiclass classification shape mismatch
571566
with self.assertRaises(ValueError) as cm:
572-
pcovc2.fit(self.X, Y_multiclass)
567+
pcovc_multi.fit(self.X, self.Y)
573568
self.assertEqual(
574569
str(cm.exception),
575570
"For multiclass classification, expected classifier coefficients "
576571
"to have shape (%d, %d) but got shape %r"
577-
% (len(np.unique(Y_multiclass)), self.X.shape[1], classifier2.coef_.shape),
572+
% (len(pcovc_multi.classes_), self.X.shape[1], cl_binary.coef_.shape),
578573
)
579574

580575

0 commit comments

Comments
 (0)