44import numpy as np
55from sklearn import exceptions
66from sklearn .calibration import LinearSVC
7- from sklearn .datasets import load_breast_cancer as get_dataset
7+ from sklearn .datasets import load_iris as get_dataset
88from sklearn .decomposition import PCA
9- from sklearn .linear_model import LogisticRegression
9+ from sklearn .linear_model import LogisticRegression , RidgeClassifier
1010from sklearn .naive_bayes import GaussianNB
1111from sklearn .preprocessing import StandardScaler
1212from sklearn .utils .validation import check_X_y
@@ -75,11 +75,16 @@ def test_simple_reconstruction(self):
7575 def test_simple_prediction (self ):
7676 """
7777 Check that PCovC with a full eigendecomposition at mixing=0
78- can fully reconstruct the input properties .
78+ can reproduce a linear classification result .
7979 """
8080 for space in ["feature" , "sample" , "auto" ]:
8181 with self .subTest (space = space ):
82- pcovc = self .model (mixing = 0.0 , n_components = 2 , space = space )
82+ pcovc = self .model (
83+ mixing = 0.0 ,
84+ classifier = RidgeClassifier (),
85+ n_components = 2 ,
86+ space = space ,
87+ )
8388
8489 pcovc .classifier .fit (self .X , self .Y )
8590 Yhat = pcovc .classifier .predict (self .X )
@@ -172,8 +177,10 @@ def test_select_sample_space(self):
172177 """
173178 pcovc = self .model (n_components = 2 , tol = 1e-12 )
174179
175- n_samples = self .X .shape [1 ] - 1
176- pcovc .fit (self .X [:n_samples ], self .Y [:n_samples ])
180+ n_samples = 2
181+
182+ # select range where there are at least 2 classes in Y
183+ pcovc .fit (self .X [49 : 49 + n_samples ], self .Y [49 : 49 + n_samples ])
177184
178185 self .assertTrue (pcovc .space_ == "sample" )
179186
@@ -289,7 +296,8 @@ def test_bad_n_components(self):
289296 pcovc = self .model (
290297 n_components = "mle" , classifier = LinearSVC (), svd_solver = "full"
291298 )
292- pcovc .fit (self .X [:20 ], self .Y [:20 ])
299+ # select range where there are at least 2 classes in Y
300+ pcovc .fit (self .X [49 :51 ], self .Y [49 :51 ])
293301 self .assertEqual (
294302 str (cm .exception ),
295303 "n_components='mle' is only supported if n_samples >= n_features" ,
@@ -395,7 +403,7 @@ def test_T_shape(self):
395403 """Check that PCovC returns a latent space projection consistent with
396404 the shape of the input matrix.
397405 """
398- n_components = 5
406+ n_components = 4
399407 pcovc = self .model (n_components = n_components , tol = 1e-12 )
400408 pcovc .fit (self .X , self .Y )
401409 T = pcovc .transform (self .X )
@@ -414,27 +422,21 @@ def test_Z_shape(self):
414422 """Check that PCovC returns an evidence matrix consistent with the
415423 number of samples and the number of classes.
416424 """
417- n_components = 5
425+ n_components = 2
418426 pcovc = self .model (n_components = n_components , tol = 1e-12 )
419- pcovc .fit (self .X , self .Y )
427+ pcovc .fit (self .X , np . random . randint ( 0 , 2 , size = self .X . shape [ 0 ]) )
420428
421429 # Shape (n_samples, ) for binary classification
422- Z = pcovc .decision_function (self .X )
423-
424- self .assertTrue (Z .ndim == 1 )
425- self .assertTrue (Z .shape [0 ] == self .X .shape [0 ])
426-
427- # Modify Y so that it now contains three classes
428- Y_multiclass = self .Y .copy ()
429- Y_multiclass [0 ] = 2
430- pcovc .fit (self .X , Y_multiclass )
431- n_classes = len (np .unique (Y_multiclass ))
430+ Z_binary = pcovc .decision_function (self .X )
431+ self .assertEqual (Z_binary .ndim , 1 )
432+ self .assertEqual (Z_binary .shape [0 ], self .X .shape [0 ])
432433
433434 # Shape (n_samples, n_classes) for multiclass classification
434- Z = pcovc .decision_function (self .X )
435+ pcovc .fit (self .X , self .Y )
436+ Z_multi = pcovc .decision_function (self .X )
435437
436- self .assertTrue ( Z .ndim == 2 )
437- self .assertTrue (( Z .shape [ 0 ], Z . shape [ 1 ]) == (self .X .shape [0 ], n_classes ))
438+ self .assertEqual ( Z_multi .ndim , 2 )
439+ self .assertEqual ( Z_multi .shape , (self .X .shape [0 ], len ( pcovc . classes_ ) ))
438440
439441 def test_decision_function (self ):
440442 """Check that PCovC's decision_function works when only T is
@@ -464,13 +466,11 @@ def test_prefit_classifier(self):
464466 pcovc = self .model (mixing = 0.5 , classifier = classifier )
465467 pcovc .fit (self .X , self .Y )
466468
467- Z_classifier = classifier .decision_function (self .X ). reshape ( self . X . shape [ 0 ], - 1 )
468- W_classifier = classifier .coef_ .T . reshape ( self . X . shape [ 1 ], - 1 )
469+ Z_classifier = classifier .decision_function (self .X )
470+ W_classifier = classifier .coef_ .T
469471
470- Z_pcovc = pcovc .z_classifier_ .decision_function (self .X ).reshape (
471- self .X .shape [0 ], - 1
472- )
473- W_pcovc = pcovc .z_classifier_ .coef_ .T .reshape (self .X .shape [1 ], - 1 )
472+ Z_pcovc = pcovc .z_classifier_ .decision_function (self .X )
473+ W_pcovc = pcovc .z_classifier_ .coef_ .T
474474
475475 self .assertTrue (np .allclose (Z_classifier , Z_pcovc ))
476476 self .assertTrue (np .allclose (W_classifier , W_pcovc ))
@@ -479,7 +479,7 @@ def test_precomputed_classification(self):
479479 classifier = LogisticRegression ()
480480 classifier .fit (self .X , self .Y )
481481
482- W = classifier .coef_ .T . reshape ( self . X . shape [ 1 ], - 1 )
482+ W = classifier .coef_ .T
483483 pcovc1 = self .model (mixing = 0.5 , classifier = "precomputed" , n_components = 1 )
484484 pcovc1 .fit (self .X , self .Y , W )
485485 t1 = pcovc1 .transform (self .X )
@@ -544,37 +544,32 @@ def test_none_classifier(self):
544544 self .assertTrue (pcovc .classifier_ is not None )
545545
546546 def test_incompatible_coef_shape (self ):
547- classifier1 = LogisticRegression ()
548-
549- # Modify Y to be multiclass
550- Y_multiclass = self .Y .copy ()
551- Y_multiclass [0 ] = 2
552-
553- classifier1 .fit (self .X , Y_multiclass )
554- pcovc1 = self .model (mixing = 0.5 , classifier = classifier1 )
547+ cl_multi = LogisticRegression ()
548+ cl_multi .fit (self .X , self .Y )
549+ pcovc_binary = self .model (mixing = 0.5 , classifier = cl_multi )
555550
556551 # Binary classification shape mismatch
557552 with self .assertRaises (ValueError ) as cm :
558- pcovc1 .fit (self .X , self .Y )
553+ pcovc_binary .fit (self .X , np . random . randint ( 0 , 2 , size = self .X . shape [ 0 ]) )
559554 self .assertEqual (
560555 str (cm .exception ),
561556 "For binary classification, expected classifier coefficients "
562557 "to have shape (1, %d) but got shape %r"
563- % (self .X .shape [1 ], classifier1 .coef_ .shape ),
558+ % (self .X .shape [1 ], cl_multi .coef_ .shape ),
564559 )
565560
566- classifier2 = LogisticRegression ()
567- classifier2 .fit (self .X , self .Y )
568- pcovc2 = self .model (mixing = 0.5 , classifier = classifier2 )
561+ cl_binary = LogisticRegression ()
562+ cl_binary .fit (self .X , np . random . randint ( 0 , 2 , size = self .X . shape [ 0 ]) )
563+ pcovc_multi = self .model (mixing = 0.5 , classifier = cl_binary )
569564
570565 # Multiclass classification shape mismatch
571566 with self .assertRaises (ValueError ) as cm :
572- pcovc2 .fit (self .X , Y_multiclass )
567+ pcovc_multi .fit (self .X , self . Y )
573568 self .assertEqual (
574569 str (cm .exception ),
575570 "For multiclass classification, expected classifier coefficients "
576571 "to have shape (%d, %d) but got shape %r"
577- % (len (np . unique ( Y_multiclass )) , self .X .shape [1 ], classifier2 .coef_ .shape ),
572+ % (len (pcovc_multi . classes_ ) , self .X .shape [1 ], cl_binary .coef_ .shape ),
578573 )
579574
580575
0 commit comments