@@ -1,6 +1,7 @@
 import numpy as np
 
 from sklearn import clone
+from sklearn.multioutput import MultiOutputClassifier
 from sklearn.svm import LinearSVC
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 from sklearn.linear_model import (
@@ -24,7 +25,7 @@
 class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
     r"""Kernel Principal Covariates Classification (KPCovC).
 
-    KPCovC is a modification on the PrincipalCovariates Classification
+    KPCovC is a modification of the Principal Covariates Classification
     proposed in [Jorgensen2025]_. It determines a latent-space projection
     :math:`\mathbf{T}` which minimizes a combined loss in supervised and unsupervised
     tasks in the reproducing kernel Hilbert space (RKHS).
@@ -52,6 +53,9 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
 
             n_components == n_samples
 
+    n_outputs : int
+        The number of outputs when ``fit`` is performed.
+
     svd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'
         If auto :
             The solver is selected by a default policy based on `X.shape` and
@@ -78,13 +82,14 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
         - ``sklearn.linear_model.LogisticRegressionCV()``
         - ``sklearn.svm.LinearSVC()``
         - ``sklearn.discriminant_analysis.LinearDiscriminantAnalysis()``
+        - ``sklearn.multioutput.MultiOutputClassifier()``
         - ``sklearn.linear_model.RidgeClassifier()``
         - ``sklearn.linear_model.RidgeClassifierCV()``
         - ``sklearn.linear_model.Perceptron()``
 
         If a pre-fitted classifier is provided, it is used to compute :math:`{\mathbf{Z}}`.
-        If None, ``sklearn.linear_model.LogisticRegression()``
-        is used as the classifier.
+        If None and ``n_outputs < 2``, ``sklearn.linear_model.LogisticRegression()`` is used.
+        If None and ``n_outputs >= 2``, ``MultiOutputClassifier(LogisticRegression())`` is used.
 
     kernel : {"linear", "poly", "rbf", "sigmoid", "precomputed"} or callable, default="linear"
         Kernel.
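
As a sketch of the default selection described above (the helper name is hypothetical and not part of the diff):

    from sklearn.linear_model import LogisticRegression
    from sklearn.multioutput import MultiOutputClassifier

    # Hedged sketch: with classifier=None, the effective default wraps one
    # LogisticRegression per output column when Y has several columns, and
    # is a plain LogisticRegression otherwise.
    def default_classifier(n_outputs):
        if n_outputs >= 2:
            return MultiOutputClassifier(LogisticRegression())
        return LogisticRegression()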
@@ -132,6 +137,9 @@ class KernelPCovC(LinearClassifierMixin, _BaseKPCov):
 
     Attributes
     ----------
+    n_outputs : int
+        The number of outputs when ``fit`` is performed.
+
     classifier : estimator object
         The linear classifier passed for fitting. If pre-fitted, it is assumed
         to be fit on a precomputed kernel :math:`\mathbf{K}` and :math:`\mathbf{Y}`.
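
A minimal sketch of the pre-fitted case this attribute documents, with toy data and assumed shapes (nothing here is part of the diff):

    import numpy as np
    from sklearn.linear_model import RidgeClassifier
    from sklearn.metrics.pairwise import rbf_kernel

    # The classifier is assumed fit on a precomputed kernel K and labels Y,
    # so its coef_ lives in kernel space and can serve as W directly.
    X = np.random.default_rng(0).normal(size=(10, 3))
    Y = np.arange(10) % 2
    K = rbf_kernel(X)
    prefit = RidgeClassifier().fit(K, Y)
    W = prefit.coef_.T  # kernel-space weights, shape (10, 1) for binary Y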
@@ -268,9 +276,11 @@ def fit(self, X, Y, W=None):
         self: object
             Returns the instance itself.
         """
-        X, Y = validate_data(self, X, Y, y_numeric=False)
+        X, Y = validate_data(self, X, Y, multi_output=True, y_numeric=False)
+
         check_classification_targets(Y)
         self.classes_ = np.unique(Y)
+        self.n_outputs = 1 if Y.ndim == 1 else Y.shape[1]
 
         super().fit(X)
 
@@ -285,6 +295,7 @@ def fit(self, X, Y, W=None):
             LogisticRegressionCV,
             LinearSVC,
             LinearDiscriminantAnalysis,
+            MultiOutputClassifier,
             RidgeClassifier,
             RidgeClassifierCV,
             SGDClassifier,
@@ -300,28 +311,37 @@ def fit(self, X, Y, W=None):
                 ", or `precomputed`"
             )
 
-        if self.classifier != "precomputed":
-            if self.classifier is None:
-                classifier = LogisticRegression()
-            else:
-                classifier = self.classifier
+        multioutput = self.n_outputs != 1
+        precomputed = self.classifier == "precomputed"
 
-            # for convergence warnings
-            if hasattr(classifier, "max_iter") and (
-                classifier.max_iter is None or classifier.max_iter < 500
-            ):
-                classifier.max_iter = 500
+        if self.classifier is None or precomputed:
+            # used as the default classifier for subsequent computations
+            classifier = (
+                MultiOutputClassifier(LogisticRegression())
+                if multioutput
+                else LogisticRegression()
+            )
+        else:
+            classifier = self.classifier
 
-            # Check if classifier is fitted; if not, fit with precomputed K
-            self.z_classifier_ = check_cl_fit(classifier, K, Y)
-            W = self.z_classifier_.coef_.T.reshape(K.shape[1], -1)
+        if hasattr(classifier, "max_iter") and (
+            classifier.max_iter is None or classifier.max_iter < 500
+        ):
+            classifier.max_iter = 500
+
+        if precomputed and W is None:
+            fitted = clone(classifier).fit(K, Y)
+            if multioutput:
+                W = np.hstack([est_.coef_.T for est_ in fitted.estimators_])
+            else:
+                W = fitted.coef_.T
 
         else:
-            # If precomputed, use default classifier to predict Y from T
-            classifier = LogisticRegression(max_iter=500)
-            if W is None:
-                W = LogisticRegression().fit(K, Y).coef_.T
-                W = W.reshape(K.shape[1], -1)
+            self.z_classifier_ = check_cl_fit(classifier, K, Y)
+            if multioutput:
+                W = np.hstack([est_.coef_.T for est_ in self.z_classifier_.estimators_])
+            else:
+                W = self.z_classifier_.coef_.T
 
         Z = K @ W
 
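To see why the hstack above yields a single weight matrix, here is a toy illustration (data and shapes assumed, not taken from the diff):

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from sklearn.multioutput import MultiOutputClassifier

    # MultiOutputClassifier fits one estimator per output column; stacking
    # each estimator's coef_.T concatenates the per-output weights so that
    # Z = K @ W evaluates every output's decision function at once.
    K = np.random.default_rng(0).normal(size=(8, 8))  # stand-in kernel matrix
    Y = np.column_stack([np.arange(8) % 2, np.arange(8) % 2])  # two binary outputs
    moc = MultiOutputClassifier(LogisticRegression()).fit(K, Y)
    W = np.hstack([est.coef_.T for est in moc.estimators_])  # shape (8, 2)
    Z = K @ W  # one score column per output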
@@ -334,10 +354,16 @@ def fit(self, X, Y, W=None):
 
         self.classifier_ = clone(classifier).fit(K @ self.pkt_, Y)
 
-        self.ptz_ = self.classifier_.coef_.T
-        self.pkz_ = self.pkt_ @ self.ptz_
+        if multioutput:
+            self.ptz_ = np.hstack(
+                [est_.coef_.T for est_ in self.classifier_.estimators_]
+            )
+            self.pkz_ = self.pkt_ @ self.ptz_
+        else:
+            self.ptz_ = self.classifier_.coef_.T
+            self.pkz_ = self.pkt_ @ self.ptz_
 
-        if len(Y.shape) == 1 and type_of_target(Y) == "binary":
+        if not multioutput and type_of_target(Y) == "binary":
             self.pkz_ = self.pkz_.reshape(
                 K.shape[1],
             )
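The binary-only reshape above follows sklearn's shape conventions; a quick sketch with toy data (not from the diff):

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    # Binary linear classifiers store coef_ as (1, n_features), so a product
    # like K @ pkz_ has a single column; flattening it matches the 1-D
    # (n_samples,) decision_function convention for binary problems.
    X = np.random.default_rng(0).normal(size=(6, 3))
    y = np.arange(6) % 2
    clf = LogisticRegression().fit(X, y)
    print(clf.coef_.shape)                 # (1, 3)
    print(clf.decision_function(X).shape)  # (6,)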
@@ -346,6 +372,7 @@ def fit(self, X, Y, W=None):
         )
 
         self.components_ = self.pkt_.T  # for sklearn compatibility
+
         return self
 
     def predict(self, X=None, T=None):
@@ -425,9 +452,12 @@ def decision_function(self, X=None, T=None):
 
         Returns
         -------
-        Z : numpy.ndarray, shape (n_samples,) or (n_samples, n_classes)
+        Z : numpy.ndarray, shape (n_samples,) or (n_samples, n_classes), or a list of \
+                n_outputs such arrays if n_outputs > 1
             Confidence scores. For binary classification, has shape `(n_samples,)`,
-            for multiclass classification, has shape `(n_samples, n_classes)`
+            for multiclass classification, has shape `(n_samples, n_classes)`.
+            If n_outputs > 1, the list can contain arrays with differing shapes
+            depending on the number of classes in each output of Y.
         """
         check_is_fitted(self, attributes=["pkz_", "ptz_"])
 
@@ -440,9 +470,21 @@ def decision_function(self, X=None, T=None):
             if self.center:
                 K = self.centerer_.transform(K)
 
-            # Or self.classifier_.decision_function(K @ self.pxt_)
-            return K @ self.pkz_ + self.classifier_.intercept_
+            if self.n_outputs == 1:
+                # Or self.classifier_.decision_function(K @ self.pkt_)
+                return K @ self.pkz_ + self.classifier_.intercept_
+            else:
+                return [
+                    est_.decision_function(K @ self.pkt_)
+                    for est_ in self.classifier_.estimators_
+                ]
 
         else:
             T = check_array(T)
-            return T @ self.ptz_ + self.classifier_.intercept_
+
+            if self.n_outputs == 1:
+                return T @ self.ptz_ + self.classifier_.intercept_
+            else:
+                return [
+                    est_.decision_function(T) for est_ in self.classifier_.estimators_
+                ]
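
An end-to-end sketch of the multi-output behavior this diff adds; the import path and constructor arguments are assumed from the scikit-matter PCovC family and may differ:

    import numpy as np
    from skmatter.decomposition import KernelPCovC  # import path assumed

    X = np.random.default_rng(0).normal(size=(20, 4))
    # Two outputs: one binary, one three-class (all classes present).
    Y = np.column_stack([np.arange(20) % 2, np.arange(20) % 3])

    kpcovc = KernelPCovC(mixing=0.5, n_components=2, kernel="rbf").fit(X, Y)
    scores = kpcovc.decision_function(X)
    # With n_outputs == 2, a list of two arrays comes back: (20,) for the
    # binary output and (20, 3) for the three-class output.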