28
28
# did a search of all classifiers that inherit from MultiOutputMixin - none of them implement
29
29
# `decision_function`, so I don't think we need to inherit from it
30
30
31
+
31
32
class PCovC (LinearClassifierMixin , _BasePCov ):
32
33
r"""Principal Covariates Classification (PCovC).
33
34
@@ -178,16 +179,11 @@ class PCovC(LinearClassifierMixin, _BasePCov):
178
179
179
180
pxz_ : ndarray of size :math:`({n_{features}, })`, :math:`({n_{features}, n_{classes}})`
180
181
the projector, or weights, from the input space :math:`\mathbf{X}`
181
- to the class confidence scores :math:`\mathbf{Z}`. In the multioutput case,
182
- has shape , :math:`({n_{features}, n_{classes}*n_{outputs}})`, a flattened form
183
- of a 3D tensor.
182
+ to the class confidence scores :math:`\mathbf{Z}`.
184
183
185
- ptz_ : ndarray of size :math:`({n_{components}, })`, :math:`({n_{components}, n_{classes}})` \
186
- or :math:`({n_{components}, n_{classes}*n_{outputs}})`
187
- the projector, or weights, from the latent-space projection
188
- :math:`\mathbf{T}` to the class confidence scores :math:`\mathbf{Z}`.
189
- In the multioutput case, has shape , :math:`({n_{components}, n_{classes}*n_{outputs}})`,
190
- a flattened form of a 3D tensor.
184
+ ptz_ : ndarray of size :math:`({n_{components}, })`, :math:`({n_{components}, n_{classes}})`
185
+ the projector, or weights, from the latent-space projection
186
+ :math:`\mathbf{T}` to the class confidence scores :math:`\mathbf{Z}`.
191
187
192
188
explained_variance_ : numpy.ndarray of shape (n_components,)
193
189
The amount of variance explained by each of the selected components.
@@ -279,7 +275,7 @@ def fit(self, X, Y, W=None):
279
275
`` W = np.hstack([est_.coef_.T for est_ in classifier.estimators_])``.
280
276
"""
281
277
X , Y = validate_data (self , X , Y , multi_output = True , y_numeric = False )
282
-
278
+
283
279
check_classification_targets (Y )
284
280
self .classes_ = np .unique (Y )
285
281
self .n_outputs = 1 if Y .ndim == 1 else Y .shape [1 ]
@@ -305,33 +301,51 @@ def fit(self, X, Y, W=None):
305
301
"Classifier must be an instance of `"
306
302
f"{ '`, `' .join (c .__name__ for c in compatible_classifiers )} `"
307
303
", or `precomputed`"
308
- )
304
+ )
309
305
310
- # if self.n_outputs == 1:
311
- # classifier = LogisticRegression()
312
- # else:
313
- # classifier = MultiOutputClassifier(estimator=LogisticRegression())
306
+ if self .n_outputs == 1 and isinstance (self .classifier , MultiOutputClassifier ):
307
+ raise ValueError (
308
+ "Classifier cannot be an instance of `MultiOutputClassifier` when Y is 1D"
309
+ )
310
+
311
+ if (
312
+ self .n_outputs != 1
313
+ and self .classifier not in ["precomputed" , None ]
314
+ and not (
315
+ isinstance (self .classifier , MultiOutputClassifier )
316
+ or self .classifier == "precomputed"
317
+ )
318
+ ):
319
+ raise ValueError (
320
+ "Classifier must be an instance of `MultiOutputClassifier` when Y is 2D"
321
+ )
314
322
315
- # if self.classifier == "precomputed":
316
-
323
+ if self .n_outputs == 1 :
324
+ if self .classifier != "precomputed" :
325
+ classifier = self .classifier or LogisticRegression ()
326
+ self .z_classifier_ = check_cl_fit (classifier , X , Y )
327
+ W = self .z_classifier_ .coef_ .T
317
328
318
- if self .classifier != "precomputed" :
319
- if self .classifier is None :
320
- classifier = LogisticRegression ()
321
329
else :
322
- classifier = self . classifier
323
-
324
- self . z_classifier_ = check_cl_fit ( classifier , X , Y )
325
- W = self . z_classifier_ .coef_ .T
330
+ # to be used later on as the classifier fit between T and Y
331
+ classifier = LogisticRegression ()
332
+ if W is None :
333
+ W = clone ( classifier ). fit ( X , Y ) .coef_ .T
326
334
327
335
else :
328
- # If precomputed, use default classifier to predict Y from T
329
- classifier = LogisticRegression ()
330
- if W is None :
331
- W = LogisticRegression ().fit (X , Y ).coef_ .T
336
+ if self .classifier != "precomputed" :
337
+ classifier = self .classifier or MultiOutputClassifier (
338
+ estimator = LogisticRegression ()
339
+ )
340
+ self .z_classifier_ = check_cl_fit (classifier , X , Y )
341
+ W = np .hstack ([est_ .coef_ .T for est_ in self .z_classifier_ .estimators_ ])
332
342
333
- print (f"X: { X .shape } " )
334
- print (f"W: { W .shape } " )
343
+ else :
344
+ # to be used later on as the classifier fit between T and Y
345
+ classifier = MultiOutputClassifier (estimator = LogisticRegression ())
346
+ if W is None :
347
+ _ = clone (classifier ).fit (X , Y )
348
+ W = np .hstack ([_ .coef_ .T for _ in _ .estimators_ ])
335
349
336
350
Z = X @ W
337
351
@@ -344,7 +358,11 @@ def fit(self, X, Y, W=None):
344
358
# classifier and steal weights to get pxz and ptz
345
359
self .classifier_ = clone (classifier ).fit (X @ self .pxt_ , Y )
346
360
347
- if isinstance (self .classifier_ , MultiOutputClassifier ):
361
+ if self .n_outputs == 1 :
362
+ self .ptz_ = self .classifier_ .coef_ .T
363
+ # print(self.ptz_.shape)
364
+ self .pxz_ = self .pxt_ @ self .ptz_
365
+ else :
348
366
self .ptz_ = np .hstack (
349
367
[est_ .coef_ .T for est_ in self .classifier_ .estimators_ ]
350
368
)
@@ -353,12 +371,7 @@ def fit(self, X, Y, W=None):
353
371
self .pxz_ = self .pxt_ @ self .ptz_
354
372
# print(f"pxz {self.pxz_.shape}")
355
373
356
- else :
357
- self .ptz_ = self .classifier_ .coef_ .T
358
- # print(self.ptz_.shape)
359
- self .pxz_ = self .pxt_ @ self .ptz_
360
-
361
- print (self .ptz_ .shape )
374
+ # print(self.ptz_.shape)
362
375
if len (Y .shape ) == 1 and type_of_target (Y ) == "binary" :
363
376
self .pxz_ = self .pxz_ .reshape (
364
377
X .shape [1 ],
@@ -460,7 +473,7 @@ def decision_function(self, X=None, T=None):
460
473
n_outputs such arrays if n_outputs > 1
461
474
Confidence scores. For binary classification, has shape `(n_samples,)`,
462
475
for multiclass classification, has shape `(n_samples, n_classes)`. If n_outputs > 1,
463
- the list returned can contain arrays with differing shapes depending on the
476
+ the list can contain arrays with differing shapes depending on the
464
477
number of classes in each output of Y.
465
478
"""
466
479
check_is_fitted (self , attributes = ["pxz_" , "ptz_" ])
@@ -471,25 +484,24 @@ def decision_function(self, X=None, T=None):
471
484
if X is not None :
472
485
X = validate_data (self , X , reset = False )
473
486
474
- # this is similar to how MultiOutputClassifier handles predict_proba() if n_outputs > 1
475
- if isinstance (self .classifier_ , MultiOutputClassifier ):
487
+ if self .n_outputs == 1 :
488
+ # Or self.classifier_.decision_function(X @ self.pxt_)
489
+ return X @ self .pxz_ + self .classifier_ .intercept_
490
+ else :
476
491
return [
477
492
est_ .decision_function (X @ self .pxt_ )
478
493
for est_ in self .classifier_ .estimators_
479
494
]
480
-
481
- # Or self.classifier_.decision_function(X @ self.pxt_)
482
- return X @ self .pxz_ + self .classifier_ .intercept_
483
495
else :
484
496
T = check_array (T )
485
497
486
- if isinstance (self .classifier_ , MultiOutputClassifier ):
498
+ if self .n_outputs == 1 :
499
+ return T @ self .ptz_ + self .classifier_ .intercept_
500
+ else :
487
501
return [
488
502
est_ .decision_function (T ) for est_ in self .classifier_ .estimators_
489
503
]
490
504
491
- return T @ self .ptz_ + self .classifier_ .intercept_
492
-
493
505
def predict (self , X = None , T = None ):
494
506
"""Predicts the property labels using classification on T."""
495
507
check_is_fitted (self , attributes = ["pxz_" , "ptz_" ])
0 commit comments