2 changes: 1 addition & 1 deletion HISTORY.rst
@@ -13,7 +13,7 @@ History
* Fix issue 548 to correct labels generated in tutorial
* Fix issue 547 to fix wrong warning
* Fix issue 480 (correct display of mathematical equations in generated notebooks)
- * Refactor MapieRegressor and EnsembleRegressor, deprecate EnsembleRegressor.fit
+ * Refactor MapieRegressor, EnsembleRegressor, and MapieQuantileRegressor, to prepare for the release of v1.0.0

0.9.1 (2024-09-13)
------------------
201 changes: 129 additions & 72 deletions mapie/regression/quantile_regression.py
@@ -1,7 +1,7 @@
from __future__ import annotations

import warnings
- from typing import Iterable, List, Optional, Tuple, Union, cast
+ from typing import Iterable, Dict, List, Optional, Tuple, Union, cast

import numpy as np
from sklearn.base import RegressorMixin, clone
@@ -546,93 +546,150 @@ def fit(
MapieQuantileRegressor
The model itself.
"""
self.cv = self._check_cv(cast(str, self.cv))

# Initialization
self.estimators_: List[RegressorMixin] = []
if self.cv == "prefit":
estimator = cast(List, self.estimator)
alpha = self._check_alpha(self.alpha)
self._check_prefit_params(estimator)
X_calib, y_calib = indexable(X, y)
self.init_fit()

self.n_calib_samples = _num_samples(y_calib)
y_calib_preds = np.full(
shape=(3, self.n_calib_samples),
fill_value=np.nan
)
for i, est in enumerate(estimator):
self.estimators_.append(est)
y_calib_preds[i] = est.predict(X_calib).ravel()
self.single_estimator_ = self.estimators_[2]
if self.cv == "prefit":
X_calib, y_calib = self.prefit_estimators(X, y)
else:
# Checks
self._check_parameters()
checked_estimator = self._check_estimator(self.estimator)
alpha = self._check_alpha(self.alpha)
X, y = indexable(X, y)
random_state = check_random_state(random_state)
results = self._check_calib_set(
X,
y,
sample_weight,
X_calib,
y_calib,
calib_size,
random_state,
shuffle,
stratify,
X_calib, y_calib = self.fit_estimators(
X=X,
y=y,
sample_weight=sample_weight,
groups=groups,
X_calib=X_calib,
y_calib=y_calib,
calib_size=calib_size,
random_state=random_state,
shuffle=shuffle,
stratify=stratify,
**fit_params,
)
X_train, y_train, X_calib, y_calib, sample_weight_train = results
X_train, y_train = indexable(X_train, y_train)
X_calib, y_calib = indexable(X_calib, y_calib)
y_train, y_calib = _check_y(y_train), _check_y(y_calib)
self.n_calib_samples = _num_samples(y_calib)
check_alpha_and_n_samples(self.alpha, self.n_calib_samples)
sample_weight_train, X_train, y_train = check_null_weight(
sample_weight_train,

self.conformalize(X_calib, y_calib)

return self
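
For readability, here is a condensed sketch of the refactored fit() control flow, assembled from the added lines in this hunk (signatures abbreviated and comments added; see the diff itself for the verbatim code):

    def fit(self, X, y, sample_weight=None, groups=None, X_calib=None, y_calib=None,
            calib_size=0.3, random_state=None, shuffle=True, stratify=None, **fit_params):
        # Check cv and alpha, and reset the list of fitted estimators
        self.init_fit()
        if self.cv == "prefit":
            # Reuse the three prefitted quantile estimators as-is
            X_calib, y_calib = self.prefit_estimators(X, y)
        else:
            # Split off a calibration set and fit one cloned estimator per quantile level
            X_calib, y_calib = self.fit_estimators(
                X=X, y=y, sample_weight=sample_weight, groups=groups,
                X_calib=X_calib, y_calib=y_calib, calib_size=calib_size,
                random_state=random_state, shuffle=shuffle, stratify=stratify,
                **fit_params,
            )
        # Compute conformity scores on the calibration set
        self.conformalize(X_calib, y_calib)
        return self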

def init_fit(self):

self.cv = self._check_cv(cast(str, self.cv))
self.alpha_np = self._check_alpha(self.alpha)
self.estimators_: List[RegressorMixin] = []

def prefit_estimators(
self,
X: ArrayLike,
y: ArrayLike
) -> Tuple[ArrayLike, ArrayLike]:

estimator = cast(List, self.estimator)
self._check_prefit_params(estimator)
self.estimators_ = list(estimator)
self.single_estimator_ = self.estimators_[2]

X_calib, y_calib = indexable(X, y)
return X_calib, y_calib

def fit_estimators(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
X_calib: Optional[ArrayLike] = None,
y_calib: Optional[ArrayLike] = None,
calib_size: Optional[float] = 0.3,
random_state: Optional[Union[int, np.random.RandomState]] = None,
shuffle: Optional[bool] = True,
stratify: Optional[ArrayLike] = None,
**fit_params,
) -> Tuple[ArrayLike, ArrayLike]:

self._check_parameters()
checked_estimator = self._check_estimator(self.estimator)
random_state = check_random_state(random_state)
X, y = indexable(X, y)

results = self._check_calib_set(
X,
y,
sample_weight,
X_calib,
y_calib,
calib_size,
random_state,
shuffle,
stratify,
)

X_train, y_train, X_calib, y_calib, sample_weight_train = results
X_train, y_train = indexable(X_train, y_train)
X_calib, y_calib = indexable(X_calib, y_calib)
y_train, y_calib = _check_y(y_train), _check_y(y_calib)
self.n_calib_samples = _num_samples(y_calib)
check_alpha_and_n_samples(self.alpha, self.n_calib_samples)
sample_weight_train, X_train, y_train = check_null_weight(
sample_weight_train,
X_train,
y_train
)
y_train = cast(NDArray, y_train)

if isinstance(checked_estimator, Pipeline):
estimator = checked_estimator[-1]
else:
estimator = checked_estimator
name_estimator = estimator.__class__.__name__
alpha_name = self.quantile_estimator_params[
name_estimator
]["alpha_name"]
for i, alpha_ in enumerate(self.alpha_np):
cloned_estimator_ = clone(checked_estimator)
params = {alpha_name: alpha_}
if isinstance(checked_estimator, Pipeline):
cloned_estimator_[-1].set_params(**params)
else:
cloned_estimator_.set_params(**params)
self.estimators_.append(fit_estimator(
cloned_estimator_,
X_train,
y_train
y_train,
sample_weight_train,
**fit_params,
)
)
y_train = cast(NDArray, y_train)
self.single_estimator_ = self.estimators_[2]

y_calib_preds = np.full(
X_calib = cast(ArrayLike, X_calib)
y_calib = cast(ArrayLike, y_calib)

return X_calib, y_calib

def conformalize(
self,
X_conf: ArrayLike,
y_conf: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
predict_params: Dict = {},
):

self.n_calib_samples = _num_samples(y_conf)

y_calib_preds = np.full(
shape=(3, self.n_calib_samples),
fill_value=np.nan
)

if isinstance(checked_estimator, Pipeline):
estimator = checked_estimator[-1]
else:
estimator = checked_estimator
name_estimator = estimator.__class__.__name__
alpha_name = self.quantile_estimator_params[
name_estimator
]["alpha_name"]
for i, alpha_ in enumerate(alpha):
cloned_estimator_ = clone(checked_estimator)
params = {alpha_name: alpha_}
if isinstance(checked_estimator, Pipeline):
cloned_estimator_[-1].set_params(**params)
else:
cloned_estimator_.set_params(**params)
self.estimators_.append(fit_estimator(
cloned_estimator_,
X_train,
y_train,
sample_weight_train,
**fit_params,
)
)
y_calib_preds[i] = self.estimators_[-1].predict(X_calib)
self.single_estimator_ = self.estimators_[2]
for i, est in enumerate(self.estimators_):
y_calib_preds[i] = est.predict(X_conf, **predict_params).ravel()

self.conformity_scores_ = np.full(
shape=(3, self.n_calib_samples),
fill_value=np.nan
)
self.conformity_scores_[0] = y_calib_preds[0] - y_calib
self.conformity_scores_[1] = y_calib - y_calib_preds[1]

self.conformity_scores_[0] = y_calib_preds[0] - y_conf
self.conformity_scores_[1] = y_conf - y_calib_preds[1]
self.conformity_scores_[2] = np.max(
[
self.conformity_scores_[0],
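
For context, a minimal usage sketch of MapieQuantileRegressor after this refactor; the data and base estimator below are illustrative, and it assumes the public fit/predict API is left unchanged by this PR:

    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor
    from mapie.regression import MapieQuantileRegressor

    # Illustrative data
    rng = np.random.RandomState(0)
    X = rng.uniform(size=(500, 3))
    y = X @ np.array([1.0, 2.0, 3.0]) + rng.normal(scale=0.1, size=500)

    # fit() now delegates to init_fit(), then fit_estimators()
    # (or prefit_estimators() when cv="prefit"), then conformalize().
    mapie = MapieQuantileRegressor(
        estimator=GradientBoostingRegressor(loss="quantile"),
        alpha=0.1,
    )
    mapie.fit(X, y, calib_size=0.3, random_state=0)

    # Point predictions and 90% prediction intervals
    y_pred, y_pis = mapie.predict(X)

Splitting fit() into fit_estimators() and conformalize() keeps training and calibration as separate steps, which the HISTORY entry above ties to preparation for the v1.0.0 release.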