Commit 71a62fb

TEST & DOC : add integration tests for CrossConformalRegressor, finish related docstring for param cv (#544)
1 parent 00d7ef6 commit 71a62fb

2 files changed: +122 -46 lines changed

mapie_v1/integration_tests/tests/test_regression.py

Lines changed: 114 additions & 40 deletions
@@ -2,30 +2,29 @@
 
 import numpy as np
 import pytest
+from numpy.random import RandomState
+from sklearn.compose import TransformedTargetRegressor
 from sklearn.datasets import make_regression
 from sklearn.linear_model import LinearRegression
 from sklearn.ensemble import RandomForestRegressor
 
-from mapie_v1.regression import (
-    SplitConformalRegressor,
-    CrossConformalRegressor,
-    JackknifeAfterBootstrapRegressor,
-    ConformalizedQuantileRegressor
-)
-from mapiev0.regression import MapieRegressor as MapieRegressorV0  # noqa
-from mapiev0.regression import MapieQuantileRegressor as MapieQuantileRegressorV0  # noqa
+from mapie.conformity_scores import GammaConformityScore, \
+    AbsoluteConformityScore
+from mapie_v1.regression import SplitConformalRegressor, \
+    CrossConformalRegressor
+
+from mapiev0.regression import MapieRegressor as MapieRegressorV0  # noqa
+
 from mapie_v1.conformity_scores.utils import \
     check_and_select_split_conformity_score
 from mapie_v1.integration_tests.utils import (filter_params,
                                               train_test_split_shuffle)
-from sklearn.model_selection import KFold
+from sklearn.model_selection import LeaveOneOut, GroupKFold
 
 RANDOM_STATE = 1
 K_FOLDS = 3
 N_BOOTSTRAPS = 30
 
-X_toy = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1)
-y_toy = np.array([5, 7, 9, 11, 13, 15])
 X, y = make_regression(n_samples=500,
                        n_features=10,
                        noise=1.0,
@@ -57,7 +56,6 @@ def test_exact_interval_equality_split(
     Test that the prediction intervals are exactly the same
     between v0 and v1 models when using the same settings.
     """
-
     X_train, X_conf, y_train, y_conf = train_test_split_shuffle(
         X, y, test_size=test_size, random_state=RANDOM_STATE
     )
@@ -92,8 +90,6 @@ def test_exact_interval_equality_split(
         strategy_key=strategy_key,
         v0_params=v0_params,
         v1_params=v1_params,
-        k_folds=K_FOLDS,
-        random_state=RANDOM_STATE
     )
 
     if strategy_key == 'prefit':
@@ -117,14 +113,115 @@ def test_exact_interval_equality_split(
     )
 
 
+X_cross, y_cross_signed = make_regression(
+    n_samples=50,
+    n_features=10,
+    noise=1.0,
+    random_state=RANDOM_STATE
+)
+y_cross = np.abs(y_cross_signed)
+sample_weight = RandomState(RANDOM_STATE).random(len(X_cross))
+groups = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10 + [4] * 10
+positive_predictor = TransformedTargetRegressor(
+    regressor=LinearRegression(),
+    func=lambda y_: np.log(y_ + 1),
+    inverse_func=lambda X_: np.exp(X_) - 1
+)
+
+params_test_cases_cross = [
+    {
+        "v0": {
+            "alpha": 0.2,
+            "conformity_score": AbsoluteConformityScore(),
+            "cv": 4,
+            "agg_function": "median",
+            "ensemble": True,
+            "method": "base",
+            "sample_weight": sample_weight,
+        },
+        "v1": {
+            "confidence_level": 0.8,
+            "conformity_score": "absolute",
+            "cv": 4,
+            "aggregation_method": "median",
+            "method": "base",
+            "fit_params": {"sample_weight": sample_weight},
+        }
+    },
+    {
+        "v0": {
+            "estimator": positive_predictor,
+            "alpha": 0.5,
+            "conformity_score": GammaConformityScore(),
+            "cv": LeaveOneOut(),
+            "method": "plus",
+            "optimize_beta": True,
+        },
+        "v1": {
+            "estimator": positive_predictor,
+            "confidence_level": 0.5,
+            "conformity_score": "gamma",
+            "cv": LeaveOneOut(),
+            "method": "plus",
+            "minimize_interval_width": True,
+        }
+    },
+    {
+        "v0": {
+            "alpha": 0.1,
+            "cv": GroupKFold(),
+            "groups": groups,
+            "method": "minmax",
+            "allow_infinite_bounds": True,
+        },
+        "v1": {
+            "cv": GroupKFold(),
+            "groups": groups,
+            "method": "minmax",
+            "allow_infinite_bounds": True,
+        }
+    },
+]
+
+
+@pytest.mark.parametrize("params_cross", params_test_cases_cross)
+def test_intervals_and_predictions_exact_equality_cross(params_cross):
+    v0_params = params_cross["v0"]
+    v1_params = params_cross["v1"]
+
+    v0 = MapieRegressorV0(
+        **filter_params(MapieRegressorV0.__init__, v0_params)
+    )
+    v1 = CrossConformalRegressor(
+        **filter_params(CrossConformalRegressor.__init__, v1_params)
+    )
+
+    v0_fit_params = filter_params(v0.fit, v0_params)
+    v1_fit_params = filter_params(v1.fit, v1_params)
+    v1_conformalize_params = filter_params(v1.conformalize, v1_params)
+
+    v0.fit(X_cross, y_cross, **v0_fit_params)
+    v1.fit(X_cross, y_cross, **v1_fit_params)
+    v1.conformalize(X_cross, y_cross, **v1_conformalize_params)
+
+    v0_predict_params = filter_params(v0.predict, v0_params)
+    v1_predict_params = filter_params(v1.predict, v1_params)
+    v1_predict_set_params = filter_params(v1.predict_set, v1_params)
+
+    v0_preds, v0_pred_intervals = v0.predict(X_cross, **v0_predict_params)
+    v0_pred_intervals = v0_pred_intervals[:, :, 0]
+    v1_pred_intervals = v1.predict_set(X_cross, **v1_predict_set_params)
+    v1_preds = v1.predict(X_cross, **v1_predict_params)
+
+    assert np.equal(v0_preds, v1_preds)
+    assert np.equal(v0_pred_intervals, v1_pred_intervals)
+
+
 def initialize_models(
     strategy_key,
     v0_params: dict,
     v1_params: dict,
-    k_folds=5,
-    random_state=42
 ):
-
     if strategy_key == "prefit":
         v0_params.update({"cv": "prefit"})
         v0_params = filter_params(MapieRegressorV0.__init__, v0_params)
@@ -139,29 +236,6 @@ def initialize_models(
         v0 = MapieRegressorV0(**v0_params)
         v1 = SplitConformalRegressor(**v1_params)
 
-    elif strategy_key == "cv":
-        v0_params.update({"cv": KFold(n_splits=k_folds,
-                                      shuffle=True,
-                                      random_state=random_state)})
-        v0_params = filter_params(MapieRegressorV0.__init__, v0_params)
-        v1_params = filter_params(CrossConformalRegressor.__init__, v1_params)
-        v0 = MapieRegressorV0(**v0_params)
-        v1 = CrossConformalRegressor(cv=k_folds, **v1_params)
-
-    elif strategy_key == "jackknife":
-        v0_params.update({"cv": -1})
-        v0_params = filter_params(MapieRegressorV0.__init__, v0_params)
-        v1_params = filter_params(JackknifeAfterBootstrapRegressor.__init__,
-                                  v1_params)
-        v0 = MapieRegressorV0(**v0_params)
-        v1 = JackknifeAfterBootstrapRegressor(**v1_params)
-
-    elif strategy_key == "CQR":
-        v0_params = filter_params(MapieQuantileRegressorV0.__init__, v0_params)
-        v1_params = filter_params(SplitConformalRegressor.__init__, v1_params)
-        v0 = MapieQuantileRegressorV0(**v0_params)
-        v1 = ConformalizedQuantileRegressor(**v1_params)
-
     else:
         raise ValueError(f"Unknown strategy key: {strategy_key}")
 
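For readers following the new test above, here is a minimal standalone sketch of the v1 workflow it exercises (fit, then conformalize, then predict / predict_set). Parameter names and values are taken from the first test case in the diff; the import path and any constructor defaults not shown there are assumptions, not statements about the released API.

import numpy as np
from sklearn.datasets import make_regression
from mapie_v1.regression import CrossConformalRegressor

X, y = make_regression(n_samples=50, n_features=10, noise=1.0, random_state=1)

# Same settings as the first cross test case above:
# 4 folds, median aggregation, absolute conformity score, 80% confidence.
mapie = CrossConformalRegressor(
    confidence_level=0.8,
    conformity_score="absolute",
    cv=4,
    aggregation_method="median",
    method="base",
)

mapie.fit(X, y)           # fit step of the v1 API
mapie.conformalize(X, y)  # conformalization step: computes conformity scores
y_pred = mapie.predict(X)            # point predictions
y_intervals = mapie.predict_set(X)   # prediction intervals (lower/upper bounds)

This mirrors the v0/v1 comparison in the test: v0's single fit call is split in v1 into an explicit fit followed by conformalize, with predict_set returning the intervals that v0 packs into its predict output.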
mapie_v1/regression.py

Lines changed: 8 additions & 6 deletions
@@ -274,12 +274,14 @@ class CrossConformalRegressor:
         each fold models.
 
     cv : Union[int, BaseCrossValidator], default=5
-        The cross-validation strategy used to compute confomity scores. If an
-        integer is passed, it is the number of folds for `KFold`
-        cross-validation. Alternatively, a BaseCrossValidator from scikit-learn
-        can be provided. Valid options:
-        TODO : reference here the valid options,
-        once the list has been be created during the implementation
+        The cross-validation strategy used to compute confomity scores.
+        Valid options:
+        - integer, to specify the number of folds
+        - any ``sklearn.model_selection.BaseCrossValidator`` suitable for
+          regression, or a custom cross-validator inheriting from it.
+        Main variants in the cross conformal setting are:
+        - ``sklearn.model_selection.KFold`` (vanilla cross conformal)
+        - ``sklearn.model_selection.LeaveOneOut`` (jackknife)
 
     n_jobs : Optional[int], default=None
         The number of jobs to run in parallel when applicable.
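As a hedged illustration of the two `cv` variants named in the updated docstring (a sketch only; the confidence_level value and any other constructor defaults are assumed rather than taken from the docstring):

from sklearn.model_selection import KFold, LeaveOneOut
from mapie_v1.regression import CrossConformalRegressor

# Vanilla cross conformal: an integer requests that number of folds,
# and an explicit KFold cross-validator can be passed instead.
cross_cp = CrossConformalRegressor(confidence_level=0.9, cv=5)
cross_cp_explicit = CrossConformalRegressor(confidence_level=0.9, cv=KFold(n_splits=5))

# Jackknife variant: one conformity score per left-out sample via LeaveOneOut.
jackknife_cp = CrossConformalRegressor(confidence_level=0.9, cv=LeaveOneOut())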
