
Commit 31846e1

sd29206 authored and Valentin-Laurent committed
v1 branch - initial commit (made of a bunch of wip commits squashed together)
1 parent c134fc4 commit 31846e1

File tree

11 files changed: +1450 -1 lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
@@ -49,7 +49,7 @@ var/
 pip-log.txt
 pip-delete-this-directory.txt

-# Unit test / coverage reports
+# Tests / coverage reports
 htmlcov/
 .tox/
 .coverage
@@ -59,6 +59,7 @@ nosetests.xml
 coverage.xml
 *,cover
 .hypothesis/
+mapie_v1/integration_tests/mapie_v0_package

 # Translations
 *.mo

Makefile

Lines changed: 6 additions & 0 deletions
@@ -9,6 +9,12 @@ type-check:
 tests:
 	pytest -vs --doctest-modules mapie

+integration-tests-v1:
+	@pip install mapie --no-dependencies --target=./mapie_v1/integration_tests/mapie_v0_package >/dev/null 2>&1
+	@mv ./mapie_v1/integration_tests/mapie_v0_package/mapie ./mapie_v1/integration_tests/mapie_v0_package/mapiev0
+	@- export PYTHONPATH="${PYTHONPATH}:./mapie_v1/integration_tests/mapie_v0_package"; pytest -vs mapie_v1/integration_tests/tests
+	@mv ./mapie_v1/integration_tests/mapie_v0_package/mapiev0 ./mapie_v1/integration_tests/mapie_v0_package/mapie
+
 coverage:
 	pytest -vs \
 		--doctest-modules \
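
Note: the integration-tests-v1 target above installs the released MAPIE package into a local directory and renames its top-level package to mapiev0, so the published code and the in-repo v1 code can be imported side by side in the same test session. A minimal sketch of the import pattern this enables (module paths taken from this commit; it only works after the target has run):

# Assumes `make integration-tests-v1` has done the install/rename and that
# mapie_v1/integration_tests/mapie_v0_package is on PYTHONPATH.
from mapiev0.regression import MapieRegressor as MapieRegressorV0  # released MAPIE (v0)
from mapie_v1.regression import SplitConformalRegressor            # local v1 code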

mapie/estimator/regressor.py

Lines changed: 2 additions & 0 deletions
@@ -212,6 +212,8 @@ def _fit_oof_estimator(
         RegressorMixin
             Fitted estimator.
         """
+        # TODO back-end: avoid using private utilities from sklearn like
+        # _safe_indexing (may break anytime without notice)
         X_train = _safe_indexing(X, train_index)
         y_train = _safe_indexing(y, train_index)
         if not (sample_weight is None):
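
The TODO above flags the use of sklearn's private _safe_indexing. A hypothetical sketch of the kind of public replacement it alludes to, covering only numpy arrays and pandas objects (an illustrative assumption, not part of this commit):

import numpy as np

def index_rows(X, indices):
    # Hypothetical helper: row selection without sklearn's private
    # _safe_indexing; handles pandas objects and plain array-likes only.
    if hasattr(X, "iloc"):            # pandas DataFrame / Series
        return X.iloc[indices]
    return np.asarray(X)[indices]     # numpy arrays and other array-likes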

mapie/utils.py

Lines changed: 2 additions & 0 deletions
@@ -76,6 +76,8 @@ def check_null_weight(
     return sample_weight, X, y


+# TODO back-end: this will be useless in v1 because we'll not distinguish
+# sample_weight from other fit_params
 def fit_estimator(
     estimator: Union[RegressorMixin, ClassifierMixin],
     X: ArrayLike,
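
The TODO above refers to the v1 convention, visible in the class stubs added later in this diff, of passing sample_weight through a generic fit_params dict rather than as a dedicated argument. A hedged illustration using the SplitConformalClassifier stub from this commit (its methods are still `pass`, so this shows the intended call, not working behaviour):

import numpy as np
from sklearn.linear_model import LogisticRegression
from mapie_v1.classification import SplitConformalClassifier

X_train = np.random.rand(100, 3)
y_train = np.random.randint(0, 2, size=100)
sample_weight = np.ones(100)

clf = SplitConformalClassifier(estimator=LogisticRegression())
# v1 style: sample_weight travels inside fit_params like any other fit kwarg.
clf.fit(X_train, y_train, fit_params={"sample_weight": sample_weight})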

mapie_v1/classification.py

Lines changed: 128 additions & 0 deletions
@@ -0,0 +1,128 @@
from __future__ import annotations

from typing import Optional, Union, List
from typing_extensions import Self

import numpy as np
from sklearn.base import ClassifierMixin
from sklearn.model_selection import BaseCrossValidator
from sklearn.linear_model import LogisticRegression

from mapie._typing import ArrayLike, NDArray
from mapie.conformity_scores import BaseClassificationScore


class SplitConformalClassifier:
    def __init__(
        self,
        estimator: ClassifierMixin = LogisticRegression(),
        confidence_level: Union[float, List[float]] = 0.9,
        conformity_score: Union[str, BaseClassificationScore] = "lac",
        prefit: bool = False,
        n_jobs: Optional[int] = None,
        verbose: int = 0,
        random_state: Optional[Union[int, np.random.RandomState]] = None,
    ) -> None:
        pass

    def fit(
        self,
        X_train: ArrayLike,
        y_train: ArrayLike,
        fit_params: Optional[dict] = None,
    ) -> Self:
        pass

    def conformalize(
        self,
        X_conf: ArrayLike,
        y_conf: ArrayLike,
        predict_params: Optional[dict] = None,
    ) -> Self:
        pass

    def predict(self, X: ArrayLike) -> NDArray:
        """
        Returns
        -------
        Point predictions, similar to the output of the predict method
        of scikit-learn classifiers.
        Shape (n_samples,)
        """
        pass

    def predict_sets(
        self,
        X: ArrayLike,
        conformity_score_params: Optional[dict] = None,
        # Parameters specific to the conformal method,
        # for example: include_last_label
    ) -> NDArray:
        """
        Returns
        -------
        An array containing the prediction sets.
        Shape (n_samples, n_classes) if confidence_level is a float,
        Shape (n_samples, n_classes, n_confidence_levels) if
        confidence_level is a list of floats.
        """
        pass


class CrossConformalClassifier:
    def __init__(
        self,
        estimator: ClassifierMixin = LogisticRegression(),
        confidence_level: Union[float, List[float]] = 0.9,
        conformity_score: Union[str, BaseClassificationScore] = "lac",
        cross_val: Union[int, BaseCrossValidator, str] = 5,
        n_jobs: Optional[int] = None,
        verbose: int = 0,
        random_state: Optional[Union[int, np.random.RandomState]] = None,
    ) -> None:
        pass

    def fit(
        self,
        X_train: ArrayLike,
        y_train: ArrayLike,
        fit_params: Optional[dict] = None,
    ) -> Self:
        pass

    def conformalize(
        self,
        X_conf: ArrayLike,
        y_conf: ArrayLike,
        predict_params: Optional[dict] = None,
    ) -> Self:
        pass

    def predict(self, X: ArrayLike) -> NDArray:
        """
        Returns
        -------
        Point predictions, similar to the output of the predict method
        of scikit-learn classifiers.
        Shape (n_samples,)
        """
        pass

    def predict_sets(
        self,
        X: ArrayLike,
        aggregation_method: Optional[str] = "mean",
        # How to aggregate the scores produced by the estimators on test data
        conformity_score_params: Optional[dict] = None,
    ) -> NDArray:
        """
        Returns
        -------
        An array containing the prediction sets.
        Shape (n_samples, n_classes) if confidence_level is a float,
        Shape (n_samples, n_classes, n_confidence_levels) if
        confidence_level is a list of floats.
        """
        pass
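
All methods above are stubs in this commit, but the signatures already fix the intended workflow: fit on training data, conformalize on held-out data, then predict or predict_sets. A hedged sketch of that sequence (dataset and split are illustrative; nothing runs end to end until the stubs are implemented):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from mapie_v1.classification import SplitConformalClassifier

X, y = make_classification(n_samples=500, n_classes=3, n_informative=5,
                           random_state=0)
X_train, X_conf, y_train, y_conf = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)

clf = SplitConformalClassifier(
    estimator=LogisticRegression(),
    confidence_level=0.9,
    conformity_score="lac",
)
clf.fit(X_train, y_train)             # fit the underlying estimator
clf.conformalize(X_conf, y_conf)      # compute conformity scores on held-out data
point_preds = clf.predict(X_conf)     # shape (n_samples,)
pred_sets = clf.predict_sets(X_conf)  # shape (n_samples, n_classes)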

mapie_v1/conformity_scores/__init__.py

Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
from mapie.conformity_scores import (
    AbsoluteConformityScore,
    GammaConformityScore,
    ResidualNormalisedScore,
)

REGRESSION_CONFORMITY_SCORES_STRING_MAP = {
    "absolute": AbsoluteConformityScore,
    "gamma": GammaConformityScore,
    "residual_normalized": ResidualNormalisedScore,
}

mapie_v1/conformity_scores/utils.py

Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
from typing import Union
from mapie.conformity_scores import BaseRegressionScore
from . import REGRESSION_CONFORMITY_SCORES_STRING_MAP


def check_and_select_split_conformity_score(
    conformity_score: Union[str, BaseRegressionScore]
) -> BaseRegressionScore:
    if isinstance(conformity_score, BaseRegressionScore):
        return conformity_score
    elif conformity_score in REGRESSION_CONFORMITY_SCORES_STRING_MAP:
        return REGRESSION_CONFORMITY_SCORES_STRING_MAP[conformity_score]()
    else:
        raise ValueError("Invalid conformity_score type")
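
A short usage example of the helper above, based only on names defined in this diff: a string key resolves to an instantiated score class, while an existing BaseRegressionScore instance is returned unchanged.

from mapie.conformity_scores import AbsoluteConformityScore
from mapie_v1.conformity_scores.utils import (
    check_and_select_split_conformity_score,
)

score = check_and_select_split_conformity_score("absolute")
assert isinstance(score, AbsoluteConformityScore)

# Passing an instance returns it as-is.
instance = AbsoluteConformityScore()
assert check_and_select_split_conformity_score(instance) is instance
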
Lines changed: 168 additions & 0 deletions
@@ -0,0 +1,168 @@
from __future__ import annotations

import numpy as np
import pytest
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

from mapie_v1.regression import (
    SplitConformalRegressor,
    CrossConformalRegressor,
    JackknifeAfterBootstrapRegressor,
    ConformalizedQuantileRegressor
)
from mapiev0.regression import MapieRegressor as MapieRegressorV0  # noqa
from mapiev0.regression import MapieQuantileRegressor as MapieQuantileRegressorV0  # noqa
from mapie_v1.conformity_scores.utils import \
    check_and_select_split_conformity_score
from mapie_v1.integration_tests.utils import (filter_params,
                                              train_test_split_shuffle)
from sklearn.model_selection import KFold

RANDOM_STATE = 1
K_FOLDS = 3
N_BOOTSTRAPS = 30

X_toy = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1)
y_toy = np.array([5, 7, 9, 11, 13, 15])
X, y = make_regression(n_samples=500,
                       n_features=10,
                       noise=1.0,
                       random_state=RANDOM_STATE)


@pytest.mark.parametrize("strategy_key", ["split", "prefit"])
@pytest.mark.parametrize("method", ["base", "plus", "minmax"])
@pytest.mark.parametrize("conformity_score", ["absolute"])
@pytest.mark.parametrize("confidence_level", [0.9, 0.95, 0.99])
@pytest.mark.parametrize("agg_function", ["mean", "median"])
@pytest.mark.parametrize("allow_infinite_bounds", [True, False])
@pytest.mark.parametrize(
    "estimator", [
        LinearRegression(),
        RandomForestRegressor(random_state=RANDOM_STATE, max_depth=2)])
@pytest.mark.parametrize("test_size", [0.2, 0.5])
def test_exact_interval_equality_split(
    strategy_key,
    method,
    conformity_score,
    confidence_level,
    agg_function,
    allow_infinite_bounds,
    estimator,
    test_size
):
    """
    Test that the prediction intervals are exactly the same
    between v0 and v1 models when using the same settings.
    """

    X_train, X_conf, y_train, y_conf = train_test_split_shuffle(
        X, y, test_size=test_size, random_state=RANDOM_STATE
    )

    if strategy_key == "prefit":
        estimator.fit(X_train, y_train)

    v0_params = {
        "estimator": estimator,
        "method": method,
        "conformity_score": check_and_select_split_conformity_score(
            conformity_score
        ),
        "alpha": 1 - confidence_level,
        "agg_function": agg_function,
        "random_state": RANDOM_STATE,
        "test_size": test_size,
        "allow_infinite_bounds": allow_infinite_bounds
    }
    v1_params = {
        "estimator": estimator,
        "method": method,
        "conformity_score": conformity_score,
        "confidence_level": confidence_level,
        "aggregate_function": agg_function,
        "random_state": RANDOM_STATE,
        "n_bootstraps": N_BOOTSTRAPS,
        "allow_infinite_bounds": allow_infinite_bounds
    }

    v0, v1 = initialize_models(
        strategy_key=strategy_key,
        v0_params=v0_params,
        v1_params=v1_params,
        k_folds=K_FOLDS,
        random_state=RANDOM_STATE
    )

    if strategy_key == 'prefit':
        v0.fit(X_conf, y_conf)
    else:
        v0.fit(X, y)
        v1.fit(X_train, y_train)

    v1.conformalize(X_conf, y_conf)

    v0_predict_params = filter_params(v0.predict, v0_params)
    v1_predict_params = filter_params(v1.predict, v1_params)
    _, v0_pred_intervals = v0.predict(X_conf, **v0_predict_params)
    v1_pred_intervals = v1.predict_set(X_conf, **v1_predict_params)
    v0_pred_intervals = v0_pred_intervals[:, :, 0]

    np.testing.assert_array_equal(
        v1_pred_intervals,
        v0_pred_intervals,
        err_msg="Prediction intervals differ between v0 and v1 models"
    )


def initialize_models(
    strategy_key,
    v0_params: dict,
    v1_params: dict,
    k_folds=5,
    random_state=42
):

    if strategy_key == "prefit":
        v0_params.update({"cv": "prefit"})
        v0_params = filter_params(MapieRegressorV0.__init__, v0_params)
        v1_params = filter_params(SplitConformalRegressor.__init__, v1_params)
        v0 = MapieRegressorV0(**v0_params)
        v1 = SplitConformalRegressor(prefit=True, **v1_params)

    elif strategy_key == "split":
        v0_params.update({"cv": "split"})
        v0_params = filter_params(MapieRegressorV0.__init__, v0_params)
        v1_params = filter_params(SplitConformalRegressor.__init__, v1_params)
        v0 = MapieRegressorV0(**v0_params)
        v1 = SplitConformalRegressor(**v1_params)

    elif strategy_key == "cv":
        v0_params.update({"cv": KFold(n_splits=k_folds,
                                      shuffle=True,
                                      random_state=random_state)})
        v0_params = filter_params(MapieRegressorV0.__init__, v0_params)
        v1_params = filter_params(CrossConformalRegressor.__init__, v1_params)
        v0 = MapieRegressorV0(**v0_params)
        v1 = CrossConformalRegressor(cv=k_folds, **v1_params)

    elif strategy_key == "jackknife":
        v0_params.update({"cv": -1})
        v0_params = filter_params(MapieRegressorV0.__init__, v0_params)
        v1_params = filter_params(JackknifeAfterBootstrapRegressor.__init__,
                                  v1_params)
        v0 = MapieRegressorV0(**v0_params)
        v1 = JackknifeAfterBootstrapRegressor(**v1_params)

    elif strategy_key == "CQR":
        v0_params = filter_params(MapieQuantileRegressorV0.__init__, v0_params)
        v1_params = filter_params(SplitConformalRegressor.__init__, v1_params)
        v0 = MapieQuantileRegressorV0(**v0_params)
        v1 = ConformalizedQuantileRegressor(**v1_params)

    else:
        raise ValueError(f"Unknown strategy key: {strategy_key}")

    return v0, v1
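
filter_params and train_test_split_shuffle are imported from mapie_v1.integration_tests.utils, which is not shown on this page. A plausible sketch of filter_params consistent with how it is used above (an assumption, not the actual implementation):

import inspect
from typing import Callable

def filter_params(callable_: Callable, params: dict) -> dict:
    # Keep only the entries of `params` that `callable_` accepts as
    # keyword arguments (sketch; the real helper may differ).
    accepted = inspect.signature(callable_).parameters
    return {k: v for k, v in params.items() if k in accepted}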
