Skip to content

Commit 9000b15

Browse files
ENH: use confidence level instead of alpha in classification and regression metrics, time series excluded (#638)
1 parent bd7b21d commit 9000b15

File tree

3 files changed

+29
-29
lines changed

3 files changed

+29
-29
lines changed

mapie/metrics/classification.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -122,18 +122,18 @@ def classification_coverage_score_v2(
122122
123123
It is different from ``classification_coverage_score`` because it uses
124124
directly the output of ``predict`` method and can compute the
125-
coverage for each alpha.
125+
coverage for each confidence level.
126126
127127
Parameters
128128
----------
129-
y_true: NDArray of shape (n_samples, n_alpha) or (n_samples,)
129+
y_true: NDArray of shape (n_samples, n_confidence_level) or (n_samples,)
130130
True labels.
131-
y_pred_set: NDArray of shape (n_samples, n_class, n_alpha)
131+
y_pred_set: NDArray of shape (n_samples, n_class, n_confidence_level)
132132
Prediction sets given by booleans of labels.
133133
134134
Returns
135135
-------
136-
NDArray of shape (n_alpha,)
136+
NDArray of shape (n_confidence_level,)
137137
Effective coverage obtained by the prediction sets.
138138
"""
139139
check_arrays_length(y_true, y_pred_set)
@@ -174,7 +174,7 @@ def classification_ssc(
174174
----------
175175
y_true: NDArray of shape (n_samples,)
176176
True labels.
177-
y_pred_set: NDArray of shape (n_samples, n_class, n_alpha)
177+
y_pred_set: NDArray of shape (n_samples, n_class, n_confidence_level)
178178
or (n_samples, n_class)
179179
Prediction sets given by booleans of labels.
180180
num_bins: int or None
@@ -184,7 +184,7 @@ def classification_ssc(
184184
185185
Returns
186186
-------
187-
NDArray of shape (n_alpha, num_bins)
187+
NDArray of shape (n_confidence_level, num_bins)
188188
189189
Examples
190190
--------
@@ -246,15 +246,15 @@ def classification_ssc_score(
246246
num_bins: Union[int, None] = None
247247
) -> NDArray:
248248
"""
249-
Aggregate by the minimum for each alpha the Size-Stratified Coverage [3]:
249+
Aggregate the Size-Stratified Coverage [3] by its minimum for each confidence level:
250250
returns the maximum violation of the conditional coverage
251251
(with the groups defined).
252252
253253
Parameters
254254
----------
255255
y_true: NDArray of shape (n_samples,)
256256
True labels.
257-
y_pred_set: NDArray of shape (n_samples, n_class, n_alpha)
257+
y_pred_set: NDArray of shape (n_samples, n_class, n_confidence_level)
258258
or (n_samples, n_class)
259259
Prediction sets given by booleans of labels.
260260
num_bins: int or None
@@ -264,7 +264,7 @@ def classification_ssc_score(
264264
265265
Returns
266266
-------
267-
NDArray of shape (n_alpha,)
267+
NDArray of shape (n_confidence_level,)
268268
269269
Examples
270270
--------

mapie/metrics/regression.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -121,19 +121,19 @@ def regression_coverage_score_v2(
121121
122122
It is different from ``regression_coverage_score`` because it uses
123123
directly the output of ``predict`` method and can compute the
124-
coverage for each alpha.
124+
coverage for each confidence level.
125125
126126
Parameters
127127
----------
128-
y_true: NDArray of shape (n_samples, n_alpha) or (n_samples,)
128+
y_true: NDArray of shape (n_samples, n_confidence_level) or (n_samples,)
129129
True labels.
130-
y_intervals: NDArray of shape (n_samples, 2, n_alpha)
130+
y_intervals: NDArray of shape (n_samples, 2, n_confidence_level)
131131
Lower and upper bound of prediction intervals
132-
with different alpha risks.
132+
with different confidence levels.
133133
134134
Returns
135135
-------
136-
NDArray of shape (n_alpha,)
136+
NDArray of shape (n_confidence_level,)
137137
Effective coverage obtained by the prediction intervals.
138138
"""
139139
check_arrays_length(y_true, y_intervals)
@@ -180,15 +180,15 @@ def regression_ssc(
180180
----------
181181
y_true: NDArray of shape (n_samples,)
182182
True labels.
183-
y_intervals: NDArray of shape (n_samples, 2, n_alpha) or (n_samples, 2)
183+
y_intervals: NDArray of shape (n_samples, 2, n_confidence_level) or (n_samples, 2)
184184
Lower and upper bounds of the prediction intervals.
185185
num_bins: int
186186
Number of groups. Should be less than the number of different
187187
interval widths.
188188
189189
Returns
190190
-------
191-
NDArray of shape (n_alpha, num_bins)
191+
NDArray of shape (n_confidence_level, num_bins)
192192
193193
Examples
194194
--------
@@ -235,7 +235,7 @@ def regression_ssc_score(
235235
num_bins: int = 3
236236
) -> NDArray:
237237
"""
238-
Aggregate by the minimum for each alpha the Size-Stratified Coverage [3]:
238+
Aggregate the Size-Stratified Coverage [3] by its minimum for each confidence level:
239239
returns the maximum violation of the conditional coverage
240240
(with the groups defined).
241241
@@ -252,15 +252,15 @@ def regression_ssc_score(
252252
----------
253253
y_true: NDArray of shape (n_samples,)
254254
True labels.
255-
y_intervals: NDArray of shape (n_samples, 2, n_alpha) or (n_samples, 2)
255+
y_intervals: NDArray of shape (n_samples, 2, n_confidence_level) or (n_samples, 2)
256256
Lower and upper bounds of the prediction intervals.
257257
num_bins: int
258258
Number of groups. Should be less than the number of different
259259
interval widths.
260260
261261
Returns
262262
-------
263-
NDArray of shape (n_alpha,)
263+
NDArray of shape (n_confidence_level,)
264264
265265
Examples
266266
--------
@@ -324,16 +324,16 @@ def hsic(
324324
----------
325325
y_true: NDArray of shape (n_samples,)
326326
True labels.
327-
y_intervals: NDArray of shape (n_samples, 2, n_alpha) or (n_samples, 2)
327+
y_intervals: NDArray of shape (n_samples, 2, n_confidence_level) or (n_samples, 2)
328328
Lower and upper bounds of the prediction intervals.
329329
kernel_sizes: ArrayLike of size (2,)
330330
The variance (sigma) for each variable (the indicator of coverage and
331331
the interval size), this coefficient controls the width of the curve.
332332
333333
Returns
334334
-------
335-
NDArray of shape (n_alpha,)
336-
One hsic correlation coefficient by alpha.
335+
NDArray of shape (n_confidence_level,)
336+
One hsic correlation coefficient by confidence level.
337337
338338
Raises
339339
------
@@ -372,8 +372,8 @@ def hsic(
372372
raise ValueError(
373373
"kernel_size should be positive"
374374
)
375-
n_samples, _, n_alpha = y_intervals.shape
376-
y_true_per_alpha = np.tile(y_true, (n_alpha, 1)).transpose()
375+
n_samples, _, n_confidence_level = y_intervals.shape
376+
y_true_per_alpha = np.tile(y_true, (n_confidence_level, 1)).transpose()
377377
widths = np.expand_dims(
378378
np.abs(y_intervals[:, 1, :] - y_intervals[:, 0, :]).transpose(),
379379
axis=2
@@ -532,7 +532,7 @@ def coverage_width_based(
532532
def regression_mwi_score(
533533
y_true: NDArray,
534534
y_pis: NDArray,
535-
alpha: float
535+
confidence_level: float
536536
) -> float:
537537
"""
538538
The Winkler score, proposed by Winkler (1972), is a measure used to
@@ -547,8 +547,8 @@ def regression_mwi_score(
547547
y_pis: ArrayLike of shape (n_samples, 2, 1)
548548
Lower and upper bounds of prediction intervals
549549
output from a MAPIE regressor
550-
alpha: float
551-
The value of alpha
550+
confidence_level: float
551+
The confidence level of the prediction intervals, i.e. ``1 - alpha``.
552552
553553
Returns
554554
-------
@@ -584,5 +584,5 @@ def regression_mwi_score(
584584
error_above = np.sum((y_true - y_pred_up)[y_true > y_pred_up])
585585
error_below = np.sum((y_pred_low - y_true)[y_true < y_pred_low])
586586
total_error = error_above + error_below
587-
mwi = (width + total_error * 2 / alpha) / len(y_true)
587+
mwi = (width + total_error * 2 / (1 - confidence_level)) / len(y_true)
588588
return mwi

mapie/tests/test_metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -837,5 +837,5 @@ def test_regression_mwi_score() -> None:
837837

838838
alpha = 0.1
839839

840-
mwi_score = regression_mwi_score(y_true, y_pis, alpha)
840+
mwi_score = regression_mwi_score(y_true, y_pis, 1 - alpha)
841841
np.testing.assert_allclose(mwi_score, 82.25, rtol=1e-2)

0 commit comments

Comments
 (0)