65 changes: 25 additions & 40 deletions mapie/tests/risk_control/test_binary_classification_control.py
@@ -19,13 +19,6 @@

random_state = 42

-X, y = make_multilabel_classification(
-    n_samples=1000,
-    n_classes=5,
-    random_state=random_state,
-    allow_unlabeled=False
-)


def fpr_func(y_true: NDArray, y_pred: NDArray) -> float:
"""Computes false positive rate."""
Expand All @@ -34,6 +27,11 @@ def fpr_func(y_true: NDArray, y_pred: NDArray) -> float:
return fp / (tn + fp)


+dummy_param = np.array([0.5])
+dummy_target = 0.9
+dummy_X_test = [[0]]

Collaborator: Could this be renamed to dummy_X for more generic use?

def dummy_predict(X):
    return np.random.rand(1, 2)

@@ -43,7 +41,7 @@ def bcc_dummy():
    return BinaryClassificationController(
        predict_function=dummy_predict,
        risk=precision,
-        target_level=0.9,
+        target_level=dummy_target,
    )

# The following test is voluntarily agnostic
@@ -107,7 +105,7 @@ def test_auto(
        controller = BinaryClassificationController(
            predict_function=dummy_predict,
            risk=risk_instance,
-            target_level=0.8,
+            target_level=dummy_target,
            best_predict_param_choice="auto"
        )

@@ -121,7 +119,7 @@ def test_custom(self):
        controller = BinaryClassificationController(
            predict_function=dummy_predict,
            risk=precision,
-            target_level=0.8,
+            target_level=dummy_target,
            best_predict_param_choice=custom_risk
        )

@@ -136,7 +134,7 @@ def test_auto_unknown_risk(self):
        BinaryClassificationController(
            predict_function=dummy_predict,
            risk=unknown_risk,
-            target_level=0.8,
+            target_level=dummy_target,
            best_predict_param_choice="auto"
        )

@@ -199,15 +197,14 @@ def test_only_one_param(self, best_predict_param_choice):
        controller = BinaryClassificationController(
            predict_function=dummy_predict,
            risk=precision,
-            target_level=0.9,
+            target_level=dummy_target,
            best_predict_param_choice=best_predict_param_choice
        )

-        dummy_param = 0.5
        y_calibrate = np.array([1, 0])

Collaborator: We may use a dummy_y here.

        dummy_predictions = np.array([[True, False]])
        valid_params_index = [0]
-        controller.valid_predict_params = np.array([dummy_param])
+        controller.valid_predict_params = dummy_param

Collaborator: Unsure about using dummy_param here, as it is not obvious that it is a singleton (and that's the point of the test).

Copilot AI (Sep 25, 2025): The assignment is inconsistent with the expected array type. In the original code (line 808 in the diff), this was assigned as np.array([dummy_param]), but now it is assigned as just dummy_param. This will cause issues when the controller expects an array but receives a scalar value.

        controller._set_best_predict_param(
            y_calibrate_=y_calibrate,
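
A minimal, runnable check of what the new assignment actually hands the controller (only NumPy is assumed; the constants mirror this diff):

import numpy as np

# Before this PR, dummy_param was a local scalar, so wrapping was required:
scalar_param = 0.5
print(np.array([scalar_param]).shape)  # (1,)

# After this PR, the module-level dummy_param is already a one-element array,
# so the bare assignment still passes an ndarray, not a scalar:
dummy_param = np.array([0.5])
print(dummy_param.shape)              # (1,)
print(np.array([dummy_param]).shape)  # (1, 1) -- re-wrapping would nest it
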
@@ -222,13 +219,10 @@ def test_only_one_param(self, best_predict_param_choice):
        [[precision, 0.5], [recall, 0.7]]
    )
    def test_correct_param_out_of_two(self, best_predict_param_choice, expected):

Collaborator: Regarding this test, maybe we can stick to numerical values everywhere (for param and target)? The names dummy_param and dummy_param_2 were misnomers: here we actually care about the param values, so if we change the shared dummy param/target values, the test may break.

-        dummy_param = 0.5
-        dummy_param_2 = 0.7

        controller = BinaryClassificationController(
            predict_function=dummy_predict,
            risk=precision,
-            target_level=0.9,
+            target_level=dummy_target,
            best_predict_param_choice=best_predict_param_choice
        )
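
The collaborator's suggestion above, sketched with literal values (a micro-example, not part of the PR):

import numpy as np

# The expectations 0.5 and 0.7 in the parametrization are meaningful values;
# spelling them out keeps the test self-contained and robust to changes in
# the shared dummy constants:
params = np.array([0.5, 0.7])
assert params[1] == 0.7  # e.g. the expected param for the recall case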

@@ -242,8 +236,7 @@ def test_correct_param_out_of_two(self, best_predict_param_choice, expected):
        valid_params_index = [0, 1]

        controller.valid_predict_params = np.array(
-            [dummy_param, dummy_param_2]
-        )
+            [dummy_param, dummy_param + 0.2])

Copilot AI (Sep 25, 2025): The array construction is incorrect. dummy_param is defined as np.array([0.5]) (line 20), so dummy_param + 0.2 results in np.array([0.7]). The expected values in the parametrized test are 0.5 and 0.7 (scalars), but this creates arrays [0.5] and [0.7]. This should be [dummy_param[0], dummy_param[0] + 0.2], or dummy_param should be defined as a scalar.

Suggested change:
-            [dummy_param, dummy_param + 0.2])
+            [dummy_param[0], dummy_param[0] + 0.2])

        controller._set_best_predict_param(
            y_calibrate_=y_calibrate,
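
A minimal, runnable check of the shapes flagged in the Copilot comment above (only NumPy is assumed):

import numpy as np

dummy_param = np.array([0.5])  # as defined at module level in this PR

# Building params from one-element arrays nests them:
params = np.array([dummy_param, dummy_param + 0.2])
print(params.shape)  # (2, 1) -- each entry is itself an array

# Indexing into the constant restores the flat layout that the parametrized
# expectations (0.5 and 0.7) compare against:
params_flat = np.array([dummy_param[0], dummy_param[0] + 0.2])
print(params_flat.shape)  # (2,)
print(params_flat)        # [0.5 0.7]
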
@@ -260,16 +253,15 @@ def test_secondary_risk_undefined(self):
        controller = BinaryClassificationController(
            predict_function=dummy_predict,
            risk=precision,
-            target_level=0.9,
+            target_level=dummy_target,
            best_predict_param_choice=precision
        )

        y_calibrate = np.array([1, 0])
        predictions_per_param = np.array(
            [[False, False]])  # precision undefined
        valid_params_index = [0]
-        dummy_param = 0.5
-        controller.valid_predict_params = np.array([dummy_param])
+        controller.valid_predict_params = dummy_param

Copilot AI (Sep 25, 2025): Same issue as in line 218. The assignment should be np.array([dummy_param]) to maintain consistency with the expected array type, or dummy_param should be defined as a scalar if that's the intended usage pattern.


        controller._set_best_predict_param(
            y_calibrate_=y_calibrate,
@@ -290,15 +282,15 @@ def bcc_deterministic():
    return BinaryClassificationController(
        predict_function=deterministic_predict_function,
        risk=precision,
-        target_level=0.9,
+        target_level=dummy_target,
    )


class TestBinaryClassificationControllerGetPredictionsPerParam:
    def test_single_parameter(self, bcc_deterministic):
        result = bcc_deterministic._get_predictions_per_param(
            X=[],
-            params=np.array([0.5])
+            params=dummy_param

Collaborator: Unsure about using dummy_param here, as it is not obvious that it is a singleton (and that's the point of the test). Maybe we can create a single_param constant.

        )

        expected = np.array([[False, True, True]])
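
A one-line sketch of the constant proposed in the comment above (the name single_param is the reviewer's suggestion, not part of the PR):

import numpy as np

single_param = np.array([0.5])     # explicitly a one-element array
assert single_param.shape == (1,)  # makes the singleton intent obvious
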
@@ -327,7 +319,7 @@ def predict_fn(X):
        controller = BinaryClassificationController(
            predict_function=predict_fn,
            risk=precision,
-            target_level=0.9,
+            target_level=dummy_target,
        )

        params = np.array([0.2, 0.5, 0.8])
@@ -344,16 +336,14 @@ def test_error_passing_classifier(self):
        bcc = BinaryClassificationController(
            predict_function=clf,
            risk=precision,
-            target_level=0.9
+            target_level=dummy_target
        )
-        X_test = [[0]]
-        params = np.array([0.5])

        with pytest.raises(
            TypeError,
            match=r"Maybe you provided a binary classifier"
        ):
-            bcc._get_predictions_per_param(X_test, params)
+            bcc._get_predictions_per_param(dummy_X_test, dummy_param)

    def test_error_incorrect_predict_shape(self):
        """
@@ -368,17 +358,15 @@ def pred_func(X):
        bcc = BinaryClassificationController(
            predict_function=pred_func,
            risk=precision,
-            target_level=0.9
+            target_level=dummy_target
        )
-        X_test = [[0]]
-        params = np.array([0.5])

        with pytest.raises(
            IndexError,
            match=r"Maybe the predict function you provided returns only the "
                  r"probability of the positive class."
        ):
-            bcc._get_predictions_per_param(X_test, params)
+            bcc._get_predictions_per_param(dummy_X_test, dummy_param)

    @pytest.mark.parametrize(
        "error,expected_error_type,expected_error_message",
@@ -397,14 +385,11 @@ def failing_predict_function(X):
        bcc = BinaryClassificationController(
            predict_function=failing_predict_function,
            risk=precision,
-            target_level=0.9
+            target_level=dummy_target
        )

-        X_test = [[0]]
-        params = np.array([0.5])
-
        with pytest.raises(expected_error_type, match=expected_error_message):
-            bcc._get_predictions_per_param(X_test, params)
+            bcc._get_predictions_per_param(dummy_X_test, dummy_param)


class TestBinaryClassificationControllerPredict:
@@ -424,4 +409,4 @@ def test_error(self, bcc_dummy):
            ValueError,
            match=r"Cannot predict"
        ):
-            controller.predict(X)
+            controller.predict(dummy_X_test)