diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 9bd7aa880..24cf43ce5 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -5,7 +5,11 @@ Contribution guidelines What to work on? ---------------- -You are welcome to propose and contribute new ideas. +Issues tagged "Good first issue" are perfect for open-source beginners. + +For the more experienced, issues tagged "Contributors welcome" are recommended if you want to help. + +You are also welcome to propose and contribute to new ideas. We encourage you to `open an issue `_ so that we can align on the work to be done. It is generally a good idea to have a quick discussion before opening a pull request that is potentially out-of-scope. @@ -43,73 +47,65 @@ Finally, install ``mapie`` in development mode: $ pip install -e . +Implementing your change +------------------------------------------ -Documenting your change ------------------------ - -If you're adding a public class or function, then you'll need to add a docstring with a doctest. We follow the `numpy docstring convention `_, so please do too. -Any estimator should follow the `scikit-learn API `_, so please follow these guidelines. - -In order to build the documentation locally, you first need to create a different virtual environment than the one used for development, and then install some dependencies using ``pip`` with the following commands: +The linter must pass: .. code-block:: sh - $ pip install -r requirements.doc.txt - $ pip install -e . + $ make lint -Finally, once dependencies are installed, you can build the documentation locally by running: +The typing must pass. .. code-block:: sh - $ make clean-doc - $ make doc + $ make type-check -Updating changelog ------------------- -You can make your contribution visible by: +Testing your change +--------------------- -1. Adding your name to the Contributors section of `AUTHORS.rst `_ -2. If your change is user-facing (bug fix, feature, ...), adding a line to describe it in `HISTORY.rst `_ +See `the tests README.md `_ for guidance. + +The tests absolutely have to pass. -Testing -------- +.. code-block:: sh -Linting -^^^^^^^ + $ make tests -These tests absolutely have to pass. +The coverage should absolutely be 100%. .. code-block:: sh - $ make lint + $ make coverage +Documenting your change +----------------------- -Static typing -^^^^^^^^^^^^^ +If you're adding a public class or function, then you'll need to add a docstring with a doctest. We follow the `numpy docstring convention `_, so please do too. +Any estimator should follow the `scikit-learn API `_, so please follow these guidelines. -These tests absolutely have to pass. +In order to build the documentation locally, you first need to create a different virtual environment than the one used for development, and then install some dependencies using ``pip`` with the following commands: .. code-block:: sh - $ make type-check - - -Unit tests -^^^^^^^^^^ + $ pip install -r requirements.doc.txt + $ pip install -e . -These tests absolutely have to pass. +Finally, once dependencies are installed, you can build the documentation locally by running: .. code-block:: sh - $ make tests + $ make clean-doc + $ make doc -Coverage -^^^^^^^^ -The coverage should absolutely be 100%. +Updating changelog +------------------ -.. code-block:: sh +You can make your contribution visible by: - $ make coverage +1. Adding your name to the Contributors section of `AUTHORS.rst `_ +2. 
If your change is user-facing (bug fix, feature, ...), adding a line to describe it in `HISTORY.rst `_ diff --git a/doc/theoretical_description_risk_control.rst b/doc/theoretical_description_risk_control.rst index 6c8cbe4c4..001bb9dc2 100644 --- a/doc/theoretical_description_risk_control.rst +++ b/doc/theoretical_description_risk_control.rst @@ -138,7 +138,7 @@ Let's first give the settings and the notations of the method: - Let :math:`R` be the risk associated to a set-valued predictor: .. math:: - R(\mathcal{T}_{\hat{\lambda}}) = \mathbb{E}[L(Y, \mathcal{T}_{\lambda}(X))] + R(\mathcal{T}_{\lambda}) = \mathbb{E}[L(Y, \mathcal{T}_{\lambda}(X))] The goal of the method is to compute an Upper Confidence Bound (UCB) :math:`\hat{R}^+(\lambda)` of :math:`R(\lambda)` and then to find :math:`\hat{\lambda}` as follows: diff --git a/mapie/control_risk/ltt.py b/mapie/control_risk/ltt.py index d87ca0936..69af0d50c 100644 --- a/mapie/control_risk/ltt.py +++ b/mapie/control_risk/ltt.py @@ -1,5 +1,5 @@ import warnings -from typing import Any, List, Tuple, Union +from typing import Any, List, Tuple import numpy as np @@ -12,7 +12,7 @@ def ltt_procedure( r_hat: NDArray, alpha_np: NDArray, delta: float, - n_obs: Union[int, NDArray], + n_obs: NDArray, binary: bool = False, ) -> List[List[Any]]: """ @@ -24,28 +24,36 @@ def ltt_procedure( - Apply a family wise error rate algorithm, here Bonferonni correction - Return the index lambdas that give you the control at alpha level + Note that in the case of multi-risk, the arrays r_hat, alpha_np, and n_obs + should have the same length for the first dimension which corresponds + to the number of risks. In the case of a single risk, the length should be 1. + Parameters ---------- - r_hat: NDArray of shape (n_lambdas, ). + r_hat: NDArray of shape (n_risks, n_lambdas). Empirical risk with respect to the lambdas. Here lambdas are thresholds that impact decision-making, therefore empirical risk. - alpha_np: NDArray of shape (n_alpha, ). + alpha_np: NDArray of shape (n_risks, n_alpha). Contains the different alphas control level. The empirical risk should be less than alpha with probability 1-delta. + Note: MAPIE 1.2 does not support multiple risks and multiple alphas + simultaneously. + For PrecisionRecallController, the shape should be (1, n_alpha). + For BinaryClassificationController, the shape should be (n_risks, 1). delta: float. Probability of not controlling empirical risk. Correspond to proportion of failure we don't want to exceed. - n_obs: Union[int, NDArray] + n_obs: NDArray of shape (n_risks, n_lambdas). Correspond to the number of observations used to compute the risk. In the case of a conditional loss, n_obs must be the number of effective observations used to compute the empirical risk - for each lambda, hence of shape (n_lambdas, ). + for each lambda. binary: bool, default=False Must be True if the loss associated to the risk is binary. @@ -62,11 +70,19 @@ def ltt_procedure( M. I., & Lei, L. (2021). Learn then test: "Calibrating predictive algorithms to achieve risk control". """ - p_values = compute_hoeffding_bentkus_p_value(r_hat, n_obs, alpha_np, binary) + if not (r_hat.shape[0] == n_obs.shape[0] == alpha_np.shape[0]): + raise ValueError( + "r_hat, n_obs, and alpha_np must have the same length." 
+ ) + p_values = np.array([ + compute_hoeffding_bentkus_p_value(r_hat_i, n_obs_i, alpha_np_i, binary) + for r_hat_i, n_obs_i, alpha_np_i in zip(r_hat, n_obs, alpha_np) + ]) + p_values = p_values.max(axis=0) # take max over risks (no effect if mono risk) N = len(p_values) valid_index = [] - for i in range(len(alpha_np)): - l_index = np.where(p_values[:, i] <= delta/N)[0].tolist() + for i in range(alpha_np.shape[1]): + l_index = np.nonzero(p_values[:, i] <= delta/N)[0].tolist() valid_index.append(l_index) return valid_index @@ -95,7 +111,7 @@ def find_lambda_control_star( the empirical risk is less than alpha. valid_index: List[List[Any]]. - Contain the valid index that satisfy fwer control + Contain the valid index that satisfy FWER control for each alpha (length aren't the same for each alpha). lambdas: NDArray of shape (n_lambda, ) @@ -104,7 +120,7 @@ def find_lambda_control_star( Returns ------- l_lambda_star: ArrayLike of shape (n_alpha, ). - The lambda that give the highest precision + The lambda that gives the minimum precision for a given alpha. r_star: ArrayLike of shape (n_alpha, ). @@ -113,7 +129,8 @@ def find_lambda_control_star( if [] in valid_index: warnings.warn( """ - Warning: At least one sequence is empty! + Warning: the risk couldn't be controlled for at least one value of alpha. + The corresponding lambdas have been set to 1. """ ) l_lambda_star = [] # type: List[Any] diff --git a/mapie/risk_control.py b/mapie/risk_control.py index f541fde34..eacfdea93 100644 --- a/mapie/risk_control.py +++ b/mapie/risk_control.py @@ -57,10 +57,9 @@ class PrecisionRecallController(BaseEstimator, ClassifierMixin): method : Optional[str] Method to use for the prediction sets. If `metric_control` is - "recall", then the method can be either "crc" or "rcps". + "recall", then the method can be either "crc" (default) or "rcps". If `metric_control` is "precision", then the method used to control the precision is "ltt". - If `metric_control` is "recall" the default method is "crc". n_jobs: Optional[int] Number of jobs for parallel processing using joblib @@ -95,7 +94,7 @@ class PrecisionRecallController(BaseEstimator, ClassifierMixin): ---------- valid_methods: List[str] List of all valid methods. Either CRC or RCPS - valid_methods: List[Union[str, ``None``]] + valid_bounds: List[Union[str, ``None``]] List of all valid bounds computation for RCPS only. single_estimator_ : sklearn.ClassifierMixin Estimator fitted on the whole training set. @@ -270,7 +269,7 @@ def _check_delta(self, delta: Optional[float]): Raises ------ ValueError - If delta is ``None`` and method is RCSP or + If delta is ``None`` and method is RCPS or if delta is not in [0, 1] and method is RCPS. Warning @@ -321,7 +320,7 @@ def _check_estimator( y: ArrayLike, estimator: Optional[ClassifierMixin] = None, _refit: Optional[bool] = False, - ) -> ClassifierMixin: + ) -> Tuple[ClassifierMixin, ArrayLike, ArrayLike]: """ Check the estimator value. If it is ``None``, it returns a multi-output ``LogisticRegression`` @@ -344,13 +343,6 @@ def _check_estimator( By default False - - Returns - ------- - ClassifierMixin - The estimator itself or a default multi-output - ``LogisticRegression`` instance. - Raises ------ ValueError @@ -420,19 +412,6 @@ def _check_partial_fit_first_call(self) -> bool: def _check_bound(self, bound: Optional[str]): """ Check the value of the bound. - - Parameters - ---------- - bound : Optional[str] - Bound defined in the predict. 
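Since this diff moves every ``ltt_procedure`` call site to the new 2-D conventions, a minimal standalone sketch may help reviewers. The numerical values below are purely illustrative; only the shapes matter, mirroring the docstring above (``r_hat`` and ``n_obs`` of shape ``(n_risks, n_lambdas)``, ``alpha_np`` of shape ``(n_risks, n_alpha)``).

.. code-block:: python

    import numpy as np
    from mapie.control_risk.ltt import ltt_procedure

    # Single risk: the first axis has length 1.
    r_hat = np.array([[0.12, 0.08, 0.05]])        # (1, n_lambdas)
    alpha_np = np.array([[0.1, 0.2]])             # (1, n_alpha)
    n_obs = np.array([[500, 500, 500]])           # (1, n_lambdas)
    valid_index = ltt_procedure(r_hat, alpha_np, 0.1, n_obs)
    # valid_index[i] lists the indices of the lambdas validated for the i-th alpha.

    # Two risks controlled jointly: one row per risk, a single alpha per risk.
    r_hat_multi = np.array([[0.12, 0.08, 0.05],
                            [0.30, 0.22, 0.18]])  # (2, n_lambdas)
    alpha_multi = np.array([[0.10],
                            [0.25]])              # (2, 1)
    n_obs_multi = np.array([[500, 500, 500],
                            [500, 500, 500]])     # (2, n_lambdas)
    valid_index_multi = ltt_procedure(r_hat_multi, alpha_multi, 0.1, n_obs_multi)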
- - Raises - ------ - AttributeError - If bound is not in ["hoeffding", "bernstein", "wsr", ``None``] - - Warning - If bound is not ``None``and method is CRC """ if bound not in self.valid_bounds_: raise ValueError( @@ -492,7 +471,7 @@ def _transform_pred_proba( y_pred_proba_array = y_pred_proba else: y_pred_proba_stacked = np.stack( - y_pred_proba, # type: ignore + y_pred_proba, axis=0 )[:, :, 1] y_pred_proba_array = np.moveaxis(y_pred_proba_stacked, 0, -1) @@ -507,7 +486,7 @@ def partial_fit( ) -> PrecisionRecallController: """ Fit the base estimator or use the fitted base estimator on - batch data. All the computed risks will be concatenated each + batch data to compute risks. All the computed risks will be concatenated each time the partial_fit method is called. Parameters @@ -592,7 +571,7 @@ def fit( calib_size: Optional[float] = .3 ) -> PrecisionRecallController: """ - Fit the base estimator or use the fitted base estimator. + Fit the base estimator (or use the fitted base estimator) and compute risks. Parameters ---------- @@ -624,8 +603,7 @@ def predict( bound: Optional[Union[str, None]] = None ) -> Union[NDArray, Tuple[NDArray, NDArray]]: """ - Prediction sets on new samples based on target confidence - interval. + Prediction sets on new samples based on the target risk level. Prediction sets for a given ``alpha`` are deduced from the computed risks. @@ -634,13 +612,12 @@ def predict( X: ArrayLike of shape (n_samples, n_features) alpha : Optional[Union[float, Iterable[float]]] - Can be a float, a list of floats, or a ``ArrayLike`` of floats. - Between 0 and 1, represent the uncertainty of the confidence - interval. + The target risk level. + Can be a float, a list of floats, or a ``ArrayLike`` of floats, + between 0 and 1. Lower ``alpha`` produce larger (more conservative) prediction sets. - ``alpha`` is the complement of the target coverage level. - By default ``None``. + By default ``None`` (which means alpha=0.1). delta : Optional[float] Can be a float, or ``None``. If using method="rcps", then it @@ -692,7 +669,10 @@ def predict( self.n_obs = len(self.risks) self.r_hat = self.risks.mean(axis=0) self.valid_index = ltt_procedure( - self.r_hat, alpha_np, cast(float, delta), self.n_obs + np.expand_dims(self.r_hat, axis=0), + np.expand_dims(alpha_np, axis=0), + cast(float, delta), + np.expand_dims(np.array([self.n_obs]), axis=0) ) self._check_valid_index(alpha_np) self.lambdas_star, self.r_star = find_lambda_control_star( @@ -888,7 +868,7 @@ class BinaryClassificationController: predict_proba method of a fitted binary classifier. Its output signature must be of shape (len(X), 2) - risk : BinaryClassificationRisk + risk : Union[BinaryClassificationRisk, List[BinaryClassificationRisk]] The risk or performance metric to control. Valid options: @@ -896,8 +876,12 @@ class BinaryClassificationController: accuracy, false_positive_rate) - A custom instance of BinaryClassificationRisk object - target_level : float + Can be a list of risks in the case of multi risk control. + + target_level : Union[float, List[float]] The maximum risk level (or minimum performance level). Must be between 0 and 1. + Can be a list of target levels in the case of multi risk control (length should + match the length of the risks list). confidence_level : float, default=0.9 The confidence level with which the risk (or performance) is controlled. 
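To make the new list-valued ``risk`` and ``target_level`` parameters concrete, here is a minimal usage sketch (not part of this diff): the dataset and classifier are stand-ins, and the ``calibrate``/``predict`` calls follow the controller API already exposed by the class.

.. code-block:: python

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from mapie.risk_control import (
        BinaryClassificationController, recall, false_positive_rate,
    )

    X, y = make_classification(n_samples=2000, random_state=0)
    X_train, X_other, y_train, y_other = train_test_split(X, y, random_state=0)
    X_calib, X_test, y_calib, y_test = train_test_split(X_other, y_other, random_state=0)

    clf = LogisticRegression().fit(X_train, y_train)

    # Jointly require recall >= 0.8 and false positive rate <= 0.3,
    # each controlled with 90% confidence.
    controller = BinaryClassificationController(
        predict_function=clf.predict_proba,
        risk=[recall, false_positive_rate],   # one entry per controlled risk
        target_level=[0.8, 0.3],              # same length as the risk list
        confidence_level=0.9,
    )
    controller.calibrate(X_calib, y_calib)
    y_pred = controller.predict(X_test)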
@@ -973,18 +957,19 @@ class BinaryClassificationController: def __init__( self, predict_function: Callable[[ArrayLike], NDArray], - risk: BinaryClassificationRisk, - target_level: float, + risk: Union[BinaryClassificationRisk, List[BinaryClassificationRisk]], + target_level: Union[float, List[float]], confidence_level: float = 0.9, best_predict_param_choice: Union[ Literal["auto"], BinaryClassificationRisk] = "auto", ): + self.is_multi_risk = self._check_if_multi_risk_control(risk, target_level) self._predict_function = predict_function - self._risk = risk - if self._risk.higher_is_better: - self._alpha = 1 - target_level - else: - self._alpha = target_level + self._risk = risk if isinstance(risk, list) else [risk] + target_level_list = ( + target_level if isinstance(target_level, list) else [target_level] + ) + self._alpha = self._convert_target_level_to_alpha(target_level_list) self._delta = 1 - confidence_level self._best_predict_param_choice = self._set_best_predict_param_choice( @@ -1029,20 +1014,16 @@ def calibrate( # pragma: no cover self._predict_params ) - risks_and_eff_sizes = self._get_risks_and_effective_sample_sizes_per_param( + risk_values, eff_sample_sizes = self._get_risk_values_and_eff_sample_sizes( y_calibrate_, predictions_per_param, self._risk ) - - risks_per_param = risks_and_eff_sizes[:, 0] - eff_sample_sizes_per_param = risks_and_eff_sizes[:, 1] - valid_params_index = ltt_procedure( - risks_per_param, - np.array([self._alpha]), + risk_values, + np.expand_dims(self._alpha, axis=1), self._delta, - eff_sample_sizes_per_param, + eff_sample_sizes, True, )[0] @@ -1082,7 +1063,7 @@ def predict(self, X_test: ArrayLike) -> NDArray: raise ValueError( "Cannot predict. " "Either you forgot to calibrate the controller first, " - "either calibration was not successful." + "or calibration was not successful." ) return self._get_predictions_per_param( X_test, @@ -1095,16 +1076,20 @@ def _set_best_predict_param_choice( Literal["auto"], BinaryClassificationRisk] = "auto", ) -> BinaryClassificationRisk: if best_predict_param_choice == "auto": - try: - return self._best_predict_param_choice_map[ - self._risk - ] - except KeyError: - raise ValueError( - "When best_predict_param_choice is 'auto', " - "risk must be one of the risks defined in mapie.risk_control" - "(e.g. precision, accuracy, false_positive_rate)." - ) + if self.is_multi_risk: + # when multi risk, we minimize the first risk in the list + return self._risk[0] + else: + try: + return self._best_predict_param_choice_map[ + self._risk[0] + ] + except KeyError: + raise ValueError( + "When best_predict_param_choice is 'auto', " + "risk must be one of the risks defined in mapie.risk_control" + "(e.g. precision, accuracy, false_positive_rate)." 
+ ) else: return best_predict_param_choice @@ -1122,29 +1107,37 @@ def _set_best_predict_param( predictions_per_param: NDArray, valid_params_index: List[Any], ): - secondary_risks_per_param = \ - self._get_risks_and_effective_sample_sizes_per_param( + secondary_risks_per_param, _ = self._get_risk_values_and_eff_sample_sizes( y_calibrate_, predictions_per_param[valid_params_index], - self._best_predict_param_choice - )[:, 0] + [self._best_predict_param_choice] + ) self.best_predict_param = self.valid_predict_params[ np.argmin(secondary_risks_per_param) ] @staticmethod - def _get_risks_and_effective_sample_sizes_per_param( + def _get_risk_values_and_eff_sample_sizes( y_true: NDArray, predictions_per_param: NDArray, - risk: BinaryClassificationRisk, - ) -> NDArray: - return np.array( - [risk.get_value_and_effective_sample_size( - y_true, - predictions - ) for predictions in predictions_per_param] - ) + risks: List[BinaryClassificationRisk], + ) -> Tuple[NDArray, NDArray]: + """ + Compute the values of risks and effective sample sizes for multiple risks + and for multiple parameter values. + Returns arrays with shape (n_risks, n_params). + """ + risks_values_and_eff_sizes = np.array([ + [risk.get_value_and_effective_sample_size(y_true, predictions) + for predictions in predictions_per_param] + for risk in risks + ]) + + risk_values = risks_values_and_eff_sizes[:, :, 0] + effective_sample_sizes = risks_values_and_eff_sizes[:, :, 1] + + return risk_values, effective_sample_sizes def _get_predictions_per_param(self, X: ArrayLike, params: NDArray) -> NDArray: try: @@ -1171,3 +1164,42 @@ def _get_predictions_per_param(self, X: ArrayLike, params: NDArray) -> NDArray: else: raise return (predictions_proba[:, np.newaxis] >= params).T.astype(int) + + def _convert_target_level_to_alpha(self, target_level: List[float]) -> NDArray: + alpha = [] + for risk, target in zip(self._risk, target_level): + if risk.higher_is_better: + alpha.append(1 - target) + else: + alpha.append(target) + return np.array(alpha) + + @staticmethod + def _check_if_multi_risk_control( + risk: Union[BinaryClassificationRisk, List[BinaryClassificationRisk]], + target_level: Union[float, List[float]], + ) -> bool: + """ + Check if we are in a multi risk setting and if inputs types are correct. + """ + if ( + isinstance(risk, list) and isinstance(target_level, list) + and len(risk) == len(target_level) + and len(risk) > 0 + ): + if len(risk) == 1: + return False + else: + return True + elif ( + isinstance(risk, BinaryClassificationRisk) + and isinstance(target_level, float) + ): + return False + else: + raise ValueError( + "If you provide a list of risks, you must provide " + "a list of target levels of the same length and vice versa. " + "If you provide a single BinaryClassificationRisk risk, " + "you must provide a single float target level." + ) diff --git a/mapie/tests/README.md b/mapie/tests/README.md new file mode 100644 index 000000000..a00e7a90f --- /dev/null +++ b/mapie/tests/README.md @@ -0,0 +1,29 @@ +# Overall recommendations + +- Group tests in a class if more than one test is needed for a given function/functionality +- Prefer black-box tests (no mocks) if possible, to avoid testing implementation details. +- Avoid unnecessary comments/docstrings: the code must be self-explanatory as much as possible. + +# Unit tests + +## Scope + +Testing one function, method, or functionality + +## Recommendations + +- Focus on the function goal to define the test cases. +- Testing corner cases is not mandatory. 
Sometimes we prefer a function to fail rather than being robust to unwanted scenarii. +- Unit tests on their own should provide a coverage close to 100%. + +# Functional or end-to-end tests + +## Scope + +Testing the main functionalities of the API as seen from a user point-of-view, or testing behaviors hard to test in a unit style. + +## Recommendations + +- Such tests here should be added wisely (they take usually more time to run) +- Be careful of test time. Testing few _varied_ scenarios is more important than trying to test _all_ scenarios. +- This is not implemented yet, but ideally those tests should not count against coverage. diff --git a/mapie/tests/risk_control/test_binary_classification_control.py b/mapie/tests/risk_control/test_binary_classification_control.py index 36dcbb5b9..4a9662c82 100644 --- a/mapie/tests/risk_control/test_binary_classification_control.py +++ b/mapie/tests/risk_control/test_binary_classification_control.py @@ -1,4 +1,5 @@ from copy import deepcopy +from typing import List, Union import numpy as np import pandas as pd @@ -151,15 +152,29 @@ def test_auto_unknown_risk(self): best_predict_param_choice="auto" ) + def test_multi_risk_auto(self): + """Test _set_best_predict_param_choice with 'auto' mode for multiple risks.""" + first_risk = precision + controller = BinaryClassificationController( + predict_function=dummy_predict, + risk=[first_risk, recall], + target_level=[dummy_target, dummy_target], + best_predict_param_choice="auto" + ) + + result = controller._best_predict_param_choice + assert result is first_risk + @pytest.mark.parametrize( "risk_instance,target_level,expected_alpha", [ (recall, 0.6, 0.4), # higher_is_better=True (false_positive_rate, 0.6, 0.6), # higher_is_better=False + ([recall, false_positive_rate], [0.7, 0.8], [0.3, 0.8]), # multi-risk ], ) -def test_binary_classification_controller_alpha( +def test_binary_classification__convert_target_level_to_alpha( risk_instance: BinaryClassificationRisk, target_level: float, expected_alpha: float, @@ -169,7 +184,7 @@ def test_binary_classification_controller_alpha( risk=risk_instance, target_level=target_level, ) - assert np.isclose(controller._alpha, expected_alpha) + assert np.isclose(controller._alpha, expected_alpha).all() def test_binary_classification_controller_sklearn_pipeline_with_dataframe() -> None: @@ -406,3 +421,71 @@ def test_error(self, bcc_dummy): match=r"Cannot predict" ): controller.predict(dummy_X) + + +class TestCheckIfMultiRiskControl: + def test_mono_risk(self, bcc_deterministic: BinaryClassificationController): + is_multi_risk = bcc_deterministic._check_if_multi_risk_control( + precision, dummy_target + ) + assert not is_multi_risk + + def test_mono_risk_list(self, bcc_deterministic: BinaryClassificationController): + is_multi_risk = bcc_deterministic._check_if_multi_risk_control( + [precision], [dummy_target] + ) + assert not is_multi_risk + + def test_multi_risk(self, bcc_deterministic: BinaryClassificationController): + is_multi_risk = bcc_deterministic._check_if_multi_risk_control( + [precision, recall], + [dummy_target, dummy_target] + ) + assert is_multi_risk + + @pytest.mark.parametrize( + "risk,target_level", + [ + ([], []), + ([recall, false_positive_rate], 0.6), + (false_positive_rate, [0.6, 0.8]), + ([recall, false_positive_rate], [0.6, 0.8, 0.7]), + ], + ) + def test_error_cases( + self, + risk: Union[List[BinaryClassificationRisk], BinaryClassificationRisk], + target_level: Union[List[float], float] + ): + with pytest.raises(ValueError, match='If you provide 
a list of risks,'): + BinaryClassificationController._check_if_multi_risk_control( + risk, target_level + ) + + +@pytest.mark.parametrize( + "y_true, y_pred", + [ + (np.array([1, 0, 1, 0]), np.array([1, 1, 0, 0])), + (np.array([1, 1, 0, 0]), np.array([1, 1, 1, 0])), + (np.array([0, 0, 0, 0]), np.array([0, 1, 0, 1])), + ], +) +def test_get_risk_values_and_eff_sample_sizes( + y_true: NDArray, y_pred: NDArray +): + risk_list = [precision, recall, false_positive_rate] + + bcc = BinaryClassificationController( + predict_function=deterministic_predict_function, + risk=risk_list, + target_level=[dummy_target] * len(risk_list), + ) + all_values, all_n = bcc._get_risk_values_and_eff_sample_sizes( + y_true, y_pred[np.newaxis, :], risk_list + ) + + for i, risk in enumerate(risk_list): + value, n = risk.get_value_and_effective_sample_size(y_true, y_pred) + assert np.isclose(all_values[i], value) + assert all_n[i] == n diff --git a/mapie/tests/risk_control/test_control_risk.py b/mapie/tests/risk_control/test_control_risk.py index 3e74c9a73..1906cd8b6 100644 --- a/mapie/tests/risk_control/test_control_risk.py +++ b/mapie/tests/risk_control/test_control_risk.py @@ -45,11 +45,11 @@ [0., 1.] ]) -r_hat = np.array([0.5, 0.8]) +r_hat = np.array([[0.5, 0.8]]) -n = 1100 +n = np.array([[1100]]) -alpha = np.array([0.6]) +alpha = np.array([[0.6]]) valid_index = [[0, 1]] @@ -129,7 +129,7 @@ def test_compute_precision_with_wrong_shape() -> None: @pytest.mark.parametrize("alpha", [0.5, [0.5], [0.5, 0.9]]) def test_p_values_different_alpha(alpha: Union[float, NDArray]) -> None: """Test type for different alpha for p_values""" - result = compute_hoeffding_bentkus_p_value(r_hat, n, alpha) + result = compute_hoeffding_bentkus_p_value(r_hat[0], n[0], alpha) assert isinstance(result, np.ndarray) @@ -145,7 +145,7 @@ def test_find_lambda_control_star() -> None: @pytest.mark.parametrize("delta", [0.1, 0.8]) -@pytest.mark.parametrize("alpha", [[0.5], [0.6, 0.8]]) +@pytest.mark.parametrize("alpha", [np.array([[0.5]]), np.array([[0.6, 0.8]])]) def test_ltt_type_output_alpha_delta( alpha: NDArray, delta: float @@ -165,7 +165,7 @@ def test_warning_valid_index_empty() -> None: """Test warning sent when empty list""" valid_index = [[]] # type: List[List[int]] with pytest.warns( - UserWarning, match=r".*At least one sequence is empty*" + UserWarning, match=r".*Warning: the risk couldn'*" ): find_lambda_control_star(r_hat, valid_index, lambdas) @@ -212,8 +212,31 @@ def test_ltt_procedure_n_obs_negative() -> None: a loss, is undefined because the condition is never met. This should return an invalid lambda. """ - r_hat = np.array([0.5]) - n_obs = np.array([-1]) - alpha_np = np.array([0.6]) + r_hat = np.array([[0.5]]) + n_obs = np.array([[-1]]) + alpha_np = np.array([[0.6]]) binary = True assert ltt_procedure(r_hat, alpha_np, 0.1, n_obs, binary) == [[]] + + +def test_ltt_multi_risk() -> None: + """Test _ltt_procedure for multi risk scenario""" + assert ltt_procedure( + np.repeat(r_hat, 2, axis=0), + np.repeat(alpha, 2, axis=0), + 0.1, + np.repeat(n, 2, axis=0) + ) + + +def test_ltt_multi_risk_error() -> None: + """Test _ltt_procedure for multi risk scenario error where n_risks differ""" + with pytest.raises( + ValueError, match=r"r_hat, n_obs, and alpha_np must have the same length." + ): + ltt_procedure( + np.repeat(r_hat, 2, axis=0), + np.repeat(alpha, 1, axis=0), + 0.1, + np.repeat(n, 2, axis=0) + )
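
As a complement to the tests above, here is a self-contained NumPy illustration (made-up p-values, not from the codebase) of the selection rule ``ltt_procedure`` now applies in the multi-risk case: a lambda is kept only if every risk's p-value passes the Bonferroni-corrected threshold, which is why the implementation takes the maximum over the risk axis.

.. code-block:: python

    import numpy as np

    # One row per risk, one column per candidate lambda, for a single alpha level.
    p_values_per_risk = np.array([
        [0.001, 0.004, 0.020, 0.200],   # risk 1
        [0.002, 0.010, 0.030, 0.050],   # risk 2
    ])

    delta = 0.1
    n_lambdas = p_values_per_risk.shape[1]

    # Requiring all risks to be controlled amounts to testing the worst risk,
    # hence the max over the risk axis; Bonferroni divides delta by the
    # number of lambdas tested.
    worst_case = p_values_per_risk.max(axis=0)
    valid_lambdas = np.nonzero(worst_case <= delta / n_lambdas)[0]
    print(valid_lambdas)   # [0 1]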