Skip to content

Commit d3bb775

Browse files
fix: test failure fixes for v0.15.1 (#1358)
* fix: make FakeObject be the correct standard and robustify usage * fix: get none valued study object from server * add: test for minio download failures * fix: skip test for WSL as it is not supported * maint: rework if/else case workflow * maint: ruff fix * add/fix: log messages for no permission * fix: make flow name unique and enable testing of avoiding duplicates
1 parent c1911c7 commit d3bb775

File tree

7 files changed

+75
-31
lines changed

7 files changed

+75
-31
lines changed

openml/_api_calls.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ def _download_minio_bucket(source: str, destination: str | Path) -> None:
208208
for file_object in client.list_objects(bucket, prefix=prefix, recursive=True):
209209
if file_object.object_name is None:
210210
raise ValueError(f"Object name is None for object {file_object!r}")
211+
if file_object.etag is None:
212+
raise ValueError(f"Object etag is None for object {file_object!r}")
211213

212214
marker = destination / file_object.etag
213215
if marker.exists():

openml/config.py

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,10 @@ def _setup(config: _Config | None = None) -> None:
345345
if not config_dir.exists():
346346
config_dir.mkdir(exist_ok=True, parents=True)
347347
except PermissionError:
348-
pass
348+
openml_logger.warning(
349+
f"No permission to create OpenML directory at {config_dir}!"
350+
" This can result in OpenML-Python not working properly."
351+
)
349352

350353
if config is None:
351354
config = _parse_config(config_file)
@@ -367,27 +370,16 @@ def _setup(config: _Config | None = None) -> None:
367370

368371
try:
369372
cache_exists = _root_cache_directory.exists()
370-
except PermissionError:
371-
cache_exists = False
372-
373-
# create the cache subdirectory
374-
try:
375-
if not _root_cache_directory.exists():
373+
# create the cache subdirectory
374+
if not cache_exists:
376375
_root_cache_directory.mkdir(exist_ok=True, parents=True)
376+
_create_log_handlers()
377377
except PermissionError:
378378
openml_logger.warning(
379-
f"No permission to create openml cache directory at {_root_cache_directory}!"
380-
" This can result in OpenML-Python not working properly.",
379+
f"No permission to create OpenML directory at {_root_cache_directory}!"
380+
" This can result in OpenML-Python not working properly."
381381
)
382-
383-
if cache_exists:
384-
_create_log_handlers()
385-
else:
386382
_create_log_handlers(create_file_handler=False)
387-
openml_logger.warning(
388-
f"No permission to create OpenML directory at {config_dir}! This can result in "
389-
" OpenML-Python not working properly.",
390-
)
391383

392384

393385
def set_field_in_config_file(field: str, value: Any) -> None:

openml/runs/functions.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -206,9 +206,6 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913
206206
avoid_duplicate_runs : bool, optional (default=True)
207207
If True, the run will throw an error if the setup/task combination is already present on
208208
the server. This feature requires an internet connection.
209-
avoid_duplicate_runs : bool, optional (default=True)
210-
If True, the run will throw an error if the setup/task combination is already present on
211-
the server. This feature requires an internet connection.
212209
flow_tags : List[str], optional (default=None)
213210
A list of tags that the flow should have at creation.
214211
seed: int, optional (default=None)

openml/study/functions.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def get_study(
7878
return study
7979

8080

81-
def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
81+
def _get_study(id_: int | str, entity_type: str) -> BaseStudy: # noqa: C901
8282
xml_string = openml._api_calls._perform_api_call(f"study/{id_}", "get")
8383
force_list_tags = (
8484
"oml:data_id",
@@ -93,6 +93,12 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
9393
alias = result_dict.get("oml:alias", None)
9494
main_entity_type = result_dict["oml:main_entity_type"]
9595

96+
# Parses edge cases where the server returns a string with a newline character for empty values.
97+
none_value_indicator = "\n "
98+
for key in result_dict:
99+
if result_dict[key] == none_value_indicator:
100+
result_dict[key] = None
101+
96102
if entity_type != main_entity_type:
97103
raise ValueError(
98104
f"Unexpected entity type '{main_entity_type}' reported by the server"

tests/test_openml/test_api_calls.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,12 @@ def test_retry_on_database_error(self, Session_class_mock, _):
3636

3737
assert Session_class_mock.return_value.__enter__.return_value.get.call_count == 20
3838

39+
3940
class FakeObject(NamedTuple):
4041
object_name: str
42+
etag: str
43+
"""We use the etag of a Minio object as the name of a marker if we already downloaded it."""
44+
4145

4246
class FakeMinio:
4347
def __init__(self, objects: Iterable[FakeObject] | None = None):
@@ -60,7 +64,7 @@ def test_download_all_files_observes_cache(mock_minio, tmp_path: Path) -> None:
6064
some_url = f"https://not.real.com/bucket/{some_object_path}"
6165
mock_minio.return_value = FakeMinio(
6266
objects=[
63-
FakeObject(some_object_path),
67+
FakeObject(object_name=some_object_path, etag=str(hash(some_object_path))),
6468
],
6569
)
6670

@@ -71,3 +75,27 @@ def test_download_all_files_observes_cache(mock_minio, tmp_path: Path) -> None:
7175
time_modified = (tmp_path / some_filename).stat().st_mtime
7276

7377
assert time_created == time_modified
78+
79+
80+
@mock.patch.object(minio, "Minio")
81+
def test_download_minio_failure(mock_minio, tmp_path: Path) -> None:
82+
some_prefix, some_filename = "some/prefix", "dataset.arff"
83+
some_object_path = f"{some_prefix}/{some_filename}"
84+
some_url = f"https://not.real.com/bucket/{some_object_path}"
85+
mock_minio.return_value = FakeMinio(
86+
objects=[
87+
FakeObject(object_name=None, etag="tmp"),
88+
],
89+
)
90+
91+
with pytest.raises(ValueError):
92+
_download_minio_bucket(source=some_url, destination=tmp_path)
93+
94+
mock_minio.return_value = FakeMinio(
95+
objects=[
96+
FakeObject(object_name="tmp", etag=None),
97+
],
98+
)
99+
100+
with pytest.raises(ValueError):
101+
_download_minio_bucket(source=some_url, destination=tmp_path)

tests/test_openml/test_config.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from copy import copy
99
from typing import Any, Iterator
1010
from pathlib import Path
11+
import platform
1112

1213
import pytest
1314

@@ -37,14 +38,18 @@ class TestConfig(openml.testing.TestBase):
3738
@unittest.mock.patch("openml.config.openml_logger.warning")
3839
@unittest.mock.patch("openml.config._create_log_handlers")
3940
@unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033")
41+
@unittest.skipIf(
42+
platform.uname().release.endswith(("-Microsoft", "microsoft-standard-WSL2")),
43+
"WSL does nto support chmod as we would need here, see https://github.com/microsoft/WSL/issues/81",
44+
)
4045
def test_non_writable_home(self, log_handler_mock, warnings_mock):
4146
with tempfile.TemporaryDirectory(dir=self.workdir) as td:
4247
os.chmod(td, 0o444)
4348
_dd = copy(openml.config._defaults)
4449
_dd["cachedir"] = Path(td) / "something-else"
4550
openml.config._setup(_dd)
4651

47-
assert warnings_mock.call_count == 2
52+
assert warnings_mock.call_count == 1
4853
assert log_handler_mock.call_count == 1
4954
assert not log_handler_mock.call_args_list[0][1]["create_file_handler"]
5055
assert openml.config._root_cache_directory == Path(td) / "something-else"

tests/test_runs/test_run_functions.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,6 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds):
119119
# time.time() works in seconds
120120
start_time = time.time()
121121
while time.time() - start_time < max_waiting_time_seconds:
122-
123122
try:
124123
openml.runs.get_run_trace(run_id)
125124
except openml.exceptions.OpenMLServerException:
@@ -131,7 +130,9 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds):
131130
time.sleep(10)
132131
continue
133132

134-
assert len(run.evaluations) > 0, "Expect not-None evaluations to always contain elements."
133+
assert (
134+
len(run.evaluations) > 0
135+
), "Expect not-None evaluations to always contain elements."
135136
return
136137

137138
raise RuntimeError(
@@ -557,7 +558,7 @@ def determine_grid_size(param_grid):
557558
fold_evaluations=run.fold_evaluations,
558559
num_repeats=1,
559560
num_folds=num_folds,
560-
task_type=task_type
561+
task_type=task_type,
561562
)
562563

563564
# Check if run string and print representation do not run into an error
@@ -796,7 +797,9 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock):
796797

797798
@pytest.mark.sklearn()
798799
def test_run_and_upload_gridsearch(self):
799-
estimator_name = "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator"
800+
estimator_name = (
801+
"base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator"
802+
)
800803
gridsearch = GridSearchCV(
801804
BaggingClassifier(**{estimator_name: SVC()}),
802805
{f"{estimator_name}__C": [0.01, 0.1, 10], f"{estimator_name}__gamma": [0.01, 0.1, 10]},
@@ -1826,7 +1829,9 @@ def test_joblib_backends(self, parallel_mock):
18261829
num_instances = x.shape[0]
18271830
line_length = 6 + len(task.class_labels)
18281831

1829-
backend_choice = "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
1832+
backend_choice = (
1833+
"loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
1834+
)
18301835
for n_jobs, backend, call_count in [
18311836
(1, backend_choice, 10),
18321837
(2, backend_choice, 10),
@@ -1877,14 +1882,23 @@ def test_joblib_backends(self, parallel_mock):
18771882
reason="SimpleImputer doesn't handle mixed type DataFrame as input",
18781883
)
18791884
def test_delete_run(self):
1880-
rs = 1
1885+
rs = np.random.randint(1, 2**32 - 1)
18811886
clf = sklearn.pipeline.Pipeline(
1882-
steps=[("imputer", SimpleImputer()), ("estimator", DecisionTreeClassifier())],
1887+
steps=[
1888+
(f"test_server_imputer_{rs}", SimpleImputer()),
1889+
("estimator", DecisionTreeClassifier()),
1890+
],
18831891
)
18841892
task = openml.tasks.get_task(32) # diabetes; crossvalidation
18851893

1886-
run = openml.runs.run_model_on_task(model=clf, task=task, seed=rs)
1894+
run = openml.runs.run_model_on_task(
1895+
model=clf, task=task, seed=rs, avoid_duplicate_runs=False
1896+
)
18871897
run.publish()
1898+
1899+
with pytest.raises(openml.exceptions.OpenMLRunsExistError):
1900+
openml.runs.run_model_on_task(model=clf, task=task, seed=rs, avoid_duplicate_runs=True)
1901+
18881902
TestBase._mark_entity_for_removal("run", run.run_id)
18891903
TestBase.logger.info(f"collected from test_run_functions: {run.run_id}")
18901904

0 commit comments

Comments
 (0)