From 8a07c58961a3d40369bc420a98fd20af0df5471b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 09:13:41 -0500 Subject: [PATCH 01/88] start using datetimes with fractional seconds with mysql --- .../mlos_bench/storage/sql/experiment.py | 6 +--- mlos_bench/mlos_bench/storage/sql/schema.py | 31 +++++++++++++++---- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index acc2a497b4..7a6dbf75b4 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -367,11 +367,7 @@ def _new_trial( ts_start: datetime | None = None, config: dict[str, Any] | None = None, ) -> Storage.Trial: - # MySQL can round microseconds into the future causing scheduler to skip trials. - # Truncate microseconds to avoid this issue. - ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local").replace( - microsecond=0 - ) + ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local") _LOG.debug("Create trial: %s:%d @ %s", self._experiment_id, self._trial_id, ts_start) with self._engine.begin() as conn: try: diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 2bc00f0082..3cbb63bcc4 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,6 +39,7 @@ create_mock_engine, inspect, ) +from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -104,8 +105,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime), - Column("ts_end", DateTime), + Column("ts_start", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), + Column("ts_end", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._STATUS_LEN)), @@ -179,8 +180,16 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime, nullable=False), - Column("ts_end", DateTime), + Column( + "ts_start", + DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + ), + Column( + "ts_end", + DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=True, + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._STATUS_LEN), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -232,7 +241,12 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + default="now", + ), Column("status", String(self._STATUS_LEN), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -267,7 +281,12 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + default="now", + ), Column("metric_id", String(self._ID_LEN), nullable=False), Column("metric_value", String(self._METRIC_VALUE_LEN)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From d6fc9e279bdf0e5c932640ef58617d08a6b6bb00 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 09:18:40 -0500 Subject: [PATCH 02/88] adjust tests to also check for fractional time --- .../tests/environments/local/composite_local_env_test.py | 5 ++--- .../tests/environments/local/local_env_telemetry_test.py | 8 ++------ 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/environments/local/composite_local_env_test.py b/mlos_bench/mlos_bench/tests/environments/local/composite_local_env_test.py index 033fa16330..64f1df273c 100644 --- a/mlos_bench/mlos_bench/tests/environments/local/composite_local_env_test.py +++ b/mlos_bench/mlos_bench/tests/environments/local/composite_local_env_test.py @@ -17,8 +17,8 @@ def _format_str(zone_info: tzinfo | None) -> str: if zone_info is not None: - return "%Y-%m-%d %H:%M:%S %z" - return "%Y-%m-%d %H:%M:%S" + return "%Y-%m-%d %H:%M:%S.%f %z" + return "%Y-%m-%d %H:%M:%S.%f" # FIXME: This fails with zone_info = None when run with `TZ="America/Chicago pytest -n0 ...` @@ -34,7 +34,6 @@ def test_composite_env(tunable_groups: TunableGroups, zone_info: tzinfo | None) See Also: http://github.com/microsoft/MLOS/issues/501 """ ts1 = datetime.now(zone_info) - ts1 -= timedelta(microseconds=ts1.microsecond) # Round to a second ts2 = ts1 + timedelta(minutes=2) format_str = _format_str(zone_info) diff --git a/mlos_bench/mlos_bench/tests/environments/local/local_env_telemetry_test.py b/mlos_bench/mlos_bench/tests/environments/local/local_env_telemetry_test.py index a654ed1f34..2bc789485a 100644 --- a/mlos_bench/mlos_bench/tests/environments/local/local_env_telemetry_test.py +++ b/mlos_bench/mlos_bench/tests/environments/local/local_env_telemetry_test.py @@ -16,8 +16,8 @@ def _format_str(zone_info: tzinfo | None) -> str: if zone_info is not None: - return "%Y-%m-%d %H:%M:%S %z" - return "%Y-%m-%d %H:%M:%S" + return "%Y-%m-%d %H:%M:%S.%f %z" + return "%Y-%m-%d %H:%M:%S.%f" # FIXME: This fails with zone_info = None when run with `TZ="America/Chicago pytest -n0 ...` @@ -25,7 +25,6 @@ def _format_str(zone_info: tzinfo | None) -> str: def test_local_env_telemetry(tunable_groups: TunableGroups, zone_info: tzinfo | None) -> None: """Produce benchmark and telemetry data in a local script and read it.""" ts1 = datetime.now(zone_info) - ts1 -= timedelta(microseconds=ts1.microsecond) # Round to a second ts2 = ts1 + timedelta(minutes=1) format_str = _format_str(zone_info) @@ -77,7 +76,6 @@ def test_local_env_telemetry_no_header( ) -> None: """Read the telemetry data with no header.""" ts1 = datetime.now(zone_info) - ts1 -= timedelta(microseconds=ts1.microsecond) # Round to a second ts2 = ts1 + timedelta(minutes=1) format_str = _format_str(zone_info) @@ -121,7 +119,6 @@ def test_local_env_telemetry_wrong_header( ) -> None: """Read the telemetry data with incorrect header.""" ts1 = datetime.now(zone_info) - ts1 -= timedelta(microseconds=ts1.microsecond) # Round to a second ts2 = ts1 + timedelta(minutes=1) format_str = _format_str(zone_info) @@ -150,7 +147,6 @@ def test_local_env_telemetry_invalid(tunable_groups: TunableGroups) -> None: """Fail when the telemetry data has wrong format.""" zone_info = UTC ts1 = datetime.now(zone_info) - ts1 -= timedelta(microseconds=ts1.microsecond) # Round to a second ts2 = ts1 + timedelta(minutes=1) format_str = _format_str(zone_info) From 65e7f08578ede928d5bb5ca16b499f7276f05e57 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 09:19:17 -0500 Subject: [PATCH 03/88] Revert "start using datetimes with fractional seconds with mysql" This reverts commit 8a07c58961a3d40369bc420a98fd20af0df5471b. --- .../mlos_bench/storage/sql/experiment.py | 6 +++- mlos_bench/mlos_bench/storage/sql/schema.py | 31 ++++--------------- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 7a6dbf75b4..acc2a497b4 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -367,7 +367,11 @@ def _new_trial( ts_start: datetime | None = None, config: dict[str, Any] | None = None, ) -> Storage.Trial: - ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local") + # MySQL can round microseconds into the future causing scheduler to skip trials. + # Truncate microseconds to avoid this issue. + ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local").replace( + microsecond=0 + ) _LOG.debug("Create trial: %s:%d @ %s", self._experiment_id, self._trial_id, ts_start) with self._engine.begin() as conn: try: diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 3cbb63bcc4..2bc00f0082 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,7 +39,6 @@ create_mock_engine, inspect, ) -from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -105,8 +104,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), - Column("ts_end", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), + Column("ts_start", DateTime), + Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._STATUS_LEN)), @@ -180,16 +179,8 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column( - "ts_start", - DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - ), - Column( - "ts_end", - DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=True, - ), + Column("ts_start", DateTime, nullable=False), + Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._STATUS_LEN), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -241,12 +232,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("status", String(self._STATUS_LEN), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -281,12 +267,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("metric_id", String(self._ID_LEN), nullable=False), Column("metric_value", String(self._METRIC_VALUE_LEN)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From b7dad429bfba4e7ec55ab7e05a1a7b2ef39c11be Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 09:19:53 -0500 Subject: [PATCH 04/88] Reapply "start using datetimes with fractional seconds with mysql" This reverts commit 65e7f08578ede928d5bb5ca16b499f7276f05e57. --- .../mlos_bench/storage/sql/experiment.py | 6 +--- mlos_bench/mlos_bench/storage/sql/schema.py | 31 +++++++++++++++---- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index acc2a497b4..7a6dbf75b4 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -367,11 +367,7 @@ def _new_trial( ts_start: datetime | None = None, config: dict[str, Any] | None = None, ) -> Storage.Trial: - # MySQL can round microseconds into the future causing scheduler to skip trials. - # Truncate microseconds to avoid this issue. - ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local").replace( - microsecond=0 - ) + ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local") _LOG.debug("Create trial: %s:%d @ %s", self._experiment_id, self._trial_id, ts_start) with self._engine.begin() as conn: try: diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 2bc00f0082..3cbb63bcc4 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,6 +39,7 @@ create_mock_engine, inspect, ) +from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -104,8 +105,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime), - Column("ts_end", DateTime), + Column("ts_start", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), + Column("ts_end", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._STATUS_LEN)), @@ -179,8 +180,16 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime, nullable=False), - Column("ts_end", DateTime), + Column( + "ts_start", + DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + ), + Column( + "ts_end", + DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=True, + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._STATUS_LEN), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -232,7 +241,12 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + default="now", + ), Column("status", String(self._STATUS_LEN), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -267,7 +281,12 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + default="now", + ), Column("metric_id", String(self._ID_LEN), nullable=False), Column("metric_value", String(self._METRIC_VALUE_LEN)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From 4bf0c5f69966ade2a37cf6665b2385f58853e3e0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:35:00 -0500 Subject: [PATCH 05/88] apply black on alembic commit --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 4d2a1120c5..f5a249fd7a 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -72,10 +72,10 @@ sqlalchemy.url = sqlite:///mlos_bench.sqlite # detail and examples # format using "black" - use the console_scripts runner, against the "black" entrypoint -# hooks = black -# black.type = console_scripts -# black.entrypoint = black -# black.options = -l 79 REVISION_SCRIPT_FILENAME +hooks = black +black.type = console_scripts +black.entrypoint = black +black.options = REVISION_SCRIPT_FILENAME # lint with attempts to fix using "ruff" - use the exec runner, execute a binary # hooks = ruff From 8088734e20baa181c4eed46fe475a2a4649e6df5 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 11:18:46 -0500 Subject: [PATCH 06/88] preparing to support testing multiple backend engines for schema changes --- .../mlos_bench/config/storage/mysql.jsonc | 2 +- .../config/storage/postgresql.jsonc | 2 +- mlos_bench/mlos_bench/storage/sql/alembic.ini | 3 + .../mlos_bench/storage/sql/alembic/README.md | 115 +++++++++++++++--- .../tests/storage/test_storage_schemas.py | 1 + 5 files changed, 106 insertions(+), 17 deletions(-) diff --git a/mlos_bench/mlos_bench/config/storage/mysql.jsonc b/mlos_bench/mlos_bench/config/storage/mysql.jsonc index c133b379ba..15986de4c2 100644 --- a/mlos_bench/mlos_bench/config/storage/mysql.jsonc +++ b/mlos_bench/mlos_bench/config/storage/mysql.jsonc @@ -6,7 +6,7 @@ "log_sql": false, // Write all SQL statements to the log. // Parameters below must match kwargs of `sqlalchemy.URL.create()`: "drivername": "mysql+mysqlconnector", - "database": "osat", + "database": "mlos_bench", "username": "root", "password": "PLACERHOLDER PASSWORD", // Comes from global config "host": "localhost", diff --git a/mlos_bench/mlos_bench/config/storage/postgresql.jsonc b/mlos_bench/mlos_bench/config/storage/postgresql.jsonc index cd1214835c..1cff2d76b7 100644 --- a/mlos_bench/mlos_bench/config/storage/postgresql.jsonc +++ b/mlos_bench/mlos_bench/config/storage/postgresql.jsonc @@ -8,7 +8,7 @@ "log_sql": false, // Write all SQL statements to the log. // Parameters below must match kwargs of `sqlalchemy.URL.create()`: "drivername": "postgresql+psycopg2", - "database": "osat", + "database": "mlos_bench", "username": "postgres", "password": "PLACERHOLDER PASSWORD", // Comes from global config "host": "localhost", diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index f5a249fd7a..6cc1e4b7c8 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -63,7 +63,10 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # output_encoding = utf-8 # See README.md for details. +# Uncomment one of these: sqlalchemy.url = sqlite:///mlos_bench.sqlite +#sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench +#sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench [post_write_hooks] diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index ec35eb70f6..9a9aafe1f4 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -4,49 +4,134 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla ## Overview -1. Create a blank `mlos_bench.sqlite` database file in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: +1. Create a blank database instance in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: - ```sh - cd mlos_bench/storage/sql - rm mlos_bench.sqlite - mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only - ``` + This allows `alembic` to automatically generate a migration script from the current schema. + + > NOTE: If your schema changes target a particular backend engine (e.g., using `with_variant`) you will need to use an engine with that config for this step. + > \ + > In the remainder of this document we should some examples for different DB types. + > Pick the one you're targeting and stick with it thru the example. + > You may need to repeat the process several times to test all of them. + > - [ ] TODO: Add scripts to automatically do this for several different backend engines all at once. + + For instance: + + 1. Start a temporary server either as a local file or in a docker instance + + ```sh + # sqlite + cd mlos_bench/storage/sql + rm -f mlos_bench.sqlite + ``` + + ```sh + # mysql + docker run -it --rm --name mysql-alembic --env MYSQL_ROOT_PASSWORD=password --env MYSQL_DATABASE=mlos_bench -p 3306:3306 mysql:latest + ``` - > This allows `alembic` to automatically generate a migration script from the current schema. + ```sh + # postgres + docker run -it --rm --name postgres-alembic --env POSTGRES_PASSWORD=password --env POSTGRES_DB=mlos_bench -p 5432:5432 postgres:latest + ``` -1. Adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. + 1. Adjust the `sqlalchemy.url` in the [`alembic.ini`](../alembic.ini) file. + + ```ini + # Uncomment one of these. + # See README.md for details. + + #sqlalchemy.url = sqlite:///mlos_bench.sqlite + sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench + #sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench + ``` + + 1. Prime the DB schema + + ```sh + # sqlite + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # mysql + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + +2. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. > Keep each change small and atomic. > For example, if you want to add a new column, do that in one change. > If you want to rename a column, do that in another change. -1. Generate a new migration script with the following command: +3. Generate a new migration script with the following command: ```sh - alembic revision --autogenerate -m "Descriptive text about the change." + alembic revision --autogenerate -m "CHANGEME: Descriptive text about the change." ``` -1. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. +4. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. -1. Verify that the migration script works by running the following command: +5. Verify that the migration script works by running the following command: ```sh + # sqlite mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only ``` + ```sh + # mysql: + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres: + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + > Normally this would be done with `alembic upgrade head`, but this command is convenient to ensure if will work with the `mlos_bench` command line interface as well. Examine the results using something like: ```sh + # For sqlite: sqlite3 mlos_bench.sqlite .schema sqlite3 mlos_bench.sqlite "SELECT * FROM alembic_version;" ``` -1. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. + ```sh + # For mysql: + mysql --user root --password=password --host localhost --protocol tcp --database mlos_bench -e "SHOW TABLES; SELECT * FROM alembic_version;" + ``` + + ```sh + # For postgres: + PGPASSWORD=password psql -h localhost -p 5432 -U postgres mlos_bench -c "SELECT table_name FROM information_schema.tables WHERE table_schema='public' and table_catalog='mlos_bench'; SELECT * FROM alembic_version;" + ``` + + > Use different CLI clients for targeting other engines. + +6. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. -1. Merge that to the `main` branch. +7. Cleanup any server instances you started. + + For instance: + + ```sh + rm mlos_bench/storage/sql/mlos_bench.sqlite + ``` + + ```sh + docker kill mysql-alembic + ``` + +8. Merge that to the `main` branch. -1. Might be good to cut a new `mlos_bench` release at this point as well. +9. Might be good to cut a new `mlos_bench` release at this point as well. diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py index 8a6c36e6bb..1cbdaa9c6a 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py @@ -14,6 +14,7 @@ # See Also: schema.py for an example of programmatic alembic config access. CURRENT_ALEMBIC_HEAD = "8928a401115b" +# TODO: Test other engines as well. def test_storage_schemas(storage: SqlStorage) -> None: """Test storage schema creation.""" From a2c32562fd879d241a5d1a6335492c7d933358d3 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:34:31 -0500 Subject: [PATCH 07/88] refactoring storage tests to check other db engines --- mlos_bench/mlos_bench/storage/sql/schema.py | 25 ++++ mlos_bench/mlos_bench/storage/sql/storage.py | 27 ++++ mlos_bench/mlos_bench/tests/__init__.py | 24 +++ .../test_load_scheduler_config_examples.py | 4 +- mlos_bench/mlos_bench/tests/conftest.py | 1 + .../tests/services/remote/ssh/README.md | 2 +- .../tests/services/remote/ssh/__init__.py | 19 +-- .../tests/services/remote/ssh/fixtures.py | 30 ++-- .../remote/ssh/test_ssh_host_service.py | 3 +- .../mlos_bench/tests/storage/conftest.py | 4 +- .../mlos_bench/tests/storage/sql/README.md | 27 ++++ .../mlos_bench/tests/storage/sql/__init__.py | 72 +++++++++ .../tests/storage/sql/docker-compose.yml | 40 +++++ .../mlos_bench/tests/storage/sql/down.sh | 18 +++ .../mlos_bench/tests/storage/sql/fixtures.py | 138 +++++++++++++++++- .../storage/{ => sql}/test_storage_schemas.py | 24 ++- mlos_bench/mlos_bench/tests/storage/sql/up.sh | 87 +++++++++++ .../tests/storage/test_storage_pickling.py | 28 +++- 18 files changed, 524 insertions(+), 49 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/storage/sql/README.md create mode 100644 mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml create mode 100755 mlos_bench/mlos_bench/tests/storage/sql/down.sh rename mlos_bench/mlos_bench/tests/storage/{ => sql}/test_storage_schemas.py (68%) create mode 100755 mlos_bench/mlos_bench/tests/storage/sql/up.sh diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 3cbb63bcc4..f0071bd757 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -315,6 +315,31 @@ def _get_alembic_cfg(conn: Connection) -> config.Config: alembic_cfg.attributes["connection"] = conn return alembic_cfg + def drop_all_tables(self, *, force: bool = False) -> None: + """ + Helper method used in testing to reset the DB schema. + + Notes + ----- + This method is not intended for production use, as it will drop all tables + in the database. Use with caution. + + Parameters + ---------- + force : bool + If True, drop all tables in the target database. + If False, this method will not drop any tables and will log a warning. + """ + assert self._engine + self.meta.reflect(bind=self._engine) + if force: + self.meta.drop_all(bind=self._engine) + else: + _LOG.warning( + "Resetting the schema without force is not implemented. " + "Use force=True to drop all tables." + ) + def create(self) -> "DbSchema": """Create the DB schema.""" _LOG.info("Create the DB schema") diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 6d98dc97fd..7fc86aff1f 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -82,6 +82,33 @@ def _schema(self) -> DbSchema: _LOG.debug("DDL statements:\n%s", self._db_schema) return self._db_schema + def _reset_schema(self, *, force: bool = False) -> None: + """ + Helper method used in testing to reset the DB schema. + + Notes + ----- + This method is not intended for production use, as it will drop all tables + in the database. Use with caution. + + Parameters + ---------- + force : bool + If True, drop all tables in the target database. + If False, this method will not drop any tables and will log a warning. + """ + assert self._engine + if force: + self._schema.drop_all_tables(force=force) + self._db_schema = DbSchema(self._engine) + self._schema_created = False + self._schema_updated = False + else: + _LOG.warning( + "Resetting the schema without force is not implemented. " + "Use force=True to drop all tables." + ) + def update_schema(self) -> None: """Update the database schema.""" if not self._schema_updated: diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index ce5fbdb45a..2d9112e52d 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -86,6 +86,30 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: full_class_name = obj.__class__.__module__ + "." + obj.__class__.__name__ return full_class_name == try_resolve_class_name(expected_class_name) +HOST_DOCKER_NAME = "host.docker.internal" + + +@pytest.fixture(scope="session") +def docker_hostname() -> str: + """Returns the local hostname to use to connect to the test ssh server.""" + if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): + # On Linux, if we're running in a docker container, we can use the + # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. + return HOST_DOCKER_NAME + # Docker (Desktop) for Windows (WSL2) uses a special networking magic + # to refer to the host machine as `localhost` when exposing ports. + # In all other cases, assume we're executing directly inside conda on the host. + return "localhost" + + +def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: + """Wait until a docker service is ready.""" + docker_services.wait_until_responsive( + check=lambda: check_socket(hostname, port), + timeout=30.0, + pause=0.5, + ) + def check_socket(host: str, port: int, timeout: float = 1.0) -> bool: """ diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index ad8f9248ac..7d19c7a2c1 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -14,7 +14,7 @@ from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.services.config_persistence import ConfigPersistenceService -from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.storage.base_storage import Storage from mlos_bench.tests.config import BUILTIN_TEST_CONFIG_PATH, locate_config_examples from mlos_bench.util import get_class_from_name @@ -58,7 +58,7 @@ def test_load_scheduler_config_examples( config_path: str, mock_env_config_path: str, trial_runners: list[TrialRunner], - storage: SqlStorage, + storage: Storage, mock_opt: MockOptimizer, ) -> None: """Tests loading a config example.""" diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index becae20503..8e0aba5c00 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -90,6 +90,7 @@ def docker_compose_file(pytestconfig: pytest.Config) -> list[str]: _ = pytestconfig # unused return [ os.path.join(os.path.dirname(__file__), "services", "remote", "ssh", "docker-compose.yml"), + os.path.join(os.path.dirname(__file__), "storage", "sql", "docker-compose.yml"), # Add additional configs as necessary here. ] diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md b/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md index 4fe4216ff3..827372d58c 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md @@ -22,7 +22,7 @@ These are brought up as session fixtures under a unique (PID based) compose proj In the case of `pytest`, since the `SshService` base class implements a shared connection cache that we wish to test, and testing "rebooting" of servers (containers) is also necessary, but we want to avoid single threaded execution for tests, we start a third container only for testing reboots. -Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portal synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. +Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portable synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. ## See Also diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py index a6244e3a7a..877d39a26a 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py @@ -7,9 +7,6 @@ from dataclasses import dataclass from subprocess import run -from pytest_docker.plugin import Services as DockerServices - -from mlos_bench.tests import check_socket # The SSH test server port and name. # See Also: docker-compose.yml @@ -21,7 +18,12 @@ @dataclass class SshTestServerInfo: - """A data class for SshTestServerInfo.""" + """A data class for SshTestServerInfo. + + See Also + -------- + mlos_bench.tests.storage.sql.SqlTestServerInfo + """ compose_project_name: str service_name: str @@ -70,12 +72,3 @@ def to_connect_params(self, uncached: bool = False) -> dict: "port": self.get_port(uncached), "username": self.username, } - - -def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: - """Wait until a docker service is ready.""" - docker_services.wait_until_responsive( - check=lambda: check_socket(hostname, port), - timeout=30.0, - pause=0.5, - ) diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py index 79938784a7..23a4a9ed2f 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py @@ -19,36 +19,20 @@ from mlos_bench.services.remote.ssh.ssh_fileshare import SshFileShareService from mlos_bench.services.remote.ssh.ssh_host_service import SshHostService -from mlos_bench.tests import resolve_host_name +from mlos_bench.tests import wait_docker_service_socket from mlos_bench.tests.services.remote.ssh import ( ALT_TEST_SERVER_NAME, REBOOT_TEST_SERVER_NAME, SSH_TEST_SERVER_NAME, SshTestServerInfo, - wait_docker_service_socket, ) # pylint: disable=redefined-outer-name -HOST_DOCKER_NAME = "host.docker.internal" - - -@pytest.fixture(scope="session") -def ssh_test_server_hostname() -> str: - """Returns the local hostname to use to connect to the test ssh server.""" - if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): - # On Linux, if we're running in a docker container, we can use the - # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. - return HOST_DOCKER_NAME - # Docker (Desktop) for Windows (WSL2) uses a special networking magic - # to refer to the host machine as `localhost` when exposing ports. - # In all other cases, assume we're executing directly inside conda on the host. - return "localhost" - @pytest.fixture(scope="session") def ssh_test_server( - ssh_test_server_hostname: str, + docker_hostname: str, docker_compose_project_name: str, locked_docker_services: DockerServices, ) -> Generator[SshTestServerInfo]: @@ -66,12 +50,14 @@ def ssh_test_server( ssh_test_server_info = SshTestServerInfo( compose_project_name=docker_compose_project_name, service_name=SSH_TEST_SERVER_NAME, - hostname=ssh_test_server_hostname, + hostname=docker_hostname, username="root", id_rsa_path=id_rsa_file.name, ) wait_docker_service_socket( - locked_docker_services, ssh_test_server_info.hostname, ssh_test_server_info.get_port() + locked_docker_services, + ssh_test_server_info.hostname, + ssh_test_server_info.get_port(), ) id_rsa_src = f"/{ssh_test_server_info.username}/.ssh/id_rsa" docker_cp_cmd = ( @@ -116,7 +102,9 @@ def alt_test_server( id_rsa_path=ssh_test_server.id_rsa_path, ) wait_docker_service_socket( - locked_docker_services, alt_test_server_info.hostname, alt_test_server_info.get_port() + locked_docker_services, + alt_test_server_info.hostname, + alt_test_server_info.get_port(), ) return alt_test_server_info diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py index 003a8e6433..30fdc804f2 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py @@ -12,13 +12,12 @@ from mlos_bench.services.remote.ssh.ssh_host_service import SshHostService from mlos_bench.services.remote.ssh.ssh_service import SshClient -from mlos_bench.tests import requires_docker +from mlos_bench.tests import requires_docker, wait_docker_service_socket, from mlos_bench.tests.services.remote.ssh import ( ALT_TEST_SERVER_NAME, REBOOT_TEST_SERVER_NAME, SSH_TEST_SERVER_NAME, SshTestServerInfo, - wait_docker_service_socket, ) _LOG = logging.getLogger(__name__) diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py index c510793fac..526ea945f2 100644 --- a/mlos_bench/mlos_bench/tests/storage/conftest.py +++ b/mlos_bench/mlos_bench/tests/storage/conftest.py @@ -11,8 +11,10 @@ # same. # Expose some of those as local names so they can be picked up as fixtures by pytest. -storage = sql_storage_fixtures.storage +mysql_storage = sql_storage_fixtures.mysql_storage +postgres_storage = sql_storage_fixtures.postgres_storage sqlite_storage = sql_storage_fixtures.sqlite_storage +storage = sql_storage_fixtures.storage exp_storage = sql_storage_fixtures.exp_storage exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage mixed_numerics_exp_storage = sql_storage_fixtures.mixed_numerics_exp_storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/README.md b/mlos_bench/mlos_bench/tests/storage/sql/README.md new file mode 100644 index 0000000000..c9b124eab3 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/README.md @@ -0,0 +1,27 @@ +# Sql Storage Tests + +The "unit" tests for the `SqlStorage` classes are more functional than other unit tests in that we don't merely mock them out, but actually setup small SQL databases with `docker compose` and interact with them using the `SqlStorage` class. + +To do this, we make use of the `pytest-docker` plugin to bring up the services defined in the [`docker-compose.yml`](./docker-compose.yml) file in this directory. + +There are currently two services defined in that config, though others could be added in the future: + +1. `mysql-mlos-bench-server` +1. `postgres-mlos-bench-server` + +We rely on `docker compose` to map their internal container service ports to random ports on the host. +Hence, when connecting, we need to look up these ports on demand using something akin to `docker compose port`. +Because of complexities of networking in different development environments (especially for Docker on WSL2 for Windows), we may also have to connect to a different host address than `localhost` (e.g., `host.docker.internal`, which is dynamically requested as a part of of the [devcontainer](../../../../../../.devcontainer/docker-compose.yml) setup). + +These containers are brought up as session fixtures under a unique (PID based) compose project name for each `pytest` invocation, but only when docker is detected on the host (via the `@docker_required` decorator we define in [`mlos_bench/tests/__init__.py`](../../../__init__.py)), else those tests are skipped. + +> For manual testing, to bring up/down the test infrastructure the [`up.sh`](./up.sh) and [`down.sh`](./down.sh) scripts can be used, which assigns a known project name. + +In the case of `pytest`, we also want to be able to test with a fresh state in most cases, so we use the `pytest` `yield` pattern to allow schema cleanup code to happen after the end of each test (see: `_create_storage_from_test_server_info`). +We use lockfiles to prevent races between tests that would otherwise try to create or cleanup the same database schema at the same time. + +Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portable synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. + +## See Also + +Notes in the [`mlos_bench/tests/services/remote/ssh/README.md`](../../../services/remote/ssh/README.md) file for a similar setup for SSH services. diff --git a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py index d17a448b5e..442e531a43 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py @@ -3,3 +3,75 @@ # Licensed under the MIT License. # """Tests for mlos_bench sql storage.""" + +from dataclasses import dataclass +from subprocess import run + + +# The DB servers' names and other connection info. +# See Also: docker-compose.yml + +MYSQL_TEST_SERVER_NAME = "mysql-mlos-bench-server" +PGSQL_TEST_SERVER_NAME = "postgres-mlos-bench-server" + +SQL_TEST_SERVER_DATABASE = "mlos_bench" +SQL_TEST_SERVER_PASSWORD = "password" + + +@dataclass +class SqlTestServerInfo: + """A data class for SqlTestServerInfo. + + See Also + -------- + mlos_bench.tests.services.remote.ssh.SshTestServerInfo + """ + + compose_project_name: str + service_name: str + hostname: str + _port: int | None = None + + @property + def username(self) -> str: + """Gets the username.""" + usernames = { + MYSQL_TEST_SERVER_NAME: "root", + PGSQL_TEST_SERVER_NAME: "postgres", + } + return usernames[self.service_name] + + @property + def password(self) -> str: + """Gets the password.""" + return SQL_TEST_SERVER_PASSWORD + + @property + def database(self) -> str: + """Gets the database.""" + return SQL_TEST_SERVER_DATABASE + + def get_port(self, uncached: bool = False) -> int: + """ + Gets the port that the SSH test server is listening on. + + Note: this value can change when the service restarts so we can't rely on + the DockerServices. + """ + if self._port is None or uncached: + default_ports = { + MYSQL_TEST_SERVER_NAME: 3306, + PGSQL_TEST_SERVER_NAME: 5432, + } + default_port = default_ports[self.service_name] + port_cmd = run( + ( + f"docker compose -p {self.compose_project_name} " + f"port {self.service_name} {default_port}" + ), + shell=True, + check=True, + capture_output=True, + ) + self._port = int(port_cmd.stdout.decode().strip().split(":")[1]) + return self._port diff --git a/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml b/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml new file mode 100644 index 0000000000..0bfd0bce81 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml @@ -0,0 +1,40 @@ +name: mlos_bench-test-sql-storage +services: + mysql-mlos-bench-server: + hostname: mysql-mlos-bench-server + attach: false + image: docker.io/library/mysql:latest + ports: + # To allow multiple instances of this to coexist, instead of explicitly + # mapping the port, let it get assigned randomly on the host. + - ${PORT:-3306} + extra_hosts: + - host.docker.internal:host-gateway + environment: + - MYSQL_ROOT_PASSWORD=password + - MYSQL_DATABASE=mlos_bench + healthcheck: + test: ["CMD-SHELL", "mysqladmin --host localhost --protocol tcp --password=$${MYSQL_ROOT_PASSWORD} ping"] + interval: 10s + timeout: 30s + retries: 5 + start_period: 5s + postgres-mlos-bench-server: + hostname: postgres-mlos-bench-server + attach: false + image: docker.io/library/postgres:latest + ports: + # To allow multiple instances of this to coexist, instead of explicitly + # mapping the port, let it get assigned randomly on the host. + - ${PORT:-5432} + extra_hosts: + - host.docker.internal:host-gateway + environment: + - POSTGRES_PASSWORD=password + - POSTGRES_DB=mlos_bench + healthcheck: + test: ["CMD-SHELL", "pg_isready -d $${POSTGRES_DB}"] + interval: 10s + timeout: 30s + retries: 5 + start_period: 5s diff --git a/mlos_bench/mlos_bench/tests/storage/sql/down.sh b/mlos_bench/mlos_bench/tests/storage/sql/down.sh new file mode 100755 index 0000000000..3d6068cecf --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/down.sh @@ -0,0 +1,18 @@ +#!/bin/bash +## +## Copyright (c) Microsoft Corporation. +## Licensed under the MIT License. +## + +# A script to stop the containerized SQL DBMS servers. +# For pytest, the fixture in conftest.py will handle this for us using the +# pytest-docker plugin, but for manual testing, this script can be used. + +set -eu + +scriptdir=$(dirname "$(readlink -f "$0")") +cd "$scriptdir" + +PROJECT_NAME="mlos_bench-test-sql-storage-manual" + +docker compose -p "$PROJECT_NAME" down diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 0bebeeff82..50376b8727 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -8,9 +8,12 @@ import os import tempfile from collections.abc import Generator +from importlib.resources import files from random import seed as rand_seed +from filelock import FileLock import pytest +from pytest_docker.plugin import Services as DockerServices from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.sync_scheduler import SyncScheduler @@ -19,16 +22,149 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config -from mlos_bench.tests import SEED +from mlos_bench.util import path_join +from mlos_bench.tests import SEED, wait_docker_service_socket from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, MAX_TRIALS, TRIAL_RUNNER_COUNT, ) +from mlos_bench.tests.storage.sql import ( + SqlTestServerInfo, + MYSQL_TEST_SERVER_NAME, + PGSQL_TEST_SERVER_NAME, +) from mlos_bench.tunables.tunable_groups import TunableGroups # pylint: disable=redefined-outer-name +# TODO: Add mysql_storage and postgres_storage + + +@pytest.fixture(scope="session") +def mysql_storage_info( + docker_hostname: str, + docker_compose_project_name: str, + locked_docker_services: DockerServices, +) -> SqlTestServerInfo: + """ + Fixture for getting mysql storage connection info. + """ + storage_info = SqlTestServerInfo( + compose_project_name=docker_compose_project_name, + service_name=MYSQL_TEST_SERVER_NAME, + hostname=docker_hostname, + ) + wait_docker_service_socket( + locked_docker_services, + storage_info.hostname, + storage_info.get_port(), + ) + return storage_info + + +@pytest.fixture(scope="session") +def postgres_storage_info( + docker_hostname: str, + docker_compose_project_name: str, + locked_docker_services: DockerServices, +) -> SqlTestServerInfo: + """ + Fixture for getting postgres storage connection info. + """ + storage_info = SqlTestServerInfo( + compose_project_name=docker_compose_project_name, + service_name=PGSQL_TEST_SERVER_NAME, + hostname=docker_hostname, + ) + wait_docker_service_socket( + locked_docker_services, + storage_info.hostname, + storage_info.get_port(), + ) + return storage_info + + +def _create_storage_from_test_server_info( + config_file: str, + test_server_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Creates a SqlStorage instance from the given test server info. + + Notes + ----- + Resets the schema as a cleanup operation on return from the function scope + fixture so each test gets a fresh storage instance. + Uses a file lock to ensure that only one test can access the storage at a time. + + Yields + ------ + SqlStorage + """ + sql_storage_name = test_server_info.service_name + with FileLock(path_join(shared_temp_dir, f"{sql_storage_name}-{short_testrun_uid}.lock")): + global_config = { + "host": test_server_info.username, + "port": test_server_info.get_port() or 0, + "database": test_server_info.database, + "username": test_server_info.username, + "password": test_server_info.password, + "lazy_schema_create": True, + } + storage = from_config( + config_file, + global_configs=[json.dumps(global_config)], + ) + assert isinstance(storage, SqlStorage) + yield storage + # Cleanup the storage on return + storage._reset_schema(force=True) # pylint: disable=protected-access + + +@pytest.fixture(scope="function") +def mysql_storage( + mysql_storage_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Fixture of a MySQL backed SqlStorage engine. + + See Also + -------- + _create_storage_from_test_server_info + """ + return _create_storage_from_test_server_info( + path_join(str(files("mlos_bench.config")), "storage", "mysql.jsonc"), + mysql_storage_info, + shared_temp_dir, + short_testrun_uid, + ) + + +@pytest.fixture(scope="function") +def postgres_storage( + postgres_storage_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Fixture of a MySQL backed SqlStorage engine. + + See Also + -------- + _create_storage_from_test_server_info + """ + return _create_storage_from_test_server_info( + path_join(str(files("mlos_bench.config")), "storage", "postgresql.jsonc"), + postgres_storage_info, + shared_temp_dir, + short_testrun_uid, + ) + @pytest.fixture def sqlite_storage() -> Generator[SqlStorage]: diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py similarity index 68% rename from mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py rename to mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index 1cbdaa9c6a..660f4e245d 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -4,21 +4,37 @@ # """Test sql schemas for mlos_bench storage.""" +import pytest +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture + from alembic.migration import MigrationContext from sqlalchemy import inspect from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.tests import DOCKER + # NOTE: This value is hardcoded to the latest revision in the alembic versions directory. # It could also be obtained programmatically using the "alembic heads" command or heads() API. # See Also: schema.py for an example of programmatic alembic config access. CURRENT_ALEMBIC_HEAD = "8928a401115b" -# TODO: Test other engines as well. - -def test_storage_schemas(storage: SqlStorage) -> None: +# Try to test multiple DBMS engines. +docker_dbms_fixtures = [] +if DOCKER: + docker_dbms_fixtures = [ + lazy_fixture("mysql_storage"), + lazy_fixture("postgres_storage"), + ] +@pytest.mark.parameterize( + ["some_sql_storage_fixture"], [ + lazy_fixture("sql_storage"), + *docker_dbms_fixtures, + ] +) +def test_storage_schemas(some_sql_storage_fixture: SqlStorage) -> None: """Test storage schema creation.""" - eng = storage._engine # pylint: disable=protected-access + eng = some_sql_storage_fixture._engine # pylint: disable=protected-access with eng.connect() as conn: # pylint: disable=protected-access inspector = inspect(conn) # Make sure the "trial_runner_id" column exists. diff --git a/mlos_bench/mlos_bench/tests/storage/sql/up.sh b/mlos_bench/mlos_bench/tests/storage/sql/up.sh new file mode 100755 index 0000000000..fd99d1770d --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/up.sh @@ -0,0 +1,87 @@ +#!/bin/bash +## +## Copyright (c) Microsoft Corporation. +## Licensed under the MIT License. +## + +# A script to start the containerized SQL DBMS servers. +# For pytest, the fixture in conftest.py will handle this for us using the +# pytest-docker plugin, but for manual testing, this script can be used. + +set -eu +set -x + +scriptdir=$(dirname "$(readlink -f "$0")") +cd "$scriptdir" + +PROJECT_NAME="mlos_bench-test-sql-storage-manual" +CONTAINER_COUNT=2 + +if ! type mysqladmin >/dev/null 2>&1; then + echo "ERROR: Missing mysqladmin tool to check status of the server." >&2 + exit 1 +fi + +if ! type psql >/dev/null 2>&1; then + echo "ERROR: Missing psql tool to check status of the server." >&2 + exit 1 +fi + + +docker compose -p "$PROJECT_NAME" up --build --remove-orphans -d +set +x + +while ! docker compose -p "$PROJECT_NAME" ps --format '{{.Name}} {{.State}} {{.Health}}' | grep -c ' running healthy$' | grep -q -x $CONTAINER_COUNT; do + echo "Waiting for $CONTAINER_COUNT containers to report healthy ..." + sleep 1 +done + +mysql_port=$(docker compose -p "$PROJECT_NAME" port mysql-mlos-bench-server ${PORT:-3306} | cut -d: -f2) +echo "Connect to the mysql server container at the following port: $mysql_port" +postgres_port=$(docker compose -p "$PROJECT_NAME" port postgres-mlos-bench-server ${PORT:-5432} | cut -d: -f2) +echo "Connect to the postgres server container at the following port: $postgres_port" + +# TODO: Remove the rest of this: + +mysql_ok=0 +if ! type mysqladmin >/dev/null 2>&1; then + echo "WARNING: Missing mysqladmin tool to check status of the server." >&2 + mysql_ok=1 +fi + +pg_ok=0 +if ! type psql >/dev/null 2>&1; then + echo "WARNING: Missing psql tool to check status of the server." >&2 + pg_ok=1 +fi + +for i in {1..10}; do + if [ "$mysql_ok" == "1" ]; then + break + fi + if mysqladmin -h localhost --port $mysql_port --protocol tcp --password=password ping >/dev/null; then + mysql_ok=1 + else + sleep 1 + fi +done +if [ "$mysql_ok" != 1 ]; then + echo "ERR: MySQL failed to start." >&2 + exit 1 +fi + +for i in {1..10}; do + if [ "$pg_ok" == "1" ]; then + break + fi + if PGPASSWORD=password psql -h localhost -p $postgres_port -U postgres mlos_bench -c "SELECT 1;" > /dev/null; then + pg_ok=1 + break + else + sleep 1 + fi +done +if [ "$pg_ok" != 1 ]; then + echo "ERR: Postgres failed to start." >&2 + exit 1 +fi diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index 7871e7f68c..1aa29b5efe 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -9,11 +9,20 @@ from typing import Literal import pytest +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture from pytz import UTC from mlos_bench.environments.status import Status -from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.storage.base_storage import Storage from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.tests import DOCKER + +docker_dbms_fixtures = [] +if DOCKER: + docker_dbms_fixtures = [ + lazy_fixture("mysql_storage"), + lazy_fixture("postgres_storage"), + ] # TODO: When we introduce ParallelTrialScheduler warn at config startup time @@ -22,14 +31,25 @@ sys.platform == "win32", reason="Windows doesn't support multiple processes accessing the same file.", ) +@pytest.mark.parametrize( + ["persistent_storage"], + [ + # TODO: Improve this test to support non-sql backends eventually as well. + lazy_fixture("sqlite_storage"), + *docker_dbms_fixtures, + ], +) def test_storage_pickle_restore_experiment_and_trial( - sqlite_storage: SqlStorage, + persistent_storage: Storage, tunable_groups: TunableGroups, ) -> None: """Check that we can pickle and unpickle the Storage object, and restore Experiment and Trial by id. """ - storage = sqlite_storage + storage = persistent_storage + storage_class = storage.__class__ + assert issubclass(storage_class, Storage) + assert storage_class != Storage # Create an Experiment and a Trial opt_targets: dict[str, Literal["min", "max"]] = {"metric": "min"} experiment = storage.experiment( @@ -49,7 +69,7 @@ def test_storage_pickle_restore_experiment_and_trial( # Pickle and unpickle the Storage object pickled = pickle.dumps(storage) restored_storage = pickle.loads(pickled) - assert isinstance(restored_storage, SqlStorage) + assert isinstance(restored_storage, storage_class) # Restore the Experiment from storage by id and check that it matches the original restored_experiment = restored_storage.get_experiment_by_id( From 5d3f0d3c66d9e827caf61649cbc34c436d397042 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:35:31 -0500 Subject: [PATCH 08/88] change column lengths for mysql --- mlos_bench/mlos_bench/storage/sql/schema.py | 56 ++++++++++++--------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index f0071bd757..facc57b818 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -73,12 +73,6 @@ class DbSchema: # for all DB tables, so it's ok to disable the warnings. # pylint: disable=too-many-instance-attributes - # Common string column sizes. - _ID_LEN = 512 - _PARAM_VALUE_LEN = 1024 - _METRIC_VALUE_LEN = 255 - _STATUS_LEN = 16 - def __init__(self, engine: Engine | None): """ Declare the SQLAlchemy schema for the database. @@ -96,10 +90,24 @@ def __init__(self, engine: Engine | None): self._engine = engine self._meta = MetaData() + # Common string column sizes. + self._exp_id_len = 512 + self._param_id_len = 512 + self._param_value_len = 1024 + self._metric_id_len = 512 + self._metric_value_len = 255 + self._status_len = 16 + + # Some overrides for certain DB engines: + if engine and engine.dialect.name in {"mysql", "mariadb"}: + self._exp_id_len = 255 + self._param_id_len = 255 + self._metric_id_len = 255 + self.experiment = Table( "experiment", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("description", String(1024)), Column("root_env_config", String(1024), nullable=False), Column("git_repo", String(1024), nullable=False), @@ -109,7 +117,7 @@ def __init__(self, engine: Engine | None): Column("ts_end", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. - Column("status", String(self._STATUS_LEN)), + Column("status", String(self._status_len)), # There may be more than one mlos_benchd_service running on different hosts. # This column stores the host/container name of the driver that # picked up the experiment. @@ -127,7 +135,7 @@ def __init__(self, engine: Engine | None): "objectives", self._meta, Column("exp_id"), - Column("optimization_target", String(self._ID_LEN), nullable=False), + Column("optimization_target", String(self._metric_id_len), nullable=False), Column("optimization_direction", String(4), nullable=False), # TODO: Note: weight is not fully supported yet as currently # multi-objective is expected to explore each objective equally. @@ -176,7 +184,7 @@ def __init__(self, engine: Engine | None): self.trial = Table( "trial", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), @@ -191,7 +199,7 @@ def __init__(self, engine: Engine | None): nullable=True, ), # Should match the text IDs of `mlos_bench.environments.Status` enum: - Column("status", String(self._STATUS_LEN), nullable=False), + Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), @@ -206,8 +214,8 @@ def __init__(self, engine: Engine | None): "config_param", self._meta, Column("config_id", Integer, nullable=False), - Column("param_id", String(self._ID_LEN), nullable=False), - Column("param_value", String(self._PARAM_VALUE_LEN)), + Column("param_id", String(self._param_id_len), nullable=False), + Column("param_value", String(self._param_value_len)), PrimaryKeyConstraint("config_id", "param_id"), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) @@ -221,10 +229,10 @@ def __init__(self, engine: Engine | None): self.trial_param = Table( "trial_param", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("param_id", String(self._ID_LEN), nullable=False), - Column("param_value", String(self._PARAM_VALUE_LEN)), + Column("param_id", String(self._param_id_len), nullable=False), + Column("param_value", String(self._param_value_len)), PrimaryKeyConstraint("exp_id", "trial_id", "param_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -239,7 +247,7 @@ def __init__(self, engine: Engine | None): self.trial_status = Table( "trial_status", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column( "ts", @@ -247,7 +255,7 @@ def __init__(self, engine: Engine | None): nullable=False, default="now", ), - Column("status", String(self._STATUS_LEN), nullable=False), + Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -261,10 +269,10 @@ def __init__(self, engine: Engine | None): self.trial_result = Table( "trial_result", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("metric_id", String(self._ID_LEN), nullable=False), - Column("metric_value", String(self._METRIC_VALUE_LEN)), + Column("metric_id", String(self._metric_id_len), nullable=False), + Column("metric_value", String(self._metric_value_len)), PrimaryKeyConstraint("exp_id", "trial_id", "metric_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -279,7 +287,7 @@ def __init__(self, engine: Engine | None): self.trial_telemetry = Table( "trial_telemetry", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column( "ts", @@ -287,8 +295,8 @@ def __init__(self, engine: Engine | None): nullable=False, default="now", ), - Column("metric_id", String(self._ID_LEN), nullable=False), - Column("metric_value", String(self._METRIC_VALUE_LEN)), + Column("metric_id", String(self._metric_id_len), nullable=False), + Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], From 01b9df4101799ee0ad6c2a3c5d27b38d78c81fa2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:35:41 -0500 Subject: [PATCH 09/88] more refactor of storage tests --- mlos_bench/mlos_bench/tests/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index 2d9112e52d..63be24b311 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -11,9 +11,11 @@ import os import shutil import socket +import sys from datetime import tzinfo from logging import debug, warning from subprocess import run +from pytest_docker.plugin import Services as DockerServices import pytest import pytz From d5781765fa15691d7b2f60a3700468baf0186ecd Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 11:18:46 -0500 Subject: [PATCH 10/88] preparing to support testing multiple backend engines for schema changes --- .../mlos_bench/config/storage/mysql.jsonc | 2 +- .../config/storage/postgresql.jsonc | 2 +- mlos_bench/mlos_bench/storage/sql/alembic.ini | 3 + .../mlos_bench/storage/sql/alembic/README.md | 115 +++++++++++++++--- .../tests/storage/test_storage_schemas.py | 1 + 5 files changed, 106 insertions(+), 17 deletions(-) diff --git a/mlos_bench/mlos_bench/config/storage/mysql.jsonc b/mlos_bench/mlos_bench/config/storage/mysql.jsonc index c133b379ba..15986de4c2 100644 --- a/mlos_bench/mlos_bench/config/storage/mysql.jsonc +++ b/mlos_bench/mlos_bench/config/storage/mysql.jsonc @@ -6,7 +6,7 @@ "log_sql": false, // Write all SQL statements to the log. // Parameters below must match kwargs of `sqlalchemy.URL.create()`: "drivername": "mysql+mysqlconnector", - "database": "osat", + "database": "mlos_bench", "username": "root", "password": "PLACERHOLDER PASSWORD", // Comes from global config "host": "localhost", diff --git a/mlos_bench/mlos_bench/config/storage/postgresql.jsonc b/mlos_bench/mlos_bench/config/storage/postgresql.jsonc index cd1214835c..1cff2d76b7 100644 --- a/mlos_bench/mlos_bench/config/storage/postgresql.jsonc +++ b/mlos_bench/mlos_bench/config/storage/postgresql.jsonc @@ -8,7 +8,7 @@ "log_sql": false, // Write all SQL statements to the log. // Parameters below must match kwargs of `sqlalchemy.URL.create()`: "drivername": "postgresql+psycopg2", - "database": "osat", + "database": "mlos_bench", "username": "postgres", "password": "PLACERHOLDER PASSWORD", // Comes from global config "host": "localhost", diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 4d2a1120c5..7f88d809ee 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -63,7 +63,10 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # output_encoding = utf-8 # See README.md for details. +# Uncomment one of these: sqlalchemy.url = sqlite:///mlos_bench.sqlite +#sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench +#sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench [post_write_hooks] diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index ec35eb70f6..9a9aafe1f4 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -4,49 +4,134 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla ## Overview -1. Create a blank `mlos_bench.sqlite` database file in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: +1. Create a blank database instance in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: - ```sh - cd mlos_bench/storage/sql - rm mlos_bench.sqlite - mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only - ``` + This allows `alembic` to automatically generate a migration script from the current schema. + + > NOTE: If your schema changes target a particular backend engine (e.g., using `with_variant`) you will need to use an engine with that config for this step. + > \ + > In the remainder of this document we should some examples for different DB types. + > Pick the one you're targeting and stick with it thru the example. + > You may need to repeat the process several times to test all of them. + > - [ ] TODO: Add scripts to automatically do this for several different backend engines all at once. + + For instance: + + 1. Start a temporary server either as a local file or in a docker instance + + ```sh + # sqlite + cd mlos_bench/storage/sql + rm -f mlos_bench.sqlite + ``` + + ```sh + # mysql + docker run -it --rm --name mysql-alembic --env MYSQL_ROOT_PASSWORD=password --env MYSQL_DATABASE=mlos_bench -p 3306:3306 mysql:latest + ``` - > This allows `alembic` to automatically generate a migration script from the current schema. + ```sh + # postgres + docker run -it --rm --name postgres-alembic --env POSTGRES_PASSWORD=password --env POSTGRES_DB=mlos_bench -p 5432:5432 postgres:latest + ``` -1. Adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. + 1. Adjust the `sqlalchemy.url` in the [`alembic.ini`](../alembic.ini) file. + + ```ini + # Uncomment one of these. + # See README.md for details. + + #sqlalchemy.url = sqlite:///mlos_bench.sqlite + sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench + #sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench + ``` + + 1. Prime the DB schema + + ```sh + # sqlite + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # mysql + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + +2. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. > Keep each change small and atomic. > For example, if you want to add a new column, do that in one change. > If you want to rename a column, do that in another change. -1. Generate a new migration script with the following command: +3. Generate a new migration script with the following command: ```sh - alembic revision --autogenerate -m "Descriptive text about the change." + alembic revision --autogenerate -m "CHANGEME: Descriptive text about the change." ``` -1. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. +4. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. -1. Verify that the migration script works by running the following command: +5. Verify that the migration script works by running the following command: ```sh + # sqlite mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only ``` + ```sh + # mysql: + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres: + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + > Normally this would be done with `alembic upgrade head`, but this command is convenient to ensure if will work with the `mlos_bench` command line interface as well. Examine the results using something like: ```sh + # For sqlite: sqlite3 mlos_bench.sqlite .schema sqlite3 mlos_bench.sqlite "SELECT * FROM alembic_version;" ``` -1. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. + ```sh + # For mysql: + mysql --user root --password=password --host localhost --protocol tcp --database mlos_bench -e "SHOW TABLES; SELECT * FROM alembic_version;" + ``` + + ```sh + # For postgres: + PGPASSWORD=password psql -h localhost -p 5432 -U postgres mlos_bench -c "SELECT table_name FROM information_schema.tables WHERE table_schema='public' and table_catalog='mlos_bench'; SELECT * FROM alembic_version;" + ``` + + > Use different CLI clients for targeting other engines. + +6. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. -1. Merge that to the `main` branch. +7. Cleanup any server instances you started. + + For instance: + + ```sh + rm mlos_bench/storage/sql/mlos_bench.sqlite + ``` + + ```sh + docker kill mysql-alembic + ``` + +8. Merge that to the `main` branch. -1. Might be good to cut a new `mlos_bench` release at this point as well. +9. Might be good to cut a new `mlos_bench` release at this point as well. diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py index 8a6c36e6bb..1cbdaa9c6a 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py @@ -14,6 +14,7 @@ # See Also: schema.py for an example of programmatic alembic config access. CURRENT_ALEMBIC_HEAD = "8928a401115b" +# TODO: Test other engines as well. def test_storage_schemas(storage: SqlStorage) -> None: """Test storage schema creation.""" From 3b3bf6ce50ff23e43bd0bf62c26f2410877ea458 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:34:31 -0500 Subject: [PATCH 11/88] refactoring storage tests to check other db engines --- mlos_bench/mlos_bench/storage/sql/schema.py | 25 ++++ mlos_bench/mlos_bench/storage/sql/storage.py | 27 ++++ mlos_bench/mlos_bench/tests/__init__.py | 24 +++ .../test_load_scheduler_config_examples.py | 4 +- mlos_bench/mlos_bench/tests/conftest.py | 1 + .../tests/services/remote/ssh/README.md | 2 +- .../tests/services/remote/ssh/__init__.py | 19 +-- .../tests/services/remote/ssh/fixtures.py | 30 ++-- .../remote/ssh/test_ssh_host_service.py | 3 +- .../mlos_bench/tests/storage/conftest.py | 4 +- .../mlos_bench/tests/storage/sql/README.md | 27 ++++ .../mlos_bench/tests/storage/sql/__init__.py | 72 +++++++++ .../tests/storage/sql/docker-compose.yml | 40 +++++ .../mlos_bench/tests/storage/sql/down.sh | 18 +++ .../mlos_bench/tests/storage/sql/fixtures.py | 138 +++++++++++++++++- .../storage/{ => sql}/test_storage_schemas.py | 24 ++- mlos_bench/mlos_bench/tests/storage/sql/up.sh | 87 +++++++++++ .../tests/storage/test_storage_pickling.py | 28 +++- 18 files changed, 524 insertions(+), 49 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/storage/sql/README.md create mode 100644 mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml create mode 100755 mlos_bench/mlos_bench/tests/storage/sql/down.sh rename mlos_bench/mlos_bench/tests/storage/{ => sql}/test_storage_schemas.py (68%) create mode 100755 mlos_bench/mlos_bench/tests/storage/sql/up.sh diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 2bc00f0082..c6a7849880 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -296,6 +296,31 @@ def _get_alembic_cfg(conn: Connection) -> config.Config: alembic_cfg.attributes["connection"] = conn return alembic_cfg + def drop_all_tables(self, *, force: bool = False) -> None: + """ + Helper method used in testing to reset the DB schema. + + Notes + ----- + This method is not intended for production use, as it will drop all tables + in the database. Use with caution. + + Parameters + ---------- + force : bool + If True, drop all tables in the target database. + If False, this method will not drop any tables and will log a warning. + """ + assert self._engine + self.meta.reflect(bind=self._engine) + if force: + self.meta.drop_all(bind=self._engine) + else: + _LOG.warning( + "Resetting the schema without force is not implemented. " + "Use force=True to drop all tables." + ) + def create(self) -> "DbSchema": """Create the DB schema.""" _LOG.info("Create the DB schema") diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 6d98dc97fd..7fc86aff1f 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -82,6 +82,33 @@ def _schema(self) -> DbSchema: _LOG.debug("DDL statements:\n%s", self._db_schema) return self._db_schema + def _reset_schema(self, *, force: bool = False) -> None: + """ + Helper method used in testing to reset the DB schema. + + Notes + ----- + This method is not intended for production use, as it will drop all tables + in the database. Use with caution. + + Parameters + ---------- + force : bool + If True, drop all tables in the target database. + If False, this method will not drop any tables and will log a warning. + """ + assert self._engine + if force: + self._schema.drop_all_tables(force=force) + self._db_schema = DbSchema(self._engine) + self._schema_created = False + self._schema_updated = False + else: + _LOG.warning( + "Resetting the schema without force is not implemented. " + "Use force=True to drop all tables." + ) + def update_schema(self) -> None: """Update the database schema.""" if not self._schema_updated: diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index ce5fbdb45a..2d9112e52d 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -86,6 +86,30 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: full_class_name = obj.__class__.__module__ + "." + obj.__class__.__name__ return full_class_name == try_resolve_class_name(expected_class_name) +HOST_DOCKER_NAME = "host.docker.internal" + + +@pytest.fixture(scope="session") +def docker_hostname() -> str: + """Returns the local hostname to use to connect to the test ssh server.""" + if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): + # On Linux, if we're running in a docker container, we can use the + # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. + return HOST_DOCKER_NAME + # Docker (Desktop) for Windows (WSL2) uses a special networking magic + # to refer to the host machine as `localhost` when exposing ports. + # In all other cases, assume we're executing directly inside conda on the host. + return "localhost" + + +def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: + """Wait until a docker service is ready.""" + docker_services.wait_until_responsive( + check=lambda: check_socket(hostname, port), + timeout=30.0, + pause=0.5, + ) + def check_socket(host: str, port: int, timeout: float = 1.0) -> bool: """ diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index ad8f9248ac..7d19c7a2c1 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -14,7 +14,7 @@ from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.services.config_persistence import ConfigPersistenceService -from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.storage.base_storage import Storage from mlos_bench.tests.config import BUILTIN_TEST_CONFIG_PATH, locate_config_examples from mlos_bench.util import get_class_from_name @@ -58,7 +58,7 @@ def test_load_scheduler_config_examples( config_path: str, mock_env_config_path: str, trial_runners: list[TrialRunner], - storage: SqlStorage, + storage: Storage, mock_opt: MockOptimizer, ) -> None: """Tests loading a config example.""" diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index becae20503..8e0aba5c00 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -90,6 +90,7 @@ def docker_compose_file(pytestconfig: pytest.Config) -> list[str]: _ = pytestconfig # unused return [ os.path.join(os.path.dirname(__file__), "services", "remote", "ssh", "docker-compose.yml"), + os.path.join(os.path.dirname(__file__), "storage", "sql", "docker-compose.yml"), # Add additional configs as necessary here. ] diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md b/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md index 4fe4216ff3..827372d58c 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md @@ -22,7 +22,7 @@ These are brought up as session fixtures under a unique (PID based) compose proj In the case of `pytest`, since the `SshService` base class implements a shared connection cache that we wish to test, and testing "rebooting" of servers (containers) is also necessary, but we want to avoid single threaded execution for tests, we start a third container only for testing reboots. -Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portal synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. +Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portable synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. ## See Also diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py index a6244e3a7a..877d39a26a 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py @@ -7,9 +7,6 @@ from dataclasses import dataclass from subprocess import run -from pytest_docker.plugin import Services as DockerServices - -from mlos_bench.tests import check_socket # The SSH test server port and name. # See Also: docker-compose.yml @@ -21,7 +18,12 @@ @dataclass class SshTestServerInfo: - """A data class for SshTestServerInfo.""" + """A data class for SshTestServerInfo. + + See Also + -------- + mlos_bench.tests.storage.sql.SqlTestServerInfo + """ compose_project_name: str service_name: str @@ -70,12 +72,3 @@ def to_connect_params(self, uncached: bool = False) -> dict: "port": self.get_port(uncached), "username": self.username, } - - -def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: - """Wait until a docker service is ready.""" - docker_services.wait_until_responsive( - check=lambda: check_socket(hostname, port), - timeout=30.0, - pause=0.5, - ) diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py index 79938784a7..23a4a9ed2f 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py @@ -19,36 +19,20 @@ from mlos_bench.services.remote.ssh.ssh_fileshare import SshFileShareService from mlos_bench.services.remote.ssh.ssh_host_service import SshHostService -from mlos_bench.tests import resolve_host_name +from mlos_bench.tests import wait_docker_service_socket from mlos_bench.tests.services.remote.ssh import ( ALT_TEST_SERVER_NAME, REBOOT_TEST_SERVER_NAME, SSH_TEST_SERVER_NAME, SshTestServerInfo, - wait_docker_service_socket, ) # pylint: disable=redefined-outer-name -HOST_DOCKER_NAME = "host.docker.internal" - - -@pytest.fixture(scope="session") -def ssh_test_server_hostname() -> str: - """Returns the local hostname to use to connect to the test ssh server.""" - if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): - # On Linux, if we're running in a docker container, we can use the - # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. - return HOST_DOCKER_NAME - # Docker (Desktop) for Windows (WSL2) uses a special networking magic - # to refer to the host machine as `localhost` when exposing ports. - # In all other cases, assume we're executing directly inside conda on the host. - return "localhost" - @pytest.fixture(scope="session") def ssh_test_server( - ssh_test_server_hostname: str, + docker_hostname: str, docker_compose_project_name: str, locked_docker_services: DockerServices, ) -> Generator[SshTestServerInfo]: @@ -66,12 +50,14 @@ def ssh_test_server( ssh_test_server_info = SshTestServerInfo( compose_project_name=docker_compose_project_name, service_name=SSH_TEST_SERVER_NAME, - hostname=ssh_test_server_hostname, + hostname=docker_hostname, username="root", id_rsa_path=id_rsa_file.name, ) wait_docker_service_socket( - locked_docker_services, ssh_test_server_info.hostname, ssh_test_server_info.get_port() + locked_docker_services, + ssh_test_server_info.hostname, + ssh_test_server_info.get_port(), ) id_rsa_src = f"/{ssh_test_server_info.username}/.ssh/id_rsa" docker_cp_cmd = ( @@ -116,7 +102,9 @@ def alt_test_server( id_rsa_path=ssh_test_server.id_rsa_path, ) wait_docker_service_socket( - locked_docker_services, alt_test_server_info.hostname, alt_test_server_info.get_port() + locked_docker_services, + alt_test_server_info.hostname, + alt_test_server_info.get_port(), ) return alt_test_server_info diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py index 003a8e6433..30fdc804f2 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py @@ -12,13 +12,12 @@ from mlos_bench.services.remote.ssh.ssh_host_service import SshHostService from mlos_bench.services.remote.ssh.ssh_service import SshClient -from mlos_bench.tests import requires_docker +from mlos_bench.tests import requires_docker, wait_docker_service_socket, from mlos_bench.tests.services.remote.ssh import ( ALT_TEST_SERVER_NAME, REBOOT_TEST_SERVER_NAME, SSH_TEST_SERVER_NAME, SshTestServerInfo, - wait_docker_service_socket, ) _LOG = logging.getLogger(__name__) diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py index c510793fac..526ea945f2 100644 --- a/mlos_bench/mlos_bench/tests/storage/conftest.py +++ b/mlos_bench/mlos_bench/tests/storage/conftest.py @@ -11,8 +11,10 @@ # same. # Expose some of those as local names so they can be picked up as fixtures by pytest. -storage = sql_storage_fixtures.storage +mysql_storage = sql_storage_fixtures.mysql_storage +postgres_storage = sql_storage_fixtures.postgres_storage sqlite_storage = sql_storage_fixtures.sqlite_storage +storage = sql_storage_fixtures.storage exp_storage = sql_storage_fixtures.exp_storage exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage mixed_numerics_exp_storage = sql_storage_fixtures.mixed_numerics_exp_storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/README.md b/mlos_bench/mlos_bench/tests/storage/sql/README.md new file mode 100644 index 0000000000..c9b124eab3 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/README.md @@ -0,0 +1,27 @@ +# Sql Storage Tests + +The "unit" tests for the `SqlStorage` classes are more functional than other unit tests in that we don't merely mock them out, but actually setup small SQL databases with `docker compose` and interact with them using the `SqlStorage` class. + +To do this, we make use of the `pytest-docker` plugin to bring up the services defined in the [`docker-compose.yml`](./docker-compose.yml) file in this directory. + +There are currently two services defined in that config, though others could be added in the future: + +1. `mysql-mlos-bench-server` +1. `postgres-mlos-bench-server` + +We rely on `docker compose` to map their internal container service ports to random ports on the host. +Hence, when connecting, we need to look up these ports on demand using something akin to `docker compose port`. +Because of complexities of networking in different development environments (especially for Docker on WSL2 for Windows), we may also have to connect to a different host address than `localhost` (e.g., `host.docker.internal`, which is dynamically requested as a part of of the [devcontainer](../../../../../../.devcontainer/docker-compose.yml) setup). + +These containers are brought up as session fixtures under a unique (PID based) compose project name for each `pytest` invocation, but only when docker is detected on the host (via the `@docker_required` decorator we define in [`mlos_bench/tests/__init__.py`](../../../__init__.py)), else those tests are skipped. + +> For manual testing, to bring up/down the test infrastructure the [`up.sh`](./up.sh) and [`down.sh`](./down.sh) scripts can be used, which assigns a known project name. + +In the case of `pytest`, we also want to be able to test with a fresh state in most cases, so we use the `pytest` `yield` pattern to allow schema cleanup code to happen after the end of each test (see: `_create_storage_from_test_server_info`). +We use lockfiles to prevent races between tests that would otherwise try to create or cleanup the same database schema at the same time. + +Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portable synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. + +## See Also + +Notes in the [`mlos_bench/tests/services/remote/ssh/README.md`](../../../services/remote/ssh/README.md) file for a similar setup for SSH services. diff --git a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py index d17a448b5e..442e531a43 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py @@ -3,3 +3,75 @@ # Licensed under the MIT License. # """Tests for mlos_bench sql storage.""" + +from dataclasses import dataclass +from subprocess import run + + +# The DB servers' names and other connection info. +# See Also: docker-compose.yml + +MYSQL_TEST_SERVER_NAME = "mysql-mlos-bench-server" +PGSQL_TEST_SERVER_NAME = "postgres-mlos-bench-server" + +SQL_TEST_SERVER_DATABASE = "mlos_bench" +SQL_TEST_SERVER_PASSWORD = "password" + + +@dataclass +class SqlTestServerInfo: + """A data class for SqlTestServerInfo. + + See Also + -------- + mlos_bench.tests.services.remote.ssh.SshTestServerInfo + """ + + compose_project_name: str + service_name: str + hostname: str + _port: int | None = None + + @property + def username(self) -> str: + """Gets the username.""" + usernames = { + MYSQL_TEST_SERVER_NAME: "root", + PGSQL_TEST_SERVER_NAME: "postgres", + } + return usernames[self.service_name] + + @property + def password(self) -> str: + """Gets the password.""" + return SQL_TEST_SERVER_PASSWORD + + @property + def database(self) -> str: + """Gets the database.""" + return SQL_TEST_SERVER_DATABASE + + def get_port(self, uncached: bool = False) -> int: + """ + Gets the port that the SSH test server is listening on. + + Note: this value can change when the service restarts so we can't rely on + the DockerServices. + """ + if self._port is None or uncached: + default_ports = { + MYSQL_TEST_SERVER_NAME: 3306, + PGSQL_TEST_SERVER_NAME: 5432, + } + default_port = default_ports[self.service_name] + port_cmd = run( + ( + f"docker compose -p {self.compose_project_name} " + f"port {self.service_name} {default_port}" + ), + shell=True, + check=True, + capture_output=True, + ) + self._port = int(port_cmd.stdout.decode().strip().split(":")[1]) + return self._port diff --git a/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml b/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml new file mode 100644 index 0000000000..0bfd0bce81 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml @@ -0,0 +1,40 @@ +name: mlos_bench-test-sql-storage +services: + mysql-mlos-bench-server: + hostname: mysql-mlos-bench-server + attach: false + image: docker.io/library/mysql:latest + ports: + # To allow multiple instances of this to coexist, instead of explicitly + # mapping the port, let it get assigned randomly on the host. + - ${PORT:-3306} + extra_hosts: + - host.docker.internal:host-gateway + environment: + - MYSQL_ROOT_PASSWORD=password + - MYSQL_DATABASE=mlos_bench + healthcheck: + test: ["CMD-SHELL", "mysqladmin --host localhost --protocol tcp --password=$${MYSQL_ROOT_PASSWORD} ping"] + interval: 10s + timeout: 30s + retries: 5 + start_period: 5s + postgres-mlos-bench-server: + hostname: postgres-mlos-bench-server + attach: false + image: docker.io/library/postgres:latest + ports: + # To allow multiple instances of this to coexist, instead of explicitly + # mapping the port, let it get assigned randomly on the host. + - ${PORT:-5432} + extra_hosts: + - host.docker.internal:host-gateway + environment: + - POSTGRES_PASSWORD=password + - POSTGRES_DB=mlos_bench + healthcheck: + test: ["CMD-SHELL", "pg_isready -d $${POSTGRES_DB}"] + interval: 10s + timeout: 30s + retries: 5 + start_period: 5s diff --git a/mlos_bench/mlos_bench/tests/storage/sql/down.sh b/mlos_bench/mlos_bench/tests/storage/sql/down.sh new file mode 100755 index 0000000000..3d6068cecf --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/down.sh @@ -0,0 +1,18 @@ +#!/bin/bash +## +## Copyright (c) Microsoft Corporation. +## Licensed under the MIT License. +## + +# A script to stop the containerized SQL DBMS servers. +# For pytest, the fixture in conftest.py will handle this for us using the +# pytest-docker plugin, but for manual testing, this script can be used. + +set -eu + +scriptdir=$(dirname "$(readlink -f "$0")") +cd "$scriptdir" + +PROJECT_NAME="mlos_bench-test-sql-storage-manual" + +docker compose -p "$PROJECT_NAME" down diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 0bebeeff82..50376b8727 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -8,9 +8,12 @@ import os import tempfile from collections.abc import Generator +from importlib.resources import files from random import seed as rand_seed +from filelock import FileLock import pytest +from pytest_docker.plugin import Services as DockerServices from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.sync_scheduler import SyncScheduler @@ -19,16 +22,149 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config -from mlos_bench.tests import SEED +from mlos_bench.util import path_join +from mlos_bench.tests import SEED, wait_docker_service_socket from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, MAX_TRIALS, TRIAL_RUNNER_COUNT, ) +from mlos_bench.tests.storage.sql import ( + SqlTestServerInfo, + MYSQL_TEST_SERVER_NAME, + PGSQL_TEST_SERVER_NAME, +) from mlos_bench.tunables.tunable_groups import TunableGroups # pylint: disable=redefined-outer-name +# TODO: Add mysql_storage and postgres_storage + + +@pytest.fixture(scope="session") +def mysql_storage_info( + docker_hostname: str, + docker_compose_project_name: str, + locked_docker_services: DockerServices, +) -> SqlTestServerInfo: + """ + Fixture for getting mysql storage connection info. + """ + storage_info = SqlTestServerInfo( + compose_project_name=docker_compose_project_name, + service_name=MYSQL_TEST_SERVER_NAME, + hostname=docker_hostname, + ) + wait_docker_service_socket( + locked_docker_services, + storage_info.hostname, + storage_info.get_port(), + ) + return storage_info + + +@pytest.fixture(scope="session") +def postgres_storage_info( + docker_hostname: str, + docker_compose_project_name: str, + locked_docker_services: DockerServices, +) -> SqlTestServerInfo: + """ + Fixture for getting postgres storage connection info. + """ + storage_info = SqlTestServerInfo( + compose_project_name=docker_compose_project_name, + service_name=PGSQL_TEST_SERVER_NAME, + hostname=docker_hostname, + ) + wait_docker_service_socket( + locked_docker_services, + storage_info.hostname, + storage_info.get_port(), + ) + return storage_info + + +def _create_storage_from_test_server_info( + config_file: str, + test_server_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Creates a SqlStorage instance from the given test server info. + + Notes + ----- + Resets the schema as a cleanup operation on return from the function scope + fixture so each test gets a fresh storage instance. + Uses a file lock to ensure that only one test can access the storage at a time. + + Yields + ------ + SqlStorage + """ + sql_storage_name = test_server_info.service_name + with FileLock(path_join(shared_temp_dir, f"{sql_storage_name}-{short_testrun_uid}.lock")): + global_config = { + "host": test_server_info.username, + "port": test_server_info.get_port() or 0, + "database": test_server_info.database, + "username": test_server_info.username, + "password": test_server_info.password, + "lazy_schema_create": True, + } + storage = from_config( + config_file, + global_configs=[json.dumps(global_config)], + ) + assert isinstance(storage, SqlStorage) + yield storage + # Cleanup the storage on return + storage._reset_schema(force=True) # pylint: disable=protected-access + + +@pytest.fixture(scope="function") +def mysql_storage( + mysql_storage_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Fixture of a MySQL backed SqlStorage engine. + + See Also + -------- + _create_storage_from_test_server_info + """ + return _create_storage_from_test_server_info( + path_join(str(files("mlos_bench.config")), "storage", "mysql.jsonc"), + mysql_storage_info, + shared_temp_dir, + short_testrun_uid, + ) + + +@pytest.fixture(scope="function") +def postgres_storage( + postgres_storage_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Fixture of a MySQL backed SqlStorage engine. + + See Also + -------- + _create_storage_from_test_server_info + """ + return _create_storage_from_test_server_info( + path_join(str(files("mlos_bench.config")), "storage", "postgresql.jsonc"), + postgres_storage_info, + shared_temp_dir, + short_testrun_uid, + ) + @pytest.fixture def sqlite_storage() -> Generator[SqlStorage]: diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py similarity index 68% rename from mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py rename to mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index 1cbdaa9c6a..660f4e245d 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -4,21 +4,37 @@ # """Test sql schemas for mlos_bench storage.""" +import pytest +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture + from alembic.migration import MigrationContext from sqlalchemy import inspect from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.tests import DOCKER + # NOTE: This value is hardcoded to the latest revision in the alembic versions directory. # It could also be obtained programmatically using the "alembic heads" command or heads() API. # See Also: schema.py for an example of programmatic alembic config access. CURRENT_ALEMBIC_HEAD = "8928a401115b" -# TODO: Test other engines as well. - -def test_storage_schemas(storage: SqlStorage) -> None: +# Try to test multiple DBMS engines. +docker_dbms_fixtures = [] +if DOCKER: + docker_dbms_fixtures = [ + lazy_fixture("mysql_storage"), + lazy_fixture("postgres_storage"), + ] +@pytest.mark.parameterize( + ["some_sql_storage_fixture"], [ + lazy_fixture("sql_storage"), + *docker_dbms_fixtures, + ] +) +def test_storage_schemas(some_sql_storage_fixture: SqlStorage) -> None: """Test storage schema creation.""" - eng = storage._engine # pylint: disable=protected-access + eng = some_sql_storage_fixture._engine # pylint: disable=protected-access with eng.connect() as conn: # pylint: disable=protected-access inspector = inspect(conn) # Make sure the "trial_runner_id" column exists. diff --git a/mlos_bench/mlos_bench/tests/storage/sql/up.sh b/mlos_bench/mlos_bench/tests/storage/sql/up.sh new file mode 100755 index 0000000000..fd99d1770d --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/up.sh @@ -0,0 +1,87 @@ +#!/bin/bash +## +## Copyright (c) Microsoft Corporation. +## Licensed under the MIT License. +## + +# A script to start the containerized SQL DBMS servers. +# For pytest, the fixture in conftest.py will handle this for us using the +# pytest-docker plugin, but for manual testing, this script can be used. + +set -eu +set -x + +scriptdir=$(dirname "$(readlink -f "$0")") +cd "$scriptdir" + +PROJECT_NAME="mlos_bench-test-sql-storage-manual" +CONTAINER_COUNT=2 + +if ! type mysqladmin >/dev/null 2>&1; then + echo "ERROR: Missing mysqladmin tool to check status of the server." >&2 + exit 1 +fi + +if ! type psql >/dev/null 2>&1; then + echo "ERROR: Missing psql tool to check status of the server." >&2 + exit 1 +fi + + +docker compose -p "$PROJECT_NAME" up --build --remove-orphans -d +set +x + +while ! docker compose -p "$PROJECT_NAME" ps --format '{{.Name}} {{.State}} {{.Health}}' | grep -c ' running healthy$' | grep -q -x $CONTAINER_COUNT; do + echo "Waiting for $CONTAINER_COUNT containers to report healthy ..." + sleep 1 +done + +mysql_port=$(docker compose -p "$PROJECT_NAME" port mysql-mlos-bench-server ${PORT:-3306} | cut -d: -f2) +echo "Connect to the mysql server container at the following port: $mysql_port" +postgres_port=$(docker compose -p "$PROJECT_NAME" port postgres-mlos-bench-server ${PORT:-5432} | cut -d: -f2) +echo "Connect to the postgres server container at the following port: $postgres_port" + +# TODO: Remove the rest of this: + +mysql_ok=0 +if ! type mysqladmin >/dev/null 2>&1; then + echo "WARNING: Missing mysqladmin tool to check status of the server." >&2 + mysql_ok=1 +fi + +pg_ok=0 +if ! type psql >/dev/null 2>&1; then + echo "WARNING: Missing psql tool to check status of the server." >&2 + pg_ok=1 +fi + +for i in {1..10}; do + if [ "$mysql_ok" == "1" ]; then + break + fi + if mysqladmin -h localhost --port $mysql_port --protocol tcp --password=password ping >/dev/null; then + mysql_ok=1 + else + sleep 1 + fi +done +if [ "$mysql_ok" != 1 ]; then + echo "ERR: MySQL failed to start." >&2 + exit 1 +fi + +for i in {1..10}; do + if [ "$pg_ok" == "1" ]; then + break + fi + if PGPASSWORD=password psql -h localhost -p $postgres_port -U postgres mlos_bench -c "SELECT 1;" > /dev/null; then + pg_ok=1 + break + else + sleep 1 + fi +done +if [ "$pg_ok" != 1 ]; then + echo "ERR: Postgres failed to start." >&2 + exit 1 +fi diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index 7871e7f68c..1aa29b5efe 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -9,11 +9,20 @@ from typing import Literal import pytest +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture from pytz import UTC from mlos_bench.environments.status import Status -from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.storage.base_storage import Storage from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.tests import DOCKER + +docker_dbms_fixtures = [] +if DOCKER: + docker_dbms_fixtures = [ + lazy_fixture("mysql_storage"), + lazy_fixture("postgres_storage"), + ] # TODO: When we introduce ParallelTrialScheduler warn at config startup time @@ -22,14 +31,25 @@ sys.platform == "win32", reason="Windows doesn't support multiple processes accessing the same file.", ) +@pytest.mark.parametrize( + ["persistent_storage"], + [ + # TODO: Improve this test to support non-sql backends eventually as well. + lazy_fixture("sqlite_storage"), + *docker_dbms_fixtures, + ], +) def test_storage_pickle_restore_experiment_and_trial( - sqlite_storage: SqlStorage, + persistent_storage: Storage, tunable_groups: TunableGroups, ) -> None: """Check that we can pickle and unpickle the Storage object, and restore Experiment and Trial by id. """ - storage = sqlite_storage + storage = persistent_storage + storage_class = storage.__class__ + assert issubclass(storage_class, Storage) + assert storage_class != Storage # Create an Experiment and a Trial opt_targets: dict[str, Literal["min", "max"]] = {"metric": "min"} experiment = storage.experiment( @@ -49,7 +69,7 @@ def test_storage_pickle_restore_experiment_and_trial( # Pickle and unpickle the Storage object pickled = pickle.dumps(storage) restored_storage = pickle.loads(pickled) - assert isinstance(restored_storage, SqlStorage) + assert isinstance(restored_storage, storage_class) # Restore the Experiment from storage by id and check that it matches the original restored_experiment = restored_storage.get_experiment_by_id( From 1c9633c61c5da559e35d66e6dc8dd6c3016a739c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:35:31 -0500 Subject: [PATCH 12/88] change column lengths for mysql --- mlos_bench/mlos_bench/storage/sql/schema.py | 56 ++++++++++++--------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index c6a7849880..48d06f63c5 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -72,12 +72,6 @@ class DbSchema: # for all DB tables, so it's ok to disable the warnings. # pylint: disable=too-many-instance-attributes - # Common string column sizes. - _ID_LEN = 512 - _PARAM_VALUE_LEN = 1024 - _METRIC_VALUE_LEN = 255 - _STATUS_LEN = 16 - def __init__(self, engine: Engine | None): """ Declare the SQLAlchemy schema for the database. @@ -95,10 +89,24 @@ def __init__(self, engine: Engine | None): self._engine = engine self._meta = MetaData() + # Common string column sizes. + self._exp_id_len = 512 + self._param_id_len = 512 + self._param_value_len = 1024 + self._metric_id_len = 512 + self._metric_value_len = 255 + self._status_len = 16 + + # Some overrides for certain DB engines: + if engine and engine.dialect.name in {"mysql", "mariadb"}: + self._exp_id_len = 255 + self._param_id_len = 255 + self._metric_id_len = 255 + self.experiment = Table( "experiment", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("description", String(1024)), Column("root_env_config", String(1024), nullable=False), Column("git_repo", String(1024), nullable=False), @@ -108,7 +116,7 @@ def __init__(self, engine: Engine | None): Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. - Column("status", String(self._STATUS_LEN)), + Column("status", String(self._status_len)), # There may be more than one mlos_benchd_service running on different hosts. # This column stores the host/container name of the driver that # picked up the experiment. @@ -126,7 +134,7 @@ def __init__(self, engine: Engine | None): "objectives", self._meta, Column("exp_id"), - Column("optimization_target", String(self._ID_LEN), nullable=False), + Column("optimization_target", String(self._metric_id_len), nullable=False), Column("optimization_direction", String(4), nullable=False), # TODO: Note: weight is not fully supported yet as currently # multi-objective is expected to explore each objective equally. @@ -175,14 +183,14 @@ def __init__(self, engine: Engine | None): self.trial = Table( "trial", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), Column("ts_start", DateTime, nullable=False), Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: - Column("status", String(self._STATUS_LEN), nullable=False), + Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), @@ -197,8 +205,8 @@ def __init__(self, engine: Engine | None): "config_param", self._meta, Column("config_id", Integer, nullable=False), - Column("param_id", String(self._ID_LEN), nullable=False), - Column("param_value", String(self._PARAM_VALUE_LEN)), + Column("param_id", String(self._param_id_len), nullable=False), + Column("param_value", String(self._param_value_len)), PrimaryKeyConstraint("config_id", "param_id"), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) @@ -212,10 +220,10 @@ def __init__(self, engine: Engine | None): self.trial_param = Table( "trial_param", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("param_id", String(self._ID_LEN), nullable=False), - Column("param_value", String(self._PARAM_VALUE_LEN)), + Column("param_id", String(self._param_id_len), nullable=False), + Column("param_value", String(self._param_value_len)), PrimaryKeyConstraint("exp_id", "trial_id", "param_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -230,10 +238,10 @@ def __init__(self, engine: Engine | None): self.trial_status = Table( "trial_status", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column("ts", DateTime(timezone=True), nullable=False, default="now"), - Column("status", String(self._STATUS_LEN), nullable=False), + Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -247,10 +255,10 @@ def __init__(self, engine: Engine | None): self.trial_result = Table( "trial_result", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("metric_id", String(self._ID_LEN), nullable=False), - Column("metric_value", String(self._METRIC_VALUE_LEN)), + Column("metric_id", String(self._metric_id_len), nullable=False), + Column("metric_value", String(self._metric_value_len)), PrimaryKeyConstraint("exp_id", "trial_id", "metric_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -265,11 +273,11 @@ def __init__(self, engine: Engine | None): self.trial_telemetry = Table( "trial_telemetry", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column("ts", DateTime(timezone=True), nullable=False, default="now"), - Column("metric_id", String(self._ID_LEN), nullable=False), - Column("metric_value", String(self._METRIC_VALUE_LEN)), + Column("metric_id", String(self._exp_id_len), nullable=False), + Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], From f5b7bf364d52dc60d672c361611a460e139ec492 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:43:48 -0500 Subject: [PATCH 13/88] fixups --- mlos_bench/mlos_bench/tests/__init__.py | 16 +--------------- mlos_bench/mlos_bench/tests/conftest.py | 18 +++++++++++++++++- .../tests/services/remote/ssh/conftest.py | 1 - .../tests/services/remote/ssh/fixtures.py | 1 - .../remote/ssh/test_ssh_host_service.py | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index 2d9112e52d..a39337648d 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -16,6 +16,7 @@ from subprocess import run import pytest +from pytest_docker.plugin import Services as DockerServices import pytz from mlos_bench.util import get_class_from_name, nullable @@ -86,21 +87,6 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: full_class_name = obj.__class__.__module__ + "." + obj.__class__.__name__ return full_class_name == try_resolve_class_name(expected_class_name) -HOST_DOCKER_NAME = "host.docker.internal" - - -@pytest.fixture(scope="session") -def docker_hostname() -> str: - """Returns the local hostname to use to connect to the test ssh server.""" - if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): - # On Linux, if we're running in a docker container, we can use the - # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. - return HOST_DOCKER_NAME - # Docker (Desktop) for Windows (WSL2) uses a special networking magic - # to refer to the host machine as `localhost` when exposing ports. - # In all other cases, assume we're executing directly inside conda on the host. - return "localhost" - def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: """Wait until a docker service is ready.""" diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index 8e0aba5c00..8d48d2094c 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -15,7 +15,7 @@ from pytest_docker.plugin import get_docker_services from mlos_bench.environments.mock_env import MockEnv -from mlos_bench.tests import SEED, tunable_groups_fixtures +from mlos_bench.tests import SEED, tunable_groups_fixtures, resolve_host_name from mlos_bench.tunables.tunable_groups import TunableGroups # pylint: disable=redefined-outer-name @@ -29,6 +29,22 @@ covariant_group = tunable_groups_fixtures.covariant_group +HOST_DOCKER_NAME = "host.docker.internal" + + +@pytest.fixture(scope="session") +def docker_hostname() -> str: + """Returns the local hostname to use to connect to the test ssh server.""" + if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): + # On Linux, if we're running in a docker container, we can use the + # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. + return HOST_DOCKER_NAME + # Docker (Desktop) for Windows (WSL2) uses a special networking magic + # to refer to the host machine as `localhost` when exposing ports. + # In all other cases, assume we're executing directly inside conda on the host. + return "localhost" + + @pytest.fixture def mock_env(tunable_groups: TunableGroups) -> MockEnv: """Test fixture for MockEnv.""" diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/conftest.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/conftest.py index 34006985af..83e98c44a2 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/conftest.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/conftest.py @@ -7,7 +7,6 @@ import mlos_bench.tests.services.remote.ssh.fixtures as ssh_fixtures # Expose some of those as local names so they can be picked up as fixtures by pytest. -ssh_test_server_hostname = ssh_fixtures.ssh_test_server_hostname ssh_test_server = ssh_fixtures.ssh_test_server alt_test_server = ssh_fixtures.alt_test_server reboot_test_server = ssh_fixtures.reboot_test_server diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py index 23a4a9ed2f..fda84e9f79 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py @@ -9,7 +9,6 @@ """ import os -import sys import tempfile from collections.abc import Generator from subprocess import run diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py index 30fdc804f2..1dce67a13d 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py @@ -12,7 +12,7 @@ from mlos_bench.services.remote.ssh.ssh_host_service import SshHostService from mlos_bench.services.remote.ssh.ssh_service import SshClient -from mlos_bench.tests import requires_docker, wait_docker_service_socket, +from mlos_bench.tests import requires_docker, wait_docker_service_socket from mlos_bench.tests.services.remote.ssh import ( ALT_TEST_SERVER_NAME, REBOOT_TEST_SERVER_NAME, From 7fdf06dead9b182f02f454c63378161243fe3d53 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 17:46:07 -0500 Subject: [PATCH 14/88] fixup --- mlos_bench/mlos_bench/tests/__init__.py | 30 +++++++++++++++++++ .../mlos_bench/tests/storage/conftest.py | 2 ++ .../mlos_bench/tests/storage/sql/fixtures.py | 29 ++++++++++-------- .../tests/storage/sql/test_storage_schemas.py | 15 ++++++---- 4 files changed, 59 insertions(+), 17 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index a39337648d..98324c6ebb 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -8,6 +8,7 @@ Used to make mypy happy about multiple conftest.py modules. """ import filecmp +import json import os import shutil import socket @@ -88,6 +89,35 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: return full_class_name == try_resolve_class_name(expected_class_name) +def is_docker_service_healthy( + compose_project_name: str, + service_name: str, +) -> bool: + """Check if a docker service is healthy.""" + docker_ps_out = run( + f"docker compose -p {compose_project_name} " f"ps --format json {service_name}", + shell=True, + check=True, + capture_output=True, + ) + docker_ps_json = json.loads(docker_ps_out.stdout.decode().strip()) + return docker_ps_json["State"] == "running" and docker_ps_json["Health"] == "healthy" + + +def wait_docker_service_healthy( + docker_services: DockerServices, + project_name: str, + service_name: str, + timeout: float = 30.0, +) -> None: + """Wait until a docker service is healthy.""" + docker_services.wait_until_responsive( + check=lambda: is_docker_service_healthy(project_name, service_name), + timeout=timeout, + pause=0.5, + ) + + def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: """Wait until a docker service is ready.""" docker_services.wait_until_responsive( diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py index 526ea945f2..acdbe5024a 100644 --- a/mlos_bench/mlos_bench/tests/storage/conftest.py +++ b/mlos_bench/mlos_bench/tests/storage/conftest.py @@ -11,7 +11,9 @@ # same. # Expose some of those as local names so they can be picked up as fixtures by pytest. +mysql_storage_info = sql_storage_fixtures.mysql_storage_info mysql_storage = sql_storage_fixtures.mysql_storage +postgres_storage_info = sql_storage_fixtures.postgres_storage_info postgres_storage = sql_storage_fixtures.postgres_storage sqlite_storage = sql_storage_fixtures.sqlite_storage storage = sql_storage_fixtures.storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 50376b8727..eb3c1c3eca 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -4,6 +4,7 @@ # """Test fixtures for mlos_bench storage.""" +from contextlib import contextmanager import json import os import tempfile @@ -23,7 +24,7 @@ from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config from mlos_bench.util import path_join -from mlos_bench.tests import SEED, wait_docker_service_socket +from mlos_bench.tests import SEED, wait_docker_service_healthy from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, MAX_TRIALS, @@ -55,11 +56,12 @@ def mysql_storage_info( service_name=MYSQL_TEST_SERVER_NAME, hostname=docker_hostname, ) - wait_docker_service_socket( + wait_docker_service_healthy( locked_docker_services, - storage_info.hostname, - storage_info.get_port(), + storage_info.compose_project_name, + storage_info.service_name, ) + return storage_info @@ -77,14 +79,15 @@ def postgres_storage_info( service_name=PGSQL_TEST_SERVER_NAME, hostname=docker_hostname, ) - wait_docker_service_socket( + wait_docker_service_healthy( locked_docker_services, - storage_info.hostname, - storage_info.get_port(), + storage_info.compose_project_name, + storage_info.service_name, ) return storage_info +@contextmanager def _create_storage_from_test_server_info( config_file: str, test_server_info: SqlTestServerInfo, @@ -107,7 +110,7 @@ def _create_storage_from_test_server_info( sql_storage_name = test_server_info.service_name with FileLock(path_join(shared_temp_dir, f"{sql_storage_name}-{short_testrun_uid}.lock")): global_config = { - "host": test_server_info.username, + "host": test_server_info.hostname, "port": test_server_info.get_port() or 0, "database": test_server_info.database, "username": test_server_info.username, @@ -137,12 +140,13 @@ def mysql_storage( -------- _create_storage_from_test_server_info """ - return _create_storage_from_test_server_info( + with _create_storage_from_test_server_info( path_join(str(files("mlos_bench.config")), "storage", "mysql.jsonc"), mysql_storage_info, shared_temp_dir, short_testrun_uid, - ) + ) as storage: + yield storage @pytest.fixture(scope="function") @@ -158,12 +162,13 @@ def postgres_storage( -------- _create_storage_from_test_server_info """ - return _create_storage_from_test_server_info( + with _create_storage_from_test_server_info( path_join(str(files("mlos_bench.config")), "storage", "postgresql.jsonc"), postgres_storage_info, shared_temp_dir, short_testrun_uid, - ) + ) as storage: + yield storage @pytest.fixture diff --git a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index 660f4e245d..569b19984a 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -11,6 +11,7 @@ from sqlalchemy import inspect from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.tests.storage.sql.fixtures import storage as sql_storage from mlos_bench.tests import DOCKER @@ -26,14 +27,18 @@ lazy_fixture("mysql_storage"), lazy_fixture("postgres_storage"), ] -@pytest.mark.parameterize( - ["some_sql_storage_fixture"], [ - lazy_fixture("sql_storage"), - *docker_dbms_fixtures, - ] + + +@pytest.mark.parametrize( + "some_sql_storage_fixture", + [ + lazy_fixture("sqlite_storage"), + *docker_dbms_fixtures, + ], ) def test_storage_schemas(some_sql_storage_fixture: SqlStorage) -> None: """Test storage schema creation.""" + assert isinstance(some_sql_storage_fixture, SqlStorage) eng = some_sql_storage_fixture._engine # pylint: disable=protected-access with eng.connect() as conn: # pylint: disable=protected-access inspector = inspect(conn) From 1bf3c30d733b4d750f6cb289c3cdb495cad67795 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 17:46:12 -0500 Subject: [PATCH 15/88] fixup --- mlos_bench/mlos_bench/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index 8d48d2094c..7885bfcbe4 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -42,7 +42,7 @@ def docker_hostname() -> str: # Docker (Desktop) for Windows (WSL2) uses a special networking magic # to refer to the host machine as `localhost` when exposing ports. # In all other cases, assume we're executing directly inside conda on the host. - return "localhost" + return "127.0.0.1" # "localhost" @pytest.fixture From bd5862f12c7f3be114730f08cb07847094801d22 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 17:46:17 -0500 Subject: [PATCH 16/88] cleanup --- mlos_bench/mlos_bench/tests/storage/sql/up.sh | 65 +++---------------- 1 file changed, 8 insertions(+), 57 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/up.sh b/mlos_bench/mlos_bench/tests/storage/sql/up.sh index fd99d1770d..36fea613b9 100755 --- a/mlos_bench/mlos_bench/tests/storage/sql/up.sh +++ b/mlos_bench/mlos_bench/tests/storage/sql/up.sh @@ -17,71 +17,22 @@ cd "$scriptdir" PROJECT_NAME="mlos_bench-test-sql-storage-manual" CONTAINER_COUNT=2 -if ! type mysqladmin >/dev/null 2>&1; then - echo "ERROR: Missing mysqladmin tool to check status of the server." >&2 - exit 1 -fi - -if ! type psql >/dev/null 2>&1; then - echo "ERROR: Missing psql tool to check status of the server." >&2 - exit 1 -fi - - docker compose -p "$PROJECT_NAME" up --build --remove-orphans -d set +x -while ! docker compose -p "$PROJECT_NAME" ps --format '{{.Name}} {{.State}} {{.Health}}' | grep -c ' running healthy$' | grep -q -x $CONTAINER_COUNT; do +function get_project_health() { + docker compose -p "$PROJECT_NAME" ps --format '{{.Name}} {{.State}} {{.Health}}' +} + +project_health=$(get_project_health) +while ! echo "$project_health" | grep -c ' running healthy$' | grep -q -x $CONTAINER_COUNT; do echo "Waiting for $CONTAINER_COUNT containers to report healthy ..." + echo "$project_health" sleep 1 + project_health=$(get_project_health) done mysql_port=$(docker compose -p "$PROJECT_NAME" port mysql-mlos-bench-server ${PORT:-3306} | cut -d: -f2) echo "Connect to the mysql server container at the following port: $mysql_port" postgres_port=$(docker compose -p "$PROJECT_NAME" port postgres-mlos-bench-server ${PORT:-5432} | cut -d: -f2) echo "Connect to the postgres server container at the following port: $postgres_port" - -# TODO: Remove the rest of this: - -mysql_ok=0 -if ! type mysqladmin >/dev/null 2>&1; then - echo "WARNING: Missing mysqladmin tool to check status of the server." >&2 - mysql_ok=1 -fi - -pg_ok=0 -if ! type psql >/dev/null 2>&1; then - echo "WARNING: Missing psql tool to check status of the server." >&2 - pg_ok=1 -fi - -for i in {1..10}; do - if [ "$mysql_ok" == "1" ]; then - break - fi - if mysqladmin -h localhost --port $mysql_port --protocol tcp --password=password ping >/dev/null; then - mysql_ok=1 - else - sleep 1 - fi -done -if [ "$mysql_ok" != 1 ]; then - echo "ERR: MySQL failed to start." >&2 - exit 1 -fi - -for i in {1..10}; do - if [ "$pg_ok" == "1" ]; then - break - fi - if PGPASSWORD=password psql -h localhost -p $postgres_port -U postgres mlos_bench -c "SELECT 1;" > /dev/null; then - pg_ok=1 - break - else - sleep 1 - fi -done -if [ "$pg_ok" != 1 ]; then - echo "ERR: Postgres failed to start." >&2 - exit 1 -fi From 67f84ead1d571c8357a659dae0321e489dc45d00 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 17:51:16 -0500 Subject: [PATCH 17/88] fixup --- mlos_bench/mlos_bench/storage/sql/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 48d06f63c5..28f00a8af3 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -276,7 +276,7 @@ def __init__(self, engine: Engine | None): Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column("ts", DateTime(timezone=True), nullable=False, default="now"), - Column("metric_id", String(self._exp_id_len), nullable=False), + Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), ForeignKeyConstraint( From 41199fc5c6125c2d6ce121b6eb78a3e7461ea5e0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 17:54:05 -0500 Subject: [PATCH 18/88] format --- .../mlos_bench/storage/sql/alembic/README.md | 31 ++++++++++--------- mlos_bench/mlos_bench/tests/conftest.py | 2 +- .../tests/services/remote/ssh/__init__.py | 4 +-- .../mlos_bench/tests/storage/sql/__init__.py | 4 +-- .../mlos_bench/tests/storage/sql/fixtures.py | 16 ++++------ .../tests/storage/sql/test_storage_schemas.py | 6 ++-- .../tests/storage/test_storage_pickling.py | 2 +- 7 files changed, 30 insertions(+), 35 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index 9a9aafe1f4..50164ed005 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -13,6 +13,7 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla > In the remainder of this document we should some examples for different DB types. > Pick the one you're targeting and stick with it thru the example. > You may need to repeat the process several times to test all of them. + > > - [ ] TODO: Add scripts to automatically do this for several different backend engines all at once. For instance: @@ -63,21 +64,21 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password ``` -2. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. +1. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. > Keep each change small and atomic. > For example, if you want to add a new column, do that in one change. > If you want to rename a column, do that in another change. -3. Generate a new migration script with the following command: +1. Generate a new migration script with the following command: ```sh alembic revision --autogenerate -m "CHANGEME: Descriptive text about the change." ``` -4. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. +1. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. -5. Verify that the migration script works by running the following command: +1. Verify that the migration script works by running the following command: ```sh # sqlite @@ -116,22 +117,22 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla > Use different CLI clients for targeting other engines. -6. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. +1. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. -7. Cleanup any server instances you started. +1. Cleanup any server instances you started. - For instance: + For instance: - ```sh - rm mlos_bench/storage/sql/mlos_bench.sqlite - ``` + ```sh + rm mlos_bench/storage/sql/mlos_bench.sqlite + ``` - ```sh - docker kill mysql-alembic - ``` + ```sh + docker kill mysql-alembic + ``` -8. Merge that to the `main` branch. +1. Merge that to the `main` branch. -9. Might be good to cut a new `mlos_bench` release at this point as well. +1. Might be good to cut a new `mlos_bench` release at this point as well. diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index 7885bfcbe4..59dfbff9d2 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -15,7 +15,7 @@ from pytest_docker.plugin import get_docker_services from mlos_bench.environments.mock_env import MockEnv -from mlos_bench.tests import SEED, tunable_groups_fixtures, resolve_host_name +from mlos_bench.tests import SEED, resolve_host_name, tunable_groups_fixtures from mlos_bench.tunables.tunable_groups import TunableGroups # pylint: disable=redefined-outer-name diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py index 877d39a26a..de822fe2eb 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py @@ -7,7 +7,6 @@ from dataclasses import dataclass from subprocess import run - # The SSH test server port and name. # See Also: docker-compose.yml SSH_TEST_SERVER_PORT = 2254 @@ -18,7 +17,8 @@ @dataclass class SshTestServerInfo: - """A data class for SshTestServerInfo. + """ + A data class for SshTestServerInfo. See Also -------- diff --git a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py index 442e531a43..d1a7c3c800 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py @@ -7,7 +7,6 @@ from dataclasses import dataclass from subprocess import run - # The DB servers' names and other connection info. # See Also: docker-compose.yml @@ -20,7 +19,8 @@ @dataclass class SqlTestServerInfo: - """A data class for SqlTestServerInfo. + """ + A data class for SqlTestServerInfo. See Also -------- diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index eb3c1c3eca..d7f058483c 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -4,16 +4,16 @@ # """Test fixtures for mlos_bench storage.""" -from contextlib import contextmanager import json import os import tempfile from collections.abc import Generator +from contextlib import contextmanager from importlib.resources import files from random import seed as rand_seed -from filelock import FileLock import pytest +from filelock import FileLock from pytest_docker.plugin import Services as DockerServices from mlos_bench.optimizers.mock_optimizer import MockOptimizer @@ -23,7 +23,6 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config -from mlos_bench.util import path_join from mlos_bench.tests import SEED, wait_docker_service_healthy from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, @@ -31,11 +30,12 @@ TRIAL_RUNNER_COUNT, ) from mlos_bench.tests.storage.sql import ( - SqlTestServerInfo, MYSQL_TEST_SERVER_NAME, PGSQL_TEST_SERVER_NAME, + SqlTestServerInfo, ) from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.util import path_join # pylint: disable=redefined-outer-name @@ -48,9 +48,7 @@ def mysql_storage_info( docker_compose_project_name: str, locked_docker_services: DockerServices, ) -> SqlTestServerInfo: - """ - Fixture for getting mysql storage connection info. - """ + """Fixture for getting mysql storage connection info.""" storage_info = SqlTestServerInfo( compose_project_name=docker_compose_project_name, service_name=MYSQL_TEST_SERVER_NAME, @@ -71,9 +69,7 @@ def postgres_storage_info( docker_compose_project_name: str, locked_docker_services: DockerServices, ) -> SqlTestServerInfo: - """ - Fixture for getting postgres storage connection info. - """ + """Fixture for getting postgres storage connection info.""" storage_info = SqlTestServerInfo( compose_project_name=docker_compose_project_name, service_name=PGSQL_TEST_SERVER_NAME, diff --git a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index 569b19984a..32653b474e 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -5,15 +5,13 @@ """Test sql schemas for mlos_bench storage.""" import pytest -from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture - from alembic.migration import MigrationContext +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture from sqlalchemy import inspect from mlos_bench.storage.sql.storage import SqlStorage -from mlos_bench.tests.storage.sql.fixtures import storage as sql_storage - from mlos_bench.tests import DOCKER +from mlos_bench.tests.storage.sql.fixtures import storage as sql_storage # NOTE: This value is hardcoded to the latest revision in the alembic versions directory. # It could also be obtained programmatically using the "alembic heads" command or heads() API. diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index 1aa29b5efe..bb54ea66c2 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -14,8 +14,8 @@ from mlos_bench.environments.status import Status from mlos_bench.storage.base_storage import Storage -from mlos_bench.tunables.tunable_groups import TunableGroups from mlos_bench.tests import DOCKER +from mlos_bench.tunables.tunable_groups import TunableGroups docker_dbms_fixtures = [] if DOCKER: From ee4a900b410fe99060776fb776cfcd37fb683b1c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 28 May 2025 23:04:12 +0000 Subject: [PATCH 19/88] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index 98324c6ebb..fecb883971 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -17,8 +17,8 @@ from subprocess import run import pytest -from pytest_docker.plugin import Services as DockerServices import pytz +from pytest_docker.plugin import Services as DockerServices from mlos_bench.util import get_class_from_name, nullable From 82220ecc4cd9f05b9368cc912b46808ddcea0293 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 18:08:10 -0500 Subject: [PATCH 20/88] fixup --- mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index bb54ea66c2..365c68af93 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -32,7 +32,7 @@ reason="Windows doesn't support multiple processes accessing the same file.", ) @pytest.mark.parametrize( - ["persistent_storage"], + "persistent_storage", [ # TODO: Improve this test to support non-sql backends eventually as well. lazy_fixture("sqlite_storage"), From aa2621b4ae1e059c0a08f3cadb26c83b83e91118 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 18:10:26 -0500 Subject: [PATCH 21/88] fixup --- mlos_bench/mlos_bench/tests/environments/remote/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/environments/remote/conftest.py b/mlos_bench/mlos_bench/tests/environments/remote/conftest.py index 257e37fa9e..b8ea5a2a6b 100644 --- a/mlos_bench/mlos_bench/tests/environments/remote/conftest.py +++ b/mlos_bench/mlos_bench/tests/environments/remote/conftest.py @@ -8,4 +8,3 @@ # Expose some of those as local names so they can be picked up as fixtures by pytest. ssh_test_server = ssh_fixtures.ssh_test_server -ssh_test_server_hostname = ssh_fixtures.ssh_test_server_hostname From a84d30bab434a0f360cb81880889973245fb9c96 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:28:12 -0500 Subject: [PATCH 22/88] switch to interprocesslock - already using that --- mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index d7f058483c..a582da5dff 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -13,7 +13,7 @@ from random import seed as rand_seed import pytest -from filelock import FileLock +from fasteners import InterProcessLock from pytest_docker.plugin import Services as DockerServices from mlos_bench.optimizers.mock_optimizer import MockOptimizer @@ -104,7 +104,9 @@ def _create_storage_from_test_server_info( SqlStorage """ sql_storage_name = test_server_info.service_name - with FileLock(path_join(shared_temp_dir, f"{sql_storage_name}-{short_testrun_uid}.lock")): + with InterProcessLock( + path_join(shared_temp_dir, f"{sql_storage_name}-{short_testrun_uid}.lock") + ): global_config = { "host": test_server_info.hostname, "port": test_server_info.get_port() or 0, From 2341a10fecfede15ea2f0f7faa9b4cc8334cee24 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:31:52 -0500 Subject: [PATCH 23/88] address a lint issue --- mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index a582da5dff..572b1a4c0d 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -120,9 +120,11 @@ def _create_storage_from_test_server_info( global_configs=[json.dumps(global_config)], ) assert isinstance(storage, SqlStorage) - yield storage - # Cleanup the storage on return - storage._reset_schema(force=True) # pylint: disable=protected-access + try: + yield storage + finally: + # Cleanup the storage on return + storage._reset_schema(force=True) # pylint: disable=protected-access @pytest.fixture(scope="function") From 45ad11ffb8f3eed0b3a46987d7897bb732b9c5f8 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:33:33 -0500 Subject: [PATCH 24/88] restore the original alembic comments - moving to separate PR --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 3 - .../mlos_bench/storage/sql/alembic/README.md | 104 ++---------------- 2 files changed, 9 insertions(+), 98 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 7f88d809ee..4d2a1120c5 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -63,10 +63,7 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # output_encoding = utf-8 # See README.md for details. -# Uncomment one of these: sqlalchemy.url = sqlite:///mlos_bench.sqlite -#sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench -#sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench [post_write_hooks] diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index 50164ed005..ec35eb70f6 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -4,67 +4,17 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla ## Overview -1. Create a blank database instance in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: +1. Create a blank `mlos_bench.sqlite` database file in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: - This allows `alembic` to automatically generate a migration script from the current schema. - - > NOTE: If your schema changes target a particular backend engine (e.g., using `with_variant`) you will need to use an engine with that config for this step. - > \ - > In the remainder of this document we should some examples for different DB types. - > Pick the one you're targeting and stick with it thru the example. - > You may need to repeat the process several times to test all of them. - > - > - [ ] TODO: Add scripts to automatically do this for several different backend engines all at once. - - For instance: - - 1. Start a temporary server either as a local file or in a docker instance - - ```sh - # sqlite - cd mlos_bench/storage/sql - rm -f mlos_bench.sqlite - ``` - - ```sh - # mysql - docker run -it --rm --name mysql-alembic --env MYSQL_ROOT_PASSWORD=password --env MYSQL_DATABASE=mlos_bench -p 3306:3306 mysql:latest - ``` - - ```sh - # postgres - docker run -it --rm --name postgres-alembic --env POSTGRES_PASSWORD=password --env POSTGRES_DB=mlos_bench -p 5432:5432 postgres:latest - ``` - - 1. Adjust the `sqlalchemy.url` in the [`alembic.ini`](../alembic.ini) file. - - ```ini - # Uncomment one of these. - # See README.md for details. - - #sqlalchemy.url = sqlite:///mlos_bench.sqlite - sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench - #sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench - ``` - - 1. Prime the DB schema - - ```sh - # sqlite - mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only --password=password - ``` - - ```sh - # mysql - mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password - ``` + ```sh + cd mlos_bench/storage/sql + rm mlos_bench.sqlite + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only + ``` - ```sh - # postgres - mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password - ``` + > This allows `alembic` to automatically generate a migration script from the current schema. -1. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. +1. Adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. > Keep each change small and atomic. > For example, if you want to add a new column, do that in one change. @@ -73,7 +23,7 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Generate a new migration script with the following command: ```sh - alembic revision --autogenerate -m "CHANGEME: Descriptive text about the change." + alembic revision --autogenerate -m "Descriptive text about the change." ``` 1. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. @@ -81,58 +31,22 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Verify that the migration script works by running the following command: ```sh - # sqlite mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only ``` - ```sh - # mysql: - mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password - ``` - - ```sh - # postgres: - mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password - ``` - > Normally this would be done with `alembic upgrade head`, but this command is convenient to ensure if will work with the `mlos_bench` command line interface as well. Examine the results using something like: ```sh - # For sqlite: sqlite3 mlos_bench.sqlite .schema sqlite3 mlos_bench.sqlite "SELECT * FROM alembic_version;" ``` - ```sh - # For mysql: - mysql --user root --password=password --host localhost --protocol tcp --database mlos_bench -e "SHOW TABLES; SELECT * FROM alembic_version;" - ``` - - ```sh - # For postgres: - PGPASSWORD=password psql -h localhost -p 5432 -U postgres mlos_bench -c "SELECT table_name FROM information_schema.tables WHERE table_schema='public' and table_catalog='mlos_bench'; SELECT * FROM alembic_version;" - ``` - - > Use different CLI clients for targeting other engines. - 1. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. -1. Cleanup any server instances you started. - - For instance: - - ```sh - rm mlos_bench/storage/sql/mlos_bench.sqlite - ``` - - ```sh - docker kill mysql-alembic - ``` - 1. Merge that to the `main` branch. 1. Might be good to cut a new `mlos_bench` release at this point as well. From bbcf689a3ec1d53f5430502e50cc98182010ae36 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:34:00 -0500 Subject: [PATCH 25/88] Revert "restore the original alembic comments - moving to separate PR" This reverts commit 45ad11ffb8f3eed0b3a46987d7897bb732b9c5f8. --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 3 + .../mlos_bench/storage/sql/alembic/README.md | 104 ++++++++++++++++-- 2 files changed, 98 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index f5a249fd7a..6cc1e4b7c8 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -63,7 +63,10 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # output_encoding = utf-8 # See README.md for details. +# Uncomment one of these: sqlalchemy.url = sqlite:///mlos_bench.sqlite +#sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench +#sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench [post_write_hooks] diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index ec35eb70f6..50164ed005 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -4,17 +4,67 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla ## Overview -1. Create a blank `mlos_bench.sqlite` database file in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: +1. Create a blank database instance in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: - ```sh - cd mlos_bench/storage/sql - rm mlos_bench.sqlite - mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only - ``` + This allows `alembic` to automatically generate a migration script from the current schema. + + > NOTE: If your schema changes target a particular backend engine (e.g., using `with_variant`) you will need to use an engine with that config for this step. + > \ + > In the remainder of this document we should some examples for different DB types. + > Pick the one you're targeting and stick with it thru the example. + > You may need to repeat the process several times to test all of them. + > + > - [ ] TODO: Add scripts to automatically do this for several different backend engines all at once. + + For instance: + + 1. Start a temporary server either as a local file or in a docker instance + + ```sh + # sqlite + cd mlos_bench/storage/sql + rm -f mlos_bench.sqlite + ``` + + ```sh + # mysql + docker run -it --rm --name mysql-alembic --env MYSQL_ROOT_PASSWORD=password --env MYSQL_DATABASE=mlos_bench -p 3306:3306 mysql:latest + ``` + + ```sh + # postgres + docker run -it --rm --name postgres-alembic --env POSTGRES_PASSWORD=password --env POSTGRES_DB=mlos_bench -p 5432:5432 postgres:latest + ``` - > This allows `alembic` to automatically generate a migration script from the current schema. + 1. Adjust the `sqlalchemy.url` in the [`alembic.ini`](../alembic.ini) file. -1. Adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. + ```ini + # Uncomment one of these. + # See README.md for details. + + #sqlalchemy.url = sqlite:///mlos_bench.sqlite + sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench + #sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench + ``` + + 1. Prime the DB schema + + ```sh + # sqlite + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # mysql + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + +1. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. > Keep each change small and atomic. > For example, if you want to add a new column, do that in one change. @@ -23,7 +73,7 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Generate a new migration script with the following command: ```sh - alembic revision --autogenerate -m "Descriptive text about the change." + alembic revision --autogenerate -m "CHANGEME: Descriptive text about the change." ``` 1. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. @@ -31,22 +81,58 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Verify that the migration script works by running the following command: ```sh + # sqlite mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only ``` + ```sh + # mysql: + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres: + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + > Normally this would be done with `alembic upgrade head`, but this command is convenient to ensure if will work with the `mlos_bench` command line interface as well. Examine the results using something like: ```sh + # For sqlite: sqlite3 mlos_bench.sqlite .schema sqlite3 mlos_bench.sqlite "SELECT * FROM alembic_version;" ``` + ```sh + # For mysql: + mysql --user root --password=password --host localhost --protocol tcp --database mlos_bench -e "SHOW TABLES; SELECT * FROM alembic_version;" + ``` + + ```sh + # For postgres: + PGPASSWORD=password psql -h localhost -p 5432 -U postgres mlos_bench -c "SELECT table_name FROM information_schema.tables WHERE table_schema='public' and table_catalog='mlos_bench'; SELECT * FROM alembic_version;" + ``` + + > Use different CLI clients for targeting other engines. + 1. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. +1. Cleanup any server instances you started. + + For instance: + + ```sh + rm mlos_bench/storage/sql/mlos_bench.sqlite + ``` + + ```sh + docker kill mysql-alembic + ``` + 1. Merge that to the `main` branch. 1. Might be good to cut a new `mlos_bench` release at this point as well. From 53ee6e288c5fb69452d2e6d62162ef89a14a394c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:36:29 -0500 Subject: [PATCH 26/88] more comments --- mlos_bench/mlos_bench/storage/sql/alembic/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index 50164ed005..48b0b04aa7 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -49,6 +49,8 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Prime the DB schema + > Note: you may want to `git checkout main` first to make sure you're using the current schema here. + ```sh # sqlite mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only --password=password @@ -66,7 +68,10 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. + > Don't forget to do this on a new branch. + > \ > Keep each change small and atomic. + > \ > For example, if you want to add a new column, do that in one change. > If you want to rename a column, do that in another change. @@ -133,6 +138,10 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla docker kill mysql-alembic ``` + ```sh + docker kill postgres-alembic + ``` + 1. Merge that to the `main` branch. 1. Might be good to cut a new `mlos_bench` release at this point as well. From 994a32a2476062d04a1a4e6cf3c388f74fcf54f5 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:40:59 -0500 Subject: [PATCH 27/88] mypy --- mlos_bench/mlos_bench/tests/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index f9bf6d5b7d..1e0d8c9941 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -127,7 +127,11 @@ def is_docker_service_healthy( capture_output=True, ) docker_ps_json = json.loads(docker_ps_out.stdout.decode().strip()) - return docker_ps_json["State"] == "running" and docker_ps_json["Health"] == "healthy" + state = docker_ps_json["State"] + assert isinstance(state, str) + health = docker_ps_json["Health"] + assert isinstance(health, str) + return state == "running" and health == "healthy" def wait_docker_service_healthy( From 2faf6434a796514c958737649a8dbd9273be667d Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:42:44 -0500 Subject: [PATCH 28/88] pylint From a6b89419358eddb352335d7697077a2197737320 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:13:51 -0500 Subject: [PATCH 29/88] allow retrieving storage url from the environment --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 10 ++++++++-- mlos_bench/mlos_bench/storage/sql/storage.py | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index fc186b8cb1..b74da7c6c6 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -5,11 +5,12 @@ """Alembic environment script.""" # pylint: disable=no-member +import os import sys from logging.config import fileConfig from alembic import context -from sqlalchemy import engine_from_config, pool +from sqlalchemy import create_engine, engine_from_config, pool from mlos_bench.storage.sql.schema import DbSchema @@ -25,7 +26,12 @@ # add your model's MetaData object here # for 'autogenerate' support -target_metadata = DbSchema(engine=None).meta +# First, try to read it from an environment variable that we set in mlos_bench. +# Then, fallback to the config file (should only be used by alembic cli while +# devs prepare schema changes). +url = os.getenv("MLOS_ALEMBIC_URL") or config.get_main_option("sqlalchemy.url") +engine = create_engine(url) if url else None +target_metadata = DbSchema(engine=engine).meta # other values from the config, defined by the needs of env.py, # can be acquired: diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 7fc86aff1f..470cc06fa9 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -5,6 +5,7 @@ """Saving and restoring the benchmark data in SQL database.""" import logging +import os from typing import Literal from sqlalchemy import URL, Engine, create_engine @@ -37,6 +38,8 @@ def __init__( self._lazy_schema_create = self._config.pop("lazy_schema_create", False) self._log_sql = self._config.pop("log_sql", False) self._url = URL.create(**self._config) + os.environ["MLOS_ALEMBIC_URL"] = str(self._url) + _LOG.debug("MLOS_ALEMBIC_URL: %s", os.environ["MLOS_ALEMBIC_URL"]) self._repr = f"{self._url.get_backend_name()}:{self._url.database}" self._engine: Engine self._db_schema: DbSchema From 9c5ac14d4fd3cec0a018401fe5ae775a4c004cd6 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:26:21 -0500 Subject: [PATCH 30/88] more alembic tweaks --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 9 ++++----- mlos_bench/mlos_bench/storage/sql/schema.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index b74da7c6c6..407a986120 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -5,7 +5,6 @@ """Alembic environment script.""" # pylint: disable=no-member -import os import sys from logging.config import fileConfig @@ -26,10 +25,10 @@ # add your model's MetaData object here # for 'autogenerate' support -# First, try to read it from an environment variable that we set in mlos_bench. -# Then, fallback to the config file (should only be used by alembic cli while -# devs prepare schema changes). -url = os.getenv("MLOS_ALEMBIC_URL") or config.get_main_option("sqlalchemy.url") +# NOTE: We override the alembic.ini file programmatically in storage/sql/schema.py +# However, the alembic.ini file value is used during alembic CLI operations +# (e.g., dev ops extending the schema). +url = config.get_main_option("sqlalchemy.url") engine = create_engine(url) if url else None target_metadata = DbSchema(engine=engine).meta diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index facc57b818..902681203c 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -315,11 +315,17 @@ def meta(self) -> MetaData: """Return the SQLAlchemy MetaData object.""" return self._meta - @staticmethod - def _get_alembic_cfg(conn: Connection) -> config.Config: + def _get_alembic_cfg(self, conn: Connection) -> config.Config: alembic_cfg = config.Config( path_join(str(files("mlos_bench.storage.sql")), "alembic.ini", abs_path=True) ) + assert self._engine is not None + alembic_cfg.set_main_option( + "sqlalchemy.url", + self._engine.url.render_as_string( + hide_password=False, + ), + ) alembic_cfg.attributes["connection"] = conn return alembic_cfg From 988bc903b73c8842d164f99323627ad7b8b11fb0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:27:26 -0500 Subject: [PATCH 31/88] remove env --- mlos_bench/mlos_bench/storage/sql/storage.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 470cc06fa9..7fc86aff1f 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -5,7 +5,6 @@ """Saving and restoring the benchmark data in SQL database.""" import logging -import os from typing import Literal from sqlalchemy import URL, Engine, create_engine @@ -38,8 +37,6 @@ def __init__( self._lazy_schema_create = self._config.pop("lazy_schema_create", False) self._log_sql = self._config.pop("log_sql", False) self._url = URL.create(**self._config) - os.environ["MLOS_ALEMBIC_URL"] = str(self._url) - _LOG.debug("MLOS_ALEMBIC_URL: %s", os.environ["MLOS_ALEMBIC_URL"]) self._repr = f"{self._url.get_backend_name()}:{self._url.database}" self._engine: Engine self._db_schema: DbSchema From f720d0dc18793af0c5577dffbf94bce9cc1752c4 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:36:13 -0500 Subject: [PATCH 32/88] temporarily revert back to something like the original schema --- mlos_bench/mlos_bench/storage/sql/schema.py | 31 ++++----------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 902681203c..f280f819c2 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,7 +39,6 @@ create_mock_engine, inspect, ) -from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -113,8 +112,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), - Column("ts_end", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), + Column("ts_start", DateTime), + Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -188,16 +187,8 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column( - "ts_start", - DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - ), - Column( - "ts_end", - DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=True, - ), + Column("ts_start", DateTime, nullable=False), + Column("ts_end", DateTime, nullable=True), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -249,12 +240,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -289,12 +275,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From f250e617d1f63ddc20de22024471f0de7a31e5f9 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:37:15 -0500 Subject: [PATCH 33/88] Revert "temporarily revert back to something like the original schema" This reverts commit d77a35127c83366f0d53af664371a6ea9798a3f7. --- mlos_bench/mlos_bench/storage/sql/schema.py | 55 ++++++++++++++++++--- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index f280f819c2..885e9e356f 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,6 +39,7 @@ create_mock_engine, inspect, ) +from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -112,8 +113,20 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime), - Column("ts_end", DateTime), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -187,8 +200,22 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime, nullable=False), - Column("ts_end", DateTime, nullable=True), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=True, + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -240,7 +267,15 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -275,7 +310,15 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From 793c2e516b7eb546eebdb60358b7dcbd30cb6d29 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:45:44 -0500 Subject: [PATCH 34/88] Reapply "temporarily revert back to something like the original schema" This reverts commit f250e617d1f63ddc20de22024471f0de7a31e5f9. --- mlos_bench/mlos_bench/storage/sql/schema.py | 55 +++------------------ 1 file changed, 6 insertions(+), 49 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 885e9e356f..f280f819c2 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,7 +39,6 @@ create_mock_engine, inspect, ) -from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -113,20 +112,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column( - "ts_start", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - ), - Column( - "ts_end", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - ), + Column("ts_start", DateTime), + Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -200,22 +187,8 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column( - "ts_start", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - ), - Column( - "ts_end", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=True, - ), + Column("ts_start", DateTime, nullable=False), + Column("ts_end", DateTime, nullable=True), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -267,15 +240,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -310,15 +275,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From 952fba02cb1481565b50e9b5a61a9fb83cf527e8 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:58:05 -0500 Subject: [PATCH 35/88] include timezone --- mlos_bench/mlos_bench/storage/sql/schema.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index f280f819c2..aa8132dacb 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -112,8 +112,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime), - Column("ts_end", DateTime), + Column("ts_start", DateTime(timezone=True)), + Column("ts_end", DateTime(timezone=True)), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -187,8 +187,8 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime, nullable=False), - Column("ts_end", DateTime, nullable=True), + Column("ts_start", DateTime(timezone=True), nullable=False), + Column("ts_end", DateTime(timezone=True), nullable=True), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), From d6617bab5cbc2bb8561d9dfe509c9be234dedf8d Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 12:05:58 -0500 Subject: [PATCH 36/88] make mysql datetimes support fractional seconds --- mlos_bench/mlos_bench/storage/sql/schema.py | 55 ++++++++++++++++++--- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index aa8132dacb..885e9e356f 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,6 +39,7 @@ create_mock_engine, inspect, ) +from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -112,8 +113,20 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime(timezone=True)), - Column("ts_end", DateTime(timezone=True)), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -187,8 +200,22 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime(timezone=True), nullable=False), - Column("ts_end", DateTime(timezone=True), nullable=True), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=True, + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -240,7 +267,15 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -275,7 +310,15 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From 2f28d79e61af2139860589fb52b83219fd662bf7 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 12:07:38 -0500 Subject: [PATCH 37/88] log the alembic target engine url --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index 407a986120..842550bc0a 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -7,6 +7,7 @@ import sys from logging.config import fileConfig +from logging import info from alembic import context from sqlalchemy import create_engine, engine_from_config, pool @@ -30,6 +31,8 @@ # (e.g., dev ops extending the schema). url = config.get_main_option("sqlalchemy.url") engine = create_engine(url) if url else None +if engine: + info(f"engine.url {str(engine.url)}") target_metadata = DbSchema(engine=engine).meta # other values from the config, defined by the needs of env.py, From 1b0fb2cf75a3ba3766b4ef898faecd690cf165b2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 12:19:36 -0500 Subject: [PATCH 38/88] engine no longer optional --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 4 +++- mlos_bench/mlos_bench/storage/sql/schema.py | 10 +++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index 842550bc0a..cb384e5910 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -7,7 +7,7 @@ import sys from logging.config import fileConfig -from logging import info +from logging import info, warning from alembic import context from sqlalchemy import create_engine, engine_from_config, pool @@ -33,6 +33,8 @@ engine = create_engine(url) if url else None if engine: info(f"engine.url {str(engine.url)}") +else: + warning("Missing engine.url: schema changes may not be accurate.") target_metadata = DbSchema(engine=engine).meta # other values from the config, defined by the needs of env.py, diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 885e9e356f..e9b369259b 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -73,19 +73,15 @@ class DbSchema: # for all DB tables, so it's ok to disable the warnings. # pylint: disable=too-many-instance-attributes - def __init__(self, engine: Engine | None): + def __init__(self, engine: Engine): """ Declare the SQLAlchemy schema for the database. Parameters ---------- - engine : sqlalchemy.engine.Engine | None - The SQLAlchemy engine to use for the DB schema. - Listed as optional for `alembic `_ - schema migration purposes so we can reference it inside it's ``env.py`` - config file for :attr:`~meta` data inspection, but won't generally be - functional without one. + engine : sqlalchemy.engine.Engine """ + assert engine, "Error: can't create schema without engine." _LOG.info("Create the DB schema for: %s", engine) self._engine = engine self._meta = MetaData() From 928f4915227521e0e8f98247dd7326ae8a1d2a60 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 12:21:34 -0500 Subject: [PATCH 39/88] Revert "make mysql datetimes support fractional seconds" This reverts commit d6617bab5cbc2bb8561d9dfe509c9be234dedf8d. --- mlos_bench/mlos_bench/storage/sql/schema.py | 55 +++------------------ 1 file changed, 6 insertions(+), 49 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index e9b369259b..bb97d61049 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,7 +39,6 @@ create_mock_engine, inspect, ) -from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -109,20 +108,8 @@ def __init__(self, engine: Engine): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column( - "ts_start", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - ), - Column( - "ts_end", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - ), + Column("ts_start", DateTime(timezone=True)), + Column("ts_end", DateTime(timezone=True)), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -196,22 +183,8 @@ def __init__(self, engine: Engine): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column( - "ts_start", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - ), - Column( - "ts_end", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=True, - ), + Column("ts_start", DateTime(timezone=True), nullable=False), + Column("ts_end", DateTime(timezone=True), nullable=True), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -263,15 +236,7 @@ def __init__(self, engine: Engine): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -306,15 +271,7 @@ def __init__(self, engine: Engine): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From 4863a5b8c390d32862b4182d320be2a8f666ae36 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 13:31:13 -0500 Subject: [PATCH 40/88] Enable alembic to detect datetime precision issues with MySQL --- .../mlos_bench/storage/sql/alembic/env.py | 142 ++++++++++++++++-- 1 file changed, 133 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index cb384e5910..c56e9cebf3 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -7,10 +7,15 @@ import sys from logging.config import fileConfig -from logging import info, warning +import logging from alembic import context +from alembic.migration import MigrationContext from sqlalchemy import create_engine, engine_from_config, pool +from sqlalchemy.schema import Column as SchemaColumn +from sqlalchemy.sql.schema import Column +from sqlalchemy.types import TypeEngine +from sqlalchemy.dialects import mysql from mlos_bench.storage.sql.schema import DbSchema @@ -23,18 +28,18 @@ # Don't override the mlos_bench or pytest loggers though. if config.config_file_name is not None and "alembic" in sys.argv[0]: fileConfig(config.config_file_name) +alembic_logger = logging.getLogger("alembic") # add your model's MetaData object here # for 'autogenerate' support # NOTE: We override the alembic.ini file programmatically in storage/sql/schema.py # However, the alembic.ini file value is used during alembic CLI operations # (e.g., dev ops extending the schema). -url = config.get_main_option("sqlalchemy.url") -engine = create_engine(url) if url else None -if engine: - info(f"engine.url {str(engine.url)}") -else: - warning("Missing engine.url: schema changes may not be accurate.") +sqlalchemy_url = config.get_main_option("sqlalchemy.url") +if not sqlalchemy_url: + raise ValueError("Missing sqlalchemy.url: schema changes may not be accurate.") +engine = create_engine(sqlalchemy_url) +alembic_logger.info("engine.url %s", str(engine.url)) target_metadata = DbSchema(engine=engine).meta # other values from the config, defined by the needs of env.py, @@ -43,6 +48,116 @@ # ... etc. +def fq_class_name(t: object) -> str: + """Return the fully qualified class name of a type.""" + return t.__module__ + "." + t.__class__.__name__ + + +def custom_compare_types( + migration_context: MigrationContext, # pylint: disable=unused-argument + inspected_column: SchemaColumn | None, # pylint: disable=unused-argument + metadata_column: Column, # pylint: disable=unused-argument + inspected_type: TypeEngine, + metadata_type: TypeEngine, +) -> bool | None: + """Custom column type comparator. + + See `Comparing Types + `_ + documentation for more details. + + Parameters + ---------- + + Notes + ----- + In the case of a MySQL DateTime variant, it makes sure that the floating + point accuracy is met. + + Returns + ------- + result : bool | None + Returns True if the column specifications don't match the column (i.e., + a change is needed). + Returns False when the column specification and column match. + Returns None to fallback to the default comparator logic. + """ + metadata_dialect_type = metadata_type.dialect_impl(migration_context.dialect) + if alembic_logger.isEnabledFor(logging.DEBUG): + alembic_logger.debug( + ( + "Comparing columns: " + "inspected_column: [%s] %s and " + "metadata_column: [%s (%s)] %s " + "inspected_column.__dict__: %s\n" + "inspected_column.dialect_options: %s\n" + "inspected_column.dialect_kwargs: %s\n" + "inspected_type.__dict__: %s\n" + "metadata_column.__dict__: %s\n" + "metadata_type.__dict__: %s\n" + "metadata_dialect_type.__dict__: %s\n" + ), + fq_class_name(inspected_type), + inspected_column, + fq_class_name(metadata_type), + fq_class_name(metadata_dialect_type), + metadata_column, + inspected_column.__dict__, + dict(inspected_column.dialect_options) if inspected_column is not None else None, + dict(inspected_column.dialect_kwargs) if inspected_column is not None else None, + inspected_type.__dict__, + metadata_column.__dict__, + metadata_type.__dict__, + metadata_dialect_type.__dict__, + ) + + # Implement a more detailed DATETIME precision comparison for MySQL. + # Note: Currently also handles MariaDB. + if migration_context.dialect.name == "mysql": + if isinstance(metadata_dialect_type, (mysql.DATETIME, mysql.TIMESTAMP)): + if not isinstance(inspected_type, type(metadata_dialect_type)): + alembic_logger.info( + "inspected_type %s does not match metadata_dialect_type %s", + fq_class_name(inspected_type), + fq_class_name(metadata_dialect_type), + ) + return True + else: + if inspected_type.fsp != metadata_dialect_type.fsp: + alembic_logger.info( + "inspected_type.fsp (%s) and metadata_dialect_type.fsp (%s) don't match", + inspected_type.fsp, + metadata_dialect_type.fsp, + ) + return True + + if inspected_type.timezone != metadata_dialect_type.timezone: + alembic_logger.info( + ( + "inspected_type.timezone (%s) and " + "metadata_dialect_type.timezone (%s) don't match" + ), + inspected_type.timezone, + metadata_dialect_type.timezone, + ) + return True + + if alembic_logger.isEnabledFor(logging.DEBUG): + alembic_logger.debug( + ( + "Using default compare_type behavior for " + "inspected_column: [%s] %s and " + "metadata_column: [%s (%s)] %s (see above for details).\n" + ), + fq_class_name(inspected_type), + inspected_column, + fq_class_name(metadata_type), + fq_class_name(metadata_dialect_type), + metadata_column, + ) + return None # fallback to default comparison behavior + + def run_migrations_offline() -> None: """ Run migrations in 'offline' mode. @@ -59,6 +174,7 @@ def run_migrations_offline() -> None: target_metadata=target_metadata, literal_binds=True, dialect_opts={"paramstyle": "named"}, + compare_type=custom_compare_types, ) with context.begin_transaction(): @@ -84,12 +200,20 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure( + connection=connection, + target_metadata=target_metadata, + compare_type=custom_compare_types, + ) with context.begin_transaction(): context.run_migrations() else: - context.configure(connection=connectable, target_metadata=target_metadata) + context.configure( + connection=connectable, + target_metadata=target_metadata, + compare_type=custom_compare_types, + ) with context.begin_transaction(): context.run_migrations() From 6a697a06df901e3508d080ed7fd138c4f3dd8c70 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 13:31:34 -0500 Subject: [PATCH 41/88] Enable floating point seconds with mysql --- mlos_bench/mlos_bench/storage/sql/schema.py | 55 ++++++++++++++++++--- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index bb97d61049..e9b369259b 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,6 +39,7 @@ create_mock_engine, inspect, ) +from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -108,8 +109,20 @@ def __init__(self, engine: Engine): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime(timezone=True)), - Column("ts_end", DateTime(timezone=True)), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -183,8 +196,22 @@ def __init__(self, engine: Engine): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime(timezone=True), nullable=False), - Column("ts_end", DateTime(timezone=True), nullable=True), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=True, + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -236,7 +263,15 @@ def __init__(self, engine: Engine): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -271,7 +306,15 @@ def __init__(self, engine: Engine): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From f7ccf2621835bd9a62e7a04eef3319cb0e526955 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 13:33:36 -0500 Subject: [PATCH 42/88] Alembic script to add floating point seconds precision --- ...4_support_fractional_seconds_with_mysql.py | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py new file mode 100644 index 0000000000..c7a12d1b19 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py @@ -0,0 +1,120 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""Support fractional seconds with MySQL + +Revision ID: b61aa446e724 +Revises: 8928a401115b +Create Date: 2025-06-02 17:56:34.746642+00:00 + +""" +# pylint: disable=no-member + +from collections.abc import Sequence + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import mysql + +# revision identifiers, used by Alembic. +revision: str = "b61aa446e724" +down_revision: str | None = "8928a401115b" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """The schema upgrade script for this revision.""" + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "experiment", + "ts_start", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "experiment", + "ts_end", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "trial", + "ts_start", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + op.alter_column( + "trial", + "ts_end", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "trial_status", + "ts", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + op.alter_column( + "trial_telemetry", + "ts", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + """The schema downgrade script for this revision.""" + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "trial_telemetry", + "ts", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "trial_status", + "ts", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "trial", + "ts_end", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + op.alter_column( + "trial", + "ts_start", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "experiment", + "ts_end", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + op.alter_column( + "experiment", + "ts_start", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + # ### end Alembic commands ### From 40374c2f25b91f8d9b7633c18e84f72741a7bc6e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 13:34:20 -0500 Subject: [PATCH 43/88] fixup --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 6cc1e4b7c8..857375445e 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -65,7 +65,7 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # See README.md for details. # Uncomment one of these: sqlalchemy.url = sqlite:///mlos_bench.sqlite -#sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench +#sqlalchemy.url = mysql+mysqlconnector://root:password@localhost:3306/mlos_bench #sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench From fe4171a1b47f2da26d144d28f58fc0731537c673 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 13:39:59 -0500 Subject: [PATCH 44/88] rework to only apply to mysql --- ...4_support_fractional_seconds_with_mysql.py | 182 +++++++++--------- 1 file changed, 93 insertions(+), 89 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py index c7a12d1b19..1b933d916f 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py @@ -13,7 +13,7 @@ from collections.abc import Sequence -from alembic import op +from alembic import context, op import sqlalchemy as sa from sqlalchemy.dialects import mysql @@ -26,95 +26,99 @@ def upgrade() -> None: """The schema upgrade script for this revision.""" - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "experiment", - "ts_start", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=True, - ) - op.alter_column( - "experiment", - "ts_end", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=True, - ) - op.alter_column( - "trial", - "ts_start", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=False, - ) - op.alter_column( - "trial", - "ts_end", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=True, - ) - op.alter_column( - "trial_status", - "ts", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=False, - ) - op.alter_column( - "trial_telemetry", - "ts", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=False, - ) - # ### end Alembic commands ### + bind = context.get_bind() + if bind.dialect.name == "mysql": + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "experiment", + "ts_start", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "experiment", + "ts_end", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "trial", + "ts_start", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + op.alter_column( + "trial", + "ts_end", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "trial_status", + "ts", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + op.alter_column( + "trial_telemetry", + "ts", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + # ### end Alembic commands ### def downgrade() -> None: """The schema downgrade script for this revision.""" - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "trial_telemetry", - "ts", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=False, - ) - op.alter_column( - "trial_status", - "ts", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=False, - ) - op.alter_column( - "trial", - "ts_end", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=True, - ) - op.alter_column( - "trial", - "ts_start", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=False, - ) - op.alter_column( - "experiment", - "ts_end", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=True, - ) - op.alter_column( - "experiment", - "ts_start", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=True, - ) - # ### end Alembic commands ### + bind = context.get_bind() + if bind.dialect.name == "mysql": + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "trial_telemetry", + "ts", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "trial_status", + "ts", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "trial", + "ts_end", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + op.alter_column( + "trial", + "ts_start", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "experiment", + "ts_end", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + op.alter_column( + "experiment", + "ts_start", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + # ### end Alembic commands ### From 5a42a141e9ca1d7455f6387f32bb60e4a966a93b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 15:55:34 -0500 Subject: [PATCH 45/88] be sure to mark that version as required --- .../mlos_bench/tests/storage/sql/fixtures.py | 13 +++++++++++-- .../tests/storage/sql/test_storage_schemas.py | 17 +++++------------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 572b1a4c0d..1c3ab4614e 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -23,7 +23,7 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config -from mlos_bench.tests import SEED, wait_docker_service_healthy +from mlos_bench.tests import SEED, wait_docker_service_healthy, DOCKER from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, MAX_TRIALS, @@ -39,7 +39,16 @@ # pylint: disable=redefined-outer-name -# TODO: Add mysql_storage and postgres_storage +DOCKER_DBMS_FIXTURES = [] +if DOCKER: + DOCKER_DBMS_FIXTURES = [ + lazy_fixture("mysql_storage"), + lazy_fixture("postgres_storage"), + ] + +PERSISTENT_SQL_STORAGE_FIXTURES = [lazy_fixture("sqlite_storage")] +if DOCKER: + PERSISTENT_SQL_STORAGE_FIXTURES.extend(DOCKER_DBMS_FIXTURES) @pytest.fixture(scope="session") diff --git a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index 0206bb0dd7..bf7c4dabee 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -10,28 +10,21 @@ from sqlalchemy import inspect from mlos_bench.storage.sql.storage import SqlStorage - -from mlos_bench.tests import DOCKER +from mlos_bench.tests.storage.sql.fixtures import DOCKER_DBMS_FIXTURES # NOTE: This value is hardcoded to the latest revision in the alembic versions directory. # It could also be obtained programmatically using the "alembic heads" command or heads() API. # See Also: schema.py for an example of programmatic alembic config access. -CURRENT_ALEMBIC_HEAD = "8928a401115b" - -# Try to test multiple DBMS engines. -docker_dbms_fixtures = [] -if DOCKER: - docker_dbms_fixtures = [ - lazy_fixture("mysql_storage"), - lazy_fixture("postgres_storage"), - ] +CURRENT_ALEMBIC_HEAD = "b61aa446e724" +# Try to test multiple DBMS engines. @pytest.mark.parametrize( "some_sql_storage_fixture", [ + lazy_fixture("mem_storage"), lazy_fixture("sqlite_storage"), - *docker_dbms_fixtures, + *DOCKER_DBMS_FIXTURES, ], ) def test_storage_schemas(some_sql_storage_fixture: SqlStorage) -> None: From f273ea3cec7c5d68a4d2d145dd24de8f88dc11c2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 15:56:41 -0500 Subject: [PATCH 46/88] add that refactor too --- .../tests/storage/test_storage_pickling.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index 365c68af93..f4cc1793b9 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -9,21 +9,13 @@ from typing import Literal import pytest -from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture from pytz import UTC from mlos_bench.environments.status import Status from mlos_bench.storage.base_storage import Storage -from mlos_bench.tests import DOCKER +from mlos_bench.tests.storage.sql.fixtures import PERSISTENT_SQL_STORAGE_FIXTURES from mlos_bench.tunables.tunable_groups import TunableGroups -docker_dbms_fixtures = [] -if DOCKER: - docker_dbms_fixtures = [ - lazy_fixture("mysql_storage"), - lazy_fixture("postgres_storage"), - ] - # TODO: When we introduce ParallelTrialScheduler warn at config startup time # that it is incompatible with sqlite storage on Windows. @@ -35,8 +27,7 @@ "persistent_storage", [ # TODO: Improve this test to support non-sql backends eventually as well. - lazy_fixture("sqlite_storage"), - *docker_dbms_fixtures, + *PERSISTENT_SQL_STORAGE_FIXTURES, ], ) def test_storage_pickle_restore_experiment_and_trial( From b1b3733eec90f9ec4ceeef2c52b0333dda940f9a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 16:01:16 -0500 Subject: [PATCH 47/88] Fixups and refactors to allow two things 1. Allow restoring an Experiment by ID for parallel workers without needing to re-evaluate the git info. 2. Allow recovering the absolute root env rather than mistakenly re-resolving it from the root of the repo multiple times (which results in an error). --- mlos_bench/mlos_bench/storage/base_storage.py | 74 +++++++++++++++++-- .../mlos_bench/storage/sql/experiment.py | 10 ++- mlos_bench/mlos_bench/storage/sql/storage.py | 6 +- mlos_bench/mlos_bench/tests/__init__.py | 1 - .../mlos_bench/tests/storage/exp_data_test.py | 2 +- .../mlos_bench/tests/storage/sql/fixtures.py | 4 +- .../tests/storage/test_storage_pickling.py | 2 +- mlos_bench/mlos_bench/tests/util_git_test.py | 20 ++++- mlos_bench/mlos_bench/util.py | 39 ++++++++-- 9 files changed, 134 insertions(+), 24 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 75a84bf0b2..4a7bc65063 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -25,10 +25,12 @@ from __future__ import annotations import logging +import os from abc import ABCMeta, abstractmethod from collections.abc import Iterator, Mapping from contextlib import AbstractContextManager as ContextManager from datetime import datetime +from subprocess import CalledProcessError from types import TracebackType from typing import Any, Literal @@ -38,7 +40,7 @@ from mlos_bench.services.base_service import Service from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.tunables.tunable_groups import TunableGroups -from mlos_bench.util import get_git_info +from mlos_bench.util import get_git_info, get_git_root, path_join _LOG = logging.getLogger(__name__) @@ -187,16 +189,61 @@ def __init__( # pylint: disable=too-many-arguments tunables: TunableGroups, experiment_id: str, trial_id: int, - root_env_config: str, + root_env_config: str | None, description: str, opt_targets: dict[str, Literal["min", "max"]], + git_repo: str | None = None, + git_commit: str | None = None, + rel_root_env_config: str | None = None, ): self._tunables = tunables.copy() self._trial_id = trial_id self._experiment_id = experiment_id - (self._git_repo, self._git_commit, self._root_env_config) = get_git_info( - root_env_config - ) + if root_env_config is None: + # Restoring from DB. + if not (git_repo and git_commit and rel_root_env_config): + raise ValueError( + "Missing required args: git_repo, git_commit, rel_root_env_config" + ) + self._git_repo = git_repo + self._git_commit = git_commit + self._rel_root_env_config = rel_root_env_config + + # Currently we only store the relative path of the root env config + # from the git repo it came from. + git_root = git_repo + if not os.path.exists(git_root): + try: + git_root = get_git_root(os.curdir) + except CalledProcessError: + _LOG.warning( + "Failed to find a git repo in the current working directory: %s", + os.curdir, + ) + git_root = get_git_root(__file__) + + self._abs_root_env_config = path_join( + git_root, + self._rel_root_env_config, + abs_path=True, + ) + _LOG.info( + "Resolved relative root_config %s for experiment %s to %s", + self._rel_root_env_config, + self._experiment_id, + self._abs_root_env_config, + ) + else: + if git_repo or git_commit or rel_root_env_config: + raise ValueError("Unexpected args: git_repo, git_commit, rel_root_env_config") + ( + self._git_repo, + self._git_commit, + self._rel_root_env_config, + self._abs_root_env_config, + ) = get_git_info( + root_env_config, + ) self._description = description self._opt_targets = opt_targets self._in_context = False @@ -278,9 +325,20 @@ def description(self) -> str: return self._description @property - def root_env_config(self) -> str: - """Get the Experiment's root Environment config file path.""" - return self._root_env_config + def rel_root_env_config(self) -> str: + """Get the Experiment's root Environment config's relative file path + to the git repo root. + """ + return self._rel_root_env_config + + @property + def abs_root_env_config(self) -> str: + """Get the Experiment's root Environment config file path. + + This returns the current absolute path to the root config for this + process instead of the path relative to the git repo root. + """ + return self._abs_root_env_config @property def tunables(self) -> TunableGroups: diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 7a6dbf75b4..6b7aa220a6 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -38,9 +38,12 @@ def __init__( # pylint: disable=too-many-arguments tunables: TunableGroups, experiment_id: str, trial_id: int, - root_env_config: str, + root_env_config: str | None, description: str, opt_targets: dict[str, Literal["min", "max"]], + git_repo: str | None = None, + git_commit: str | None = None, + rel_root_env_config: str | None = None, ): super().__init__( tunables=tunables, @@ -49,6 +52,9 @@ def __init__( # pylint: disable=too-many-arguments root_env_config=root_env_config, description=description, opt_targets=opt_targets, + git_repo=git_repo, + git_commit=git_commit, + rel_root_env_config=rel_root_env_config, ) self._engine = engine self._schema = schema @@ -89,7 +95,7 @@ def _setup(self) -> None: description=self._description, git_repo=self._git_repo, git_commit=self._git_commit, - root_env_config=self._root_env_config, + root_env_config=self._rel_root_env_config, ) ) conn.execute( diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 7fc86aff1f..27748e5ca8 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -139,9 +139,13 @@ def get_experiment_by_id( experiment_id=exp.exp_id, trial_id=-1, # will be loaded upon __enter__ which calls _setup() description=exp.description, - root_env_config=exp.root_env_config, + # Use special logic to load the experiment root config info directly. + root_env_config=None, tunables=tunables, opt_targets=opt_targets, + git_repo=exp.git_repo, + git_commit=exp.git_commit, + rel_root_env_config=exp.root_env_config, ) def experiment( # pylint: disable=too-many-arguments diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index 1e0d8c9941..41fbd0bc37 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -20,7 +20,6 @@ import pytest import pytz -from pytest_docker.plugin import Services as DockerServices from mlos_bench.util import get_class_from_name, nullable diff --git a/mlos_bench/mlos_bench/tests/storage/exp_data_test.py b/mlos_bench/mlos_bench/tests/storage/exp_data_test.py index dc8baf489c..b8ca6a64b9 100644 --- a/mlos_bench/mlos_bench/tests/storage/exp_data_test.py +++ b/mlos_bench/mlos_bench/tests/storage/exp_data_test.py @@ -30,7 +30,7 @@ def test_exp_data_root_env_config( """Tests the root_env_config property of ExperimentData.""" # pylint: disable=protected-access assert exp_data.root_env_config == ( - exp_storage._root_env_config, + exp_storage._rel_root_env_config, exp_storage._git_repo, exp_storage._git_commit, ) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 1c3ab4614e..bcb74ad375 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -238,7 +238,7 @@ def exp_storage( with storage.experiment( experiment_id="Test-001", trial_id=1, - root_env_config="environment.jsonc", + root_env_config="my-environment.jsonc", description="pytest experiment", tunables=tunable_groups, opt_targets={"score": "min"}, @@ -372,7 +372,7 @@ def _dummy_run_exp( trial_runners=trial_runners, optimizer=opt, storage=storage, - root_env_config=exp.root_env_config, + root_env_config=exp.abs_root_env_config, ) # Add some trial data to that experiment by "running" it. diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index f4cc1793b9..21c204283c 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -72,7 +72,7 @@ def test_storage_pickle_restore_experiment_and_trial( assert restored_experiment is not experiment assert restored_experiment.experiment_id == experiment.experiment_id assert restored_experiment.description == experiment.description - assert restored_experiment.root_env_config == experiment.root_env_config + assert restored_experiment.rel_root_env_config == experiment.rel_root_env_config assert restored_experiment.tunables == experiment.tunables assert restored_experiment.opt_targets == experiment.opt_targets with restored_experiment: diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 77fd2779c7..bbac89399d 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -4,13 +4,27 @@ # """Unit tests for get_git_info utility function.""" import re +import os -from mlos_bench.util import get_git_info +from mlos_bench.util import get_git_info, path_join def test_get_git_info() -> None: - """Check that we can retrieve git info about the current repository correctly.""" - (git_repo, git_commit, rel_path) = get_git_info(__file__) + """Check that we can retrieve git info about the current repository + correctly from a file.""" + (git_repo, git_commit, rel_path, abs_path) = get_git_info(__file__) assert "mlos" in git_repo.lower() assert re.match(r"[0-9a-f]{40}", git_commit) is not None assert rel_path == "mlos_bench/mlos_bench/tests/util_git_test.py" + assert abs_path == path_join(__file__, abs_path=True) + + +def test_get_git_info_dir() -> None: + """Check that we can retrieve git info about the current repository + correctly from a directory.""" + dirname = os.path.dirname(__file__) + (git_repo, git_commit, rel_path, abs_path) = get_git_info(dirname) + assert "mlos" in git_repo.lower() + assert re.match(r"[0-9a-f]{40}", git_commit) is not None + assert rel_path == "mlos_bench/mlos_bench/tests" + assert abs_path == path_join(dirname, abs_path=True) diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index 1c45cd4ecf..0836f5f048 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -274,7 +274,32 @@ def check_required_params(config: Mapping[str, Any], required_params: Iterable[s ) -def get_git_info(path: str = __file__) -> tuple[str, str, str]: +def get_git_root(path: str = __file__) -> str: + """ + Get the root dir of the git repository. + + Parameters + ---------- + path : str, optional + Path to the file in git repository. + + Returns + ------- + str + _description_ + """ + abspath = path_join(path, abs_path=True) + if not os.path.exists(abspath) or not os.path.isdir(abspath): + dirname = os.path.dirname(abspath) + else: + dirname = abspath + git_root = subprocess.check_output( + ["git", "-C", dirname, "rev-parse", "--show-toplevel"], text=True + ).strip() + return git_root + + +def get_git_info(path: str = __file__) -> tuple[str, str, str, str]: """ Get the git repository, commit hash, and local path of the given file. @@ -286,9 +311,13 @@ def get_git_info(path: str = __file__) -> tuple[str, str, str]: Returns ------- (git_repo, git_commit, git_path) : tuple[str, str, str] - Git repository URL, last commit hash, and relative file path. + Git repository URL, last commit hash, and relative file path and current absolute path. """ - dirname = os.path.dirname(path) + abspath = path_join(path, abs_path=True) + if not os.path.exists(abspath) or not os.path.isdir(abspath): + dirname = os.path.dirname(abspath) + else: + dirname = abspath git_repo = subprocess.check_output( ["git", "-C", dirname, "remote", "get-url", "origin"], text=True ).strip() @@ -299,8 +328,8 @@ def get_git_info(path: str = __file__) -> tuple[str, str, str]: ["git", "-C", dirname, "rev-parse", "--show-toplevel"], text=True ).strip() _LOG.debug("Current git branch: %s %s", git_repo, git_commit) - rel_path = os.path.relpath(os.path.abspath(path), os.path.abspath(git_root)) - return (git_repo, git_commit, rel_path.replace("\\", "/")) + rel_path = os.path.relpath(abspath, os.path.abspath(git_root)) + return (git_repo, git_commit, rel_path.replace("\\", "/"), abspath) # Note: to avoid circular imports, we don't specify TunableValue here. From f632f902184d4ae79fde1232607d37177468dc49 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Jun 2025 21:14:54 +0000 Subject: [PATCH 48/88] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/storage/base_storage.py | 11 ++++++----- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 7 ++++--- ...a446e724_support_fractional_seconds_with_mysql.py | 6 +++--- mlos_bench/mlos_bench/tests/__init__.py | 3 ++- mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 2 +- mlos_bench/mlos_bench/tests/util_git_test.py | 12 +++++++----- 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 4a7bc65063..aec5e4d244 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -326,17 +326,18 @@ def description(self) -> str: @property def rel_root_env_config(self) -> str: - """Get the Experiment's root Environment config's relative file path - to the git repo root. + """Get the Experiment's root Environment config's relative file path to the + git repo root. """ return self._rel_root_env_config @property def abs_root_env_config(self) -> str: - """Get the Experiment's root Environment config file path. + """ + Get the Experiment's root Environment config file path. - This returns the current absolute path to the root config for this - process instead of the path relative to the git repo root. + This returns the current absolute path to the root config for this process + instead of the path relative to the git repo root. """ return self._abs_root_env_config diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index c56e9cebf3..af1a0db720 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -5,17 +5,17 @@ """Alembic environment script.""" # pylint: disable=no-member +import logging import sys from logging.config import fileConfig -import logging from alembic import context from alembic.migration import MigrationContext from sqlalchemy import create_engine, engine_from_config, pool +from sqlalchemy.dialects import mysql from sqlalchemy.schema import Column as SchemaColumn from sqlalchemy.sql.schema import Column from sqlalchemy.types import TypeEngine -from sqlalchemy.dialects import mysql from mlos_bench.storage.sql.schema import DbSchema @@ -60,7 +60,8 @@ def custom_compare_types( inspected_type: TypeEngine, metadata_type: TypeEngine, ) -> bool | None: - """Custom column type comparator. + """ + Custom column type comparator. See `Comparing Types `_ diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py index 1b933d916f..0d0f3f9b3d 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py @@ -2,19 +2,19 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Support fractional seconds with MySQL +""" +Support fractional seconds with MySQL. Revision ID: b61aa446e724 Revises: 8928a401115b Create Date: 2025-06-02 17:56:34.746642+00:00 - """ # pylint: disable=no-member from collections.abc import Sequence -from alembic import context, op import sqlalchemy as sa +from alembic import context, op from sqlalchemy.dialects import mysql # revision identifiers, used by Alembic. diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index 41fbd0bc37..a367133701 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -16,10 +16,10 @@ from datetime import tzinfo from logging import debug, warning from subprocess import run -from pytest_docker.plugin import Services as DockerServices import pytest import pytz +from pytest_docker.plugin import Services as DockerServices from mlos_bench.util import get_class_from_name, nullable @@ -89,6 +89,7 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: full_class_name = obj.__class__.__module__ + "." + obj.__class__.__name__ return full_class_name == try_resolve_class_name(expected_class_name) + HOST_DOCKER_NAME = "host.docker.internal" diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index bcb74ad375..19c69b9f59 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -23,7 +23,7 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config -from mlos_bench.tests import SEED, wait_docker_service_healthy, DOCKER +from mlos_bench.tests import DOCKER, SEED, wait_docker_service_healthy from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, MAX_TRIALS, diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index bbac89399d..788c06a5ea 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -3,15 +3,16 @@ # Licensed under the MIT License. # """Unit tests for get_git_info utility function.""" -import re import os +import re from mlos_bench.util import get_git_info, path_join def test_get_git_info() -> None: - """Check that we can retrieve git info about the current repository - correctly from a file.""" + """Check that we can retrieve git info about the current repository correctly from a + file. + """ (git_repo, git_commit, rel_path, abs_path) = get_git_info(__file__) assert "mlos" in git_repo.lower() assert re.match(r"[0-9a-f]{40}", git_commit) is not None @@ -20,8 +21,9 @@ def test_get_git_info() -> None: def test_get_git_info_dir() -> None: - """Check that we can retrieve git info about the current repository - correctly from a directory.""" + """Check that we can retrieve git info about the current repository correctly from a + directory. + """ dirname = os.path.dirname(__file__) (git_repo, git_commit, rel_path, abs_path) = get_git_info(dirname) assert "mlos" in git_repo.lower() From bf13366429f2e168d58ec750cba651514b36a7d2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 5 Jun 2025 12:13:23 -0500 Subject: [PATCH 49/88] copilot fixups --- mlos_bench/mlos_bench/tests/__init__.py | 13 ------------- mlos_bench/mlos_bench/tests/conftest.py | 5 +---- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index a367133701..cc57eccfe9 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -93,19 +93,6 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: HOST_DOCKER_NAME = "host.docker.internal" -@pytest.fixture(scope="session") -def docker_hostname() -> str: - """Returns the local hostname to use to connect to the test ssh server.""" - if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): - # On Linux, if we're running in a docker container, we can use the - # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. - return HOST_DOCKER_NAME - # Docker (Desktop) for Windows (WSL2) uses a special networking magic - # to refer to the host machine as `localhost` when exposing ports. - # In all other cases, assume we're executing directly inside conda on the host. - return "localhost" - - def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: """Wait until a docker service is ready.""" docker_services.wait_until_responsive( diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index 59dfbff9d2..602aacd0f8 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -15,7 +15,7 @@ from pytest_docker.plugin import get_docker_services from mlos_bench.environments.mock_env import MockEnv -from mlos_bench.tests import SEED, resolve_host_name, tunable_groups_fixtures +from mlos_bench.tests import SEED, resolve_host_name, tunable_groups_fixtures, HOST_DOCKER_NAME from mlos_bench.tunables.tunable_groups import TunableGroups # pylint: disable=redefined-outer-name @@ -29,9 +29,6 @@ covariant_group = tunable_groups_fixtures.covariant_group -HOST_DOCKER_NAME = "host.docker.internal" - - @pytest.fixture(scope="session") def docker_hostname() -> str: """Returns the local hostname to use to connect to the test ssh server.""" From 9df032c55112a7f496bd9003a6704ac4d25b8930 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Jun 2025 17:14:45 +0000 Subject: [PATCH 50/88] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/conftest.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index 602aacd0f8..1b295f2855 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -15,7 +15,12 @@ from pytest_docker.plugin import get_docker_services from mlos_bench.environments.mock_env import MockEnv -from mlos_bench.tests import SEED, resolve_host_name, tunable_groups_fixtures, HOST_DOCKER_NAME +from mlos_bench.tests import ( + HOST_DOCKER_NAME, + SEED, + resolve_host_name, + tunable_groups_fixtures, +) from mlos_bench.tunables.tunable_groups import TunableGroups # pylint: disable=redefined-outer-name From e001d9a166d6d4d8185ac7a6549c802954eb0f37 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 13 Jun 2025 13:28:25 -0500 Subject: [PATCH 51/88] merge cleanup --- .../mlos_bench/storage/sql/alembic/README.md | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index b100b2e07b..30bd51ab92 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -142,22 +142,6 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla docker kill postgres-alembic ``` -1. Cleanup any server instances you started. - - For instance: - - ```sh - rm mlos_bench/storage/sql/mlos_bench.sqlite - ``` - - ```sh - docker kill mysql-alembic - ``` - - ```sh - docker kill postgres-alembic - ``` - 1. Merge that to the `main` branch. 1. Might be good to cut a new `mlos_bench` release at this point as well. From c46f16a2ea816965cac723020e8185c0ee2d602f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Fri, 13 Jun 2025 13:29:15 -0500 Subject: [PATCH 52/88] unused-imnport --- mlos_bench/mlos_bench/tests/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index b8e39d66ec..778d89836d 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -12,7 +12,6 @@ import os import shutil import socket -import sys from datetime import tzinfo from logging import debug, warning from subprocess import run From 856041647455dbb44a4c38517c6d0fbebd49c83e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Jun 2025 11:53:15 +0200 Subject: [PATCH 53/88] bad merge --- mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index bf7c4dabee..f1d1b4c353 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -22,7 +22,6 @@ @pytest.mark.parametrize( "some_sql_storage_fixture", [ - lazy_fixture("mem_storage"), lazy_fixture("sqlite_storage"), *DOCKER_DBMS_FIXTURES, ], From f451e2b594b0794390a30fdf9318563ea42b92fe Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Jun 2025 15:00:04 +0200 Subject: [PATCH 54/88] add a test for non-git dirs, refactor to reduce duplicate code --- mlos_bench/mlos_bench/tests/util_git_test.py | 12 ++++++++++++ mlos_bench/mlos_bench/util.py | 6 ++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 788c06a5ea..9fe1689f53 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -5,6 +5,10 @@ """Unit tests for get_git_info utility function.""" import os import re +import tempfile +from subprocess import CalledProcessError + +import pytest from mlos_bench.util import get_git_info, path_join @@ -30,3 +34,11 @@ def test_get_git_info_dir() -> None: assert re.match(r"[0-9a-f]{40}", git_commit) is not None assert rel_path == "mlos_bench/mlos_bench/tests" assert abs_path == path_join(dirname, abs_path=True) + + +def test_non_git_dir() -> None: + """Check that we can handle a non-git directory.""" + with tempfile.TemporaryDirectory() as non_git_dir: + with pytest.raises(CalledProcessError): + # This should raise an error because the directory is not a git repository. + get_git_info(non_git_dir) diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index 7287ec4b6b..a52a89f795 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -324,10 +324,8 @@ def get_git_info(path: str = __file__) -> tuple[str, str, str, str]: git_commit = subprocess.check_output( ["git", "-C", dirname, "rev-parse", "HEAD"], text=True ).strip() - git_root = subprocess.check_output( - ["git", "-C", dirname, "rev-parse", "--show-toplevel"], text=True - ).strip() - _LOG.debug("Current git branch: %s %s", git_repo, git_commit) + git_root = get_git_root(path=abspath) + _LOG.debug("Current git branch for %s: %s %s", git_root, git_repo, git_commit) rel_path = os.path.relpath(abspath, os.path.abspath(git_root)) return (git_repo, git_commit, rel_path.replace("\\", "/"), abspath) From 32a18fe94386f7f10515d1b41765c3fa1b228719 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Jun 2025 15:02:20 +0200 Subject: [PATCH 55/88] docs --- mlos_bench/mlos_bench/util.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index a52a89f795..ef77424333 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -283,10 +283,15 @@ def get_git_root(path: str = __file__) -> str: path : str, optional Path to the file in git repository. + Raises + ------ + subprocess.CalledProcessError + If the path is not a git repository or the command fails. + Returns ------- str - _description_ + The root directory of the git repository. """ abspath = path_join(path, abs_path=True) if not os.path.exists(abspath) or not os.path.isdir(abspath): @@ -308,6 +313,11 @@ def get_git_info(path: str = __file__) -> tuple[str, str, str, str]: path : str Path to the file in git repository. + Raises + ------ + subprocess.CalledProcessError + If the path is not a git repository or the command fails. + Returns ------- (git_repo, git_commit, git_path) : tuple[str, str, str] From 19fefde384fcb040f817e29e615c7c98882d6ffa Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Jun 2025 15:04:12 +0200 Subject: [PATCH 56/88] test --- mlos_bench/mlos_bench/tests/util_git_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 9fe1689f53..3623a8e7f7 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -10,7 +10,7 @@ import pytest -from mlos_bench.util import get_git_info, path_join +from mlos_bench.util import get_git_info, get_git_root, path_join def test_get_git_info() -> None: @@ -41,4 +41,4 @@ def test_non_git_dir() -> None: with tempfile.TemporaryDirectory() as non_git_dir: with pytest.raises(CalledProcessError): # This should raise an error because the directory is not a git repository. - get_git_info(non_git_dir) + get_git_root(non_git_dir) From 40417231dbba42c506ebb1a74f9fd06ab9586e4f Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Jun 2025 17:22:34 +0200 Subject: [PATCH 57/88] improvements on git info handling --- mlos_bench/mlos_bench/tests/util_git_test.py | 36 ++++++++ mlos_bench/mlos_bench/util.py | 97 ++++++++++++++++++-- 2 files changed, 123 insertions(+), 10 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 3623a8e7f7..cf92bbdad3 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -6,10 +6,13 @@ import os import re import tempfile + +from pathlib import Path from subprocess import CalledProcessError import pytest + from mlos_bench.util import get_git_info, get_git_root, path_join @@ -42,3 +45,36 @@ def test_non_git_dir() -> None: with pytest.raises(CalledProcessError): # This should raise an error because the directory is not a git repository. get_git_root(non_git_dir) + + +def test_non_upstream_git() -> None: + """Check that we can handle a git directory without an upstream.""" + with tempfile.TemporaryDirectory() as non_upstream_git_dir: + non_upstream_git_dir = path_join(non_upstream_git_dir, abs_path=True) + # Initialize a new git repository. + os.system(f"git init {non_upstream_git_dir} -b main") + Path(non_upstream_git_dir).joinpath("README.md").touch() + os.system(f"git -C {non_upstream_git_dir} add README.md") + os.system(f"git -C {non_upstream_git_dir} commit -m 'Initial commit'") + # This should raise an error because the repository has no upstream. + (git_repo, _git_commit, rel_path, abs_path) = get_git_info(non_upstream_git_dir) + assert git_repo == f"file://{non_upstream_git_dir}" + assert abs_path == non_upstream_git_dir + assert rel_path == "." + + +@pytest.mark.skipif( + os.environ.get("GITHUB_ACTIONS") != "true", + reason="Not running in GitHub Actions CI.", +) +def test_github_actions_git_info() -> None: + """Test that get_git_info matches GitHub Actions environment variables if running in CI.""" + repo_env = os.environ.get("GITHUB_REPOSITORY") # "owner/repo" format + sha_env = os.environ.get("GITHUB_SHA") + assert repo_env, "GITHUB_REPOSITORY not set in environment." + assert sha_env, "GITHUB_SHA not set in environment." + git_repo, git_commit, _rel_path, _abs_path = get_git_info(__file__) + assert git_repo.endswith(repo_env), f"git_repo '{git_repo}' does not end with '{repo_env}'" + assert ( + git_commit == sha_env + ), f"git_commit '{git_commit}' does not match GITHUB_SHA '{sha_env}'" diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index ef77424333..0f1e3d2ef7 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -153,7 +153,7 @@ def path_join(*args: str, abs_path: bool = False) -> str: """ path = os.path.join(*args) if abs_path: - path = os.path.abspath(path) + path = os.path.realpath(path) return os.path.normpath(path).replace("\\", "/") @@ -291,7 +291,7 @@ def get_git_root(path: str = __file__) -> str: Returns ------- str - The root directory of the git repository. + The absolute path to the root directory of the git repository. """ abspath = path_join(path, abs_path=True) if not os.path.exists(abspath) or not os.path.isdir(abspath): @@ -299,9 +299,87 @@ def get_git_root(path: str = __file__) -> str: else: dirname = abspath git_root = subprocess.check_output( - ["git", "-C", dirname, "rev-parse", "--show-toplevel"], text=True + ["git", "-C", dirname, "rev-parse", "--show-toplevel"], + text=True, ).strip() - return git_root + return path_join(git_root, abs_path=True) + + +def get_git_remote_info(path: str, remote: str) -> str: + """ + Gets the remote URL for the given remote name in the git repository. + + Parameters + ---------- + path : str + Path to the file in git repository. + remote : str + The name of the remote (e.g., "origin"). + + Raises + ------ + subprocess.CalledProcessError + If the command fails or the remote does not exist. + + Returns + ------- + str + The URL of the remote repository. + """ + return subprocess.check_output( + ["git", "-C", path, "remote", "get-url", remote], text=True + ).strip() + + +def get_git_repo_info(path: str) -> str: + """ + Get the git repository URL for the given git repo. + + Tries to get the upstream branch URL, falling back to the "origin" remote + if the upstream branch is not set or does not exist. If that also fails, + it returns a file URL pointing to the local path. + + Parameters + ---------- + path : str + Path to the git repository. + + Raises + ------ + subprocess.CalledProcessError + If the command fails or the git repository does not exist. + + Returns + ------- + str + The upstream URL of the git repository. + """ + # In case "origin" remote is not set, or this branch has a different + # upstream, we should handle it gracefully. + # (e.g., fallback to the first one we find?) + path = path_join(path, abs_path=True) + cmd = ["git", "-C", path, "rev-parse", "--abbrev-ref", "--symbolic-full-name", "HEAD@{u}"] + try: + git_remote = subprocess.check_output(cmd, text=True).strip() + git_remote = git_remote.split("/", 1)[0] + git_repo = get_git_remote_info(path, git_remote) + except subprocess.CalledProcessError: + git_remote = "origin" + _LOG.warning( + "Failed to get the upstream branch for %s. Falling back to '%s' remote.", + path, + git_remote, + ) + try: + git_repo = get_git_remote_info(path, git_remote) + except subprocess.CalledProcessError: + git_repo = "file://" + path + _LOG.warning( + "Failed to get the upstream branch for %s. Falling back to '%s'.", + path, + git_repo, + ) + return git_repo def get_git_info(path: str = __file__) -> tuple[str, str, str, str]: @@ -320,21 +398,20 @@ def get_git_info(path: str = __file__) -> tuple[str, str, str, str]: Returns ------- - (git_repo, git_commit, git_path) : tuple[str, str, str] - Git repository URL, last commit hash, and relative file path and current absolute path. + (git_repo, git_commit, rel_path, abs_path) : tuple[str, str, str, str] + Git repository URL, last commit hash, and relative file path and current + absolute path. """ abspath = path_join(path, abs_path=True) if not os.path.exists(abspath) or not os.path.isdir(abspath): dirname = os.path.dirname(abspath) else: dirname = abspath - git_repo = subprocess.check_output( - ["git", "-C", dirname, "remote", "get-url", "origin"], text=True - ).strip() + git_root = get_git_root(path=abspath) + git_repo = get_git_repo_info(git_root) git_commit = subprocess.check_output( ["git", "-C", dirname, "rev-parse", "HEAD"], text=True ).strip() - git_root = get_git_root(path=abspath) _LOG.debug("Current git branch for %s: %s %s", git_root, git_repo, git_commit) rel_path = os.path.relpath(abspath, os.path.abspath(git_root)) return (git_repo, git_commit, rel_path.replace("\\", "/"), abspath) From 1757b7e3a5aef7c48af12a002a8c664e1d9f3789 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Jun 2025 17:37:34 +0200 Subject: [PATCH 58/88] cleanup --- mlos_bench/mlos_bench/storage/base_storage.py | 94 ++++++++++++++----- 1 file changed, 73 insertions(+), 21 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index aec5e4d244..dd9b85675a 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -208,25 +208,7 @@ def __init__( # pylint: disable=too-many-arguments self._git_repo = git_repo self._git_commit = git_commit self._rel_root_env_config = rel_root_env_config - - # Currently we only store the relative path of the root env config - # from the git repo it came from. - git_root = git_repo - if not os.path.exists(git_root): - try: - git_root = get_git_root(os.curdir) - except CalledProcessError: - _LOG.warning( - "Failed to find a git repo in the current working directory: %s", - os.curdir, - ) - git_root = get_git_root(__file__) - - self._abs_root_env_config = path_join( - git_root, - self._rel_root_env_config, - abs_path=True, - ) + self._abs_root_env_config = self._restore_abs_root_env_path_info() _LOG.info( "Resolved relative root_config %s for experiment %s to %s", self._rel_root_env_config, @@ -248,6 +230,66 @@ def __init__( # pylint: disable=too-many-arguments self._opt_targets = opt_targets self._in_context = False + def _restore_abs_root_env_path_info(self) -> str: + abs_root_env_config = None + # Currently we only store the relative path of the root env config + # from the git repo it came from. + # This attempts to restore the absolute path to the root config + # by finding the git repo root and joining it with the relative + # path. If the git repo root cannot be found, it will try and + # find it from the current working directory, which may also + # fail, in which case we simply default to using the URL path of + # the repo. + if self._git_repo.startswith("file://"): + # If the git repo is a file URL, we need to convert it to a local path. + git_root = self._git_repo[7:] # Remove 'file://' prefix + else: + git_root = self._git_repo + if not os.path.exists(git_root): + try: + git_root = get_git_root(os.curdir) + except CalledProcessError: + _LOG.warning( + "Failed to find the git repo in the current working directory: %s", + os.curdir, + ) + git_root = None + + if git_root: + # If we have a git root, lookup its info again to make sure + # it matches the DB info. + (git_repo, git_commit, rel_path, git_root) = get_git_info(git_root) + assert rel_path == "." + if git_repo != self._git_repo: + _LOG.warning( + "Git repo %s does not match the one in the DB: %s", + git_repo, + self._git_repo, + ) + if git_commit != self._git_commit: + _LOG.warning( + "Git commit %s does not match the one in the DB: %s", + git_commit, + self._git_commit, + ) + + abs_root_env_config = path_join( + git_root, + self._rel_root_env_config, + abs_path=True, + ) + + # Fallback to the git repo URL if the absolute path + # does not exist. + if not abs_root_env_config or not os.path.exists(abs_root_env_config): + _LOG.warning( + "Root config file %s does not exist in the git repo %s", + abs_root_env_config, + git_root, + ) + abs_root_env_config = f"{self._git_repo}/{self._rel_root_env_config}" + return abs_root_env_config + def __enter__(self) -> Storage.Experiment: """ Enter the context of the experiment. @@ -336,8 +378,18 @@ def abs_root_env_config(self) -> str: """ Get the Experiment's root Environment config file path. - This returns the current absolute path to the root config for this process - instead of the path relative to the git repo root. + This attempts to return the current absolute path to the root config + for this process instead of the path relative to the git repo root. + + However, this may not always be possible if the git repo root is not + accessible, which can happen if the Experiment was restored from the + DB. + + Notes + ----- + This is mostly useful for other components (e.g., + :py:class:`~mlos_bench.schedulers.base_scheduler.Scheduler`) to use + within the same process, and not across invocations. """ return self._abs_root_env_config From 38ed63ed1395e486d4612b3b72beeb3e988caa38 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 23 Jun 2025 17:39:52 +0200 Subject: [PATCH 59/88] style --- mlos_bench/mlos_bench/storage/base_storage.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index dd9b85675a..8c48b40cdf 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -223,9 +223,7 @@ def __init__( # pylint: disable=too-many-arguments self._git_commit, self._rel_root_env_config, self._abs_root_env_config, - ) = get_git_info( - root_env_config, - ) + ) = get_git_info(root_env_config) self._description = description self._opt_targets = opt_targets self._in_context = False From 7903c41c4ab3fef3ae0e57a4e72aa27a490e7bae Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Jun 2025 15:40:22 +0000 Subject: [PATCH 60/88] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/util_git_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index cf92bbdad3..1a19c0a6c1 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -6,13 +6,11 @@ import os import re import tempfile - from pathlib import Path from subprocess import CalledProcessError import pytest - from mlos_bench.util import get_git_info, get_git_root, path_join @@ -68,7 +66,9 @@ def test_non_upstream_git() -> None: reason="Not running in GitHub Actions CI.", ) def test_github_actions_git_info() -> None: - """Test that get_git_info matches GitHub Actions environment variables if running in CI.""" + """Test that get_git_info matches GitHub Actions environment variables if running in + CI. + """ repo_env = os.environ.get("GITHUB_REPOSITORY") # "owner/repo" format sha_env = os.environ.get("GITHUB_SHA") assert repo_env, "GITHUB_REPOSITORY not set in environment." From 6838d456d8f34b66318a86063f6329fab4e9d658 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 14:37:09 +0200 Subject: [PATCH 61/88] mypy --- mlos_bench/mlos_bench/storage/base_storage.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 8c48b40cdf..87d0a5d54a 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -229,7 +229,6 @@ def __init__( # pylint: disable=too-many-arguments self._in_context = False def _restore_abs_root_env_path_info(self) -> str: - abs_root_env_config = None # Currently we only store the relative path of the root env config # from the git repo it came from. # This attempts to restore the absolute path to the root config @@ -238,12 +237,12 @@ def _restore_abs_root_env_path_info(self) -> str: # find it from the current working directory, which may also # fail, in which case we simply default to using the URL path of # the repo. + abs_root_env_config = None + git_root: str | None = self._git_repo if self._git_repo.startswith("file://"): # If the git repo is a file URL, we need to convert it to a local path. git_root = self._git_repo[7:] # Remove 'file://' prefix - else: - git_root = self._git_repo - if not os.path.exists(git_root): + if git_root and not os.path.exists(git_root): try: git_root = get_git_root(os.curdir) except CalledProcessError: From e9bf5f2b800dc577d8287217bace862d3d02b9ef Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 14:39:42 +0200 Subject: [PATCH 62/88] test tweak --- mlos_bench/mlos_bench/tests/util_git_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 1a19c0a6c1..897c5bf8af 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -54,6 +54,8 @@ def test_non_upstream_git() -> None: Path(non_upstream_git_dir).joinpath("README.md").touch() os.system(f"git -C {non_upstream_git_dir} add README.md") os.system(f"git -C {non_upstream_git_dir} commit -m 'Initial commit'") + os.system("git config user.email 'pytest@example.com'") + os.system("git config user.name 'PyTest User'") # This should raise an error because the repository has no upstream. (git_repo, _git_commit, rel_path, abs_path) = get_git_info(non_upstream_git_dir) assert git_repo == f"file://{non_upstream_git_dir}" From 826348414dd6e5a9bc4b3f069526717f12d238f7 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 14:40:37 +0200 Subject: [PATCH 63/88] fixup --- mlos_bench/mlos_bench/tests/util_git_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 897c5bf8af..0354bb2faa 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -54,8 +54,8 @@ def test_non_upstream_git() -> None: Path(non_upstream_git_dir).joinpath("README.md").touch() os.system(f"git -C {non_upstream_git_dir} add README.md") os.system(f"git -C {non_upstream_git_dir} commit -m 'Initial commit'") - os.system("git config user.email 'pytest@example.com'") - os.system("git config user.name 'PyTest User'") + os.system(f"git -C {non_upstream_git_dir} config --local user.email 'pytest@example.com'") + os.system(f"git -C {non_upstream_git_dir} config --local user.name 'PyTest User'") # This should raise an error because the repository has no upstream. (git_repo, _git_commit, rel_path, abs_path) = get_git_info(non_upstream_git_dir) assert git_repo == f"file://{non_upstream_git_dir}" From 3c10846c3e79ef146df9d25fd00483fac1d3596e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 14:42:11 +0200 Subject: [PATCH 64/88] refactor --- mlos_bench/mlos_bench/storage/base_storage.py | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 87d0a5d54a..658c222e69 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -199,12 +199,19 @@ def __init__( # pylint: disable=too-many-arguments self._tunables = tunables.copy() self._trial_id = trial_id self._experiment_id = experiment_id - if root_env_config is None: + if root_env_config is not None: + if git_repo or git_commit or rel_root_env_config: + raise ValueError("Unexpected args: git_repo, git_commit, rel_root_env_config") + ( + self._git_repo, + self._git_commit, + self._rel_root_env_config, + self._abs_root_env_config, + ) = get_git_info(root_env_config) + else: # Restoring from DB. if not (git_repo and git_commit and rel_root_env_config): - raise ValueError( - "Missing required args: git_repo, git_commit, rel_root_env_config" - ) + raise ValueError("Missing args: git_repo, git_commit, rel_root_env_config") self._git_repo = git_repo self._git_commit = git_commit self._rel_root_env_config = rel_root_env_config @@ -215,15 +222,6 @@ def __init__( # pylint: disable=too-many-arguments self._experiment_id, self._abs_root_env_config, ) - else: - if git_repo or git_commit or rel_root_env_config: - raise ValueError("Unexpected args: git_repo, git_commit, rel_root_env_config") - ( - self._git_repo, - self._git_commit, - self._rel_root_env_config, - self._abs_root_env_config, - ) = get_git_info(root_env_config) self._description = description self._opt_targets = opt_targets self._in_context = False From 0cb5948865662776e92ceaca3f0a80a34c6a39ef Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 14:50:09 +0200 Subject: [PATCH 65/88] fixup --- mlos_bench/mlos_bench/tests/util_git_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 0354bb2faa..8985e201e6 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -51,11 +51,11 @@ def test_non_upstream_git() -> None: non_upstream_git_dir = path_join(non_upstream_git_dir, abs_path=True) # Initialize a new git repository. os.system(f"git init {non_upstream_git_dir} -b main") + os.system(f"git -C {non_upstream_git_dir} config --local user.email 'pytest@example.com'") + os.system(f"git -C {non_upstream_git_dir} config --local user.name 'PyTest User'") Path(non_upstream_git_dir).joinpath("README.md").touch() os.system(f"git -C {non_upstream_git_dir} add README.md") os.system(f"git -C {non_upstream_git_dir} commit -m 'Initial commit'") - os.system(f"git -C {non_upstream_git_dir} config --local user.email 'pytest@example.com'") - os.system(f"git -C {non_upstream_git_dir} config --local user.name 'PyTest User'") # This should raise an error because the repository has no upstream. (git_repo, _git_commit, rel_path, abs_path) = get_git_info(non_upstream_git_dir) assert git_repo == f"file://{non_upstream_git_dir}" From 59050ce4e80945304d471c70808aebb14312ed10 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 14:57:15 +0200 Subject: [PATCH 66/88] start to disable the lookup logic for abs config path --- mlos_bench/mlos_bench/storage/base_storage.py | 23 ++++++++++++------- .../mlos_bench/tests/storage/sql/fixtures.py | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 658c222e69..53ffeac984 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -215,13 +215,20 @@ def __init__( # pylint: disable=too-many-arguments self._git_repo = git_repo self._git_commit = git_commit self._rel_root_env_config = rel_root_env_config - self._abs_root_env_config = self._restore_abs_root_env_path_info() - _LOG.info( - "Resolved relative root_config %s for experiment %s to %s", - self._rel_root_env_config, - self._experiment_id, - self._abs_root_env_config, - ) + # Note: The absolute path to the root config is not stored in the DB, + # and resolving it is not always possible, so we omit this operation for now. + # See commit 0cb5948865662776e92ceaca3f0a80a34c6a39ef in + # for prior + # implementation attempts. + self._abs_root_env_config = None # self._restore_abs_root_env_path_info() + _LOG.info( + "Resolved relative root_config %s from %s at commit %s for Experiment %s to %s", + self._rel_root_env_config, + self._git_repo, + self._git_commit, + self._experiment_id, + self._abs_root_env_config, + ) self._description = description self._opt_targets = opt_targets self._in_context = False @@ -369,7 +376,7 @@ def rel_root_env_config(self) -> str: return self._rel_root_env_config @property - def abs_root_env_config(self) -> str: + def abs_root_env_config(self) -> str | None: """ Get the Experiment's root Environment config file path. diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index f4bc8eca92..6be568eafb 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -375,7 +375,7 @@ def _dummy_run_exp( trial_runners=trial_runners, optimizer=opt, storage=storage, - root_env_config=exp.abs_root_env_config, + root_env_config=exp.abs_root_env_config or "ERROR-UNKNOWN.jsonc", ) # Add some trial data to that experiment by "running" it. From 062fad78fe0e46b0839c490b9b10e2354f6cd43e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:23:36 +0200 Subject: [PATCH 67/88] error handling --- mlos_bench/mlos_bench/storage/base_storage.py | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 53ffeac984..b14c618ef2 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -201,13 +201,27 @@ def __init__( # pylint: disable=too-many-arguments self._experiment_id = experiment_id if root_env_config is not None: if git_repo or git_commit or rel_root_env_config: + # Extra args are only used when restoring an Experiment from the DB. raise ValueError("Unexpected args: git_repo, git_commit, rel_root_env_config") - ( - self._git_repo, - self._git_commit, - self._rel_root_env_config, - self._abs_root_env_config, - ) = get_git_info(root_env_config) + try: + ( + self._git_repo, + self._git_commit, + self._rel_root_env_config, + self._abs_root_env_config, + ) = get_git_info(root_env_config) + except CalledProcessError as e: + # Note: If the root_env_config is not in a git repo, we could still use it + # as a relative path, but we won't have git metadata. + # In this case, we *could* set the git metadata to None, but + # the Experiment schema currently requires this to be set, + # so we simply throw an error for now. + _LOG.warning( + "Failed to get git info for root_env_config %s: %s", + root_env_config, + e, + ) + raise e else: # Restoring from DB. if not (git_repo and git_commit and rel_root_env_config): From 4bf6b88a65d7166e3dfb9ddbfd897aa154214d98 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:23:53 +0200 Subject: [PATCH 68/88] remove resolution logic --- mlos_bench/mlos_bench/storage/base_storage.py | 67 ++----------------- 1 file changed, 6 insertions(+), 61 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index b14c618ef2..dc6b5a77b9 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -230,11 +230,15 @@ def __init__( # pylint: disable=too-many-arguments self._git_commit = git_commit self._rel_root_env_config = rel_root_env_config # Note: The absolute path to the root config is not stored in the DB, - # and resolving it is not always possible, so we omit this operation for now. + # and resolving it is not always possible, so we omit this + # operation by default for now. # See commit 0cb5948865662776e92ceaca3f0a80a34c6a39ef in # for prior # implementation attempts. - self._abs_root_env_config = None # self._restore_abs_root_env_path_info() + self._abs_root_env_config = None + assert isinstance( + self._rel_root_env_config, str + ), "Failed to get relative root config path" _LOG.info( "Resolved relative root_config %s from %s at commit %s for Experiment %s to %s", self._rel_root_env_config, @@ -247,65 +251,6 @@ def __init__( # pylint: disable=too-many-arguments self._opt_targets = opt_targets self._in_context = False - def _restore_abs_root_env_path_info(self) -> str: - # Currently we only store the relative path of the root env config - # from the git repo it came from. - # This attempts to restore the absolute path to the root config - # by finding the git repo root and joining it with the relative - # path. If the git repo root cannot be found, it will try and - # find it from the current working directory, which may also - # fail, in which case we simply default to using the URL path of - # the repo. - abs_root_env_config = None - git_root: str | None = self._git_repo - if self._git_repo.startswith("file://"): - # If the git repo is a file URL, we need to convert it to a local path. - git_root = self._git_repo[7:] # Remove 'file://' prefix - if git_root and not os.path.exists(git_root): - try: - git_root = get_git_root(os.curdir) - except CalledProcessError: - _LOG.warning( - "Failed to find the git repo in the current working directory: %s", - os.curdir, - ) - git_root = None - - if git_root: - # If we have a git root, lookup its info again to make sure - # it matches the DB info. - (git_repo, git_commit, rel_path, git_root) = get_git_info(git_root) - assert rel_path == "." - if git_repo != self._git_repo: - _LOG.warning( - "Git repo %s does not match the one in the DB: %s", - git_repo, - self._git_repo, - ) - if git_commit != self._git_commit: - _LOG.warning( - "Git commit %s does not match the one in the DB: %s", - git_commit, - self._git_commit, - ) - - abs_root_env_config = path_join( - git_root, - self._rel_root_env_config, - abs_path=True, - ) - - # Fallback to the git repo URL if the absolute path - # does not exist. - if not abs_root_env_config or not os.path.exists(abs_root_env_config): - _LOG.warning( - "Root config file %s does not exist in the git repo %s", - abs_root_env_config, - git_root, - ) - abs_root_env_config = f"{self._git_repo}/{self._rel_root_env_config}" - return abs_root_env_config - def __enter__(self) -> Storage.Experiment: """ Enter the context of the experiment. From e5374e0c5380fecd509a0c4da0e71253302cc284 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:24:01 +0200 Subject: [PATCH 69/88] docs --- mlos_bench/mlos_bench/storage/base_storage.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index dc6b5a77b9..f0103ad6e6 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -255,6 +255,8 @@ def __enter__(self) -> Storage.Experiment: """ Enter the context of the experiment. + Notes + ----- Override the `_setup` method to add custom context initialization. """ _LOG.debug("Starting experiment: %s", self) @@ -272,6 +274,8 @@ def __exit__( """ End the context of the experiment. + Notes + ----- Override the `_teardown` method to add custom context teardown logic. """ is_ok = exc_val is None @@ -297,14 +301,14 @@ def _setup(self) -> None: Create a record of the new experiment or find an existing one in the storage. - This method is called by `Storage.Experiment.__enter__()`. + This method is called by :py:class:`.Storage.Experiment.__enter__()`. """ def _teardown(self, is_ok: bool) -> None: """ Finalize the experiment in the storage. - This method is called by `Storage.Experiment.__exit__()`. + This method is called by :py:class:`.Storage.Experiment.__exit__()`. Parameters ---------- @@ -337,7 +341,7 @@ def rel_root_env_config(self) -> str: @property def abs_root_env_config(self) -> str | None: """ - Get the Experiment's root Environment config file path. + Get the Experiment's root Environment config absolute file path. This attempts to return the current absolute path to the root config for this process instead of the path relative to the git repo root. @@ -352,6 +356,9 @@ def abs_root_env_config(self) -> str | None: :py:class:`~mlos_bench.schedulers.base_scheduler.Scheduler`) to use within the same process, and not across invocations. """ + # TODO: In the future, we can consider fetching the git_repo to a + # standard working directory for ``mlos_bench`` and then resolving + # the root config path from there based on the relative path. return self._abs_root_env_config @property From 81dcea045e656891b4ab9e88b9255ea0262c58d2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:24:52 +0200 Subject: [PATCH 70/88] imports --- mlos_bench/mlos_bench/storage/base_storage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index f0103ad6e6..0cf8027aaa 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -25,7 +25,6 @@ from __future__ import annotations import logging -import os from abc import ABCMeta, abstractmethod from collections.abc import Iterator, Mapping from contextlib import AbstractContextManager as ContextManager @@ -40,7 +39,7 @@ from mlos_bench.services.base_service import Service from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.tunables.tunable_groups import TunableGroups -from mlos_bench.util import get_git_info, get_git_root, path_join +from mlos_bench.util import get_git_info _LOG = logging.getLogger(__name__) From 948c3d53e11758b9fc52a8c21b942fb86aa558f0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:26:34 +0200 Subject: [PATCH 71/88] mypy --- mlos_bench/mlos_bench/storage/base_storage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 0cf8027aaa..658e2e74e8 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -198,6 +198,7 @@ def __init__( # pylint: disable=too-many-arguments self._tunables = tunables.copy() self._trial_id = trial_id self._experiment_id = experiment_id + self._abs_root_env_config: str | None if root_env_config is not None: if git_repo or git_commit or rel_root_env_config: # Extra args are only used when restoring an Experiment from the DB. From 717930132b08cf01411591ea01df27d03a9d0bea Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:29:30 +0200 Subject: [PATCH 72/88] cleanup --- mlos_bench/mlos_bench/storage/base_storage.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 658e2e74e8..c67ab44db2 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -211,11 +211,9 @@ def __init__( # pylint: disable=too-many-arguments self._abs_root_env_config, ) = get_git_info(root_env_config) except CalledProcessError as e: - # Note: If the root_env_config is not in a git repo, we could still use it - # as a relative path, but we won't have git metadata. - # In this case, we *could* set the git metadata to None, but - # the Experiment schema currently requires this to be set, - # so we simply throw an error for now. + # Note: currently the Experiment schema requires git + # metadata to be set. We *could* set the git metadata to + # dummy values, but for now we just throw an error. _LOG.warning( "Failed to get git info for root_env_config %s: %s", root_env_config, From 43c5887c5a6b603d4990871ca6da1fd400b80305 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:32:18 +0200 Subject: [PATCH 73/88] notes --- mlos_bench/mlos_bench/storage/base_storage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index c67ab44db2..6a6fd12914 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -346,7 +346,8 @@ def abs_root_env_config(self) -> str | None: However, this may not always be possible if the git repo root is not accessible, which can happen if the Experiment was restored from the - DB. + DB, but the process was started from a different working directory, + for instance. Notes ----- From 82c6b8b9f216af90ea894c7d13e8a3b8a773f465 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:35:30 +0200 Subject: [PATCH 74/88] rename --- mlos_bench/mlos_bench/storage/base_storage.py | 16 ++++++++-------- mlos_bench/mlos_bench/storage/sql/experiment.py | 6 +++--- mlos_bench/mlos_bench/storage/sql/storage.py | 2 +- .../mlos_bench/tests/storage/exp_data_test.py | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 6a6fd12914..3965c1404f 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -193,21 +193,21 @@ def __init__( # pylint: disable=too-many-arguments opt_targets: dict[str, Literal["min", "max"]], git_repo: str | None = None, git_commit: str | None = None, - rel_root_env_config: str | None = None, + git_rel_root_env_config: str | None = None, ): self._tunables = tunables.copy() self._trial_id = trial_id self._experiment_id = experiment_id self._abs_root_env_config: str | None if root_env_config is not None: - if git_repo or git_commit or rel_root_env_config: + if git_repo or git_commit or git_rel_root_env_config: # Extra args are only used when restoring an Experiment from the DB. raise ValueError("Unexpected args: git_repo, git_commit, rel_root_env_config") try: ( self._git_repo, self._git_commit, - self._rel_root_env_config, + self._git_rel_root_env_config, self._abs_root_env_config, ) = get_git_info(root_env_config) except CalledProcessError as e: @@ -222,11 +222,11 @@ def __init__( # pylint: disable=too-many-arguments raise e else: # Restoring from DB. - if not (git_repo and git_commit and rel_root_env_config): + if not (git_repo and git_commit and git_rel_root_env_config): raise ValueError("Missing args: git_repo, git_commit, rel_root_env_config") self._git_repo = git_repo self._git_commit = git_commit - self._rel_root_env_config = rel_root_env_config + self._git_rel_root_env_config = git_rel_root_env_config # Note: The absolute path to the root config is not stored in the DB, # and resolving it is not always possible, so we omit this # operation by default for now. @@ -235,11 +235,11 @@ def __init__( # pylint: disable=too-many-arguments # implementation attempts. self._abs_root_env_config = None assert isinstance( - self._rel_root_env_config, str + self._git_rel_root_env_config, str ), "Failed to get relative root config path" _LOG.info( "Resolved relative root_config %s from %s at commit %s for Experiment %s to %s", - self._rel_root_env_config, + self._git_rel_root_env_config, self._git_repo, self._git_commit, self._experiment_id, @@ -334,7 +334,7 @@ def rel_root_env_config(self) -> str: """Get the Experiment's root Environment config's relative file path to the git repo root. """ - return self._rel_root_env_config + return self._git_rel_root_env_config @property def abs_root_env_config(self) -> str | None: diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index d15cc15302..ea8c857495 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -43,7 +43,7 @@ def __init__( # pylint: disable=too-many-arguments opt_targets: dict[str, Literal["min", "max"]], git_repo: str | None = None, git_commit: str | None = None, - rel_root_env_config: str | None = None, + git_rel_root_env_config: str | None = None, ): super().__init__( tunables=tunables, @@ -54,7 +54,7 @@ def __init__( # pylint: disable=too-many-arguments opt_targets=opt_targets, git_repo=git_repo, git_commit=git_commit, - rel_root_env_config=rel_root_env_config, + git_rel_root_env_config=git_rel_root_env_config, ) self._engine = engine self._schema = schema @@ -95,7 +95,7 @@ def _setup(self) -> None: description=self._description, git_repo=self._git_repo, git_commit=self._git_commit, - root_env_config=self._rel_root_env_config, + root_env_config=self._git_rel_root_env_config, ) ) conn.execute( diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 301e175b3b..383f4de146 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -162,7 +162,7 @@ def get_experiment_by_id( opt_targets=opt_targets, git_repo=exp.git_repo, git_commit=exp.git_commit, - rel_root_env_config=exp.root_env_config, + git_rel_root_env_config=exp.root_env_config, ) def experiment( # pylint: disable=too-many-arguments diff --git a/mlos_bench/mlos_bench/tests/storage/exp_data_test.py b/mlos_bench/mlos_bench/tests/storage/exp_data_test.py index b8ca6a64b9..310f22d98d 100644 --- a/mlos_bench/mlos_bench/tests/storage/exp_data_test.py +++ b/mlos_bench/mlos_bench/tests/storage/exp_data_test.py @@ -30,7 +30,7 @@ def test_exp_data_root_env_config( """Tests the root_env_config property of ExperimentData.""" # pylint: disable=protected-access assert exp_data.root_env_config == ( - exp_storage._rel_root_env_config, + exp_storage._git_rel_root_env_config, exp_storage._git_repo, exp_storage._git_commit, ) From a1eb6de19cbf9cd591e37b84acbe9900e4057071 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:38:10 +0200 Subject: [PATCH 75/88] Fixups and improvements to git info retrieval --- mlos_bench/mlos_bench/tests/util_git_test.py | 72 +++++++++- mlos_bench/mlos_bench/util.py | 142 +++++++++++++++++-- 2 files changed, 197 insertions(+), 17 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 77fd2779c7..8985e201e6 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -3,14 +3,80 @@ # Licensed under the MIT License. # """Unit tests for get_git_info utility function.""" +import os import re +import tempfile +from pathlib import Path +from subprocess import CalledProcessError -from mlos_bench.util import get_git_info +import pytest + +from mlos_bench.util import get_git_info, get_git_root, path_join def test_get_git_info() -> None: - """Check that we can retrieve git info about the current repository correctly.""" - (git_repo, git_commit, rel_path) = get_git_info(__file__) + """Check that we can retrieve git info about the current repository correctly from a + file. + """ + (git_repo, git_commit, rel_path, abs_path) = get_git_info(__file__) assert "mlos" in git_repo.lower() assert re.match(r"[0-9a-f]{40}", git_commit) is not None assert rel_path == "mlos_bench/mlos_bench/tests/util_git_test.py" + assert abs_path == path_join(__file__, abs_path=True) + + +def test_get_git_info_dir() -> None: + """Check that we can retrieve git info about the current repository correctly from a + directory. + """ + dirname = os.path.dirname(__file__) + (git_repo, git_commit, rel_path, abs_path) = get_git_info(dirname) + assert "mlos" in git_repo.lower() + assert re.match(r"[0-9a-f]{40}", git_commit) is not None + assert rel_path == "mlos_bench/mlos_bench/tests" + assert abs_path == path_join(dirname, abs_path=True) + + +def test_non_git_dir() -> None: + """Check that we can handle a non-git directory.""" + with tempfile.TemporaryDirectory() as non_git_dir: + with pytest.raises(CalledProcessError): + # This should raise an error because the directory is not a git repository. + get_git_root(non_git_dir) + + +def test_non_upstream_git() -> None: + """Check that we can handle a git directory without an upstream.""" + with tempfile.TemporaryDirectory() as non_upstream_git_dir: + non_upstream_git_dir = path_join(non_upstream_git_dir, abs_path=True) + # Initialize a new git repository. + os.system(f"git init {non_upstream_git_dir} -b main") + os.system(f"git -C {non_upstream_git_dir} config --local user.email 'pytest@example.com'") + os.system(f"git -C {non_upstream_git_dir} config --local user.name 'PyTest User'") + Path(non_upstream_git_dir).joinpath("README.md").touch() + os.system(f"git -C {non_upstream_git_dir} add README.md") + os.system(f"git -C {non_upstream_git_dir} commit -m 'Initial commit'") + # This should raise an error because the repository has no upstream. + (git_repo, _git_commit, rel_path, abs_path) = get_git_info(non_upstream_git_dir) + assert git_repo == f"file://{non_upstream_git_dir}" + assert abs_path == non_upstream_git_dir + assert rel_path == "." + + +@pytest.mark.skipif( + os.environ.get("GITHUB_ACTIONS") != "true", + reason="Not running in GitHub Actions CI.", +) +def test_github_actions_git_info() -> None: + """Test that get_git_info matches GitHub Actions environment variables if running in + CI. + """ + repo_env = os.environ.get("GITHUB_REPOSITORY") # "owner/repo" format + sha_env = os.environ.get("GITHUB_SHA") + assert repo_env, "GITHUB_REPOSITORY not set in environment." + assert sha_env, "GITHUB_SHA not set in environment." + git_repo, git_commit, _rel_path, _abs_path = get_git_info(__file__) + assert git_repo.endswith(repo_env), f"git_repo '{git_repo}' does not end with '{repo_env}'" + assert ( + git_commit == sha_env + ), f"git_commit '{git_commit}' does not match GITHUB_SHA '{sha_env}'" diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index 7cf9bc640c..0f1e3d2ef7 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -153,7 +153,7 @@ def path_join(*args: str, abs_path: bool = False) -> str: """ path = os.path.join(*args) if abs_path: - path = os.path.abspath(path) + path = os.path.realpath(path) return os.path.normpath(path).replace("\\", "/") @@ -274,33 +274,147 @@ def check_required_params(config: Mapping[str, Any], required_params: Iterable[s ) -def get_git_info(path: str = __file__) -> tuple[str, str, str]: +def get_git_root(path: str = __file__) -> str: """ - Get the git repository, commit hash, and local path of the given file. + Get the root dir of the git repository. + + Parameters + ---------- + path : str, optional + Path to the file in git repository. + + Raises + ------ + subprocess.CalledProcessError + If the path is not a git repository or the command fails. + + Returns + ------- + str + The absolute path to the root directory of the git repository. + """ + abspath = path_join(path, abs_path=True) + if not os.path.exists(abspath) or not os.path.isdir(abspath): + dirname = os.path.dirname(abspath) + else: + dirname = abspath + git_root = subprocess.check_output( + ["git", "-C", dirname, "rev-parse", "--show-toplevel"], + text=True, + ).strip() + return path_join(git_root, abs_path=True) + + +def get_git_remote_info(path: str, remote: str) -> str: + """ + Gets the remote URL for the given remote name in the git repository. Parameters ---------- path : str Path to the file in git repository. + remote : str + The name of the remote (e.g., "origin"). + + Raises + ------ + subprocess.CalledProcessError + If the command fails or the remote does not exist. Returns ------- - (git_repo, git_commit, git_path) : tuple[str, str, str] - Git repository URL, last commit hash, and relative file path. + str + The URL of the remote repository. """ - dirname = os.path.dirname(path) - git_repo = subprocess.check_output( - ["git", "-C", dirname, "remote", "get-url", "origin"], text=True + return subprocess.check_output( + ["git", "-C", path, "remote", "get-url", remote], text=True ).strip() + + +def get_git_repo_info(path: str) -> str: + """ + Get the git repository URL for the given git repo. + + Tries to get the upstream branch URL, falling back to the "origin" remote + if the upstream branch is not set or does not exist. If that also fails, + it returns a file URL pointing to the local path. + + Parameters + ---------- + path : str + Path to the git repository. + + Raises + ------ + subprocess.CalledProcessError + If the command fails or the git repository does not exist. + + Returns + ------- + str + The upstream URL of the git repository. + """ + # In case "origin" remote is not set, or this branch has a different + # upstream, we should handle it gracefully. + # (e.g., fallback to the first one we find?) + path = path_join(path, abs_path=True) + cmd = ["git", "-C", path, "rev-parse", "--abbrev-ref", "--symbolic-full-name", "HEAD@{u}"] + try: + git_remote = subprocess.check_output(cmd, text=True).strip() + git_remote = git_remote.split("/", 1)[0] + git_repo = get_git_remote_info(path, git_remote) + except subprocess.CalledProcessError: + git_remote = "origin" + _LOG.warning( + "Failed to get the upstream branch for %s. Falling back to '%s' remote.", + path, + git_remote, + ) + try: + git_repo = get_git_remote_info(path, git_remote) + except subprocess.CalledProcessError: + git_repo = "file://" + path + _LOG.warning( + "Failed to get the upstream branch for %s. Falling back to '%s'.", + path, + git_repo, + ) + return git_repo + + +def get_git_info(path: str = __file__) -> tuple[str, str, str, str]: + """ + Get the git repository, commit hash, and local path of the given file. + + Parameters + ---------- + path : str + Path to the file in git repository. + + Raises + ------ + subprocess.CalledProcessError + If the path is not a git repository or the command fails. + + Returns + ------- + (git_repo, git_commit, rel_path, abs_path) : tuple[str, str, str, str] + Git repository URL, last commit hash, and relative file path and current + absolute path. + """ + abspath = path_join(path, abs_path=True) + if not os.path.exists(abspath) or not os.path.isdir(abspath): + dirname = os.path.dirname(abspath) + else: + dirname = abspath + git_root = get_git_root(path=abspath) + git_repo = get_git_repo_info(git_root) git_commit = subprocess.check_output( ["git", "-C", dirname, "rev-parse", "HEAD"], text=True ).strip() - git_root = subprocess.check_output( - ["git", "-C", dirname, "rev-parse", "--show-toplevel"], text=True - ).strip() - _LOG.debug("Current git branch: %s %s", git_repo, git_commit) - rel_path = os.path.relpath(os.path.abspath(path), os.path.abspath(git_root)) - return (git_repo, git_commit, rel_path.replace("\\", "/")) + _LOG.debug("Current git branch for %s: %s %s", git_root, git_repo, git_commit) + rel_path = os.path.relpath(abspath, os.path.abspath(git_root)) + return (git_repo, git_commit, rel_path.replace("\\", "/"), abspath) # Note: to avoid circular imports, we don't specify TunableValue here. From d0f8e74ea696968cb8a6d544088d082dde44165e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:50:36 +0200 Subject: [PATCH 76/88] comments --- mlos_bench/mlos_bench/tests/util_git_test.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 8985e201e6..6b199a5edd 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -68,9 +68,24 @@ def test_non_upstream_git() -> None: reason="Not running in GitHub Actions CI.", ) def test_github_actions_git_info() -> None: - """Test that get_git_info matches GitHub Actions environment variables if running in - CI. """ + Test that get_git_info matches GitHub Actions environment variables if + running in CI. + + Examples + -------- + Test locally with the following command: + + .. code-block:: shell + + export GITHUB_ACTIONS=true + GITHUB_SHA=$(git rev-parse HEAD) + # GITHUB_REPOSITORY should be in "owner/repo" format. + # e.g., GITHUB_REPOSITORY="bpkroth/MLOS" or "microsoft/MLOS" + GITHUB_REPOSITORY=$(git rev-parse --abbrev-ref --symbolic-full-name HEAD@{u} | cut -d/ -f1 | xargs git remote get-url | grep https://github.com | cut -d/ -f4-) + pytest -n0 mlos_bench/mlos_bench/tests/util_git_test.py + ``` + """ # pylint: disable=line-too-long # noqa: E501 repo_env = os.environ.get("GITHUB_REPOSITORY") # "owner/repo" format sha_env = os.environ.get("GITHUB_SHA") assert repo_env, "GITHUB_REPOSITORY not set in environment." From f7d91a74a76953e99075797d4ca5ad5a8ef6f282 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 15:53:09 +0200 Subject: [PATCH 77/88] tweaks --- mlos_bench/mlos_bench/storage/base_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 75a84bf0b2..de81bb94d7 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -194,7 +194,7 @@ def __init__( # pylint: disable=too-many-arguments self._tunables = tunables.copy() self._trial_id = trial_id self._experiment_id = experiment_id - (self._git_repo, self._git_commit, self._root_env_config) = get_git_info( + (self._git_repo, self._git_commit, self._root_env_config, _future_pr) = get_git_info( root_env_config ) self._description = description From 739c3cec72a552340436708650f8bde82867c2f9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Jun 2025 13:53:36 +0000 Subject: [PATCH 78/88] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/util_git_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 6b199a5edd..2d215a4332 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -69,8 +69,8 @@ def test_non_upstream_git() -> None: ) def test_github_actions_git_info() -> None: """ - Test that get_git_info matches GitHub Actions environment variables if - running in CI. + Test that get_git_info matches GitHub Actions environment variables if running in + CI. Examples -------- From a0dd7b70c35e807964c803874c19e98fce1a9b39 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 16:04:02 +0200 Subject: [PATCH 79/88] tweak --- mlos_bench/mlos_bench/tests/util_git_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 2d215a4332..864231baa9 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -84,7 +84,7 @@ def test_github_actions_git_info() -> None: # e.g., GITHUB_REPOSITORY="bpkroth/MLOS" or "microsoft/MLOS" GITHUB_REPOSITORY=$(git rev-parse --abbrev-ref --symbolic-full-name HEAD@{u} | cut -d/ -f1 | xargs git remote get-url | grep https://github.com | cut -d/ -f4-) pytest -n0 mlos_bench/mlos_bench/tests/util_git_test.py - ``` + """ # pylint: disable=line-too-long # noqa: E501 repo_env = os.environ.get("GITHUB_REPOSITORY") # "owner/repo" format sha_env = os.environ.get("GITHUB_SHA") From 942d57baa693cfccbb831c68f13707e81aebbc19 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Jun 2025 14:04:31 +0000 Subject: [PATCH 80/88] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/util_git_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 864231baa9..6990ca6abb 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -84,7 +84,6 @@ def test_github_actions_git_info() -> None: # e.g., GITHUB_REPOSITORY="bpkroth/MLOS" or "microsoft/MLOS" GITHUB_REPOSITORY=$(git rev-parse --abbrev-ref --symbolic-full-name HEAD@{u} | cut -d/ -f1 | xargs git remote get-url | grep https://github.com | cut -d/ -f4-) pytest -n0 mlos_bench/mlos_bench/tests/util_git_test.py - """ # pylint: disable=line-too-long # noqa: E501 repo_env = os.environ.get("GITHUB_REPOSITORY") # "owner/repo" format sha_env = os.environ.get("GITHUB_SHA") From 6d563a8c5e53e4725893d821d60784e310642625 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 19:21:33 +0200 Subject: [PATCH 81/88] refactor --- mlos_bench/mlos_bench/tests/util_git_test.py | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 6990ca6abb..06cfdba8f0 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -7,7 +7,7 @@ import re import tempfile from pathlib import Path -from subprocess import CalledProcessError +from subprocess import CalledProcessError, check_call as run import pytest @@ -47,19 +47,19 @@ def test_non_git_dir() -> None: def test_non_upstream_git() -> None: """Check that we can handle a git directory without an upstream.""" - with tempfile.TemporaryDirectory() as non_upstream_git_dir: - non_upstream_git_dir = path_join(non_upstream_git_dir, abs_path=True) + with tempfile.TemporaryDirectory() as local_git_dir: + local_git_dir = path_join(local_git_dir, abs_path=True) # Initialize a new git repository. - os.system(f"git init {non_upstream_git_dir} -b main") - os.system(f"git -C {non_upstream_git_dir} config --local user.email 'pytest@example.com'") - os.system(f"git -C {non_upstream_git_dir} config --local user.name 'PyTest User'") - Path(non_upstream_git_dir).joinpath("README.md").touch() - os.system(f"git -C {non_upstream_git_dir} add README.md") - os.system(f"git -C {non_upstream_git_dir} commit -m 'Initial commit'") + run(["git", "init", local_git_dir, "-b", "main"]) + run(["git", "-C", local_git_dir, "config", "--local", "user.email", "pytest@example.com"]) + run(["git", "-C", local_git_dir, "config", "--local", "user.name", "PyTest User"]) + Path(local_git_dir).joinpath("README.md").touch() + run(["git", "-C", local_git_dir, "add", "README.md"]) + run(["git", "-C", local_git_dir, "commit", "-m", "Initial commit"]) # This should raise an error because the repository has no upstream. - (git_repo, _git_commit, rel_path, abs_path) = get_git_info(non_upstream_git_dir) - assert git_repo == f"file://{non_upstream_git_dir}" - assert abs_path == non_upstream_git_dir + (git_repo, _git_commit, rel_path, abs_path) = get_git_info(local_git_dir) + assert git_repo == f"file://{local_git_dir}" + assert abs_path == local_git_dir assert rel_path == "." From 4e33efb34424c21aef93a4943341020bd9d172b8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Jun 2025 17:22:06 +0000 Subject: [PATCH 82/88] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/util_git_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 06cfdba8f0..f532a1feca 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -7,7 +7,8 @@ import re import tempfile from pathlib import Path -from subprocess import CalledProcessError, check_call as run +from subprocess import CalledProcessError +from subprocess import check_call as run import pytest From 5f13ff9d27608f9c81baa28f656b9e578ea0cd26 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Jun 2025 17:23:52 +0000 Subject: [PATCH 83/88] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/util_git_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 06cfdba8f0..f532a1feca 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -7,7 +7,8 @@ import re import tempfile from pathlib import Path -from subprocess import CalledProcessError, check_call as run +from subprocess import CalledProcessError +from subprocess import check_call as run import pytest From 8f8f0bb4c01e16d8a7e5845d231b1a0323601483 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 19:27:57 +0200 Subject: [PATCH 84/88] comment --- mlos_bench/mlos_bench/tests/util_git_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index f532a1feca..1956c501e5 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -57,7 +57,7 @@ def test_non_upstream_git() -> None: Path(local_git_dir).joinpath("README.md").touch() run(["git", "-C", local_git_dir, "add", "README.md"]) run(["git", "-C", local_git_dir, "commit", "-m", "Initial commit"]) - # This should raise an error because the repository has no upstream. + # This should have slightly different behavior when there is no upstream. (git_repo, _git_commit, rel_path, abs_path) = get_git_info(local_git_dir) assert git_repo == f"file://{local_git_dir}" assert abs_path == local_git_dir From 2257e17e9b9b6c7e526d83d3799566b240db72a7 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 19:30:13 +0200 Subject: [PATCH 85/88] format --- mlos_bench/mlos_bench/util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index 0f1e3d2ef7..4d42100b8c 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -299,8 +299,7 @@ def get_git_root(path: str = __file__) -> str: else: dirname = abspath git_root = subprocess.check_output( - ["git", "-C", dirname, "rev-parse", "--show-toplevel"], - text=True, + ["git", "-C", dirname, "rev-parse", "--show-toplevel"], text=True ).strip() return path_join(git_root, abs_path=True) From 01e844a484851000dd8e1e40ad397c33cdce1aac Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 19:33:44 +0200 Subject: [PATCH 86/88] comments --- mlos_bench/mlos_bench/util.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index 4d42100b8c..945143defc 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -413,9 +413,13 @@ def get_git_info(path: str = __file__) -> tuple[str, str, str, str]: ).strip() _LOG.debug("Current git branch for %s: %s %s", git_root, git_repo, git_commit) rel_path = os.path.relpath(abspath, os.path.abspath(git_root)) + # TODO: return the branch too? return (git_repo, git_commit, rel_path.replace("\\", "/"), abspath) +# TODO: Add support for checking out the branch locally. + + # Note: to avoid circular imports, we don't specify TunableValue here. def try_parse_val(val: str | None) -> int | float | str | None: """ From 0357849867717d07a289f686ae674c8cb4940471 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 20:18:34 +0200 Subject: [PATCH 87/88] fixup --- mlos_bench/mlos_bench/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index 945143defc..006e0e0a54 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -280,7 +280,7 @@ def get_git_root(path: str = __file__) -> str: Parameters ---------- - path : str, optional + path : Optional[str] Path to the file in git repository. Raises From c7245a2fbebf28d0192e4f73ff3b4fc7225498c3 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 24 Jun 2025 20:23:36 +0200 Subject: [PATCH 88/88] fix docs --- mlos_bench/mlos_bench/tests/util_git_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 1956c501e5..88fae7c7f4 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -80,10 +80,10 @@ def test_github_actions_git_info() -> None: .. code-block:: shell export GITHUB_ACTIONS=true - GITHUB_SHA=$(git rev-parse HEAD) + export GITHUB_SHA=$(git rev-parse HEAD) # GITHUB_REPOSITORY should be in "owner/repo" format. # e.g., GITHUB_REPOSITORY="bpkroth/MLOS" or "microsoft/MLOS" - GITHUB_REPOSITORY=$(git rev-parse --abbrev-ref --symbolic-full-name HEAD@{u} | cut -d/ -f1 | xargs git remote get-url | grep https://github.com | cut -d/ -f4-) + export GITHUB_REPOSITORY=$(git rev-parse --abbrev-ref --symbolic-full-name HEAD@{u} | cut -d/ -f1 | xargs git remote get-url | grep https://github.com | cut -d/ -f4-) pytest -n0 mlos_bench/mlos_bench/tests/util_git_test.py """ # pylint: disable=line-too-long # noqa: E501 repo_env = os.environ.get("GITHUB_REPOSITORY") # "owner/repo" format