Skip to content

Commit 54536b4

Browse files
fix: mark review comments
Signed-off-by: Alon Kellner <[email protected]>
1 parent c727dbc commit 54536b4

File tree

11 files changed

+630
-761
lines changed

11 files changed

+630
-761
lines changed

src/guidellm/__main__.py

Lines changed: 29 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525

2626
import asyncio
2727
import codecs
28+
import json
2829
from pathlib import Path
2930

3031
import click
@@ -383,19 +384,40 @@ def benchmark():
383384
help="Maximum global error rate across all benchmarks.",
384385
)
385386
@click.option(
386-
"--stop-over-saturated",
387-
"--stop-osd", # alias
388-
default=BenchmarkGenerativeTextArgs.get_default("stop_over_saturated"),
387+
"--over-saturation",
388+
"--detect-saturation", # alias
389+
default=None,
389390
help=(
390-
"Set this flag to stop the benchmark if the model is over-saturated. "
391-
"Defaults to False."
391+
"Enable over-saturation detection. Can be a flag (bool) or a JSON dict with "
392+
'configuration (e.g., \'{"enabled": true, "min_seconds": 30}\'). '
393+
"Defaults to None (disabled)."
392394
),
393-
is_flag=True,
395+
type=click.UNPROCESSED,
394396
)
395-
def run(**kwargs):
397+
def run(**kwargs): # noqa: C901
396398
# Only set CLI args that differ from click defaults
397399
kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)
398400

401+
# Handle over_saturation parsing (can be bool flag or JSON dict string)
402+
if "over_saturation" in kwargs and kwargs["over_saturation"] is not None:
403+
over_sat = kwargs["over_saturation"]
404+
if isinstance(over_sat, str):
405+
try:
406+
# Try parsing as JSON dict
407+
kwargs["over_saturation"] = json.loads(over_sat)
408+
except (json.JSONDecodeError, ValueError):
409+
# If not valid JSON, treat as bool flag
410+
kwargs["over_saturation"] = over_sat.lower() in (
411+
"true",
412+
"1",
413+
"yes",
414+
"on",
415+
)
416+
elif isinstance(over_sat, bool):
417+
# Already a bool, keep as is
418+
pass
419+
# If it's already a dict, keep as is
420+
399421
# Handle remapping for request params
400422
request_type = kwargs.pop("request_type", None)
401423
request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)

src/guidellm/benchmark/entrypoints.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -323,7 +323,7 @@ async def resolve_profile(
323323
max_errors: int | None,
324324
max_error_rate: float | None,
325325
max_global_error_rate: float | None,
326-
stop_over_saturated: bool | None = None,
326+
over_saturation: bool | dict[str, Any] | None = None,
327327
console: Console | None = None,
328328
) -> Profile:
329329
"""
@@ -344,7 +344,7 @@ async def resolve_profile(
344344
:param max_errors: Maximum number of errors before stopping
345345
:param max_error_rate: Maximum error rate threshold before stopping
346346
:param max_global_error_rate: Maximum global error rate threshold before stopping
347-
:param stop_over_saturated: Whether to stop if over-saturation is detected
347+
:param over_saturation: Over-saturation detection configuration (bool or dict)
348348
:param console: Console instance for progress reporting, or None
349349
:return: Configured Profile instance ready for benchmarking
350350
:raises ValueError: If constraints are provided with a pre-configured Profile
@@ -361,7 +361,7 @@ async def resolve_profile(
361361
"max_errors": max_errors,
362362
"max_error_rate": max_error_rate,
363363
"max_global_error_rate": max_global_error_rate,
364-
"stop_over_saturated": stop_over_saturated,
364+
"over_saturation": over_saturation,
365365
}.items():
366366
if val is not None:
367367
constraints[key] = val
@@ -503,7 +503,7 @@ async def benchmark_generative_text(
503503
max_errors=args.max_errors,
504504
max_error_rate=args.max_error_rate,
505505
max_global_error_rate=args.max_global_error_rate,
506-
stop_over_saturated=args.stop_over_saturated,
506+
over_saturation=args.over_saturation,
507507
console=console,
508508
)
509509
output_formats = await resolve_output_formats(

src/guidellm/benchmark/schemas/generative/entrypoints.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -283,9 +283,13 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
283283
max_global_error_rate: float | None = Field(
284284
default=None, description="Maximum global error rate (0-1) before stopping"
285285
)
286-
stop_over_saturated: bool = Field(
287-
default=False,
288-
description="Whether to stop the benchmark if over-saturation is detected",
286+
over_saturation: bool | dict[str, Any] | None = Field(
287+
default=None,
288+
description=(
289+
"Over-saturation detection configuration. Can be a bool to enable/disable "
290+
"with defaults, or a dict with configuration parameters (enabled, "
291+
"min_seconds, max_window_seconds, moe_threshold, etc.)."
292+
),
289293
)
290294

291295
@field_validator("data", "data_args", "rate", mode="wrap")

src/guidellm/scheduler/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121
MaxNumberConstraint,
2222
OverSaturationConstraint,
2323
OverSaturationConstraintInitializer,
24-
OverSaturationDetector,
2524
PydanticConstraintInitializer,
2625
SerializableConstraintInitializer,
2726
UnserializableConstraintInitializer,
@@ -71,7 +70,6 @@
7170
"NonDistributedEnvironment",
7271
"OverSaturationConstraint",
7372
"OverSaturationConstraintInitializer",
74-
"OverSaturationDetector",
7573
"PydanticConstraintInitializer",
7674
"RequestT",
7775
"ResponseT",

src/guidellm/scheduler/constraints/__init__.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,16 +13,15 @@
1313
UnserializableConstraintInitializer,
1414
)
1515
from .factory import ConstraintsInitializerFactory
16-
from .over_saturation import (
17-
OverSaturationConstraint,
18-
OverSaturationConstraintInitializer,
19-
OverSaturationDetector,
20-
)
2116
from .protocols import (
2217
Constraint,
2318
ConstraintInitializer,
2419
SerializableConstraintInitializer,
2520
)
21+
from .saturation import (
22+
OverSaturationConstraint,
23+
OverSaturationConstraintInitializer,
24+
)
2625
from .standard import (
2726
MaxDurationConstraint,
2827
MaxErrorRateConstraint,
@@ -43,7 +42,6 @@
4342
"MaxNumberConstraint",
4443
"OverSaturationConstraint",
4544
"OverSaturationConstraintInitializer",
46-
"OverSaturationDetector",
4745
"PydanticConstraintInitializer",
4846
"RequestsExhaustedConstraint",
4947
"SerializableConstraintInitializer",

0 commit comments

Comments
 (0)