Skip to content

Commit e8a0bb6

Browse files
fix: over-saturation settings handling
Signed-off-by: Alon Kellner <[email protected]>
1 parent 86aad7e commit e8a0bb6

File tree

6 files changed

+72
-16
lines changed

6 files changed

+72
-16
lines changed

src/guidellm/scheduler/constraints/saturation.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
SchedulerUpdateAction,
6363
)
6464
from guidellm.schemas import RequestInfo
65+
from guidellm.settings import settings
6566

6667
from .constraint import Constraint, PydanticConstraintInitializer
6768
from .factory import ConstraintsInitializerFactory
@@ -630,7 +631,8 @@ def validated_kwargs(
630631
:param over_saturation: Boolean to enable/disable with defaults, or dictionary
631632
with configuration parameters (min_seconds, max_window_seconds, etc.)
632633
:param kwargs: Additional keyword arguments supporting aliases like
633-
"detect_saturation" for compatibility
634+
"detect_saturation" for compatibility, or unpacked dict values when
635+
a dict is passed to the factory
634636
:return: Validated dictionary with constraint configuration ready for
635637
initializer creation
636638
"""
@@ -644,17 +646,49 @@ def validated_kwargs(
644646
result = alias_value
645647
break
646648

649+
# If over_saturation is None but kwargs contain constraint parameters,
650+
# treat kwargs as an unpacked dict (happens when a dict is passed to the factory)
651+
if result is None and kwargs:
652+
constraint_keys = {
653+
"enabled",
654+
"min_seconds",
655+
"max_window_seconds",
656+
"moe_threshold",
657+
"minimum_ttft",
658+
"maximum_window_ratio",
659+
"minimum_window_size",
660+
"confidence",
661+
}
662+
if any(key in kwargs for key in constraint_keys):
663+
# Reconstruct dict from kwargs
664+
result = {key: kwargs[key] for key in constraint_keys if key in kwargs}
665+
647666
if result is None:
648667
return {}
649668

650669
if isinstance(result, bool):
651-
return {"enabled": result}
670+
# When a boolean is passed, read defaults from settings
671+
return {
672+
"enabled": result,
673+
"min_seconds": kwargs.get(
674+
"min_seconds", settings.constraint_over_saturation_min_seconds
675+
),
676+
"max_window_seconds": kwargs.get(
677+
"max_window_seconds",
678+
settings.constraint_over_saturation_max_window_seconds,
679+
),
680+
}
652681
elif isinstance(result, dict):
653-
# Extract configuration from dict
682+
# Extract configuration from dict, reading from settings for missing values
654683
return {
655684
"enabled": result.get("enabled", True),
656-
"min_seconds": result.get("min_seconds", 30.0),
657-
"max_window_seconds": result.get("max_window_seconds", 120.0),
685+
"min_seconds": result.get(
686+
"min_seconds", settings.constraint_over_saturation_min_seconds
687+
),
688+
"max_window_seconds": result.get(
689+
"max_window_seconds",
690+
settings.constraint_over_saturation_max_window_seconds,
691+
),
658692
"moe_threshold": result.get("moe_threshold", 2.0),
659693
"minimum_ttft": result.get("minimum_ttft", 2.5),
660694
"maximum_window_ratio": result.get("maximum_window_ratio", 0.75),

tests/e2e/test_over_saturated_benchmark.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def server():
2222
port=8000,
2323
model="databricks/dolly-v2-12b",
2424
mode="random",
25-
time_to_first_token=10000,
25+
time_to_first_token=60000,
2626
inter_token_latency=100,
2727
max_num_seqs=1,
2828
)
@@ -39,7 +39,7 @@ def test_over_saturated_benchmark(server: VllmSimServer):
3939
Test over-saturation detection with boolean configuration.
4040
"""
4141
report_path = Path("tests/e2e/over_saturated_benchmarks.json")
42-
rate = 1
42+
rate = 10
4343

4444
# Create and configure the guidellm client
4545
client = GuidellmClient(target=server.get_url(), output_path=report_path)
@@ -80,7 +80,7 @@ def test_over_saturated_benchmark_with_dict_config(server: VllmSimServer):
8080
Test over-saturation detection with dictionary configuration instead of boolean.
8181
"""
8282
report_path = Path("tests/e2e/over_saturated_benchmarks_dict.json")
83-
rate = 1
83+
rate = 10
8484

8585
# Create and configure the guidellm client
8686
client = GuidellmClient(target=server.get_url(), output_path=report_path)

tests/e2e/utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,6 @@ def start_benchmark(
9393
if over_saturation:
9494
cmd_parts.append("--over-saturation=True")
9595
elif isinstance(over_saturation, dict):
96-
import json
97-
9896
cmd_parts.append(f"--over-saturation '{json.dumps(over_saturation)}'")
9997

10098
cmd_parts.extend(

tests/e2e/vllm-sim-macos.Dockerfile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,14 @@ FROM golang AS base
22

33
WORKDIR /app
44

5-
ARG BUILDOS
65
ARG BUILDARCH
76

87
RUN apt-get update && \
98
apt-get install -y libzmq3-dev pkg-config && \
109
git clone https://github.com/llm-d/llm-d-inference-sim.git && \
1110
cd llm-d-inference-sim && \
1211
git checkout v0.3.0 && \
13-
GOOS=${BUILDOS} GOARCH=${BUILDARCH} make build
12+
GOOS=darwin GOARCH=${BUILDARCH} make build
1413

1514
WORKDIR /app/llm-d-inference-sim
1615

tests/unit/scheduler/test_over_saturation.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
SerializableConstraintInitializer,
1919
)
2020
from guidellm.schemas import RequestInfo, RequestTimings
21+
from guidellm.settings import settings
2122

2223

2324
class TestOverSaturationConstraintInternal:
@@ -382,28 +383,46 @@ def test_create_constraint(self, valid_instances):
382383
@pytest.mark.smoke
383384
def test_validated_kwargs(self):
384385
"""Test validated_kwargs method with various inputs."""
386+
385387
result = OverSaturationConstraintInitializer.validated_kwargs(
386388
over_saturation=True
387389
)
388-
assert result == {"enabled": True}
390+
# When a boolean is passed, settings values should be included
391+
assert result["enabled"] is True
392+
assert result["min_seconds"] == settings.constraint_over_saturation_min_seconds
393+
assert (
394+
result["max_window_seconds"]
395+
== settings.constraint_over_saturation_max_window_seconds
396+
)
389397

390398
result = OverSaturationConstraintInitializer.validated_kwargs(
391399
over_saturation=False
392400
)
393-
assert result == {"enabled": False}
401+
assert result["enabled"] is False
402+
assert result["min_seconds"] == settings.constraint_over_saturation_min_seconds
403+
assert (
404+
result["max_window_seconds"]
405+
== settings.constraint_over_saturation_max_window_seconds
406+
)
394407

395408
# Test with dict input
396409
result = OverSaturationConstraintInitializer.validated_kwargs(
397410
over_saturation={"enabled": True, "min_seconds": 20.0}
398411
)
399412
assert result["enabled"] is True
400-
assert "min_seconds" in result
413+
assert result["min_seconds"] == 20.0
414+
# Other values should come from settings if not provided
415+
assert (
416+
result["max_window_seconds"]
417+
== settings.constraint_over_saturation_max_window_seconds
418+
)
401419

402420
# Test with aliases
403421
result = OverSaturationConstraintInitializer.validated_kwargs(
404422
detect_saturation=True
405423
)
406-
assert result == {"enabled": True}
424+
assert result["enabled"] is True
425+
assert result["min_seconds"] == settings.constraint_over_saturation_min_seconds
407426

408427
@pytest.mark.smoke
409428
def test_marshalling(self, valid_instances):

tox.ini

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ commands =
2929
python -m pytest tests/e2e {posargs}
3030

3131

32+
[testenv:test-path]
33+
description = Run tests at a given path
34+
commands =
35+
python -m pytest {posargs}
36+
37+
3238
[testenv:quality]
3339
description = Run all quality checks
3440
commands =

0 commit comments

Comments
 (0)