5757
5858from pydantic import Field
5959
60+ from guidellm .scheduler .constraints .constraint import (
61+ Constraint ,
62+ PydanticConstraintInitializer ,
63+ )
64+ from guidellm .scheduler .constraints .factory import ConstraintsInitializerFactory
6065from guidellm .scheduler .schemas import (
6166 SchedulerState ,
6267 SchedulerUpdateAction ,
6368)
6469from guidellm .schemas import RequestInfo
65- from guidellm .settings import settings
66-
67- from .constraint import Constraint , PydanticConstraintInitializer
68- from .factory import ConstraintsInitializerFactory
6970
7071__all__ = [
7172 "OverSaturationConstraint" ,
@@ -355,7 +356,12 @@ def reset(self) -> None:
355356 )
356357
357358 def _add_finished (self , request : dict [str , Any ]) -> None :
358- """Add a finished request to tracking."""
359+ """
360+ Add a finished request to tracking.
361+
362+ :param request: Dictionary containing request data with 'ttft' and
363+ 'duration' keys.
364+ """
359365 ttft = request ["ttft" ]
360366 duration = request ["duration" ]
361367 if ttft is not None :
@@ -366,7 +372,12 @@ def _add_finished(self, request: dict[str, Any]) -> None:
366372 self .ttft_slope_checker .add_data_point (duration , ttft )
367373
368374 def _remove_finished (self , request : dict [str , Any ]) -> None :
369- """Remove a finished request from tracking."""
375+ """
376+ Remove a finished request from tracking.
377+
378+ :param request: Dictionary containing request data with 'ttft' and
379+ 'duration' keys.
380+ """
370381 del self .finished_requests [0 ]
371382 ttft = request ["ttft" ]
372383 duration = request ["duration" ]
@@ -375,7 +386,12 @@ def _remove_finished(self, request: dict[str, Any]) -> None:
375386 self .ttft_slope_checker .remove_data_point (duration , ttft )
376387
377388 def _add_started (self , request : dict [str , Any ]) -> None :
378- """Add a started request to tracking."""
389+ """
390+ Add a started request to tracking.
391+
392+ :param request: Dictionary containing request data with
393+ 'concurrent_requests' and 'duration' keys.
394+ """
379395 concurrent = request ["concurrent_requests" ]
380396 duration = request ["duration" ]
381397 if concurrent is not None :
@@ -384,14 +400,26 @@ def _add_started(self, request: dict[str, Any]) -> None:
384400 self .concurrent_slope_checker .add_data_point (duration , concurrent )
385401
386402 def _remove_started (self , request : dict [str , Any ]) -> None :
387- """Remove a started request from tracking."""
403+ """
404+ Remove a started request from tracking.
405+
406+ :param request: Dictionary containing request data with
407+ 'concurrent_requests' and 'duration' keys.
408+ """
388409 del self .started_requests [0 ]
389410 concurrent = request ["concurrent_requests" ]
390411 duration = request ["duration" ]
391412 self .concurrent_slope_checker .remove_data_point (duration , concurrent )
392413
393414 def _update_duration (self , duration : float ) -> None :
394- """Update duration and prune old data points."""
415+ """
416+ Update duration and prune old data points.
417+
418+ Updates the current duration and removes data points that exceed the maximum
419+ window size (by ratio or time) to maintain bounded memory usage.
420+
421+ :param duration: Current duration in seconds since benchmark start.
422+ """
395423 self .duration = duration
396424
397425 maximum_finished_window_size = int (
@@ -428,8 +456,7 @@ def _check_alert(self) -> bool:
428456 """
429457 Check if over-saturation is currently detected.
430458
431- Returns:
432- True if over-saturation is detected, False otherwise.
459+ :return: True if over-saturation is detected, False otherwise.
433460 """
434461 # Use duration as the maximum n value since requests from the
435462 # same second are highly correlated; this is simple and good enough
@@ -521,13 +548,13 @@ class OverSaturationConstraintInitializer(PydanticConstraintInitializer):
521548 Factory for creating OverSaturationConstraint instances from configuration.
522549
523550 Provides a Pydantic-based initializer for over-saturation detection constraints
524- with support for flexible configuration patterns. Supports both simple boolean
525- flags and detailed configuration dictionaries, enabling easy integration with
526- CLI arguments, configuration files, and programmatic constraint creation.
551+ with support for flexible configuration patterns. Supports detailed configuration
552+ dictionaries, enabling easy integration with CLI arguments, configuration files,
553+ and programmatic constraint creation.
527554
528555 Example:
529556 ::
530- # Simple boolean configuration
557+ # Configuration with defaults
531558 initializer = OverSaturationConstraintInitializer(enabled=True)
532559 constraint = initializer.create_constraint()
533560
@@ -618,18 +645,18 @@ def create_constraint(self, **_kwargs) -> Constraint:
618645
619646 @classmethod
620647 def validated_kwargs (
621- cls , over_saturation : bool | dict [str , Any ] | None = None , ** kwargs
648+ cls , over_saturation : dict [str , Any ] | None = None , ** kwargs
622649 ) -> dict [str , Any ]:
623650 """
624651 Validate and process arguments for OverSaturationConstraint creation.
625652
626- Processes flexible input formats to create validated constraint configuration.
627- Supports boolean flags for simple enable/disable, dictionary inputs for detailed
628- configuration, and alias parameters for compatibility. Handles parameter
629- normalization and default value application.
653+ Processes flexible input formats to create validated constraint
654+ configuration. Supports dictionary inputs for detailed configuration, and
655+ alias parameters for compatibility. Handles parameter normalization and
656+ default value application.
630657
631- :param over_saturation: Boolean to enable/disable with defaults, or dictionary
632- with configuration parameters (min_seconds, max_window_seconds, etc.)
658+ :param over_saturation: Dictionary with configuration parameters
659+ (min_seconds, max_window_seconds, etc.)
633660 :param kwargs: Additional keyword arguments supporting aliases like
634661 "detect_saturation" for compatibility, or unpacked dict values when
635662 dict is passed to factory
@@ -638,7 +665,7 @@ def validated_kwargs(
638665 """
639666 # Check for aliases in kwargs
640667 aliases = ["over_saturation" , "detect_saturation" ]
641- result : bool | dict [str , Any ] | None = over_saturation
668+ result : dict [str , Any ] | None = over_saturation
642669
643670 for alias in aliases :
644671 alias_value = kwargs .get (alias )
@@ -664,37 +691,13 @@ def validated_kwargs(
664691 result = {key : kwargs [key ] for key in constraint_keys if key in kwargs }
665692
666693 if result is None :
667- return {}
668-
669- if isinstance (result , bool ):
670- # When a boolean is passed, read defaults from settings
671- return {
672- "enabled" : result ,
673- "min_seconds" : kwargs .get (
674- "min_seconds" , settings .constraint_over_saturation_min_seconds
675- ),
676- "max_window_seconds" : kwargs .get (
677- "max_window_seconds" ,
678- settings .constraint_over_saturation_max_window_seconds ,
679- ),
680- }
681- elif isinstance (result , dict ):
682- # Extract configuration from dict, reading from settings for missing values
683- return {
684- "enabled" : result .get ("enabled" , True ),
685- "min_seconds" : result .get (
686- "min_seconds" , settings .constraint_over_saturation_min_seconds
687- ),
688- "max_window_seconds" : result .get (
689- "max_window_seconds" ,
690- settings .constraint_over_saturation_max_window_seconds ,
691- ),
692- "moe_threshold" : result .get ("moe_threshold" , 2.0 ),
693- "minimum_ttft" : result .get ("minimum_ttft" , 2.5 ),
694- "maximum_window_ratio" : result .get ("maximum_window_ratio" , 0.75 ),
695- "minimum_window_size" : result .get ("minimum_window_size" , 5 ),
696- "confidence" : result .get ("confidence" , 0.95 ),
697- }
694+ return {"enabled" : False }
695+
696+ if isinstance (result , dict ):
697+ # Return the dict as-is; defaults come from the field definitions above
698+ return result
698699 else :
699- # Convert to bool if it's truthy
700- return {"enabled" : bool (result )}
700+ # Type signature only accepts dict or None, so this should never happen
701+ raise TypeError (
702+ f"over_saturation must be a dict or None, got { type (result ).__name__ } "
703+ )
0 commit comments