@@ -74,7 +74,9 @@ def __init__(
7474 # Scheduling constraints.
7575 self .max_num_running_reqs = self .scheduler_config .max_num_seqs
7676 self .max_num_scheduled_tokens = self .scheduler_config .max_num_batched_tokens
77- self .prefill_max_num_scheduled_tokens = self .scheduler_config .prefill_max_num_batched_tokens
77+ self .prefill_max_num_scheduled_tokens = (
78+ self .scheduler_config .prefill_max_num_batched_tokens
79+ )
7880 self .max_model_len = self .scheduler_config .max_model_len
7981 self .enable_kv_cache_events = (
8082 self .kv_events_config is not None
@@ -176,8 +178,9 @@ def __init__(
176178
177179 def _has_decode_requests (self ) -> bool :
178180 """Check if there are any requests in the decode phase in the running queue.
179-
180- Criteria: The request has completed prompt computation and is generating output tokens
181+
182+ Criteria:
183+ The request has completed prompt computation and is generating output tokens
181184 i.e., num_computed_tokens >= num_prompt_tokens
182185 """
183186 for request in self .running :
@@ -207,7 +210,10 @@ def schedule(self) -> SchedulerOutput:
207210
208211 # Check if there are any requests in the decode phase in the running queue.
209212 has_decode_requests = self ._has_decode_requests ()
210- if self .scheduler_config .enable_hybrid_chunked_prefill and not has_decode_requests :
213+ if (
214+ self .scheduler_config .enable_hybrid_chunked_prefill
215+ and not has_decode_requests
216+ ):
211217 token_budget = self .prefill_max_num_scheduled_tokens
212218 else :
213219 token_budget = self .max_num_scheduled_tokens
@@ -597,7 +603,10 @@ def schedule(self) -> SchedulerOutput:
597603
598604 # Check if the scheduling constraints are satisfied.
599605 total_num_scheduled_tokens = sum (num_scheduled_tokens .values ())
600- if self .scheduler_config .enable_hybrid_chunked_prefill and not has_decode_requests :
606+ if (
607+ self .scheduler_config .enable_hybrid_chunked_prefill
608+ and not has_decode_requests
609+ ):
601610 assert total_num_scheduled_tokens <= self .prefill_max_num_scheduled_tokens
602611 else :
603612 assert total_num_scheduled_tokens <= self .max_num_scheduled_tokens
0 commit comments