Skip to content

Commit f8f9bee

Browse files
committed
fix(arg_utils): docs build
Signed-off-by: Ther-LF <[email protected]>
1 parent d2e59a1 commit f8f9bee

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

vllm/v1/core/sched/scheduler.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,9 @@ def __init__(
74 74
# Scheduling constraints.
75 75
self.max_num_running_reqs = self.scheduler_config.max_num_seqs
76 76
self.max_num_scheduled_tokens = self.scheduler_config.max_num_batched_tokens
77-
self.prefill_max_num_scheduled_tokens = self.scheduler_config.prefill_max_num_batched_tokens
77+
self.prefill_max_num_scheduled_tokens = (
78+
self.scheduler_config.prefill_max_num_batched_tokens
79+
)
78 80
self.max_model_len = self.scheduler_config.max_model_len
79 81
self.enable_kv_cache_events = (
80 82
self.kv_events_config is not None
@@ -176,8 +178,9 @@ def __init__(
176 178

177 179
def _has_decode_requests(self) -> bool:
178 180
"""Check if there are any requests in the decode phase in the running queue.
179-
180-
Criteria: The request has completed prompt computation and is generating output tokens
181+
182+
Criteria:
183+
The request has completed prompt computation and is generating output tokens
181 184
i.e., num_computed_tokens >= num_prompt_tokens
182 185
"""
183 186
for request in self.running:
@@ -207,7 +210,10 @@ def schedule(self) -> SchedulerOutput:
207 210

208 211
# Check if there are any requests in the decode phase in the running queue.
209 212
has_decode_requests = self._has_decode_requests()
210-
if self.scheduler_config.enable_hybrid_chunked_prefill and not has_decode_requests:
213+
if (
214+
self.scheduler_config.enable_hybrid_chunked_prefill
215+
and not has_decode_requests
216+
):
211 217
token_budget = self.prefill_max_num_scheduled_tokens
212 218
else:
213 219
token_budget = self.max_num_scheduled_tokens
@@ -597,7 +603,10 @@ def schedule(self) -> SchedulerOutput:
597 603

598 604
# Check if the scheduling constraints are satisfied.
599 605
total_num_scheduled_tokens = sum(num_scheduled_tokens.values())
600-
if self.scheduler_config.enable_hybrid_chunked_prefill and not has_decode_requests:
606+
if (
607+
self.scheduler_config.enable_hybrid_chunked_prefill
608+
and not has_decode_requests
609+
):
601 610
assert total_num_scheduled_tokens <= self.prefill_max_num_scheduled_tokens
602 611
else:
603 612
assert total_num_scheduled_tokens <= self.max_num_scheduled_tokens

0 commit comments

Comments
 (0)