vllm_spyre/v1/core/scheduler.py (3 additions & 2 deletions)
@@ -224,8 +224,9 @@ def can_schedule(self, request) -> bool:
         cond2 = len(self.waiting) < max_prompt_batch_size
         # check that the prompt length does not exceed the current tkv
         cond3 = request.num_prompt_tokens <= self.tkv
-        # check that the number of requested tokens can be served
-        cond4 = request.max_tokens <= (max_context_len - self.tkv)
+        # check that the number of requested tokens can be served (-1 for free
+        # prefill token)
+        cond4 = request.max_tokens - 1 <= (max_context_len - self.tkv)
         # check that there are enough free blocks/pages remaining
         # Note: we only have to do check in case of a running batches
         # (not start_new_batch), because the minimal number of blocks covers
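
Why the -1 works (a minimal sketch, not part of the PR): assuming tkv is the current token/KV length of the running batch and that a request's first output token is already produced by the prefill step itself (the "free prefill token"), only the remaining max_tokens - 1 tokens need decode slots between the current tkv and max_context_len. The helper name fits_in_context below is hypothetical and only mirrors the updated cond4.

def fits_in_context(max_tokens: int, tkv: int, max_context_len: int) -> bool:
    # Decode steps still available before the batch hits the context limit.
    decode_slots = max_context_len - tkv
    # The first token comes from prefill, so only max_tokens - 1 need decode slots.
    return max_tokens - 1 <= decode_slots

# Example: tkv=2048 and max_context_len=2080 leave 32 decode slots, so a
# request asking for 33 tokens fits (32 decoded tokens + 1 prefill token),
# while 34 would not.
assert fits_in_context(33, tkv=2048, max_context_len=2080)
assert not fits_in_context(34, tkv=2048, max_context_len=2080)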