Commit 1ca933c

fix
Signed-off-by: Vadim Gimpelson <[email protected]>
1 parent 23b55cb commit 1ca933c

1 file changed: +2 −1 lines changed


vllm/model_executor/models/config.py

Lines changed: 2 additions & 1 deletion
@@ -410,6 +410,7 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
             attn_tokens_per_mamba_state = cdiv(mamba_page_size, attn_page_size_1_token)
             chunk_size = lcm(base_chunk_size, kernel_block_alignment_size)
             attn_block_size = chunk_size * cdiv(attn_tokens_per_mamba_state, chunk_size)
+            attn_block_size = next_power_of_2(attn_block_size)
             cache_config.mamba_block_size = attn_block_size
         else:
             # Without prefix caching, select minimum valid attention block size
@@ -421,12 +422,12 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
             attn_block_size = kernel_block_alignment_size * cdiv(
                 mamba_page_size, kernel_block_alignment_size * attn_page_size_1_token
             )
+            attn_block_size = next_power_of_2(attn_block_size)
 
         # override attention block size if either (a) the
         # user has not set it or (b) the user has set it
         # too small.
         if cache_config.block_size is None or cache_config.block_size < attn_block_size:
-            attn_block_size = next_power_of_2(attn_block_size)
             cache_config.block_size = attn_block_size
             logger.info(
                 "Setting attention block size to %d tokens "
