File tree Expand file tree Collapse file tree 1 file changed +2
-1
lines changed
vllm/model_executor/models Expand file tree Collapse file tree 1 file changed +2
-1
lines changed Original file line number Diff line number Diff line change @@ -410,6 +410,7 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
410410 attn_tokens_per_mamba_state = cdiv (mamba_page_size , attn_page_size_1_token )
411411 chunk_size = lcm (base_chunk_size , kernel_block_alignment_size )
412412 attn_block_size = chunk_size * cdiv (attn_tokens_per_mamba_state , chunk_size )
413+ attn_block_size = next_power_of_2 (attn_block_size )
413414 cache_config .mamba_block_size = attn_block_size
414415 else :
415416 # Without prefix caching, select minimum valid attention block size
@@ -421,12 +422,12 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
421422 attn_block_size = kernel_block_alignment_size * cdiv (
422423 mamba_page_size , kernel_block_alignment_size * attn_page_size_1_token
423424 )
425+ attn_block_size = next_power_of_2 (attn_block_size )
424426
425427 # override attention block size if either (a) the
426428 # user has not set it or (b) the user has set it
427429 # too small.
428430 if cache_config .block_size is None or cache_config .block_size < attn_block_size :
429- attn_block_size = next_power_of_2 (attn_block_size )
430431 cache_config .block_size = attn_block_size
431432 logger .info (
432433 "Setting attention block size to %d tokens "
You can’t perform that action at this time.
0 commit comments