vllm/model_executor/models (1 file changed: +2, -1 lines)

@@ -411,6 +411,7 @@ def lcm(a, b):
     chunk_size = lcm(base_chunk_size, kernel_block_alignment_size)
     attn_block_size = chunk_size * cdiv(attn_tokens_per_mamba_state, chunk_size)
+    attn_block_size = next_power_of_2(attn_block_size)
     cache_config.mamba_block_size = attn_block_size
 else:
     # Without prefix caching, select minimum valid attention block size
@@ -422,12 +423,12 @@ def lcm(a, b):
     attn_block_size = kernel_block_alignment_size * cdiv(
         mamba_page_size, kernel_block_alignment_size * attn_page_size_1_token
     )
+    attn_block_size = next_power_of_2(attn_block_size)

 # override attention block size if either (a) the
 # user has not set it or (b) the user has set it
 # too small.
 if cache_config.block_size is None or cache_config.block_size < attn_block_size:
-    attn_block_size = next_power_of_2(attn_block_size)
     cache_config.block_size = attn_block_size
     logger.info(
         "Setting attention block size to %d tokens "