We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 752c6ad, commit 2e8cbb5. Copy full SHA for 2e8cbb5
vllm/v1/worker/gpu_model_runner.py
@@ -2079,7 +2079,7 @@ def _dummy_run(
2079
block_table_tensor=self.input_batch.block_table[
2080
kv_cache_group_id].get_device_tensor()[:num_reqs],
2081
slot_mapping=self.input_batch.
2082
- block_table[kv_cache_group_id].slot_mapping[:num_reqs])
+ block_table[kv_cache_group_id].slot_mapping[:num_tokens])
2083
2084
attn_metadata_i = self.attn_metadata_builders[
2085
kv_cache_group_id].build_for_cudagraph_capture(
0 commit comments