We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent dd45530 commit d5cb8a5 (Copy full SHA for d5cb8a5)
vllm/v1/worker/gpu_model_runner.py
@@ -2802,7 +2802,7 @@ def _determine_batch_execution_and_padding(
2802
)
2803
2804
should_ubatch, num_tokens_across_dp = coordinate_batch_across_dp(
2805
- num_tokens_unpadded=num_tokens_padded,
+ num_tokens_unpadded=num_tokens,
2806
parallel_config=self.parallel_config,
2807
allow_microbatching=allow_microbatching,
2808
allow_dp_padding=allow_dp_padding,
0 commit comments