We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b7d2390 · commit bdf92b6 (Copy full SHA for bdf92b6)
vllm/v1/worker/gpu_model_runner.py
@@ -1456,7 +1456,7 @@ def _forward(
1456
self,
1457
attn_metadata: dict[str, Any],
1458
num_input_tokens: int,
1459
- num_tokens_across_dp: int,
+ num_tokens_across_dp: Optional[Any],
1460
cudagraph_runtime_mode: CUDAGraphMode,
1461
batch_descriptor: BatchDescriptor,
1462
scheduler_output: "SchedulerOutput",
@@ -1465,7 +1465,7 @@ def _forward(
1465
intermediate_tensors: IntermediateTensors,
1466
inputs_embeds: list[torch.Tensor],
1467
model_kwargs: dict[str, Any],
1468
- ) -> Tuple[torch.Tensor, Optional[KVConnectorOutput]]:
+ ) -> tuple[torch.Tensor, Optional[KVConnectorOutput]]:
1469
with set_forward_context(
1470
attn_metadata,
1471
self.vllm_config,
0 commit comments