1 parent 4863dff commit e850c8b
vllm/model_executor/layers/quantization/fp8.py
@@ -1171,12 +1171,9 @@ def apply(
         x: torch.Tensor,
         router_logits: torch.Tensor,
     ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        if layer.enable_eplb:
-            assert layer.expert_load_view is not None
-            assert layer.logical_to_physical_map is not None
-            assert layer.logical_replica_count is not None
-
         if self.flashinfer_moe_backend == FlashinferMoeBackend.TENSORRT_LLM:
+            if layer.enable_eplb:
+                raise NotImplementedError("EPLB not supported for `Fp8MoEMethod` yet.")
             assert layer.activation == "silu", (
                 f"Expected 'silu' activation but got {layer.activation}"
             )
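For context, the net effect of the hunk above is that enabling EPLB on the FlashInfer TensorRT-LLM path now fails fast with NotImplementedError, instead of unconditionally asserting that the EPLB bookkeeping tensors (expert_load_view, logical_to_physical_map, logical_replica_count) are populated. Below is a minimal, self-contained sketch of that guard; the Layer dataclass and the FlashinferMoeBackend stand-in are hypothetical simplifications, not vLLM's actual classes.

# Sketch only: mirrors the guard added in the diff, with stand-in types.
from dataclasses import dataclass
from enum import Enum, auto


class FlashinferMoeBackend(Enum):  # stand-in for vLLM's backend enum
    TENSORRT_LLM = auto()
    OTHER = auto()


@dataclass
class Layer:  # hypothetical stand-in for the MoE layer object
    enable_eplb: bool = False
    activation: str = "silu"


def apply(layer: Layer, backend: FlashinferMoeBackend) -> None:
    # On the TRT-LLM backend, reject EPLB up front rather than asserting
    # that EPLB bookkeeping tensors are set.
    if backend is FlashinferMoeBackend.TENSORRT_LLM:
        if layer.enable_eplb:
            raise NotImplementedError("EPLB not supported for `Fp8MoEMethod` yet.")
        assert layer.activation == "silu", (
            f"Expected 'silu' activation but got {layer.activation}"
        )


apply(Layer(), FlashinferMoeBackend.TENSORRT_LLM)  # passes
try:
    apply(Layer(enable_eplb=True), FlashinferMoeBackend.TENSORRT_LLM)
except NotImplementedError as exc:
    print(exc)  # EPLB not supported for `Fp8MoEMethod` yet.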