We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b3d76a5 commit 587ee3b — Copy full SHA for 587ee3b
vllm/model_executor/layers/quantization/fp8.py
@@ -1143,12 +1143,9 @@ def apply(
1143
x: torch.Tensor,
1144
router_logits: torch.Tensor,
1145
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
1146
- if layer.enable_eplb:
1147
- assert layer.expert_load_view is not None
1148
- assert layer.logical_to_physical_map is not None
1149
- assert layer.logical_replica_count is not None
1150
-
1151
if self.flashinfer_moe_backend == FlashinferMoeBackend.TENSORRT_LLM:
+ if layer.enable_eplb:
+ raise NotImplementedError("EPLB not supported for `Fp8MoEMethod` yet.")
1152
assert layer.activation == "silu", (
1153
f"Expected 'silu' activation but got {layer.activation}"
1154
)
0 commit comments