Commit ec8cb01

Moe bf16 ep (InternLM#4144)
* refactor pytorch.nn.moe
* add ep support
* fix tp
* support blocked fp8 moe with split_size<world_size
* unit test allow both fa3 and fa
* add singleton
* singleton and ctxmgrbase
* comment
* add static
* remove chunk
* remove forward dptp
* bound check
* remove monkey patch
* rename kernel
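
Two of the bullets above ("add singleton", "singleton and ctxmgrbase") name structural patterns rather than kernels. The sketch below is a rough illustration only of how a singleton metaclass and a context-manager base class are commonly combined in Python; the names SingletonMeta and CtxMgrBase are assumptions for this sketch and do not come from this commit.

# Hedged sketch: a singleton metaclass paired with a context-manager
# base class, as hinted by the commit bullets. `SingletonMeta` and
# `CtxMgrBase` are hypothetical names, not lmdeploy identifiers.


class SingletonMeta(type):
    """Metaclass that caches exactly one instance per class."""

    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]


class CtxMgrBase(metaclass=SingletonMeta):
    """Base whose subclasses are singletons usable in `with` blocks."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Returning False propagates any exception raised in the block.
        return False

A subclass such as `class MoeContext(CtxMgrBase): ...` would then always hand back the same instance, which matches the bullet's pairing of the two ideas.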
1 parent dc28b85

32 files changed: +2601 −2234 lines

lmdeploy/pytorch/backends/cuda/graph_runner.py

Lines changed: 2 additions & 2 deletions
@@ -5,7 +5,7 @@
 import torch
 from torch.profiler import record_function
 
-from lmdeploy.pytorch.backends.deepep_moe_checker import moe_backend
+from lmdeploy.pytorch.backends.deepep_moe_checker import get_moe_backend
 from lmdeploy.pytorch.backends.selector import get_backend
 from lmdeploy.pytorch.config import BackendConfig, CacheConfig, ModelConfig
 from lmdeploy.pytorch.model_inputs import StepContext, get_step_ctx_manager
@@ -256,7 +256,7 @@ def prepare_inputs_for_generation(
     ):
         """Prepare inputs."""
 
-        if moe_backend.use_deepep_moe_backend():
+        if get_moe_backend().use_deepep_moe_backend():
             from dlblas.layers.moe.token_dispatcher import DeepEPBuffer, DeepEPMode
             deepep_mode = DeepEPMode.LOW_LATENCY if context.is_decoding else DeepEPMode.NORMAL
             DeepEPBuffer.set_deepep_mode(deepep_mode)
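
The change itself is small but typical of the singleton refactor: the eagerly created module-level moe_backend object is replaced by a get_moe_backend() accessor. Below is a minimal sketch of that accessor pattern, assuming a hypothetical _MoeBackendChecker class; only get_moe_backend and use_deepep_moe_backend appear in the diff, everything else is illustrative.

from functools import lru_cache


class _MoeBackendChecker:
    """Hypothetical stand-in for the real checker in deepep_moe_checker."""

    def use_deepep_moe_backend(self) -> bool:
        # The real check would inspect the configured MoE backend;
        # this sketch just returns a fixed value.
        return False


@lru_cache(maxsize=1)
def get_moe_backend() -> _MoeBackendChecker:
    """Build the checker on first call and reuse it afterwards."""
    return _MoeBackendChecker()

Deferring construction to the first call, rather than instantiating moe_backend at import time, lets the checker observe configuration that is only set after the module is imported, which is a common motivation for this kind of accessor.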
