Commit dd38ba3

[Bugfix] Fix adapter_enabled IMA (#29977)
Signed-off-by: Jee Jee Li <[email protected]>
1 parent: 5f91cdd

1 file changed: +6, -2 lines

vllm/lora/ops/triton_ops/fused_moe_lora_op.py

@@ -96,10 +96,14 @@ def _fused_moe_lora_kernel(
     slice_id = tl.program_id(axis=1)
     lora_idx = tl.program_id(axis=2)
     lora_id = tl.load(lora_ids + lora_idx)
-    moe_enabled = tl.load(adapter_enabled + lora_id)
-    if lora_id == -1 or moe_enabled == 0:
+
+    if lora_id == -1:
         # Early exit for the no-lora case.
         return
+    moe_enabled = tl.load(adapter_enabled + lora_id)
+    if moe_enabled == 0:
+        # Early exit for the no moe lora case.
+        return
     max_loras = tl.num_programs(axis=2)
     grid_k = tl.cdiv(K, BLOCK_SIZE_K * SPLIT_K)
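Context for the fix: in the old ordering, a program instance with lora_id == -1 (the no-lora case) still executed tl.load(adapter_enabled + lora_id) before the early-exit check, reading one element before the start of the adapter_enabled buffer. That out-of-bounds read is the illegal memory access (IMA) named in the title. The fix checks lora_id == -1 first and only then loads the per-adapter flag. Below is a minimal host-side sketch of the fixed control flow; it is plain Python/NumPy for illustration only, and should_run plus the example adapter_enabled contents are hypothetical, not vLLM code:

import numpy as np

# One enabled-flag per adapter slot; contents are illustrative.
adapter_enabled = np.array([1, 0, 1], dtype=np.int32)

def should_run(lora_id: int) -> bool:
    """Mirror the kernel's fixed entry checks for one program instance."""
    if lora_id == -1:
        # Early exit for the no-lora case; never index with -1.
        # (On the GPU, loading adapter_enabled + (-1) reads out of
        # bounds; in NumPy, index -1 would silently wrap to the last
        # element, masking the bug. Hence the check must come first.)
        return False
    if adapter_enabled[lora_id] == 0:
        # Early exit for the no moe lora case.
        return False
    return True

assert should_run(-1) is False  # no-lora slot: exits before any load
assert should_run(1) is False   # adapter present but MoE LoRA disabled
assert should_run(0) is True    # enabled adapter proceeds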
