We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 5f91cdd · commit dd38ba3 — Copy full SHA for dd38ba3
vllm/lora/ops/triton_ops/fused_moe_lora_op.py
@@ -96,10 +96,14 @@ def _fused_moe_lora_kernel(
96
slice_id = tl.program_id(axis=1)
97
lora_idx = tl.program_id(axis=2)
98
lora_id = tl.load(lora_ids + lora_idx)
99
- moe_enabled = tl.load(adapter_enabled + lora_id)
100
- if lora_id == -1 or moe_enabled == 0:
+
+ if lora_id == -1:
101
# Early exit for the no-lora case.
102
return
103
+ moe_enabled = tl.load(adapter_enabled + lora_id)
104
+ if moe_enabled == 0:
105
+ # Early exit for the no-MoE-LoRA case.
106
+ return
107
max_loras = tl.num_programs(axis=2)
108
grid_k = tl.cdiv(K, BLOCK_SIZE_K * SPLIT_K)
109
0 commit comments