
Commit 7334950

Author: tanqingshan (A) committed

Fix MoE MLP related issues (ref vllm-project#4490)

1 parent: b32ef53

File tree: 1 file changed, +5 −2 lines


vllm_ascend/ops/fused_moe/moe_mlp.py

Lines changed: 5 additions & 2 deletions
@@ -127,14 +127,17 @@ def quant_apply_mlp(hidden_states: torch.Tensor,
     if quantized_hidden_states is not None:
         dispose_tensor(quantized_hidden_states)
     # act_fn: swiglu
+    group_diff = torch.diff(group_list, dim=0)
+    new_group = torch.cat([group_list[0].unsqueeze(0), group_diff],
+                          dim=0)
     hidden_states, swiglu_out_scale = torch_npu.npu_dequant_swiglu_quant(
         x=hidden_states,
         weight_scale=w1_scale,
         activation_scale=pertoken_scale,
         bias=None,
         quant_scale=None,
         quant_offset=None,
-        group_index=group_list,
+        group_index=new_group,
         activate_left=True,
         quant_mode=1,
     )
@@ -295,4 +298,4 @@ def unified_apply_mlp(hidden_states: torch.Tensor,
                               group_list=group_list,
                               group_list_type=group_list_type,
                               topk_scales=topk_scales,
-                              need_trans=need_trans)
+                              need_trans=need_trans)
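
The added lines appear to convert group_list from cumulative token offsets per expert into per-group token counts before passing it to npu_dequant_swiglu_quant as group_index. A minimal sketch of just that conversion, using plain torch and a hypothetical group_list value (the cumulative-offset interpretation is an assumption inferred from the use of torch.diff; the surrounding quantized SwiGLU call is omitted):

import torch

# Hypothetical cumulative group list: expert 0 owns tokens [0, 2),
# expert 1 owns tokens [2, 5), expert 2 owns tokens [5, 9).
group_list = torch.tensor([2, 5, 9], dtype=torch.int64)

# Per-group sizes: keep the first entry, then take adjacent differences.
group_diff = torch.diff(group_list, dim=0)           # tensor([3, 4])
new_group = torch.cat([group_list[0].unsqueeze(0),   # prepend first count
                       group_diff], dim=0)

print(new_group)  # tensor([2, 3, 4])

This mirrors the patch: new_group holds the number of tokens routed to each expert rather than the running offsets.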
