vllm_ascend/ops/fused_moe (1 file changed, +5 −2)

@@ -127,14 +127,17 @@ def quant_apply_mlp(hidden_states: torch.Tensor,
     if quantized_hidden_states is not None:
         dispose_tensor(quantized_hidden_states)
     # act_fn: swiglu
+    group_diff = torch.diff(group_list, dim=0)
+    new_group = torch.cat([group_list[0].unsqueeze(0), group_diff],
+                          dim=0)
     hidden_states, swiglu_out_scale = torch_npu.npu_dequant_swiglu_quant(
         x=hidden_states,
         weight_scale=w1_scale,
         activation_scale=pertoken_scale,
         bias=None,
         quant_scale=None,
         quant_offset=None,
-        group_index=group_list,
+        group_index=new_group,
         activate_left=True,
         quant_mode=1,
     )
@@ -295,4 +298,4 @@ def unified_apply_mlp(hidden_states: torch.Tensor,
         group_list=group_list,
         group_list_type=group_list_type,
         topk_scales=topk_scales,
-        need_trans=need_trans)
+        need_trans=need_trans)