File tree Expand file tree Collapse file tree 1 file changed +13
-3
lines changed
vllm/model_executor/layers/fused_moe Expand file tree Collapse file tree 1 file changed +13
-3
lines changed Original file line number Diff line number Diff line change @@ -475,12 +475,11 @@ def forward_cuda(
475475 activation = activation ,
476476 apply_router_weight_on_input = apply_router_weight_on_input )
477477 else :
478- return self .fused_experts (
478+ # add w1_bias/w2_bias to kwargs if they exist
479+ kwargs = dict (
479480 hidden_states = x ,
480481 w1 = layer .w13_weight ,
481482 w2 = layer .w2_weight ,
482- w1_bias = layer .w13_bias if self .has_bias else None ,
483- w2_bias = layer .w2_bias if self .has_bias else None ,
484483 topk_weights = topk_weights ,
485484 topk_ids = topk_ids ,
486485 inplace = True ,
@@ -489,6 +488,17 @@ def forward_cuda(
489488 global_num_experts = global_num_experts ,
490489 expert_map = expert_map ,
491490 )
491+ if isinstance (self .fused_experts ,
492+ FusedMoEModularKernel ) and self .has_bias :
493+ raise ValueError (
494+ "FusedMoEModularKernel does not support bias." )
495+ if self .has_bias :
496+ kwargs .update ({
497+ "w1_bias" : getattr (layer , "w13_bias" , None ),
498+ "w2_bias" : getattr (layer , "w2_bias" , None ),
499+ })
500+
501+ return self .fused_experts (** kwargs )
492502
493503 def forward_cpu (
494504 self ,
You can’t perform that action at this time.
0 commit comments