Diff summary: 1 file changed (+9, −2 lines) in vllm/model_executor/layers/quantization.

@@ -178,7 +178,10 @@ def get_quant_method(
178178 isinstance (layer , ParallelLMHead ) and self .lm_head_quantized
179179 ):
180180 if is_layer_skipped (
181- prefix , self .modules_to_not_convert , self .packed_modules_mapping
181+ prefix ,
182+ self .modules_to_not_convert ,
183+ self .packed_modules_mapping ,
184+ skip_with_substr = True ,
182185 ):
183186 return UnquantizedLinearMethod ()
184187 # Check if the layer is supported by AWQMarlin.
@@ -194,7 +197,11 @@ def get_quant_method(
194197 elif isinstance (layer , FusedMoE ):
195198 from vllm .model_executor .layers .quantization .moe_wna16 import MoeWNA16Config
196199
197- if is_layer_skipped (prefix , getattr (self , "modules_to_not_convert" , [])):
200+ if is_layer_skipped (
201+ prefix ,
202+ getattr (self , "modules_to_not_convert" , []),
203+ skip_with_substr = True ,
204+ ):
198205 return UnquantizedFusedMoEMethod (layer .moe_config )
199206 if not check_moe_marlin_supports_layer (layer , self .group_size ):
200207 logger .warning_once (
0 commit comments