Diff summary: 1 file changed (+9, −2 lines) in vllm/model_executor/layers/quantization.

@@ -178,7 +178,10 @@ def get_quant_method(
178178 isinstance (layer , ParallelLMHead ) and self .lm_head_quantized
179179 ):
180180 if is_layer_skipped (
181- prefix , self .modules_to_not_convert , self .packed_modules_mapping
181+ prefix ,
182+ self .modules_to_not_convert ,
183+ self .packed_modules_mapping ,
184+ skip_with_substr = True ,
182185 ):
183186 return UnquantizedLinearMethod ()
184187 # Check if the layer is supported by AWQMarlin.
@@ -194,7 +197,11 @@ def get_quant_method(
194197 elif isinstance (layer , FusedMoE ):
195198 from vllm .model_executor .layers .quantization .moe_wna16 import MoeWNA16Config
196199
197- if is_layer_skipped (prefix , getattr (self , "modules_to_not_convert" , [])):
200+ if is_layer_skipped (
201+ prefix ,
202+ getattr (self , "modules_to_not_convert" , []),
203+ skip_with_substr = True ,
204+ ):
198205 return UnquantizedFusedMoEMethod (layer .moe_config )
199206 if not check_moe_marlin_supports_layer (layer , self .group_size ):
200207 logger .warning_once (
0 commit comments