Commit 81d5bb7

[Bugfix] Fix AWQ marlin layer skipping (#27416)
Signed-off-by: Isotr0py <[email protected]>
1 parent 0825197 commit 81d5bb7

1 file changed: +9 −2 lines

vllm/model_executor/layers/quantization/awq_marlin.py

Lines changed: 9 additions & 2 deletions
@@ -178,7 +178,10 @@ def get_quant_method(
             isinstance(layer, ParallelLMHead) and self.lm_head_quantized
         ):
             if is_layer_skipped(
-                prefix, self.modules_to_not_convert, self.packed_modules_mapping
+                prefix,
+                self.modules_to_not_convert,
+                self.packed_modules_mapping,
+                skip_with_substr=True,
             ):
                 return UnquantizedLinearMethod()
             # Check if the layer is supported by AWQMarlin.
@@ -194,7 +197,11 @@ def get_quant_method(
         elif isinstance(layer, FusedMoE):
             from vllm.model_executor.layers.quantization.moe_wna16 import MoeWNA16Config
 
-            if is_layer_skipped(prefix, getattr(self, "modules_to_not_convert", [])):
+            if is_layer_skipped(
+                prefix,
+                getattr(self, "modules_to_not_convert", []),
+                skip_with_substr=True,
+            ):
                 return UnquantizedFusedMoEMethod(layer.moe_config)
             if not check_moe_marlin_supports_layer(layer, self.group_size):
                 logger.warning_once(
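
What the change does: both is_layer_skipped calls now pass skip_with_substr=True, so entries in modules_to_not_convert match as substrings of the layer prefix rather than only as exact names. Below is a minimal sketch of the difference; it is not vLLM's actual is_layer_skipped implementation, and the module names are illustrative.

# Hypothetical sketch of exact-name vs. substring layer skipping.
# Not the real vllm is_layer_skipped; names below are made up.

def is_skipped_exact(prefix: str, modules_to_not_convert: list[str]) -> bool:
    # Old behavior: skip only if the full prefix is listed verbatim.
    return prefix in modules_to_not_convert

def is_skipped_substr(prefix: str, modules_to_not_convert: list[str]) -> bool:
    # New behavior (skip_with_substr=True): skip if any listed
    # module name occurs inside the layer prefix.
    return any(module in prefix for module in modules_to_not_convert)

# A skip list may contain a bare module name, while the runtime
# prefix is fully qualified (illustrative example):
skip_list = ["visual"]
prefix = "visual.blocks.0.attn.qkv"

print(is_skipped_exact(prefix, skip_list))   # False: exact match misses
print(is_skipped_substr(prefix, skip_list))  # True: substring match hits

With substring matching, layers nested under a listed parent module correctly fall back to UnquantizedLinearMethod (or UnquantizedFusedMoEMethod for fused MoE layers) instead of being routed to the AWQ Marlin path.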
