Commit 1f5a669

Support non-contiguous weight saving (#2049)
Signed-off-by: Kaihui-intel <[email protected]>
1 parent aafb938 commit 1f5a669

File tree

1 file changed: +7 -0 lines changed

  • neural_compressor/transformers/quantization/utils.py

neural_compressor/transformers/quantization/utils.py

Lines changed: 7 additions & 0 deletions
@@ -592,6 +592,12 @@ def convert_to_GPTQ_checkpoints(model, quantization_config):
     return model


+def make_contiguous(model):
+    for param in model.parameters():
+        if param.data.ndimension() > 1:
+            param.data = param.data.contiguous()
+
+
 def save_low_bit(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs):

     assert hasattr(self, "quantization_config"), "Detected this model is not a low-bit model."
@@ -603,6 +609,7 @@ def save_low_bit(self, save_directory: Union[str, os.PathLike], push_to_hub: boo
     os.makedirs(save_directory, exist_ok=True)
     # use transformers original `save_pretrained` function
     del self.save_pretrained
+    make_contiguous(self)

     if self.device == "cpu" or self.device == torch.device("cpu"):
         convert_to_GPTQ_checkpoints(self, self.quantization_config)
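Why the fix is needed: recent transformers versions serialize weights with safetensors by default, and safetensors refuses to save non-contiguous tensors; quantization passes that transpose or reslice weight matrices can leave parameters as non-contiguous views. Below is a minimal sketch of the failure mode and the remedy (illustrative only, not part of the commit; the exact safetensors error wording may vary by version):

import torch

w = torch.randn(4, 8).t()  # .t() returns a non-contiguous transposed view
print(w.is_contiguous())   # False

# safetensors.torch.save_file({"w": w}, "w.safetensors") would raise a
# ValueError here, asking for .contiguous() to be called before saving.

w = w.contiguous()         # repack the data into one contiguous block
print(w.is_contiguous())   # True

Note that make_contiguous only touches parameters with more than one dimension, presumably because non-contiguity arises in practice from transposed 2-D weight matrices; 1-D biases and norm scales are left untouched.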
