@@ -197,7 +197,7 @@ def replace_architectures(json_path):
json.dump(data, file, indent=4)

def eval_func(model):
-from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser

model_dir = model
if isinstance(model, str) and model.endswith(".onnx"):
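A note on the recurring import change: the lm-eval-based evaluation helpers that these examples previously imported from intel_extension_for_transformers now ship in neural_compressor.evaluation. A minimal usage sketch, assuming a model and tokenizer are already loaded; the model and user_model arguments mirror the hunks in this diff, while the remaining field names and the task choice are illustrative assumptions:

from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser

eval_args = LMEvalParser(
    model="hf",                # evaluate a Hugging Face-style model, as in the hunks below
    user_model=user_model,     # the in-memory model object to score
    tokenizer=tokenizer,       # assumed field name for the matching tokenizer
    tasks="lambada_openai",    # assumed task selection, for illustration only
    batch_size=8,
)
results = evaluate(eval_args)
acc = results["results"]["lambada_openai"]["acc"]  # result layout as used later in this diff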
@@ -7,6 +7,7 @@ onnxruntime-extensions; python_version < '3.11'
datasets
optimum
evaluate
+intel-extension-for-transformers >= 1.4.1
peft
-lm-eval==0.4.2
+lm-eval==0.4.3
numba
pydantic
@@ -134,7 +134,7 @@ def replace_architectures(json_path):
json.dump(data, file, indent=4)

def eval_func(model):
-from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser

model_dir = model
if isinstance(model, str) and model.endswith(".onnx"):
@@ -7,6 +7,7 @@ onnxruntime-extensions; python_version < '3.11'
datasets
optimum
evaluate
+intel-extension-for-transformers >= 1.4.1
peft
-lm-eval==0.4.2
+lm-eval==0.4.3
numba
pydantic
@@ -1,7 +1,6 @@
accelerate
datasets
einops
-intel-extension-for-transformers
optimum
peft
sentencepiece
@@ -10,4 +9,6 @@ torch
tqdm
tiktoken
transformers_stream_generator
-lm_eval==0.4.2
+lm_eval==0.4.3
+numba
+pydantic
@@ -588,7 +588,7 @@ def group_texts(examples):
eval_batch = args.per_device_eval_batch_size
user_model = None if args.use_accelerate else model

-from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
eval_args = LMEvalParser(
model="hf",
user_model=user_model,
@@ -8,6 +8,6 @@ transformers
pytest
wandb
einops
neural-compressor
intel-extension-for-transformers
-lm_eval==0.4.2
+lm_eval==0.4.3
numba
pydantic
@@ -89,10 +89,10 @@ function run_benchmark {
extra_cmd=$extra_cmd" --woq_algo TEQ"
elif [ "${topology}" = "opt_125m_ipex" ]; then
model_name_or_path="facebook/opt-125m"
-extra_cmd=$extra_cmd" --ipex --int8_bf16_mixed"
+extra_cmd=$extra_cmd" --ipex"
elif [ "${topology}" = "opt_125m_ipex_sq" ]; then
model_name_or_path="facebook/opt-125m"
-extra_cmd=$extra_cmd" --ipex --int8_bf16_mixed --sq --alpha 0.5"
+extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
elif [ "${topology}" = "bloom_560m_ipex_sq" ]; then
model_name_or_path="bigscience/bloom-560m"
extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
@@ -343,7 +343,10 @@ def eval_func(model):

if args.ipex:
user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
from transformers import AutoTokenizer, AutoConfig
tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
config = AutoConfig.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
setattr(user_model, "config", config)
else:
user_model, tokenizer = get_user_model()
kwargs = {'weight_only': True} if args.approach == 'weight_only' else {}
@@ -354,7 +357,7 @@ def eval_func(model):
if args.accuracy:
user_model.eval()
if args.code_generation:
-from intel_extension_for_transformers.transformers.llm.evaluation.bigcode_eval import evaluate
+from neural_compressor.evaluation.bigcode_eval import evaluate
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
results = evaluate(
@@ -370,7 +373,7 @@ def eval_func(model):
else:
acc = results["results"][task_name]["acc"]
else:
-from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
eval_args = LMEvalParser(
model="hf",
user_model=user_model,
@@ -395,7 +398,7 @@ def eval_func(model):
samples = args.iters * args.batch_size

if args.code_generation:
-from intel_extension_for_transformers.transformers.llm.evaluation.bigcode_eval import evaluate
+from neural_compressor.evaluation.bigcode_eval import evaluate
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
start = time.time()
@@ -413,7 +416,7 @@ def eval_func(model):
else:
acc = results["results"][task_name]["acc"]
else:
-from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
eval_args = LMEvalParser(
model="hf",
user_model=user_model,
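In the IPEX branch above, the quantized model reloaded from disk has no Hugging Face config attached, so the script now fetches one and sets it on the model before evaluation. A condensed sketch of that pattern; the model name and output directory are placeholders, and the load import path is an assumption based on the usual Neural Compressor checkpoint loader:

import os
from transformers import AutoConfig, AutoTokenizer
from neural_compressor.utils.pytorch import load  # assumed import path for the checkpoint loader

model_id = "facebook/opt-125m"   # placeholder model name
output_dir = "./saved_results"   # placeholder path to the quantized checkpoint

user_model = load(os.path.abspath(os.path.expanduser(output_dir)))
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
user_model.config = config  # mirrors the setattr in the hunk: evaluation code reads model.config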
@@ -45,7 +45,8 @@ function run_tuning {
--tune \
--batch_size $batch_size \
--output_dir ${output_model} \
-    --cache_dir ${dataset_location}
+    --cache_dir ${dataset_location} \
+    --trust_remote_code

}

@@ -24,13 +24,15 @@
help='the folder path to save the results.')
parser.add_argument('--cache_dir', default=None, type=str,
help='the folder path to save the results.')
parser.add_argument("--trust_remote_code", action="store_true")

args = parser.parse_args()
model_name = 'openai/whisper-large'
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)
# dataset
-librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test", cache_dir=args.cache_dir)
+librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test", cache_dir=args.cache_dir,
+                                      trust_remote_code=args.trust_remote_code)

# metric
wer = load("wer")
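The Whisper example now threads a --trust_remote_code flag through to load_dataset, since recent datasets releases ask for an explicit opt-in before running a dataset's loading script (librispeech_asr is such a dataset). A minimal sketch of the flag handling, with the cache path left as a placeholder:

import argparse
from datasets import load_dataset

parser = argparse.ArgumentParser()
parser.add_argument("--cache_dir", default=None, type=str)
parser.add_argument("--trust_remote_code", action="store_true")
args = parser.parse_args()

# Opt in to the dataset's loading script only when the flag is passed explicitly.
librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test",
                                      cache_dir=args.cache_dir,
                                      trust_remote_code=args.trust_remote_code)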
3 changes: 3 additions & 0 deletions neural_compressor/adaptor/torch_utils/awq.py
@@ -454,6 +454,9 @@ def block_inference(self, model):
"""
total_out = []
for args, kwargs in zip(self.total_block_args, self.total_block_kwargs):
+# avoid layer_past being a DynamicCache object when transformers is newer than 4.45.1
+if "layer_past" in kwargs.keys() and kwargs["layer_past"] is not None:
+    kwargs["layer_past"] = None
out = model(*args, **kwargs)
if isinstance(out, tuple): # pragma: no cover
out = out[0]
8 changes: 5 additions & 3 deletions neural_compressor/adaptor/torch_utils/bf16_convert.py
@@ -19,6 +19,7 @@
import torch.nn as nn

from ...utils import logger
+from .util import append_attr


class BF16ModuleWrapper(nn.Module):
@@ -62,9 +63,10 @@ def Convert(model, tune_cfg):
def _bf16_wrapper_model(model, bf16_ops_list, prefix=""):
for name, child in model.named_children():
op_name = prefix + "." + name if prefix != "" else name
-_bf16_wrapper_model(child, bf16_ops_list, op_name)
for bf16_op_name in bf16_ops_list:
if op_name == bf16_op_name[0] or op_name == bf16_op_name[0].split(".module")[0]:
-child = BF16ModuleWrapper(child)
-setattr(model, name, child)
+_bf16_wrapper_model(child, bf16_ops_list, op_name)
+child_bf16 = BF16ModuleWrapper(child)
+append_attr(child_bf16, child)
+setattr(model, name, child_bf16)
return model
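The rewritten _bf16_wrapper_model above moves the recursive call so it runs before a matched child is wrapped, and uses append_attr to copy the original module's attributes onto the BF16ModuleWrapper so they stay reachable after wrapping; the new test assertion below checks exactly that. A small sketch of the wrapping step, assuming the import paths match the files touched in this diff and that append_attr copies plain attributes such as stride:

import torch.nn as nn
from neural_compressor.adaptor.torch_utils.bf16_convert import BF16ModuleWrapper
from neural_compressor.adaptor.torch_utils.util import append_attr

conv = nn.Conv2d(3, 8, kernel_size=3, stride=1)
wrapped = BF16ModuleWrapper(conv)  # wrapper that runs this module in bfloat16
append_attr(wrapped, conv)         # copy the original module's attributes onto the wrapper
print(wrapped.stride)              # attributes of the wrapped conv remain accessible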
9 changes: 7 additions & 2 deletions test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py
@@ -401,8 +401,13 @@ def test_mix_precision(self):
ptq_fx_op_name_list["conv.*"] = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
conf = PostTrainingQuantConfig(op_name_dict=ptq_fx_op_name_list)
q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader, calib_func=eval_func)
-self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16)
-self.assertEqual(q_model._model.conv.module.module.bias.dtype, torch.bfloat16)
+self.assertEqual(q_model._model.conv.module.weight.dtype, torch.bfloat16)
+self.assertEqual(q_model._model.conv.module.bias.dtype, torch.bfloat16)
+self.assertEqual(
+    q_model._model.conv.stride[0],
+    1,
+    msg="GraphModule object should have the attributes of the original module.",
+)

def test_hawq_metric(self):
# Test for hawq metric