
Commit 625cb9f

irajagop and ochougul authored
Caching + API changes (quic#116)
* Move compile & export to base model - Remove abstract methods - Remove unused variables
* Remove unused Runtime enum
* Add type hinting to transforms variables
* Replace autoclass mappings with class variables
* No need to pass model_card_name
* No need of tokenizer
* Remove transform() and use transforms
* Add QEFFAutoModelForCausalLMwithCB for CB
* Add init and from_pretrained in sub-classes
* Move export & compile docs to base class
* Add export() and compile() to CausalLM class
* Add export() and compile() for CausalLMwithCB
* Add model_name and model_hash props to CausalLM
* Call export() in compile
* CausalLM init before setting vars
* Remove "QEff" name in cache dir creation
* Check class name suffix for correct model class
* tests: CausalLM init, from_pretrained and hash
* Fix license header
* fix: compute the order of input_names - No need of passing input_names to `_export()`
* test: Restructure configs, pass attn_impl="eager" - Fix hash test
* test: Added test for export() and compile()
* test: Parametrize with CB for init, hash
* Fix CB export()
* Refactor AutoClass to QEFFAutoClass
* test: export and compile for CB with exceptions
* Avoid deprecated abstractproperty
* Add warning regarding unsupported model
* Make compiler command constant
* Add better error message for compiler failure
* fix: Update causal_lm test
* fix: ApiRunner load tensors from external data
* Remove tests/test_loader.py
* Remove tests/utils.py and move to required place
* Remove separate class for CB - Added continuous_batching boolean argument - Refactor export()
* Enable CB tests for all models, fix codegen
* Use QEfficient.export instead of model.export
* fix: Add MPTForCausalLM as valid architecture
* fix: No need of passing model_card_name in infer
* tests: Use mark "on_qaic" to run in parallel
* ci: Run CLI tests without parallelism
* Add continuous batching for hashing
* tests: Use config_ids instead of config_id fn
* fix for CB check + Revert "Add MPTForCausalLM" - This reverts commit b6b5b3a.
* address PR comments
* Fixed doc, don't use mutable default, fix typo
* fix: Use same opset as export for custom-ops
* fix: opset version int to str
* fix: Revert onnx opset to 13, to work with old API
* added generate method back, added test for automodel code, added deprecation warning, fixed bugs
* run formatter and linter
* fixed test failures
* fixed gptbigcode 3d PKV bug in export and deprecated API test
* ran formatter
* fixed documentation

---------

Signed-off-by: Ilango Rajagopal <[email protected]>
Signed-off-by: Onkar Chougule <[email protected]>
Co-authored-by: Onkar Chougule <[email protected]>
1 parent 23ca9ca commit 625cb9f

31 files changed, +896 -794 lines changed
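For orientation, a minimal usage sketch of the API the commit message above describes: a single QEFFAutoModelForCausalLM class with from_pretrained, export() and compile() (compile() calls export() itself, per the message), and a continuous_batching flag that replaces the separate CB class. The class and method names come from this PR; the model card and the argument-free export()/compile() calls are illustrative placeholders, not arguments documented by this diff.

```python
# Illustrative sketch only; "gpt2" and the bare export()/compile() calls are placeholders.
from QEfficient import QEFFAutoModelForCausalLM

# continuous_batching=True replaces the earlier QEFFAutoModelForCausalLMwithCB class
qeff_model = QEFFAutoModelForCausalLM.from_pretrained("gpt2", continuous_batching=False)

onnx_path = qeff_model.export()   # export the transformed model to ONNX
qpc_path = qeff_model.compile()   # compile() invokes export() itself if needed
```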

QEfficient/base/common.py

Lines changed: 0 additions & 3 deletions
@@ -79,9 +79,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs) ->
         Downloads HuggingFace model if already doesn't exist locally, returns QEffAutoModel object based on type of model.
         """
         if not os.path.isdir(pretrained_model_name_or_path):
-            # Save model_card_name if passed
-            model_card_name = kwargs.pop("model_card_name", pretrained_model_name_or_path)
-            kwargs.update({"model_card_name": model_card_name})
             pretrained_model_name_or_path = login_and_download_hf_lm(pretrained_model_name_or_path, *args, **kwargs)
         model_type = get_hf_model_type(hf_model_path=pretrained_model_name_or_path)
         qeff_auto_model_class = MODEL_TYPE_TO_QEFF_AUTO_MODEL_MAP[model_type]

QEfficient/base/modeling_qeff.py

Lines changed: 279 additions & 65 deletions
Large diffs are not rendered by default.

QEfficient/cloud/execute.py

Lines changed: 0 additions & 1 deletion
@@ -58,7 +58,6 @@ def main(
         prompt=prompt,
         prompts_txt_file_path=prompts_txt_file_path,
         generation_len=generation_len,
-        full_batch_size=full_batch_size,
     )

QEfficient/cloud/infer.py

Lines changed: 0 additions & 1 deletion
@@ -116,7 +116,6 @@ def main(
         prompt=prompt,
         prompts_txt_file_path=prompts_txt_file_path,
         generation_len=generation_len,
-        full_batch_size=full_batch_size,
     )

QEfficient/compile/compile_helper.py

Lines changed: 6 additions & 0 deletions
@@ -9,6 +9,7 @@
 import os
 import shutil
 import subprocess
+import warnings
 from typing import List, Optional, Tuple
 
 from QEfficient.utils.logging_utils import logger
@@ -51,6 +52,11 @@ def compile_kv_model_on_cloud_ai_100(
     device_group: Optional[List[int]] = None,
     **kwargs,
 ) -> Tuple[bool, str]:
+    warnings.warn(
+        "\033[93mUse `QEFFAutoModelForCausalLM.compile` instead, this method will be removed soon.\033[0m",
+        DeprecationWarning,
+        stacklevel=2,
+    )
     if kwargs:
         # FIXME
         raise NotImplementedError("Can't handle extra compilation args now!")

QEfficient/customop/ctx_scatter_gather.py

Lines changed: 3 additions & 1 deletion
@@ -8,7 +8,9 @@
 import onnxscript
 import torch
 
-ops = onnxscript.opset13
+from QEfficient.utils import constants
+
+ops = getattr(onnxscript, "opset" + str(constants.ONNX_EXPORT_OPSET))
 
 
 @onnxscript.script(onnxscript.values.Opset("com.qualcomm.cloud", 1))

QEfficient/customop/ctx_scatter_gather_cb.py

Lines changed: 3 additions & 1 deletion
@@ -8,7 +8,9 @@
 import onnxscript
 import torch
 
-ops = onnxscript.opset13
+from QEfficient.utils import constants
+
+ops = getattr(onnxscript, "opset" + str(constants.ONNX_EXPORT_OPSET))
 
 
 @onnxscript.script(onnxscript.values.Opset("com.qualcomm.cloud", 1))

QEfficient/customop/rms_norm.py

Lines changed: 3 additions & 1 deletion
@@ -9,7 +9,9 @@
 import torch
 from torch import nn
 
-ops = onnxscript.opset13
+from QEfficient.utils import constants
+
+ops = getattr(onnxscript, "opset" + str(constants.ONNX_EXPORT_OPSET))
 
 
 @onnxscript.script(onnxscript.values.Opset(domain="com.qti.aisw.onnx", version=1))
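All three custom-op files above now derive the onnxscript opset from `QEfficient.utils.constants.ONNX_EXPORT_OPSET` instead of hard-coding `opset13`, so the custom ops always match the opset used for export. A minimal sketch of the lookup; the value 13 is an assumption taken from the "Revert onnx opset to 13" item in the commit message, the real value lives in the constants module.

```python
# Sketch of the dynamic opset lookup used in the custom-op files above.
import onnxscript

ONNX_EXPORT_OPSET = 13  # stand-in for constants.ONNX_EXPORT_OPSET (assumed value)

ops = getattr(onnxscript, "opset" + str(ONNX_EXPORT_OPSET))
assert ops is onnxscript.opset13  # resolves to the same object the old hard-coded line used
```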

QEfficient/exporter/export_hf_to_cloud_ai_100.py

Lines changed: 14 additions & 33 deletions
@@ -13,7 +13,6 @@
 import torch
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
 
-import QEfficient
 from QEfficient.base.common import AUTO_MODEL_MAP_TO_MODEL_TYPE_MAP, QEFF_MODEL_TYPE, QEFFCommonLoader
 from QEfficient.base.modeling_qeff import QEFFBaseModel
 from QEfficient.exporter.export_utils import export_onnx, fix_onnx_fp16, generate_input_files, run_model_on_ort
@@ -168,11 +167,6 @@ def convert_to_cloud_kvstyle(
     Returns:
         :str: Path of exported ``ONNX`` file.
     """
-    warnings.warn(
-        "\033[93mThis function will be deprecated soon, use QEfficient.export instead\033[0m",
-        DeprecationWarning,
-        stacklevel=2,
-    )
     if os.path.exists(onnx_dir_path):
         logger.warning(f"Overriding {onnx_dir_path}")
         shutil.rmtree(onnx_dir_path)
@@ -323,7 +317,9 @@ def export_for_cloud(
     full_batch_size: Optional[int] = None,
 ) -> str:
     # Check if model architecture is supported for continuous batching.
-    if full_batch_size and qeff_model.model.config.architectures[0] not in get_lists_of_cb_qeff_models.architectures:
+    if full_batch_size and qeff_model.model.config.architectures[0].lower() not in {
+        x.lower() for x in get_lists_of_cb_qeff_models.architectures
+    }:
         raise NotImplementedError(
             f"Continuous batching is not supported for {qeff_model.model.config.architectures[0]}"
         )
@@ -356,24 +352,14 @@ def export_lm_model_for_cloud(
         logger.warning(f"Overriding {onnx_dir_path}")
         shutil.rmtree(onnx_dir_path)
 
-    if qeff_model.is_transformed:
-        model_name = export_kvstyle_transformed_model_to_onnx(
-            model_name=model_name,
-            transformed_model=qeff_model.model,
-            tokenizer=tokenizer,
-            onnx_dir_path=onnx_dir_path,
-            seq_len=seq_length,
-            full_batch_size=full_batch_size,
-        )  # type: ignore
-
-    else:
-        model_name = export_bertstyle_model_to_onnx(
-            model_name=model_name,
-            model=qeff_model.model,
-            tokenizer=tokenizer,
-            onnx_dir_path=onnx_dir_path,
-            seq_len=seq_length,
-        )  # type: ignore
+    model_name = export_kvstyle_transformed_model_to_onnx(
+        model_name=model_name,
+        transformed_model=qeff_model.model,
+        tokenizer=tokenizer,
+        onnx_dir_path=onnx_dir_path,
+        seq_len=seq_length,
+        full_batch_size=full_batch_size,
+    )
     return os.path.join(onnx_dir_path, f"{model_name}.onnx")
 
 
@@ -398,7 +384,7 @@ def qualcomm_efficient_converter(
 
     Usage 2: You can pass ``model_name`` and ``model_kv`` as an object of ``QEfficient.QEFFAutoModelForCausalLM``, In this case will directly export the ``model_kv.model`` to ``ONNX``
 
-    We will be deprecating this function and it will be replaced by ``QEffAutoModelForCausalLM.export``.
+    We will be deprecating this function and it will be replaced by ``QEFFAutoModelForCausalLM.export``.
 
     ``Mandatory`` Args:
         :model_name (str): The name of the model to be used.
@@ -423,7 +409,7 @@ def qualcomm_efficient_converter(
 
     """
     warnings.warn(
-        "\033[93mmodel_kv argument will be replaced by qeff_model of type QEFFBaseModel\033[0m",
+        "\033[93m`qualcomm_efficient_converter` method will be deprecated soon, use `QEFFAutoModelForCausalLM.export` instead\033[0m",
         DeprecationWarning,
         stacklevel=2,
     )
@@ -440,13 +426,8 @@ def qualcomm_efficient_converter(
         )
     )
 
-    # Transform if required
-    if model_kv.is_transformed and not kv:
-        raise AttributeError("Transformed model is passed while requesting to convert non-transformed model")
-    model_kv = model_kv if model_kv.is_transformed else QEfficient.transform(model_kv) if kv else model_kv
-
     if onnx_dir_path is None:
-        model_card_dir = os.path.join(QEFF_MODELS_DIR, str(model_kv.model_card_name))
+        model_card_dir = os.path.join(QEFF_MODELS_DIR, str(model_name))
         onnx_dir_path = os.path.join(model_card_dir, "onnx")
     os.makedirs(onnx_dir_path, exist_ok=True)

QEfficient/generation/cloud_infer.py

Lines changed: 4 additions & 4 deletions
@@ -5,7 +5,8 @@
 #
 # -----------------------------------------------------------------------------
 
-from typing import Dict, List, Optional
+from pathlib import Path
+from typing import Dict, List, Optional, Union
 from warnings import warn
 
 import numpy as np
@@ -43,7 +44,7 @@
 class QAICInferenceSession:
     def __init__(
         self,
-        qpc_path: str,
+        qpc_path: Union[Path, str],
         device_ids: Optional[List[int]] = None,
         activate: bool = True,
         enable_debug_logs: bool = False,
@@ -68,8 +69,7 @@ def __init__(
         if enable_debug_logs:
             if self.context.setLogLevel(qaicrt.QLogLevel.QL_DEBUG) != qaicrt.QStatus.QS_SUCCESS:
                 raise RuntimeError("Failed to setLogLevel")
-
-        qpc = qaicrt.Qpc(qpc_path)
+        qpc = qaicrt.Qpc(str(qpc_path))
         # Load IO Descriptor
         iodesc = aicapi.IoDesc()
         status, iodesc_data = qpc.getIoDescriptor()
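With the change above, QAICInferenceSession accepts either a str or a pathlib.Path for qpc_path and normalizes it with str() before handing it to qaicrt.Qpc. A hedged usage sketch; the QPC directory and device id are placeholders, and running it requires the Cloud AI 100 runtime.

```python
# Illustrative only: the QPC path below is a placeholder; device_ids is the
# parameter name shown in the constructor signature above.
from pathlib import Path

from QEfficient.generation.cloud_infer import QAICInferenceSession

qpc_dir = Path("/path/to/qpc")  # a pathlib.Path is now accepted directly
session = QAICInferenceSession(qpc_dir, device_ids=[0])  # converted via str(qpc_path) internally
```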
