quic
diff --git a/‎.github/CODEOWNERS
Lines changed: 2 additions & 2 deletions b/‎.github/CODEOWNERS
Lines changed: 2 additions & 2 deletions
diff --git a/‎LICENSE
Lines changed: 2 additions & 2 deletions b/‎LICENSE
Lines changed: 2 additions & 2 deletions
diff --git a/‎QEfficient/__init__.py
Lines changed: 8 additions & 12 deletions b/‎QEfficient/__init__.py
Lines changed: 8 additions & 12 deletions
diff --git a/‎QEfficient/base/__init__.py
Lines changed: 1 addition & 1 deletion b/‎QEfficient/base/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎QEfficient/base/common.py
Lines changed: 3 additions & 4 deletions b/‎QEfficient/base/common.py
Lines changed: 3 additions & 4 deletions
diff --git a/‎QEfficient/base/modeling_qeff.py
Lines changed: 10 additions & 3 deletions b/‎QEfficient/base/modeling_qeff.py
Lines changed: 10 additions & 3 deletions
diff --git a/‎QEfficient/base/onnx_transforms.py
Lines changed: 1 addition & 1 deletion b/‎QEfficient/base/onnx_transforms.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎QEfficient/base/pytorch_transforms.py
Lines changed: 65 additions & 1 deletion b/‎QEfficient/base/pytorch_transforms.py
Lines changed: 65 additions & 1 deletion
diff --git a/‎QEfficient/cloud/__init__.py
Lines changed: 1 addition & 1 deletion b/‎QEfficient/cloud/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎QEfficient/cloud/compile.py
Lines changed: 22 additions & 10 deletions b/‎QEfficient/cloud/compile.py
Lines changed: 22 additions & 10 deletions
diff --git a/‎QEfficient/cloud/execute.py
Lines changed: 1 addition & 1 deletion b/‎QEfficient/cloud/execute.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎QEfficient/cloud/export.py
Lines changed: 1 addition & 1 deletion b/‎QEfficient/cloud/export.py
Lines changed: 1 addition & 1 deletion
@@ -1,12 +1,12 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
 
 # Default owners
 # review when someone opens a pull request and assign appropriate reviewer
-* @quic-rishinr @ochougul @quic-hemagnih
+* @quic-rishinr @ochougul @quic-hemagnih @quic-amitraj
 pyproject.toml @carlstreeter-quic
 
@@ -1,4 +1,4 @@
-Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -11,7 +11,7 @@ modification, are permitted provided that the following conditions are met:
       disclaimer in the documentation and/or other materials provided
       with the distribution.
 
-    * Neither the name of Qualcomm Innovation Center, Inc. nor the names of its
+    * Neither the name of Qualcomm Technologies, Inc. nor the names of its
       contributors may be used to endorse or promote products derived
       from this software without specific prior written permission.
 
 
@@ -1,29 +1,25 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
 
 import os
+import warnings
+
+from QEfficient.utils import custom_format_warning
 
 # For faster downloads via hf_transfer
 # This code is put above import statements as this needs to be executed before
 # hf_transfer is imported (will happen on line 15 via leading imports)
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-
-from transformers import AutoConfig
-
-from QEfficient.transformers.modeling_utils import MODEL_TYPE_TO_CONFIG_CLS_AND_ARCH_CLS
+# Placeholder for all non-transformer models registered in QEfficient
+import QEfficient.utils.model_registery  # noqa: F401
 from QEfficient.utils.logging_utils import logger
 
-# loop over all the model types which are not present in transformers and register them
-for model_type, model_cls in MODEL_TYPE_TO_CONFIG_CLS_AND_ARCH_CLS.items():
-    # Register the model config class based on the model type. This will be first element in the tuple
-    AutoConfig.register(model_type, model_cls[0])
-
-    # Register the non transformer library Class and config class using AutoModelClass
-    model_cls[2].register(model_cls[0], model_cls[1])
+# Use a custom warning format for a better logging experience
+warnings.formatwarning = custom_format_warning
 
 
 def check_qaic_sdk():
 
@@ -1,6 +1,6 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
 
@@ -1,6 +1,6 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
@@ -41,15 +41,14 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs) ->
         Downloads HuggingFace model if already doesn't exist locally, returns QEFFAutoModel object based on type of model.
         """
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
-        architecture = config.architectures[0] if config.architectures else None
 
-        class_name = MODEL_CLASS_MAPPING.get(architecture)
+        class_name = MODEL_CLASS_MAPPING.get(config.__class__.__name__, None)
         if class_name:
             module = __import__("QEfficient.transformers.models.modeling_auto")
             model_class = getattr(module, class_name)
         else:
             raise NotImplementedError(
-                f"Unknown architecture={architecture}, either use specific auto model class for loading the model or raise an issue for support!"
+                f"Unknown architecture={config.__class__.__name__}, either use specific auto model class for loading the model or raise an issue for support!"
             )
 
         local_model_dir = kwargs.pop("local_model_dir", None)
 
@@ -1,6 +1,6 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # ----------------------------------------------------------------------------
@@ -241,10 +241,12 @@ def _compile(
             :mdp_ts_num_devices (int): Number of devices to partition to use Multi-Device Partitioning with tensor-slicing.
             :num_speculative_tokens (int, optional): Number of speculative tokens to take as input for Speculative Decoding Target Language Model.
             :enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
-            :qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.``
-            :compiler_options: Pass any compiler option as input. Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
+            :qnn_config (str): Path of QNN Config parameters file. Any extra parameters for QNN compilation can be passed via this file. ``Defaults to None.``
+            :compiler_options: Pass any compiler option as input.
+                Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
                 - aic_num_cores=16 -> -aic-num-cores=16
                 - convert_to_fp16=True -> -convert-to-fp16
+                For QNN Compilation path, when enable_qnn is set to True, any parameter passed in compiler_options will be ignored.
         """
         if onnx_path is None and self.onnx_path is None:
             self.export()
@@ -256,6 +258,11 @@ def _compile(
             raise FileNotFoundError(f"ONNX file not found at: {onnx_path}")
 
         if enable_qnn:
+            if compiler_options:
+                logger.warning(
+                    f"Extra arguments to QNN compilation are supported only via qnn_config file. Ignoring {compiler_options}"
+                )
+
             self.qpc_path = qnn_compile(
                 onnx_path=onnx_path,
                 qpc_base_path=compile_dir,
 
@@ -1,6 +1,6 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # ----------------------------------------------------------------------------
 
@@ -1,6 +1,6 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # ----------------------------------------------------------------------------
@@ -9,6 +9,8 @@
 
 from torch import nn
 
+from QEfficient.utils.logging_utils import logger
+
 
 class PytorchTransform:
     """
@@ -110,3 +112,65 @@ def apply(cls, model: nn.Module) -> Tuple[nn.Module, bool]:
                     transformed = True
 
         return model, transformed
+
+
+class SplitGateUpWeightsTransform(PytorchTransform):
+    """
+    Split the fused Gate+Up expert weights and copy the halves into the model.
+
+    Only models whose class name is listed in ``VLM_SPLIT_GATE_UP_WEIGHTS`` are touched.
+    For every transformer layer of the (language) model:
+      • reads     model.layers.<i>.feed_forward.experts.gate_up_proj   from the state dict
+      • copies its two halves (split along the last dimension) into
+            experts.gate_proj     <-- Gate   [E,H,I]
+            experts.up_proj       <-- Up     [E,H,I]
+
+    Layers whose state dict holds no fused ``gate_up_proj`` entry are left untouched.
+    """
+
+    @classmethod
+    def apply(cls, model: nn.Module) -> Tuple[nn.Module, bool]:
+        """
+        Copy the split Gate and Up halves into ``gate_proj`` / ``up_proj`` of every expert block.
+
+        Args:
+            :model (nn.Module): Model to transform. When it exposes a ``language_model`` attribute,
+                the layers of that sub-module are processed, otherwise the model's own layers.
+
+        Returns:
+            :Tuple[nn.Module, bool]: The same model object (modified in place) and a flag telling
+                whether at least one layer was updated.
+        """
+        if model.__class__.__name__ not in VLM_SPLIT_GATE_UP_WEIGHTS:
+            return model, False
+
+        # Work on the nested language model when the wrapper exposes one.
+        text_model = getattr(model, "language_model", model)
+        state_dict = text_model.state_dict()
+
+        transformed = False
+        for layer_idx, layer in enumerate(text_model.model.layers):
+            fused_key = f"model.layers.{layer_idx}.feed_forward.experts.gate_up_proj"
+            fused = state_dict.get(fused_key)  # [E, H, 2I]  (no .weight here)
+            if fused is None:
+                # No fused expert tensor in this layer, so there is nothing to split.
+                continue
+
+            # ---- split  [E,H,2I] → two  [E,H,I]  tensors ----------------------
+            ffn_dim = fused.shape[-1] // 2
+            gate, up = fused.split(ffn_dim, dim=-1)  # views – no copy
+
+            # Parameters are updated in place, so the caller's model object stays valid.
+            experts = layer.feed_forward.experts
+            experts.gate_proj.data.copy_(gate)
+            experts.up_proj.data.copy_(up)
+
+            logger.info(f"[layer {layer_idx:02d}] loaded gate_proj & up_proj from fused tensor  (shape {fused.shape})")
+            transformed = True
+
+        return model, transformed
+
+
+# Class names of the models for which SplitGateUpWeightsTransform.apply splits the fused gate_up_proj weights.
+VLM_SPLIT_GATE_UP_WEIGHTS = {"QEffLlama4ForConditionalGeneration"}
@@ -1,6 +1,6 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
 
@@ -1,6 +1,6 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
@@ -85,17 +85,29 @@
     parser.add_argument(
         "--enable_qnn",
         "--enable-qnn",
-        action="store_true",
+        nargs="?",
+        const=True,
+        type=str,
         default=False,
         help="Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE].\
              If not provided, the default configuration will be used.\
              Sample Config: QEfficient/compile/qnn_config.json",
     )
-    parser.add_argument(
-        "qnn_config",
-        nargs="?",
-        type=str,
-    )
-    # FIXME(ochougul): Allow extra compilation arguments
-    args = parser.parse_args()
-    QEfficient.compile(**vars(args))
+
+    # Options that argparse does not recognise are collected and forwarded as compiler options.
+    args, compiler_options = parser.parse_known_args()
+
+    # `--enable_qnn CONFIG_FILE` carries the config path as the flag's value:
+    # move it to `qnn_config` and turn the flag back into a plain boolean.
+    if isinstance(args.enable_qnn, str):
+        args.qnn_config = args.enable_qnn
+        args.enable_qnn = True
+
+    compiler_options_dict = {}
+    for i, option in enumerate(compiler_options):
+        if not option.startswith("--"):
+            # Bare tokens are only consumed as the value of the preceding `--flag`.
+            continue
+        name, has_inline_value, inline_value = option.lstrip("-").partition("=")
+        if has_inline_value:
+            # `--key=value`: keep the value verbatim; only the key gets dashes normalised.
+            value = inline_value
+        elif i + 1 < len(compiler_options) and not compiler_options[i + 1].startswith("-"):
+            # `--key value`: the next token is the value.
+            value = compiler_options[i + 1]
+        else:
+            # A flag without a value acts as a boolean switch.
+            value = True
+        compiler_options_dict[name.replace("-", "_")] = value
+    QEfficient.compile(**vars(args), **compiler_options_dict)
@@ -1,6 +1,6 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
 
@@ -1,6 +1,6 @@
 # -----------------------------------------------------------------------------
 #
-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
Original file line number	Diff line number	Diff line change
`@@ -1,12 +1,12 @@`
`1`	`1`	`# -----------------------------------------------------------------------------`
`2`	`2`	`#`
`3`		`-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.`
	`3`	`+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.`
`4`	`4`	`# SPDX-License-Identifier: BSD-3-Clause`
`5`	`5`	`#`
`6`	`6`	`# -----------------------------------------------------------------------------`
`7`	`7`
`8`	`8`	`# Default owners`
`9`	`9`	`# review when someone opens a pull request and assign appropriate reviewer`
`10`		`-* @quic-rishinr @ochougul @quic-hemagnih`
	`10`	`+* @quic-rishinr @ochougul @quic-hemagnih @quic-amitraj`
`11`	`11`	`pyproject.toml @carlstreeter-quic`
`12`	`12`
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`# -----------------------------------------------------------------------------`
`2`	`2`	`#`
`3`		`-# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.`
	`3`	`+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.`
`4`	`4`	`# SPDX-License-Identifier: BSD-3-Clause`
`5`	`5`	`#`
`6`	`6`	`# -----------------------------------------------------------------------------`