Skip to content

Commit 6262009

Browse files
authored
Merge branch 'main' into custom_dataset
Signed-off-by: Swati Allabadi <[email protected]>
2 parents 155bb77 + 1e8039b commit 6262009

File tree

203 files changed

+4936
-797
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

203 files changed

+4936
-797
lines changed

.github/CODEOWNERS

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# -----------------------------------------------------------------------------
77

88
# Default owners
99
# review when someone opens a pull request and assign appropriate reviewer
10-
* @quic-rishinr @ochougul @quic-hemagnih
10+
* @quic-rishinr @ochougul @quic-hemagnih @quic-amitraj
1111
pyproject.toml @carlstreeter-quic
1212

LICENSE

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
1+
Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
22

33
Redistribution and use in source and binary forms, with or without
44
modification, are permitted provided that the following conditions are met:
@@ -11,7 +11,7 @@ modification, are permitted provided that the following conditions are met:
1111
disclaimer in the documentation and/or other materials provided
1212
with the distribution.
1313

14-
* Neither the name of Qualcomm Innovation Center, Inc. nor the names of its
14+
* Neither the name of Qualcomm Technologies, Inc. nor the names of its
1515
contributors may be used to endorse or promote products derived
1616
from this software without specific prior written permission.
1717

QEfficient/__init__.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,25 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# -----------------------------------------------------------------------------
77

88
import os
9+
import warnings
10+
11+
from QEfficient.utils import custom_format_warning
912

1013
# For faster downloads via hf_transfer
1114
# This code is put above import statements as this needs to be executed before
1215
# hf_transfer is imported (will happen via the imports below)
1316
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
14-
15-
from transformers import AutoConfig
16-
17-
from QEfficient.transformers.modeling_utils import MODEL_TYPE_TO_CONFIG_CLS_AND_ARCH_CLS
17+
# Placeholder for all non-transformer models registered in QEfficient
18+
import QEfficient.utils.model_registery # noqa: F401
1819
from QEfficient.utils.logging_utils import logger
1920

20-
# loop over all the model types which are not present in transformers and register them
21-
for model_type, model_cls in MODEL_TYPE_TO_CONFIG_CLS_AND_ARCH_CLS.items():
22-
# Register the model config class based on the model type. This will be first element in the tuple
23-
AutoConfig.register(model_type, model_cls[0])
24-
25-
# Register the non transformer library Class and config class using AutoModelClass
26-
model_cls[2].register(model_cls[0], model_cls[1])
21+
# Use a custom warning format for a better logging experience
22+
warnings.formatwarning = custom_format_warning
2723

2824

2925
def check_qaic_sdk():

QEfficient/base/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# -----------------------------------------------------------------------------

QEfficient/base/common.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# -----------------------------------------------------------------------------
@@ -41,15 +41,14 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs) ->
4141
Downloads the HuggingFace model if it doesn't already exist locally, and returns a QEFFAutoModel object based on the type of model.
4242
"""
4343
config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
44-
architecture = config.architectures[0] if config.architectures else None
4544

46-
class_name = MODEL_CLASS_MAPPING.get(architecture)
45+
class_name = MODEL_CLASS_MAPPING.get(config.__class__.__name__, None)
4746
if class_name:
4847
module = __import__("QEfficient.transformers.models.modeling_auto")
4948
model_class = getattr(module, class_name)
5049
else:
5150
raise NotImplementedError(
52-
f"Unknown architecture={architecture}, either use specific auto model class for loading the model or raise an issue for support!"
51+
f"Unknown architecture={config.__class__.__name__}, either use specific auto model class for loading the model or raise an issue for support!"
5352
)
5453

5554
local_model_dir = kwargs.pop("local_model_dir", None)

QEfficient/base/modeling_qeff.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# ----------------------------------------------------------------------------
@@ -241,10 +241,12 @@ def _compile(
241241
:mdp_ts_num_devices (int): Number of devices to partition to use Multi-Device Partitioning with tensor-slicing.
242242
:num_speculative_tokens (int, optional): Number of speculative tokens to take as input for Speculative Decoding Target Language Model.
243243
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
244-
:qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.``
245-
:compiler_options: Pass any compiler option as input. Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
244+
:qnn_config (str): Path of QNN Config parameters file. Any extra parameters for QNN compilation can be passed via this file. ``Defaults to None.``
245+
:compiler_options: Pass any compiler option as input.
246+
Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
246247
- aic_num_cores=16 -> -aic-num-cores=16
247248
- convert_to_fp16=True -> -convert-to-fp16
249+
On the QNN compilation path (i.e. when enable_qnn is set to True), any parameter passed in compiler_options is ignored.
248250
"""
249251
if onnx_path is None and self.onnx_path is None:
250252
self.export()
@@ -256,6 +258,11 @@ def _compile(
256258
raise FileNotFoundError(f"ONNX file not found at: {onnx_path}")
257259

258260
if enable_qnn:
261+
if compiler_options:
262+
logger.warning(
263+
f"Extra arguments to QNN compilation are supported only via qnn_config file. Ignoring {compiler_options}"
264+
)
265+
259266
self.qpc_path = qnn_compile(
260267
onnx_path=onnx_path,
261268
qpc_base_path=compile_dir,

QEfficient/base/onnx_transforms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# ----------------------------------------------------------------------------

QEfficient/base/pytorch_transforms.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# ----------------------------------------------------------------------------
@@ -9,6 +9,8 @@
99

1010
from torch import nn
1111

12+
from QEfficient.utils.logging_utils import logger
13+
1214

1315
class PytorchTransform:
1416
"""
@@ -110,3 +112,65 @@ def apply(cls, model: nn.Module) -> Tuple[nn.Module, bool]:
110112
transformed = True
111113

112114
return model, transformed
115+
116+
117+
class SplitGateUpWeightsTransform(PytorchTransform):
118+
"""
119+
Split fused Gate+Up weights and copy them into the model.
120+
121+
For every transformer layer inside `model`:
122+
• expects <PREFIX>.experts.gate_up_proj in the *source* `sd`
123+
• copies halves into
124+
<PREFIX>.experts.gate_proj <-- Gate [E,H,I]
125+
<PREFIX>.experts.up_proj <-- Up [E,H,I]
126+
"""
127+
128+
@classmethod
129+
def apply(cls, model: nn.Module) -> Tuple[nn.Module, bool]:
130+
transformed = False
131+
model_class = model.__class__.__name__ if hasattr(model, "model") else model.__class__.__name__
132+
133+
if model_class not in VLM_SPLIT_GATE_UP_WEIGHTS:
134+
return model, transformed
135+
136+
model_tmp = model.language_model if hasattr(model, "language_model") else model
137+
138+
num_layers = len(model_tmp.model.layers)
139+
delete_fused_key = True
140+
sd = model_tmp.state_dict()
141+
for layer_idx in range(num_layers):
142+
# ---- build the textual prefix once per layer ----------
143+
prefix = f"model.layers.{layer_idx}.feed_forward.experts."
144+
145+
fused_key = prefix + "gate_up_proj"
146+
gate_key = prefix + "gate_proj"
147+
up_key = prefix + "up_proj"
148+
149+
# ---- split [E,H,2I] → two [E,H,I] tensors ----------------------
150+
fused = sd[fused_key] # [E, H, 2I] (no .weight here)
151+
E, H, two_I = fused.shape
152+
ffn_dim = two_I // 2
153+
gate, up = fused.split(ffn_dim, dim=-1) # views – no copy
154+
155+
experts = model_tmp.model.layers[layer_idx].feed_forward.experts
156+
experts.gate_proj.data.copy_(gate)
157+
experts.up_proj.data.copy_(up)
158+
159+
# ---- update the state-dict so load_state_dict sees the right keys
160+
sd[gate_key] = gate
161+
sd[up_key] = up
162+
163+
if delete_fused_key:
164+
del sd[fused_key]
165+
166+
logger.info(f"[layer {layer_idx:02d}] loaded gate_proj & up_proj from fused tensor (shape {fused.shape})")
167+
transformed = True
168+
169+
if hasattr(model, "language_model"):
170+
model.language_model = model_tmp
171+
else:
172+
model = model_tmp
173+
return model, transformed
174+
175+
176+
VLM_SPLIT_GATE_UP_WEIGHTS = {"QEffLlama4ForConditionalGeneration"}

QEfficient/cloud/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# -----------------------------------------------------------------------------

QEfficient/cloud/compile.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# -----------------------------------------------------------------------------
@@ -85,17 +85,29 @@
8585
parser.add_argument(
8686
"--enable_qnn",
8787
"--enable-qnn",
88-
action="store_true",
88+
nargs="?",
89+
const=True,
90+
type=str,
8991
default=False,
9092
help="Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE].\
9193
If not provided, the default configuration will be used.\
9294
Sample Config: QEfficient/compile/qnn_config.json",
9395
)
94-
parser.add_argument(
95-
"qnn_config",
96-
nargs="?",
97-
type=str,
98-
)
99-
# FIXME(ochougul): Allow extra compilation arguments
100-
args = parser.parse_args()
101-
QEfficient.compile(**vars(args))
96+
97+
args, compiler_options = parser.parse_known_args()
98+
99+
if isinstance(args.enable_qnn, str):
100+
args.qnn_config = args.enable_qnn
101+
args.enable_qnn = True
102+
103+
compiler_options_dict = {}
104+
for i in range(0, len(compiler_options)):
105+
if compiler_options[i].startswith("--"):
106+
key = compiler_options[i].lstrip("-").replace("-", "_")
107+
value = (
108+
compiler_options[i + 1]
109+
if i + 1 < len(compiler_options) and not compiler_options[i + 1].startswith("-")
110+
else True
111+
)
112+
compiler_options_dict[key] = value
113+
QEfficient.compile(**args.__dict__, **compiler_options_dict)

QEfficient/cloud/execute.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# -----------------------------------------------------------------------------

QEfficient/cloud/export.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -----------------------------------------------------------------------------
22
#
3-
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
3+
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
44
# SPDX-License-Identifier: BSD-3-Clause
55
#
66
# -----------------------------------------------------------------------------

0 commit comments

Comments
 (0)