From 83cc2405e86d890de98e4173083ab75a31327552 Mon Sep 17 00:00:00 2001 From: Shubham Agrawal Date: Thu, 29 May 2025 16:31:56 +0530 Subject: [PATCH] Updated Python APIs Compile doc string for QNN Compilation path. Signed-off-by: Shubham Agrawal --- QEfficient/base/modeling_qeff.py | 11 +++++-- .../transformers/models/modeling_auto.py | 29 ++++++++++++------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py index 7d92bfd2f..cf53a8c70 100644 --- a/QEfficient/base/modeling_qeff.py +++ b/QEfficient/base/modeling_qeff.py @@ -241,10 +241,12 @@ def _compile( :mdp_ts_num_devices (int): Number of devices to partition to use Multi-Device Partitioning with tensor-slicing. :num_speculative_tokens (int, optional): Number of speculative tokens to take as input for Speculative Decoding Target Language Model. :enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.`` - :qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.`` - :compiler_options: Pass any compiler option as input. Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below: + :qnn_config (str): Path of QNN Config parameters file. Any extra parameters for QNN compilation can be passed via this file. ``Defaults to None.`` + :compiler_options: Pass any compiler option as input. + Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below: - aic_num_cores=16 -> -aic-num-cores=16 - convert_to_fp16=True -> -convert-to-fp16 + For QNN Compilation path, when enable_qnn is set to True, any parameter passed in compiler_options will be ignored. """ if onnx_path is None and self.onnx_path is None: self.export() @@ -256,6 +258,11 @@ def _compile( raise FileNotFoundError(f"ONNX file not found at: {onnx_path}") if enable_qnn: + if compiler_options: + logger.warning( + f"Extra arguments to QNN compilation are supported only via qnn_config file. 
Ignoring {compiler_options}" + ) + self.qpc_path = qnn_compile( onnx_path=onnx_path, qpc_base_path=compile_dir, diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index f4245d271..4d1531fd5 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -291,8 +291,13 @@ def compile( :num_devices (int): Number of devices the model needs to be compiled for. Defaults to 1. :num_cores (int): Number of cores used to compile the model. :mxfp6_matmul (bool, optional): Whether to use ``mxfp6`` compression for weights. ``Defaults to False``. - :aic_enable_depth_first (bool, optional): Enables DFS with default memory size. ``Defaults to False``. - :allow_mxint8_mdp_io (bool, optional): Allows MXINT8 compression of MDP IO traffic. ``Defaults to False.`` + :compiler_options (dict, optional): Additional compiler options. + For QAIC Compiler: Extra arguments for qaic-exec can be passed. + :aic_enable_depth_first (bool, optional): Enables DFS with default memory size. ``Defaults to False``. + :allow_mxint8_mdp_io (bool, optional): Allows MXINT8 compression of MDP IO traffic. ``Defaults to False.`` + For QNN Compiler: The following arguments can be passed. + :enable_qnn (bool): Enables QNN Compilation. + :qnn_config (str): Path of QNN Config parameters file. Any extra parameters for QNN compilation can be passed via this file. Returns: :str: Path of the compiled ``qpc`` package. """ @@ -1571,16 +1576,18 @@ def compile( :mxfp6_matmul (bool, optional): Whether to use ``mxfp6`` compression for weights. ``Defaults to False``. :mxint8_kv_cache (bool, optional): Whether to use ``mxint8`` compression for KV cache. ``Defaults to False``. :num_speculative_tokens (int, optional): Number of speculative tokens to take as input for Speculative Decoding Target Language Model. - :mos (int, optional): Effort level to reduce on-chip memory. Defaults to -1, meaning no effort. 
``Defaults to -1``. - :aic_enable_depth_first (bool, optional): Enables DFS with default memory size. ``Defaults to False``. :prefill_only (bool): if ``True`` compile for prefill only and if ``False`` compile for decode only. Defaults to None, which compiles for both ``prefill and ``decode``. - :compiler_options (dict, optional): Pass any compiler option as input. ``Defaults to None``. - Following flag can be passed in compiler_options to enable QNN Compilation path. - :enable_qnn (bool): Enables QNN Compilation. ``Defaults to False. if not passed.`` - :qnn_config (str): Path of QNN Config parameters file. ``Defaults to None. if not passed`` - for QAIC compilation path, any flag that is supported by ``qaic-exec`` can be passed. Params are converted to flags as below: - - aic_num_cores=16 -> -aic-num-cores=16 - - convert_to_fp16=True -> -convert-to-fp16 + :compiler_options (dict, optional): Additional compiler options. ``Defaults to None``. + For QAIC Compiler: Extra arguments for qaic-exec can be passed. + :mos (int, optional): Effort level to reduce on-chip memory; -1 means no effort. ``Defaults to -1``. + :aic_enable_depth_first (bool, optional): Enables DFS with default memory size. ``Defaults to False``. + :allow_mxint8_mdp_io (bool, optional): Allows MXINT8 compression of MDP IO traffic. ``Defaults to False.`` + Params are converted to flags as below: + - aic_num_cores=16 -> -aic-num-cores=16 + - convert_to_fp16=True -> -convert-to-fp16 + For QNN Compiler: The following arguments can be passed. + :enable_qnn (bool): Enables QNN Compilation. + :qnn_config (str): Path of QNN Config parameters file. Any extra parameters for QNN compilation can be passed via this file. Returns: :str: Path of the compiled ``qpc`` package.