
Commit f5ddf69

Cleaning done 3
Signed-off-by: Dipankar Sarkar <[email protected]>
1 parent cf7a323 commit f5ddf69

File tree: 7 files changed (+117 additions, -71 deletions)

QEfficient/utils/_utils.py

Lines changed: 73 additions & 51 deletions
@@ -521,27 +521,57 @@ def __repr__(self):
 def dump_qconfig(func):
     def wrapper(self, *args, **kwargs):
         result = func(self, *args, **kwargs)
-        create_and_dump_qconfigs(
-            self.qpc_path,
-            self.onnx_path,
-            self.get_model_config,
-            [cls.__name__ for cls in self._pytorch_transforms],
-            [cls.__name__ for cls in self._onnx_transforms],
-            kwargs.get("specializations"),
-            kwargs.get("mdp_ts_num_devices", 1),
-            kwargs.get("num_speculative_tokens"),
-            **{
-                k: v
-                for k, v in kwargs.items()
-                if k
-                not in ["specializations", "mdp_ts_num_devices", "num_speculative_tokens", "custom_io", "onnx_path"]
-            },
-        )
+        try:
+            create_and_dump_qconfigs(
+                self.qpc_path,
+                self.onnx_path,
+                self.get_model_config,
+                [cls.__name__ for cls in self._pytorch_transforms],
+                [cls.__name__ for cls in self._onnx_transforms],
+                kwargs.get("specializations"),
+                kwargs.get("mdp_ts_num_devices", 1),
+                kwargs.get("num_speculative_tokens"),
+                **{
+                    k: v
+                    for k, v in kwargs.items()
+                    if k
+                    not in ["specializations", "mdp_ts_num_devices", "num_speculative_tokens", "custom_io", "onnx_path"]
+                },
+            )
+        except Exception as e:
+            print(f"An unexpected error occurred while dumping the qconfig: {e}")
         return result
 
     return wrapper
 
 
+def get_qaic_sdk_version(qaic_sdk_xml_path: str) -> Optional[str]:
+    """
+    Extracts the QAIC SDK version from the given SDK XML file.
+
+    Args:
+        qaic_sdk_xml_path (str): Path to the SDK XML file.
+    Returns:
+        The SDK version as a string if found, otherwise None.
+    """
+    qaic_sdk_version = None
+
+    # Check and extract version from the given SDK XML file
+    if os.path.exists(qaic_sdk_xml_path):
+        try:
+            tree = ET.parse(qaic_sdk_xml_path)
+            root = tree.getroot()
+            base_version_element = root.find(".//base_version")
+            if base_version_element is not None:
+                qaic_sdk_version = base_version_element.text
+        except ET.ParseError as e:
+            print(f"Error parsing XML file {qaic_sdk_xml_path}: {e}")
+        except Exception as e:
+            print(f"An unexpected error occurred while processing {qaic_sdk_xml_path}: {e}")
+
+    return qaic_sdk_version
+
+
 def create_and_dump_qconfigs(
     qpc_path,
     onnx_path,
@@ -558,29 +588,12 @@ def create_and_dump_qconfigs(
     Such as huggingface configs, QEff transforms, QAIC sdk version, QNN sdk, compilation dir, qpc dir and
     many other compilation options.
     """
-    qnn_config = compiler_options["qnn_config"] if "qnn_config" in compiler_options else None
-    enable_qnn = True if "qnn_config" in compiler_options else None
-
+    enable_qnn = compiler_options.get("enable_qnn", False)
+    qnn_config_path = compiler_options.get("qnn_config", None)
     qconfig_file_path = os.path.join(os.path.dirname(qpc_path), "qconfig.json")
     onnx_path = str(onnx_path)
     specializations_file_path = str(os.path.join(os.path.dirname(qpc_path), "specializations.json"))
     compile_dir = str(os.path.dirname(qpc_path))
-    qnn_config_path = (
-        (qnn_config if qnn_config is not None else "QEfficient/compile/qnn_config.json") if enable_qnn else None
-    )
-
-    # Extract QAIC SDK Apps Version from SDK XML file
-    tree = ET.parse(Constants.SDK_APPS_XML)
-    root = tree.getroot()
-    qaic_version = root.find(".//base_version").text
-
-    # Extract QNN SDK details from YAML file if the environment variable is set
-    qnn_sdk_details = None
-    qnn_sdk_path = os.getenv(QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME)
-    if enable_qnn and qnn_sdk_path:
-        qnn_sdk_yaml_path = os.path.join(qnn_sdk_path, QnnConstants.QNN_SDK_YAML)
-        with open(qnn_sdk_yaml_path, "r") as file:
-            qnn_sdk_details = yaml.safe_load(file)
 
     # Ensure all objects in the configs dictionary are JSON serializable
     def make_serializable(obj):
@@ -602,29 +615,38 @@ def make_serializable(obj):
                 "onnx_transforms": make_serializable(onnx_transforms),
                 "onnx_path": onnx_path,
             },
+            "compiler_config": {
+                "enable_qnn": enable_qnn,
+                "compile_dir": compile_dir,
+                "specializations_file_path": specializations_file_path,
+                "specializations": make_serializable(specializations),
+                "mdp_ts_num_devices": mdp_ts_num_devices,
+                "num_speculative_tokens": num_speculative_tokens,
+                **compiler_options,
+            },
+            "aic_sdk_config": {
+                "qaic_apps_version": get_qaic_sdk_version(Constants.SDK_APPS_XML),
+                "qaic_platform_version": get_qaic_sdk_version(Constants.SDK_PLATFORM_XML),
+            },
         },
     }
 
-    aic_compiler_config = {
-        "apps_sdk_version": qaic_version,
-        "compile_dir": compile_dir,
-        "specializations_file_path": specializations_file_path,
-        "specializations": make_serializable(specializations),
-        "mdp_ts_num_devices": mdp_ts_num_devices,
-        "num_speculative_tokens": num_speculative_tokens,
-        **compiler_options,
-    }
-    qnn_config = {
-        "enable_qnn": enable_qnn,
-        "qnn_config_path": qnn_config_path,
-    }
-    # Put AIC or qnn details.
     if enable_qnn:
+        qnn_sdk_path = os.getenv(QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME)
+        if not qnn_sdk_path:
+            raise EnvironmentError(
+                f"QNN_SDK_PATH {qnn_sdk_path} is not set. Please set {QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME}"
+            )
+        qnn_sdk_yaml_path = os.path.join(qnn_sdk_path, QnnConstants.QNN_SDK_YAML)
+        qnn_sdk_details = load_yaml(
+            qnn_sdk_yaml_path
+        )  # Extract QNN SDK details from YAML file if the environment variable is set
+        qnn_config = {
+            "qnn_config_path": qnn_config_path,
+        }
         qconfigs["qpc_config"]["qnn_config"] = qnn_config
         if qnn_sdk_details:
             qconfigs["qpc_config"]["qnn_config"].update(qnn_sdk_details)
-    else:
-        qconfigs["qpc_config"]["aic_compiler_config"] = aic_compiler_config
 
     create_json(qconfig_file_path, qconfigs)
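The new get_qaic_sdk_version helper is deliberately forgiving: a missing file yields None rather than an exception, and parse errors are printed but swallowed. A minimal usage sketch, assuming the helper is imported from QEfficient/utils/_utils.py as added above; the XML shape and version string are illustrative, with only the .//base_version lookup mirroring the diff:

    import os
    import tempfile

    from QEfficient.utils._utils import get_qaic_sdk_version

    # Illustrative XML containing the <base_version> element the helper searches for.
    sdk_xml = "<versions><build><base_version>1.18.2</base_version></build></versions>"
    with tempfile.NamedTemporaryFile("w", suffix=".xml", delete=False) as f:
        f.write(sdk_xml)
        xml_path = f.name

    print(get_qaic_sdk_version(xml_path))             # -> "1.18.2"
    print(get_qaic_sdk_version("/no/such/apps.xml"))  # -> None; a missing file is not an error
    os.remove(xml_path)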

QEfficient/utils/constants.py

Lines changed: 0 additions & 2 deletions
@@ -105,15 +105,13 @@ class Constants:
     MAX_QPC_LIMIT = 30
     MAX_RETRIES = 10  # This constant will be used set the maximum number of retry attempts for downloading a model using huggingface_hub snapshot_download
     NUM_SPECULATIVE_TOKENS = 2
-
     MAX_TOP_K_IDS = ONNX_EXPORT_EXAMPLE_MAX_TOP_K_IDS
     SDK_APPS_XML = "/opt/qti-aic/versions/apps.xml"  # This xml file is parsed to find out the SDK apps version.
     SDK_PLATFORM_XML = (
         "/opt/qti-aic/versions/platform.xml"  # This xml file is parsed to find out the SDK platform version.
     )
 
 
-
 @dataclass
 class QnnConstants:
     # QNN PATH to be read from environment variable.
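These two XML paths are exactly what the new aic_sdk_config block feeds into get_qaic_sdk_version. A one-line sketch of that wiring, assuming a machine with the AIC SDK installed at the default /opt/qti-aic location (otherwise both calls return None):

    from QEfficient.utils._utils import get_qaic_sdk_version
    from QEfficient.utils.constants import Constants

    print(get_qaic_sdk_version(Constants.SDK_APPS_XML))      # apps SDK version string, or None
    print(get_qaic_sdk_version(Constants.SDK_PLATFORM_XML))  # platform SDK version string, or None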

scripts/Jenkinsfile

Lines changed: 1 addition & 1 deletion
@@ -170,4 +170,4 @@ pipeline {
             deleteDir()
         }
     }
-}
+}

scripts/finetune/run_ft_model.py

Lines changed: 2 additions & 2 deletions
@@ -12,7 +12,7 @@
 from peft import AutoPeftModelForCausalLM
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from QEfficient.finetune.configs.training import train_config as TRAIN_CONFIG
+from QEfficient.finetune.configs.training import TrainConfig
 
 # Suppress all warnings
 warnings.filterwarnings("ignore")
@@ -25,7 +25,7 @@
     print(f"Warning: {e}. Moving ahead without these qaic modules.")
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-train_config = TRAIN_CONFIG()
+train_config = TrainConfig()
 model = AutoModelForCausalLM.from_pretrained(
     train_config.model_name,
     use_cache=False,
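The import change reflects renaming the config class from train_config (aliased to TRAIN_CONFIG) to the PEP 8-style TrainConfig. For orientation, a minimal sketch of the pattern this implies, a dataclass with defaults instantiated once per run; the fields shown are placeholders for illustration, not the real TrainConfig signature:

    from dataclasses import dataclass

    @dataclass
    class TrainConfig:
        # Placeholder fields; the real class in QEfficient.finetune.configs.training has many more.
        model_name: str = "meta-llama/Llama-3.2-1B"
        lr: float = 3e-4
        num_epochs: int = 1

    train_config = TrainConfig()  # same call shape as run_ft_model.py above
    print(train_config.model_name)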

tests/finetune/test_finetune.py

Lines changed: 36 additions & 10 deletions
@@ -8,6 +8,7 @@
 import os
 import shutil
 
+import numpy as np
 import pytest
 import torch.optim as optim
 from torch.utils.data import DataLoader
@@ -22,12 +23,25 @@ def clean_up(path):
     shutil.rmtree(path)
 
 
-configs = [pytest.param("meta-llama/Llama-3.2-1B", 1, 1, 1, None, True, True, "cpu", id="llama_config")]
+configs = [
+    pytest.param(
+        "meta-llama/Llama-3.2-1B",  # model_name
+        10,  # max_eval_step
+        20,  # max_train_step
+        1,  # intermediate_step_save
+        None,  # context_length
+        True,  # run_validation
+        True,  # use_peft
+        "qaic",  # device
+        id="llama_config",  # config name
+    )
+]
 
 
-# TODO:enable this once docker is available
+@pytest.mark.skip(reason="Currently CI is broken. Once it is fixed we will enable this test.")
+@pytest.mark.cli
 @pytest.mark.on_qaic
-@pytest.mark.skip(reason="eager docker not available in sdk")
+@pytest.mark.finetune
 @pytest.mark.parametrize(
     "model_name,max_eval_step,max_train_step,intermediate_step_save,context_length,run_validation,use_peft,device",
     configs,
@@ -43,7 +57,7 @@ def test_finetune(
     device,
     mocker,
 ):
-    train_config_spy = mocker.spy(QEfficient.cloud.finetune, "TRAIN_CONFIG")
+    train_config_spy = mocker.spy(QEfficient.cloud.finetune, "TrainConfig")
     generate_dataset_config_spy = mocker.spy(QEfficient.cloud.finetune, "generate_dataset_config")
     generate_peft_config_spy = mocker.spy(QEfficient.cloud.finetune, "generate_peft_config")
     get_dataloader_kwargs_spy = mocker.spy(QEfficient.cloud.finetune, "get_dataloader_kwargs")
@@ -65,23 +79,28 @@ def test_finetune(
         "device": device,
     }
 
-    finetune(**kwargs)
+    results = finetune(**kwargs)
+    assert np.allclose(results["avg_train_loss"], 0.00232327, atol=1e-5), "Train loss is not matching."
+    assert np.allclose(results["avg_train_metric"], 1.002326, atol=1e-5), "Train metric is not matching."
+    assert np.allclose(results["avg_eval_loss"], 0.0206124, atol=1e-5), "Eval loss is not matching."
+    assert np.allclose(results["avg_eval_metric"], 1.020826, atol=1e-5), "Eval metric is not matching."
+    assert results["avg_epoch_time"] < 60, "Training should complete within 60 seconds."
 
     train_config_spy.assert_called_once()
     generate_dataset_config_spy.assert_called_once()
     generate_peft_config_spy.assert_called_once()
-    update_config_spy.assert_called_once()
     get_custom_data_collator_spy.assert_called_once()
     get_longest_seq_length_spy.assert_called_once()
     print_model_size_spy.assert_called_once()
     train_spy.assert_called_once()
 
+    assert update_config_spy.call_count == 2
     assert get_dataloader_kwargs_spy.call_count == 2
     assert get_preprocessed_dataset_spy.call_count == 2
 
     args, kwargs = train_spy.call_args
-    train_dataloader = args[1]
-    eval_dataloader = args[2]
+    train_dataloader = args[2]
+    eval_dataloader = args[3]
     optimizer = args[4]
 
     batch = next(iter(train_dataloader))
@@ -97,12 +116,19 @@ def test_finetune(
     else:
         assert eval_dataloader is None
 
-    args, kwargs = update_config_spy.call_args
+    args, kwargs = update_config_spy.call_args_list[0]
     train_config = args[0]
+    assert max_train_step >= train_config.gradient_accumulation_steps, (
+        "Total training step should be more than "
+        f"{train_config.gradient_accumulation_steps} which is gradient accumulation steps."
+    )
 
-    saved_file = os.path.join(train_config.output_dir, "adapter_model.safetensors")
+    saved_file = os.path.join(train_config.output_dir, "complete_epoch_1/adapter_model.safetensors")
     assert os.path.isfile(saved_file)
 
     clean_up(train_config.output_dir)
     clean_up("runs")
     clean_up(train_config.dump_root_dir)
+
+
+# TODO (Meet): Add seperate tests for BERT FT and LLama FT
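Two patterns in the updated test are worth calling out: pytest-mock's mocker.spy wraps a real callable so its calls can be asserted without stubbing its behavior, and np.allclose with an absolute tolerance gives stable regression checks on floating-point metrics. A self-contained sketch under those assumptions (requires pytest and pytest-mock; the spied function and expected value are illustrative, not taken from the test above):

    import numpy as np

    def test_spy_and_tolerance(mocker):
        spy = mocker.spy(np, "mean")     # wraps np.mean; the real computation still runs
        avg_loss = np.mean([0.0023, 0.0024])
        spy.assert_called_once()         # the call and its arguments were recorded
        assert np.allclose(avg_loss, 0.00235, atol=1e-5), "Loss is not matching."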

tests/transformers/spd/test_pld_inference.py

Lines changed: 2 additions & 2 deletions
@@ -262,7 +262,7 @@ def test_pld_spec_decode_inference(
         num_speculative_tokens=num_speculative_tokens,
     )
     # init qaic session
-    target_model_session = QAICInferenceSession(target_model_qpc_path, device_ids=device_group)
+    target_model_session = QAICInferenceSession(target_model_qpc_path)
     draft_model_session = None
 
     # skip inputs/outputs buffers
@@ -453,7 +453,7 @@ def test_pld_spec_decode_inference(
     del draft_model_session
     generated_ids = np.asarray(generated_ids[0]).flatten()
     gen_len = generated_ids.shape[0]
-    exec_info = target_model.generate(tokenizer, Constants.INPUT_STR, device_group)
+    exec_info = target_model.generate(tokenizer, Constants.INPUT_STR)
     cloud_ai_100_tokens = exec_info.generated_ids[0][
         :gen_len
     ]  # Because we always run for single input and single batch size

tests/transformers/spd/test_spd_inference.py

Lines changed: 3 additions & 3 deletions
@@ -157,8 +157,8 @@ def test_spec_decode_inference(
         full_batch_size=full_batch_size,
     )
     # init qaic session
-    target_model_session = QAICInferenceSession(target_model_qpc_path, device_ids=device_group)
-    draft_model_session = QAICInferenceSession(draft_model_qpc_path, device_ids=device_group)
+    target_model_session = QAICInferenceSession(target_model_qpc_path)
+    draft_model_session = QAICInferenceSession(draft_model_qpc_path)
 
     # skip inputs/outputs buffers
     target_model_session.skip_buffers(set([x for x in target_model_session.input_names if x.startswith("past_")]))
@@ -341,7 +341,7 @@ def test_spec_decode_inference(
     del draft_model_session
     generated_ids = np.asarray(generated_ids[0]).flatten()
     gen_len = generated_ids.shape[0]
-    exec_info = draft_model.generate(tokenizer, Constants.INPUT_STR, device_group)
+    exec_info = draft_model.generate(tokenizer, Constants.INPUT_STR)
     cloud_ai_100_tokens = exec_info.generated_ids[0][
         :gen_len
     ]  # Because we always run for single input and single batch size
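Both speculative-decoding tests now build sessions without device_ids, leaving device selection to the runtime. A hedged sketch of the shared setup pattern, assuming the usual import path for QAICInferenceSession; the QPC path is a placeholder, and the _RetainedState output filter is an assumption extrapolated from the past_ input filter shown in the diff above:

    from QEfficient.generation.cloud_infer import QAICInferenceSession

    session = QAICInferenceSession("path/to/qpc")  # placeholder path; omitting device_ids lets the runtime pick a device
    # Skip host-side I/O for KV-cache buffers that stay resident on the device.
    session.skip_buffers([x for x in session.input_names if x.startswith("past_")])
    session.skip_buffers([x for x in session.output_names if x.endswith("_RetainedState")])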
