
Commit a74b9a7

Added test
Signed-off-by: Amit Raj <[email protected]>
1 parent a06f774 commit a74b9a7

3 files changed: +7, -8 lines changed


QEfficient/transformers/models/grok_1/modeling_grok1.py

Lines changed: 2 additions & 7 deletions
@@ -97,9 +97,6 @@ def forward(
 
         attn_output = self.o_proj(attn_output)
 
-        if not output_attentions:
-            attn_weights = None
-
         return attn_output, attn_weights, past_key_value
 
 
@@ -234,10 +231,8 @@ def forward(
             raise ValueError("You have to specify either input_ids or inputs_embeds")
 
         seq_length_with_past = seq_length
-        past_key_values_length = 0
-        if past_key_values is not None:
-            past_key_values_length = past_key_values[0][0].shape[2]
-            seq_length_with_past = seq_length_with_past + past_key_values_length
+        past_key_values_length = past_key_values[0][0].shape[2]
+        seq_length_with_past = seq_length_with_past + past_key_values_length
 
         past_key_values = QEffDynamicCache.from_legacy_cache(past_key_values)
QEfficient/transformers/models/pytorch_transforms.py

Lines changed: 1 addition & 1 deletion
@@ -131,7 +131,7 @@
     WhisperPositionalEmbedding,
 )
 
-from QEfficient.base.pytorch_transforms import ModuleMappingTransform, ExternalModuleMapperTransform
+from QEfficient.base.pytorch_transforms import ExternalModuleMapperTransform, ModuleMappingTransform
 from QEfficient.customop import CustomRMSNormAIC, GemmaCustomRMSNormAIC
 from QEfficient.transformers.models.codegen.modeling_codegen import (
     QEffCodeGenAttention,
tests/transformers/models/test_causal_lm_models.py

Lines changed: 4 additions & 0 deletions
@@ -10,6 +10,7 @@
 
 import numpy as np
 import pytest
+import torch
 from transformers import AutoModelForCausalLM
 
 from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
@@ -44,6 +45,7 @@
     "neuralmagic/Qwen2-0.5B-Instruct-FP8",  # fp8 quant method, static, with lm head ignored
     "ibm-granite/granite-3.1-2b-instruct",
     "ibm-granite/granite-guardian-3.1-2b",
+    "hpcai-tech/grok-1",
 ]
 
 test_models_qnn = [
@@ -78,8 +80,10 @@ def load_causal_lm_model(model_config):
         num_hidden_layers=model_config["n_layer"],
         attn_implementation="eager",
         low_cpu_mem_usage=False,
+        trust_remote_code=True,
     )  # Run models for single layers only
     params = sum(p.numel() for p in model_hf.parameters())
+    model_hf.to(torch.float32)
     model_hf.eval()
     return model_hf, params
 