
Commit a74b9a7

Added test
Signed-off-by: Amit Raj <[email protected]>
1 parent a06f774 commit a74b9a7

3 files changed: +7, -8 lines changed


QEfficient/transformers/models/grok_1/modeling_grok1.py

Lines changed: 2 additions & 7 deletions
@@ -97,9 +97,6 @@ def forward(
 
         attn_output = self.o_proj(attn_output)
 
-        if not output_attentions:
-            attn_weights = None
-
         return attn_output, attn_weights, past_key_value
 
 
@@ -234,10 +231,8 @@ def forward(
             raise ValueError("You have to specify either input_ids or inputs_embeds")
 
         seq_length_with_past = seq_length
-        past_key_values_length = 0
-        if past_key_values is not None:
-            past_key_values_length = past_key_values[0][0].shape[2]
-            seq_length_with_past = seq_length_with_past + past_key_values_length
+        past_key_values_length = past_key_values[0][0].shape[2]
+        seq_length_with_past = seq_length_with_past + past_key_values_length
 
         past_key_values = QEffDynamicCache.from_legacy_cache(past_key_values)
QEfficient/transformers/models/pytorch_transforms.py

Lines changed: 1 addition & 1 deletion
@@ -131,7 +131,7 @@
     WhisperPositionalEmbedding,
 )
 
-from QEfficient.base.pytorch_transforms import ModuleMappingTransform, ExternalModuleMapperTransform
+from QEfficient.base.pytorch_transforms import ExternalModuleMapperTransform, ModuleMappingTransform
 from QEfficient.customop import CustomRMSNormAIC, GemmaCustomRMSNormAIC
 from QEfficient.transformers.models.codegen.modeling_codegen import (
     QEffCodeGenAttention,
tests/transformers/models/test_causal_lm_models.py

Lines changed: 4 additions & 0 deletions
@@ -10,6 +10,7 @@
 
 import numpy as np
 import pytest
+import torch
 from transformers import AutoModelForCausalLM
 
 from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
@@ -44,6 +45,7 @@
     "neuralmagic/Qwen2-0.5B-Instruct-FP8",  # fp8 quant method, static, with lm head ignored
     "ibm-granite/granite-3.1-2b-instruct",
     "ibm-granite/granite-guardian-3.1-2b",
+    "hpcai-tech/grok-1",
 ]
 
 test_models_qnn = [
@@ -78,8 +80,10 @@ def load_causal_lm_model(model_config):
         num_hidden_layers=model_config["n_layer"],
         attn_implementation="eager",
         low_cpu_mem_usage=False,
+        trust_remote_code=True,
     )  # Run models for single layers only
     params = sum(p.numel() for p in model_hf.parameters())
+    model_hf.to(torch.float32)
     model_hf.eval()
     return model_hf, params
 