Ruff check and format

quic-amitraj · quic-amitraj · commit 3c76e0beac6c · 2025-06-09T10:20:55.000Z
Signed-off-by: Amit Raj <quic_amitraj@quicinc.com>
diff --git a/QEfficient/transformers/embeddings/embedding_utils.py b/QEfficient/transformers/embeddings/embedding_utils.py
@@ -57,12 +57,29 @@ def forward(
         output = self.base_model(input_ids, attention_mask, **kwargs)
         return self.pooling_fn(output[0], attention_mask)
 
+
 def validate_user_pooling_function(user_function):
+    """
+    Validate a user-provided pooling function to ensure it meets the required interface.
+
+    The function should take two arguments:
+    - last_hidden_states (torch.Tensor): The last hidden states of the model.
+    - attention_mask (torch.Tensor): The attention mask of the input sequence.
+
+    It should return a torch.Tensor representing the pooled output.
+
+    Args:
+        user_function (callable): The user-provided pooling function.
+
+    Raises:
+        TypeError: If the user-provided function is not callable.
+        ValueError: If the function's signature lacks a `last_hidden_states` or `attention_mask` parameter.
+    """
     if not callable(user_function):
         raise TypeError("Provided pooling function is not callable.")
 
     sig = inspect.signature(user_function)
     required_args = {"last_hidden_states", "attention_mask"}
     if not required_args.issubset(sig.parameters.keys()):
         raise ValueError(f"Pooling function must accept arguments: {required_args}")
-    return user_function
+    return user_function
diff --git a/QEfficient/transformers/models/pytorch_transforms.py b/QEfficient/transformers/models/pytorch_transforms.py
@@ -502,7 +502,9 @@ def apply(cls, model: nn.Module, **kwargs) -> Tuple[nn.Module, bool]:
         transformed = False
         if kwargs.get("pooling") is not None:
             pooling = kwargs["pooling"]
-            pooling_method = POOLING_MAP[pooling] if isinstance(pooling,str) else validate_user_pooling_function(pooling)
+            pooling_method = (
+                POOLING_MAP[pooling] if isinstance(pooling, str) else validate_user_pooling_function(pooling)
+            )
             model = PooledModel(model, pooling_method)
             warnings.warn(f"Pooling method {pooling.__name__} is applied to the model.")
         return model, transformed
diff --git a/examples/embedding_model.py b/examples/embedding_model.py
@@ -13,22 +13,30 @@
 
 from QEfficient import QEFFAutoModel as AutoModel
 
+
 def max_pooling(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
     input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_states.size()).float()
     last_hidden_states[input_mask_expanded == 0] = -1e9
     return torch.max(last_hidden_states, 1)[0]
 
+
 # Sentences we want sentence embeddings for
 sentences = "This is an example sentence"
 
 # Load model from HuggingFace Hub
 tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
 
-# If pooling is not set, model will generate default output
+
+# You can specify the pooling strategy either as a string (e.g., "mean") or by passing a custom pooling function.
+# If no pooling is specified, the model will return its default output (typically token embeddings).
 qeff_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2", pooling=max_pooling)
 
+# Example: Using mean pooling by specifying it as a string.
+# This will return sentence embeddings computed using mean pooling.
+# qeff_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2", pooling="mean")
+
 # Here seq_len can be list seq_len or single int
-qeff_model.compile(num_cores=16, seq_len=[32,64])
+qeff_model.compile(num_cores=16, seq_len=[32, 64])
 
 # Tokenize sentences
 encoded_input = tokenizer(sentences, return_tensors="pt")