From a6ce9a7c585fa020953758d8b848809857e873eb Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 24 Apr 2025 11:27:54 +0200
Subject: [PATCH 1/2] Test generate_stream error

---
 tests/test_models.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/test_models.py b/tests/test_models.py
index 9711c53f1..869a64ba3 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -207,6 +207,14 @@ def test_get_hfapi_message_no_tool_external_provider(self):
         messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
         model(messages, stop_sequences=["great"])
 
+    @require_run_all
+    def test_generate_stream_error(self):
+        # Setting max_tokens to 5 to get finish_reason='length'
+        model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=5)
+        messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
+        with pytest.raises(ValueError, match="No content or tool calls in event:"):
+            list(model.generate_stream(messages))
+
 
 class TestHfApiModel:
     def test_init_model_with_tokens(self):

From 6406eee7b415aea299e841af13c752112c7e4481 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 24 Apr 2025 11:48:06 +0200
Subject: [PATCH 2/2] Test generate_stream error with mock client

---
 tests/test_models.py | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/tests/test_models.py b/tests/test_models.py
index 869a64ba3..1eff0297b 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -20,7 +20,12 @@
 from unittest.mock import MagicMock, patch
 
 import pytest
-from huggingface_hub import ChatCompletionOutputMessage
+from huggingface_hub import (
+    ChatCompletionOutputMessage,
+    ChatCompletionStreamOutput,
+    ChatCompletionStreamOutputChoice,
+    ChatCompletionStreamOutputDelta,
+)
 
 from smolagents.models import (
     AmazonBedrockServerModel,
@@ -215,6 +220,32 @@ def test_generate_stream_error(self):
         with pytest.raises(ValueError, match="No content or tool calls in event:"):
             list(model.generate_stream(messages))
 
+    def test_generate_stream_error_with_mock_client(self):
+        # Setting max_tokens to 5 to get finish_reason='length'
+        model = InferenceClientModel(model_id="test-model", max_tokens=5)
+        model.client = MagicMock()
+        # Mock the response to simulate finish_reason='length'
+        model.client.chat.completions.create.return_value = [
+            ChatCompletionStreamOutput(
+                choices=[
+                    ChatCompletionStreamOutputChoice(
+                        delta=ChatCompletionStreamOutputDelta(role="assistant", content=" I", tool_calls=None),
+                        index=0,
+                        finish_reason="length",
+                        logprobs=None,
+                    )
+                ],
+                created=1,
+                id="",
+                model="test-model",
+                system_fingerprint="3.2.1-sha-4d28897",
+                usage=None,
+            )
+        ]
+        messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
+        with pytest.raises(ValueError, match="No content or tool calls in event:"):
+            list(model.generate_stream(messages))
+
 
 class TestHfApiModel:
     def test_init_model_with_tokens(self):
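
Note on the mocking pattern in test_generate_stream_error_with_mock_client: a minimal
standalone sketch, independent of smolagents, showing why assigning return_value on
client.chat.completions.create is enough to replace the streamed network call with
canned chunks — a MagicMock auto-creates nested attributes, so the whole attribute
chain resolves to mocks. The payload and call arguments below are illustrative only,
not the huggingface_hub types used in the actual test.

    from unittest.mock import MagicMock

    client = MagicMock()
    # Any canned iterable works; the mock simply returns it when the call is made.
    client.chat.completions.create.return_value = [{"finish_reason": "length"}]

    # Consuming the "stream" yields the canned chunks, with no network access.
    chunks = list(client.chat.completions.create(messages=[], stream=True))
    assert chunks == [{"finish_reason": "length"}]

    # The mocked call can also be asserted on directly.
    client.chat.completions.create.assert_called_once()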