From f17c973ea3f83c7d2a556948c76d5fc4b4628c76 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Fri, 4 Apr 2025 14:06:25 -0400
Subject: [PATCH 01/14] fix(llm): retry on InternalServerError and Timeout; handle Gemini returns len(choices) < 1

---
 openhands/llm/llm.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 42ffe5eac87c..91a480c7b2a7 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -37,7 +37,11 @@
 __all__ = ['LLM']
 
 # tuple of exceptions to retry on
-LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,)
+LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
+    RateLimitError,
+    litellm.Timeout,
+    litellm.InternalServerError,
+)
 
 # cache prompt supporting models
 # remove this when we gemini and deepseek are supported
@@ -63,6 +67,7 @@
     'o1-2024-12-17',
     'o3-mini-2025-01-31',
     'o3-mini',
+    'gemini-2.5-pro',
 ]
 
 REASONING_EFFORT_SUPPORTED_MODELS = [
@@ -268,7 +273,16 @@ def wrapper(*args, **kwargs):
             # if we mocked function calling, and we have tools, convert the response back to function calling format
             if mock_function_calling and mock_fncall_tools is not None:
                 logger.debug(f'Response choices: {len(resp.choices)}')
-                assert len(resp.choices) >= 1
+                if len(resp.choices) < 1:
+                    # Just try again
+                    raise litellm.InternalServerError(
+                        'Response choices is less than 1 - This is only seen in Gemini Pro 2.5 so far. Response: '
+                        + str(resp),
+                        self.config.custom_llm_provider,
+                        self.config.model,
+                        response=resp,
+                    )
+
                 non_fncall_response_message = resp.choices[0].message
                 fn_call_messages_with_response = (
                     convert_non_fncall_messages_to_fncall_messages(
@@ -282,6 +296,15 @@ def wrapper(*args, **kwargs):
                 )
                 resp.choices[0].message = fn_call_response_message
 
+            if len(resp.choices) < 1:
+                # Just try again
+                raise litellm.InternalServerError(
+                    'Response choices is less than 1 - This is only seen in Gemini Pro 2.5 so far. Response: '
+                    + str(resp),
+                    self.config.custom_llm_provider,
+                    self.config.model,
+                    response=resp,
+                )
             message_back: str = resp['choices'][0]['message']['content'] or ''
             tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][
                 'message'

From 10d4077990f9bf3531aee299bbf7aa18ac70a901 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Sat, 5 Apr 2025 02:12:34 +0800
Subject: [PATCH 02/14] Update openhands/llm/llm.py

Co-authored-by: Engel Nyst
---
 openhands/llm/llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 91a480c7b2a7..322f485850b2 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -276,7 +276,7 @@ def wrapper(*args, **kwargs):
                 if len(resp.choices) < 1:
                     # Just try again
                     raise litellm.InternalServerError(
-                        'Response choices is less than 1 - This is only seen in Gemini Pro 2.5 so far. Response: '
+                        'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '
                         + str(resp),
                         self.config.custom_llm_provider,
                         self.config.model,

From 77a43ef1d6e05ff10841a43b8ab0827ccb6c3884 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Sat, 5 Apr 2025 02:23:45 +0800
Subject: [PATCH 03/14] Update openhands/llm/llm.py

Co-authored-by: Engel Nyst
---
 openhands/llm/llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 322f485850b2..8f8caab4bfa1 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -299,7 +299,7 @@ def wrapper(*args, **kwargs):
             if len(resp.choices) < 1:
                 # Just try again
                 raise litellm.InternalServerError(
-                    'Response choices is less than 1 - This is only seen in Gemini Pro 2.5 so far. Response: '
+                    'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '
                     + str(resp),
                     self.config.custom_llm_provider,
                     self.config.model,

From d04b240527d9052b932c2dc5fdda66217befc8bc Mon Sep 17 00:00:00 2001
From: openhands
Date: Fri, 4 Apr 2025 18:27:55 +0000
Subject: [PATCH 04/14] Fix AttributeError in LLM class when accessing resp.choices as a dict

---
 openhands/llm/llm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 8f8caab4bfa1..60ddf5cba523 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -296,7 +296,8 @@ def wrapper(*args, **kwargs):
                 )
                 resp.choices[0].message = fn_call_response_message
 
-            if len(resp.choices) < 1:
+            # Check if resp has 'choices' key with at least one item
+            if not resp.get('choices') or len(resp['choices']) < 1:
                 # Just try again
                 raise litellm.InternalServerError(
                     'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '

From 9521c362207f0025d02fc367e161e0be5f91a7a5 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Fri, 4 Apr 2025 20:04:12 +0000
Subject: [PATCH 05/14] add special exception for llm no response error

---
 openhands/core/exceptions.py | 10 ++++++++++
 openhands/llm/llm.py         | 20 +++++++------------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/openhands/core/exceptions.py b/openhands/core/exceptions.py
index 342e0db0e7c5..c154152d4b66 100644
--- a/openhands/core/exceptions.py
+++ b/openhands/core/exceptions.py
@@ -88,6 +88,16 @@ def __init__(
         super().__init__(message)
 
 
+# This exception should be retried
+# Typically, after retry with a non-zero temperature, the LLM will return a response
+class LLMNoResponseError(Exception):
+    def __init__(
+        self,
+        message: str = 'LLM did not return a response. This is only seen in Gemini models so far.',
+    ) -> None:
+        super().__init__(message)
+
+
 class UserCancelledError(Exception):
     def __init__(self, message: str = 'User cancelled the request') -> None:
         super().__init__(message)
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 60ddf5cba523..27a948fd1c3a 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -23,6 +23,7 @@
 from litellm.types.utils import CostPerToken, ModelResponse, Usage
 from litellm.utils import create_pretrained_tokenizer
 
+from openhands.core.exceptions import LLMNoResponseError
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import Message
 from openhands.llm.debug_mixin import DebugMixin
@@ -41,6 +42,7 @@
     RateLimitError,
     litellm.Timeout,
     litellm.InternalServerError,
+    LLMNoResponseError,
 )
 
 # cache prompt supporting models
@@ -272,15 +274,10 @@ def wrapper(*args, **kwargs):
             # if we mocked function calling, and we have tools, convert the response back to function calling format
             if mock_function_calling and mock_fncall_tools is not None:
-                logger.debug(f'Response choices: {len(resp.choices)}')
                 if len(resp.choices) < 1:
-                    # Just try again
-                    raise litellm.InternalServerError(
+                    raise LLMNoResponseError(
                         'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '
-                        + str(resp),
-                        self.config.custom_llm_provider,
-                        self.config.model,
-                        response=resp,
+                        + str(resp)
                     )
 
                 non_fncall_response_message = resp.choices[0].message
                 fn_call_messages_with_response = (
@@ -298,14 +295,11 @@ def wrapper(*args, **kwargs):
 
             # Check if resp has 'choices' key with at least one item
             if not resp.get('choices') or len(resp['choices']) < 1:
-                # Just try again
-                raise litellm.InternalServerError(
+                raise LLMNoResponseError(
                     'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '
-                    + str(resp),
-                    self.config.custom_llm_provider,
-                    self.config.model,
-                    response=resp,
+                    + str(resp)
                 )
+
             message_back: str = resp['choices'][0]['message']['content'] or ''
             tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][
                 'message'

From 066a8988c8d26c285c60d3c7e7f7e2f978e86854 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Fri, 4 Apr 2025 20:05:36 +0000
Subject: [PATCH 06/14] try handle LLM No Response Error

---
 openhands/llm/retry_mixin.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py
index 08a8add63939..4410aecd9156 100644
--- a/openhands/llm/retry_mixin.py
+++ b/openhands/llm/retry_mixin.py
@@ -5,6 +5,7 @@
     wait_exponential,
 )
 
+from openhands.core.exceptions import LLMNoResponseError
 from openhands.core.logger import openhands_logger as logger
 from openhands.utils.tenacity_stop import stop_if_should_exit
 
@@ -35,6 +36,15 @@ def before_sleep(retry_state):
             if retry_listener:
                 retry_listener(retry_state.attempt_number, num_retries)
 
+            # Check if the exception is LLMNoResponseError
+            exception = retry_state.outcome.exception()
+            if isinstance(exception, LLMNoResponseError):
+                logger.debug(
+                    'LLMNoResponseError detected, setting temperature to 0.2 for next attempt'
+                )
+                if hasattr(retry_state, 'kwargs'):
+                    retry_state.kwargs['temperature'] = 0.2
+
         return retry(
             before_sleep=before_sleep,
             stop=stop_after_attempt(num_retries) | stop_if_should_exit(),

From 7f92fece4d94ccac2c292408e75441a8c0a75a71 Mon Sep 17 00:00:00 2001
From: openhands
Date: Fri, 4 Apr 2025 20:15:10 +0000
Subject: [PATCH 07/14] Add tests for LLMNoResponseError retry decorator

---
 tests/unit/test_llm.py | 164 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 163 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index 87db3bc62e2e..2e92e46f87e0 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -10,7 +10,7 @@
 )
 
 from openhands.core.config import LLMConfig
-from openhands.core.exceptions import OperationCancelled
+from openhands.core.exceptions import LLMNoResponseError, OperationCancelled
 from openhands.core.message import Message, TextContent
 from openhands.llm.llm import LLM
 from openhands.llm.metrics import Metrics, TokenUsage
@@ -385,6 +385,168 @@ def side_effect(*args, **kwargs):
     _should_exit = False
 
 
+@patch('openhands.llm.llm.litellm_completion')
+def test_completion_retry_with_llm_no_response_error_zero_temp(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that the retry decorator properly handles LLMNoResponseError by:
+    1. First call to llm_completion uses temperature=0 and throws LLMNoResponseError
+    2.
Second call should have temperature=0.2 and return a successful response + """ + + # Define a side effect function that checks the temperature parameter + # and returns different responses based on it + def side_effect(*args, **kwargs): + temperature = kwargs.get('temperature', 0) + + # First call with temperature=0 should raise LLMNoResponseError + if temperature == 0: + raise LLMNoResponseError('LLM did not return a response') + + # Second call with temperature=0.2 should return a successful response + elif temperature == 0.2: + return { + 'choices': [{'message': {'content': 'Successful response after retry'}}] + } + + # Any other temperature value should return a different response + else: + return { + 'choices': [ + {'message': {'content': f'Response with temperature={temperature}'}} + ] + } + + mock_litellm_completion.side_effect = side_effect + + # Create LLM instance and make a completion call + llm = LLM(config=default_config) + response = llm.completion( + messages=[{'role': 'user', 'content': 'Hello!'}], + stream=False, + temperature=0, # Initial temperature is 0 + ) + + # Verify the response after retry + assert ( + response['choices'][0]['message']['content'] + == 'Successful response after retry' + ) + + # Verify that litellm_completion was called twice + assert mock_litellm_completion.call_count == 2 + + # Check the temperature in the first call (should be 0) + first_call_kwargs = mock_litellm_completion.call_args_list[0][1] + assert first_call_kwargs.get('temperature') == 0 + + # Check the temperature in the second call (should be 0.2) + second_call_kwargs = mock_litellm_completion.call_args_list[1][1] + assert second_call_kwargs.get('temperature') == 0.2 + + +@patch('openhands.llm.llm.litellm_completion') +def test_completion_retry_with_llm_no_response_error_nonzero_temp( + mock_litellm_completion, default_config +): + """ + Test that the retry decorator works for LLMNoResponseError when initial temperature is non-zero, + and changes the temperature to 0.2 on retry. + + This test verifies that when LLMNoResponseError is raised with a non-zero temperature: + 1. The retry mechanism is triggered + 2. The temperature is changed to 0.2 on the retry attempt + 3. 
After all retries are exhausted, the exception is propagated + """ + # Define a side effect function that always raises LLMNoResponseError + mock_litellm_completion.side_effect = LLMNoResponseError( + 'LLM did not return a response' + ) + + # Create LLM instance and make a completion call with non-zero temperature + llm = LLM(config=default_config) + + # We expect this to raise an exception after all retries are exhausted + with pytest.raises(LLMNoResponseError): + llm.completion( + messages=[{'role': 'user', 'content': 'Hello!'}], + stream=False, + temperature=0.7, # Initial temperature is non-zero + ) + + # Verify that litellm_completion was called multiple times (retries happened) + # The default_config has num_retries=2, so it should be called 2 times + assert mock_litellm_completion.call_count == 2 + + # Check the temperature in the first call (should be 0.7) + first_call_kwargs = mock_litellm_completion.call_args_list[0][1] + assert first_call_kwargs.get('temperature') == 0.7 + + # Check the temperature in the second call (should be changed to 0.2) + second_call_kwargs = mock_litellm_completion.call_args_list[1][1] + assert second_call_kwargs.get('temperature') == 0.2 + + +@patch('openhands.llm.llm.litellm_completion') +def test_completion_retry_with_llm_no_response_error_successful_retry( + mock_litellm_completion, default_config +): + """ + Test that the retry decorator works for LLMNoResponseError and successfully retries with temperature=0.2. + + This test verifies that: + 1. First call to llm_completion throws LLMNoResponseError + 2. Second call with temperature=0.2 returns a successful response + """ + + # Define a side effect function that raises LLMNoResponseError on first call + # and returns a successful response on second call + def side_effect(*args, **kwargs): + temperature = kwargs.get('temperature', 0) + + if mock_litellm_completion.call_count == 1: + # First call should raise LLMNoResponseError + raise LLMNoResponseError('LLM did not return a response') + else: + # Second call should return a successful response + return { + 'choices': [ + { + 'message': { + 'content': f'Successful response with temperature={temperature}' + } + } + ] + } + + mock_litellm_completion.side_effect = side_effect + + # Create LLM instance and make a completion call + llm = LLM(config=default_config) + response = llm.completion( + messages=[{'role': 'user', 'content': 'Hello!'}], + stream=False, + ) + + # Verify the response after retry + assert ( + response['choices'][0]['message']['content'] + == 'Successful response with temperature=0.2' + ) + + # Verify that litellm_completion was called twice + assert mock_litellm_completion.call_count == 2 + + # Check the temperature in the first call (should be 0) + first_call_kwargs = mock_litellm_completion.call_args_list[0][1] + assert first_call_kwargs.get('temperature') == 0 + + # Check the temperature in the second call (should be 0.2) + second_call_kwargs = mock_litellm_completion.call_args_list[1][1] + assert second_call_kwargs.get('temperature') == 0.2 + + @patch('openhands.llm.llm.litellm_completion') def test_completion_with_litellm_mock(mock_litellm_completion, default_config): mock_response = { From 7a4c4558fc505175be02eaaa4ef07f20efe03f68 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 4 Apr 2025 20:17:07 +0000 Subject: [PATCH 08/14] change to .warning --- openhands/llm/retry_mixin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index 
4410aecd9156..fdc767139d4c 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -39,7 +39,7 @@ def before_sleep(retry_state): # Check if the exception is LLMNoResponseError exception = retry_state.outcome.exception() if isinstance(exception, LLMNoResponseError): - logger.debug( + logger.warning( 'LLMNoResponseError detected, setting temperature to 0.2 for next attempt' ) if hasattr(retry_state, 'kwargs'): From 4d9a6967ca95a1dd4f5a9ca6cb9827e5a7ad0fd8 Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 4 Apr 2025 20:27:30 +0000 Subject: [PATCH 09/14] Update retry decorator to only change temperature when it's zero --- openhands/llm/retry_mixin.py | 15 +++++-- tests/unit/test_llm.py | 77 ++++++++++++++++++++++++++++++++---- 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index fdc767139d4c..eae0f9cfb2cf 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -39,11 +39,18 @@ def before_sleep(retry_state): # Check if the exception is LLMNoResponseError exception = retry_state.outcome.exception() if isinstance(exception, LLMNoResponseError): - logger.warning( - 'LLMNoResponseError detected, setting temperature to 0.2 for next attempt' - ) if hasattr(retry_state, 'kwargs'): - retry_state.kwargs['temperature'] = 0.2 + # Only change temperature if it's zero or not set + current_temp = retry_state.kwargs.get('temperature', 0) + if current_temp == 0: + logger.warning( + 'LLMNoResponseError detected with temperature=0, setting temperature to 0.2 for next attempt' + ) + retry_state.kwargs['temperature'] = 0.2 + else: + logger.warning( + f'LLMNoResponseError detected with temperature={current_temp}, keeping original temperature' + ) return retry( before_sleep=before_sleep, diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 2e92e46f87e0..70c103f7d8f4 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -452,11 +452,11 @@ def test_completion_retry_with_llm_no_response_error_nonzero_temp( ): """ Test that the retry decorator works for LLMNoResponseError when initial temperature is non-zero, - and changes the temperature to 0.2 on retry. + and keeps the original temperature on retry. This test verifies that when LLMNoResponseError is raised with a non-zero temperature: 1. The retry mechanism is triggered - 2. The temperature is changed to 0.2 on the retry attempt + 2. The temperature remains unchanged (not set to 0.2) 3. After all retries are exhausted, the exception is propagated """ # Define a side effect function that always raises LLMNoResponseError @@ -483,9 +483,70 @@ def test_completion_retry_with_llm_no_response_error_nonzero_temp( first_call_kwargs = mock_litellm_completion.call_args_list[0][1] assert first_call_kwargs.get('temperature') == 0.7 - # Check the temperature in the second call (should be changed to 0.2) + # Check the temperature in the second call (should remain 0.7, not changed to 0.2) second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 0.2 + assert second_call_kwargs.get('temperature') == 0.7 + + +@patch('openhands.llm.llm.litellm_completion') +def test_completion_retry_with_llm_no_response_error_nonzero_temp_successful_retry( + mock_litellm_completion, default_config +): + """ + Test that the retry decorator works for LLMNoResponseError with non-zero temperature + and successfully retries while preserving the original temperature. + + This test verifies that: + 1. 
First call to llm_completion with temperature=0.7 throws LLMNoResponseError + 2. Second call with the same temperature=0.7 returns a successful response + """ + + # Define a side effect function that raises LLMNoResponseError on first call + # and returns a successful response on second call + def side_effect(*args, **kwargs): + temperature = kwargs.get('temperature', 0) + + if mock_litellm_completion.call_count == 1: + # First call should raise LLMNoResponseError + raise LLMNoResponseError('LLM did not return a response') + else: + # Second call should return a successful response + return { + 'choices': [ + { + 'message': { + 'content': f'Successful response with temperature={temperature}' + } + } + ] + } + + mock_litellm_completion.side_effect = side_effect + + # Create LLM instance and make a completion call with non-zero temperature + llm = LLM(config=default_config) + response = llm.completion( + messages=[{'role': 'user', 'content': 'Hello!'}], + stream=False, + temperature=0.7, # Non-zero temperature + ) + + # Verify the response after retry + assert ( + response['choices'][0]['message']['content'] + == 'Successful response with temperature=0.7' + ) + + # Verify that litellm_completion was called twice + assert mock_litellm_completion.call_count == 2 + + # Check the temperature in the first call (should be 0.7) + first_call_kwargs = mock_litellm_completion.call_args_list[0][1] + assert first_call_kwargs.get('temperature') == 0.7 + + # Check the temperature in the second call (should still be 0.7) + second_call_kwargs = mock_litellm_completion.call_args_list[1][1] + assert second_call_kwargs.get('temperature') == 0.7 @patch('openhands.llm.llm.litellm_completion') @@ -493,10 +554,11 @@ def test_completion_retry_with_llm_no_response_error_successful_retry( mock_litellm_completion, default_config ): """ - Test that the retry decorator works for LLMNoResponseError and successfully retries with temperature=0.2. + Test that the retry decorator works for LLMNoResponseError with zero temperature + and successfully retries with temperature=0.2. This test verifies that: - 1. First call to llm_completion throws LLMNoResponseError + 1. First call to llm_completion with temperature=0 throws LLMNoResponseError 2. 
Second call with temperature=0.2 returns a successful response """ @@ -522,11 +584,12 @@ def side_effect(*args, **kwargs): mock_litellm_completion.side_effect = side_effect - # Create LLM instance and make a completion call + # Create LLM instance and make a completion call with explicit temperature=0 llm = LLM(config=default_config) response = llm.completion( messages=[{'role': 'user', 'content': 'Hello!'}], stream=False, + temperature=0, # Explicitly set temperature to 0 ) # Verify the response after retry From e439fb616488903d7ac491769e6c58d166249f8c Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 4 Apr 2025 20:32:07 +0000 Subject: [PATCH 10/14] increase temperature to 1.0 --- openhands/llm/retry_mixin.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index eae0f9cfb2cf..d5726adb91cc 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -43,10 +43,11 @@ def before_sleep(retry_state): # Only change temperature if it's zero or not set current_temp = retry_state.kwargs.get('temperature', 0) if current_temp == 0: + retry_state.kwargs['temperature'] = 1.0 logger.warning( - 'LLMNoResponseError detected with temperature=0, setting temperature to 0.2 for next attempt' + 'LLMNoResponseError detected with temperature=0, setting temperature to 1.0 for next attempt. kwargs: ' + + str(retry_state.kwargs) ) - retry_state.kwargs['temperature'] = 0.2 else: logger.warning( f'LLMNoResponseError detected with temperature={current_temp}, keeping original temperature' From 8b204dafebc80888039e900a60c64e379d9b5ee7 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 4 Apr 2025 20:40:47 +0000 Subject: [PATCH 11/14] fix llm retry temperature --- tests/unit/test_llm.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 70c103f7d8f4..6d243ed5775f 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -404,13 +404,6 @@ def side_effect(*args, **kwargs): if temperature == 0: raise LLMNoResponseError('LLM did not return a response') - # Second call with temperature=0.2 should return a successful response - elif temperature == 0.2: - return { - 'choices': [{'message': {'content': 'Successful response after retry'}}] - } - - # Any other temperature value should return a different response else: return { 'choices': [ @@ -430,8 +423,7 @@ def side_effect(*args, **kwargs): # Verify the response after retry assert ( - response['choices'][0]['message']['content'] - == 'Successful response after retry' + response['choices'][0]['message']['content'] == 'Response with temperature=1.0' ) # Verify that litellm_completion was called twice @@ -441,9 +433,9 @@ def side_effect(*args, **kwargs): first_call_kwargs = mock_litellm_completion.call_args_list[0][1] assert first_call_kwargs.get('temperature') == 0 - # Check the temperature in the second call (should be 0.2) + # Check the temperature in the second call (should be 1.0) second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 0.2 + assert second_call_kwargs.get('temperature') == 1.0 @patch('openhands.llm.llm.litellm_completion') @@ -595,7 +587,7 @@ def side_effect(*args, **kwargs): # Verify the response after retry assert ( response['choices'][0]['message']['content'] - == 'Successful response with temperature=0.2' + == 'Successful response with temperature=1.0' ) # Verify that litellm_completion was 
called twice @@ -605,9 +597,9 @@ def side_effect(*args, **kwargs): first_call_kwargs = mock_litellm_completion.call_args_list[0][1] assert first_call_kwargs.get('temperature') == 0 - # Check the temperature in the second call (should be 0.2) + # Check the temperature in the second call (should be 1.0) second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 0.2 + assert second_call_kwargs.get('temperature') == 1.0 @patch('openhands.llm.llm.litellm_completion') From 271e049b4abff38a75d840c0fff24b0704e459e4 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 4 Apr 2025 21:13:16 +0000 Subject: [PATCH 12/14] stop logging kwargs --- openhands/llm/retry_mixin.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index d5726adb91cc..19b7c9689f7b 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -45,8 +45,7 @@ def before_sleep(retry_state): if current_temp == 0: retry_state.kwargs['temperature'] = 1.0 logger.warning( - 'LLMNoResponseError detected with temperature=0, setting temperature to 1.0 for next attempt. kwargs: ' - + str(retry_state.kwargs) + 'LLMNoResponseError detected with temperature=0, setting temperature to 1.0 for next attempt.' ) else: logger.warning( From a275efc1eee022352450a5d6ea0ba5abd3a17d90 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Sat, 5 Apr 2025 20:51:51 +0000 Subject: [PATCH 13/14] action execution retry on retryable httpx error --- .../impl/action_execution/action_execution_client.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/openhands/runtime/impl/action_execution/action_execution_client.py b/openhands/runtime/impl/action_execution/action_execution_client.py index 3ae72b7790fa..d9aa31d8c846 100644 --- a/openhands/runtime/impl/action_execution/action_execution_client.py +++ b/openhands/runtime/impl/action_execution/action_execution_client.py @@ -45,7 +45,7 @@ from openhands.utils.tenacity_stop import stop_if_should_exit -def _is_retryable_check_alive_error(exception): +def _is_retryable_error(exception): return isinstance( exception, (httpx.RemoteProtocolError, httpcore.RemoteProtocolError) ) @@ -93,6 +93,11 @@ def __init__( def _get_action_execution_server_host(self) -> str: pass + @retry( + retry=retry_if_exception(_is_retryable_error), + stop=stop_after_attempt(5) | stop_if_should_exit(), + wait=wait_exponential(multiplier=1, min=4, max=15), + ) def _send_action_server_request( self, method: str, @@ -114,11 +119,6 @@ def _send_action_server_request( """ return send_request(self.session, method, url, **kwargs) - @retry( - retry=retry_if_exception(_is_retryable_check_alive_error), - stop=stop_after_attempt(5) | stop_if_should_exit(), - wait=wait_exponential(multiplier=1, min=4, max=15), - ) def check_if_alive(self) -> None: response = self._send_action_server_request( 'GET', From 1134352e25973d50d25e5e4db8b2ee972669d377 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Sat, 5 Apr 2025 16:55:37 -0400 Subject: [PATCH 14/14] revert unrelated files --- openhands/core/exceptions.py | 10 -- openhands/core/setup.py | 6 +- openhands/llm/llm.py | 24 +--- openhands/llm/retry_mixin.py | 17 --- tests/unit/test_llm.py | 219 +---------------------------------- 5 files changed, 9 insertions(+), 267 deletions(-) diff --git a/openhands/core/exceptions.py b/openhands/core/exceptions.py index c154152d4b66..342e0db0e7c5 100644 --- a/openhands/core/exceptions.py +++ 
b/openhands/core/exceptions.py @@ -88,16 +88,6 @@ def __init__( super().__init__(message) -# This exception should be retried -# Typically, after retry with a non-zero temperature, the LLM will return a response -class LLMNoResponseError(Exception): - def __init__( - self, - message: str = 'LLM did not return a response. This is only seen in Gemini models so far.', - ) -> None: - super().__init__(message) - - class UserCancelledError(Exception): def __init__(self, message: str = 'User cancelled the request') -> None: super().__init__(message) diff --git a/openhands/core/setup.py b/openhands/core/setup.py index 55bc6d9779a1..fd9554ea0e4a 100644 --- a/openhands/core/setup.py +++ b/openhands/core/setup.py @@ -119,7 +119,11 @@ def initialize_repository_for_runtime( if selected_repository and provider_tokens: logger.debug(f'Selected repository {selected_repository}.') repo_directory = call_async_from_sync( - runtime.clone_repo, GENERAL_TIMEOUT, github_token, selected_repository, None + runtime.clone_repo, + GENERAL_TIMEOUT, + provider_tokens, + selected_repository, + None, ) # Run setup script if it exists runtime.maybe_run_setup_script() diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index 27a948fd1c3a..42ffe5eac87c 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -23,7 +23,6 @@ from litellm.types.utils import CostPerToken, ModelResponse, Usage from litellm.utils import create_pretrained_tokenizer -from openhands.core.exceptions import LLMNoResponseError from openhands.core.logger import openhands_logger as logger from openhands.core.message import Message from openhands.llm.debug_mixin import DebugMixin @@ -38,12 +37,7 @@ __all__ = ['LLM'] # tuple of exceptions to retry on -LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = ( - RateLimitError, - litellm.Timeout, - litellm.InternalServerError, - LLMNoResponseError, -) +LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,) # cache prompt supporting models # remove this when we gemini and deepseek are supported @@ -69,7 +63,6 @@ 'o1-2024-12-17', 'o3-mini-2025-01-31', 'o3-mini', - 'gemini-2.5-pro', ] REASONING_EFFORT_SUPPORTED_MODELS = [ @@ -274,12 +267,8 @@ def wrapper(*args, **kwargs): # if we mocked function calling, and we have tools, convert the response back to function calling format if mock_function_calling and mock_fncall_tools is not None: - if len(resp.choices) < 1: - raise LLMNoResponseError( - 'Response choices is less than 1 - This is only seen in Gemini models so far. Response: ' - + str(resp) - ) - + logger.debug(f'Response choices: {len(resp.choices)}') + assert len(resp.choices) >= 1 non_fncall_response_message = resp.choices[0].message fn_call_messages_with_response = ( convert_non_fncall_messages_to_fncall_messages( @@ -293,13 +282,6 @@ def wrapper(*args, **kwargs): ) resp.choices[0].message = fn_call_response_message - # Check if resp has 'choices' key with at least one item - if not resp.get('choices') or len(resp['choices']) < 1: - raise LLMNoResponseError( - 'Response choices is less than 1 - This is only seen in Gemini models so far. 
Response: ' - + str(resp) - ) - message_back: str = resp['choices'][0]['message']['content'] or '' tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][ 'message' diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index 19b7c9689f7b..08a8add63939 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -5,7 +5,6 @@ wait_exponential, ) -from openhands.core.exceptions import LLMNoResponseError from openhands.core.logger import openhands_logger as logger from openhands.utils.tenacity_stop import stop_if_should_exit @@ -36,22 +35,6 @@ def before_sleep(retry_state): if retry_listener: retry_listener(retry_state.attempt_number, num_retries) - # Check if the exception is LLMNoResponseError - exception = retry_state.outcome.exception() - if isinstance(exception, LLMNoResponseError): - if hasattr(retry_state, 'kwargs'): - # Only change temperature if it's zero or not set - current_temp = retry_state.kwargs.get('temperature', 0) - if current_temp == 0: - retry_state.kwargs['temperature'] = 1.0 - logger.warning( - 'LLMNoResponseError detected with temperature=0, setting temperature to 1.0 for next attempt.' - ) - else: - logger.warning( - f'LLMNoResponseError detected with temperature={current_temp}, keeping original temperature' - ) - return retry( before_sleep=before_sleep, stop=stop_after_attempt(num_retries) | stop_if_should_exit(), diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 6d243ed5775f..87db3bc62e2e 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -10,7 +10,7 @@ ) from openhands.core.config import LLMConfig -from openhands.core.exceptions import LLMNoResponseError, OperationCancelled +from openhands.core.exceptions import OperationCancelled from openhands.core.message import Message, TextContent from openhands.llm.llm import LLM from openhands.llm.metrics import Metrics, TokenUsage @@ -385,223 +385,6 @@ def side_effect(*args, **kwargs): _should_exit = False -@patch('openhands.llm.llm.litellm_completion') -def test_completion_retry_with_llm_no_response_error_zero_temp( - mock_litellm_completion, default_config -): - """ - Test that the retry decorator properly handles LLMNoResponseError by: - 1. First call to llm_completion uses temperature=0 and throws LLMNoResponseError - 2. 
Second call should have temperature=0.2 and return a successful response - """ - - # Define a side effect function that checks the temperature parameter - # and returns different responses based on it - def side_effect(*args, **kwargs): - temperature = kwargs.get('temperature', 0) - - # First call with temperature=0 should raise LLMNoResponseError - if temperature == 0: - raise LLMNoResponseError('LLM did not return a response') - - else: - return { - 'choices': [ - {'message': {'content': f'Response with temperature={temperature}'}} - ] - } - - mock_litellm_completion.side_effect = side_effect - - # Create LLM instance and make a completion call - llm = LLM(config=default_config) - response = llm.completion( - messages=[{'role': 'user', 'content': 'Hello!'}], - stream=False, - temperature=0, # Initial temperature is 0 - ) - - # Verify the response after retry - assert ( - response['choices'][0]['message']['content'] == 'Response with temperature=1.0' - ) - - # Verify that litellm_completion was called twice - assert mock_litellm_completion.call_count == 2 - - # Check the temperature in the first call (should be 0) - first_call_kwargs = mock_litellm_completion.call_args_list[0][1] - assert first_call_kwargs.get('temperature') == 0 - - # Check the temperature in the second call (should be 1.0) - second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 1.0 - - -@patch('openhands.llm.llm.litellm_completion') -def test_completion_retry_with_llm_no_response_error_nonzero_temp( - mock_litellm_completion, default_config -): - """ - Test that the retry decorator works for LLMNoResponseError when initial temperature is non-zero, - and keeps the original temperature on retry. - - This test verifies that when LLMNoResponseError is raised with a non-zero temperature: - 1. The retry mechanism is triggered - 2. The temperature remains unchanged (not set to 0.2) - 3. After all retries are exhausted, the exception is propagated - """ - # Define a side effect function that always raises LLMNoResponseError - mock_litellm_completion.side_effect = LLMNoResponseError( - 'LLM did not return a response' - ) - - # Create LLM instance and make a completion call with non-zero temperature - llm = LLM(config=default_config) - - # We expect this to raise an exception after all retries are exhausted - with pytest.raises(LLMNoResponseError): - llm.completion( - messages=[{'role': 'user', 'content': 'Hello!'}], - stream=False, - temperature=0.7, # Initial temperature is non-zero - ) - - # Verify that litellm_completion was called multiple times (retries happened) - # The default_config has num_retries=2, so it should be called 2 times - assert mock_litellm_completion.call_count == 2 - - # Check the temperature in the first call (should be 0.7) - first_call_kwargs = mock_litellm_completion.call_args_list[0][1] - assert first_call_kwargs.get('temperature') == 0.7 - - # Check the temperature in the second call (should remain 0.7, not changed to 0.2) - second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 0.7 - - -@patch('openhands.llm.llm.litellm_completion') -def test_completion_retry_with_llm_no_response_error_nonzero_temp_successful_retry( - mock_litellm_completion, default_config -): - """ - Test that the retry decorator works for LLMNoResponseError with non-zero temperature - and successfully retries while preserving the original temperature. - - This test verifies that: - 1. 
First call to llm_completion with temperature=0.7 throws LLMNoResponseError - 2. Second call with the same temperature=0.7 returns a successful response - """ - - # Define a side effect function that raises LLMNoResponseError on first call - # and returns a successful response on second call - def side_effect(*args, **kwargs): - temperature = kwargs.get('temperature', 0) - - if mock_litellm_completion.call_count == 1: - # First call should raise LLMNoResponseError - raise LLMNoResponseError('LLM did not return a response') - else: - # Second call should return a successful response - return { - 'choices': [ - { - 'message': { - 'content': f'Successful response with temperature={temperature}' - } - } - ] - } - - mock_litellm_completion.side_effect = side_effect - - # Create LLM instance and make a completion call with non-zero temperature - llm = LLM(config=default_config) - response = llm.completion( - messages=[{'role': 'user', 'content': 'Hello!'}], - stream=False, - temperature=0.7, # Non-zero temperature - ) - - # Verify the response after retry - assert ( - response['choices'][0]['message']['content'] - == 'Successful response with temperature=0.7' - ) - - # Verify that litellm_completion was called twice - assert mock_litellm_completion.call_count == 2 - - # Check the temperature in the first call (should be 0.7) - first_call_kwargs = mock_litellm_completion.call_args_list[0][1] - assert first_call_kwargs.get('temperature') == 0.7 - - # Check the temperature in the second call (should still be 0.7) - second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 0.7 - - -@patch('openhands.llm.llm.litellm_completion') -def test_completion_retry_with_llm_no_response_error_successful_retry( - mock_litellm_completion, default_config -): - """ - Test that the retry decorator works for LLMNoResponseError with zero temperature - and successfully retries with temperature=0.2. - - This test verifies that: - 1. First call to llm_completion with temperature=0 throws LLMNoResponseError - 2. 
Second call with temperature=0.2 returns a successful response - """ - - # Define a side effect function that raises LLMNoResponseError on first call - # and returns a successful response on second call - def side_effect(*args, **kwargs): - temperature = kwargs.get('temperature', 0) - - if mock_litellm_completion.call_count == 1: - # First call should raise LLMNoResponseError - raise LLMNoResponseError('LLM did not return a response') - else: - # Second call should return a successful response - return { - 'choices': [ - { - 'message': { - 'content': f'Successful response with temperature={temperature}' - } - } - ] - } - - mock_litellm_completion.side_effect = side_effect - - # Create LLM instance and make a completion call with explicit temperature=0 - llm = LLM(config=default_config) - response = llm.completion( - messages=[{'role': 'user', 'content': 'Hello!'}], - stream=False, - temperature=0, # Explicitly set temperature to 0 - ) - - # Verify the response after retry - assert ( - response['choices'][0]['message']['content'] - == 'Successful response with temperature=1.0' - ) - - # Verify that litellm_completion was called twice - assert mock_litellm_completion.call_count == 2 - - # Check the temperature in the first call (should be 0) - first_call_kwargs = mock_litellm_completion.call_args_list[0][1] - assert first_call_kwargs.get('temperature') == 0 - - # Check the temperature in the second call (should be 1.0) - second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 1.0 - - @patch('openhands.llm.llm.litellm_completion') def test_completion_with_litellm_mock(mock_litellm_completion, default_config): mock_response = {