From f17c973ea3f83c7d2a556948c76d5fc4b4628c76 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Fri, 4 Apr 2025 14:06:25 -0400
Subject: [PATCH 01/14] fix(llm): retry on InternalServerError and Timeout; handle Gemini returns len(choices) < 1

---
 openhands/llm/llm.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 42ffe5eac87c..91a480c7b2a7 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -37,7 +37,11 @@
 __all__ = ['LLM']
 
 # tuple of exceptions to retry on
-LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,)
+LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
+    RateLimitError,
+    litellm.Timeout,
+    litellm.InternalServerError,
+)
 
 # cache prompt supporting models
 # remove this when we gemini and deepseek are supported
@@ -63,6 +67,7 @@
     'o1-2024-12-17',
     'o3-mini-2025-01-31',
     'o3-mini',
+    'gemini-2.5-pro',
 ]
 
 REASONING_EFFORT_SUPPORTED_MODELS = [
@@ -268,7 +273,16 @@ def wrapper(*args, **kwargs):
             # if we mocked function calling, and we have tools, convert the response back to function calling format
             if mock_function_calling and mock_fncall_tools is not None:
                 logger.debug(f'Response choices: {len(resp.choices)}')
-                assert len(resp.choices) >= 1
+                if len(resp.choices) < 1:
+                    # Just try again
+                    raise litellm.InternalServerError(
+                        'Response choices is less than 1 - This is only seen in Gemini Pro 2.5 so far. Response: '
+                        + str(resp),
+                        self.config.custom_llm_provider,
+                        self.config.model,
+                        response=resp,
+                    )
+
                 non_fncall_response_message = resp.choices[0].message
                 fn_call_messages_with_response = (
                     convert_non_fncall_messages_to_fncall_messages(
@@ -282,6 +296,15 @@ def wrapper(*args, **kwargs):
                 )
                 resp.choices[0].message = fn_call_response_message
 
+            if len(resp.choices) < 1:
+                # Just try again
+                raise litellm.InternalServerError(
+                    'Response choices is less than 1 - This is only seen in Gemini Pro 2.5 so far. Response: '
+                    + str(resp),
+                    self.config.custom_llm_provider,
+                    self.config.model,
+                    response=resp,
+                )
             message_back: str = resp['choices'][0]['message']['content'] or ''
             tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][
                 'message'

From 10d4077990f9bf3531aee299bbf7aa18ac70a901 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Sat, 5 Apr 2025 02:12:34 +0800
Subject: [PATCH 02/14] Update openhands/llm/llm.py

Co-authored-by: Engel Nyst
---
 openhands/llm/llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 91a480c7b2a7..322f485850b2 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -276,7 +276,7 @@ def wrapper(*args, **kwargs):
                 if len(resp.choices) < 1:
                     # Just try again
                     raise litellm.InternalServerError(
-                        'Response choices is less than 1 - This is only seen in Gemini Pro 2.5 so far. Response: '
+                        'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '
                         + str(resp),
                         self.config.custom_llm_provider,
                         self.config.model,

From 77a43ef1d6e05ff10841a43b8ab0827ccb6c3884 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Sat, 5 Apr 2025 02:23:45 +0800
Subject: [PATCH 03/14] Update openhands/llm/llm.py

Co-authored-by: Engel Nyst
---
 openhands/llm/llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 322f485850b2..8f8caab4bfa1 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -299,7 +299,7 @@ def wrapper(*args, **kwargs):
             if len(resp.choices) < 1:
                 # Just try again
                 raise litellm.InternalServerError(
-                    'Response choices is less than 1 - This is only seen in Gemini Pro 2.5 so far. Response: '
+                    'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '
                     + str(resp),
                     self.config.custom_llm_provider,
                     self.config.model,

From d04b240527d9052b932c2dc5fdda66217befc8bc Mon Sep 17 00:00:00 2001
From: openhands
Date: Fri, 4 Apr 2025 18:27:55 +0000
Subject: [PATCH 04/14] Fix AttributeError in LLM class when accessing resp.choices as a dict

---
 openhands/llm/llm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 8f8caab4bfa1..60ddf5cba523 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -296,7 +296,8 @@ def wrapper(*args, **kwargs):
                 )
                 resp.choices[0].message = fn_call_response_message
 
-            if len(resp.choices) < 1:
+            # Check if resp has 'choices' key with at least one item
+            if not resp.get('choices') or len(resp['choices']) < 1:
                 # Just try again
                 raise litellm.InternalServerError(
                     'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '

From 9521c362207f0025d02fc367e161e0be5f91a7a5 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Fri, 4 Apr 2025 20:04:12 +0000
Subject: [PATCH 05/14] add special exception for llm no response error

---
 openhands/core/exceptions.py | 10 ++++++++++
 openhands/llm/llm.py         | 20 +++++++------------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/openhands/core/exceptions.py b/openhands/core/exceptions.py
index 342e0db0e7c5..c154152d4b66 100644
--- a/openhands/core/exceptions.py
+++ b/openhands/core/exceptions.py
@@ -88,6 +88,16 @@ def __init__(
         super().__init__(message)
 
 
+# This exception should be retried
+# Typically, after retry with a non-zero temperature, the LLM will return a response
+class LLMNoResponseError(Exception):
+    def __init__(
+        self,
+        message: str = 'LLM did not return a response. This is only seen in Gemini models so far.',
+    ) -> None:
+        super().__init__(message)
+
+
 class UserCancelledError(Exception):
     def __init__(self, message: str = 'User cancelled the request') -> None:
         super().__init__(message)
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 60ddf5cba523..27a948fd1c3a 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -23,6 +23,7 @@
 from litellm.types.utils import CostPerToken, ModelResponse, Usage
 from litellm.utils import create_pretrained_tokenizer
 
+from openhands.core.exceptions import LLMNoResponseError
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import Message
 from openhands.llm.debug_mixin import DebugMixin
@@ -41,6 +42,7 @@
     RateLimitError,
     litellm.Timeout,
     litellm.InternalServerError,
+    LLMNoResponseError,
 )
 
 # cache prompt supporting models
@@ -272,15 +274,10 @@ def wrapper(*args, **kwargs):
             # if we mocked function calling, and we have tools, convert the response back to function calling format
             if mock_function_calling and mock_fncall_tools is not None:
-                logger.debug(f'Response choices: {len(resp.choices)}')
                 if len(resp.choices) < 1:
-                    # Just try again
-                    raise litellm.InternalServerError(
+                    raise LLMNoResponseError(
                         'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '
-                        + str(resp),
-                        self.config.custom_llm_provider,
-                        self.config.model,
-                        response=resp,
+                        + str(resp)
                     )
 
                 non_fncall_response_message = resp.choices[0].message
                 fn_call_messages_with_response = (
@@ -298,14 +295,11 @@ def wrapper(*args, **kwargs):
 
             # Check if resp has 'choices' key with at least one item
             if not resp.get('choices') or len(resp['choices']) < 1:
-                # Just try again
-                raise litellm.InternalServerError(
+                raise LLMNoResponseError(
                     'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '
-                    + str(resp),
-                    self.config.custom_llm_provider,
-                    self.config.model,
-                    response=resp,
+                    + str(resp)
                 )
+
             message_back: str = resp['choices'][0]['message']['content'] or ''
             tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][
                 'message'

From 066a8988c8d26c285c60d3c7e7f7e2f978e86854 Mon Sep 17 00:00:00 2001
From: Xingyao Wang
Date: Fri, 4 Apr 2025 20:05:36 +0000
Subject: [PATCH 06/14] try handle LLM No Response Error

---
 openhands/llm/retry_mixin.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py
index 08a8add63939..4410aecd9156 100644
--- a/openhands/llm/retry_mixin.py
+++ b/openhands/llm/retry_mixin.py
@@ -5,6 +5,7 @@
     wait_exponential,
 )
 
+from openhands.core.exceptions import LLMNoResponseError
 from openhands.core.logger import openhands_logger as logger
 from openhands.utils.tenacity_stop import stop_if_should_exit
 
@@ -35,6 +36,15 @@ def before_sleep(retry_state):
             if retry_listener:
                 retry_listener(retry_state.attempt_number, num_retries)
 
+            # Check if the exception is LLMNoResponseError
+            exception = retry_state.outcome.exception()
+            if isinstance(exception, LLMNoResponseError):
+                logger.debug(
+                    'LLMNoResponseError detected, setting temperature to 0.2 for next attempt'
+                )
+                if hasattr(retry_state, 'kwargs'):
+                    retry_state.kwargs['temperature'] = 0.2
+
         return retry(
             before_sleep=before_sleep,
             stop=stop_after_attempt(num_retries) | stop_if_should_exit(),

From 7f92fece4d94ccac2c292408e75441a8c0a75a71 Mon Sep 17 00:00:00 2001
From: openhands
Date: Fri, 4 Apr 2025 20:15:10 +0000
Subject: [PATCH 07/14] Add tests for LLMNoResponseError retry decorator

---
 tests/unit/test_llm.py | 164 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 163 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index 87db3bc62e2e..2e92e46f87e0 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -10,7 +10,7 @@
 )
 
 from openhands.core.config import LLMConfig
-from openhands.core.exceptions import OperationCancelled
+from openhands.core.exceptions import LLMNoResponseError, OperationCancelled
 from openhands.core.message import Message, TextContent
 from openhands.llm.llm import LLM
 from openhands.llm.metrics import Metrics, TokenUsage
@@ -385,6 +385,168 @@ def side_effect(*args, **kwargs):
     _should_exit = False
 
 
+@patch('openhands.llm.llm.litellm_completion')
+def test_completion_retry_with_llm_no_response_error_zero_temp(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that the retry decorator properly handles LLMNoResponseError by:
+    1. First call to llm_completion uses temperature=0 and throws LLMNoResponseError
+    2.
Second call should have temperature=0.2 and return a successful response + """ + + # Define a side effect function that checks the temperature parameter + # and returns different responses based on it + def side_effect(*args, **kwargs): + temperature = kwargs.get('temperature', 0) + + # First call with temperature=0 should raise LLMNoResponseError + if temperature == 0: + raise LLMNoResponseError('LLM did not return a response') + + # Second call with temperature=0.2 should return a successful response + elif temperature == 0.2: + return { + 'choices': [{'message': {'content': 'Successful response after retry'}}] + } + + # Any other temperature value should return a different response + else: + return { + 'choices': [ + {'message': {'content': f'Response with temperature={temperature}'}} + ] + } + + mock_litellm_completion.side_effect = side_effect + + # Create LLM instance and make a completion call + llm = LLM(config=default_config) + response = llm.completion( + messages=[{'role': 'user', 'content': 'Hello!'}], + stream=False, + temperature=0, # Initial temperature is 0 + ) + + # Verify the response after retry + assert ( + response['choices'][0]['message']['content'] + == 'Successful response after retry' + ) + + # Verify that litellm_completion was called twice + assert mock_litellm_completion.call_count == 2 + + # Check the temperature in the first call (should be 0) + first_call_kwargs = mock_litellm_completion.call_args_list[0][1] + assert first_call_kwargs.get('temperature') == 0 + + # Check the temperature in the second call (should be 0.2) + second_call_kwargs = mock_litellm_completion.call_args_list[1][1] + assert second_call_kwargs.get('temperature') == 0.2 + + +@patch('openhands.llm.llm.litellm_completion') +def test_completion_retry_with_llm_no_response_error_nonzero_temp( + mock_litellm_completion, default_config +): + """ + Test that the retry decorator works for LLMNoResponseError when initial temperature is non-zero, + and changes the temperature to 0.2 on retry. + + This test verifies that when LLMNoResponseError is raised with a non-zero temperature: + 1. The retry mechanism is triggered + 2. The temperature is changed to 0.2 on the retry attempt + 3. 
After all retries are exhausted, the exception is propagated + """ + # Define a side effect function that always raises LLMNoResponseError + mock_litellm_completion.side_effect = LLMNoResponseError( + 'LLM did not return a response' + ) + + # Create LLM instance and make a completion call with non-zero temperature + llm = LLM(config=default_config) + + # We expect this to raise an exception after all retries are exhausted + with pytest.raises(LLMNoResponseError): + llm.completion( + messages=[{'role': 'user', 'content': 'Hello!'}], + stream=False, + temperature=0.7, # Initial temperature is non-zero + ) + + # Verify that litellm_completion was called multiple times (retries happened) + # The default_config has num_retries=2, so it should be called 2 times + assert mock_litellm_completion.call_count == 2 + + # Check the temperature in the first call (should be 0.7) + first_call_kwargs = mock_litellm_completion.call_args_list[0][1] + assert first_call_kwargs.get('temperature') == 0.7 + + # Check the temperature in the second call (should be changed to 0.2) + second_call_kwargs = mock_litellm_completion.call_args_list[1][1] + assert second_call_kwargs.get('temperature') == 0.2 + + +@patch('openhands.llm.llm.litellm_completion') +def test_completion_retry_with_llm_no_response_error_successful_retry( + mock_litellm_completion, default_config +): + """ + Test that the retry decorator works for LLMNoResponseError and successfully retries with temperature=0.2. + + This test verifies that: + 1. First call to llm_completion throws LLMNoResponseError + 2. Second call with temperature=0.2 returns a successful response + """ + + # Define a side effect function that raises LLMNoResponseError on first call + # and returns a successful response on second call + def side_effect(*args, **kwargs): + temperature = kwargs.get('temperature', 0) + + if mock_litellm_completion.call_count == 1: + # First call should raise LLMNoResponseError + raise LLMNoResponseError('LLM did not return a response') + else: + # Second call should return a successful response + return { + 'choices': [ + { + 'message': { + 'content': f'Successful response with temperature={temperature}' + } + } + ] + } + + mock_litellm_completion.side_effect = side_effect + + # Create LLM instance and make a completion call + llm = LLM(config=default_config) + response = llm.completion( + messages=[{'role': 'user', 'content': 'Hello!'}], + stream=False, + ) + + # Verify the response after retry + assert ( + response['choices'][0]['message']['content'] + == 'Successful response with temperature=0.2' + ) + + # Verify that litellm_completion was called twice + assert mock_litellm_completion.call_count == 2 + + # Check the temperature in the first call (should be 0) + first_call_kwargs = mock_litellm_completion.call_args_list[0][1] + assert first_call_kwargs.get('temperature') == 0 + + # Check the temperature in the second call (should be 0.2) + second_call_kwargs = mock_litellm_completion.call_args_list[1][1] + assert second_call_kwargs.get('temperature') == 0.2 + + @patch('openhands.llm.llm.litellm_completion') def test_completion_with_litellm_mock(mock_litellm_completion, default_config): mock_response = { From 7a4c4558fc505175be02eaaa4ef07f20efe03f68 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 4 Apr 2025 20:17:07 +0000 Subject: [PATCH 08/14] change to .warning --- openhands/llm/retry_mixin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index 
4410aecd9156..fdc767139d4c 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -39,7 +39,7 @@ def before_sleep(retry_state): # Check if the exception is LLMNoResponseError exception = retry_state.outcome.exception() if isinstance(exception, LLMNoResponseError): - logger.debug( + logger.warning( 'LLMNoResponseError detected, setting temperature to 0.2 for next attempt' ) if hasattr(retry_state, 'kwargs'): From 4d9a6967ca95a1dd4f5a9ca6cb9827e5a7ad0fd8 Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 4 Apr 2025 20:27:30 +0000 Subject: [PATCH 09/14] Update retry decorator to only change temperature when it's zero --- openhands/llm/retry_mixin.py | 15 +++++-- tests/unit/test_llm.py | 77 ++++++++++++++++++++++++++++++++---- 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index fdc767139d4c..eae0f9cfb2cf 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -39,11 +39,18 @@ def before_sleep(retry_state): # Check if the exception is LLMNoResponseError exception = retry_state.outcome.exception() if isinstance(exception, LLMNoResponseError): - logger.warning( - 'LLMNoResponseError detected, setting temperature to 0.2 for next attempt' - ) if hasattr(retry_state, 'kwargs'): - retry_state.kwargs['temperature'] = 0.2 + # Only change temperature if it's zero or not set + current_temp = retry_state.kwargs.get('temperature', 0) + if current_temp == 0: + logger.warning( + 'LLMNoResponseError detected with temperature=0, setting temperature to 0.2 for next attempt' + ) + retry_state.kwargs['temperature'] = 0.2 + else: + logger.warning( + f'LLMNoResponseError detected with temperature={current_temp}, keeping original temperature' + ) return retry( before_sleep=before_sleep, diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 2e92e46f87e0..70c103f7d8f4 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -452,11 +452,11 @@ def test_completion_retry_with_llm_no_response_error_nonzero_temp( ): """ Test that the retry decorator works for LLMNoResponseError when initial temperature is non-zero, - and changes the temperature to 0.2 on retry. + and keeps the original temperature on retry. This test verifies that when LLMNoResponseError is raised with a non-zero temperature: 1. The retry mechanism is triggered - 2. The temperature is changed to 0.2 on the retry attempt + 2. The temperature remains unchanged (not set to 0.2) 3. After all retries are exhausted, the exception is propagated """ # Define a side effect function that always raises LLMNoResponseError @@ -483,9 +483,70 @@ def test_completion_retry_with_llm_no_response_error_nonzero_temp( first_call_kwargs = mock_litellm_completion.call_args_list[0][1] assert first_call_kwargs.get('temperature') == 0.7 - # Check the temperature in the second call (should be changed to 0.2) + # Check the temperature in the second call (should remain 0.7, not changed to 0.2) second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 0.2 + assert second_call_kwargs.get('temperature') == 0.7 + + +@patch('openhands.llm.llm.litellm_completion') +def test_completion_retry_with_llm_no_response_error_nonzero_temp_successful_retry( + mock_litellm_completion, default_config +): + """ + Test that the retry decorator works for LLMNoResponseError with non-zero temperature + and successfully retries while preserving the original temperature. + + This test verifies that: + 1. 
First call to llm_completion with temperature=0.7 throws LLMNoResponseError + 2. Second call with the same temperature=0.7 returns a successful response + """ + + # Define a side effect function that raises LLMNoResponseError on first call + # and returns a successful response on second call + def side_effect(*args, **kwargs): + temperature = kwargs.get('temperature', 0) + + if mock_litellm_completion.call_count == 1: + # First call should raise LLMNoResponseError + raise LLMNoResponseError('LLM did not return a response') + else: + # Second call should return a successful response + return { + 'choices': [ + { + 'message': { + 'content': f'Successful response with temperature={temperature}' + } + } + ] + } + + mock_litellm_completion.side_effect = side_effect + + # Create LLM instance and make a completion call with non-zero temperature + llm = LLM(config=default_config) + response = llm.completion( + messages=[{'role': 'user', 'content': 'Hello!'}], + stream=False, + temperature=0.7, # Non-zero temperature + ) + + # Verify the response after retry + assert ( + response['choices'][0]['message']['content'] + == 'Successful response with temperature=0.7' + ) + + # Verify that litellm_completion was called twice + assert mock_litellm_completion.call_count == 2 + + # Check the temperature in the first call (should be 0.7) + first_call_kwargs = mock_litellm_completion.call_args_list[0][1] + assert first_call_kwargs.get('temperature') == 0.7 + + # Check the temperature in the second call (should still be 0.7) + second_call_kwargs = mock_litellm_completion.call_args_list[1][1] + assert second_call_kwargs.get('temperature') == 0.7 @patch('openhands.llm.llm.litellm_completion') @@ -493,10 +554,11 @@ def test_completion_retry_with_llm_no_response_error_successful_retry( mock_litellm_completion, default_config ): """ - Test that the retry decorator works for LLMNoResponseError and successfully retries with temperature=0.2. + Test that the retry decorator works for LLMNoResponseError with zero temperature + and successfully retries with temperature=0.2. This test verifies that: - 1. First call to llm_completion throws LLMNoResponseError + 1. First call to llm_completion with temperature=0 throws LLMNoResponseError 2. 
Second call with temperature=0.2 returns a successful response """ @@ -522,11 +584,12 @@ def side_effect(*args, **kwargs): mock_litellm_completion.side_effect = side_effect - # Create LLM instance and make a completion call + # Create LLM instance and make a completion call with explicit temperature=0 llm = LLM(config=default_config) response = llm.completion( messages=[{'role': 'user', 'content': 'Hello!'}], stream=False, + temperature=0, # Explicitly set temperature to 0 ) # Verify the response after retry From e439fb616488903d7ac491769e6c58d166249f8c Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 4 Apr 2025 20:32:07 +0000 Subject: [PATCH 10/14] increase temperature to 1.0 --- openhands/llm/retry_mixin.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index eae0f9cfb2cf..d5726adb91cc 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -43,10 +43,11 @@ def before_sleep(retry_state): # Only change temperature if it's zero or not set current_temp = retry_state.kwargs.get('temperature', 0) if current_temp == 0: + retry_state.kwargs['temperature'] = 1.0 logger.warning( - 'LLMNoResponseError detected with temperature=0, setting temperature to 0.2 for next attempt' + 'LLMNoResponseError detected with temperature=0, setting temperature to 1.0 for next attempt. kwargs: ' + + str(retry_state.kwargs) ) - retry_state.kwargs['temperature'] = 0.2 else: logger.warning( f'LLMNoResponseError detected with temperature={current_temp}, keeping original temperature' From 8b204dafebc80888039e900a60c64e379d9b5ee7 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 4 Apr 2025 20:40:47 +0000 Subject: [PATCH 11/14] fix llm retry temperature --- tests/unit/test_llm.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 70c103f7d8f4..6d243ed5775f 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -404,13 +404,6 @@ def side_effect(*args, **kwargs): if temperature == 0: raise LLMNoResponseError('LLM did not return a response') - # Second call with temperature=0.2 should return a successful response - elif temperature == 0.2: - return { - 'choices': [{'message': {'content': 'Successful response after retry'}}] - } - - # Any other temperature value should return a different response else: return { 'choices': [ @@ -430,8 +423,7 @@ def side_effect(*args, **kwargs): # Verify the response after retry assert ( - response['choices'][0]['message']['content'] - == 'Successful response after retry' + response['choices'][0]['message']['content'] == 'Response with temperature=1.0' ) # Verify that litellm_completion was called twice @@ -441,9 +433,9 @@ def side_effect(*args, **kwargs): first_call_kwargs = mock_litellm_completion.call_args_list[0][1] assert first_call_kwargs.get('temperature') == 0 - # Check the temperature in the second call (should be 0.2) + # Check the temperature in the second call (should be 1.0) second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 0.2 + assert second_call_kwargs.get('temperature') == 1.0 @patch('openhands.llm.llm.litellm_completion') @@ -595,7 +587,7 @@ def side_effect(*args, **kwargs): # Verify the response after retry assert ( response['choices'][0]['message']['content'] - == 'Successful response with temperature=0.2' + == 'Successful response with temperature=1.0' ) # Verify that litellm_completion was 
called twice @@ -605,9 +597,9 @@ def side_effect(*args, **kwargs): first_call_kwargs = mock_litellm_completion.call_args_list[0][1] assert first_call_kwargs.get('temperature') == 0 - # Check the temperature in the second call (should be 0.2) + # Check the temperature in the second call (should be 1.0) second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 0.2 + assert second_call_kwargs.get('temperature') == 1.0 @patch('openhands.llm.llm.litellm_completion') From 271e049b4abff38a75d840c0fff24b0704e459e4 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Fri, 4 Apr 2025 21:13:16 +0000 Subject: [PATCH 12/14] stop logging kwargs --- openhands/llm/retry_mixin.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index d5726adb91cc..19b7c9689f7b 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -45,8 +45,7 @@ def before_sleep(retry_state): if current_temp == 0: retry_state.kwargs['temperature'] = 1.0 logger.warning( - 'LLMNoResponseError detected with temperature=0, setting temperature to 1.0 for next attempt. kwargs: ' - + str(retry_state.kwargs) + 'LLMNoResponseError detected with temperature=0, setting temperature to 1.0 for next attempt.' ) else: logger.warning( From a275efc1eee022352450a5d6ea0ba5abd3a17d90 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Sat, 5 Apr 2025 20:51:51 +0000 Subject: [PATCH 13/14] action execution retry on retryable httpx error --- .../impl/action_execution/action_execution_client.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/openhands/runtime/impl/action_execution/action_execution_client.py b/openhands/runtime/impl/action_execution/action_execution_client.py index 3ae72b7790fa..d9aa31d8c846 100644 --- a/openhands/runtime/impl/action_execution/action_execution_client.py +++ b/openhands/runtime/impl/action_execution/action_execution_client.py @@ -45,7 +45,7 @@ from openhands.utils.tenacity_stop import stop_if_should_exit -def _is_retryable_check_alive_error(exception): +def _is_retryable_error(exception): return isinstance( exception, (httpx.RemoteProtocolError, httpcore.RemoteProtocolError) ) @@ -93,6 +93,11 @@ def __init__( def _get_action_execution_server_host(self) -> str: pass + @retry( + retry=retry_if_exception(_is_retryable_error), + stop=stop_after_attempt(5) | stop_if_should_exit(), + wait=wait_exponential(multiplier=1, min=4, max=15), + ) def _send_action_server_request( self, method: str, @@ -114,11 +119,6 @@ def _send_action_server_request( """ return send_request(self.session, method, url, **kwargs) - @retry( - retry=retry_if_exception(_is_retryable_check_alive_error), - stop=stop_after_attempt(5) | stop_if_should_exit(), - wait=wait_exponential(multiplier=1, min=4, max=15), - ) def check_if_alive(self) -> None: response = self._send_action_server_request( 'GET', From 1134352e25973d50d25e5e4db8b2ee972669d377 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Sat, 5 Apr 2025 16:55:37 -0400 Subject: [PATCH 14/14] revert unrelated files --- openhands/core/exceptions.py | 10 -- openhands/core/setup.py | 6 +- openhands/llm/llm.py | 24 +--- openhands/llm/retry_mixin.py | 17 --- tests/unit/test_llm.py | 219 +---------------------------------- 5 files changed, 9 insertions(+), 267 deletions(-) diff --git a/openhands/core/exceptions.py b/openhands/core/exceptions.py index c154152d4b66..342e0db0e7c5 100644 --- a/openhands/core/exceptions.py +++ 
b/openhands/core/exceptions.py @@ -88,16 +88,6 @@ def __init__( super().__init__(message) -# This exception should be retried -# Typically, after retry with a non-zero temperature, the LLM will return a response -class LLMNoResponseError(Exception): - def __init__( - self, - message: str = 'LLM did not return a response. This is only seen in Gemini models so far.', - ) -> None: - super().__init__(message) - - class UserCancelledError(Exception): def __init__(self, message: str = 'User cancelled the request') -> None: super().__init__(message) diff --git a/openhands/core/setup.py b/openhands/core/setup.py index 55bc6d9779a1..fd9554ea0e4a 100644 --- a/openhands/core/setup.py +++ b/openhands/core/setup.py @@ -119,7 +119,11 @@ def initialize_repository_for_runtime( if selected_repository and provider_tokens: logger.debug(f'Selected repository {selected_repository}.') repo_directory = call_async_from_sync( - runtime.clone_repo, GENERAL_TIMEOUT, github_token, selected_repository, None + runtime.clone_repo, + GENERAL_TIMEOUT, + provider_tokens, + selected_repository, + None, ) # Run setup script if it exists runtime.maybe_run_setup_script() diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index 27a948fd1c3a..42ffe5eac87c 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -23,7 +23,6 @@ from litellm.types.utils import CostPerToken, ModelResponse, Usage from litellm.utils import create_pretrained_tokenizer -from openhands.core.exceptions import LLMNoResponseError from openhands.core.logger import openhands_logger as logger from openhands.core.message import Message from openhands.llm.debug_mixin import DebugMixin @@ -38,12 +37,7 @@ __all__ = ['LLM'] # tuple of exceptions to retry on -LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = ( - RateLimitError, - litellm.Timeout, - litellm.InternalServerError, - LLMNoResponseError, -) +LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,) # cache prompt supporting models # remove this when we gemini and deepseek are supported @@ -69,7 +63,6 @@ 'o1-2024-12-17', 'o3-mini-2025-01-31', 'o3-mini', - 'gemini-2.5-pro', ] REASONING_EFFORT_SUPPORTED_MODELS = [ @@ -274,12 +267,8 @@ def wrapper(*args, **kwargs): # if we mocked function calling, and we have tools, convert the response back to function calling format if mock_function_calling and mock_fncall_tools is not None: - if len(resp.choices) < 1: - raise LLMNoResponseError( - 'Response choices is less than 1 - This is only seen in Gemini models so far. Response: ' - + str(resp) - ) - + logger.debug(f'Response choices: {len(resp.choices)}') + assert len(resp.choices) >= 1 non_fncall_response_message = resp.choices[0].message fn_call_messages_with_response = ( convert_non_fncall_messages_to_fncall_messages( @@ -293,13 +282,6 @@ def wrapper(*args, **kwargs): ) resp.choices[0].message = fn_call_response_message - # Check if resp has 'choices' key with at least one item - if not resp.get('choices') or len(resp['choices']) < 1: - raise LLMNoResponseError( - 'Response choices is less than 1 - This is only seen in Gemini models so far. 
Response: ' - + str(resp) - ) - message_back: str = resp['choices'][0]['message']['content'] or '' tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][ 'message' diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index 19b7c9689f7b..08a8add63939 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -5,7 +5,6 @@ wait_exponential, ) -from openhands.core.exceptions import LLMNoResponseError from openhands.core.logger import openhands_logger as logger from openhands.utils.tenacity_stop import stop_if_should_exit @@ -36,22 +35,6 @@ def before_sleep(retry_state): if retry_listener: retry_listener(retry_state.attempt_number, num_retries) - # Check if the exception is LLMNoResponseError - exception = retry_state.outcome.exception() - if isinstance(exception, LLMNoResponseError): - if hasattr(retry_state, 'kwargs'): - # Only change temperature if it's zero or not set - current_temp = retry_state.kwargs.get('temperature', 0) - if current_temp == 0: - retry_state.kwargs['temperature'] = 1.0 - logger.warning( - 'LLMNoResponseError detected with temperature=0, setting temperature to 1.0 for next attempt.' - ) - else: - logger.warning( - f'LLMNoResponseError detected with temperature={current_temp}, keeping original temperature' - ) - return retry( before_sleep=before_sleep, stop=stop_after_attempt(num_retries) | stop_if_should_exit(), diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index 6d243ed5775f..87db3bc62e2e 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -10,7 +10,7 @@ ) from openhands.core.config import LLMConfig -from openhands.core.exceptions import LLMNoResponseError, OperationCancelled +from openhands.core.exceptions import OperationCancelled from openhands.core.message import Message, TextContent from openhands.llm.llm import LLM from openhands.llm.metrics import Metrics, TokenUsage @@ -385,223 +385,6 @@ def side_effect(*args, **kwargs): _should_exit = False -@patch('openhands.llm.llm.litellm_completion') -def test_completion_retry_with_llm_no_response_error_zero_temp( - mock_litellm_completion, default_config -): - """ - Test that the retry decorator properly handles LLMNoResponseError by: - 1. First call to llm_completion uses temperature=0 and throws LLMNoResponseError - 2. 
Second call should have temperature=0.2 and return a successful response - """ - - # Define a side effect function that checks the temperature parameter - # and returns different responses based on it - def side_effect(*args, **kwargs): - temperature = kwargs.get('temperature', 0) - - # First call with temperature=0 should raise LLMNoResponseError - if temperature == 0: - raise LLMNoResponseError('LLM did not return a response') - - else: - return { - 'choices': [ - {'message': {'content': f'Response with temperature={temperature}'}} - ] - } - - mock_litellm_completion.side_effect = side_effect - - # Create LLM instance and make a completion call - llm = LLM(config=default_config) - response = llm.completion( - messages=[{'role': 'user', 'content': 'Hello!'}], - stream=False, - temperature=0, # Initial temperature is 0 - ) - - # Verify the response after retry - assert ( - response['choices'][0]['message']['content'] == 'Response with temperature=1.0' - ) - - # Verify that litellm_completion was called twice - assert mock_litellm_completion.call_count == 2 - - # Check the temperature in the first call (should be 0) - first_call_kwargs = mock_litellm_completion.call_args_list[0][1] - assert first_call_kwargs.get('temperature') == 0 - - # Check the temperature in the second call (should be 1.0) - second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 1.0 - - -@patch('openhands.llm.llm.litellm_completion') -def test_completion_retry_with_llm_no_response_error_nonzero_temp( - mock_litellm_completion, default_config -): - """ - Test that the retry decorator works for LLMNoResponseError when initial temperature is non-zero, - and keeps the original temperature on retry. - - This test verifies that when LLMNoResponseError is raised with a non-zero temperature: - 1. The retry mechanism is triggered - 2. The temperature remains unchanged (not set to 0.2) - 3. After all retries are exhausted, the exception is propagated - """ - # Define a side effect function that always raises LLMNoResponseError - mock_litellm_completion.side_effect = LLMNoResponseError( - 'LLM did not return a response' - ) - - # Create LLM instance and make a completion call with non-zero temperature - llm = LLM(config=default_config) - - # We expect this to raise an exception after all retries are exhausted - with pytest.raises(LLMNoResponseError): - llm.completion( - messages=[{'role': 'user', 'content': 'Hello!'}], - stream=False, - temperature=0.7, # Initial temperature is non-zero - ) - - # Verify that litellm_completion was called multiple times (retries happened) - # The default_config has num_retries=2, so it should be called 2 times - assert mock_litellm_completion.call_count == 2 - - # Check the temperature in the first call (should be 0.7) - first_call_kwargs = mock_litellm_completion.call_args_list[0][1] - assert first_call_kwargs.get('temperature') == 0.7 - - # Check the temperature in the second call (should remain 0.7, not changed to 0.2) - second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 0.7 - - -@patch('openhands.llm.llm.litellm_completion') -def test_completion_retry_with_llm_no_response_error_nonzero_temp_successful_retry( - mock_litellm_completion, default_config -): - """ - Test that the retry decorator works for LLMNoResponseError with non-zero temperature - and successfully retries while preserving the original temperature. - - This test verifies that: - 1. 
First call to llm_completion with temperature=0.7 throws LLMNoResponseError - 2. Second call with the same temperature=0.7 returns a successful response - """ - - # Define a side effect function that raises LLMNoResponseError on first call - # and returns a successful response on second call - def side_effect(*args, **kwargs): - temperature = kwargs.get('temperature', 0) - - if mock_litellm_completion.call_count == 1: - # First call should raise LLMNoResponseError - raise LLMNoResponseError('LLM did not return a response') - else: - # Second call should return a successful response - return { - 'choices': [ - { - 'message': { - 'content': f'Successful response with temperature={temperature}' - } - } - ] - } - - mock_litellm_completion.side_effect = side_effect - - # Create LLM instance and make a completion call with non-zero temperature - llm = LLM(config=default_config) - response = llm.completion( - messages=[{'role': 'user', 'content': 'Hello!'}], - stream=False, - temperature=0.7, # Non-zero temperature - ) - - # Verify the response after retry - assert ( - response['choices'][0]['message']['content'] - == 'Successful response with temperature=0.7' - ) - - # Verify that litellm_completion was called twice - assert mock_litellm_completion.call_count == 2 - - # Check the temperature in the first call (should be 0.7) - first_call_kwargs = mock_litellm_completion.call_args_list[0][1] - assert first_call_kwargs.get('temperature') == 0.7 - - # Check the temperature in the second call (should still be 0.7) - second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 0.7 - - -@patch('openhands.llm.llm.litellm_completion') -def test_completion_retry_with_llm_no_response_error_successful_retry( - mock_litellm_completion, default_config -): - """ - Test that the retry decorator works for LLMNoResponseError with zero temperature - and successfully retries with temperature=0.2. - - This test verifies that: - 1. First call to llm_completion with temperature=0 throws LLMNoResponseError - 2. 
Second call with temperature=0.2 returns a successful response - """ - - # Define a side effect function that raises LLMNoResponseError on first call - # and returns a successful response on second call - def side_effect(*args, **kwargs): - temperature = kwargs.get('temperature', 0) - - if mock_litellm_completion.call_count == 1: - # First call should raise LLMNoResponseError - raise LLMNoResponseError('LLM did not return a response') - else: - # Second call should return a successful response - return { - 'choices': [ - { - 'message': { - 'content': f'Successful response with temperature={temperature}' - } - } - ] - } - - mock_litellm_completion.side_effect = side_effect - - # Create LLM instance and make a completion call with explicit temperature=0 - llm = LLM(config=default_config) - response = llm.completion( - messages=[{'role': 'user', 'content': 'Hello!'}], - stream=False, - temperature=0, # Explicitly set temperature to 0 - ) - - # Verify the response after retry - assert ( - response['choices'][0]['message']['content'] - == 'Successful response with temperature=1.0' - ) - - # Verify that litellm_completion was called twice - assert mock_litellm_completion.call_count == 2 - - # Check the temperature in the first call (should be 0) - first_call_kwargs = mock_litellm_completion.call_args_list[0][1] - assert first_call_kwargs.get('temperature') == 0 - - # Check the temperature in the second call (should be 1.0) - second_call_kwargs = mock_litellm_completion.call_args_list[1][1] - assert second_call_kwargs.get('temperature') == 1.0 - - @patch('openhands.llm.llm.litellm_completion') def test_completion_with_litellm_mock(mock_litellm_completion, default_config): mock_response = {