Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 82e2500

Browse files
committed Apr 2, 2025
fix: initialize mcp tools in session with simpler code
chore: revert original prompts codeact; chore: re-add system prompt; chore: remove playwright mcp custom; chore: remove unused
1 parent a1de26e commit 82e2500

File tree

19 files changed

+28
-207
lines changed

19 files changed

+28
-207
lines changed
 

‎frontend/src/services/actions.ts

-5
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,6 @@ const messageActions = {
3131
store.dispatch(addAssistantMessage(message.message));
3232
}
3333
},
34-
[ActionType.PLAYWRIGHT_MCP_BROWSER_SCREENSHOT]: (message: ActionMessage) => {
35-
if (!message.args.thought && message.message) {
36-
store.dispatch(addAssistantMessage(message.message));
37-
}
38-
},
3934
[ActionType.WRITE]: (message: ActionMessage) => {
4035
const { path, content } = message.args;
4136
store.dispatch(setActiveFilepath(path));

‎frontend/src/services/observations.ts

-14
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ export function handleObservationMessage(message: ObservationMessage) {
3131
break;
3232
case ObservationType.BROWSE:
3333
case ObservationType.BROWSE_INTERACTIVE:
34-
case ObservationType.PLAYWRIGHT_MCP_BROWSER_SCREENSHOT:
3534
if (message.extras?.screenshot) {
3635
store.dispatch(setScreenshotSrc(message.extras?.screenshot));
3736
}
@@ -220,19 +219,6 @@ export function handleObservationMessage(message: ObservationMessage) {
220219
}),
221220
);
222221
break;
223-
case ObservationType.PLAYWRIGHT_MCP_BROWSER_SCREENSHOT:
224-
store.dispatch(
225-
addAssistantObservation({
226-
...baseObservation,
227-
observation: ObservationType.PLAYWRIGHT_MCP_BROWSER_SCREENSHOT,
228-
extras: {
229-
url: String(message.extras.url || ""),
230-
screenshot: String(message.extras.screenshot || ""),
231-
trigger_by_action: String(message.extras.trigger_by_action || ""),
232-
},
233-
}),
234-
);
235-
break;
236222
case "error":
237223
store.dispatch(
238224
addAssistantObservation({

‎frontend/src/types/action-type.tsx

-3
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,6 @@ enum ActionType {
3838

3939
// Changes the state of the agent, e.g. to paused or running
4040
CHANGE_AGENT_STATE = "change_agent_state",
41-
42-
// Take a screenshot of the browser
43-
PLAYWRIGHT_MCP_BROWSER_SCREENSHOT = "playwright_mcp_browser_screenshot",
4441
}
4542

4643
export default ActionType;

‎frontend/src/types/core/base.ts

+1-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ export type OpenHandsEventType =
1212
| "reject"
1313
| "think"
1414
| "finish"
15-
| "error"
16-
| "playwright_mcp_browser_screenshot";
15+
| "error";
1716

1817
interface OpenHandsBaseEvent {
1918
id: number;

‎frontend/src/types/core/observations.ts

+1-12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import { AgentState } from "../agent-state";
2-
import ObservationType from "../observation-type";
32
import { OpenHandsObservationEvent } from "./base";
43

54
export interface AgentStateChangeObservation
@@ -110,15 +109,6 @@ export interface AgentThinkObservation
110109
};
111110
}
112111

113-
export interface PlaywrightMcpBrowserScreenshotObservation
114-
extends OpenHandsObservationEvent<ObservationType.PLAYWRIGHT_MCP_BROWSER_SCREENSHOT> {
115-
source: "agent";
116-
extras: {
117-
url: string;
118-
screenshot: string;
119-
trigger_by_action: string;
120-
};
121-
}
122112
export type OpenHandsObservation =
123113
| AgentStateChangeObservation
124114
| AgentThinkObservation
@@ -130,5 +120,4 @@ export type OpenHandsObservation =
130120
| WriteObservation
131121
| ReadObservation
132122
| EditObservation
133-
| ErrorObservation
134-
| PlaywrightMcpBrowserScreenshotObservation;
123+
| ErrorObservation;

‎frontend/src/types/observation-type.tsx

-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@ enum ObservationType {
1111
// Interactive browsing
1212
BROWSE_INTERACTIVE = "browse_interactive",
1313

14-
PLAYWRIGHT_MCP_BROWSER_SCREENSHOT = 'playwright_mcp_browser_screenshot',
15-
1614
// The output of a command
1715
RUN = "run",
1816

‎openhands/agenthub/codeact_agent/function_calling.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
205205
action = McpAction(
206206
name=tool_call.function.name, arguments=tool_call.function.arguments
207207
)
208-
action.set_hard_timeout(120)
209-
logger.warning(f'MCP action in function_calling.py: {action}')
208+
# action.set_hard_timeout(120)
209+
logger.debug(f'MCP action in function_calling.py: {action}')
210210

211211
# We only add thought to the first action
212212
if i == 0:

‎openhands/agenthub/codeact_agent/prompts/system_prompt.j2

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
You are Thesis Capsule agent, a helpful AI assistant that can interact with a computer to solve tasks.
1+
You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
22

33
<ROLE>
44
Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed.
Original file line numberDiff line numberDiff line change
@@ -1,5 +0,0 @@
1-
You are Thesis Capsule agent, a helpful AI assistant that can interact with a computer to solve tasks.
2-
3-
<PROBLEM_SOLVING_WORKFLOW>
4-
Take screenshot of important actions you take related to web-browsing.
5-
</PROBLEM_SOLVING_WORKFLOW>

‎openhands/core/schema/observation.py

-4
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,3 @@ class ObservationType(str, Enum):
5252

5353
MCP = 'mcp'
5454
"""Result of a MCP Server operation"""
55-
56-
PLAYWRIGHT_MCP_BROWSER_SCREENSHOT = 'playwright_mcp_browser_screenshot'
57-
"""Result of a Playwright MCP Browser Screenshot operation. The response is a base64 encoded string of the screenshot, which should be streamed to the client using the correct format matching
58-
browsergym's screenshot format."""

‎openhands/events/observation/__init__.py

-1
Original file line numberDiff line numberDiff line change
@@ -45,5 +45,4 @@
4545
'RecallObservation',
4646
'RecallType',
4747
'MCPObservation',
48-
'PlaywrightMcpBrowserScreenshotObservation',
4948
]

‎openhands/events/observation/playwright_mcp.py

-22
This file was deleted.

‎openhands/events/serialization/observation.py

-4
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@
2727
)
2828
from openhands.events.observation.mcp import MCPObservation
2929
from openhands.events.observation.observation import Observation
30-
from openhands.events.observation.playwright_mcp import (
31-
PlaywrightMcpBrowserScreenshotObservation,
32-
)
3330
from openhands.events.observation.reject import UserRejectObservation
3431
from openhands.events.observation.success import SuccessObservation
3532

@@ -50,7 +47,6 @@
5047
AgentThinkObservation,
5148
RecallObservation,
5249
MCPObservation,
53-
PlaywrightMcpBrowserScreenshotObservation,
5450
)
5551

5652
OBSERVATION_TYPE_TO_CLASS = {

‎openhands/memory/conversation_memory.py

-20
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from typing import Generator
2-
import json
32

43
from litellm import ModelResponse
54

@@ -40,9 +39,6 @@
4039
from openhands.events.observation.error import ErrorObservation
4140
from openhands.events.observation.mcp import MCPObservation
4241
from openhands.events.observation.observation import Observation
43-
from openhands.events.observation.playwright_mcp import (
44-
PlaywrightMcpBrowserScreenshotObservation,
45-
)
4642
from openhands.events.serialization.event import truncate_content
4743
from openhands.utils.prompt import PromptManager, RepositoryInfo, RuntimeInfo
4844

@@ -337,22 +333,6 @@ def _process_observation(
337333
elif isinstance(obs, MCPObservation):
338334
# logger.warning(f'MCPObservation: {obs}')
339335
message = Message(role='assistant', content=[TextContent(text=obs.content)])
340-
elif isinstance(obs, PlaywrightMcpBrowserScreenshotObservation):
341-
text = 'Image: Current webpage screenshot\n'
342-
screenshot_content = json.loads(obs.content)
343-
logger.debug(
344-
f'screenshot_content in conversation_memory: {screenshot_content}'
345-
)
346-
if 'url' in screenshot_content:
347-
text += f'URL: {screenshot_content["url"]}\n'
348-
349-
# We don't actually need to screenshot fed into the LLM. We can use snapshots. Meanwhile, the screenshot will be streamed to the user.
350-
message = Message(
351-
role='assistant',
352-
content=[
353-
TextContent(text=text),
354-
],
355-
)
356336
elif isinstance(obs, IPythonRunCellObservation):
357337
text = obs.content
358338
# replace base64 images with a placeholder

‎openhands/runtime/action_execution_server.py

-32
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
from fastapi.exceptions import RequestValidationError
2525
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
2626
from fastapi.security import APIKeyHeader
27-
from mcp.types import ImageContent
2827
from openhands_aci.editor.editor import OHEditor
2928
from openhands_aci.editor.exceptions import ToolError
3029
from openhands_aci.editor.results import ToolResult
@@ -59,11 +58,7 @@
5958
Observation,
6059
)
6160
from openhands.events.observation.mcp import MCPObservation
62-
from openhands.events.observation.playwright_mcp import (
63-
PlaywrightMcpBrowserScreenshotObservation,
64-
)
6561
from openhands.events.serialization import event_from_dict, event_to_dict
66-
from openhands.mcp.mcp_base import ToolResult as MCPToolResult
6762
from openhands.runtime.browser import browse
6863
from openhands.runtime.browser.browser_env import BrowserEnv
6964
from openhands.runtime.plugins import ALL_PLUGINS, JupyterPlugin, Plugin, VSCodePlugin
@@ -559,32 +554,8 @@ async def call_tool_mcp(self, action: McpAction) -> Observation:
559554
for agent in mcp_agents:
560555
await agent.cleanup()
561556

562-
# special case for browser screenshot of playwright_mcp
563-
if action.name == 'browser_screenshot':
564-
return self.playwright_mcp_browser_screenshot(action, response)
565-
566557
return MCPObservation(content=f'MCP result:{response}')
567558

568-
def playwright_mcp_browser_screenshot(
569-
self, action: McpAction, response: MCPToolResult
570-
) -> Observation:
571-
# example response:
572-
"""
573-
{
574-
"type": "image",
575-
"data": "image/jpeg;base64,/9j/4AA...",
576-
"mimeType": "image/jpeg",
577-
"url": "https://www.google.com"
578-
}
579-
"""
580-
screenshot_content: ImageContent = response.output
581-
return PlaywrightMcpBrowserScreenshotObservation(
582-
content=f'{response}',
583-
url=screenshot_content.url if screenshot_content.url is not None else '',
584-
trigger_by_action=action.name,
585-
screenshot=f'data:image/png;base64,{screenshot_content.data}',
586-
)
587-
588559
def close(self):
589560
self.memory_monitor.stop_monitoring()
590561
if self.bash_session is not None:
@@ -609,9 +580,6 @@ def close(self):
609580
help='BrowserGym environment used for browser evaluation',
610581
default=None,
611582
)
612-
parser.add_argument(
613-
'--runtime-mode', type=str, help='docker | others', default='others'
614-
)
615583

616584
# example: python client.py 8000 --working-dir /workspace --plugins JupyterRequirement
617585
args = parser.parse_args()

‎openhands/runtime/impl/docker/docker_runtime.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1+
import os
12
from functools import lru_cache
23
from typing import Callable
34
from uuid import UUID
45

5-
import os
6-
76
import docker
87
import httpx
98
import tenacity
@@ -89,9 +88,13 @@ def __init__(
8988
self._vscode_port = -1
9089
self._app_ports: list[int] = []
9190

92-
if os.environ.get("DOCKER_HOST_ADDR"):
93-
logger.info(f'Using DOCKER_HOST_IP: {os.environ["DOCKER_HOST_ADDR"]} for local_runtime_url')
94-
self.config.sandbox.local_runtime_url = f'http://{os.environ["DOCKER_HOST_ADDR"]}'
91+
if os.environ.get('DOCKER_HOST_ADDR'):
92+
logger.info(
93+
f'Using DOCKER_HOST_IP: {os.environ["DOCKER_HOST_ADDR"]} for local_runtime_url'
94+
)
95+
self.config.sandbox.local_runtime_url = (
96+
f'http://{os.environ["DOCKER_HOST_ADDR"]}'
97+
)
9598

9699
self.docker_client: docker.DockerClient = self._init_docker_client()
97100
self.api_url = f'{self.config.sandbox.local_runtime_url}:{self._container_port}'
@@ -284,7 +287,6 @@ def _init_container(self):
284287
server_port=self._container_port,
285288
plugins=self.plugins,
286289
app_config=self.config,
287-
runtime_mode='docker',
288290
)
289291

290292
try:

‎openhands/runtime/utils/command.py

-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ def get_action_execution_server_startup_command(
1818
python_prefix: list[str] = DEFAULT_PYTHON_PREFIX,
1919
override_user_id: int | None = None,
2020
override_username: str | None = None,
21-
runtime_mode: str = 'others',
2221
) -> list[str]:
2322
sandbox_config = app_config.sandbox
2423

@@ -56,8 +55,6 @@ def get_action_execution_server_startup_command(
5655
'--user-id',
5756
str(user_id),
5857
*browsergym_args,
59-
'--runtime-mode',
60-
runtime_mode,
6158
]
6259

6360
return base_cmd

‎openhands/server/session/agent_session.py

+3-68
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,10 @@
1313
from openhands.core.exceptions import AgentRuntimeUnavailableError
1414
from openhands.core.logger import OpenHandsLoggerAdapter
1515
from openhands.core.schema.agent import AgentState
16-
from openhands.core.setup import create_mcp_agents
1716
from openhands.events.action import ChangeAgentStateAction, MessageAction
1817
from openhands.events.event import Event, EventSource
1918
from openhands.events.stream import EventStream
2019
from openhands.integrations.provider import PROVIDER_TOKEN_TYPE, ProviderHandler
21-
from openhands.mcp.mcp_agent import convert_mcp_agents_to_tools
2220
from openhands.memory.memory import Memory
2321
from openhands.microagent.microagent import BaseMicroAgent
2422
from openhands.runtime import get_runtime_cls
@@ -115,69 +113,6 @@ async def start(
115113
finished = False # For monitoring
116114
runtime_connected = False
117115
try:
118-
# Initialize MCP agents first before creating runtime and controller
119-
try:
120-
# Log MCP configuration to help with debugging
121-
self.logger.info(f'MCP SSE servers: {config.mcp.sse.mcp_servers}')
122-
self.logger.info(f'MCP stdio commands: {config.mcp.stdio.commands}')
123-
self.logger.info(f'MCP stdio args: {config.mcp.stdio.args}')
124-
125-
# Check if MCP servers are available
126-
if not config.mcp.sse.mcp_servers and not config.mcp.stdio.commands:
127-
self.logger.warning(
128-
'No MCP servers or commands configured. MCP integration will not work.'
129-
)
130-
else:
131-
self.logger.info('Initializing MCP agents for server mode...')
132-
mcp_agents = await create_mcp_agents(
133-
config.mcp.sse.mcp_servers,
134-
config.mcp.stdio.commands,
135-
config.mcp.stdio.args,
136-
)
137-
138-
# Give some time for MCP connections to stabilize
139-
await asyncio.sleep(1)
140-
141-
# For CodeActAgent and similar agents that use the tools attribute
142-
if hasattr(agent, 'tools'):
143-
try:
144-
# Convert MCP agents to tools format for CodeActAgent
145-
mcp_tools = convert_mcp_agents_to_tools(mcp_agents)
146-
self.logger.info(
147-
f"MCP tools created: {[tool.get('function', {}).get('name', '<unnamed>') for tool in mcp_tools]}"
148-
)
149-
150-
# If agent already has tools, extend them; otherwise create a new list
151-
if isinstance(agent.tools, list):
152-
agent.tools.extend(mcp_tools)
153-
else:
154-
agent.tools = mcp_tools
155-
156-
self.logger.info(
157-
f'Agent now has {len(agent.tools)} tools including MCP tools'
158-
)
159-
except Exception as e:
160-
self.logger.error(
161-
f'Error converting MCP agents to tools: {str(e)}',
162-
exc_info=True,
163-
)
164-
165-
# Log MCP agents status
166-
for idx, mcp_agent in enumerate(mcp_agents):
167-
self.logger.info(
168-
f'MCP Agent {idx} connection type: {mcp_agent.connection_type}'
169-
)
170-
self.logger.info(
171-
f"MCP Agent {idx} available tools: {list(mcp_agent.mcp_clients.tool_map.keys()) if hasattr(mcp_agent, 'mcp_clients') and hasattr(mcp_agent.mcp_clients, 'tool_map') else 'No tools available'}"
172-
)
173-
await mcp_agent.cleanup()
174-
175-
self.logger.info(
176-
f'Successfully initialized {len(mcp_agents)} MCP agents'
177-
)
178-
except Exception as e:
179-
self.logger.error(f'Error initializing MCP agents: {e}', exc_info=True)
180-
181116
self._create_security_analyzer(config.security.security_analyzer)
182117
runtime_connected = await self._create_runtime(
183118
runtime_name=runtime_name,
@@ -388,9 +323,9 @@ async def _create_runtime(
388323
return False
389324

390325
if selected_repository and git_provider_tokens:
391-
await self.runtime.clone_repo(git_provider_tokens,
392-
selected_repository,
393-
selected_branch)
326+
await self.runtime.clone_repo(
327+
git_provider_tokens, selected_repository, selected_branch
328+
)
394329
await call_sync_from_async(self.runtime.maybe_run_setup_script)
395330

396331
self.logger.debug(

‎openhands/server/session/session.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
)
1313
from openhands.core.logger import OpenHandsLoggerAdapter
1414
from openhands.core.schema import AgentState
15+
from openhands.core.setup import create_mcp_agents
1516
from openhands.events.action import MessageAction, NullAction
1617
from openhands.events.event import Event, EventSource
1718
from openhands.events.observation import (
@@ -23,6 +24,7 @@
2324
from openhands.events.serialization import event_from_dict, event_to_dict
2425
from openhands.events.stream import EventStreamSubscriber
2526
from openhands.llm.llm import LLM
27+
from openhands.mcp.mcp_agent import convert_mcp_agents_to_tools
2628
from openhands.server.session.agent_session import AgentSession
2729
from openhands.server.session.conversation_init_data import ConversationInitData
2830
from openhands.server.settings import Settings
@@ -133,7 +135,16 @@ async def initialize_agent(
133135
self.logger.info(f'Enabling default condenser: {default_condenser_config}')
134136
agent_config.condenser = default_condenser_config
135137

136-
agent = Agent.get_cls(agent_cls)(llm, agent_config)
138+
mcp_agents = await create_mcp_agents(
139+
self.config.mcp.sse.mcp_servers,
140+
self.config.mcp.stdio.commands,
141+
self.config.mcp.stdio.args,
142+
)
143+
mcp_tools = convert_mcp_agents_to_tools(mcp_agents)
144+
agent = Agent.get_cls(agent_cls)(llm, agent_config, mcp_tools)
145+
# close all mcp agents after extracting tools
146+
for mcp_agent in mcp_agents:
147+
await mcp_agent.cleanup()
137148

138149
git_provider_tokens = None
139150
selected_repository = None

0 commit comments

Comments
 (0)
Please sign in to comment.