@@ -4,70 +4,88 @@ import (
4
4
"context"
5
5
"errors"
6
6
"fmt"
7
+ "os"
8
+ "path/filepath"
9
+ "time"
10
+
7
11
"github.com/charmbracelet/log"
8
12
"github.com/go-rod/rod-mcp/types"
9
13
"github.com/go-rod/rod-mcp/utils"
10
14
"github.com/go-rod/rod/lib/input"
11
15
"github.com/go-rod/rod/lib/proto"
12
16
"github.com/mark3labs/mcp-go/mcp"
13
- "time"
14
17
)
15
18
16
19
// Package-level defaults.
//
// NOTE(review): the call sites are outside this chunk; judging by the names,
// these are presumably the duration and DOM-difference threshold used when
// waiting for a page to become stable — confirm against the handlers that use them.
const (
	// defaultWaitStableDur is one second.
	defaultWaitStableDur = time.Second

	// defaultDomDiff is the untyped constant 0.2.
	defaultDomDiff = 0.2
)
20
23
24
// Names under which the rod tools are registered. The same constants are used
// for the tool definitions and as keys of the handler table, so a tool and its
// handler always agree on the name.
const (
	// Navigation and history.
	NavigationToolKey = "rod_navigate"
	GoBackToolKey     = "rod_go_back"
	GoForwardToolKey  = "rod_go_forward"
	ReloadToolKey     = "rod_reload"

	// Page interaction.
	//
	// NOTE(review): the press-key tool was previously registered under the
	// literal "rod_press_key"; confirm that renaming it to "rod_press" is
	// intended, since clients address tools by name.
	PressKeyToolKey = "rod_press"
	ClickToolKey    = "rod_click"
	FillToolKey     = "rod_fill"
	SelectorToolKey = "rod_selector"
	EvaluateToolKey = "rod_evaluate"

	// Page capture.
	PdfToolKey        = "rod_pdf"
	ScreenshotToolKey = "rod_screenshot"

	// Browser lifecycle.
	CloseBrowserToolKey = "rod_close_browser"
)
38
+
21
39
var (
22
40
Navigation = mcp .NewTool ("rod_navigate" ,
23
41
mcp .WithDescription ("Navigate to a URL" ),
24
42
mcp .WithString ("url" , mcp .Description ("URL to navigate to" ), mcp .Required ()),
25
43
)
26
- GoBack = mcp .NewTool ("rod_go_back" ,
44
+ GoBack = mcp .NewTool (GoBackToolKey ,
27
45
mcp .WithDescription ("Go back in the browser history, go back to the previous page" ),
28
46
)
29
- GoForward = mcp .NewTool ("rod_go_forward" ,
47
+ GoForward = mcp .NewTool (GoForwardToolKey ,
30
48
mcp .WithDescription ("Go forward in the browser history, go to the next page" ),
31
49
)
32
- ReLoad = mcp .NewTool ("rod_reload" ,
50
+ ReLoad = mcp .NewTool (ReloadToolKey ,
33
51
mcp .WithDescription ("Reload the current page" ),
34
52
)
35
- PressKey = mcp .NewTool ("rod_press_key" ,
53
+ PressKey = mcp .NewTool (PressKeyToolKey ,
36
54
mcp .WithDescription ("Press a key on the keyboard" ),
37
55
mcp .WithString ("key" , mcp .Description ("Name of the key to press or a character to generate, such as `ArrowLeft` or `a`" ), mcp .Required ()),
38
56
)
39
- Pdf = mcp .NewTool ("rod_pdf" ,
57
+ Pdf = mcp .NewTool (PdfToolKey ,
40
58
mcp .WithDescription ("Generate a PDF from the current page" ),
41
59
mcp .WithString ("file_path" , mcp .Description ("Path to save the PDF file" ), mcp .Required ()),
42
60
mcp .WithString ("file_name" , mcp .Description ("Name of the PDF file" ), mcp .Required ()),
43
61
)
44
- CloseBrowser = mcp .NewTool ("rod_close_browser" ,
62
+ CloseBrowser = mcp .NewTool (CloseBrowserToolKey ,
45
63
mcp .WithDescription ("Close the browser" ),
46
64
)
47
- Screenshot = mcp .NewTool ("rod_screenshot" ,
65
+ Screenshot = mcp .NewTool (ScreenshotToolKey ,
48
66
mcp .WithDescription ("Take a screenshot of the current page or a specific element" ),
49
67
mcp .WithString ("name" , mcp .Description ("Name of the screenshot" ), mcp .Required ()),
50
68
mcp .WithString ("selector" , mcp .Description ("CSS selector of the element to take a screenshot of" )),
51
69
mcp .WithNumber ("width" , mcp .Description ("Width in pixels (default: 800)" )),
52
70
mcp .WithNumber ("height" , mcp .Description ("Height in pixels (default: 600)" )),
53
71
)
54
- Click = mcp .NewTool ("rod_click" ,
72
+ Click = mcp .NewTool (ClickToolKey ,
55
73
mcp .WithDescription ("Click an element on the page" ),
56
74
mcp .WithString ("selector" , mcp .Description ("CSS selector of the element to click" ), mcp .Required ()),
57
75
)
58
- Fill = mcp .NewTool ("rod_fill" ,
76
+ Fill = mcp .NewTool (FillToolKey ,
59
77
mcp .WithDescription ("Fill out an input field" ),
60
78
mcp .WithString ("selector" , mcp .Description ("CSS selector of the element to type into" ), mcp .Required ()),
61
79
mcp .WithString ("value" , mcp .Description ("Value to fill" ), mcp .Required ()),
62
80
)
63
- Selector = mcp .NewTool ("rod_selector" ,
81
+ Selector = mcp .NewTool (SelectorToolKey ,
64
82
mcp .WithDescription ("Select an element on the page with Select tag" ),
65
83
mcp .WithString ("selector" , mcp .Description ("CSS selector for element to select" ), mcp .Required ()),
66
84
mcp .WithString ("value" , mcp .Description ("Value to select" ), mcp .Required ()),
67
85
)
68
- Evaluate = mcp .NewTool ("rod_evaluate" ,
86
+ Evaluate = mcp .NewTool (EvaluateToolKey ,
69
87
mcp .WithDescription ("Execute JavaScript in the browser console" ),
70
- mcp .WithString ("script" , mcp .Description ("JavaScript code to execute " ), mcp .Required ()),
88
+ mcp .WithString ("script" , mcp .Description ("A function name or an unnamed function definition " ), mcp .Required ()),
71
89
)
72
90
)
73
91
@@ -219,6 +237,61 @@ var (
219
237
return mcp .NewToolResultText ("Close browser successfully" ), nil
220
238
}
221
239
}
240
+ EvaluateHandler = func (rodCtx * types.Context ) func (context.Context , mcp.CallToolRequest ) (* mcp.CallToolResult , error ) {
241
+ return func (ctx context.Context , request mcp.CallToolRequest ) (* mcp.CallToolResult , error ) {
242
+ page , err := rodCtx .EnsurePage ()
243
+ if err != nil {
244
+ log .Errorf ("Failed to evaluate: %s" , err .Error ())
245
+ }
246
+ script := request .Params .Arguments ["script" ].(string )
247
+ r , err := proto.RuntimeEvaluate {
248
+ Expression : script ,
249
+ ObjectGroup : "console" ,
250
+ IncludeCommandLineAPI : true ,
251
+ }.Call (page )
252
+ if err != nil {
253
+ log .Errorf ("Failed to evaluate code: %s" , err .Error ())
254
+ return nil , errors .New (fmt .Sprintf ("Failed to evaluate code: %s" , err .Error ()))
255
+ }
256
+ return mcp .NewToolResultText (fmt .Sprintf ("Evaluate code successfully with result: %s" , r .Result .Value .String ())), nil
257
+ }
258
+ }
259
+ SelectorHandler = func (rodCtx * types.Context ) func (context.Context , mcp.CallToolRequest ) (* mcp.CallToolResult , error ) {
260
+ return func (ctx context.Context , request mcp.CallToolRequest ) (* mcp.CallToolResult , error ) {
261
+ page , err := rodCtx .EnsurePage ()
262
+ if err != nil {
263
+ log .Errorf ("Failed to select: %s" , err .Error ())
264
+ }
265
+ res , err := page .Element (request .Params .Arguments ["selector" ].(string ))
266
+ if err != nil {
267
+ log .Errorf ("Failed to select: %s" , err .Error ())
268
+ }
269
+ return mcp .NewToolResultText (fmt .Sprintf ("The object's id matched: %s, plain text is: %s" , res .Object .ObjectID , res .String ())), nil
270
+ }
271
+ }
272
+ ScreenshotHandler = func (rodCtx * types.Context ) func (context.Context , mcp.CallToolRequest ) (* mcp.CallToolResult , error ) {
273
+ return func (ctx context.Context , request mcp.CallToolRequest ) (* mcp.CallToolResult , error ) {
274
+ page , err := rodCtx .EnsurePage ()
275
+ if err != nil {
276
+ log .Errorf ("Failed to screenshot: %s" , err .Error ())
277
+ }
278
+ req := & proto.PageCaptureScreenshot {
279
+ Format : proto .PageCaptureScreenshotFormatPng ,
280
+ }
281
+ bin , err := page .Screenshot (false , req )
282
+ if err != nil {
283
+ log .Errorf ("Failed to screenshot: %s" , err .Error ())
284
+ }
285
+ fileName := request .Params .Arguments ["name" ].(string )
286
+ toFile := []string {"tmp" , "screenshots" , fileName + ".png" }
287
+ filePath := filepath .Join (toFile ... )
288
+ err = os .WriteFile (filePath , bin , 0o664 )
289
+ if err != nil {
290
+ log .Errorf ("Failed to screenshot: %s" , err .Error ())
291
+ }
292
+ return mcp .NewToolResultText (fmt .Sprintf ("Save to %s" , filePath )), nil
293
+ }
294
+ }
222
295
)
223
296
224
297
var (
@@ -231,15 +304,23 @@ var (
231
304
Click ,
232
305
Fill ,
233
306
CloseBrowser ,
307
+ Pdf ,
308
+ Screenshot ,
309
+ Selector ,
310
+ Evaluate ,
234
311
}
235
312
// CommonToolHandlers associates each tool name with the factory that builds
// its handler.
//
// NOTE(review): Pdf is listed in CommonTools but has no entry here (the line
// below is commented out) — confirm whether the tool should be advertised
// before its handler exists.
CommonToolHandlers = map[string]ToolHandler{
	// Navigation and history.
	NavigationToolKey: NavigationHandler,
	GoBackToolKey:     GoBackHandler,
	GoForwardToolKey:  GoForwardHandler,
	ReloadToolKey:     ReLoadHandler,

	// Page interaction.
	PressKeyToolKey: PressKeyHandler,
	ClickToolKey:    ClickHandler,
	FillToolKey:     FillHandler,
	SelectorToolKey: SelectorHandler,
	EvaluateToolKey: EvaluateHandler,

	// Page capture.
	//PdfToolKey: PdfHandler,
	ScreenshotToolKey: ScreenshotHandler,

	// Browser lifecycle.
	CloseBrowserToolKey: CloseBrowserHandler,
},
245
326
)
0 commit comments