Skip to content

Commit 7ad6c51

Browse files
Merge pull request #10 from zeoxisca/ft-locate
feat(tools): evaluate\selector\screenshot
2 parents b4e11e4 + 0537db5 commit 7ad6c51

File tree

2 files changed

+106
-28
lines changed

2 files changed

+106
-28
lines changed

tools/common.go

+102-21
Original file line numberDiff line numberDiff line change
@@ -4,70 +4,88 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"os"
8+
"path/filepath"
9+
"time"
10+
711
"github.com/charmbracelet/log"
812
"github.com/go-rod/rod-mcp/types"
913
"github.com/go-rod/rod-mcp/utils"
1014
"github.com/go-rod/rod/lib/input"
1115
"github.com/go-rod/rod/lib/proto"
1216
"github.com/mark3labs/mcp-go/mcp"
13-
"time"
1417
)
1518

1619
const (
1720
defaultWaitStableDur = 1 * time.Second
1821
defaultDomDiff = 0.2
1922
)
2023

24+
// Tool keys: the names passed to mcp.NewTool when a tool is declared and used
// as the keys of the CommonToolHandlers map, so a tool and its handler are
// tied together by the same constant.
const (
25+
NavigationToolKey = "rod_navigate"
26+
GoBackToolKey = "rod_go_back"
27+
GoForwardToolKey = "rod_go_forward"
28+
ReloadToolKey = "rod_reload"
29+
// NOTE(review): the PressKey tool was previously registered as
// "rod_press_key"; confirm that renaming it to "rod_press" is intended.
PressKeyToolKey = "rod_press"
30+
ClickToolKey = "rod_click"
31+
FillToolKey = "rod_fill"
32+
PdfToolKey = "rod_pdf"
33+
ScreenshotToolKey = "rod_screenshot"
34+
EvaluateToolKey = "rod_evaluate"
35+
CloseBrowserToolKey = "rod_close_browser"
36+
SelectorToolKey = "rod_selector"
37+
)
38+
2139
var (
2240
Navigation = mcp.NewTool("rod_navigate",
2341
mcp.WithDescription("Navigate to a URL"),
2442
mcp.WithString("url", mcp.Description("URL to navigate to"), mcp.Required()),
2543
)
26-
GoBack = mcp.NewTool("rod_go_back",
44+
GoBack = mcp.NewTool(GoBackToolKey,
2745
mcp.WithDescription("Go back in the browser history, go back to the previous page"),
2846
)
29-
GoForward = mcp.NewTool("rod_go_forward",
47+
GoForward = mcp.NewTool(GoForwardToolKey,
3048
mcp.WithDescription("Go forward in the browser history, go to the next page"),
3149
)
32-
ReLoad = mcp.NewTool("rod_reload",
50+
ReLoad = mcp.NewTool(ReloadToolKey,
3351
mcp.WithDescription("Reload the current page"),
3452
)
35-
PressKey = mcp.NewTool("rod_press_key",
53+
PressKey = mcp.NewTool(PressKeyToolKey,
3654
mcp.WithDescription("Press a key on the keyboard"),
3755
mcp.WithString("key", mcp.Description("Name of the key to press or a character to generate, such as `ArrowLeft` or `a`"), mcp.Required()),
3856
)
39-
Pdf = mcp.NewTool("rod_pdf",
57+
Pdf = mcp.NewTool(PdfToolKey,
4058
mcp.WithDescription("Generate a PDF from the current page"),
4159
mcp.WithString("file_path", mcp.Description("Path to save the PDF file"), mcp.Required()),
4260
mcp.WithString("file_name", mcp.Description("Name of the PDF file"), mcp.Required()),
4361
)
44-
CloseBrowser = mcp.NewTool("rod_close_browser",
62+
CloseBrowser = mcp.NewTool(CloseBrowserToolKey,
4563
mcp.WithDescription("Close the browser"),
4664
)
47-
Screenshot = mcp.NewTool("rod_screenshot",
65+
Screenshot = mcp.NewTool(ScreenshotToolKey,
4866
mcp.WithDescription("Take a screenshot of the current page or a specific element"),
4967
mcp.WithString("name", mcp.Description("Name of the screenshot"), mcp.Required()),
5068
mcp.WithString("selector", mcp.Description("CSS selector of the element to take a screenshot of")),
5169
mcp.WithNumber("width", mcp.Description("Width in pixels (default: 800)")),
5270
mcp.WithNumber("height", mcp.Description("Height in pixels (default: 600)")),
5371
)
54-
Click = mcp.NewTool("rod_click",
72+
Click = mcp.NewTool(ClickToolKey,
5573
mcp.WithDescription("Click an element on the page"),
5674
mcp.WithString("selector", mcp.Description("CSS selector of the element to click"), mcp.Required()),
5775
)
58-
Fill = mcp.NewTool("rod_fill",
76+
Fill = mcp.NewTool(FillToolKey,
5977
mcp.WithDescription("Fill out an input field"),
6078
mcp.WithString("selector", mcp.Description("CSS selector of the element to type into"), mcp.Required()),
6179
mcp.WithString("value", mcp.Description("Value to fill"), mcp.Required()),
6280
)
63-
Selector = mcp.NewTool("rod_selector",
81+
Selector = mcp.NewTool(SelectorToolKey,
6482
mcp.WithDescription("Select an element on the page with Select tag"),
6583
mcp.WithString("selector", mcp.Description("CSS selector for element to select"), mcp.Required()),
6684
mcp.WithString("value", mcp.Description("Value to select"), mcp.Required()),
6785
)
68-
Evaluate = mcp.NewTool("rod_evaluate",
86+
Evaluate = mcp.NewTool(EvaluateToolKey,
6987
mcp.WithDescription("Execute JavaScript in the browser console"),
70-
mcp.WithString("script", mcp.Description("JavaScript code to execute"), mcp.Required()),
88+
mcp.WithString("script", mcp.Description("A function name or an unnamed function definition"), mcp.Required()),
7189
)
7290
)
7391

@@ -219,6 +237,61 @@ var (
219237
return mcp.NewToolResultText("Close browser successfully"), nil
220238
}
221239
}
240+
// EvaluateHandler returns the handler for the rod_evaluate tool.
//
// The handler reads the required "script" string argument, evaluates it on the
// page returned by rodCtx.EnsurePage via a Runtime.evaluate call (object group
// "console", command-line API included), and reports the resulting value as
// text.
//
// It returns an error when no page can be obtained, when "script" is missing
// or not a string, when the protocol call fails, or when the script throws.
EvaluateHandler = func(rodCtx *types.Context) func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		page, err := rodCtx.EnsurePage()
		if err != nil {
			// Without a page there is nothing to evaluate against; continuing
			// would call into a nil page.
			log.Errorf("Failed to evaluate: %s", err.Error())
			return nil, fmt.Errorf("Failed to evaluate: %w", err)
		}

		// Checked assertion: a missing or non-string argument must not panic.
		script, ok := request.Params.Arguments["script"].(string)
		if !ok || script == "" {
			log.Errorf("Failed to evaluate: argument %q must be a non-empty string", "script")
			return nil, errors.New("Failed to evaluate: argument \"script\" must be a non-empty string")
		}

		r, err := proto.RuntimeEvaluate{
			Expression:            script,
			ObjectGroup:           "console",
			IncludeCommandLineAPI: true,
		}.Call(page)
		if err != nil {
			log.Errorf("Failed to evaluate code: %s", err.Error())
			return nil, fmt.Errorf("Failed to evaluate code: %w", err)
		}

		// A script that throws still yields a successful protocol call; the
		// failure is carried in ExceptionDetails and must not be reported as
		// success.
		if r.ExceptionDetails != nil {
			log.Errorf("Failed to evaluate code: %s", r.ExceptionDetails.Text)
			return nil, fmt.Errorf("Failed to evaluate code: %s", r.ExceptionDetails.Text)
		}

		return mcp.NewToolResultText(fmt.Sprintf("Evaluate code successfully with result: %s", r.Result.Value.String())), nil
	}
}
259+
// SelectorHandler returns the handler for the rod_selector tool.
//
// The handler reads the required "selector" string argument, looks the element
// up on the page returned by rodCtx.EnsurePage, and reports the matched
// element's remote object id together with its string form.
//
// It returns an error when no page can be obtained, when "selector" is missing
// or not a string, or when the element lookup fails.
//
// NOTE(review): the tool also declares a required "value" argument, which this
// handler does not read; confirm whether selecting the value is still to be
// implemented.
SelectorHandler = func(rodCtx *types.Context) func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		page, err := rodCtx.EnsurePage()
		if err != nil {
			log.Errorf("Failed to select: %s", err.Error())
			return nil, fmt.Errorf("Failed to select: %w", err)
		}

		// Checked assertion: a missing or non-string argument must not panic.
		selector, ok := request.Params.Arguments["selector"].(string)
		if !ok || selector == "" {
			log.Errorf("Failed to select: argument %q must be a non-empty string", "selector")
			return nil, errors.New("Failed to select: argument \"selector\" must be a non-empty string")
		}

		res, err := page.Element(selector)
		if err != nil {
			// res is nil on failure; returning here avoids dereferencing it
			// while building the result text.
			log.Errorf("Failed to select: %s", err.Error())
			return nil, fmt.Errorf("Failed to select: %w", err)
		}

		return mcp.NewToolResultText(fmt.Sprintf("The object's id matched: %s, plain text is: %s", res.Object.ObjectID, res.String())), nil
	}
}
272+
// ScreenshotHandler returns the handler for the rod_screenshot tool.
//
// The handler reads the required "name" string argument, captures a PNG
// screenshot of the page returned by rodCtx.EnsurePage, and writes it to
// tmp/screenshots/<name>.png (relative to the working directory), creating the
// directory when needed. The result text contains the path that was written.
//
// It returns an error when no page can be obtained, when "name" is missing,
// not a string, or contains path components, when the capture fails, or when
// the file cannot be written.
//
// NOTE(review): the tool also declares "selector", "width" and "height"
// arguments, which this handler does not read.
ScreenshotHandler = func(rodCtx *types.Context) func(context.Context, mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		page, err := rodCtx.EnsurePage()
		if err != nil {
			log.Errorf("Failed to screenshot: %s", err.Error())
			return nil, fmt.Errorf("Failed to screenshot: %w", err)
		}

		// Checked assertion: a missing or non-string argument must not panic.
		fileName, ok := request.Params.Arguments["name"].(string)
		if !ok || fileName == "" {
			log.Errorf("Failed to screenshot: argument %q must be a non-empty string", "name")
			return nil, errors.New("Failed to screenshot: argument \"name\" must be a non-empty string")
		}
		// The name comes from the tool caller; refuse anything that is not a
		// plain file name so the output cannot escape the screenshots folder.
		if fileName != filepath.Base(fileName) || fileName == "." || fileName == ".." {
			log.Errorf("Failed to screenshot: invalid name %q", fileName)
			return nil, fmt.Errorf("Failed to screenshot: invalid name %q", fileName)
		}

		req := &proto.PageCaptureScreenshot{
			Format: proto.PageCaptureScreenshotFormatPng,
		}
		bin, err := page.Screenshot(false, req)
		if err != nil {
			log.Errorf("Failed to screenshot: %s", err.Error())
			return nil, fmt.Errorf("Failed to screenshot: %w", err)
		}

		dir := filepath.Join("tmp", "screenshots")
		// os.WriteFile does not create parent directories.
		if err = os.MkdirAll(dir, 0o755); err != nil {
			log.Errorf("Failed to screenshot: %s", err.Error())
			return nil, fmt.Errorf("Failed to screenshot: %w", err)
		}

		filePath := filepath.Join(dir, fileName+".png")
		if err = os.WriteFile(filePath, bin, 0o664); err != nil {
			log.Errorf("Failed to screenshot: %s", err.Error())
			return nil, fmt.Errorf("Failed to screenshot: %w", err)
		}

		return mcp.NewToolResultText(fmt.Sprintf("Save to %s", filePath)), nil
	}
}
222295
)
223296

224297
var (
@@ -231,15 +304,23 @@ var (
231304
Click,
232305
Fill,
233306
CloseBrowser,
307+
Pdf,
308+
Screenshot,
309+
Selector,
310+
Evaluate,
234311
}
235312
CommonToolHandlers = map[string]ToolHandler{
236-
"rod_navigate": NavigationHandler,
237-
"rod_go_back": GoBackHandler,
238-
"rod_go_forward": GoForwardHandler,
239-
"rod_reload": ReLoadHandler,
240-
"rod_press_key": PressKeyHandler,
241-
"rod_click": ClickHandler,
242-
"rod_fill": FillHandler,
243-
"rod_close_browser": CloseBrowserHandler,
313+
NavigationToolKey: NavigationHandler,
314+
GoBackToolKey: GoBackHandler,
315+
GoForwardToolKey: GoForwardHandler,
316+
ReloadToolKey: ReLoadHandler,
317+
PressKeyToolKey: PressKeyHandler,
318+
ClickToolKey: ClickHandler,
319+
FillToolKey: FillHandler,
320+
//PdfToolKey: PdfHandler,
321+
ScreenshotToolKey: ScreenshotHandler,
322+
EvaluateToolKey: EvaluateHandler,
323+
CloseBrowserToolKey: CloseBrowserHandler,
324+
SelectorToolKey: SelectorHandler,
244325
}
245326
)

types/context.go

+4-7
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@ package types
33
import (
44
"context"
55
"fmt"
6+
"strings"
7+
"sync"
8+
"sync/atomic"
9+
610
"github.com/go-rod/rod"
711
"github.com/go-rod/rod-mcp/utils"
812
"github.com/go-rod/rod/lib/launcher"
913
"github.com/go-rod/rod/lib/proto"
1014
"github.com/pkg/errors"
11-
"strings"
12-
"sync"
13-
"sync/atomic"
1415
)
1516

1617
func launchBrowser(ctx context.Context, cfg Config) (*rod.Browser, error) {
@@ -90,7 +91,6 @@ func (ctx *Context) EnsurePage() (*rod.Page, error) {
9091
return nil, err
9192
}
9293
return ctx.page, nil
93-
9494
}
9595

9696
func (ctx *Context) initial() error {
@@ -108,7 +108,6 @@ func (ctx *Context) initial() error {
108108
return err
109109
}
110110
return nil
111-
112111
}
113112
if ctx.page == nil {
114113
ctx.page, err = ctx.createPage()
@@ -146,7 +145,6 @@ func (ctx *Context) closePage() error {
146145
return err
147146
}
148147
func (ctx *Context) closeBrowser() error {
149-
150148
err := ctx.closePage()
151149
if err != nil {
152150
return err
@@ -179,5 +177,4 @@ func (ctx *Context) Close() error {
179177
defer ctx.stateLock.Unlock()
180178
ctx.closeBrowser()
181179
return nil
182-
183180
}

0 commit comments

Comments
 (0)