feat: browser tools audit and improvements
This commit is contained in:
@@ -46,9 +46,9 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
- Navigation: browser_navigate, browser_go_back, browser_go_forward, browser_reload
|
||||
- Inspection: browser_screenshot, browser_snapshot, browser_console
|
||||
- Interactions: browser_click, browser_click_coordinate, browser_type, browser_type_focused,
|
||||
browser_fill, browser_press, browser_hover, browser_select, browser_scroll, browser_drag
|
||||
browser_press, browser_hover, browser_select, browser_scroll, browser_drag
|
||||
- Advanced: browser_wait, browser_evaluate, browser_get_text, browser_get_attribute,
|
||||
browser_resize, browser_upload, browser_dialog
|
||||
browser_resize, browser_upload
|
||||
"""
|
||||
register_lifecycle_tools(mcp)
|
||||
register_tab_tools(mcp)
|
||||
|
||||
@@ -35,16 +35,6 @@ TOOL_SCHEMAS: dict[str, dict] = {
|
||||
"use_insert_text": {"type": "boolean", "default": True},
|
||||
},
|
||||
},
|
||||
"browser_fill": {
|
||||
"description": "Fill an input element (clears existing content first).",
|
||||
"params": {
|
||||
"selector": {"type": "string", "required": True},
|
||||
"value": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"timeout_ms": {"type": "integer", "default": 30000},
|
||||
},
|
||||
},
|
||||
"browser_type_focused": {
|
||||
"description": (
|
||||
"Type text into the already-focused element. Use after browser_click_coordinate "
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
Browser advanced tools - wait, evaluate, get_text, get_attribute, resize, dialog.
|
||||
Browser advanced tools - wait, evaluate, get_text, get_attribute, resize, upload.
|
||||
|
||||
All operations go through the Beeline extension via CDP - no Playwright required.
|
||||
"""
|
||||
@@ -8,7 +8,6 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Literal
|
||||
|
||||
from fastmcp import FastMCP
|
||||
|
||||
@@ -394,54 +393,3 @@ def register_advanced_tools(mcp: FastMCP) -> None:
|
||||
}
|
||||
except Exception as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
@mcp.tool()
async def browser_dialog(
    action: Literal["accept", "dismiss"] = "accept",
    prompt_text: str | None = None,
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
) -> dict:
    """
    Prepare a tab for browser dialogs (alert, confirm, prompt).

    Attaches to the target tab over CDP and enables the ``Page`` domain,
    then returns a fixed acknowledgement. The body does not register a
    dialog callback itself: ``action``, ``prompt_text`` and ``timeout_ms``
    are accepted for interface compatibility but are not read here.

    Call BEFORE triggering the action that opens the dialog.

    Args:
        action: How to handle - "accept" or "dismiss"
        prompt_text: Text for prompt dialogs (optional)
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout in ms (default: 30000)

    Returns:
        ``{"ok": True, "action": "handler_set", ...}`` once the tab is
        attached, otherwise ``{"ok": False, "error": <reason>}``.
    """
    bridge = get_bridge()
    if not (bridge and bridge.is_connected):
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started"}

    # A falsy tab_id falls back to the profile's active tab.
    target_tab = ctx.get("activeTabId") if not tab_id else tab_id
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        await bridge.cdp_attach(target_tab)
        await bridge._cdp(target_tab, "Page.enable")
    except Exception as e:
        # Any bridge/CDP failure is reported to the caller, never raised.
        return {"ok": False, "error": str(e)}
    else:
        return {
            "ok": True,
            "action": "handler_set",
            "message": "Dialog handler prepared.",
            "suggestion": "Handle dialogs manually or use browser_evaluate.",
        }
|
||||
|
||||
@@ -384,48 +384,6 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
log_tool_call("browser_type", params, error=e, duration_ms=(time.perf_counter() - start) * 1000)
|
||||
return result
|
||||
|
||||
@mcp.tool()
async def browser_fill(
    selector: str,
    value: str,
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
    auto_snapshot_mode: AutoSnapshotMode = "simple",
) -> dict:
    """
    Replace the content of an input element with ``value``.

    Thin wrapper over ``browser_type``: it forwards the call with
    ``clear_first=True`` (existing content is cleared) and ``delay_ms=0``
    (no per-keystroke delay), which makes it the quick path for form
    fields.

    Args:
        selector: CSS selector for the input element
        value: Value to fill
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout waiting for element (default: 30000)
        auto_snapshot_mode: Passed through unchanged to ``browser_type``
            to control the accessibility snapshot taken after a
            successful fill. ``"simple"`` (the default) trims unnamed
            structural nodes; ``"default"`` returns the full tree;
            ``"interactive"`` returns only controls for the tightest
            token footprint; ``"off"`` skips the capture — use when
            batching.

    Returns:
        The dict produced by ``browser_type``. Includes ``snapshot``
        unless ``auto_snapshot_mode="off"`` or the fill failed.
    """
    # Fixed typing options that turn browser_type into a "fill".
    fill_options = {"delay_ms": 0, "clear_first": True}

    fill_result = await browser_type(
        selector=selector,
        text=value,
        tab_id=tab_id,
        profile=profile,
        timeout_ms=timeout_ms,
        auto_snapshot_mode=auto_snapshot_mode,
        **fill_options,
    )
    return fill_result
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_type_focused(
|
||||
text: str,
|
||||
|
||||
@@ -52,6 +52,49 @@ def _clear_profile_tab_caches(ctx: dict[str, Any]) -> None:
|
||||
clear_tab_highlights(tab_ids)
|
||||
|
||||
|
||||
async def _ensure_context(
    bridge: Any,
    profile: str | None,
) -> tuple[str, dict[str, Any], bool]:
    """Look up, or lazily create, the browser context for ``profile``.

    Lets URL-taking tools (``browser_open`` / ``browser_navigate``) act as
    the single cold-start entry point: the first use of a profile builds
    its context (tab group + seed tab) so no explicit ``browser_start``
    round trip is required.

    The caller must already have verified that ``bridge`` is connected.
    Exceptions from ``bridge.create_context`` are deliberately not caught
    here; they propagate so the caller's own try/except can turn them
    into an ``{"ok": False, ...}`` result.

    Args:
        bridge: Connected bridge object exposing ``create_context``.
        profile: Requested profile name, or ``None`` to let
            ``_resolve_profile`` pick one.

    Returns:
        ``(profile_name, ctx, created)`` where ``created`` is ``True``
        only when this call built and registered a new context.
    """
    name = _resolve_profile(profile)

    cached = _contexts.get(name)
    if cached is not None:
        # Fast path: the profile already has a registered context.
        return name, cached, False

    creation = await bridge.create_context(name)
    group = creation.get("groupId")
    seed_tab = creation.get("tabId")

    fresh_ctx: dict[str, Any] = {
        "groupId": group,
        "activeTabId": seed_tab,
        "_seedTabId": seed_tab,  # reused by first browser_open call
        "tabs": set() if seed_tab is None else {seed_tab},
    }
    _contexts[name] = fresh_ctx

    logger.info(
        "Started browser context '%s': groupId=%s, tabId=%s",
        name,
        group,
        seed_tab,
    )
    log_context_event("start", name, group_id=group, tab_id=seed_tab)

    return name, fresh_ctx, True
|
||||
|
||||
|
||||
async def shutdown_all_contexts() -> None:
|
||||
"""Close all active browser contexts. Called at GCU server shutdown."""
|
||||
if not _contexts:
|
||||
@@ -198,16 +241,25 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
|
||||
@mcp.tool()
|
||||
async def browser_start(profile: str | None = None) -> dict:
|
||||
"""
|
||||
Start a browser context for the given profile.
|
||||
Explicitly create a browser context (tab group) for ``profile``.
|
||||
|
||||
Creates a tab group in the user's Chrome via the Beeline extension.
|
||||
No separate browser process is launched - uses the user's existing Chrome.
|
||||
Most workflows do NOT need to call this directly: ``browser_open``
|
||||
and ``browser_navigate`` lazy-create a context on first use, so a
|
||||
single ``browser_open(url)`` covers the cold path. Reach for
|
||||
``browser_start`` when you want to (a) warm a profile without
|
||||
opening a URL yet, or (b) recreate a context after
|
||||
``browser_stop`` to clear stale state.
|
||||
|
||||
No separate browser process is launched — uses the user's
|
||||
existing Chrome via the Beeline extension.
|
||||
|
||||
Args:
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with start status including groupId and initial tabId
|
||||
Dict with start status (``"started"`` on fresh creation,
|
||||
``"already_running"`` when a context for the profile exists),
|
||||
including ``groupId`` and ``activeTabId``.
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
params = {"profile": profile}
|
||||
@@ -221,14 +273,11 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
|
||||
log_tool_call("browser_start", params, result=result)
|
||||
return result
|
||||
|
||||
profile_name = _resolve_profile(profile)
|
||||
|
||||
# Check if already running
|
||||
if profile_name in _contexts:
|
||||
ctx = _contexts[profile_name]
|
||||
try:
|
||||
profile_name, ctx, created = await _ensure_context(bridge, profile)
|
||||
result = {
|
||||
"ok": True,
|
||||
"status": "already_running",
|
||||
"status": "started" if created else "already_running",
|
||||
"profile": profile_name,
|
||||
"groupId": ctx.get("groupId"),
|
||||
"activeTabId": ctx.get("activeTabId"),
|
||||
@@ -240,42 +289,6 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return result
|
||||
|
||||
try:
|
||||
result = await bridge.create_context(profile_name)
|
||||
group_id = result.get("groupId")
|
||||
tab_id = result.get("tabId")
|
||||
|
||||
_contexts[profile_name] = {
|
||||
"groupId": group_id,
|
||||
"activeTabId": tab_id,
|
||||
"_seedTabId": tab_id, # reused by first browser_open call
|
||||
"tabs": {tab_id} if tab_id is not None else set(),
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"Started browser context '%s': groupId=%s, tabId=%s",
|
||||
profile_name,
|
||||
group_id,
|
||||
tab_id,
|
||||
)
|
||||
|
||||
log_context_event("start", profile_name, group_id=group_id, tab_id=tab_id)
|
||||
|
||||
result = {
|
||||
"ok": True,
|
||||
"status": "started",
|
||||
"profile": profile_name,
|
||||
"groupId": group_id,
|
||||
"activeTabId": tab_id,
|
||||
}
|
||||
log_tool_call(
|
||||
"browser_start",
|
||||
params,
|
||||
result=result,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.exception("Failed to start browser context")
|
||||
result = {"ok": False, "error": str(e)}
|
||||
|
||||
@@ -14,6 +14,7 @@ from fastmcp import FastMCP
|
||||
|
||||
from ..bridge import get_bridge
|
||||
from ..telemetry import log_tool_call
|
||||
from .lifecycle import _ensure_context
|
||||
from .tabs import _get_context
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -32,8 +33,14 @@ def register_navigation_tools(mcp: FastMCP) -> None:
|
||||
"""
|
||||
Navigate a tab to a URL.
|
||||
|
||||
This tool waits for the page to reach the ``wait_until`` condition
|
||||
before returning.
|
||||
Lazy-creates a browser context if none exists (no need to call
|
||||
``browser_start`` first); when no ``tab_id`` is given and the
|
||||
context was just created, navigation lands on the seed tab.
|
||||
Prefer ``browser_open`` when you specifically want a new tab —
|
||||
``browser_navigate`` is for redirecting an existing tab.
|
||||
|
||||
Waits for the page to reach the ``wait_until`` condition before
|
||||
returning.
|
||||
|
||||
Args:
|
||||
url: URL to navigate to
|
||||
@@ -54,10 +61,16 @@ def register_navigation_tools(mcp: FastMCP) -> None:
|
||||
log_tool_call("browser_navigate", params, result=result)
|
||||
return result
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
log_tool_call("browser_navigate", params, result=result)
|
||||
try:
|
||||
_, ctx, _ = await _ensure_context(bridge, profile)
|
||||
except Exception as e:
|
||||
result = {"ok": False, "error": str(e)}
|
||||
log_tool_call(
|
||||
"browser_navigate",
|
||||
params,
|
||||
error=e,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return result
|
||||
|
||||
target_tab = tab_id or ctx.get("activeTabId")
|
||||
|
||||
@@ -16,7 +16,7 @@ from pydantic import Field
|
||||
from ..bridge import get_bridge
|
||||
from ..session import _active_profile
|
||||
from ..telemetry import log_tool_call
|
||||
from .lifecycle import _contexts
|
||||
from .lifecycle import _contexts, _ensure_context
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -98,10 +98,14 @@ def register_tab_tools(mcp: FastMCP) -> None:
|
||||
profile: str | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Open a new browser tab and navigate to the given URL.
|
||||
Open a browser tab at the given URL — preferred entry point.
|
||||
|
||||
The tab is automatically added to the agent's tab group.
|
||||
This tool waits for the page to load before returning.
|
||||
This is the agent's primary "go to a page" tool. If no browser
|
||||
context exists yet for the profile, one is created transparently
|
||||
(no need to call ``browser_start`` first). The first call after
|
||||
a fresh context reuses the seed ``about:blank`` tab; subsequent
|
||||
calls open new tabs in the agent's tab group. Waits for the
|
||||
page to load before returning.
|
||||
|
||||
Args:
|
||||
url: URL to navigate to
|
||||
@@ -120,13 +124,8 @@ def register_tab_tools(mcp: FastMCP) -> None:
|
||||
log_tool_call("browser_open", params, result=result)
|
||||
return result
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
log_tool_call("browser_open", params, result=result)
|
||||
return result
|
||||
|
||||
try:
|
||||
_, ctx, _ = await _ensure_context(bridge, profile)
|
||||
# Reuse the seed about:blank tab from context.create on first open
|
||||
seed_tab = ctx.pop("_seedTabId", None)
|
||||
if seed_tab is not None:
|
||||
|
||||
Reference in New Issue
Block a user