feat: browser tools audit and improvements
This commit is contained in:
@@ -246,9 +246,11 @@ search inside files, target='files' (with a glob like '*.py') to list \
|
||||
or find files. Mtime-sorted in files mode.
|
||||
|
||||
## Browser Automation (gcu-tools MCP)
|
||||
- Use `browser_*` tools (browser_start, browser_navigate, browser_click, \
|
||||
browser_fill, browser_snapshot, <!-- vision-only -->browser_screenshot, <!-- /vision-only -->browser_scroll, \
|
||||
browser_tabs, browser_close, browser_evaluate, etc.).
|
||||
- Use `browser_*` tools — `browser_open(url)` is the cold-start entry point \
|
||||
(lazy-creates the context; no `browser_start` first). Then `browser_navigate`, \
|
||||
`browser_click`, `browser_type`, `browser_snapshot`, \
|
||||
<!-- vision-only -->`browser_screenshot`, <!-- /vision-only -->`browser_scroll`, \
|
||||
`browser_tabs`, `browser_close`, `browser_evaluate`, etc.
|
||||
- MUST Follow the browser-automation skill protocol before using browser tools.
|
||||
|
||||
## Hand off to a colony
|
||||
|
||||
@@ -36,20 +36,39 @@ logger = logging.getLogger(__name__)
|
||||
# the named entries only).
|
||||
|
||||
_TOOL_CATEGORIES: dict[str, list[str]] = {
|
||||
# Unified file ops — read, write, edit, search across the post-refactor
|
||||
# files-tools MCP server (read_file, write_file, edit_file, hashline_edit,
|
||||
# apply_patch, search_files).
|
||||
# Unified file ops — read, write, edit, search across the files-tools
|
||||
# MCP server (read_file, write_file, edit_file, search_files). pdf_read
|
||||
# lives in hive_tools so it's listed explicitly; without it queens
|
||||
# cannot read PDF documents by default.
|
||||
"file_ops": [
|
||||
"@server:files-tools",
|
||||
"pdf_read",
|
||||
],
|
||||
# Terminal + process control — engineering personas only.
|
||||
# The terminal-tools MCP server covers foreground exec with auto-promotion,
|
||||
# background jobs, persistent PTY sessions, and ripgrep/find search.
|
||||
"terminal": [
|
||||
"@server:terminal-tools",
|
||||
# Terminal basic — the 3-tool subset queens get out of the box.
|
||||
# terminal_exec — foreground command execution (Bash equivalent)
|
||||
# terminal_rg — ripgrep content search (Grep equivalent)
|
||||
# terminal_find — glob/find file listing (Glob equivalent)
|
||||
"terminal_basic": [
|
||||
"terminal_exec",
|
||||
"terminal_rg",
|
||||
"terminal_find",
|
||||
],
|
||||
# Terminal advanced — the power-user tools beyond the basics. Not in
|
||||
# any role default; opt in explicitly per-queen via the Tool Library.
|
||||
# terminal_job_* — background job lifecycle (start/manage/logs)
|
||||
# terminal_output_get — fetch captured output from foreground exec
|
||||
# terminal_pty_* — persistent PTY sessions (open/run/close)
|
||||
"terminal_advanced": [
|
||||
"terminal_job_start",
|
||||
"terminal_job_manage",
|
||||
"terminal_job_logs",
|
||||
"terminal_output_get",
|
||||
"terminal_pty_open",
|
||||
"terminal_pty_run",
|
||||
"terminal_pty_close",
|
||||
],
|
||||
# Tabular data. CSV/Excel read/write + DuckDB SQL.
|
||||
"advanced_spreadsheet": [
|
||||
"spreadsheet_advanced": [
|
||||
"csv_read",
|
||||
"csv_info",
|
||||
"csv_write",
|
||||
@@ -75,8 +94,6 @@ _TOOL_CATEGORIES: dict[str, list[str]] = {
|
||||
"browser_open",
|
||||
"browser_close",
|
||||
"browser_activate_tab",
|
||||
"browser_close_all",
|
||||
"browser_close_finished",
|
||||
"browser_navigate",
|
||||
"browser_go_back",
|
||||
"browser_go_forward",
|
||||
@@ -98,7 +115,6 @@ _TOOL_CATEGORIES: dict[str, list[str]] = {
|
||||
"browser_click",
|
||||
"browser_click_coordinate",
|
||||
"browser_type",
|
||||
"browser_fill",
|
||||
"browser_type_focused",
|
||||
"browser_press",
|
||||
"browser_press_at",
|
||||
@@ -110,13 +126,32 @@ _TOOL_CATEGORIES: dict[str, list[str]] = {
|
||||
"browser_wait",
|
||||
"browser_resize",
|
||||
"browser_upload",
|
||||
"browser_dialog",
|
||||
],
|
||||
# Research — paper search, Wikipedia, ad-hoc web scrape. Pair with
|
||||
# browser_basic for richer site-by-site research; this category is the
|
||||
# lightweight always-available fallback.
|
||||
"research": [
|
||||
"search_papers",
|
||||
"download_paper",
|
||||
"search_wikipedia",
|
||||
"web_scrape",
|
||||
],
|
||||
# Security — defensive scanning and reconnaissance. Engineering-only
|
||||
# surface; the rest of the queens shouldn't see port scanners.
|
||||
"security": [
|
||||
"port_scan",
|
||||
"dns_security_scan",
|
||||
"http_headers_scan",
|
||||
"ssl_tls_scan",
|
||||
"subdomain_enumerate",
|
||||
"tech_stack_detect",
|
||||
"risk_score",
|
||||
],
|
||||
# Lightweight context helpers — good default for every queen.
|
||||
"time_context": [
|
||||
"get_current_time",
|
||||
"get_account_info",
|
||||
]
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@@ -137,24 +172,26 @@ QUEEN_DEFAULT_CATEGORIES: dict[str, list[str]] = {
|
||||
# Head of Technology — builds and operates systems; full toolkit.
|
||||
"queen_technology": [
|
||||
"file_ops",
|
||||
"terminal",
|
||||
"terminal_basic",
|
||||
"browser_basic",
|
||||
"browser_interaction",
|
||||
"research",
|
||||
"security",
|
||||
"time_context",
|
||||
],
|
||||
# Head of Growth — data, experiments, competitor research; no terminal/security.
|
||||
# Head of Growth — data, experiments, competitor research; no security.
|
||||
"queen_growth": [
|
||||
"file_ops",
|
||||
"terminal_basic",
|
||||
"browser_basic",
|
||||
"browser_interaction",
|
||||
"research",
|
||||
"time_context",
|
||||
],
|
||||
# Head of Product Strategy — user research + roadmaps; no terminal/security.
|
||||
# Head of Product Strategy — user research + roadmaps; no security.
|
||||
"queen_product_strategy": [
|
||||
"file_ops",
|
||||
"terminal_basic",
|
||||
"browser_basic",
|
||||
"browser_interaction",
|
||||
"research",
|
||||
@@ -163,23 +200,26 @@ QUEEN_DEFAULT_CATEGORIES: dict[str, list[str]] = {
|
||||
# Head of Finance — financial models (CSV/Excel heavy), market research.
|
||||
"queen_finance_fundraising": [
|
||||
"file_ops",
|
||||
"advanced_spreadsheet",
|
||||
"terminal_basic",
|
||||
"spreadsheet_advanced",
|
||||
"browser_basic",
|
||||
"browser_interaction",
|
||||
"research",
|
||||
"time_context",
|
||||
],
|
||||
# Head of Legal — reads contracts/PDFs, researches; no terminal/data/security.
|
||||
# Head of Legal — reads contracts/PDFs, researches; no data/security.
|
||||
"queen_legal": [
|
||||
"file_ops",
|
||||
"terminal_basic",
|
||||
"browser_basic",
|
||||
"browser_interaction",
|
||||
"research",
|
||||
"time_context",
|
||||
],
|
||||
# Head of Brand & Design — visual refs, style guides; no terminal/data/security.
|
||||
# Head of Brand & Design — visual refs, style guides; no data/security.
|
||||
"queen_brand_design": [
|
||||
"file_ops",
|
||||
"terminal_basic",
|
||||
"browser_basic",
|
||||
"browser_interaction",
|
||||
"research",
|
||||
@@ -188,6 +228,8 @@ QUEEN_DEFAULT_CATEGORIES: dict[str, list[str]] = {
|
||||
# Head of Talent — candidate pipelines, resumes; data + browser heavy.
|
||||
"queen_talent": [
|
||||
"file_ops",
|
||||
"terminal_basic",
|
||||
"spreadsheet_advanced",
|
||||
"browser_basic",
|
||||
"browser_interaction",
|
||||
"research",
|
||||
@@ -196,7 +238,8 @@ QUEEN_DEFAULT_CATEGORIES: dict[str, list[str]] = {
|
||||
# Head of Operations — processes, automation, observability.
|
||||
"queen_operations": [
|
||||
"file_ops",
|
||||
"data",
|
||||
"terminal_basic",
|
||||
"spreadsheet_advanced",
|
||||
"browser_basic",
|
||||
"browser_interaction",
|
||||
"research",
|
||||
|
||||
@@ -17,8 +17,8 @@ Use browser nodes (with `tools: {policy: "all"}`) when:
|
||||
## Available Browser Tools
|
||||
|
||||
All tools are prefixed with `browser_`:
|
||||
- `browser_start`, `browser_open`, `browser_navigate` — launch/navigate
|
||||
- `browser_click`, `browser_click_coordinate`, `browser_fill`, `browser_type`, `browser_type_focused` — interact
|
||||
- `browser_open`, `browser_navigate` — preferred entry points; both lazy-create a browser context, so a single `browser_open(url)` covers the cold path. Use `browser_start` only to warm a profile without a URL or to recreate a context after `browser_stop`.
|
||||
- `browser_click`, `browser_click_coordinate`, `browser_type`, `browser_type_focused` — interact
|
||||
- `browser_press` (with optional `modifiers=["ctrl"]` etc.) — keyboard shortcuts
|
||||
- `browser_snapshot` — compact accessibility-tree read (structured)
|
||||
<!-- vision-only -->
|
||||
|
||||
@@ -35,7 +35,7 @@ Follow these rules for reliable, efficient browser interaction.
|
||||
Use snapshot first for structure and ordinary controls; switch to
|
||||
screenshot when snapshot can't find or verify the target. Interaction
|
||||
tools (`browser_click`, `browser_type`, `browser_type_focused`,
|
||||
`browser_fill`, `browser_scroll`) wait 0.5 s for the page to settle
|
||||
`browser_scroll`) wait 0.5 s for the page to settle
|
||||
after a successful action, then attach a fresh snapshot under the
|
||||
`snapshot` key of their result — so don't call `browser_snapshot`
|
||||
separately after an interaction unless you need a newer view. Tune
|
||||
|
||||
@@ -113,7 +113,7 @@ Even after `wait_until="load"`, React/Vue SPAs often render their real chrome in
|
||||
### Reading pages efficiently
|
||||
|
||||
- **Prefer `browser_snapshot` over `browser_get_text("body")`** — returns a compact ~1–5 KB accessibility tree vs 100+ KB of raw HTML.
|
||||
- Interaction tools `browser_click`, `browser_type`, `browser_type_focused`, `browser_fill`, and `browser_scroll` wait 0.5 s for the page to settle after a successful action, then attach a fresh accessibility snapshot under the `snapshot` key of their result. Use it to decide your next action — do NOT call `browser_snapshot` separately after every action. Tune the capture via `auto_snapshot_mode`: `"default"` (full tree, the default), `"simple"` (trims unnamed structural nodes), `"interactive"` (only controls — tightest token footprint), or `"off"` to skip the capture entirely (useful when batching several interactions and you don't need the intermediate trees). Call `browser_snapshot` explicitly only when you need a newer view or a different mode than what was auto-captured.
|
||||
- Interaction tools `browser_click`, `browser_type`, `browser_type_focused`, and `browser_scroll` wait 0.5 s for the page to settle after a successful action, then attach a fresh accessibility snapshot under the `snapshot` key of their result. Use it to decide your next action — do NOT call `browser_snapshot` separately after every action. Tune the capture via `auto_snapshot_mode`: `"default"` (full tree, the default), `"simple"` (trims unnamed structural nodes), `"interactive"` (only controls — tightest token footprint), or `"off"` to skip the capture entirely (useful when batching several interactions and you don't need the intermediate trees). Call `browser_snapshot` explicitly only when you need a newer view or a different mode than what was auto-captured.
|
||||
- Complex pages (LinkedIn, Twitter/X, SPAs with virtual scrolling) can have DOMs that don't match what's visually rendered — snapshot refs may be stale, missing, or misaligned with visible layout. Try the available snapshot first; when the target is not present in that snapshot or visual position matters, switch to `browser_screenshot` to orient yourself.
|
||||
- Only fall back to `browser_get_text` for extracting specific small elements by CSS selector.
|
||||
|
||||
|
||||
@@ -460,9 +460,9 @@ const CATEGORIES = {
|
||||
'Lifecycle': ['browser_setup', 'browser_start', 'browser_stop', 'browser_status'],
|
||||
'Tabs': ['browser_tabs', 'browser_open', 'browser_close', 'browser_close_all', 'browser_close_finished', 'browser_activate_tab'],
|
||||
'Navigation': ['browser_navigate', 'browser_go_back', 'browser_go_forward', 'browser_reload'],
|
||||
'Interactions': ['browser_click', 'browser_click_coordinate', 'browser_type', 'browser_type_focused', 'browser_fill', 'browser_press', 'browser_press_at', 'browser_hover', 'browser_hover_coordinate', 'browser_select', 'browser_scroll', 'browser_drag'],
|
||||
'Interactions': ['browser_click', 'browser_click_coordinate', 'browser_type', 'browser_type_focused', 'browser_press', 'browser_press_at', 'browser_hover', 'browser_hover_coordinate', 'browser_select', 'browser_scroll', 'browser_drag'],
|
||||
'Inspection': ['browser_screenshot', 'browser_snapshot', 'browser_console', 'browser_html', 'browser_get_text', 'browser_get_attribute', 'browser_get_rect', 'browser_shadow_query', 'browser_evaluate', 'browser_wait'],
|
||||
'Advanced': ['browser_resize', 'browser_upload', 'browser_dialog'],
|
||||
'Advanced': ['browser_resize', 'browser_upload'],
|
||||
};
|
||||
|
||||
async function init() {
|
||||
|
||||
@@ -46,9 +46,9 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
- Navigation: browser_navigate, browser_go_back, browser_go_forward, browser_reload
|
||||
- Inspection: browser_screenshot, browser_snapshot, browser_console
|
||||
- Interactions: browser_click, browser_click_coordinate, browser_type, browser_type_focused,
|
||||
browser_fill, browser_press, browser_hover, browser_select, browser_scroll, browser_drag
|
||||
browser_press, browser_hover, browser_select, browser_scroll, browser_drag
|
||||
- Advanced: browser_wait, browser_evaluate, browser_get_text, browser_get_attribute,
|
||||
browser_resize, browser_upload, browser_dialog
|
||||
browser_resize, browser_upload
|
||||
"""
|
||||
register_lifecycle_tools(mcp)
|
||||
register_tab_tools(mcp)
|
||||
|
||||
@@ -35,16 +35,6 @@ TOOL_SCHEMAS: dict[str, dict] = {
|
||||
"use_insert_text": {"type": "boolean", "default": True},
|
||||
},
|
||||
},
|
||||
"browser_fill": {
|
||||
"description": "Fill an input element (clears existing content first).",
|
||||
"params": {
|
||||
"selector": {"type": "string", "required": True},
|
||||
"value": {"type": "string", "required": True},
|
||||
"tab_id": {"type": "integer"},
|
||||
"profile": {"type": "string"},
|
||||
"timeout_ms": {"type": "integer", "default": 30000},
|
||||
},
|
||||
},
|
||||
"browser_type_focused": {
|
||||
"description": (
|
||||
"Type text into the already-focused element. Use after browser_click_coordinate "
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
Browser advanced tools - wait, evaluate, get_text, get_attribute, resize, dialog.
|
||||
Browser advanced tools - wait, evaluate, get_text, get_attribute, resize, upload.
|
||||
|
||||
All operations go through the Beeline extension via CDP - no Playwright required.
|
||||
"""
|
||||
@@ -8,7 +8,6 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Literal
|
||||
|
||||
from fastmcp import FastMCP
|
||||
|
||||
@@ -394,54 +393,3 @@ def register_advanced_tools(mcp: FastMCP) -> None:
|
||||
}
|
||||
except Exception as e:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
@mcp.tool()
async def browser_dialog(
    action: Literal["accept", "dismiss"] = "accept",
    prompt_text: str | None = None,
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
) -> dict:
    """
    Prepare a tab for JavaScript dialog handling (alert, confirm, prompt).

    Intended to be called BEFORE the action that opens the dialog.

    What this body does: it attaches the debugger to the target tab and
    enables the CDP ``Page`` domain, then reports ``"handler_set"``. It
    does not register a ``Page.javascriptDialogOpening`` listener, and
    ``action``, ``prompt_text`` and ``timeout_ms`` are accepted but never
    read here — as the returned ``suggestion`` says, dialogs still have to
    be handled manually or through ``browser_evaluate``.

    Args:
        action: Requested handling - "accept" or "dismiss" (currently unused)
        prompt_text: Text for prompt dialogs (optional, currently unused)
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout in ms (default: 30000, currently unused)

    Returns:
        Dict with ``ok`` plus either the preparation status or an ``error``
        message.
    """
    bridge = get_bridge()
    if not (bridge and bridge.is_connected):
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started"}

    # An explicit tab_id wins; otherwise fall back to the context's active tab.
    target_tab = tab_id if tab_id else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        await bridge.cdp_attach(target_tab)
        await bridge._cdp(target_tab, "Page.enable")
    except Exception as e:
        return {"ok": False, "error": str(e)}
    else:
        return {
            "ok": True,
            "action": "handler_set",
            "message": "Dialog handler prepared.",
            "suggestion": "Handle dialogs manually or use browser_evaluate.",
        }
|
||||
|
||||
@@ -384,48 +384,6 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
log_tool_call("browser_type", params, error=e, duration_ms=(time.perf_counter() - start) * 1000)
|
||||
return result
|
||||
|
||||
@mcp.tool()
async def browser_fill(
    selector: str,
    value: str,
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
    auto_snapshot_mode: AutoSnapshotMode = "simple",
) -> dict:
    """
    Replace the contents of an input element with ``value``.

    Thin wrapper over ``browser_type``: the call is forwarded with
    ``clear_first=True`` (existing content is cleared before typing) and
    ``delay_ms=0``, which makes it the quicker choice for form fields.

    Args:
        selector: CSS selector for the input element
        value: Value to fill
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout waiting for element (default: 30000)
        auto_snapshot_mode: Controls the accessibility snapshot taken
            0.5s after a successful fill. ``"simple"`` (the default)
            trims unnamed structural nodes; ``"default"`` returns the
            full tree; ``"interactive"`` returns only controls for the
            tightest token footprint; ``"off"`` skips the capture —
            use when batching.

    Returns:
        Whatever ``browser_type`` returns for the forwarded call. Includes
        ``snapshot`` unless ``auto_snapshot_mode="off"`` or the fill failed.
    """
    # Caller-supplied arguments, passed through under browser_type's names.
    forwarded = {
        "selector": selector,
        "text": value,
        "tab_id": tab_id,
        "profile": profile,
        "timeout_ms": timeout_ms,
        "auto_snapshot_mode": auto_snapshot_mode,
    }
    # Fixed settings that turn a "type" into a "fill".
    outcome = await browser_type(delay_ms=0, clear_first=True, **forwarded)
    return outcome
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_type_focused(
|
||||
text: str,
|
||||
|
||||
@@ -52,6 +52,49 @@ def _clear_profile_tab_caches(ctx: dict[str, Any]) -> None:
|
||||
clear_tab_highlights(tab_ids)
|
||||
|
||||
|
||||
async def _ensure_context(
    bridge: Any,
    profile: str | None,
) -> tuple[str, dict[str, Any], bool]:
    """Look up the browser context for ``profile``, creating it if absent.

    Returns a ``(profile_name, ctx, created)`` triple. ``created`` is
    ``False`` when the resolved profile name already had an entry in
    ``_contexts`` and ``True`` when this call asked the bridge for a new
    context (tab group + seed tab) and registered it.

    This is what lets URL-taking tools (``browser_open`` /
    ``browser_navigate``) serve as the agent's single cold-start entry
    point instead of requiring an explicit ``browser_start`` round trip.

    Connectivity of ``bridge`` is not checked here; the caller must do
    that first. Nothing is caught either: a failure in
    ``bridge.create_context`` propagates so the caller's own try/except
    can convert it to an ``{"ok": False, ...}`` result.
    """
    name = _resolve_profile(profile)

    cached = _contexts.get(name)
    if cached is not None:
        return name, cached, False

    # Cold path: ask the bridge for a fresh context.
    reply = await bridge.create_context(name)
    group_id = reply.get("groupId")
    seed_tab = reply.get("tabId")

    tracked_tabs = set() if seed_tab is None else {seed_tab}
    fresh_ctx: dict[str, Any] = {
        "groupId": group_id,
        "activeTabId": seed_tab,
        "_seedTabId": seed_tab,  # reused by first browser_open call
        "tabs": tracked_tabs,
    }
    # Register before logging so the context is visible as soon as it exists.
    _contexts[name] = fresh_ctx

    logger.info(
        "Started browser context '%s': groupId=%s, tabId=%s",
        name,
        group_id,
        seed_tab,
    )
    log_context_event("start", name, group_id=group_id, tab_id=seed_tab)

    return name, fresh_ctx, True
|
||||
|
||||
|
||||
async def shutdown_all_contexts() -> None:
|
||||
"""Close all active browser contexts. Called at GCU server shutdown."""
|
||||
if not _contexts:
|
||||
@@ -198,16 +241,25 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
|
||||
@mcp.tool()
|
||||
async def browser_start(profile: str | None = None) -> dict:
|
||||
"""
|
||||
Start a browser context for the given profile.
|
||||
Explicitly create a browser context (tab group) for ``profile``.
|
||||
|
||||
Creates a tab group in the user's Chrome via the Beeline extension.
|
||||
No separate browser process is launched - uses the user's existing Chrome.
|
||||
Most workflows do NOT need to call this directly: ``browser_open``
|
||||
and ``browser_navigate`` lazy-create a context on first use, so a
|
||||
single ``browser_open(url)`` covers the cold path. Reach for
|
||||
``browser_start`` when you want to (a) warm a profile without
|
||||
opening a URL yet, or (b) recreate a context after
|
||||
``browser_stop`` to clear stale state.
|
||||
|
||||
No separate browser process is launched — uses the user's
|
||||
existing Chrome via the Beeline extension.
|
||||
|
||||
Args:
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with start status including groupId and initial tabId
|
||||
Dict with start status (``"started"`` on fresh creation,
|
||||
``"already_running"`` when a context for the profile exists),
|
||||
including ``groupId`` and ``activeTabId``.
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
params = {"profile": profile}
|
||||
@@ -221,14 +273,11 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
|
||||
log_tool_call("browser_start", params, result=result)
|
||||
return result
|
||||
|
||||
profile_name = _resolve_profile(profile)
|
||||
|
||||
# Check if already running
|
||||
if profile_name in _contexts:
|
||||
ctx = _contexts[profile_name]
|
||||
try:
|
||||
profile_name, ctx, created = await _ensure_context(bridge, profile)
|
||||
result = {
|
||||
"ok": True,
|
||||
"status": "already_running",
|
||||
"status": "started" if created else "already_running",
|
||||
"profile": profile_name,
|
||||
"groupId": ctx.get("groupId"),
|
||||
"activeTabId": ctx.get("activeTabId"),
|
||||
@@ -240,42 +289,6 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return result
|
||||
|
||||
try:
|
||||
result = await bridge.create_context(profile_name)
|
||||
group_id = result.get("groupId")
|
||||
tab_id = result.get("tabId")
|
||||
|
||||
_contexts[profile_name] = {
|
||||
"groupId": group_id,
|
||||
"activeTabId": tab_id,
|
||||
"_seedTabId": tab_id, # reused by first browser_open call
|
||||
"tabs": {tab_id} if tab_id is not None else set(),
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"Started browser context '%s': groupId=%s, tabId=%s",
|
||||
profile_name,
|
||||
group_id,
|
||||
tab_id,
|
||||
)
|
||||
|
||||
log_context_event("start", profile_name, group_id=group_id, tab_id=tab_id)
|
||||
|
||||
result = {
|
||||
"ok": True,
|
||||
"status": "started",
|
||||
"profile": profile_name,
|
||||
"groupId": group_id,
|
||||
"activeTabId": tab_id,
|
||||
}
|
||||
log_tool_call(
|
||||
"browser_start",
|
||||
params,
|
||||
result=result,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.exception("Failed to start browser context")
|
||||
result = {"ok": False, "error": str(e)}
|
||||
|
||||
@@ -14,6 +14,7 @@ from fastmcp import FastMCP
|
||||
|
||||
from ..bridge import get_bridge
|
||||
from ..telemetry import log_tool_call
|
||||
from .lifecycle import _ensure_context
|
||||
from .tabs import _get_context
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -32,8 +33,14 @@ def register_navigation_tools(mcp: FastMCP) -> None:
|
||||
"""
|
||||
Navigate a tab to a URL.
|
||||
|
||||
This tool waits for the page to reach the ``wait_until`` condition
|
||||
before returning.
|
||||
Lazy-creates a browser context if none exists (no need to call
|
||||
``browser_start`` first); when no ``tab_id`` is given and the
|
||||
context was just created, navigation lands on the seed tab.
|
||||
Prefer ``browser_open`` when you specifically want a new tab —
|
||||
``browser_navigate`` is for redirecting an existing tab.
|
||||
|
||||
Waits for the page to reach the ``wait_until`` condition before
|
||||
returning.
|
||||
|
||||
Args:
|
||||
url: URL to navigate to
|
||||
@@ -54,10 +61,16 @@ def register_navigation_tools(mcp: FastMCP) -> None:
|
||||
log_tool_call("browser_navigate", params, result=result)
|
||||
return result
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
log_tool_call("browser_navigate", params, result=result)
|
||||
try:
|
||||
_, ctx, _ = await _ensure_context(bridge, profile)
|
||||
except Exception as e:
|
||||
result = {"ok": False, "error": str(e)}
|
||||
log_tool_call(
|
||||
"browser_navigate",
|
||||
params,
|
||||
error=e,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return result
|
||||
|
||||
target_tab = tab_id or ctx.get("activeTabId")
|
||||
|
||||
@@ -16,7 +16,7 @@ from pydantic import Field
|
||||
from ..bridge import get_bridge
|
||||
from ..session import _active_profile
|
||||
from ..telemetry import log_tool_call
|
||||
from .lifecycle import _contexts
|
||||
from .lifecycle import _contexts, _ensure_context
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -98,10 +98,14 @@ def register_tab_tools(mcp: FastMCP) -> None:
|
||||
profile: str | None = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Open a new browser tab and navigate to the given URL.
|
||||
Open a browser tab at the given URL — preferred entry point.
|
||||
|
||||
The tab is automatically added to the agent's tab group.
|
||||
This tool waits for the page to load before returning.
|
||||
This is the agent's primary "go to a page" tool. If no browser
|
||||
context exists yet for the profile, one is created transparently
|
||||
(no need to call ``browser_start`` first). The first call after
|
||||
a fresh context reuses the seed ``about:blank`` tab; subsequent
|
||||
calls open new tabs in the agent's tab group. Waits for the
|
||||
page to load before returning.
|
||||
|
||||
Args:
|
||||
url: URL to navigate to
|
||||
@@ -120,13 +124,8 @@ def register_tab_tools(mcp: FastMCP) -> None:
|
||||
log_tool_call("browser_open", params, result=result)
|
||||
return result
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
log_tool_call("browser_open", params, result=result)
|
||||
return result
|
||||
|
||||
try:
|
||||
_, ctx, _ = await _ensure_context(bridge, profile)
|
||||
# Reuse the seed about:blank tab from context.create on first open
|
||||
seed_tab = ctx.pop("_seedTabId", None)
|
||||
if seed_tab is not None:
|
||||
|
||||
Reference in New Issue
Block a user