feat: improve web search and consolidate browser open

This commit is contained in:
Richard Tang
2026-05-01 14:55:20 -07:00
parent b939a875a7
commit a09eac06f1
21 changed files with 414 additions and 168 deletions
-1
View File
@@ -47,7 +47,6 @@
"Bash(grep -v ':0$')",
"Bash(curl -s -m 2 http://127.0.0.1:4002/sse -o /dev/null -w 'status=%{http_code} time=%{time_total}s\\\\n')",
"mcp__gcu-tools__browser_status",
"mcp__gcu-tools__browser_start",
"mcp__gcu-tools__browser_navigate",
"mcp__gcu-tools__browser_evaluate",
"mcp__gcu-tools__browser_screenshot",
@@ -214,7 +214,7 @@ Curated list of known browser automation edge cases with symptoms, causes, and f
| **Symptom** | `browser_open()` returns `"No group with id: XXXXXXX"` even though `browser_status` shows `running: true` |
| **Root Cause** | In-memory `_contexts` dict has a stale `groupId` from a Chrome tab group that was closed outside the tool (e.g. user closed the tab group) |
| **Detection** | `browser_status` returns `running: true` but `browser_open` fails with "No group with id" |
| **Fix** | Call `browser_stop()` to clear stale context from `_contexts`, then `browser_start()` again |
| **Fix** | Call `browser_stop()` to clear stale context from `_contexts`, then `browser_open(url)` to lazy-create a fresh one |
| **Code** | `tools/lifecycle.py:144-160` - `already_running` check uses cached dict without validating against Chrome |
| **Verified** | 2026-04-03 ✓ |
@@ -249,7 +249,7 @@ or find files. Mtime-sorted in files mode.
## Browser Automation (gcu-tools MCP)
- Use `browser_*` tools — `browser_open(url)` is the cold-start entry point \
(lazy-creates the context; no `browser_start` first). Then `browser_navigate`, \
(lazy-creates the context; no separate "start" call). Then `browser_navigate`, \
`browser_click`, `browser_type`, `browser_snapshot`, \
<!-- vision-only -->`browser_screenshot`, <!-- /vision-only -->`browser_scroll`, \
`browser_tabs`, `browser_close`, `browser_evaluate`, etc.
@@ -88,7 +88,6 @@ _TOOL_CATEGORIES: dict[str, list[str]] = {
"browser_basic": [
"browser_setup",
"browser_status",
"browser_start",
"browser_stop",
"browser_tabs",
"browser_open",
@@ -17,7 +17,7 @@ Use browser nodes (with `tools: {policy: "all"}`) when:
## Available Browser Tools
All tools are prefixed with `browser_`:
- `browser_open`, `browser_navigate` preferred entry points; both lazy-create a browser context, so a single `browser_open(url)` covers the cold path. Use `browser_start` only to warm a profile without a URL or to recreate a context after `browser_stop`.
- `browser_open`, `browser_navigate` — both lazy-create the browser context, so a single `browser_open(url)` covers the cold path. To recover from a stale context, call `browser_stop` then `browser_open(url)` again.
- `browser_click`, `browser_click_coordinate`, `browser_type`, `browser_type_focused` — interact
- `browser_press` (with optional `modifiers=["ctrl"]` etc.) — keyboard shortcuts
- `browser_snapshot` — compact accessibility-tree read (structured)
+1 -1
View File
@@ -158,7 +158,7 @@ cookie consent banners if they block content.
- If `browser_snapshot` fails, try `browser_get_text` with a narrow
selector as fallback.
- If `browser_open` fails or the page seems stale, `browser_stop`
`browser_start` retry.
  then `browser_open(url)` to lazy-create a fresh context.
## `browser_evaluate`
@@ -410,7 +410,7 @@ In all of these cases the script is SHORT (< 10 lines) and the result is CONSUME
- If a tool fails, retry once with the same approach.
- If it fails a second time, STOP retrying and switch approach.
- If `browser_snapshot` fails, try `browser_get_text` with a specific small selector as fallback.
- If `browser_open` fails or page seems stale, `browser_stop`, then `browser_start`, then retry.
- If `browser_open` fails or page seems stale, `browser_stop`, then `browser_open(url)` again to recreate a fresh context.
## Verified workflows
@@ -17,16 +17,15 @@ map_search_gcu = NodeSpec(
You are a browser agent. Your job: Search Google Maps for the provided query and extract business names and website URLs.
## Workflow
1. browser_start
2. browser_open(url="https://www.google.com/maps")
3. use the url query to search for the keyword
3.1 alternatively, use browser_type or browser_click to search for the "query" in memory.'
4. browser_wait(seconds=3)
5. browser_snapshot to find the list of results.
6. For each relevant result, extract:
1. browser_open(url="https://www.google.com/maps") # lazy-creates the context
2. use the url query to search for the keyword
2.1 alternatively, use browser_type or browser_click to search for the "query" in memory.
3. browser_wait(seconds=3)
4. browser_snapshot to find the list of results.
5. For each relevant result, extract:
- Name of the business
- Website URL (look for the website icon/link)
7. set_output("business_list", [{"name": "...", "website": "..."}, ...])
6. set_output("business_list", [{"name": "...", "website": "..."}, ...])
## Constraints
- Extract at least 5-10 businesses if possible.
@@ -24,13 +24,12 @@ Focus on:
- Hardware/Silicon breakthroughs
## Instructions
1. browser_start
2. For each handle:
a. browser_open(url=f"https://x.com/{handle}")
1. For each handle:
a. browser_open(url=f"https://x.com/{handle}") # lazy-creates the context on first call
b. browser_wait(seconds=5)
c. browser_snapshot
d. Parse relevant tech news text
3. set_output("raw_tweets", consolidated_json)
2. set_output("raw_tweets", consolidated_json)
""",
)
+6 -4
View File
@@ -244,12 +244,14 @@ def main() -> None:
logger.error("Failed to connect to GCU server: %s", e)
sys.exit(1)
# Auto-start browser context so tools work immediately
# Warm the browser context so the first interactive call doesn't pay the
# cold-start round trip. about:blank lazy-creates the context just like
# a real URL would, without committing to a destination page.
try:
result = client.call_tool("browser_start", {})
logger.info("browser_start: %s", result)
result = client.call_tool("browser_open", {"url": "about:blank"})
logger.info("browser_open(about:blank): %s", result)
except Exception as e:
logger.warning("browser_start failed (may already be started): %s", e)
logger.warning("browser warm-up failed (may already be running): %s", e)
app = create_app()
+1 -1
View File
@@ -457,7 +457,7 @@ let currentView = 'grid';
// Tool categories for sidebar grouping
const CATEGORIES = {
'Lifecycle': ['browser_setup', 'browser_start', 'browser_stop', 'browser_status'],
'Lifecycle': ['browser_setup', 'browser_stop', 'browser_status'],
'Tabs': ['browser_tabs', 'browser_open', 'browser_close', 'browser_close_all', 'browser_close_finished', 'browser_activate_tab'],
'Navigation': ['browser_navigate', 'browser_go_back', 'browser_go_forward', 'browser_reload'],
'Interactions': ['browser_click', 'browser_click_coordinate', 'browser_type', 'browser_type_focused', 'browser_press', 'browser_press_at', 'browser_hover', 'browser_hover_coordinate', 'browser_select', 'browser_scroll', 'browser_drag'],
+1 -1
View File
@@ -61,7 +61,7 @@ All replies carry `{ id, result }` or `{ id, error }`.
# 1. At GCU server startup, open ws://localhost:9229/beeline and wait for
# the extension to connect (sends { type: "hello" }).
#
# 2. On browser_start(profile):
# 2. On the first browser tool call for a profile (lazy-start via _ensure_context):
# - Send { id, type: "context.create", agentId: profile }
# - Receive { groupId, tabId }
# - Store groupId in the session object (no Chrome process, no CDP port)
@@ -10,12 +10,14 @@ Validates URLs against internal network ranges to prevent SSRF attacks.
from __future__ import annotations
import ipaddress
import json
import re
import socket
from typing import Any
from urllib.parse import urljoin, urlparse
from urllib.robotparser import RobotFileParser
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, NavigableString
from fastmcp import FastMCP
from playwright.async_api import (
Error as PlaywrightError,
@@ -82,6 +84,7 @@ def register_tools(mcp: FastMCP) -> None:
selector: str | None = None,
include_links: bool = False,
max_length: int = 50000,
offset: int = 0,
respect_robots_txt: bool = True,
) -> dict:
"""
@@ -94,12 +97,18 @@ def register_tools(mcp: FastMCP) -> None:
Args:
url: URL of the webpage to scrape
selector: CSS selector to target specific content (e.g., 'article', '.main-content')
include_links: Include extracted links in the response
max_length: Maximum length of extracted text (1000-500000)
include_links: When True, links are inlined as `[text](url)` in
content and also returned as a `links` list
max_length: Maximum length of extracted text returned in this call (1000-500000)
offset: Character offset into the extracted text. Use with
`next_offset` from a prior truncated result to paginate.
respect_robots_txt: Whether to respect robots.txt rules (default True)
Returns:
Dict with scraped content (url, title, description, content, length) or error dict
Dict with: url, final_url, title, description, page_type
(article|listing|page), content, length, offset, total_length,
truncated, next_offset, headings, structured_data (json_ld + open_graph),
and optionally links. On error, returns {"error": str, ...} with a hint when applicable.
"""
try:
# Validate URL
@@ -128,6 +137,10 @@ def register_tools(mcp: FastMCP) -> None:
"error": f"Blocked by robots.txt: {url}",
"url": url,
"skipped": True,
"hint": (
"Pass respect_robots_txt=False if you have "
"authorization to scrape this site."
),
}
except Exception:
pass # If robots.txt can't be fetched, proceed anyway
@@ -195,7 +208,17 @@ def register_tools(mcp: FastMCP) -> None:
return {"error": "Navigation failed: no response received"}
if response.status != 200:
return {"error": f"HTTP {response.status}: Failed to fetch URL"}
hint = (
"Site likely requires auth, blocks bots, or is rate-limiting."
if response.status in (401, 403, 429)
else "Resource may not exist or server may be down."
)
return {
"error": f"HTTP {response.status}: Failed to fetch URL",
"url": url,
"status": response.status,
"hint": hint,
}
content_type = response.headers.get("content-type", "").lower()
if not any(t in content_type for t in ["text/html", "application/xhtml+xml"]):
@@ -218,63 +241,165 @@ def register_tools(mcp: FastMCP) -> None:
# Parse rendered HTML with BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
base_url = str(response.url) # Final URL after redirects
# Extract structured data BEFORE noise removal — JSON-LD lives
# in <script>, which gets decomposed below. JSON-LD is often the
# cleanest source of structured info on listing pages.
json_ld: list[Any] = []
for script in soup.find_all("script", type="application/ld+json"):
raw = script.string or script.get_text() or ""
if raw.strip():
try:
json_ld.append(json.loads(raw))
except (json.JSONDecodeError, TypeError):
pass
open_graph: dict[str, str] = {}
for meta in soup.find_all("meta"):
prop = (meta.get("property") or "").strip()
if prop.startswith("og:"):
val = (meta.get("content") or "").strip()
if val:
open_graph[prop[3:]] = val
# Remove noise elements
for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript", "iframe"]):
tag.decompose()
# Get title and description
# Get title and description (fall back to OG description)
title = soup.title.get_text(strip=True) if soup.title else ""
description = ""
meta_desc = soup.find("meta", attrs={"name": "description"})
if meta_desc:
description = meta_desc.get("content", "")
description = meta_desc.get("content", "") or ""
if not description:
description = open_graph.get("description", "")
# Target content
# Headings outline (capped) — lets the agent drill in via selector
headings: list[dict[str, Any]] = []
for h in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
h_text = h.get_text(strip=True)
if h_text:
headings.append({"level": int(h.name[1]), "text": h_text})
if len(headings) >= 100:
break
# Page-type heuristic: many <article> blocks → listing page
article_count = len(soup.find_all("article"))
if article_count >= 3:
page_type = "listing"
elif article_count == 1 or soup.find("main"):
page_type = "article"
else:
page_type = "page"
# Locate target subtree
if selector:
content_elem = soup.select_one(selector)
if not content_elem:
return {"error": f"No elements found matching selector: {selector}"}
text = content_elem.get_text(separator=" ", strip=True)
return {
"error": f"No elements found matching selector: {selector}",
"url": url,
"hint": "Try a broader selector or omit selector to use auto-detection.",
}
else:
# Auto-detect main content
main_content = (
soup.find("article")
or soup.find("main")
# Prefer <main> over the first <article> — on listing pages
# the latter would drop every article after the first.
content_elem = (
soup.find("main")
or soup.find(attrs={"role": "main"})
or soup.find("article")
or soup.find(class_=["content", "post", "entry", "article-body"])
or soup.find("body")
)
text = main_content.get_text(separator=" ", strip=True) if main_content else ""
# Clean up whitespace
text = " ".join(text.split())
# Collect link metadata BEFORE rewriting anchors (rewriting
# replaces <a> elements with NavigableStrings, so find_all('a')
# would miss them after).
links: list[dict[str, str]] = []
if content_elem and include_links:
for a in content_elem.find_all("a", href=True)[:50]:
link_text = a.get_text(strip=True)
href = urljoin(base_url, a["href"])
if link_text and href:
links.append({"text": link_text, "href": href})
# Truncate if needed (reserve 3 chars for the ellipsis so the
# final string stays within max_length)
if len(text) > max_length:
text = text[: max_length - 3] + "..."
text = ""
if content_elem:
# Inline anchors as [text](url) so links survive text
# extraction (otherwise the agent has to correlate `links`
# against the text blob).
if include_links:
for a in content_elem.find_all("a", href=True):
link_text = a.get_text(strip=True)
if link_text:
href = urljoin(base_url, a["href"])
a.replace_with(NavigableString(f"[{link_text}]({href})"))
# Convert <br> and block elements into newlines so the output
# preserves paragraph/list/heading structure rather than
# collapsing into one giant whitespace-joined string.
for br in content_elem.find_all("br"):
br.replace_with(NavigableString("\n"))
block_tags = (
"p", "h1", "h2", "h3", "h4", "h5", "h6",
"li", "tr", "div", "section", "article", "blockquote",
)
for block in content_elem.find_all(block_tags):
block.insert_before(NavigableString("\n"))
block.append(NavigableString("\n"))
raw_text = content_elem.get_text(separator=" ")
# Normalize: squash spaces within each line, collapse runs of
# blank lines to a single blank, trim.
cleaned: list[str] = []
blank = True # swallow leading blanks
for line in raw_text.split("\n"):
line = re.sub(r"[ \t]+", " ", line).strip()
if line:
cleaned.append(line)
blank = False
elif not blank:
cleaned.append("")
blank = True
text = "\n".join(cleaned).strip()
# Apply offset/truncation with continuation metadata. Reserve 3
# chars for the ellipsis so the returned string stays within
# max_length (back-compat with existing test expectations).
total_length = len(text)
offset = max(0, min(offset, total_length))
end = offset + max_length
truncated = end < total_length
sliced = text[offset:end]
if truncated and len(sliced) >= 3:
sliced = sliced[: -3] + "..."
structured_data: dict[str, Any] = {}
if json_ld:
structured_data["json_ld"] = json_ld
if open_graph:
structured_data["open_graph"] = open_graph
result: dict[str, Any] = {
"url": url,
"final_url": base_url,
"title": title,
"description": description,
"content": text,
"length": len(text),
"page_type": page_type,
"content": sliced,
"length": len(sliced),
"offset": offset,
"total_length": total_length,
"truncated": truncated,
"next_offset": end if truncated else None,
"headings": headings,
}
# Extract links if requested
if structured_data:
result["structured_data"] = structured_data
if include_links:
links: list[dict[str, str]] = []
base_url = str(response.url) # Use final URL after redirects
for a in soup.find_all("a", href=True)[:50]:
href = a["href"]
# Convert relative URLs to absolute URLs
absolute_href = urljoin(base_url, href)
link_text = a.get_text(strip=True)
if link_text and absolute_href:
links.append({"text": link_text, "href": absolute_href})
result["links"] = links
return result
+1 -1
View File
@@ -41,7 +41,7 @@ def register_tools(mcp: FastMCP) -> None:
"""Register all GCU browser tools with the MCP server.
Tools are organized into categories:
- Lifecycle: browser_start, browser_stop, browser_status
- Lifecycle: browser_setup, browser_status, browser_stop (browser_open lazy-creates the context)
- Tabs: browser_tabs, browser_open, browser_close, browser_activate_tab
- Navigation: browser_navigate, browser_go_back, browser_go_forward, browser_reload
- Inspection: browser_screenshot, browser_snapshot, browser_console
+2 -2
View File
@@ -642,7 +642,7 @@ def register_inspection_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_snapshot", params, result=result)
return result
@@ -727,7 +727,7 @@ def register_inspection_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_html", params, result=result)
return result
+11 -11
View File
@@ -153,7 +153,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_click", params, result=result)
return result
@@ -247,7 +247,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_click_coordinate", params, result=result)
return _text_only(result)
@@ -352,7 +352,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_type", params, result=result)
return result
@@ -432,7 +432,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_type_focused", params, result=result)
return result
@@ -506,7 +506,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_press", params, result=result)
return result
@@ -560,7 +560,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_hover", params, result=result)
return result
@@ -627,7 +627,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_hover_coordinate", params, result=result)
return _text_only(result)
@@ -712,7 +712,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_press_at", params, result=result)
return _text_only(result)
@@ -782,7 +782,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_select", params, result=result)
return result
@@ -860,7 +860,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_scroll", params, result=result)
return result
@@ -924,7 +924,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_drag", params, result=result)
return result
+3 -60
View File
@@ -61,7 +61,7 @@ async def _ensure_context(
Lazy-creates the browser context (tab group + seed tab) the first time
a profile is used so URL-taking tools (``browser_open`` /
``browser_navigate``) can be the agent's single cold-start entry
point instead of forcing an explicit ``browser_start`` round trip.
point — no separate "start" tool to remember.
Caller must verify ``bridge`` is connected first; any failure in
``bridge.create_context`` propagates so the caller's existing
@@ -137,7 +137,7 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
return {
"ok": True,
"connected": True,
"status": "Extension is connected and ready. Call browser_start to begin.",
"status": "Extension is connected and ready. Call browser_open(url) to begin.",
}
return {
@@ -150,7 +150,7 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
"step_3": "Click 'Load unpacked'",
"step_4": f"Select this directory: {ext_path}",
"step_5": ("Click the extension icon in the Chrome toolbar to confirm it says 'Connected'"),
"step_6": "Return here and call browser_start",
"step_6": "Return here and call browser_open(url) to begin",
},
"extensionPath": ext_path,
"extensionPathExists": ext_exists,
@@ -238,63 +238,6 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
)
return result
@mcp.tool()
async def browser_start(profile: str | None = None) -> dict:
"""
Explicitly create a browser context (tab group) for ``profile``.
Most workflows do NOT need to call this directly: ``browser_open``
and ``browser_navigate`` lazy-create a context on first use, so a
single ``browser_open(url)`` covers the cold path. Reach for
``browser_start`` when you want to (a) warm a profile without
opening a URL yet, or (b) recreate a context after
``browser_stop`` to clear stale state.
No separate browser process is launched uses the user's
existing Chrome via the Beeline extension.
Args:
profile: Browser profile name (default: "default")
Returns:
Dict with start status (``"started"`` on fresh creation,
``"already_running"`` when a context for the profile exists),
including ``groupId`` and ``activeTabId``.
"""
start = time.perf_counter()
params = {"profile": profile}
bridge = get_bridge()
if not bridge or not bridge.is_connected:
result = {
"ok": False,
"error": ("Browser extension not connected. Call browser_setup for installation instructions."),
}
log_tool_call("browser_start", params, result=result)
return result
try:
profile_name, ctx, created = await _ensure_context(bridge, profile)
result = {
"ok": True,
"status": "started" if created else "already_running",
"profile": profile_name,
"groupId": ctx.get("groupId"),
"activeTabId": ctx.get("activeTabId"),
}
log_tool_call(
"browser_start",
params,
result=result,
duration_ms=(time.perf_counter() - start) * 1000,
)
return result
except Exception as e:
logger.exception("Failed to start browser context")
result = {"ok": False, "error": str(e)}
log_tool_call("browser_start", params, error=e, duration_ms=(time.perf_counter() - start) * 1000)
return result
@mcp.tool()
async def browser_stop(profile: str | None = None) -> dict:
"""
+7 -8
View File
@@ -33,11 +33,10 @@ def register_navigation_tools(mcp: FastMCP) -> None:
"""
Navigate a tab to a URL.
Lazy-creates a browser context if none exists (no need to call
``browser_start`` first); when no ``tab_id`` is given and the
context was just created, navigation lands on the seed tab.
Prefer ``browser_open`` when you specifically want a new tab
``browser_navigate`` is for redirecting an existing tab.
Lazy-creates a browser context if none exists; when no ``tab_id``
is given and the context was just created, navigation lands on
the seed tab. Prefer ``browser_open`` when you specifically want
a new tab — ``browser_navigate`` is for redirecting an existing tab.
Waits for the page to reach the ``wait_until`` condition before
returning.
@@ -130,7 +129,7 @@ def register_navigation_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_go_back", params, result=result)
return result
@@ -180,7 +179,7 @@ def register_navigation_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_go_forward", params, result=result)
return result
@@ -235,7 +234,7 @@ def register_navigation_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_reload", params, result=result)
return result
+9 -9
View File
@@ -65,7 +65,7 @@ def register_tab_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_tabs", params, result=result)
return result
@@ -100,12 +100,12 @@ def register_tab_tools(mcp: FastMCP) -> None:
"""
Open a browser tab at the given URL — preferred entry point.
This is the agent's primary "go to a page" tool. If no browser
context exists yet for the profile, one is created transparently
(no need to call ``browser_start`` first). The first call after
a fresh context reuses the seed ``about:blank`` tab; subsequent
calls open new tabs in the agent's tab group. Waits for the
page to load before returning.
This is the agent's primary "go to a page" tool and the cold-start
entry point — if no browser context exists yet for the profile,
one is created transparently. The first call after a fresh
context reuses the seed ``about:blank`` tab; subsequent calls
open new tabs in the agent's tab group. Waits for the page to
load before returning.
Args:
url: URL to navigate to
@@ -192,7 +192,7 @@ def register_tab_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_close", params, result=result)
return result
@@ -271,7 +271,7 @@ def register_tab_tools(mcp: FastMCP) -> None:
ctx = _get_context(profile)
if not ctx:
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
log_tool_call("browser_activate_tab", params, result=result)
return result
@@ -107,22 +107,17 @@ class TestMultipleSubagentsTabGroups:
mock_bridge.create_context = AsyncMock(side_effect=mock_create_context)
# Register tools first
register_lifecycle_tools(mcp)
browser_start = mcp._tool_manager._tools["browser_start"].fn
from gcu.browser.tools.lifecycle import _ensure_context
# Now patch for execution
with patch("gcu.browser.tools.lifecycle.get_bridge", return_value=mock_bridge):
# Simulate 3 different subagents starting browsers
results = await asyncio.gather(
browser_start(profile="agent_1"),
browser_start(profile="agent_2"),
browser_start(profile="agent_3"),
_ensure_context(mock_bridge, "agent_1"),
_ensure_context(mock_bridge, "agent_2"),
_ensure_context(mock_bridge, "agent_3"),
)
# Each should have created a separate context
assert mock_bridge.create_context.call_count == 3
assert all(r.get("ok") for r in results)
assert all(created for (_, _, created) in results)
@pytest.mark.asyncio
async def test_concurrent_tab_operations_different_groups(self, mcp: FastMCP, mock_bridge: MagicMock):
@@ -709,11 +704,11 @@ class TestErrorHandling:
mock_bridge = MagicMock(spec=BeelineBridge)
mock_bridge.is_connected = False
register_lifecycle_tools(mcp)
browser_start = mcp._tool_manager._tools["browser_start"].fn
register_tab_tools(mcp)
browser_open = mcp._tool_manager._tools["browser_open"].fn
with patch("gcu.browser.tools.lifecycle.get_bridge", return_value=mock_bridge):
result = await browser_start(profile="test")
with patch("gcu.browser.tools.tabs.get_bridge", return_value=mock_bridge):
result = await browser_open(url="https://example.com", profile="test")
assert result.get("ok") is False
assert "not connected" in result.get("error", "").lower()
+187 -1
View File
@@ -374,6 +374,190 @@ class TestWebScrapeToolLinkConversion:
assert len([t for t in texts if not t.strip()]) == 0
class TestWebScrapeToolAIFriendlyOutput:
    """Tests for the AI-friendly output additions: structured data,
    headings, page_type, block-level newlines, inline links, truncation
    metadata, and offset-based pagination."""

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_block_level_newlines_preserved(self, mock_pw, mock_stealth, web_scrape_fn):
        """Block elements (p, h1, li) produce newlines, not space-collapsed."""
        html = """
<html><body>
<h1>Title</h1>
<p>First paragraph.</p>
<p>Second paragraph.</p>
<ul><li>Item one</li><li>Item two</li></ul>
</body></html>
"""
        mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
        mock_pw.return_value = mock_cm
        mock_stealth.return_value.apply_stealth_async = AsyncMock()

        result = await web_scrape_fn(url="https://example.com")

        assert "error" not in result
        content = result["content"]
        assert "Title" in content
        assert "First paragraph." in content
        assert "Second paragraph." in content
        # Block separation should produce a newline immediately after the
        # paragraph text.  (A previous version also or'd in the condition
        # `"First paragraph.\n\nSecond" in content`, but that disjunct implies
        # this one, so the `or` was dead logic and has been removed.)
        assert "First paragraph.\n" in content
        assert "Item one" in content and "Item two" in content

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_headings_outline_returned(self, mock_pw, mock_stealth, web_scrape_fn):
        """Headings outline lists h1-h6 with level + text."""
        html = """
<html><body>
<h1>Top</h1>
<h2>Section A</h2>
<h3>Sub A1</h3>
</body></html>
"""
        mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
        mock_pw.return_value = mock_cm
        mock_stealth.return_value.apply_stealth_async = AsyncMock()

        result = await web_scrape_fn(url="https://example.com")

        # Order must follow document order, with the numeric heading level.
        assert result["headings"] == [
            {"level": 1, "text": "Top"},
            {"level": 2, "text": "Section A"},
            {"level": 3, "text": "Sub A1"},
        ]

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_inline_links_when_include_links(self, mock_pw, mock_stealth, web_scrape_fn):
        """include_links=True inlines anchors as [text](url) in content."""
        html = """
<html><body>
<p>See <a href="/docs">our docs</a> for details.</p>
</body></html>
"""
        mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
        mock_pw.return_value = mock_cm
        mock_stealth.return_value.apply_stealth_async = AsyncMock()

        result = await web_scrape_fn(url="https://example.com", include_links=True)

        # Relative href must be resolved against the final URL.
        assert "[our docs](https://example.com/docs)" in result["content"]
        # Separate links list still present for back-compat
        assert any(link["text"] == "our docs" for link in result["links"])

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_structured_data_json_ld(self, mock_pw, mock_stealth, web_scrape_fn):
        """JSON-LD blocks are parsed and surfaced under structured_data."""
        html = """
<html><head>
<script type="application/ld+json">
{"@type": "Article", "headline": "Hello"}
</script>
</head><body><p>body</p></body></html>
"""
        mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
        mock_pw.return_value = mock_cm
        mock_stealth.return_value.apply_stealth_async = AsyncMock()

        result = await web_scrape_fn(url="https://example.com")

        assert "structured_data" in result
        assert result["structured_data"]["json_ld"] == [
            {"@type": "Article", "headline": "Hello"}
        ]

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_structured_data_open_graph(self, mock_pw, mock_stealth, web_scrape_fn):
        """OpenGraph meta tags are surfaced under structured_data.open_graph."""
        html = """
<html><head>
<meta property="og:title" content="OG Title">
<meta property="og:type" content="article">
</head><body><p>body</p></body></html>
"""
        mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
        mock_pw.return_value = mock_cm
        mock_stealth.return_value.apply_stealth_async = AsyncMock()

        result = await web_scrape_fn(url="https://example.com")

        # Keys are the og:* suffixes, not the full property names.
        assert result["structured_data"]["open_graph"] == {
            "title": "OG Title",
            "type": "article",
        }

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_truncation_metadata(self, mock_pw, mock_stealth, web_scrape_fn):
        """Truncated responses set truncated/total_length/next_offset."""
        html = f"<html><body>{'a' * 5000}</body></html>"
        mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
        mock_pw.return_value = mock_cm
        mock_stealth.return_value.apply_stealth_async = AsyncMock()

        result = await web_scrape_fn(url="https://example.com", max_length=1000)

        assert result["truncated"] is True
        assert result["total_length"] == 5000
        # next_offset points at the first character NOT returned in this page.
        assert result["next_offset"] == 1000
        assert result["offset"] == 0

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_offset_pagination(self, mock_pw, mock_stealth, web_scrape_fn):
        """offset arg returns content starting from the given character."""
        body = "a" * 1000 + "b" * 1000 + "c" * 1000
        html = f"<html><body>{body}</body></html>"
        mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
        mock_pw.return_value = mock_cm
        mock_stealth.return_value.apply_stealth_async = AsyncMock()

        result = await web_scrape_fn(url="https://example.com", max_length=1000, offset=1000)

        assert result["offset"] == 1000
        # Window should start in the b-region
        assert result["content"].startswith("b")
        assert result["truncated"] is True
        assert result["next_offset"] == 2000

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_page_type_listing(self, mock_pw, mock_stealth, web_scrape_fn):
        """3+ <article> elements => page_type 'listing'."""
        html = """
<html><body>
<article><h2>Post 1</h2></article>
<article><h2>Post 2</h2></article>
<article><h2>Post 3</h2></article>
</body></html>
"""
        mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
        mock_pw.return_value = mock_cm
        mock_stealth.return_value.apply_stealth_async = AsyncMock()

        result = await web_scrape_fn(url="https://example.com")

        assert result["page_type"] == "listing"

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_page_type_article(self, mock_pw, mock_stealth, web_scrape_fn):
        """Single <article> => page_type 'article'."""
        html = "<html><body><article><p>Hello</p></article></body></html>"
        mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
        mock_pw.return_value = mock_cm
        mock_stealth.return_value.apply_stealth_async = AsyncMock()

        result = await web_scrape_fn(url="https://example.com")

        assert result["page_type"] == "article"
class TestWebScrapeToolErrorHandling:
"""Tests for error handling and early exit before JS wait."""
@@ -388,7 +572,9 @@ class TestWebScrapeToolErrorHandling:
mock_stealth.return_value.apply_stealth_async = AsyncMock()
result = await web_scrape_fn(url="https://example.com/missing")
assert result == {"error": "HTTP 404: Failed to fetch URL"}
assert result["error"] == "HTTP 404: Failed to fetch URL"
assert result["status"] == 404
assert "hint" in result
mock_page.wait_for_load_state.assert_not_called()
@pytest.mark.asyncio