feat: improve web search and consolidate browser open
This commit is contained in:
@@ -47,7 +47,6 @@
|
||||
"Bash(grep -v ':0$')",
|
||||
"Bash(curl -s -m 2 http://127.0.0.1:4002/sse -o /dev/null -w 'status=%{http_code} time=%{time_total}s\\\\n')",
|
||||
"mcp__gcu-tools__browser_status",
|
||||
"mcp__gcu-tools__browser_start",
|
||||
"mcp__gcu-tools__browser_navigate",
|
||||
"mcp__gcu-tools__browser_evaluate",
|
||||
"mcp__gcu-tools__browser_screenshot",
|
||||
|
||||
@@ -214,7 +214,7 @@ Curated list of known browser automation edge cases with symptoms, causes, and f
|
||||
| **Symptom** | `browser_open()` returns `"No group with id: XXXXXXX"` even though `browser_status` shows `running: true` |
|
||||
| **Root Cause** | In-memory `_contexts` dict has a stale `groupId` from a Chrome tab group that was closed outside the tool (e.g. user closed the tab group) |
|
||||
| **Detection** | `browser_status` returns `running: true` but `browser_open` fails with "No group with id" |
|
||||
| **Fix** | Call `browser_stop()` to clear stale context from `_contexts`, then `browser_start()` again |
|
||||
| **Fix** | Call `browser_stop()` to clear stale context from `_contexts`, then `browser_open(url)` to lazy-create a fresh one |
|
||||
| **Code** | `tools/lifecycle.py:144-160` - `already_running` check uses cached dict without validating against Chrome |
|
||||
| **Verified** | 2026-04-03 ✓ |
|
||||
|
||||
|
||||
@@ -249,7 +249,7 @@ or find files. Mtime-sorted in files mode.
|
||||
|
||||
## Browser Automation (gcu-tools MCP)
|
||||
- Use `browser_*` tools — `browser_open(url)` is the cold-start entry point \
|
||||
(lazy-creates the context; no `browser_start` first). Then `browser_navigate`, \
|
||||
(lazy-creates the context; no separate "start" call). Then `browser_navigate`, \
|
||||
`browser_click`, `browser_type`, `browser_snapshot`, \
|
||||
<!-- vision-only -->`browser_screenshot`, <!-- /vision-only -->`browser_scroll`, \
|
||||
`browser_tabs`, `browser_close`, `browser_evaluate`, etc.
|
||||
|
||||
@@ -88,7 +88,6 @@ _TOOL_CATEGORIES: dict[str, list[str]] = {
|
||||
"browser_basic": [
|
||||
"browser_setup",
|
||||
"browser_status",
|
||||
"browser_start",
|
||||
"browser_stop",
|
||||
"browser_tabs",
|
||||
"browser_open",
|
||||
|
||||
@@ -17,7 +17,7 @@ Use browser nodes (with `tools: {policy: "all"}`) when:
|
||||
## Available Browser Tools
|
||||
|
||||
All tools are prefixed with `browser_`:
|
||||
- `browser_open`, `browser_navigate` — preferred entry points; both lazy-create a browser context, so a single `browser_open(url)` covers the cold path. Use `browser_start` only to warm a profile without a URL or to recreate a context after `browser_stop`.
|
||||
- `browser_open`, `browser_navigate` — both lazy-create the browser context, so a single `browser_open(url)` covers the cold path. To recover from a stale context, call `browser_stop` then `browser_open(url)` again.
|
||||
- `browser_click`, `browser_click_coordinate`, `browser_type`, `browser_type_focused` — interact
|
||||
- `browser_press` (with optional `modifiers=["ctrl"]` etc.) — keyboard shortcuts
|
||||
- `browser_snapshot` — compact accessibility-tree read (structured)
|
||||
|
||||
@@ -158,7 +158,7 @@ cookie consent banners if they block content.
|
||||
- If `browser_snapshot` fails, try `browser_get_text` with a narrow
|
||||
selector as fallback.
|
||||
- If `browser_open` fails or the page seems stale, `browser_stop` →
|
||||
`browser_start` → retry.
|
||||
`browser_open(url)` to lazy-create a fresh context.
|
||||
|
||||
## `browser_evaluate`
|
||||
|
||||
|
||||
@@ -410,7 +410,7 @@ In all of these cases the script is SHORT (< 10 lines) and the result is CONSUME
|
||||
- If a tool fails, retry once with the same approach.
|
||||
- If it fails a second time, STOP retrying and switch approach.
|
||||
- If `browser_snapshot` fails, try `browser_get_text` with a specific small selector as fallback.
|
||||
- If `browser_open` fails or page seems stale, `browser_stop`, then `browser_start`, then retry.
|
||||
- If `browser_open` fails or the page seems stale, call `browser_stop`, then `browser_open(url)` again to create a fresh context.
|
||||
|
||||
## Verified workflows
|
||||
|
||||
|
||||
@@ -17,16 +17,15 @@ map_search_gcu = NodeSpec(
|
||||
You are a browser agent. Your job: Search Google Maps for the provided query and extract business names and website URLs.
|
||||
|
||||
## Workflow
|
||||
1. browser_start
|
||||
2. browser_open(url="https://www.google.com/maps")
|
||||
3. use the url query to search for the keyword
|
||||
3.1 alternatively, use browser_type or browser_click to search for the "query" in memory.'
|
||||
4. browser_wait(seconds=3)
|
||||
5. browser_snapshot to find the list of results.
|
||||
6. For each relevant result, extract:
|
||||
1. browser_open(url="https://www.google.com/maps") # lazy-creates the context
|
||||
2. use the url query to search for the keyword
|
||||
2.1 alternatively, use browser_type or browser_click to search for the "query" in memory.'
|
||||
3. browser_wait(seconds=3)
|
||||
4. browser_snapshot to find the list of results.
|
||||
5. For each relevant result, extract:
|
||||
- Name of the business
|
||||
- Website URL (look for the website icon/link)
|
||||
7. set_output("business_list", [{"name": "...", "website": "..."}, ...])
|
||||
6. set_output("business_list", [{"name": "...", "website": "..."}, ...])
|
||||
|
||||
## Constraints
|
||||
- Extract at least 5-10 businesses if possible.
|
||||
|
||||
@@ -24,13 +24,12 @@ Focus on:
|
||||
- Hardware/Silicon breakthroughs
|
||||
|
||||
## Instructions
|
||||
1. browser_start
|
||||
2. For each handle:
|
||||
a. browser_open(url=f"https://x.com/{handle}")
|
||||
1. For each handle:
|
||||
a. browser_open(url=f"https://x.com/{handle}") # lazy-creates the context on first call
|
||||
b. browser_wait(seconds=5)
|
||||
c. browser_snapshot
|
||||
d. Parse relevant tech news text
|
||||
3. set_output("raw_tweets", consolidated_json)
|
||||
2. set_output("raw_tweets", consolidated_json)
|
||||
""",
|
||||
)
|
||||
|
||||
|
||||
@@ -244,12 +244,14 @@ def main() -> None:
|
||||
logger.error("Failed to connect to GCU server: %s", e)
|
||||
sys.exit(1)
|
||||
|
||||
# Auto-start browser context so tools work immediately
|
||||
# Warm the browser context so the first interactive call doesn't pay the
|
||||
# cold-start round trip. about:blank lazy-creates the context just like
|
||||
# a real URL would, without committing to a destination page.
|
||||
try:
|
||||
result = client.call_tool("browser_start", {})
|
||||
logger.info("browser_start: %s", result)
|
||||
result = client.call_tool("browser_open", {"url": "about:blank"})
|
||||
logger.info("browser_open(about:blank): %s", result)
|
||||
except Exception as e:
|
||||
logger.warning("browser_start failed (may already be started): %s", e)
|
||||
logger.warning("browser warm-up failed (may already be running): %s", e)
|
||||
|
||||
app = create_app()
|
||||
|
||||
|
||||
@@ -457,7 +457,7 @@ let currentView = 'grid';
|
||||
|
||||
// Tool categories for sidebar grouping
|
||||
const CATEGORIES = {
|
||||
'Lifecycle': ['browser_setup', 'browser_start', 'browser_stop', 'browser_status'],
|
||||
'Lifecycle': ['browser_setup', 'browser_stop', 'browser_status'],
|
||||
'Tabs': ['browser_tabs', 'browser_open', 'browser_close', 'browser_close_all', 'browser_close_finished', 'browser_activate_tab'],
|
||||
'Navigation': ['browser_navigate', 'browser_go_back', 'browser_go_forward', 'browser_reload'],
|
||||
'Interactions': ['browser_click', 'browser_click_coordinate', 'browser_type', 'browser_type_focused', 'browser_press', 'browser_press_at', 'browser_hover', 'browser_hover_coordinate', 'browser_select', 'browser_scroll', 'browser_drag'],
|
||||
|
||||
@@ -61,7 +61,7 @@ All replies carry `{ id, result }` or `{ id, error }`.
|
||||
# 1. At GCU server startup, open ws://localhost:9229/beeline and wait for
|
||||
# the extension to connect (sends { type: "hello" }).
|
||||
#
|
||||
# 2. On browser_start(profile):
|
||||
# 2. On the first browser_open / browser_navigate call for a profile (lazy-start via _ensure_context):
|
||||
# - Send { id, type: "context.create", agentId: profile }
|
||||
# - Receive { groupId, tabId }
|
||||
# - Store groupId in the session object (no Chrome process, no CDP port)
|
||||
|
||||
@@ -10,12 +10,14 @@ Validates URLs against internal network ranges to prevent SSRF attacks.
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
from typing import Any
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from urllib.robotparser import RobotFileParser
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
from fastmcp import FastMCP
|
||||
from playwright.async_api import (
|
||||
Error as PlaywrightError,
|
||||
@@ -82,6 +84,7 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
selector: str | None = None,
|
||||
include_links: bool = False,
|
||||
max_length: int = 50000,
|
||||
offset: int = 0,
|
||||
respect_robots_txt: bool = True,
|
||||
) -> dict:
|
||||
"""
|
||||
@@ -94,12 +97,18 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
Args:
|
||||
url: URL of the webpage to scrape
|
||||
selector: CSS selector to target specific content (e.g., 'article', '.main-content')
|
||||
include_links: Include extracted links in the response
|
||||
max_length: Maximum length of extracted text (1000-500000)
|
||||
include_links: When True, links are inlined as `[text](url)` in
|
||||
content and also returned as a `links` list
|
||||
max_length: Maximum length of extracted text returned in this call (1000-500000)
|
||||
offset: Character offset into the extracted text. Use with
|
||||
`next_offset` from a prior truncated result to paginate.
|
||||
respect_robots_txt: Whether to respect robots.txt rules (default True)
|
||||
|
||||
Returns:
|
||||
Dict with scraped content (url, title, description, content, length) or error dict
|
||||
Dict with: url, final_url, title, description, page_type
|
||||
(article|listing|page), content, length, offset, total_length,
|
||||
truncated, next_offset, headings, structured_data (json_ld and/or open_graph; omitted when the page has neither),
|
||||
and optionally links. On error, returns {"error": str, ...} with a hint when applicable.
|
||||
"""
|
||||
try:
|
||||
# Validate URL
|
||||
@@ -128,6 +137,10 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
"error": f"Blocked by robots.txt: {url}",
|
||||
"url": url,
|
||||
"skipped": True,
|
||||
"hint": (
|
||||
"Pass respect_robots_txt=False if you have "
|
||||
"authorization to scrape this site."
|
||||
),
|
||||
}
|
||||
except Exception:
|
||||
pass # If robots.txt can't be fetched, proceed anyway
|
||||
@@ -195,7 +208,17 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
return {"error": "Navigation failed: no response received"}
|
||||
|
||||
if response.status != 200:
|
||||
return {"error": f"HTTP {response.status}: Failed to fetch URL"}
|
||||
hint = (
|
||||
"Site likely requires auth, blocks bots, or is rate-limiting."
|
||||
if response.status in (401, 403, 429)
|
||||
else "Resource may not exist or server may be down."
|
||||
)
|
||||
return {
|
||||
"error": f"HTTP {response.status}: Failed to fetch URL",
|
||||
"url": url,
|
||||
"status": response.status,
|
||||
"hint": hint,
|
||||
}
|
||||
|
||||
content_type = response.headers.get("content-type", "").lower()
|
||||
if not any(t in content_type for t in ["text/html", "application/xhtml+xml"]):
|
||||
@@ -218,63 +241,165 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
|
||||
# Parse rendered HTML with BeautifulSoup
|
||||
soup = BeautifulSoup(html_content, "html.parser")
|
||||
base_url = str(response.url) # Final URL after redirects
|
||||
|
||||
# Extract structured data BEFORE noise removal — JSON-LD lives
|
||||
# in <script>, which gets decomposed below. JSON-LD is often the
|
||||
# cleanest source of structured info on listing pages.
|
||||
json_ld: list[Any] = []
|
||||
for script in soup.find_all("script", type="application/ld+json"):
|
||||
raw = script.string or script.get_text() or ""
|
||||
if raw.strip():
|
||||
try:
|
||||
json_ld.append(json.loads(raw))
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
open_graph: dict[str, str] = {}
|
||||
for meta in soup.find_all("meta"):
|
||||
prop = (meta.get("property") or "").strip()
|
||||
if prop.startswith("og:"):
|
||||
val = (meta.get("content") or "").strip()
|
||||
if val:
|
||||
open_graph[prop[3:]] = val
|
||||
|
||||
# Remove noise elements
|
||||
for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript", "iframe"]):
|
||||
tag.decompose()
|
||||
|
||||
# Get title and description
|
||||
# Get title and description (fall back to OG description)
|
||||
title = soup.title.get_text(strip=True) if soup.title else ""
|
||||
|
||||
description = ""
|
||||
meta_desc = soup.find("meta", attrs={"name": "description"})
|
||||
if meta_desc:
|
||||
description = meta_desc.get("content", "")
|
||||
description = meta_desc.get("content", "") or ""
|
||||
if not description:
|
||||
description = open_graph.get("description", "")
|
||||
|
||||
# Target content
|
||||
# Headings outline (capped) — lets the agent drill in via selector
|
||||
headings: list[dict[str, Any]] = []
|
||||
for h in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
|
||||
h_text = h.get_text(strip=True)
|
||||
if h_text:
|
||||
headings.append({"level": int(h.name[1]), "text": h_text})
|
||||
if len(headings) >= 100:
|
||||
break
|
||||
|
||||
# Page-type heuristic: many <article> blocks → listing page
|
||||
article_count = len(soup.find_all("article"))
|
||||
if article_count >= 3:
|
||||
page_type = "listing"
|
||||
elif article_count == 1 or soup.find("main"):
|
||||
page_type = "article"
|
||||
else:
|
||||
page_type = "page"
|
||||
|
||||
# Locate target subtree
|
||||
if selector:
|
||||
content_elem = soup.select_one(selector)
|
||||
if not content_elem:
|
||||
return {"error": f"No elements found matching selector: {selector}"}
|
||||
text = content_elem.get_text(separator=" ", strip=True)
|
||||
return {
|
||||
"error": f"No elements found matching selector: {selector}",
|
||||
"url": url,
|
||||
"hint": "Try a broader selector or omit selector to use auto-detection.",
|
||||
}
|
||||
else:
|
||||
# Auto-detect main content
|
||||
main_content = (
|
||||
soup.find("article")
|
||||
or soup.find("main")
|
||||
# Prefer <main> over the first <article> — on listing pages
|
||||
# the latter would drop every article after the first.
|
||||
content_elem = (
|
||||
soup.find("main")
|
||||
or soup.find(attrs={"role": "main"})
|
||||
or soup.find("article")
|
||||
or soup.find(class_=["content", "post", "entry", "article-body"])
|
||||
or soup.find("body")
|
||||
)
|
||||
text = main_content.get_text(separator=" ", strip=True) if main_content else ""
|
||||
|
||||
# Clean up whitespace
|
||||
text = " ".join(text.split())
|
||||
# Collect link metadata BEFORE rewriting anchors (rewriting
|
||||
# replaces <a> elements with NavigableStrings, so find_all('a')
|
||||
# would miss them after).
|
||||
links: list[dict[str, str]] = []
|
||||
if content_elem and include_links:
|
||||
for a in content_elem.find_all("a", href=True)[:50]:
|
||||
link_text = a.get_text(strip=True)
|
||||
href = urljoin(base_url, a["href"])
|
||||
if link_text and href:
|
||||
links.append({"text": link_text, "href": href})
|
||||
|
||||
# Truncate if needed (reserve 3 chars for the ellipsis so the
|
||||
# final string stays within max_length)
|
||||
if len(text) > max_length:
|
||||
text = text[: max_length - 3] + "..."
|
||||
text = ""
|
||||
if content_elem:
|
||||
# Inline anchors as [text](url) so links survive text
|
||||
# extraction (otherwise the agent has to correlate `links`
|
||||
# against the text blob).
|
||||
if include_links:
|
||||
for a in content_elem.find_all("a", href=True):
|
||||
link_text = a.get_text(strip=True)
|
||||
if link_text:
|
||||
href = urljoin(base_url, a["href"])
|
||||
a.replace_with(NavigableString(f"[{link_text}]({href})"))
|
||||
|
||||
# Convert <br> and block elements into newlines so the output
|
||||
# preserves paragraph/list/heading structure rather than
|
||||
# collapsing into one giant whitespace-joined string.
|
||||
for br in content_elem.find_all("br"):
|
||||
br.replace_with(NavigableString("\n"))
|
||||
block_tags = (
|
||||
"p", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"li", "tr", "div", "section", "article", "blockquote",
|
||||
)
|
||||
for block in content_elem.find_all(block_tags):
|
||||
block.insert_before(NavigableString("\n"))
|
||||
block.append(NavigableString("\n"))
|
||||
|
||||
raw_text = content_elem.get_text(separator=" ")
|
||||
|
||||
# Normalize: squash spaces within each line, collapse runs of
|
||||
# blank lines to a single blank, trim.
|
||||
cleaned: list[str] = []
|
||||
blank = True # swallow leading blanks
|
||||
for line in raw_text.split("\n"):
|
||||
line = re.sub(r"[ \t]+", " ", line).strip()
|
||||
if line:
|
||||
cleaned.append(line)
|
||||
blank = False
|
||||
elif not blank:
|
||||
cleaned.append("")
|
||||
blank = True
|
||||
text = "\n".join(cleaned).strip()
|
||||
|
||||
# Apply offset/truncation with continuation metadata. Reserve 3
|
||||
# chars for the ellipsis so the returned string stays within
|
||||
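# max_length (back-compat with existing test expectations). NOTE(review): the ellipsis overwrites the last 3 chars of the slice while next_offset stays at `end`, so a caller paginating with next_offset never receives those 3 chars — confirm whether next_offset should be `end - 3` when the ellipsis is applied.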
|
||||
total_length = len(text)
|
||||
offset = max(0, min(offset, total_length))
|
||||
end = offset + max_length
|
||||
truncated = end < total_length
|
||||
sliced = text[offset:end]
|
||||
if truncated and len(sliced) >= 3:
|
||||
sliced = sliced[: -3] + "..."
|
||||
|
||||
structured_data: dict[str, Any] = {}
|
||||
if json_ld:
|
||||
structured_data["json_ld"] = json_ld
|
||||
if open_graph:
|
||||
structured_data["open_graph"] = open_graph
|
||||
|
||||
result: dict[str, Any] = {
|
||||
"url": url,
|
||||
"final_url": base_url,
|
||||
"title": title,
|
||||
"description": description,
|
||||
"content": text,
|
||||
"length": len(text),
|
||||
"page_type": page_type,
|
||||
"content": sliced,
|
||||
"length": len(sliced),
|
||||
"offset": offset,
|
||||
"total_length": total_length,
|
||||
"truncated": truncated,
|
||||
"next_offset": end if truncated else None,
|
||||
"headings": headings,
|
||||
}
|
||||
|
||||
# Extract links if requested
|
||||
if structured_data:
|
||||
result["structured_data"] = structured_data
|
||||
if include_links:
|
||||
links: list[dict[str, str]] = []
|
||||
base_url = str(response.url) # Use final URL after redirects
|
||||
for a in soup.find_all("a", href=True)[:50]:
|
||||
href = a["href"]
|
||||
# Convert relative URLs to absolute URLs
|
||||
absolute_href = urljoin(base_url, href)
|
||||
link_text = a.get_text(strip=True)
|
||||
if link_text and absolute_href:
|
||||
links.append({"text": link_text, "href": absolute_href})
|
||||
result["links"] = links
|
||||
|
||||
return result
|
||||
|
||||
@@ -41,7 +41,7 @@ def register_tools(mcp: FastMCP) -> None:
|
||||
"""Register all GCU browser tools with the MCP server.
|
||||
|
||||
Tools are organized into categories:
|
||||
- Lifecycle: browser_start, browser_stop, browser_status
|
||||
- Lifecycle: browser_setup, browser_status, browser_stop (browser_open lazy-creates the context)
|
||||
- Tabs: browser_tabs, browser_open, browser_close, browser_activate_tab
|
||||
- Navigation: browser_navigate, browser_go_back, browser_go_forward, browser_reload
|
||||
- Inspection: browser_screenshot, browser_snapshot, browser_console
|
||||
|
||||
@@ -642,7 +642,7 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_snapshot", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -727,7 +727,7 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_html", params, result=result)
|
||||
return result
|
||||
|
||||
|
||||
@@ -153,7 +153,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_click", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -247,7 +247,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_click_coordinate", params, result=result)
|
||||
return _text_only(result)
|
||||
|
||||
@@ -352,7 +352,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_type", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -432,7 +432,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_type_focused", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -506,7 +506,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_press", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -560,7 +560,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_hover", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -627,7 +627,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_hover_coordinate", params, result=result)
|
||||
return _text_only(result)
|
||||
|
||||
@@ -712,7 +712,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_press_at", params, result=result)
|
||||
return _text_only(result)
|
||||
|
||||
@@ -782,7 +782,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_select", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -860,7 +860,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_scroll", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -924,7 +924,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_drag", params, result=result)
|
||||
return result
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ async def _ensure_context(
|
||||
Lazy-creates the browser context (tab group + seed tab) the first time
|
||||
a profile is used so URL-taking tools (``browser_open`` /
|
||||
``browser_navigate``) can be the agent's single cold-start entry
|
||||
point instead of forcing an explicit ``browser_start`` round trip.
|
||||
point — no separate "start" tool to remember.
|
||||
|
||||
Caller must verify ``bridge`` is connected first; any failure in
|
||||
``bridge.create_context`` propagates so the caller's existing
|
||||
@@ -137,7 +137,7 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
|
||||
return {
|
||||
"ok": True,
|
||||
"connected": True,
|
||||
"status": "Extension is connected and ready. Call browser_start to begin.",
|
||||
"status": "Extension is connected and ready. Call browser_open(url) to begin.",
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -150,7 +150,7 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
|
||||
"step_3": "Click 'Load unpacked'",
|
||||
"step_4": f"Select this directory: {ext_path}",
|
||||
"step_5": ("Click the extension icon in the Chrome toolbar to confirm it says 'Connected'"),
|
||||
"step_6": "Return here and call browser_start",
|
||||
"step_6": "Return here and call browser_open(url) to begin",
|
||||
},
|
||||
"extensionPath": ext_path,
|
||||
"extensionPathExists": ext_exists,
|
||||
@@ -238,63 +238,6 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
|
||||
)
|
||||
return result
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_start(profile: str | None = None) -> dict:
|
||||
"""
|
||||
Explicitly create a browser context (tab group) for ``profile``.
|
||||
|
||||
Most workflows do NOT need to call this directly: ``browser_open``
|
||||
and ``browser_navigate`` lazy-create a context on first use, so a
|
||||
single ``browser_open(url)`` covers the cold path. Reach for
|
||||
``browser_start`` when you want to (a) warm a profile without
|
||||
opening a URL yet, or (b) recreate a context after
|
||||
``browser_stop`` to clear stale state.
|
||||
|
||||
No separate browser process is launched — uses the user's
|
||||
existing Chrome via the Beeline extension.
|
||||
|
||||
Args:
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with start status (``"started"`` on fresh creation,
|
||||
``"already_running"`` when a context for the profile exists),
|
||||
including ``groupId`` and ``activeTabId``.
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
params = {"profile": profile}
|
||||
|
||||
bridge = get_bridge()
|
||||
if not bridge or not bridge.is_connected:
|
||||
result = {
|
||||
"ok": False,
|
||||
"error": ("Browser extension not connected. Call browser_setup for installation instructions."),
|
||||
}
|
||||
log_tool_call("browser_start", params, result=result)
|
||||
return result
|
||||
|
||||
try:
|
||||
profile_name, ctx, created = await _ensure_context(bridge, profile)
|
||||
result = {
|
||||
"ok": True,
|
||||
"status": "started" if created else "already_running",
|
||||
"profile": profile_name,
|
||||
"groupId": ctx.get("groupId"),
|
||||
"activeTabId": ctx.get("activeTabId"),
|
||||
}
|
||||
log_tool_call(
|
||||
"browser_start",
|
||||
params,
|
||||
result=result,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.exception("Failed to start browser context")
|
||||
result = {"ok": False, "error": str(e)}
|
||||
log_tool_call("browser_start", params, error=e, duration_ms=(time.perf_counter() - start) * 1000)
|
||||
return result
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_stop(profile: str | None = None) -> dict:
|
||||
"""
|
||||
|
||||
@@ -33,11 +33,10 @@ def register_navigation_tools(mcp: FastMCP) -> None:
|
||||
"""
|
||||
Navigate a tab to a URL.
|
||||
|
||||
Lazy-creates a browser context if none exists (no need to call
|
||||
``browser_start`` first); when no ``tab_id`` is given and the
|
||||
context was just created, navigation lands on the seed tab.
|
||||
Prefer ``browser_open`` when you specifically want a new tab —
|
||||
``browser_navigate`` is for redirecting an existing tab.
|
||||
Lazy-creates a browser context if none exists; when no ``tab_id``
|
||||
is given and the context was just created, navigation lands on
|
||||
the seed tab. Prefer ``browser_open`` when you specifically want
|
||||
a new tab — ``browser_navigate`` is for redirecting an existing tab.
|
||||
|
||||
Waits for the page to reach the ``wait_until`` condition before
|
||||
returning.
|
||||
@@ -130,7 +129,7 @@ def register_navigation_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_go_back", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -180,7 +179,7 @@ def register_navigation_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_go_forward", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -235,7 +234,7 @@ def register_navigation_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_reload", params, result=result)
|
||||
return result
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ def register_tab_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_tabs", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -100,12 +100,12 @@ def register_tab_tools(mcp: FastMCP) -> None:
|
||||
"""
|
||||
Open a browser tab at the given URL — preferred entry point.
|
||||
|
||||
This is the agent's primary "go to a page" tool. If no browser
|
||||
context exists yet for the profile, one is created transparently
|
||||
(no need to call ``browser_start`` first). The first call after
|
||||
a fresh context reuses the seed ``about:blank`` tab; subsequent
|
||||
calls open new tabs in the agent's tab group. Waits for the
|
||||
page to load before returning.
|
||||
This is the agent's primary "go to a page" tool and the cold-start
|
||||
entry point — if no browser context exists yet for the profile,
|
||||
one is created transparently. The first call after a fresh
|
||||
context reuses the seed ``about:blank`` tab; subsequent calls
|
||||
open new tabs in the agent's tab group. Waits for the page to
|
||||
load before returning.
|
||||
|
||||
Args:
|
||||
url: URL to navigate to
|
||||
@@ -192,7 +192,7 @@ def register_tab_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_close", params, result=result)
|
||||
return result
|
||||
|
||||
@@ -271,7 +271,7 @@ def register_tab_tools(mcp: FastMCP) -> None:
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_open(url) first to open a tab."}
|
||||
log_tool_call("browser_activate_tab", params, result=result)
|
||||
return result
|
||||
|
||||
|
||||
@@ -107,22 +107,17 @@ class TestMultipleSubagentsTabGroups:
|
||||
|
||||
mock_bridge.create_context = AsyncMock(side_effect=mock_create_context)
|
||||
|
||||
# Register tools first
|
||||
register_lifecycle_tools(mcp)
|
||||
browser_start = mcp._tool_manager._tools["browser_start"].fn
|
||||
from gcu.browser.tools.lifecycle import _ensure_context
|
||||
|
||||
# Now patch for execution
|
||||
with patch("gcu.browser.tools.lifecycle.get_bridge", return_value=mock_bridge):
|
||||
# Simulate 3 different subagents starting browsers
|
||||
results = await asyncio.gather(
|
||||
browser_start(profile="agent_1"),
|
||||
browser_start(profile="agent_2"),
|
||||
browser_start(profile="agent_3"),
|
||||
_ensure_context(mock_bridge, "agent_1"),
|
||||
_ensure_context(mock_bridge, "agent_2"),
|
||||
_ensure_context(mock_bridge, "agent_3"),
|
||||
)
|
||||
|
||||
# Each should have created a separate context
|
||||
assert mock_bridge.create_context.call_count == 3
|
||||
assert all(r.get("ok") for r in results)
|
||||
assert all(created for (_, _, created) in results)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_concurrent_tab_operations_different_groups(self, mcp: FastMCP, mock_bridge: MagicMock):
|
||||
@@ -709,11 +704,11 @@ class TestErrorHandling:
|
||||
mock_bridge = MagicMock(spec=BeelineBridge)
|
||||
mock_bridge.is_connected = False
|
||||
|
||||
register_lifecycle_tools(mcp)
|
||||
browser_start = mcp._tool_manager._tools["browser_start"].fn
|
||||
register_tab_tools(mcp)
|
||||
browser_open = mcp._tool_manager._tools["browser_open"].fn
|
||||
|
||||
with patch("gcu.browser.tools.lifecycle.get_bridge", return_value=mock_bridge):
|
||||
result = await browser_start(profile="test")
|
||||
with patch("gcu.browser.tools.tabs.get_bridge", return_value=mock_bridge):
|
||||
result = await browser_open(url="https://example.com", profile="test")
|
||||
|
||||
assert result.get("ok") is False
|
||||
assert "not connected" in result.get("error", "").lower()
|
||||
|
||||
@@ -374,6 +374,190 @@ class TestWebScrapeToolLinkConversion:
|
||||
assert len([t for t in texts if not t.strip()]) == 0
|
||||
|
||||
|
||||
class TestWebScrapeToolAIFriendlyOutput:
    """Tests for the AI-friendly output additions of the web scrape tool.

    Covers structured data, headings, page_type, block-level newlines,
    inline links, truncation metadata, and offset-based pagination.
    """

    @staticmethod
    def _serve(mock_pw, mock_stealth, html):
        """Wire the patched Playwright and stealth mocks to serve *html*.

        Every test in this class needs the same three lines of setup, so
        they live here. The leading underscore keeps pytest from collecting
        this helper as a test.

        Args:
            mock_pw: Mock injected by ``@patch(_PW_PATH)``.
            mock_stealth: Mock injected by ``@patch(_STEALTH_PATH)``.
            html: Markup handed to ``_make_playwright_mocks``.
        """
        mock_cm, _, _ = _make_playwright_mocks(html, final_url="https://example.com")
        mock_pw.return_value = mock_cm
        mock_stealth.return_value.apply_stealth_async = AsyncMock()

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_block_level_newlines_preserved(self, mock_pw, mock_stealth, web_scrape_fn):
        """Block elements (p, h1, li) produce newlines, not space-collapsed."""
        html = """
        <html><body>
        <h1>Title</h1>
        <p>First paragraph.</p>
        <p>Second paragraph.</p>
        <ul><li>Item one</li><li>Item two</li></ul>
        </body></html>
        """
        self._serve(mock_pw, mock_stealth, html)

        result = await web_scrape_fn(url="https://example.com")
        assert "error" not in result
        content = result["content"]
        assert "Title" in content
        assert "First paragraph." in content
        assert "Second paragraph." in content
        # Block separation should produce newlines, not run paragraphs together.
        # (The former ``or "First paragraph.\n\nSecond" in content`` alternative
        # was implied by this check and could never change the outcome.)
        assert "First paragraph.\n" in content
        assert "Item one" in content and "Item two" in content

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_headings_outline_returned(self, mock_pw, mock_stealth, web_scrape_fn):
        """Headings outline lists h1-h6 with level + text."""
        html = """
        <html><body>
        <h1>Top</h1>
        <h2>Section A</h2>
        <h3>Sub A1</h3>
        </body></html>
        """
        self._serve(mock_pw, mock_stealth, html)

        result = await web_scrape_fn(url="https://example.com")
        assert result["headings"] == [
            {"level": 1, "text": "Top"},
            {"level": 2, "text": "Section A"},
            {"level": 3, "text": "Sub A1"},
        ]

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_inline_links_when_include_links(self, mock_pw, mock_stealth, web_scrape_fn):
        """include_links=True inlines anchors as [text](url) in content."""
        html = """
        <html><body>
        <p>See <a href="/docs">our docs</a> for details.</p>
        </body></html>
        """
        self._serve(mock_pw, mock_stealth, html)

        result = await web_scrape_fn(url="https://example.com", include_links=True)
        assert "[our docs](https://example.com/docs)" in result["content"]
        # Separate links list still present for back-compat
        assert any(link["text"] == "our docs" for link in result["links"])

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_structured_data_json_ld(self, mock_pw, mock_stealth, web_scrape_fn):
        """JSON-LD blocks are parsed and surfaced under structured_data."""
        html = """
        <html><head>
        <script type="application/ld+json">
        {"@type": "Article", "headline": "Hello"}
        </script>
        </head><body><p>body</p></body></html>
        """
        self._serve(mock_pw, mock_stealth, html)

        result = await web_scrape_fn(url="https://example.com")
        assert "structured_data" in result
        assert result["structured_data"]["json_ld"] == [
            {"@type": "Article", "headline": "Hello"}
        ]

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_structured_data_open_graph(self, mock_pw, mock_stealth, web_scrape_fn):
        """OpenGraph meta tags are surfaced under structured_data.open_graph."""
        html = """
        <html><head>
        <meta property="og:title" content="OG Title">
        <meta property="og:type" content="article">
        </head><body><p>body</p></body></html>
        """
        self._serve(mock_pw, mock_stealth, html)

        result = await web_scrape_fn(url="https://example.com")
        assert result["structured_data"]["open_graph"] == {
            "title": "OG Title",
            "type": "article",
        }

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_truncation_metadata(self, mock_pw, mock_stealth, web_scrape_fn):
        """Truncated responses set truncated/total_length/next_offset."""
        html = f"<html><body>{'a' * 5000}</body></html>"
        self._serve(mock_pw, mock_stealth, html)

        result = await web_scrape_fn(url="https://example.com", max_length=1000)
        assert result["truncated"] is True
        assert result["total_length"] == 5000
        assert result["next_offset"] == 1000
        assert result["offset"] == 0

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_offset_pagination(self, mock_pw, mock_stealth, web_scrape_fn):
        """offset arg returns content starting from the given character."""
        body = "a" * 1000 + "b" * 1000 + "c" * 1000
        html = f"<html><body>{body}</body></html>"
        self._serve(mock_pw, mock_stealth, html)

        result = await web_scrape_fn(url="https://example.com", max_length=1000, offset=1000)
        assert result["offset"] == 1000
        # Window should start in the b-region
        assert result["content"].startswith("b")
        assert result["truncated"] is True
        assert result["next_offset"] == 2000

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_page_type_listing(self, mock_pw, mock_stealth, web_scrape_fn):
        """3+ <article> elements => page_type 'listing'."""
        html = """
        <html><body>
        <article><h2>Post 1</h2></article>
        <article><h2>Post 2</h2></article>
        <article><h2>Post 3</h2></article>
        </body></html>
        """
        self._serve(mock_pw, mock_stealth, html)

        result = await web_scrape_fn(url="https://example.com")
        assert result["page_type"] == "listing"

    @pytest.mark.asyncio
    @patch(_STEALTH_PATH)
    @patch(_PW_PATH)
    async def test_page_type_article(self, mock_pw, mock_stealth, web_scrape_fn):
        """Single <article> => page_type 'article'."""
        html = "<html><body><article><p>Hello</p></article></body></html>"
        self._serve(mock_pw, mock_stealth, html)

        result = await web_scrape_fn(url="https://example.com")
        assert result["page_type"] == "article"
|
||||
|
||||
|
||||
class TestWebScrapeToolErrorHandling:
|
||||
"""Tests for error handling and early exit before JS wait."""
|
||||
|
||||
@@ -388,7 +572,9 @@ class TestWebScrapeToolErrorHandling:
|
||||
mock_stealth.return_value.apply_stealth_async = AsyncMock()
|
||||
|
||||
result = await web_scrape_fn(url="https://example.com/missing")
|
||||
assert result == {"error": "HTTP 404: Failed to fetch URL"}
|
||||
assert result["error"] == "HTTP 404: Failed to fetch URL"
|
||||
assert result["status"] == 404
|
||||
assert "hint" in result
|
||||
mock_page.wait_for_load_state.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
Reference in New Issue
Block a user