diff --git a/.claude/settings.json b/.claude/settings.json index ceda20b8..e71fe9e9 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -47,7 +47,6 @@ "Bash(grep -v ':0$')", "Bash(curl -s -m 2 http://127.0.0.1:4002/sse -o /dev/null -w 'status=%{http_code} time=%{time_total}s\\\\n')", "mcp__gcu-tools__browser_status", - "mcp__gcu-tools__browser_start", "mcp__gcu-tools__browser_navigate", "mcp__gcu-tools__browser_evaluate", "mcp__gcu-tools__browser_screenshot", diff --git a/.claude/skills/browser-edge-cases/registry.md b/.claude/skills/browser-edge-cases/registry.md index ad248add..dcbcf8c2 100644 --- a/.claude/skills/browser-edge-cases/registry.md +++ b/.claude/skills/browser-edge-cases/registry.md @@ -214,7 +214,7 @@ Curated list of known browser automation edge cases with symptoms, causes, and f | **Symptom** | `browser_open()` returns `"No group with id: XXXXXXX"` even though `browser_status` shows `running: true` | | **Root Cause** | In-memory `_contexts` dict has a stale `groupId` from a Chrome tab group that was closed outside the tool (e.g. user closed the tab group) | | **Detection** | `browser_status` returns `running: true` but `browser_open` fails with "No group with id" | -| **Fix** | Call `browser_stop()` to clear stale context from `_contexts`, then `browser_start()` again | +| **Fix** | Call `browser_stop()` to clear stale context from `_contexts`, then `browser_open(url)` to lazy-create a fresh one | | **Code** | `tools/lifecycle.py:144-160` - `already_running` check uses cached dict without validating against Chrome | | **Verified** | 2026-04-03 ✓ | diff --git a/core/framework/agents/queen/nodes/__init__.py b/core/framework/agents/queen/nodes/__init__.py index 0832385f..6ef60eca 100644 --- a/core/framework/agents/queen/nodes/__init__.py +++ b/core/framework/agents/queen/nodes/__init__.py @@ -249,7 +249,7 @@ or find files. Mtime-sorted in files mode. ## Browser Automation (gcu-tools MCP) - Use `browser_*` tools — `browser_open(url)` is the cold-start entry point \ - (lazy-creates the context; no `browser_start` first). Then `browser_navigate`, \ + (lazy-creates the context; no separate "start" call). Then `browser_navigate`, \ `browser_click`, `browser_type`, `browser_snapshot`, \ `browser_screenshot`, `browser_scroll`, \ `browser_tabs`, `browser_close`, `browser_evaluate`, etc. diff --git a/core/framework/agents/queen/queen_tools_defaults.py b/core/framework/agents/queen/queen_tools_defaults.py index 79be2b6e..888f6a9b 100644 --- a/core/framework/agents/queen/queen_tools_defaults.py +++ b/core/framework/agents/queen/queen_tools_defaults.py @@ -88,7 +88,6 @@ _TOOL_CATEGORIES: dict[str, list[str]] = { "browser_basic": [ "browser_setup", "browser_status", - "browser_start", "browser_stop", "browser_tabs", "browser_open", diff --git a/core/framework/agents/queen/reference/gcu_guide.md b/core/framework/agents/queen/reference/gcu_guide.md index 44ac32be..5c8a0f1e 100644 --- a/core/framework/agents/queen/reference/gcu_guide.md +++ b/core/framework/agents/queen/reference/gcu_guide.md @@ -17,7 +17,7 @@ Use browser nodes (with `tools: {policy: "all"}`) when: ## Available Browser Tools All tools are prefixed with `browser_`: -- `browser_open`, `browser_navigate` — preferred entry points; both lazy-create a browser context, so a single `browser_open(url)` covers the cold path. Use `browser_start` only to warm a profile without a URL or to recreate a context after `browser_stop`. 
+- `browser_open`, `browser_navigate` — both lazy-create the browser context, so a single `browser_open(url)` covers the cold path. To recover from a stale context, call `browser_stop` then `browser_open(url)` again.
 - `browser_click`, `browser_click_coordinate`, `browser_type`, `browser_type_focused` — interact
 - `browser_press` (with optional `modifiers=["ctrl"]` etc.) — keyboard shortcuts
 - `browser_snapshot` — compact accessibility-tree read (structured)
diff --git a/core/framework/orchestrator/gcu.py b/core/framework/orchestrator/gcu.py
index 3d6d3b48..f1c594a8 100644
--- a/core/framework/orchestrator/gcu.py
+++ b/core/framework/orchestrator/gcu.py
@@ -158,7 +158,7 @@ cookie consent banners if they block content.
 - If `browser_snapshot` fails, try `browser_get_text` with a narrow
   selector as fallback.
 - If `browser_open` fails or the page seems stale, `browser_stop` →
-  `browser_start` → retry.
+  `browser_open(url)` to lazy-create a fresh context.
 
 ## `browser_evaluate`
 
diff --git a/core/framework/skills/_preset_skills/browser-automation/SKILL.md b/core/framework/skills/_preset_skills/browser-automation/SKILL.md
index 7a25e8b0..1f02ac9b 100644
--- a/core/framework/skills/_preset_skills/browser-automation/SKILL.md
+++ b/core/framework/skills/_preset_skills/browser-automation/SKILL.md
@@ -410,7 +410,7 @@ In all of these cases the script is SHORT (< 10 lines) and the result is CONSUMED
 - If a tool fails, retry once with the same approach.
 - If it fails a second time, STOP retrying and switch approach.
 - If `browser_snapshot` fails, try `browser_get_text` with a specific small selector as fallback.
-- If `browser_open` fails or page seems stale, `browser_stop`, then `browser_start`, then retry.
+- If `browser_open` fails or page seems stale, `browser_stop`, then `browser_open(url)` again to create a fresh context.
 
 ## Verified workflows
 
diff --git a/examples/templates/local_business_extractor/nodes/__init__.py b/examples/templates/local_business_extractor/nodes/__init__.py
index f0b5727f..e0ed99d5 100644
--- a/examples/templates/local_business_extractor/nodes/__init__.py
+++ b/examples/templates/local_business_extractor/nodes/__init__.py
@@ -17,16 +17,15 @@ map_search_gcu = NodeSpec(
 You are a browser agent. Your job: Search Google Maps for the provided query and extract business names and website URLs.
 
 ## Workflow
-1. browser_start
-2. browser_open(url="https://www.google.com/maps")
-3. use the url query to search for the keyword
-3.1 alternatively, use browser_type or browser_click to search for the "query" in memory.'
-4. browser_wait(seconds=3)
-5. browser_snapshot to find the list of results.
-6. For each relevant result, extract:
+1. browser_open(url="https://www.google.com/maps")  # lazy-creates the context
+2. use the URL query string to search for the keyword
+2.1 alternatively, use browser_type or browser_click to search for the "query" in memory
+3. browser_wait(seconds=3)
+4. browser_snapshot to find the list of results.
+5. For each relevant result, extract:
    - Name of the business
    - Website URL (look for the website icon/link)
-7. set_output("business_list", [{"name": "...", "website": "..."}, ...])
+6. set_output("business_list", [{"name": "...", "website": "..."}, ...])
 
 ## Constraints
 - Extract at least 5-10 businesses if possible.
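Taken together, these prompt changes describe one recovery loop: `browser_open(url)` is both the cold-start entry point and the retry path, with `browser_stop` used only to clear a stale context. A minimal sketch of that loop, assuming a generic MCP client exposing `call_tool(name, args)` (the wrapper function and the shape of the error payload are illustrative assumptions, not the repo's actual API):

```python
# Hedged sketch: stale-context recovery without browser_start.
# Assumes an MCP client with call_tool(name, args); the error-string check
# mirrors the "No group with id" symptom from the edge-case registry.


def open_with_recovery(client, url: str):
    """Open a URL, recovering once if the cached browser context is stale."""
    result = client.call_tool("browser_open", {"url": url})
    if "No group with id" in str(result):
        # browser_status may still report running: true here; the in-memory
        # _contexts entry is stale, so clear it and lazy-create a fresh one.
        client.call_tool("browser_stop", {})
        result = client.call_tool("browser_open", {"url": url})
    return result
```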
diff --git a/examples/templates/twitter_news_agent/nodes/__init__.py b/examples/templates/twitter_news_agent/nodes/__init__.py index b21a493d..0849d4cc 100644 --- a/examples/templates/twitter_news_agent/nodes/__init__.py +++ b/examples/templates/twitter_news_agent/nodes/__init__.py @@ -24,13 +24,12 @@ Focus on: - Hardware/Silicon breakthroughs ## Instructions -1. browser_start -2. For each handle: - a. browser_open(url=f"https://x.com/{handle}") +1. For each handle: + a. browser_open(url=f"https://x.com/{handle}") # lazy-creates the context on first call b. browser_wait(seconds=5) c. browser_snapshot d. Parse relevant tech news text -3. set_output("raw_tweets", consolidated_json) +2. set_output("raw_tweets", consolidated_json) """, ) diff --git a/scripts/browser_remote.py b/scripts/browser_remote.py index b4965ac3..55edbfef 100644 --- a/scripts/browser_remote.py +++ b/scripts/browser_remote.py @@ -244,12 +244,14 @@ def main() -> None: logger.error("Failed to connect to GCU server: %s", e) sys.exit(1) - # Auto-start browser context so tools work immediately + # Warm the browser context so the first interactive call doesn't pay the + # cold-start round trip. about:blank lazy-creates the context just like + # a real URL would, without committing to a destination page. try: - result = client.call_tool("browser_start", {}) - logger.info("browser_start: %s", result) + result = client.call_tool("browser_open", {"url": "about:blank"}) + logger.info("browser_open(about:blank): %s", result) except Exception as e: - logger.warning("browser_start failed (may already be started): %s", e) + logger.warning("browser warm-up failed (may already be running): %s", e) app = create_app() diff --git a/scripts/browser_remote_ui.html b/scripts/browser_remote_ui.html index 623d63e6..552707d2 100644 --- a/scripts/browser_remote_ui.html +++ b/scripts/browser_remote_ui.html @@ -457,7 +457,7 @@ let currentView = 'grid'; // Tool categories for sidebar grouping const CATEGORIES = { - 'Lifecycle': ['browser_setup', 'browser_start', 'browser_stop', 'browser_status'], + 'Lifecycle': ['browser_setup', 'browser_stop', 'browser_status'], 'Tabs': ['browser_tabs', 'browser_open', 'browser_close', 'browser_close_all', 'browser_close_finished', 'browser_activate_tab'], 'Navigation': ['browser_navigate', 'browser_go_back', 'browser_go_forward', 'browser_reload'], 'Interactions': ['browser_click', 'browser_click_coordinate', 'browser_type', 'browser_type_focused', 'browser_press', 'browser_press_at', 'browser_hover', 'browser_hover_coordinate', 'browser_select', 'browser_scroll', 'browser_drag'], diff --git a/tools/browser-extension/README.md b/tools/browser-extension/README.md index cdb01ec7..c26d3226 100644 --- a/tools/browser-extension/README.md +++ b/tools/browser-extension/README.md @@ -61,7 +61,7 @@ All replies carry `{ id, result }` or `{ id, error }`. # 1. At GCU server startup, open ws://localhost:9229/beeline and wait for # the extension to connect (sends { type: "hello" }). # -# 2. On browser_start(profile): +# 2. 
On the first browser tool call for a profile (lazy-start via _ensure_context): # - Send { id, type: "context.create", agentId: profile } # - Receive { groupId, tabId } # - Store groupId in the session object (no Chrome process, no CDP port) diff --git a/tools/src/aden_tools/tools/web_scrape_tool/web_scrape_tool.py b/tools/src/aden_tools/tools/web_scrape_tool/web_scrape_tool.py index db9514e2..fb6d0aa7 100644 --- a/tools/src/aden_tools/tools/web_scrape_tool/web_scrape_tool.py +++ b/tools/src/aden_tools/tools/web_scrape_tool/web_scrape_tool.py @@ -10,12 +10,14 @@ Validates URLs against internal network ranges to prevent SSRF attacks. from __future__ import annotations import ipaddress +import json +import re import socket from typing import Any from urllib.parse import urljoin, urlparse from urllib.robotparser import RobotFileParser -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, NavigableString from fastmcp import FastMCP from playwright.async_api import ( Error as PlaywrightError, @@ -82,6 +84,7 @@ def register_tools(mcp: FastMCP) -> None: selector: str | None = None, include_links: bool = False, max_length: int = 50000, + offset: int = 0, respect_robots_txt: bool = True, ) -> dict: """ @@ -94,12 +97,18 @@ def register_tools(mcp: FastMCP) -> None: Args: url: URL of the webpage to scrape selector: CSS selector to target specific content (e.g., 'article', '.main-content') - include_links: Include extracted links in the response - max_length: Maximum length of extracted text (1000-500000) + include_links: When True, links are inlined as `[text](url)` in + content and also returned as a `links` list + max_length: Maximum length of extracted text returned in this call (1000-500000) + offset: Character offset into the extracted text. Use with + `next_offset` from a prior truncated result to paginate. respect_robots_txt: Whether to respect robots.txt rules (default True) Returns: - Dict with scraped content (url, title, description, content, length) or error dict + Dict with: url, final_url, title, description, page_type + (article|listing|page), content, length, offset, total_length, + truncated, next_offset, headings, structured_data (json_ld + open_graph), + and optionally links. On error, returns {"error": str, ...} with a hint when applicable. """ try: # Validate URL @@ -128,6 +137,10 @@ def register_tools(mcp: FastMCP) -> None: "error": f"Blocked by robots.txt: {url}", "url": url, "skipped": True, + "hint": ( + "Pass respect_robots_txt=False if you have " + "authorization to scrape this site." + ), } except Exception: pass # If robots.txt can't be fetched, proceed anyway @@ -195,7 +208,17 @@ def register_tools(mcp: FastMCP) -> None: return {"error": "Navigation failed: no response received"} if response.status != 200: - return {"error": f"HTTP {response.status}: Failed to fetch URL"} + hint = ( + "Site likely requires auth, blocks bots, or is rate-limiting." + if response.status in (401, 403, 429) + else "Resource may not exist or server may be down." 
+                )
+                return {
+                    "error": f"HTTP {response.status}: Failed to fetch URL",
+                    "url": url,
+                    "status": response.status,
+                    "hint": hint,
+                }
 
             content_type = response.headers.get("content-type", "").lower()
             if not any(t in content_type for t in ["text/html", "application/xhtml+xml"]):
@@ -218,63 +241,165 @@
 
             # Parse rendered HTML with BeautifulSoup
             soup = BeautifulSoup(html_content, "html.parser")
+            base_url = str(response.url)  # Final URL after redirects
+
+            # Extract structured data BEFORE noise removal — JSON-LD lives
+            # in <script type="application/ld+json"> tags in <head> or <body>
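For callers of the revised `web_scrape` tool, the new `offset`/`next_offset` fields imply a simple pagination loop. A sketch under the assumption that `scrape` stands in for however the tool is actually invoked (direct call or MCP client); it relies only on the return keys the docstring documents (`content`, `truncated`, `next_offset`, `error`, `hint`):

```python
# Hedged sketch: paging through a long extraction with offset/next_offset.
# `scrape` is a stand-in for the real web_scrape invocation (an assumption).


def scrape_full_text(scrape, url: str, max_length: int = 50_000) -> str:
    """Accumulate the complete extracted text across truncated chunks."""
    parts: list[str] = []
    offset = 0
    while True:
        result = scrape(url=url, max_length=max_length, offset=offset)
        if "error" in result:
            hint = result.get("hint", "no hint")
            raise RuntimeError(f"{result['error']} ({hint})")
        parts.append(result["content"])
        if not result.get("truncated"):
            return "".join(parts)
        offset = result["next_offset"]  # resume where the last chunk ended
```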