fix: redo gcu tools for extension based browser use

This commit is contained in:
Timothy
2026-04-02 12:07:24 -07:00
parent 8f042b7ca5
commit c7e85aa9f5
17 changed files with 1897 additions and 3246 deletions
+1 -2
View File
@@ -26,8 +26,6 @@ dependencies = [
"fastmcp>=2.0.0",
"diff-match-patch>=20230430",
"python-dotenv>=1.0.0",
"playwright>=1.40.0",
"playwright-stealth>=1.0.5",
"litellm==1.81.7", # pinned: supply chain attack in >=1.82.7 (adenhq/hive#6783)
"dnspython>=2.4.0",
"resend>=2.0.0",
@@ -37,6 +35,7 @@ dependencies = [
"stripe>=14.3.0",
"arxiv>=2.1.0",
"requests>=2.31.0",
"websockets>=12.0",
"psycopg2-binary>=2.9.0",
]
+24 -29
View File
@@ -1,35 +1,26 @@
"""
GCU Browser Tool - Browser automation and interaction for GCU nodes.
GCU Browser Tool - Browser automation via Beeline Chrome extension.
Provides comprehensive browser automation capabilities:
- Browser lifecycle management (start/stop/status)
- Tab management (open/close/focus/list)
- Navigation and history
- Content extraction (screenshot, console, pdf)
- Element interaction (click, type, fill, etc.)
- Advanced operations (wait, evaluate, upload, dialog)
- Agent contexts (profile is persistent and hardcoded per agent)
Control the user's browser directly via CDP - no Playwright required.
The agent's tabs are visible in the user's Chrome, in a new tab group
labeled with the agent ID. All interactions happen via CDP commands through
the extension, using the user's cookies and login state.
Uses Playwright for browser automation.
Example usage:
from fastmcp import FastMCP
from gcu.browser import register_tools
mcp = FastMCP("browser-agent")
register_tools(mcp)
Key benefits:
- Uses user's existing Chrome (LinkedIn, Gmail, etc. stay logged in)
- No separate headless browser process
- Faster - direct CDP, no context switching overhead
- Better debugging - browser is visible and inspectable
"""
from fastmcp import FastMCP
from .bridge import get_bridge, init_bridge
from .session import (
DEFAULT_NAVIGATION_TIMEOUT_MS,
DEFAULT_TIMEOUT_MS,
BrowserSession,
close_shared_browser,
get_all_sessions,
get_session,
get_shared_browser,
set_active_profile,
shutdown_all_browsers,
)
from .tools import (
@@ -41,20 +32,23 @@ from .tools import (
register_tab_tools,
)
# Constants
DEFAULT_TIMEOUT_MS = 30000
DEFAULT_NAVIGATION_TIMEOUT_MS = 60000
def register_tools(mcp: FastMCP) -> None:
"""
Register all GCU browser tools with the MCP server.
"""Register all GCU browser tools with the MCP server.
Tools are organized into categories:
- Lifecycle: browser_start, browser_stop, browser_status
- Tabs: browser_tabs, browser_open, browser_close, browser_focus
- Navigation: browser_navigate, browser_go_back, browser_go_forward, browser_reload
- Inspection: browser_screenshot, browser_snapshot, browser_console, browser_pdf
- Inspection: browser_screenshot, browser_snapshot, browser_console
- Interactions: browser_click, browser_click_coordinate, browser_type, browser_fill,
browser_press, browser_hover, browser_select, browser_scroll, browser_drag
- Advanced: browser_wait, browser_evaluate, browser_get_text, browser_get_attribute,
browser_resize, browser_upload, browser_dialog
browser_resize, browser_upload, browser_dialog
"""
register_lifecycle_tools(mcp)
register_tab_tools(mcp)
@@ -67,13 +61,14 @@ def register_tools(mcp: FastMCP) -> None:
__all__ = [
# Main registration function
"register_tools",
# Session management (for advanced use cases)
# Bridge management
"get_bridge",
"init_bridge",
# Session management
"BrowserSession",
"get_session",
"get_all_sessions",
# Shared browser for agent contexts
"get_shared_browser",
"close_shared_browser",
"set_active_profile",
"shutdown_all_browsers",
# Constants
"DEFAULT_TIMEOUT_MS",
+769 -26
View File
@@ -1,8 +1,8 @@
"""
Beeline Bridge - WebSocket server that the Chrome extension connects to.
Lets Python code create/destroy tab groups in the user's existing Chrome and
move Playwright-managed tabs into those groups for visual isolation.
Lets Python code control the user's Chrome directly via the extension's
chrome.debugger CDP access. No Playwright needed.
Usage:
bridge = init_bridge()
@@ -11,11 +11,12 @@ Usage:
# Per-subagent:
result = await bridge.create_context("my-agent") # {groupId, tabId}
await bridge.group_tab_by_target(cdp_target_id, result["groupId"])
await bridge.destroy_context(result["groupId"])
await bridge.navigate(tab_id, "https://example.com")
await bridge.click(tab_id, "button")
await bridge.type(tab_id, "input", "hello")
snapshot = await bridge.snapshot(tab_id)
The bridge is optional: all callers check ``bridge.is_connected`` and no-op
when the extension is not present.
The bridge requires the Beeline Chrome extension to be installed and connected.
"""
from __future__ import annotations
@@ -23,11 +24,15 @@ from __future__ import annotations
import asyncio
import json
import logging
from typing import Any
logger = logging.getLogger(__name__)
BRIDGE_PORT = 9229
# CDP wait_until values
VALID_WAIT_UNTIL = {"commit", "domcontentloaded", "load", "networkidle"}
class BeelineBridge:
"""WebSocket server that accepts a single connection from the Chrome extension."""
@@ -111,17 +116,21 @@ class BeelineBridge:
self._pending[msg_id] = fut
try:
await self._ws.send(json.dumps({"id": msg_id, "type": type_, **params}))
return await asyncio.wait_for(fut, timeout=10.0)
except asyncio.TimeoutError:
return await asyncio.wait_for(fut, timeout=30.0)
except TimeoutError:
self._pending.pop(msg_id, None)
raise RuntimeError(f"Bridge command '{type_}' timed out")
raise RuntimeError(f"Bridge command '{type_}' timed out") from None
# ── Public API ────────────────────────────────────────────────────────────
async def _cdp(self, tab_id: int, method: str, params: dict | None = None) -> dict:
    """Forward a single CDP ``method`` call for ``tab_id`` over the bridge.

    A missing or empty ``params`` is sent as an empty dict. Returns whatever
    ``_send`` resolves to for the ``"cdp"`` message.
    """
    payload = params if params else {}
    return await self._send("cdp", tabId=tab_id, method=method, params=payload)
# ── Context (Tab Group) Management ─────────────────────────────────────────
async def create_context(self, agent_id: str) -> dict:
"""Create a labelled tab group for this agent.
Returns ``{"groupId": int, "tabId": int}``.
Returns {"groupId": int, "tabId": int}.
"""
return await self._send("context.create", agentId=agent_id)
@@ -129,16 +138,17 @@ class BeelineBridge:
"""Close all tabs in the group and remove it."""
return await self._send("context.destroy", groupId=group_id)
async def group_tab_by_target(self, cdp_target_id: str, group_id: int) -> dict:
    """Ask the extension to move the tab with this CDP target ID into ``group_id``.

    Sends a ``tab.group_by_target`` bridge message and returns its reply.
    """
    reply = await self._send(
        "tab.group_by_target",
        targetId=cdp_target_id,
        groupId=group_id,
    )
    return reply
# ── Tab Management ─────────────────────────────────────────────────────────
async def create_tab(self, group_id: int, url: str) -> dict:
"""Create a new tab in the specified group and navigate to URL.
async def create_tab(self, url: str = "about:blank", group_id: int | None = None) -> dict:
"""Create a new tab and optionally add it to a group.
Returns ``{"tabId": int}``.
Returns {"tabId": int}.
"""
return await self._send("tab.create", groupId=group_id, url=url)
params = {"url": url}
if group_id is not None:
params["groupId"] = group_id
return await self._send("tab.create", **params)
async def close_tab(self, tab_id: int) -> dict:
"""Close a tab by ID."""
@@ -147,24 +157,757 @@ class BeelineBridge:
async def list_tabs(self, group_id: int | None = None) -> dict:
"""List tabs, optionally filtered by group.
Returns ``{"tabs": [{"id": int, "url": str, "title": str}, ...]}``.
Returns {"tabs": [{"id": int, "url": str, "title": str, "groupId": int}, ...]}.
"""
params = {"groupId": group_id} if group_id is not None else {}
return await self._send("tab.list", **params)
async def cdp_attach(self, tab_id: int) -> dict:
"""Attach CDP session to a tab for automation.
async def activate_tab(self, tab_id: int) -> dict:
    """Bring the tab identified by ``tab_id`` into focus.

    Sends a ``tab.activate`` bridge message and returns the extension's reply.
    """
    reply = await self._send("tab.activate", tabId=tab_id)
    return reply
Returns ``{"ok": bool}``.
# ── CDP Attachment ─────────────────────────────────────────────────────────
async def cdp_attach(self, tab_id: int) -> dict:
    """Request that the extension attach its CDP debugger to ``tab_id``.

    Returns the reply to the ``cdp.attach`` message, documented as
    ``{"ok": bool}``.
    """
    reply = await self._send("cdp.attach", tabId=tab_id)
    return reply
async def cdp_send(self, tab_id: int, method: str, params: dict | None = None) -> dict:
"""Send a CDP command to a tab.
async def cdp_detach(self, tab_id: int) -> dict:
    """Request that the extension detach its CDP debugger from ``tab_id``.

    Returns the reply to the ``cdp.detach`` bridge message.
    """
    reply = await self._send("cdp.detach", tabId=tab_id)
    return reply
Returns the CDP result.
# ── Navigation ─────────────────────────────────────────────────────────────
async def navigate(
    self,
    tab_id: int,
    url: str,
    wait_until: str = "load",
    timeout_ms: int = 30000,
) -> dict:
    """Navigate a tab to ``url`` and wait for the requested load state.

    Args:
        tab_id: Tab to drive; the debugger is attached first.
        url: Destination URL passed to ``Page.navigate``.
        wait_until: One of ``VALID_WAIT_UNTIL``; any other value falls back
            to ``"load"``. ``"commit"`` returns without polling.
        timeout_ms: Upper bound on the readiness polling. Reaching it is not
            reported as an error; the current URL/title are still returned.

    Returns:
        ``{"ok": True, "tabId", "url", "title"}`` with URL and title read back
        from the page after navigation.
    """
    if wait_until not in VALID_WAIT_UNTIL:
        wait_until = "load"

    await self.cdp_attach(tab_id)
    await self._cdp(tab_id, "Page.enable")

    result = await self._cdp(tab_id, "Page.navigate", {"url": url})
    # NOTE(review): loaderId is read from the top level of the reply while the
    # Runtime.evaluate replies below are read one level deeper - confirm the
    # bridge's response envelope.
    loader_id = result.get("loaderId")

    if wait_until != "commit" and loader_id:
        # Inside a coroutine the running loop is the right clock source.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout_ms / 1000
        while loop.time() < deadline:
            eval_result = await self._cdp(
                tab_id,
                "Runtime.evaluate",
                {"expression": "document.readyState", "returnByValue": True},
            )
            ready_state = eval_result.get("result", {}).get("result", {}).get("value", "")

            if wait_until == "domcontentloaded" and ready_state in ("interactive", "complete"):
                break
            elif wait_until == "load" and ready_state == "complete":
                break
            elif wait_until == "networkidle":
                # Approximation only: true network idle would need Network
                # domain monitoring. Wait for "complete" plus a short grace.
                await asyncio.sleep(0.1)
                if ready_state == "complete":
                    await asyncio.sleep(0.5)
                    break
            else:
                await asyncio.sleep(0.05)

    url_result = await self._cdp(
        tab_id,
        "Runtime.evaluate",
        {"expression": "window.location.href", "returnByValue": True},
    )
    title_result = await self._cdp(
        tab_id,
        "Runtime.evaluate",
        {"expression": "document.title", "returnByValue": True},
    )

    return {
        "ok": True,
        "tabId": tab_id,
        "url": url_result.get("result", {}).get("result", {}).get("value", ""),
        "title": title_result.get("result", {}).get("result", {}).get("value", ""),
    }
async def go_back(self, tab_id: int) -> dict:
    """Navigate one entry back in the tab's session history.

    CDP's Page domain has no ``goBack`` command, so the history is read with
    ``Page.getNavigationHistory`` and the previous entry is opened with
    ``Page.navigateToHistoryEntry``.

    Returns:
        ``{"ok": True, "action": "back", "url": ...}`` on success, or
        ``{"ok": False, "error": ...}`` when there is no previous entry.
        ``url`` is read right after the command is issued.
    """
    await self.cdp_attach(tab_id)
    await self._cdp(tab_id, "Page.enable")

    history = await self._cdp(tab_id, "Page.getNavigationHistory")
    if "entries" not in history:
        # Accept a reply wrapped in a {"result": ...} envelope as well.
        history = history.get("result", {})
    entries = history.get("entries") or []
    target = history.get("currentIndex", 0) - 1
    if target < 0 or target >= len(entries):
        return {"ok": False, "error": "No previous history entry"}

    await self._cdp(
        tab_id, "Page.navigateToHistoryEntry", {"entryId": entries[target]["id"]}
    )

    result = await self._cdp(
        tab_id,
        "Runtime.evaluate",
        {"expression": "window.location.href", "returnByValue": True},
    )
    return {
        "ok": True,
        "action": "back",
        "url": result.get("result", {}).get("result", {}).get("value", ""),
    }
async def go_forward(self, tab_id: int) -> dict:
    """Navigate one entry forward in the tab's session history.

    CDP's Page domain has no ``goForward`` command, so the history is read
    with ``Page.getNavigationHistory`` and the next entry is opened with
    ``Page.navigateToHistoryEntry``.

    Returns:
        ``{"ok": True, "action": "forward", "url": ...}`` on success, or
        ``{"ok": False, "error": ...}`` when there is no next entry.
        ``url`` is read right after the command is issued.
    """
    await self.cdp_attach(tab_id)
    await self._cdp(tab_id, "Page.enable")

    history = await self._cdp(tab_id, "Page.getNavigationHistory")
    if "entries" not in history:
        # Accept a reply wrapped in a {"result": ...} envelope as well.
        history = history.get("result", {})
    entries = history.get("entries") or []
    target = history.get("currentIndex", 0) + 1
    if target < 0 or target >= len(entries):
        return {"ok": False, "error": "No next history entry"}

    await self._cdp(
        tab_id, "Page.navigateToHistoryEntry", {"entryId": entries[target]["id"]}
    )

    result = await self._cdp(
        tab_id,
        "Runtime.evaluate",
        {"expression": "window.location.href", "returnByValue": True},
    )
    return {
        "ok": True,
        "action": "forward",
        "url": result.get("result", {}).get("result", {}).get("value", ""),
    }
async def reload(self, tab_id: int) -> dict:
    """Reload the tab and report the URL read back from the page.

    Attaches the debugger, enables the Page domain, issues ``Page.reload``
    and then evaluates ``window.location.href``.
    """
    await self.cdp_attach(tab_id)
    for method in ("Page.enable", "Page.reload"):
        await self._cdp(tab_id, method)

    href_query = {"expression": "window.location.href", "returnByValue": True}
    href_reply = await self._cdp(tab_id, "Runtime.evaluate", href_query)
    current_url = href_reply.get("result", {}).get("result", {}).get("value", "")

    return {"ok": True, "action": "reload", "url": current_url}
# ── Interaction ────────────────────────────────────────────────────────────
async def click(
    self,
    tab_id: int,
    selector: str,
    button: str = "left",
    click_count: int = 1,
    timeout_ms: int = 30000,
) -> dict:
    """Click the first element matching ``selector``.

    The element is located with ``DOM.querySelector`` (polled until
    ``timeout_ms`` elapses), scrolled into view, measured with
    ``DOM.getBoxModel`` and clicked at the centre of its content quad via
    ``Input.dispatchMouseEvent``.

    Args:
        tab_id: Tab to act on; the debugger is attached first.
        selector: CSS selector of the target element.
        button: ``"left"``, ``"right"`` or ``"middle"``; anything else is
            treated as ``"left"``.
        click_count: Value sent as ``clickCount`` (2 for a double click).
        timeout_ms: How long to keep polling for the element.

    Returns:
        ``{"ok": True, "action": "click", "selector", "x", "y"}`` or
        ``{"ok": False, "error": ...}`` if the element is missing or has no
        usable bounds.
    """
    await self.cdp_attach(tab_id)
    # The Input domain has no "enable" command, so only DOM is enabled.
    await self._cdp(tab_id, "DOM.enable")

    doc = await self._cdp(tab_id, "DOM.getDocument")
    root_id = doc.get("root", {}).get("nodeId")

    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout_ms / 1000
    node_id = None
    while loop.time() < deadline:
        found = await self._cdp(
            tab_id, "DOM.querySelector", {"nodeId": root_id, "selector": selector}
        )
        node_id = found.get("nodeId")
        if node_id:
            break
        await asyncio.sleep(0.1)

    if not node_id:
        return {"ok": False, "error": f"Element not found: {selector}"}

    # Scroll first: coordinates measured before scrolling would be stale.
    await self._cdp(tab_id, "DOM.scrollIntoViewIfNeeded", {"nodeId": node_id})

    box = await self._cdp(tab_id, "DOM.getBoxModel", {"nodeId": node_id})
    # CDP nests the quads under "model"; a flat reply is accepted as well.
    quad = box.get("model", box).get("content", [])
    if len(quad) < 8:
        return {"ok": False, "error": f"Could not get element bounds: {selector}"}

    # The content quad is [x1,y1, x2,y2, x3,y3, x4,y4]; click its centre.
    x = sum(quad[0:8:2]) / 4
    y = sum(quad[1:8:2]) / 4

    cdp_button = button if button in ("left", "right", "middle") else "left"
    for event_type in ("mousePressed", "mouseReleased"):
        await self._cdp(
            tab_id,
            "Input.dispatchMouseEvent",
            {
                "type": event_type,
                "x": x,
                "y": y,
                "button": cdp_button,
                "clickCount": click_count,
            },
        )

    return {"ok": True, "action": "click", "selector": selector, "x": x, "y": y}
async def click_coordinate(self, tab_id: int, x: float, y: float, button: str = "left") -> dict:
    """Click at viewport coordinates ``(x, y)``.

    Dispatches ``mousePressed`` then ``mouseReleased`` with ``clickCount`` 1.
    ``button`` may be ``"left"``, ``"right"`` or ``"middle"``; any other
    value is treated as ``"left"``.
    """
    await self.cdp_attach(tab_id)
    # No "Input.enable" here: the CDP Input domain does not define it.
    cdp_button = button if button in ("left", "right", "middle") else "left"

    for event_type in ("mousePressed", "mouseReleased"):
        await self._cdp(
            tab_id,
            "Input.dispatchMouseEvent",
            {"type": event_type, "x": x, "y": y, "button": cdp_button, "clickCount": 1},
        )

    return {"ok": True, "action": "click_coordinate", "x": x, "y": y}
async def type_text(
    self,
    tab_id: int,
    selector: str,
    text: str,
    clear_first: bool = True,
    delay_ms: int = 0,
    timeout_ms: int = 30000,
) -> dict:
    """Focus the element matching ``selector`` and type ``text`` into it.

    Args:
        tab_id: Tab to act on; the debugger is attached first.
        selector: CSS selector of the input element.
        text: Characters to send, one keyDown/keyUp pair per character.
        clear_first: When true, the element's ``value`` is set to ``''``
            via script before typing.
        delay_ms: Pause after each character, in milliseconds.
        timeout_ms: How long to keep polling for the element.

    Returns:
        ``{"ok": True, "action": "type", "selector", "length"}`` or
        ``{"ok": False, "error": ...}`` when the element never appears.
    """
    await self.cdp_attach(tab_id)
    # The Input domain has no "enable" command, so only DOM is enabled.
    await self._cdp(tab_id, "DOM.enable")

    doc = await self._cdp(tab_id, "DOM.getDocument")
    root_id = doc.get("root", {}).get("nodeId")

    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout_ms / 1000
    node_id = None
    while loop.time() < deadline:
        found = await self._cdp(
            tab_id, "DOM.querySelector", {"nodeId": root_id, "selector": selector}
        )
        node_id = found.get("nodeId")
        if node_id:
            break
        await asyncio.sleep(0.1)

    if not node_id:
        return {"ok": False, "error": f"Element not found: {selector}"}

    await self._cdp(tab_id, "DOM.focus", {"nodeId": node_id})

    if clear_first:
        await self._cdp(
            tab_id,
            "Runtime.evaluate",
            {
                "expression": f"document.querySelector({json.dumps(selector)}).value = ''",
                "returnByValue": True,
            },
        )

    for char in text:
        for event_type in ("keyDown", "keyUp"):
            await self._cdp(
                tab_id,
                "Input.dispatchKeyEvent",
                {"type": event_type, "text": char},
            )
        if delay_ms > 0:
            await asyncio.sleep(delay_ms / 1000)

    return {"ok": True, "action": "type", "selector": selector, "length": len(text)}
async def press_key(self, tab_id: int, key: str, selector: str | None = None) -> dict:
    """Press a single keyboard key, optionally focusing an element first.

    Args:
        tab_id: Tab to act on; the debugger is attached first.
        key: Key name such as ``'Enter'``, ``'Tab'``, ``'Escape'`` or
            ``'ArrowDown'``. Names not in the built-in table are sent as
            both the key name and its text.
        selector: Optional CSS selector to focus before the key press. A
            selector that matches nothing is ignored.

    Returns:
        ``{"ok": True, "action": "press", "key": key}``.
    """
    await self.cdp_attach(tab_id)
    # No "Input.enable" here: the CDP Input domain does not define it.

    if selector:
        doc = await self._cdp(tab_id, "DOM.getDocument")
        root_id = doc.get("root", {}).get("nodeId")
        found = await self._cdp(
            tab_id, "DOM.querySelector", {"nodeId": root_id, "selector": selector}
        )
        node_id = found.get("nodeId")
        if node_id:
            await self._cdp(tab_id, "DOM.focus", {"nodeId": node_id})

    # (text, key name) pairs; keys that produce no character have empty text.
    key_map = {
        "Enter": ("\r", "Enter"),
        "Tab": ("\t", "Tab"),
        "Escape": ("\x1b", "Escape"),
        "Backspace": ("\b", "Backspace"),
        "Delete": ("\x7f", "Delete"),
        "ArrowUp": ("", "ArrowUp"),
        "ArrowDown": ("", "ArrowDown"),
        "ArrowLeft": ("", "ArrowLeft"),
        "ArrowRight": ("", "ArrowRight"),
        "Home": ("", "Home"),
        "End": ("", "End"),
        "PageUp": ("", "PageUp"),
        "PageDown": ("", "PageDown"),
    }
    text, key_name = key_map.get(key, (key, key))

    for event_type in ("keyDown", "keyUp"):
        event = {"type": event_type, "key": key_name}
        if text:
            # "text" is an optional string; leave it out rather than send null.
            event["text"] = text
        await self._cdp(tab_id, "Input.dispatchKeyEvent", event)

    return {"ok": True, "action": "press", "key": key}
async def hover(self, tab_id: int, selector: str, timeout_ms: int = 30000) -> dict:
    """Move the mouse over the centre of the element matching ``selector``.

    The element is polled for until ``timeout_ms`` elapses, then measured
    with ``DOM.getBoxModel`` and targeted with a ``mouseMoved`` event.

    Returns:
        ``{"ok": True, "action": "hover", "selector"}`` or
        ``{"ok": False, "error": ...}`` if the element is missing or has no
        usable bounds.
    """
    await self.cdp_attach(tab_id)
    # The Input domain has no "enable" command, so only DOM is enabled.
    await self._cdp(tab_id, "DOM.enable")

    doc = await self._cdp(tab_id, "DOM.getDocument")
    root_id = doc.get("root", {}).get("nodeId")

    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout_ms / 1000
    node_id = None
    while loop.time() < deadline:
        found = await self._cdp(
            tab_id, "DOM.querySelector", {"nodeId": root_id, "selector": selector}
        )
        node_id = found.get("nodeId")
        if node_id:
            break
        await asyncio.sleep(0.1)

    if not node_id:
        return {"ok": False, "error": f"Element not found: {selector}"}

    box = await self._cdp(tab_id, "DOM.getBoxModel", {"nodeId": node_id})
    # CDP nests the quads under "model"; a flat reply is accepted as well.
    quad = box.get("model", box).get("content", [])
    if len(quad) < 8:
        # Guard the indexing below instead of raising IndexError.
        return {"ok": False, "error": f"Could not get element bounds: {selector}"}

    x = sum(quad[0:8:2]) / 4
    y = sum(quad[1:8:2]) / 4

    await self._cdp(
        tab_id,
        "Input.dispatchMouseEvent",
        {"type": "mouseMoved", "x": x, "y": y},
    )
    return {"ok": True, "action": "hover", "selector": selector}
async def scroll(
    self, tab_id: int, direction: str = "down", amount: int = 500
) -> dict:
    """Scroll the page by ``amount`` pixels in ``direction``.

    ``direction`` is one of ``"down"``, ``"up"``, ``"right"`` or ``"left"``;
    any other value scrolls by ``(0, 0)``. The scroll is performed by
    evaluating ``window.scrollBy`` in the page.
    """
    await self.cdp_attach(tab_id)

    offsets = {
        "down": (0, amount),
        "up": (0, -amount),
        "right": (amount, 0),
        "left": (-amount, 0),
    }
    delta_x, delta_y = offsets.get(direction, (0, 0))

    request = {
        "expression": f"window.scrollBy({delta_x}, {delta_y})",
        "returnByValue": True,
    }
    await self._cdp(tab_id, "Runtime.evaluate", request)

    return {"ok": True, "action": "scroll", "direction": direction, "amount": amount}
async def select_option(self, tab_id: int, selector: str, values: list[str]) -> dict:
    """Select the options of a ``<select>`` whose values are in ``values``.

    Options not listed are deselected and a bubbling ``change`` event is
    dispatched. The script runs inside an arrow-function wrapper so its
    ``const`` does not become a global binding that would make a second
    call fail with a redeclaration error.

    Returns:
        ``{"ok": True, "action": "select", "selector", "selected"}`` where
        ``selected`` is the list of values the page reports as selected, or
        ``{"ok": False, "error": ...}`` when no element matches.
    """
    await self.cdp_attach(tab_id)

    values_json = json.dumps(values)
    script = f"""
    (() => {{
        const sel = document.querySelector({json.dumps(selector)});
        if (!sel) return null;
        Array.from(sel.options).forEach(opt => {{
            opt.selected = {values_json}.includes(opt.value);
        }});
        sel.dispatchEvent(new Event('change', {{bubbles: true}}));
        return Array.from(sel.selectedOptions).map(o => o.value);
    }})()
    """
    reply = await self._cdp(
        tab_id,
        "Runtime.evaluate",
        {"expression": script, "returnByValue": True},
    )

    # Same reply layout the other Runtime.evaluate callers in this class read.
    selected = reply.get("result", {}).get("result", {}).get("value")
    if selected is None:
        return {"ok": False, "error": f"Element not found: {selector}"}

    return {"ok": True, "action": "select", "selector": selector, "selected": selected}
# ── Inspection ─────────────────────────────────────────────────────────────
async def evaluate(self, tab_id: int, script: str) -> dict:
    """Execute JavaScript in the page and return its value.

    Promises are awaited (``awaitPromise``) and the value is returned by
    value. The other ``Runtime.evaluate`` callers in this class read the
    value at ``reply["result"]["result"]["value"]``; that layout is accepted
    here, as is a bare CDP reply (``reply["result"]["value"]``).

    Returns:
        ``{"ok": True, "action": "evaluate", "result": value}`` or
        ``{"ok": False, "error": text}`` when the reply carries
        ``exceptionDetails``.
    """
    await self.cdp_attach(tab_id)

    reply = await self._cdp(
        tab_id,
        "Runtime.evaluate",
        {"expression": script, "returnByValue": True, "awaitPromise": True},
    )

    outer = reply.get("result", {})
    # A nested "result" dict means the CDP reply arrived inside an envelope.
    remote = outer["result"] if isinstance(outer.get("result"), dict) else outer
    details = reply.get("exceptionDetails") or outer.get("exceptionDetails")

    if details:
        return {
            "ok": False,
            "error": details.get("text", "Script error"),
        }

    return {
        "ok": True,
        "action": "evaluate",
        "result": remote.get("value"),
    }
async def snapshot(self, tab_id: int) -> dict:
    """Return a text rendering of the page's accessibility tree.

    Fetches the tree with ``Accessibility.getFullAXTree``, formats it with
    ``_format_ax_tree`` and adds the page URL read from
    ``window.location.href``.
    """
    await self.cdp_attach(tab_id)
    await self._cdp(tab_id, "Accessibility.enable")

    ax_reply = await self._cdp(tab_id, "Accessibility.getFullAXTree")
    tree_text = self._format_ax_tree(ax_reply.get("nodes", []))

    href_reply = await self._cdp(
        tab_id,
        "Runtime.evaluate",
        {"expression": "window.location.href", "returnByValue": True},
    )
    page_url = href_reply.get("result", {}).get("result", {}).get("value", "")

    return {"ok": True, "tabId": tab_id, "url": page_url, "snapshot": tree_text}
def _format_ax_tree(self, nodes: list[dict]) -> str:
    """Render the node list from ``Accessibility.getFullAXTree`` as text.

    The first node is treated as the root and the tree is walked depth
    first, one line per node: ``- role [ref=eN] "name" [props]``. Nodes
    flagged ``ignored`` and nodes whose role is ``none``/``Ignored`` are
    skipped, with their children promoted to the skipped node's depth. A
    ``ref`` is given to every node that has a name or an interactive role.

    The walk uses an explicit stack, so very deep trees cannot hit the
    interpreter's recursion limit.

    Returns:
        The formatted tree, or ``"(empty tree)"`` when nothing is rendered.
    """
    if not nodes:
        return "(empty tree)"

    by_id = {n["nodeId"]: n for n in nodes}
    flag_props = ("focused", "disabled", "checked", "expanded", "selected", "required")
    interactive_roles = frozenset({
        "button", "link", "textbox", "checkbox",
        "radio", "combobox", "menuitem", "tab", "searchbox",
    })

    lines: list[str] = []
    ref_count = 0
    # Children are pushed reversed so they are popped in document order.
    stack: list[tuple[str, int]] = [(nodes[0]["nodeId"], 0)]

    while stack:
        node_id, depth = stack.pop()
        node = by_id.get(node_id)
        if not node:
            continue
        child_ids = node.get("childIds", [])

        if node.get("ignored", False):
            stack.extend((cid, depth) for cid in reversed(child_ids))
            continue

        role_info = node.get("role", {})
        if isinstance(role_info, dict):
            role = role_info.get("value", "unknown")
        else:
            role = str(role_info)
        if role in ("none", "Ignored"):
            stack.extend((cid, depth) for cid in reversed(child_ids))
            continue

        name_info = node.get("name", {})
        name = name_info.get("value", "") if isinstance(name_info, dict) else str(name_info)

        # Boolean state flags are listed only when true; "level" carries a value.
        props: list[str] = []
        for prop in node.get("properties", []):
            pname = prop.get("name", "")
            pval = prop.get("value", {})
            val = pval.get("value") if isinstance(pval, dict) else pval
            if pname in flag_props:
                if val is True:
                    props.append(pname)
            elif pname == "level" and val:
                props.append(f"level={val}")

        label = f"- {role}"
        if role in interactive_roles or name:
            ref_count += 1
            label += f" [ref=e{ref_count}]"
        if name:
            label += f' "{name}"'
        if props:
            label += f" [{', '.join(props)}]"

        indent = " " * depth
        lines.append(f"{indent}{label}")
        stack.extend((cid, depth + 1) for cid in reversed(child_ids))

    return "\n".join(lines) if lines else "(empty tree)"
async def get_text(self, tab_id: int, selector: str, timeout_ms: int = 30000) -> dict:
    """Return the ``textContent`` of the first element matching ``selector``.

    The page is polled every 0.1 s until the script yields a non-null value
    or ``timeout_ms`` elapses, in which case an "Element not found" error
    is returned.
    """
    await self.cdp_attach(tab_id)

    script = f"""
    (function() {{
        const el = document.querySelector({json.dumps(selector)});
        return el ? el.textContent : null;
    }})()
    """
    request = {"expression": script, "returnByValue": True}

    clock = asyncio.get_running_loop()
    give_up_at = clock.time() + timeout_ms / 1000
    while clock.time() < give_up_at:
        reply = await self._cdp(tab_id, "Runtime.evaluate", request)
        content = reply.get("result", {}).get("result", {}).get("value")
        if content is None:
            await asyncio.sleep(0.1)
            continue
        return {"ok": True, "selector": selector, "text": content}

    return {"ok": False, "error": f"Element not found: {selector}"}
async def get_attribute(
    self, tab_id: int, selector: str, attribute: str, timeout_ms: int = 30000
) -> dict:
    """Return the value of ``attribute`` on the element matching ``selector``.

    The page is polled every 0.1 s until the element exists or
    ``timeout_ms`` elapses. An element that exists but lacks the attribute
    is reported at once with ``value`` set to ``None``, instead of being
    polled for the full timeout and then mis-reported as "not found".

    Returns:
        ``{"ok": True, "selector", "attribute", "value"}`` or
        ``{"ok": False, "error": ...}`` when no element matches in time.
    """
    await self.cdp_attach(tab_id)

    # Report presence separately from the value so a null attribute is not
    # mistaken for a missing element.
    script = f"""
    (function() {{
        const el = document.querySelector({json.dumps(selector)});
        return el
            ? {{found: true, value: el.getAttribute({json.dumps(attribute)})}}
            : {{found: false}};
    }})()
    """

    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout_ms / 1000
    while loop.time() < deadline:
        reply = await self._cdp(
            tab_id,
            "Runtime.evaluate",
            {"expression": script, "returnByValue": True},
        )
        state = reply.get("result", {}).get("result", {}).get("value")
        if isinstance(state, dict) and state.get("found"):
            return {
                "ok": True,
                "selector": selector,
                "attribute": attribute,
                "value": state.get("value"),
            }
        await asyncio.sleep(0.1)

    return {"ok": False, "error": f"Element not found: {selector}"}
async def screenshot(
    self, tab_id: int, full_page: bool = False, selector: str | None = None
) -> dict:
    """Take a PNG screenshot of the viewport, the full page or one element.

    Args:
        tab_id: Tab to capture; the debugger is attached first.
        full_page: Capture the whole document instead of the viewport.
        selector: When given, capture only the first matching element
            (takes precedence over ``full_page``).

    Returns:
        ``{"ok": True, "tabId", "url", "data": base64_string,
        "mimeType": "image/png"}``, or ``{"ok": False, "error": ...}`` when
        the element is missing or no image data comes back.
    """
    await self.cdp_attach(tab_id)
    await self._cdp(tab_id, "Page.enable")

    params: dict[str, Any] = {"format": "png"}

    if selector:
        # Document-relative rectangle of the element, or null if absent.
        rect_script = f"""
        (() => {{
            const el = document.querySelector({json.dumps(selector)});
            if (!el) return null;
            const r = el.getBoundingClientRect();
            return {{
                x: r.x + window.scrollX,
                y: r.y + window.scrollY,
                width: r.width,
                height: r.height,
            }};
        }})()
        """
        rect_reply = await self._cdp(
            tab_id,
            "Runtime.evaluate",
            {"expression": rect_script, "returnByValue": True},
        )
        rect = rect_reply.get("result", {}).get("result", {}).get("value")
        if not isinstance(rect, dict):
            return {"ok": False, "error": f"Element not found: {selector}"}
        params["clip"] = {**rect, "scale": 1}
        # The clip may lie outside the visible viewport.
        params["captureBeyondViewport"] = True
    elif full_page:
        metrics = await self._cdp(tab_id, "Page.getLayoutMetrics")
        # cssContentSize supersedes the deprecated contentSize field.
        content_size = metrics.get("cssContentSize") or metrics.get("contentSize", {})
        params["clip"] = {
            "x": 0,
            "y": 0,
            "width": content_size.get("width", 1280),
            "height": content_size.get("height", 720),
            "scale": 1,
        }
        params["captureBeyondViewport"] = True

    result = await self._cdp(tab_id, "Page.captureScreenshot", params)
    data = result.get("data")

    if not data:
        return {"ok": False, "error": "Screenshot failed"}

    url_result = await self._cdp(
        tab_id,
        "Runtime.evaluate",
        {"expression": "window.location.href", "returnByValue": True},
    )
    url = url_result.get("result", {}).get("result", {}).get("value", "")

    return {
        "ok": True,
        "tabId": tab_id,
        "url": url,
        "data": data,
        "mimeType": "image/png",
    }
async def wait_for_selector(
    self, tab_id: int, selector: str, timeout_ms: int = 30000
) -> dict:
    """Poll until an element matching ``selector`` exists in the page.

    Checks every 0.1 s; returns ``{"ok": True, "selector"}`` on success or
    an ``{"ok": False, "error": ...}`` result once ``timeout_ms`` elapses.
    """
    await self.cdp_attach(tab_id)

    script = f"""
    (function() {{
        return document.querySelector({json.dumps(selector)}) !== null;
    }})()
    """
    request = {"expression": script, "returnByValue": True}

    clock = asyncio.get_running_loop()
    give_up_at = clock.time() + timeout_ms / 1000
    while clock.time() < give_up_at:
        reply = await self._cdp(tab_id, "Runtime.evaluate", request)
        if reply.get("result", {}).get("result", {}).get("value", False):
            return {"ok": True, "selector": selector}
        await asyncio.sleep(0.1)

    return {"ok": False, "error": f"Element not found within timeout: {selector}"}
async def wait_for_text(self, tab_id: int, text: str, timeout_ms: int = 30000) -> dict:
    """Poll until ``document.body.innerText`` contains ``text``.

    Checks every 0.1 s; returns ``{"ok": True, "text"}`` on success or an
    ``{"ok": False, "error": ...}`` result once ``timeout_ms`` elapses.
    """
    await self.cdp_attach(tab_id)

    script = f"""
    (function() {{
        return document.body.innerText.includes({json.dumps(text)});
    }})()
    """
    request = {"expression": script, "returnByValue": True}

    clock = asyncio.get_running_loop()
    give_up_at = clock.time() + timeout_ms / 1000
    while clock.time() < give_up_at:
        reply = await self._cdp(tab_id, "Runtime.evaluate", request)
        if reply.get("result", {}).get("result", {}).get("value", False):
            return {"ok": True, "text": text}
        await asyncio.sleep(0.1)

    return {"ok": False, "error": f"Text not found within timeout: {text}"}
async def resize(self, tab_id: int, width: int, height: int) -> dict:
    """Override the tab's viewport size via ``Emulation.setDeviceMetricsOverride``.

    ``deviceScaleFactor`` is sent as 0 and ``mobile`` as False.
    """
    await self.cdp_attach(tab_id)

    override = {
        "width": width,
        "height": height,
        "deviceScaleFactor": 0,
        "mobile": False,
    }
    await self._cdp(tab_id, "Emulation.setDeviceMetricsOverride", override)

    return {"ok": True, "action": "resize", "width": width, "height": height}
# ---------------------------------------------------------------------------
-106
View File
@@ -1,106 +0,0 @@
"""
Detect system-installed Chrome or Edge browsers.
Searches platform-specific well-known paths to find a Chromium-based browser
executable. Used by chrome_launcher to avoid bundling Playwright's Chromium.
"""
from __future__ import annotations
import os
import shutil
import sys
from pathlib import Path
# Search order per platform: Chrome stable first, then Edge, then Chromium.
# macOS: absolute paths to the executables inside the .app bundles, tried in
# list order.
_MACOS_CANDIDATES = [
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
"/Applications/Chromium.app/Contents/MacOS/Chromium",
]
# Linux: command names resolved on PATH with shutil.which, tried in list order.
_LINUX_WHICH_NAMES = [
"google-chrome",
"google-chrome-stable",
"chromium-browser",
"chromium",
"microsoft-edge",
"microsoft-edge-stable",
]
# Windows: paths relative to each base directory taken from the
# PROGRAMFILES / PROGRAMFILES(X86) / LOCALAPPDATA environment variables.
_WINDOWS_CANDIDATES = [
r"Google\Chrome\Application\chrome.exe",
r"Microsoft\Edge\Application\msedge.exe",
]
def find_chrome() -> str | None:
    """Locate a system Chrome/Edge executable.

    The ``CHROME_PATH`` environment variable wins when it points at an
    executable file; otherwise the platform-specific search for the current
    ``sys.platform`` is used.

    Returns:
        The executable path, or ``None`` when nothing is found.
    """
    override = os.environ.get("CHROME_PATH")
    if override and _is_executable(override):
        return override

    platform = sys.platform
    if platform == "darwin":
        return _find_macos()
    if platform == "win32":
        return _find_windows()
    return _find_linux()
def require_chrome() -> str:
    """Return the path from ``find_chrome`` or fail with install guidance.

    Raises:
        RuntimeError: If no Chrome/Edge executable could be located.
    """
    path = find_chrome()
    if path is not None:
        return path

    message = (
        "No Chrome or Edge browser found. GCU browser tools require a "
        "Chromium-based browser.\n\n"
        "Options:\n"
        " 1. Install Google Chrome: https://www.google.com/chrome/\n"
        " 2. Set the CHROME_PATH environment variable to your browser executable\n"
    )
    raise RuntimeError(message)
def _is_executable(path: str) -> bool:
    """Return True when ``path`` exists and the current user may execute it."""
    candidate = Path(path)
    if not candidate.exists():
        return False
    return os.access(candidate, os.X_OK)
def _find_macos() -> str | None:
    """Return the first entry of ``_MACOS_CANDIDATES`` that is executable, else None."""
    return next(
        (path for path in _MACOS_CANDIDATES if _is_executable(path)),
        None,
    )
def _find_linux() -> str | None:
    """Return the first ``_LINUX_WHICH_NAMES`` command found on PATH, else None."""
    located = (shutil.which(command) for command in _LINUX_WHICH_NAMES)
    return next((hit for hit in located if hit), None)
def _find_windows() -> str | None:
    """Search the Windows program directories for a known browser executable.

    Base directories come from the ``PROGRAMFILES``, ``PROGRAMFILES(X86)``
    and ``LOCALAPPDATA`` environment variables (unset or empty ones are
    skipped). Every ``_WINDOWS_CANDIDATES`` entry is tried under one base
    before moving to the next; the first existing file wins.
    """
    env_names = ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA")
    roots = [os.environ[name] for name in env_names if os.environ.get(name)]

    candidates = (
        os.path.join(root, relative)
        for root in roots
        for relative in _WINDOWS_CANDIDATES
    )
    return next((path for path in candidates if os.path.isfile(path)), None)
-397
View File
@@ -1,397 +0,0 @@
"""
Launch and manage a system Chrome/Edge process for CDP connections.
Starts the browser as a subprocess with ``--remote-debugging-port`` and waits
until the CDP endpoint is ready. Used by ``session.py`` to replace
Playwright's ``chromium.launch()`` with a system-installed browser.
On macOS, uses ``open -n -a`` to force a new Chrome instance even when the
user's personal Chrome is already running.
"""
from __future__ import annotations
import asyncio
import logging
import os
import signal
import subprocess
import sys
import tempfile
import time
from dataclasses import dataclass, field
from pathlib import Path
from .chrome_finder import require_chrome
logger = logging.getLogger(__name__)
# Flags passed to every Chrome launch, on all platforms.
_CHROME_ARGS = [
    "--disable-dev-shm-usage",
    "--no-first-run",
    "--no-default-browser-check",
    "--disable-session-crashed-bubble",
    "--noerrdialogs",
    "--no-startup-window",
]

# The sandbox flags matter only on Linux (Docker, CI); on macOS they trigger
# a yellow warning bar and serve no purpose, so they are prepended on Linux
# alone.
if sys.platform == "linux":
    _CHROME_ARGS[:0] = ["--no-sandbox", "--disable-setuid-sandbox"]

# CDP readiness polling: interval between probes and total wait, in seconds.
_CDP_POLL_INTERVAL_S = 0.1
_CDP_MAX_WAIT_S = 10.0
def _clear_session_restore(user_data_dir: Path) -> None:
    """Delete Chrome's session-restore files under ``<user_data_dir>/Default``.

    Removes ``Current Session``, ``Current Tabs``, ``Last Session`` and
    ``Last Tabs`` when present so Chrome does not reopen old tabs/windows.
    Cookies and localStorage are stored separately and are unaffected.
    Removal is best effort: an ``OSError`` on a file is ignored.
    """
    profile_dir = user_data_dir / "Default"
    restore_names = ("Current Session", "Current Tabs", "Last Session", "Last Tabs")

    for restore_file in (profile_dir / name for name in restore_names):
        if not restore_file.exists():
            continue
        try:
            restore_file.unlink()
            logger.debug("Removed session restore file: %s", restore_file)
        except OSError:
            pass
def _resolve_app_bundle(executable_path: str) -> str | None:
    """Return the enclosing macOS ``.app`` bundle of an executable path.

    e.g. '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
    -> '/Applications/Google Chrome.app'

    The first path component ending in ``.app`` marks the bundle; ``None``
    is returned when no component does.
    """
    components = Path(executable_path).parts
    for end, component in enumerate(components, start=1):
        if component.endswith(".app"):
            return str(Path(*components[:end]))
    return None
def _find_pid_on_port(port: int) -> int | None:
    """Return the first PID that ``lsof`` reports as listening on TCP *port*.

    Any failure (``lsof`` missing, non-zero exit, timeout, output that is not
    a list of integers) results in ``None``.
    """
    lsof_cmd = ["lsof", "-ti", f"tcp:{port}", "-sTCP:LISTEN"]
    try:
        raw = subprocess.check_output(lsof_cmd, text=True, timeout=5)
        listeners = [int(token) for token in raw.strip().split("\n") if token.strip()]
    except Exception:
        return None
    if listeners:
        return listeners[0]
    return None
def _kill_chrome_by_data_dir(user_data_dir: Path) -> None:
    """Find and SIGKILL processes whose command line names *user_data_dir*.

    Fallback for when Chrome started but never bound the CDP port, so
    ``_find_pid_on_port`` cannot locate it. This is best effort: if ``pgrep``
    is unavailable, times out, or finds nothing, the function returns quietly.

    Args:
        user_data_dir: Profile directory that was passed to Chrome via
            ``--user-data-dir``.
    """
    # pgrep treats the pattern as an extended regular expression, so escape
    # metacharacters that may legitimately appear in a directory name.
    ere_meta = set(".[]{}()*+?^$|\\")
    literal_dir = "".join(
        "\\" + ch if ch in ere_meta else ch for ch in str(user_data_dir)
    )
    pattern = f"--user-data-dir={literal_dir}"

    try:
        # -f matches against the full command line. The explicit "--" is
        # required: the pattern itself starts with "--" and would otherwise be
        # parsed as an (unknown) pgrep option, making the lookup always fail.
        output = subprocess.check_output(
            ["pgrep", "-f", "--", pattern],
            text=True,
            timeout=5,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        # No matching process, pgrep hung, or pgrep is not installed.
        return

    for pid_str in output.split():
        try:
            pid = int(pid_str)
            if pid == os.getpid():
                continue  # never kill ourselves on an accidental match
            os.kill(pid, signal.SIGKILL)
        except (ValueError, OSError):
            continue
        logger.info(f"Killed orphaned Chrome pid={pid} (matched user-data-dir)")
@dataclass
class ChromeProcess:
    """Handle to a running Chrome process launched for CDP access.

    The process is tracked either through a ``Popen`` handle (direct child)
    or through a bare PID (macOS ``open -n`` launch, where Chrome is not our
    child).
    """

    # Popen handle for a direct child; None when launched via open -n (macOS).
    process: subprocess.Popen[bytes] | None
    # Port given to --remote-debugging-port and the matching HTTP endpoint.
    cdp_port: int
    cdp_url: str
    # Profile directory the browser was started with.
    user_data_dir: Path
    # Set for ephemeral sessions; removed in kill().
    _temp_dir: tempfile.TemporaryDirectory[str] | None = field(default=None, repr=False)
    # PID used when there is no Popen handle.
    _pid: int | None = field(default=None, repr=False)

    def is_alive(self) -> bool:
        """Return True while the tracked Chrome process still exists."""
        if self.process is not None:
            return self.process.poll() is None
        if self._pid is not None:
            try:
                # Signal 0 performs the existence check without signalling.
                os.kill(self._pid, 0)
            except OSError:
                return False
            return True
        return False

    async def kill(self) -> None:
        """Terminate the Chrome process and clean up resources.

        Sends a graceful termination request first and escalates to SIGKILL
        if the process is still running after about five seconds. The
        temporary profile directory, if any, is removed afterwards.
        """
        loop = asyncio.get_running_loop()

        if self.process is not None and self.process.poll() is None:
            self.process.terminate()
            try:
                await asyncio.wait_for(
                    loop.run_in_executor(None, self.process.wait),
                    timeout=5.0,
                )
            except asyncio.TimeoutError:
                # asyncio.TimeoutError is only an alias of the builtin
                # TimeoutError from Python 3.11 on; catching the asyncio name
                # keeps the SIGKILL fallback working on older interpreters.
                self.process.kill()
                self.process.wait()
            logger.info(f"Chrome process (port {self.cdp_port}) terminated")
        elif self._pid is not None:
            try:
                os.kill(self._pid, signal.SIGTERM)
                # Wait briefly for graceful shutdown
                for _ in range(50):  # 5 seconds max
                    alive = await loop.run_in_executor(None, self.is_alive)
                    if not alive:
                        break
                    await asyncio.sleep(0.1)
                else:
                    os.kill(self._pid, signal.SIGKILL)
                logger.info(f"Chrome process pid={self._pid} (port {self.cdp_port}) terminated")
            except OSError:
                pass
            self._pid = None

        # Release our end of the stderr pipe once the child is gone.
        if self.process is not None and self.process.stderr is not None:
            try:
                self.process.stderr.close()
            except OSError:
                pass

        # Clean up temp directory for ephemeral sessions
        if self._temp_dir is not None:
            try:
                self._temp_dir.cleanup()
            except Exception:
                pass
            self._temp_dir = None
async def launch_chrome(
    cdp_port: int,
    user_data_dir: Path | None = None,
    headless: bool = True,
    extra_args: list[str] | None = None,
) -> ChromeProcess:
    """Start the system Chrome and return once its CDP endpoint answers.

    Args:
        cdp_port: Port for ``--remote-debugging-port``.
        user_data_dir: Profile directory. When *None*, a temporary directory
            is created and handed to the returned handle for later cleanup
            (ephemeral mode).
        headless: Add ``--headless=new`` to the command line.
        extra_args: Additional Chrome CLI flags, appended after the defaults.

    Returns:
        A :class:`ChromeProcess` handle.

    Raises:
        RuntimeError: If Chrome is not found, fails to start, or CDP does not
            become ready within the timeout.
    """
    chrome_path = require_chrome()

    temp_dir: tempfile.TemporaryDirectory[str] | None = None
    if user_data_dir is None:
        temp_dir = tempfile.TemporaryDirectory(prefix="hive-browser-")
        user_data_dir = Path(temp_dir.name)

    _clear_session_restore(user_data_dir)

    from .session import _get_viewport

    viewport = _get_viewport()
    width, height = viewport["width"], viewport["height"]

    # No URL argument is passed: Chrome opens its default page and session.py
    # closes the initial pages and creates a clean one. Passing "about:blank"
    # caused macOS to show a visible blank tab that the CDP connection
    # couldn't control, blocking the session.
    chrome_flags = [
        f"--remote-debugging-port={cdp_port}",
        f"--user-data-dir={user_data_dir}",
        f"--window-size={width},{height}",
        "--lang=en-US",
    ]
    chrome_flags += _CHROME_ARGS
    chrome_flags += extra_args or []
    if headless:
        chrome_flags += ["--headless=new"]

    cdp_url = f"http://127.0.0.1:{cdp_port}"
    shared_args = (chrome_flags, cdp_port, cdp_url, user_data_dir, temp_dir)

    # On macOS, `open -n -a` forces a new Chrome instance even when the user's
    # personal Chrome is already running; otherwise Chrome's Mach-based IPC
    # would delegate to the existing instance and exit with code 0.
    app_bundle = _resolve_app_bundle(chrome_path) if sys.platform == "darwin" else None
    if app_bundle:
        return await _launch_chrome_macos(app_bundle, *shared_args)

    # Linux, Windows, or macOS fallback (no .app bundle found)
    return await _launch_chrome_subprocess(chrome_path, *shared_args)
async def _launch_chrome_macos(
    app_bundle: str,
    chrome_flags: list[str],
    cdp_port: int,
    cdp_url: str,
    user_data_dir: Path,
    temp_dir: tempfile.TemporaryDirectory[str] | None,
) -> ChromeProcess:
    """Launch Chrome on macOS using ``open -n -a`` to bypass single-instance IPC.

    Args:
        app_bundle: Path of the ``.app`` bundle to open.
        chrome_flags: Flags forwarded to Chrome after ``--args``.
        cdp_port: Remote-debugging port Chrome was asked to bind.
        cdp_url: HTTP endpoint stored on the returned handle.
        user_data_dir: Profile directory (also used to find an orphaned
            Chrome if the launch fails).
        temp_dir: Temporary profile directory to remove on failure, if any.

    Returns:
        A :class:`ChromeProcess` with ``process=None`` and the discovered PID
        (or ``None`` if the PID could not be determined).

    Raises:
        Exception: Whatever ``_wait_for_cdp`` raised, re-raised after the
            orphaned browser and temporary directory have been cleaned up.
    """
    logger.info(
        f"Launching Chrome (macOS open -n): app={app_bundle}, port={cdp_port}, "
        f"user_data_dir={user_data_dir}"
    )

    # `open -n` forces a new instance; --args passes flags to Chrome
    launcher = subprocess.Popen(
        ["open", "-n", "-a", app_bundle, "--args", *chrome_flags],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # `open` returns immediately — Chrome is now a child of launchd, not us.

    loop = asyncio.get_running_loop()
    try:
        await _wait_for_cdp(cdp_port)
    except Exception:
        # Chrome may have started but not yet bound the CDP port.
        # Poll briefly to find and kill the orphaned process so it
        # doesn't hold the profile lock and block future launches.
        # The lsof/pgrep helpers block, so they run in the executor and the
        # pause uses asyncio.sleep to keep the event loop responsive.
        killed = False
        for _ in range(30):  # 30 attempts, 0.1 s apart
            pid = await loop.run_in_executor(None, _find_pid_on_port, cdp_port)
            if pid:
                try:
                    os.kill(pid, signal.SIGKILL)
                    killed = True
                    logger.info(f"Killed orphaned Chrome pid={pid} on port {cdp_port}")
                except OSError:
                    pass
                break
            await asyncio.sleep(0.1)
        if not killed:
            # Last resort: find Chrome by user-data-dir in process list
            await loop.run_in_executor(None, _kill_chrome_by_data_dir, user_data_dir)
        if temp_dir is not None:
            try:
                temp_dir.cleanup()
            except Exception:
                # Don't let a cleanup problem hide the real launch error.
                logger.debug("Failed to remove temporary profile directory", exc_info=True)
        raise
    finally:
        # Reap the short-lived `open` helper if it has already exited.
        launcher.poll()

    # Discover the Chrome PID listening on the CDP port
    pid = await loop.run_in_executor(None, _find_pid_on_port, cdp_port)
    if pid is None:
        logger.warning(f"CDP ready on port {cdp_port} but could not discover Chrome PID")

    return ChromeProcess(
        process=None,
        cdp_port=cdp_port,
        cdp_url=cdp_url,
        user_data_dir=user_data_dir,
        _temp_dir=temp_dir,
        _pid=pid,
    )
async def _launch_chrome_subprocess(
    chrome_path: str,
    chrome_flags: list[str],
    cdp_port: int,
    cdp_url: str,
    user_data_dir: Path,
    temp_dir: tempfile.TemporaryDirectory[str] | None,
) -> ChromeProcess:
    """Launch Chrome as a direct subprocess (Linux, Windows, macOS fallback).

    Args:
        chrome_path: Executable to run.
        chrome_flags: Command-line flags appended after the executable.
        cdp_port: Remote-debugging port Chrome was asked to bind.
        cdp_url: HTTP endpoint stored on the returned handle.
        user_data_dir: Profile directory stored on the returned handle.
        temp_dir: Temporary profile directory to remove on failure, if any.

    Returns:
        A :class:`ChromeProcess` wrapping the ``Popen`` handle.

    Raises:
        Exception: Whatever ``_wait_for_cdp`` raised, re-raised after the
            child has been killed and its resources released.
    """
    args = [chrome_path, *chrome_flags]
    logger.info(f"Launching Chrome: port={cdp_port}, user_data_dir={user_data_dir}")

    # stderr is piped so that an early crash can be reported by _wait_for_cdp.
    # NOTE(review): nothing reads this pipe once startup succeeded; confirm
    # that Chrome cannot fill it and stall during long sessions.
    process = subprocess.Popen(
        args,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )

    try:
        await _wait_for_cdp(cdp_port, process=process)
    except Exception:
        process.kill()
        process.wait()
        # Release our end of the pipe; the child is gone.
        if process.stderr is not None:
            process.stderr.close()
        if temp_dir is not None:
            try:
                temp_dir.cleanup()
            except Exception:
                # Don't let a cleanup problem hide the real launch error.
                logger.debug("Failed to remove temporary profile directory", exc_info=True)
        raise

    return ChromeProcess(
        process=process,
        cdp_port=cdp_port,
        cdp_url=cdp_url,
        user_data_dir=user_data_dir,
        _temp_dir=temp_dir,
    )
async def _wait_for_cdp(
    port: int,
    process: subprocess.Popen[bytes] | None = None,
    timeout: float = _CDP_MAX_WAIT_S,
) -> None:
    """Poll ``/json/version`` until Chrome's CDP endpoint is ready.

    When *process* is provided, also checks that the subprocess hasn't exited.
    When *process* is None (macOS ``open -n`` path), only polls the endpoint.

    Args:
        port: Local port the CDP HTTP endpoint is expected on.
        process: Optional handle of the launched browser.
        timeout: Overall deadline in seconds.

    Raises:
        RuntimeError: If the subprocess exits before CDP is ready, or the
            endpoint does not answer with HTTP 200 before the deadline.
    """
    import http.client
    import urllib.error
    import urllib.request

    url = f"http://127.0.0.1:{port}/json/version"
    started = time.monotonic()
    deadline = started + timeout
    loop = asyncio.get_running_loop()

    def _probe() -> bool:
        """Return True when the endpoint answers with HTTP 200."""
        try:
            req = urllib.request.Request(url, method="GET")
            with urllib.request.urlopen(req, timeout=1) as resp:
                return resp.status == 200
        except (urllib.error.URLError, http.client.HTTPException, OSError):
            # HTTPException covers malformed/partial responses from a port
            # that is bound but not (yet) speaking HTTP; it is not an OSError.
            return False

    while time.monotonic() < deadline:
        # Check the subprocess hasn't crashed (only when we have a handle)
        if process is not None and process.poll() is not None:
            stderr = ""
            if process.stderr:
                stderr = process.stderr.read().decode(errors="replace")
            raise RuntimeError(
                f"Chrome exited with code {process.returncode} before CDP "
                f"was ready.\nstderr: {stderr[:500]}"
            )

        try:
            ready = await asyncio.wait_for(
                loop.run_in_executor(None, _probe),
                timeout=2.0,
            )
        except asyncio.TimeoutError:
            # Only an alias of the builtin TimeoutError from Python 3.11 on;
            # the asyncio name works on every supported interpreter.
            ready = False

        if ready:
            elapsed = time.monotonic() - started
            logger.info(f"CDP ready on port {port} after {elapsed:.1f}s")
            return

        await asyncio.sleep(_CDP_POLL_INTERVAL_S)

    raise RuntimeError(f"Chrome CDP endpoint did not become ready within {timeout}s on port {port}")
-203
View File
@@ -1,203 +0,0 @@
"""
Visual highlight animations for browser interactions.
Injects CSS/JS overlays to show where actions target before they execute.
Purely cosmetic: overlays use pointer-events: none, are self-removing, and are fire-and-forget.
Configure via environment variables:
HIVE_BROWSER_HIGHLIGHTS=0 Disable entirely
HIVE_HIGHLIGHT_COLOR Override color (default: #FAC43B)
HIVE_HIGHLIGHT_DURATION_MS Override visible duration (default: 1500)
HIVE_HIGHLIGHT_WAIT_S Seconds to block after injecting highlight
(default: 0, i.e. fire-and-forget; set 0.35 for
the old blocking behavior)
"""
from __future__ import annotations
import asyncio
import logging
import os
from playwright.async_api import Page
logger = logging.getLogger(__name__)
_ENABLED = os.environ.get("HIVE_BROWSER_HIGHLIGHTS", "1") != "0"
_COLOR = os.environ.get("HIVE_HIGHLIGHT_COLOR", "#FAC43B")
_DURATION_MS = int(os.environ.get("HIVE_HIGHLIGHT_DURATION_MS", "1500"))
_ANIMATION_WAIT_S = float(os.environ.get("HIVE_HIGHLIGHT_WAIT_S", "0"))
# ---------------------------------------------------------------------------
# JS templates
# ---------------------------------------------------------------------------
_ELEMENT_HIGHLIGHT_JS = """
([box, color, durationMs]) => {
const sx = window.scrollX, sy = window.scrollY;
const x = box.x + sx, y = box.y + sy;
const w = box.width, h = box.height;
const container = document.createElement('div');
Object.assign(container.style, {
position: 'absolute',
left: x + 'px',
top: y + 'px',
width: w + 'px',
height: h + 'px',
pointerEvents: 'none',
zIndex: '2147483647',
transition: 'opacity 0.3s ease',
});
document.body.appendChild(container);
const arm = Math.max(8, Math.min(20, 0.35 * Math.min(w, h)));
const pad = 3;
const startOffset = 10;
const corners = [
{ top: -pad, left: -pad, borderTop: '3px solid ' + color, borderLeft: '3px solid ' + color,
tx: -startOffset, ty: -startOffset },
{ top: -pad, right: -pad,
borderTop: '3px solid ' + color,
borderRight: '3px solid ' + color,
tx: startOffset, ty: -startOffset },
{ bottom: -pad, left: -pad,
borderBottom: '3px solid ' + color,
borderLeft: '3px solid ' + color,
tx: -startOffset, ty: startOffset },
{ bottom: -pad, right: -pad,
borderBottom: '3px solid ' + color,
borderRight: '3px solid ' + color,
tx: startOffset, ty: startOffset },
];
corners.forEach(c => {
const el = document.createElement('div');
Object.assign(el.style, {
position: 'absolute',
width: arm + 'px',
height: arm + 'px',
pointerEvents: 'none',
transition: 'transform 0.15s ease-out',
transform: 'translate(' + c.tx + 'px, ' + c.ty + 'px)',
});
if (c.top !== undefined) el.style.top = c.top + 'px';
if (c.bottom !== undefined) el.style.bottom = c.bottom + 'px';
if (c.left !== undefined) el.style.left = c.left + 'px';
if (c.right !== undefined) el.style.right = c.right + 'px';
if (c.borderTop) el.style.borderTop = c.borderTop;
if (c.borderBottom) el.style.borderBottom = c.borderBottom;
if (c.borderLeft) el.style.borderLeft = c.borderLeft;
if (c.borderRight) el.style.borderRight = c.borderRight;
container.appendChild(el);
setTimeout(() => { el.style.transform = 'translate(0, 0)'; }, 10);
});
setTimeout(() => {
container.style.opacity = '0';
setTimeout(() => container.remove(), 300);
}, durationMs);
}
"""
_COORDINATE_HIGHLIGHT_JS = """
([cx, cy, color, durationMs]) => {
const sx = window.scrollX, sy = window.scrollY;
const x = cx + sx, y = cy + sy;
const container = document.createElement('div');
Object.assign(container.style, {
position: 'absolute',
left: x + 'px',
top: y + 'px',
pointerEvents: 'none',
zIndex: '2147483647',
});
document.body.appendChild(container);
// Expanding ripple ring
const ripple = document.createElement('div');
Object.assign(ripple.style, {
position: 'absolute',
left: '0px',
top: '0px',
width: '0px',
height: '0px',
borderRadius: '50%',
border: '2px solid ' + color,
transform: 'translate(-50%, -50%)',
opacity: '1',
transition: 'width 0.5s ease-out, height 0.5s ease-out, opacity 0.5s ease-out',
pointerEvents: 'none',
});
container.appendChild(ripple);
setTimeout(() => {
ripple.style.width = '60px';
ripple.style.height = '60px';
ripple.style.opacity = '0';
}, 10);
// Center dot
const dot = document.createElement('div');
Object.assign(dot.style, {
position: 'absolute',
left: '-4px',
top: '-4px',
width: '8px',
height: '8px',
borderRadius: '50%',
backgroundColor: color,
transform: 'scale(0)',
transition: 'transform 0.3s cubic-bezier(0.34, 1.56, 0.64, 1)',
pointerEvents: 'none',
});
container.appendChild(dot);
setTimeout(() => { dot.style.transform = 'scale(1)'; }, 10);
setTimeout(() => {
dot.style.transition = 'opacity 0.3s ease';
dot.style.opacity = '0';
setTimeout(() => container.remove(), 300);
}, durationMs);
}
"""
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
async def highlight_element(page: Page, selector: str) -> None:
    """Draw the corner-bracket overlay around the first match of *selector*.

    Does nothing when highlights are disabled or the element has no bounding
    box. Any failure is logged at debug level and otherwise ignored.
    """
    if _ENABLED:
        try:
            target = page.locator(selector).first
            bounds = await target.bounding_box(timeout=2000)
            if bounds is not None:
                js_args = [bounds, _COLOR, _DURATION_MS]
                await page.evaluate(_ELEMENT_HIGHLIGHT_JS, js_args)
                # Optional pause after injecting the overlay.
                if _ANIMATION_WAIT_S > 0:
                    await asyncio.sleep(_ANIMATION_WAIT_S)
        except Exception:
            logger.debug("highlight_element failed for %s", selector, exc_info=True)
async def highlight_coordinate(page: Page, x: float, y: float) -> None:
    """Draw the ripple-and-dot overlay at viewport position *(x, y)*.

    Does nothing when highlights are disabled. Any failure is logged at debug
    level and otherwise ignored.
    """
    if _ENABLED:
        try:
            js_args = [x, y, _COLOR, _DURATION_MS]
            await page.evaluate(_COORDINATE_HIGHLIGHT_JS, js_args)
            # Optional pause after injecting the overlay.
            if _ANIMATION_WAIT_S > 0:
                await asyncio.sleep(_ANIMATION_WAIT_S)
        except Exception:
            logger.debug("highlight_coordinate failed at (%s, %s)", x, y, exc_info=True)
-100
View File
@@ -1,100 +0,0 @@
"""
CDP port allocation for persistent browser profiles.
Manages port allocation in the range 18800-18899 for Chrome DevTools Protocol
debugging ports. Ports are persisted to disk for reuse across browser restarts.
"""
from __future__ import annotations
import logging
import os
import socket
from pathlib import Path
logger = logging.getLogger(__name__)
# Port range for CDP debugging
CDP_PORT_MIN = 18800
CDP_PORT_MAX = 18899
# Module-level registry of allocated ports (within this process)
_allocated_ports: set[int] = set()
def _is_port_available(port: int) -> bool:
    """Check whether *port* can be bound on 127.0.0.1.

    Args:
        port: TCP port number to probe.

    Returns:
        True if a probe socket could be bound, False if the bind failed or
        the port number is outside the valid range.
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            if os.name == "nt" and hasattr(socket, "SO_EXCLUSIVEADDRUSE"):
                # On Windows SO_REUSEADDR lets bind() succeed even while
                # another process is listening on the port, which would make
                # every port look free. Exclusive use gives a truthful probe.
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1)
            else:
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind(("127.0.0.1", port))
    except (OSError, OverflowError):
        # OverflowError: port outside 0-65535.
        return False
    return True
def _get_port_file(profile: str, storage_path: Path | None) -> Path | None:
    """Return ``<storage>/browser/<profile>.port``, creating the directory.

    When *storage_path* is None, the ``HIVE_STORAGE_PATH`` environment
    variable is used instead. Returns ``None`` if no storage location is
    configured.
    """
    root = storage_path
    if root is None:
        configured = os.environ.get("HIVE_STORAGE_PATH")
        root = Path(configured) if configured else None

    if not root:
        return None

    port_dir = root / "browser"
    port_dir.mkdir(parents=True, exist_ok=True)
    return port_dir / f"{profile}.port"
def allocate_port(profile: str, storage_path: Path | None = None) -> int:
    """Pick a CDP debugging port for *profile*.

    A port previously written to the profile's port file is reused when it is
    inside the allowed range and currently bindable. Otherwise the first port
    in the range that is neither registered in this process nor in use is
    taken and written to the port file.

    Args:
        profile: Browser profile name
        storage_path: Base storage path (uses HIVE_STORAGE_PATH env if not provided)

    Returns:
        Allocated port number

    Raises:
        RuntimeError: If no ports are available in the range
    """
    port_file = _get_port_file(profile, storage_path)
    port_range = range(CDP_PORT_MIN, CDP_PORT_MAX + 1)

    # Step 1: try the port remembered from an earlier run.
    if port_file is not None and port_file.exists():
        try:
            remembered = int(port_file.read_text(encoding="utf-8").strip())
            if remembered in port_range and _is_port_available(remembered):
                _allocated_ports.add(remembered)
                logger.info(f"Reusing stored CDP port {remembered} for profile '{profile}'")
                return remembered
        except (ValueError, OSError):
            pass  # Stored port invalid or unavailable

    # Step 2: scan the range lazily and stop at the first usable port.
    usable = (
        candidate
        for candidate in port_range
        if candidate not in _allocated_ports and _is_port_available(candidate)
    )
    port = next(usable, None)
    if port is None:
        raise RuntimeError(f"No available CDP ports in range {CDP_PORT_MIN}-{CDP_PORT_MAX}")

    _allocated_ports.add(port)
    logger.info(f"Allocated new CDP port {port} for profile '{profile}'")

    # Persist port assignment
    if port_file is not None:
        try:
            port_file.write_text(str(port), encoding="utf-8")
        except OSError as e:
            logger.warning(f"Failed to save port to file: {e}")
    return port
def release_port(port: int) -> None:
    """Drop *port* from this process's registry of allocated ports.

    Releasing a port that was never allocated is a no-op.
    """
    _allocated_ports.difference_update((port,))
+49 -95
View File
@@ -1,13 +1,7 @@
"""Ref system for aria snapshots.
"""Ref system for selector resolution.
Assigns short `[ref=eN]` markers to interactive elements in Playwright's
aria_snapshot() output so the LLM can reference elements by ref instead of
constructing fragile CSS selectors.
Usage:
annotated, ref_map = annotate_snapshot(raw_snapshot)
# ... later, when the LLM says selector="e5" ...
playwright_selector = resolve_ref("e5", ref_map)
This module provides backward compatibility for selector resolution.
With bridge-based tools, selectors are passed directly to CDP methods.
"""
from __future__ import annotations
@@ -19,92 +13,73 @@ from typing import TYPE_CHECKING
if TYPE_CHECKING:
from .session import BrowserSession
# ---------------------------------------------------------------------------
# Role sets (matching Playwright's aria roles that matter for interaction)
# ---------------------------------------------------------------------------
# Role sets for interactive elements
INTERACTIVE_ROLES: frozenset[str] = frozenset({
"button",
"checkbox",
"combobox",
"link",
"listbox",
"menuitem",
"menuitemcheckbox",
"menuitemradio",
"option",
"radio",
"scrollbar",
"searchbox",
"slider",
"spinbutton",
"switch",
"tab",
"textbox",
"treeitem",
})
INTERACTIVE_ROLES: frozenset[str] = frozenset(
{
"button",
"checkbox",
"combobox",
"link",
"listbox",
"menuitem",
"menuitemcheckbox",
"menuitemradio",
"option",
"radio",
"scrollbar",
"searchbox",
"slider",
"spinbutton",
"switch",
"tab",
"textbox",
"treeitem",
}
)
NAMED_CONTENT_ROLES: frozenset[str] = frozenset({
"cell",
"heading",
"img",
})
NAMED_CONTENT_ROLES: frozenset[str] = frozenset(
{
"cell",
"heading",
"img",
}
)
# Regex: captures indent, role, optional quoted name, and trailing text.
# Example line: " - button \"Submit\" [disabled]"
# group(1)=indent " ", group(2)=role "button",
# group(3)=name "Submit" (or None), group(4)=rest " [disabled]"
# Regex for parsing aria snapshot lines
_LINE_RE = re.compile(r"^(\s*-\s+)(\w+)(?:\s+\"([^\"]*)\")?(.*?)$")
# ---------------------------------------------------------------------------
# Data types
# ---------------------------------------------------------------------------
# Regex for detecting ref patterns
_REF_PATTERN = re.compile(r"^e\d+$")
@dataclass(frozen=True)
class RefEntry:
"""A single ref entry mapping to a Playwright role selector."""
"""A single ref entry mapping to a CSS selector."""
role: str
name: str | None
nth: int
# ref_id (e.g. "e0") -> RefEntry
# Type alias for ref maps
RefMap = dict[str, RefEntry]
# ---------------------------------------------------------------------------
# annotate_snapshot
# ---------------------------------------------------------------------------
def annotate_snapshot(snapshot: str) -> tuple[str, RefMap]:
"""Inject ``[ref=eN]`` markers into an aria snapshot.
"""Inject [ref=eN] markers into an aria snapshot.
Returns:
(annotated_text, ref_map) where ref_map maps ref ids to RefEntry.
"""
lines = snapshot.split("\n")
# First pass: identify which lines get refs and count (role, name) pairs
# for nth disambiguation.
candidates: list[tuple[int, str, str | None]] = [] # (line_idx, role, name)
candidates: list[tuple[int, str, str | None]] = []
for i, line in enumerate(lines):
m = _LINE_RE.match(line)
if not m:
continue
role = m.group(2)
name = m.group(3) # None if no quoted name
name = m.group(3)
if role in INTERACTIVE_ROLES or (role in NAMED_CONTENT_ROLES and name):
candidates.append((i, role, name))
# Second pass: assign refs with nth indices.
ref_map: RefMap = {}
pair_seen: dict[tuple[str, str | None], int] = {}
ref_counter = 0
@@ -118,31 +93,22 @@ def annotate_snapshot(snapshot: str) -> tuple[str, RefMap]:
ref_counter += 1
ref_map[ref_id] = RefEntry(role=role, name=name, nth=nth)
# Inject [ref=eN] at end of line (before any trailing whitespace)
lines[line_idx] = lines[line_idx].rstrip() + f" [ref={ref_id}]"
return "\n".join(lines), ref_map
# ---------------------------------------------------------------------------
# resolve_ref
# ---------------------------------------------------------------------------
_REF_PATTERN = re.compile(r"^e\d+$")
def resolve_ref(selector: str, ref_map: RefMap | None) -> str:
"""Resolve a ref id (e.g. ``"e5"``) to a Playwright role selector.
"""Resolve a ref id (e.g. "e5") to a CSS selector.
If *selector* doesn't look like a ref (``e\\d+``), it's returned as-is
If selector doesn't look like a ref (e\\d+), it's returned as-is
so that plain CSS selectors keep working.
Raises:
ValueError: If the ref is not found or no snapshot has been taken.
"""
if not _REF_PATTERN.match(selector):
return selector # Pass through CSS / XPath / role selectors
return selector
if ref_map is None:
raise ValueError(
@@ -155,24 +121,16 @@ def resolve_ref(selector: str, ref_map: RefMap | None) -> str:
valid = ", ".join(sorted(ref_map.keys(), key=lambda k: int(k[1:])))
raise ValueError(
f"Ref '{selector}' not found. Valid refs: {valid}. "
"The page may have changed take a new snapshot."
"The page may have changed - take a new snapshot."
)
# Build Playwright role selector
# Build CSS selector
if entry.name is not None:
escaped_name = entry.name.replace("\\", "\\\\").replace('"', '\\"')
sel = f'role={entry.role}[name="{escaped_name}"]'
sel = f'[role="{entry.role}"][aria-label="{entry.name}"]'
else:
sel = f"role={entry.role}"
sel = f'[role="{entry.role}"]'
# Always include nth to disambiguate
sel += f" >> nth={entry.nth}"
return sel
# ---------------------------------------------------------------------------
# Convenience wrapper
# ---------------------------------------------------------------------------
return f"{sel}:nth-of-type({entry.nth + 1})"
def resolve_selector(
@@ -180,13 +138,9 @@ def resolve_selector(
session: BrowserSession,
target_id: str | None,
) -> str:
"""Resolve a selector that might be a ref, using the session's ref maps.
"""Resolve a selector that might be a ref.
Args:
selector: A CSS selector or ref id (e.g. ``"e5"``).
session: The current BrowserSession.
target_id: The target page id (falls back to session.active_page_id).
With bridge-based tools, this simply passes through the selector.
Kept for backward compatibility with existing tool signatures.
"""
tid = target_id or session.active_page_id
ref_map = session.ref_maps.get(tid) if tid else None
return resolve_ref(selector, ref_map)
return selector
File diff suppressed because it is too large Load Diff
+208 -182
View File
@@ -1,23 +1,21 @@
"""
Browser advanced tools - wait, evaluate, get_text, get_attribute, resize, upload, dialog.
Browser advanced tools - wait, evaluate, get_text, get_attribute, resize, dialog.
Tools for advanced browser operations.
All operations go through the Beeline extension via CDP - no Playwright required.
"""
from __future__ import annotations
from pathlib import Path
import asyncio
import logging
from typing import Literal
from fastmcp import FastMCP
from playwright.async_api import (
Error as PlaywrightError,
TimeoutError as PlaywrightTimeout,
)
from ..highlight import highlight_element
from ..refs import resolve_selector
from ..session import DEFAULT_TIMEOUT_MS, get_session
from ..bridge import get_bridge
from .tabs import _get_context
logger = logging.getLogger(__name__)
def register_advanced_tools(mcp: FastMCP) -> None:
@@ -28,56 +26,71 @@ def register_advanced_tools(mcp: FastMCP) -> None:
wait_ms: int = 1000,
selector: str | None = None,
text: str | None = None,
target_id: str | None = None,
tab_id: int | None = None,
profile: str | None = None,
timeout_ms: int = DEFAULT_TIMEOUT_MS,
timeout_ms: int = 30000,
) -> dict:
"""
Wait for a condition.
Args:
wait_ms: Time to wait in milliseconds (if no selector/text provided)
wait_ms: Time to wait in milliseconds (if no selector/text)
selector: Wait for element to appear (optional)
text: Wait for text to appear on page (optional)
target_id: Tab ID (default: active tab)
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
timeout_ms: Maximum wait time in milliseconds (default: 30000)
timeout_ms: Max wait time in ms (default: 30000)
Returns:
Dict with wait result
"""
try:
session = get_session(profile)
page = session.get_page(target_id)
if not page:
return {"ok": False, "error": "No active tab"}
bridge = get_bridge()
if not bridge or not bridge.is_connected:
return {"ok": False, "error": "Browser extension not connected"}
ctx = _get_context(profile)
if not ctx:
return {"ok": False, "error": "Browser not started"}
target_tab = tab_id or ctx.get("activeTabId")
if target_tab is None:
return {"ok": False, "error": "No active tab"}
try:
if selector:
try:
selector = resolve_selector(selector, session, target_id)
except ValueError as e:
return {"ok": False, "error": str(e)}
await page.wait_for_selector(selector, timeout=timeout_ms)
return {"ok": True, "action": "wait", "condition": "selector", "selector": selector}
elif text:
await page.wait_for_function(
"(text) => document.body.innerText.includes(text)",
arg=text,
timeout=timeout_ms,
result = await bridge.wait_for_selector(
target_tab, selector, timeout_ms=timeout_ms
)
return {"ok": True, "action": "wait", "condition": "text", "text": text}
if result.get("ok"):
return {
"ok": True,
"action": "wait",
"condition": "selector",
"selector": selector,
}
return result
elif text:
result = await bridge.wait_for_text(
target_tab, text, timeout_ms=timeout_ms
)
if result.get("ok"):
return {
"ok": True,
"action": "wait",
"condition": "text",
"text": text,
}
return result
else:
await page.wait_for_timeout(wait_ms)
await asyncio.sleep(wait_ms / 1000)
return {"ok": True, "action": "wait", "condition": "time", "ms": wait_ms}
except PlaywrightTimeout:
return {"ok": False, "error": "Wait condition not met within timeout"}
except PlaywrightError as e:
return {"ok": False, "error": f"Wait failed: {e!s}"}
except Exception as e:
return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_evaluate(
script: str,
target_id: str | None = None,
tab_id: int | None = None,
profile: str | None = None,
) -> dict:
"""
@@ -85,112 +98,113 @@ def register_advanced_tools(mcp: FastMCP) -> None:
Args:
script: JavaScript code to execute
target_id: Tab ID (default: active tab)
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
Returns:
Dict with evaluation result
"""
try:
session = get_session(profile)
page = session.get_page(target_id)
if not page:
return {"ok": False, "error": "No active tab"}
bridge = get_bridge()
if not bridge or not bridge.is_connected:
return {"ok": False, "error": "Browser extension not connected"}
result = await page.evaluate(script)
return {"ok": True, "action": "evaluate", "result": result}
except PlaywrightError as e:
return {"ok": False, "error": f"Evaluate failed: {e!s}"}
ctx = _get_context(profile)
if not ctx:
return {"ok": False, "error": "Browser not started"}
target_tab = tab_id or ctx.get("activeTabId")
if target_tab is None:
return {"ok": False, "error": "No active tab"}
try:
result = await bridge.evaluate(target_tab, script)
return result
except Exception as e:
return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_get_text(
selector: str,
target_id: str | None = None,
tab_id: int | None = None,
profile: str | None = None,
timeout_ms: int = DEFAULT_TIMEOUT_MS,
timeout_ms: int = 30000,
) -> dict:
"""
Get text content of an element.
Args:
selector: CSS selector or element ref
target_id: Tab ID (default: active tab)
selector: CSS selector
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
timeout_ms: Timeout in milliseconds (default: 30000)
Returns:
Dict with element text content
"""
bridge = get_bridge()
if not bridge or not bridge.is_connected:
return {"ok": False, "error": "Browser extension not connected"}
ctx = _get_context(profile)
if not ctx:
return {"ok": False, "error": "Browser not started"}
target_tab = tab_id or ctx.get("activeTabId")
if target_tab is None:
return {"ok": False, "error": "No active tab"}
try:
session = get_session(profile)
page = session.get_page(target_id)
if not page:
return {"ok": False, "error": "No active tab"}
try:
selector = resolve_selector(selector, session, target_id)
except ValueError as e:
return {"ok": False, "error": str(e)}
element = await page.wait_for_selector(selector, timeout=timeout_ms)
if not element:
return {"ok": False, "error": f"Element not found: {selector}"}
text = await element.text_content()
return {"ok": True, "selector": selector, "text": text}
except PlaywrightTimeout:
return {"ok": False, "error": f"Element not found: {selector}"}
except PlaywrightError as e:
return {"ok": False, "error": f"Get text failed: {e!s}"}
result = await bridge.get_text(target_tab, selector, timeout_ms=timeout_ms)
return result
except Exception as e:
return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_get_attribute(
selector: str,
attribute: str,
target_id: str | None = None,
tab_id: int | None = None,
profile: str | None = None,
timeout_ms: int = DEFAULT_TIMEOUT_MS,
timeout_ms: int = 30000,
) -> dict:
"""
Get an attribute value of an element.
Args:
selector: CSS selector or element ref
attribute: Attribute name to get (e.g., 'href', 'src', 'value')
target_id: Tab ID (default: active tab)
selector: CSS selector
attribute: Attribute name to get (e.g., 'href', 'src')
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
timeout_ms: Timeout in milliseconds (default: 30000)
Returns:
Dict with attribute value
"""
bridge = get_bridge()
if not bridge or not bridge.is_connected:
return {"ok": False, "error": "Browser extension not connected"}
ctx = _get_context(profile)
if not ctx:
return {"ok": False, "error": "Browser not started"}
target_tab = tab_id or ctx.get("activeTabId")
if target_tab is None:
return {"ok": False, "error": "No active tab"}
try:
session = get_session(profile)
page = session.get_page(target_id)
if not page:
return {"ok": False, "error": "No active tab"}
try:
selector = resolve_selector(selector, session, target_id)
except ValueError as e:
return {"ok": False, "error": str(e)}
element = await page.wait_for_selector(selector, timeout=timeout_ms)
if not element:
return {"ok": False, "error": f"Element not found: {selector}"}
value = await element.get_attribute(attribute)
return {"ok": True, "selector": selector, "attribute": attribute, "value": value}
except PlaywrightTimeout:
return {"ok": False, "error": f"Element not found: {selector}"}
except PlaywrightError as e:
return {"ok": False, "error": f"Get attribute failed: {e!s}"}
result = await bridge.get_attribute(
target_tab, selector, attribute, timeout_ms=timeout_ms
)
return result
except Exception as e:
return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_resize(
    width: int,
    height: int,
    tab_id: int | None = None,
    profile: str | None = None,
) -> dict:
    """
    Resize the viewport of a tab through the extension bridge.

    Args:
        width: Viewport width in pixels
        height: Viewport height in pixels
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")

    Returns:
        Dict with resize result
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started"}

    # Only fall back to the active tab when no tab was requested at all.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.resize(target_tab, width, height)
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_upload(
    selector: str,
    file_paths: list[str],
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
) -> dict:
    """
    Upload files to a file input element.

    Note: File upload via CDP requires extension file access.
    This may require additional extension permissions.

    Args:
        selector: CSS selector for the file input
        file_paths: List of file paths to upload
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout in ms (default: 30000)

    Returns:
        Dict with upload result
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started"}

    # Only fall back to the active tab when no tab was requested at all.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        from pathlib import Path

        # Validate before touching the browser. Directories are rejected
        # (is_file, not exists) and the paths handed to CDP are made absolute
        # so they do not depend on the browser's working directory.
        absolute_paths: list[str] = []
        for path in file_paths:
            candidate = Path(path)
            if not candidate.is_file():
                return {"ok": False, "error": f"File not found: {path}"}
            absolute_paths.append(str(candidate.resolve()))

        await bridge.cdp_attach(target_tab)
        await bridge._cdp(target_tab, "DOM.enable")

        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout_ms / 1000
        node_id = None
        # Poll at least once, even with a zero/negative timeout.
        while True:
            # Re-read the document root on every attempt: node ids from an
            # earlier document are not valid after the page reloads.
            doc = await bridge._cdp(target_tab, "DOM.getDocument")
            root_id = doc.get("root", {}).get("nodeId")
            if root_id:
                result = await bridge._cdp(
                    target_tab,
                    "DOM.querySelector",
                    {"nodeId": root_id, "selector": selector},
                )
                node_id = result.get("nodeId")
            if node_id or loop.time() >= deadline:
                break
            await asyncio.sleep(0.1)

        if not node_id:
            return {"ok": False, "error": f"Element not found: {selector}"}

        await bridge._cdp(
            target_tab,
            "DOM.setFileInputFiles",
            {"files": absolute_paths, "nodeId": node_id},
        )

        return {
            "ok": True,
            "action": "upload",
            "selector": selector,
            "files": file_paths,
            "count": len(file_paths),
        }
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_dialog(
    action: Literal["accept", "dismiss"] = "accept",
    prompt_text: str | None = None,
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
) -> dict:
    """
    Prepare a tab for browser dialogs (alert, confirm, prompt).

    Note: Dialog handling via CDP requires Page.javascriptDialogOpening
    event handling. This function currently only attaches the debugger and
    enables the Page domain; it does NOT register a handler, so ``action``,
    ``prompt_text`` and ``timeout_ms`` are accepted but not yet acted upon.
    The returned "suggestion" tells the caller how to proceed.

    Args:
        action: How to handle - "accept" or "dismiss" (currently unused)
        prompt_text: Text for prompt dialogs (optional, currently unused)
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout in ms (default: 30000, currently unused)

    Returns:
        Dict with dialog handling result
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started"}

    # Only fall back to the active tab when no tab was requested at all.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        await bridge.cdp_attach(target_tab)
        await bridge._cdp(target_tab, "Page.enable")

        return {
            "ok": True,
            "action": "handler_set",
            "message": "Dialog handler prepared.",
            "suggestion": "Handle dialogs manually or use browser_evaluate.",
        }
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"ok": False, "error": str(e)}
+122 -321
View File
@@ -1,202 +1,31 @@
"""
Browser inspection tools - screenshot, console, pdf, snapshots.
Browser inspection tools - screenshot, snapshot, console.
Tools for extracting content and capturing page state.
All operations go through the Beeline extension via CDP - no Playwright required.
"""
from __future__ import annotations
import base64
import io
import json
import logging
from pathlib import Path
from typing import Any, Literal
from typing import Literal
from fastmcp import FastMCP
from mcp.types import ImageContent, TextContent
from playwright.async_api import Error as PlaywrightError
from ..session import get_session
from ..bridge import get_bridge
from .tabs import _get_context
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Screenshot normalization
# ---------------------------------------------------------------------------
_QUALITY_STEPS = (85, 70, 50)
_MIN_DIMENSION = 400
_DIMENSION_STEP = 200
def _normalize_screenshot(
    raw_bytes: bytes,
    image_type: str,
    *,
    max_dimension: int = 2000,
    max_bytes: int = 5_000_000,
) -> tuple[bytes, str]:
    """Shrink a screenshot until it fits the given size and dimension limits.

    Candidate edge lengths are tried from largest to smallest, and for each
    one the JPEG quality steps are tried in order; the first encoding that is
    at most *max_bytes* is returned. If none fits, the smallest encoding seen
    is returned. Without Pillow, or on any processing error, the input is
    returned untouched.

    Args:
        raw_bytes: Raw PNG or JPEG image bytes.
        image_type: Original format (``"png"`` or ``"jpeg"``).
        max_dimension: Maximum width or height in pixels.
        max_bytes: Maximum file size in bytes.

    Returns:
        ``(normalized_bytes, image_type)``; *image_type* becomes ``"jpeg"``
        whenever a re-encoded image is returned.
    """
    try:
        from PIL import Image
    except ImportError:
        logger.debug("Pillow not installed — skipping screenshot normalization")
        return raw_bytes, image_type

    try:
        longest_edge = max(Image.open(io.BytesIO(raw_bytes)).size)

        # Nothing to do when both limits are already satisfied.
        if max_dimension >= longest_edge and max_bytes >= len(raw_bytes):
            return raw_bytes, image_type

        # Edge lengths to try, largest first. When only the byte size is the
        # problem, the original edge length is tried before any downscaling.
        edge_lengths: list[int] = []
        if longest_edge <= max_dimension:
            edge_lengths.append(longest_edge)
        edge_lengths.extend(
            edge
            for edge in range(max_dimension, _MIN_DIMENSION - 1, -_DIMENSION_STEP)
            if edge < longest_edge
        )

        best: bytes | None = None
        for edge in edge_lengths:
            # thumbnail() mutates in place, so start from the source each time.
            frame = Image.open(io.BytesIO(raw_bytes))
            frame.thumbnail((edge, edge), Image.LANCZOS)

            # JPEG has no alpha channel / palette support.
            if frame.mode in ("RGBA", "LA", "P"):
                frame = frame.convert("RGB")

            for quality in _QUALITY_STEPS:
                sink = io.BytesIO()
                frame.save(sink, format="JPEG", quality=quality, optimize=True)
                encoded = sink.getvalue()

                if best is None or len(encoded) < len(best):
                    best = encoded
                if len(encoded) <= max_bytes:
                    return encoded, "jpeg"

        if best is None:
            return raw_bytes, image_type

        # No candidate met the byte budget: fall back to the smallest one.
        logger.warning(
            "Screenshot normalization: could not fit under %d bytes (best: %d bytes)",
            max_bytes,
            len(best),
        )
        return best, "jpeg"
    except Exception:
        logger.warning("Screenshot normalization failed — returning original", exc_info=True)
        return raw_bytes, image_type
def _format_ax_tree(nodes: list[dict[str, Any]]) -> str:
"""Format a CDP Accessibility.getFullAXTree result into an indented text tree.
Each node is rendered as:
indent + "- " + role + ' "name"' + [properties]
Ignored and invisible nodes are skipped.
"""
if not nodes:
return "(empty tree)"
# Build nodeId → node lookup
by_id = {n["nodeId"]: n for n in nodes}
# Build nodeId → [child nodeId] mapping
children_map: dict[str, list[str]] = {}
for n in nodes:
for child_id in n.get("childIds", []):
children_map.setdefault(n["nodeId"], []).append(child_id)
lines: list[str] = []
def _walk(node_id: str, depth: int) -> None:
node = by_id.get(node_id)
if not node:
return
# Skip ignored nodes
if node.get("ignored", False):
# Still walk children — they may be visible
for cid in children_map.get(node_id, []):
_walk(cid, depth)
return
role_info = node.get("role", {})
role = role_info.get("value", "unknown") if isinstance(role_info, dict) else str(role_info)
# Skip generic/none roles that add no information
if role in ("none", "Ignored"):
for cid in children_map.get(node_id, []):
_walk(cid, depth)
return
name_info = node.get("name", {})
name = name_info.get("value", "") if isinstance(name_info, dict) else str(name_info)
# Build property annotations
props: list[str] = []
for prop in node.get("properties", []):
pname = prop.get("name", "")
pval = prop.get("value", {})
val = pval.get("value") if isinstance(pval, dict) else pval
if pname in ("focused", "disabled", "checked", "expanded", "selected", "required"):
if val is True:
props.append(pname)
elif pname == "level" and val:
props.append(f"level={val}")
indent = " " * depth
label = f"- {role}"
if name:
label += f' "{name}"'
if props:
label += f" [{', '.join(props)}]"
lines.append(f"{indent}{label}")
for cid in children_map.get(node_id, []):
_walk(cid, depth + 1)
# Root is the first node in the list
_walk(nodes[0]["nodeId"], 0)
return "\n".join(lines) if lines else "(empty tree)"
def register_inspection_tools(mcp: FastMCP) -> None:
"""Register browser inspection tools."""
@mcp.tool()
async def browser_screenshot(
target_id: str | None = None,
tab_id: int | None = None,
profile: str | None = None,
full_page: bool = False,
selector: str | None = None,
@@ -209,213 +38,185 @@ def register_inspection_tools(mcp: FastMCP) -> None:
text metadata (URL, size, etc.).
Args:
target_id: Tab ID (default: active tab)
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
full_page: Capture full scrollable page (default: False)
selector: CSS selector to screenshot specific element (optional)
selector: CSS selector to screenshot element (optional - not supported)
image_type: Image format - png or jpeg (default: png)
Returns:
List of content blocks: text metadata + image
"""
try:
session = get_session(profile)
page = session.get_page(target_id)
if not page:
return [
TextContent(
type="text", text=json.dumps({"ok": False, "error": "No active tab"})
)
]
if selector:
from ..refs import resolve_selector
selector = resolve_selector(selector, session, target_id)
element = await page.query_selector(selector)
if not element:
return [
TextContent(
type="text",
text=json.dumps(
{"ok": False, "error": f"Element not found: {selector}"}
),
)
]
screenshot_bytes = await element.screenshot(type=image_type)
else:
screenshot_bytes = await page.screenshot(
full_page=full_page,
type=image_type,
)
normalized_bytes, normalized_type = _normalize_screenshot(screenshot_bytes, image_type)
meta = json.dumps(
{
"ok": True,
"targetId": target_id or session.active_page_id,
"url": page.url,
"imageType": normalized_type,
"size": len(normalized_bytes),
"originalSize": len(screenshot_bytes),
}
)
return [
TextContent(type="text", text=meta),
ImageContent(
type="image",
data=base64.b64encode(normalized_bytes).decode(),
mimeType=f"image/{normalized_type}",
),
]
except PlaywrightError as e:
bridge = get_bridge()
if not bridge or not bridge.is_connected:
return [
TextContent(
type="text", text=json.dumps({"ok": False, "error": f"Browser error: {e!s}"})
type="text",
text=json.dumps({"ok": False, "error": "Extension not connected"}),
)
]
ctx = _get_context(profile)
if not ctx:
err_msg = json.dumps({"ok": False, "error": "Browser not started"})
return [TextContent(type="text", text=err_msg)]
target_tab = tab_id or ctx.get("activeTabId")
if target_tab is None:
return [
TextContent(type="text", text=json.dumps({"ok": False, "error": "No active tab"}))
]
try:
if selector:
logger.warning("Element screenshots not supported, capturing full page")
result = await bridge.screenshot(target_tab, full_page=full_page)
if not result.get("ok"):
return [TextContent(type="text", text=json.dumps(result))]
data = result.get("data")
mime_type = result.get("mimeType", "image/png")
meta = json.dumps({
"ok": True,
"tabId": target_tab,
"url": result.get("url", ""),
"imageType": mime_type.split("/")[-1],
"size": len(base64.b64decode(data)) if data else 0,
"fullPage": full_page,
})
return [
TextContent(type="text", text=meta),
ImageContent(type="image", data=data, mimeType=mime_type),
]
except Exception as e:
return [TextContent(type="text", text=json.dumps({"ok": False, "error": str(e)}))]
@mcp.tool()
async def browser_snapshot(
    tab_id: int | None = None,
    profile: str | None = None,
    mode: Literal["aria", "cdp"] = "aria",
) -> dict:
    """
    Get an accessibility snapshot of the page.

    Uses CDP Accessibility.getFullAXTree to build a compact, readable
    tree of the page's interactive elements. Ideal for LLM consumption.

    Output format example:
        - navigation "Main":
          - link "Home" [ref=e1]
          - link "About" [ref=e2]
        - main:
          - heading "Welcome"
          - textbox "Search" [ref=e3]

    Args:
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        mode: Accepted so existing callers keep working; it is not forwarded
            to the bridge, which produces a single snapshot format.

    Returns:
        Dict with the snapshot text tree, URL, and tab ID
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Only fall back to the active tab when no tab was requested at all.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.snapshot(target_tab)
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_console(
    tab_id: int | None = None,
    profile: str | None = None,
    level: str | None = None,
) -> dict:
    """
    Get console messages from the browser.

    Note: Console capture requires Runtime.enable and event handling.
    Currently returns a message indicating this feature needs implementation;
    no arguments are used and no messages are returned.

    Args:
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        level: Filter by level (log, info, warn, error) (optional)

    Returns:
        Dict with a "message" and "suggestion" explaining the limitation
    """
    # Console capture requires subscribing to Runtime.consoleAPICalled events
    # which needs persistent event handling.
    return {
        "ok": True,
        "message": "Console capture not yet implemented",
        "suggestion": "Use browser_evaluate to check specific values or errors",
    }
@mcp.tool()
async def browser_html(
    tab_id: int | None = None,
    profile: str | None = None,
    selector: str | None = None,
) -> dict:
    """
    Get the HTML content of the page or a specific element.

    The HTML is read by evaluating a small script in the page through the
    bridge: ``outerHTML`` of the first element matching *selector*, or of the
    document element when no selector is given.

    Args:
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        selector: CSS selector to get specific element HTML (optional)

    Returns:
        Dict with HTML content ("html" is None when the selector matched
        nothing), or the bridge's error result
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Only fall back to the active tab when no tab was requested at all.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        if selector:
            # json.dumps yields a correctly quoted/escaped JS string literal,
            # so the selector cannot break out of the script.
            sel_json = json.dumps(selector)
            script = (
                f"(function() {{ const el = document.querySelector({sel_json}); "
                f"return el ? el.outerHTML : null; }})()"
            )
        else:
            script = "document.documentElement.outerHTML"

        result = await bridge.evaluate(target_tab, script)
        if result.get("ok"):
            return {
                "ok": True,
                "tabId": target_tab,
                "html": result.get("result"),
                "selector": selector,
            }
        return result
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"ok": False, "error": str(e)}
+273 -361
View File
@@ -1,76 +1,22 @@
"""
Browser interaction tools - click, type, fill, press, hover, select, scroll, drag.
Tools for interacting with page elements.
All operations go through the Beeline extension via CDP - no Playwright required.
"""
from __future__ import annotations
import asyncio
import logging
from typing import Literal
from fastmcp import FastMCP
from playwright.async_api import (
Error as PlaywrightError,
Page,
TimeoutError as PlaywrightTimeout,
)
from ..highlight import highlight_coordinate, highlight_element
from ..refs import annotate_snapshot, resolve_selector
from ..session import DEFAULT_TIMEOUT_MS, BrowserSession, get_session
from ..bridge import get_bridge
from .tabs import _get_context
logger = logging.getLogger(__name__)
_AUTO_SNAPSHOT_MAX_CHARS = 4000
async def _auto_snapshot(
    page: Page,
    *,
    session: BrowserSession | None = None,
    target_id: str | None = None,
    wait_for_nav: bool = False,
    max_chars: int = _AUTO_SNAPSHOT_MAX_CHARS,
) -> str | None:
    """Take a compact aria snapshot to attach to an action's result.

    Args:
        page: Page to snapshot.
        session: When given, the snapshot is annotated with refs and the
            resulting ref map is stored on ``session.ref_maps``.
        target_id: Key for the stored ref map; falls back to the session's
            active page id.
        wait_for_nav: Give an in-flight navigation up to one second to reach
            ``domcontentloaded`` before snapshotting.
        max_chars: Cut the snapshot to this many characters and append a
            truncation notice. ``0`` (or less) disables truncation.

    Returns:
        The snapshot text, or ``None`` if anything went wrong.
    """
    try:
        if wait_for_nav:
            try:
                await page.wait_for_load_state("domcontentloaded", timeout=1000)
            except Exception:
                pass  # No navigation happened — that's fine

        tree = await page.locator(":root").aria_snapshot()

        # Annotate before truncating so the ref map covers the whole page.
        if tree and session:
            tree, refs = annotate_snapshot(tree)
            map_key = target_id or session.active_page_id
            if map_key:
                session.ref_maps[map_key] = refs

        needs_cut = bool(tree) and max_chars > 0 and len(tree) > max_chars
        if not needs_cut:
            return tree

        notice = "\n... [truncated — call browser_snapshot for full page tree]"
        return tree[:max_chars] + notice
    except Exception:
        logger.debug("_auto_snapshot failed", exc_info=True)
        return None
def register_interaction_tools(mcp: FastMCP) -> None:
"""Register browser interaction tools."""
@@ -78,232 +24,178 @@ def register_interaction_tools(mcp: FastMCP) -> None:
@mcp.tool()
async def browser_click(
    selector: str,
    tab_id: int | None = None,
    profile: str | None = None,
    button: Literal["left", "right", "middle"] = "left",
    double_click: bool = False,
    timeout_ms: int = 30000,
) -> dict:
    """
    Click an element on the page.

    Args:
        selector: CSS selector for the element
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        button: Mouse button to click (left, right, middle)
        double_click: Perform double-click (default: False)
        timeout_ms: Timeout waiting for element (default: 30000)

    Returns:
        Dict with click result and coordinates
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Only fall back to the active tab when no tab was requested at all.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.click(
            target_tab,
            selector,
            button=button,
            click_count=2 if double_click else 1,
            timeout_ms=timeout_ms,
        )
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_click_coordinate(
    x: float,
    y: float,
    tab_id: int | None = None,
    profile: str | None = None,
    button: Literal["left", "right", "middle"] = "left",
) -> dict:
    """
    Click at specific viewport coordinates.

    Args:
        x: X coordinate in the viewport
        y: Y coordinate in the viewport
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        button: Mouse button to click (left, right, middle)

    Returns:
        Dict with click result
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Only fall back to the active tab when no tab was requested at all.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.click_coordinate(target_tab, x, y, button=button)
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_type(
    selector: str,
    text: str,
    tab_id: int | None = None,
    profile: str | None = None,
    delay_ms: int = 0,
    clear_first: bool = True,
    timeout_ms: int = 30000,
) -> dict:
    """
    Type text into an input element.

    Args:
        selector: CSS selector for the input element
        text: Text to type
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        delay_ms: Delay between keystrokes in ms (default: 0)
        clear_first: Clear existing text before typing (default: True)
        timeout_ms: Timeout waiting for element (default: 30000)

    Returns:
        Dict with type result
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Only fall back to the active tab when no tab was requested at all.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.type_text(
            target_tab,
            selector,
            text,
            clear_first=clear_first,
            delay_ms=delay_ms,
            timeout_ms=timeout_ms,
        )
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_fill(
    selector: str,
    value: str,
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
) -> dict:
    """
    Fill an input element with a value (clears existing content first).

    Behaves like typing with ``clear_first=True`` and no keystroke delay.
    The bridge is called directly rather than through ``browser_type``:
    that name is bound to whatever ``@mcp.tool()`` returns, which in some
    FastMCP releases is a Tool object and not an awaitable function.

    Args:
        selector: CSS selector for the input element
        value: Value to fill
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout waiting for element (default: 30000)

    Returns:
        Dict with fill result
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Only fall back to the active tab when no tab was requested at all.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.type_text(
            target_tab,
            selector,
            value,
            clear_first=True,
            delay_ms=0,
            timeout_ms=timeout_ms,
        )
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_press(
    key: str,
    selector: str | None = None,
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
) -> dict:
    """
    Press a keyboard key.

    Args:
        key: Key to press (e.g., 'Enter', 'Tab', 'Escape', 'ArrowDown')
        selector: Focus element first (optional)
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout in milliseconds (default: 30000). Accepted for
            interface compatibility; it is not forwarded to the bridge here.

    Returns:
        Dict with press result, or ``{"ok": False, "error": ...}`` on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Explicit None check: an explicitly supplied tab ID must never be
    # replaced by the active tab just because it happens to be falsy.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.press_key(target_tab, key, selector=selector)
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_hover(
    selector: str,
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
) -> dict:
    """
    Hover over an element.

    Args:
        selector: CSS selector for the element
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout waiting for element (default: 30000)

    Returns:
        Dict with hover result, or ``{"ok": False, "error": ...}`` on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Explicit None check so a supplied-but-falsy tab ID is not silently
    # swapped for the active tab.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.hover(target_tab, selector, timeout_ms=timeout_ms)
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_select(
    selector: str,
    values: list[str],
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
    auto_snapshot: bool = True,
) -> dict:
    """
    Select option(s) in a dropdown/select element.

    Args:
        selector: CSS selector for the select element
        values: List of values to select
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout in milliseconds (default: 30000). Accepted for
            interface compatibility; not forwarded to the bridge here.
        auto_snapshot: Accepted for interface compatibility; this
            implementation does not attach a snapshot to the result.

    Returns:
        Dict with select result, or ``{"ok": False, "error": ...}`` on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Explicit None check so a supplied-but-falsy tab ID is not silently
    # swapped for the active tab.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.select_option(target_tab, selector, values)
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_scroll(
    direction: Literal["up", "down", "left", "right"] = "down",
    amount: int = 500,
    selector: str | None = None,
    tab_id: int | None = None,
    profile: str | None = None,
    auto_snapshot: bool = True,
) -> dict:
    """
    Scroll the page.

    Args:
        direction: Scroll direction (up, down, left, right)
        amount: Scroll amount in pixels (default: 500)
        selector: Accepted for interface compatibility; the bridge call
            below scrolls the page and does not use this value.
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        auto_snapshot: Accepted for interface compatibility; this
            implementation does not attach a snapshot to the result.

    Returns:
        Dict with scroll result, or ``{"ok": False, "error": ...}`` on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Explicit None check so a supplied-but-falsy tab ID is not silently
    # swapped for the active tab.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.scroll(target_tab, direction=direction, amount=amount)
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_drag(
    start_selector: str,
    end_selector: str,
    tab_id: int | None = None,
    profile: str | None = None,
    timeout_ms: int = 30000,
) -> dict:
    """
    Drag from one element to another.

    Note: This is implemented via CDP mouse events and may not work
    for all drag-and-drop scenarios (e.g., HTML5 drag-drop).

    Args:
        start_selector: CSS selector for drag start element
        end_selector: CSS selector for drag end element
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        timeout_ms: Timeout waiting for elements (default: 30000). One
            deadline is shared by the lookup of both elements.

    Returns:
        Dict with drag result (selectors and the coordinates used), or
        ``{"ok": False, "error": ...}`` on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Explicit None check so a supplied-but-falsy tab ID is not silently
    # swapped for the active tab.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    loop = asyncio.get_running_loop()

    async def _wait_for_node(root_id: int, css: str, deadline: float):
        """Poll DOM.querySelector until a node ID is returned or the deadline passes."""
        while True:
            found = await bridge._cdp(
                target_tab,
                "DOM.querySelector",
                {"nodeId": root_id, "selector": css},
            )
            node_id = found.get("nodeId")
            # A falsy node ID (missing or 0) means no match yet.
            if node_id:
                return node_id
            # Checking the clock after the query guarantees one attempt
            # even when timeout_ms is 0.
            if loop.time() >= deadline:
                return None
            await asyncio.sleep(0.1)

    async def _center_of(node_id: int):
        """Return the (x, y) center of a node's content quad, or None if unavailable."""
        box = await bridge._cdp(target_tab, "DOM.getBoxModel", {"nodeId": node_id})
        # DOM.getBoxModel nests the quads under "model"; fall back to the
        # top level in case the bridge already unwrapped the result.
        quad = box.get("model", box).get("content") or []
        if len(quad) < 8:
            return None
        return sum(quad[0:8:2]) / 4, sum(quad[1:8:2]) / 4

    try:
        # Get coordinates for both elements and perform drag via CDP.
        # The Input domain needs no (and has no) "enable" command.
        await bridge.cdp_attach(target_tab)
        await bridge._cdp(target_tab, "DOM.enable")

        doc = await bridge._cdp(target_tab, "DOM.getDocument")
        root_id = doc.get("root", {}).get("nodeId")
        if root_id is None:
            return {"ok": False, "error": "Could not get document root"}

        deadline = loop.time() + timeout_ms / 1000

        start_node = await _wait_for_node(root_id, start_selector, deadline)
        if not start_node:
            return {"ok": False, "error": f"Start element not found: {start_selector}"}

        end_node = await _wait_for_node(root_id, end_selector, deadline)
        if not end_node:
            return {"ok": False, "error": f"End element not found: {end_selector}"}

        start_center = await _center_of(start_node)
        if start_center is None:
            return {"ok": False, "error": f"Could not get box model for: {start_selector}"}
        end_center = await _center_of(end_node)
        if end_center is None:
            return {"ok": False, "error": f"Could not get box model for: {end_selector}"}

        start_x, start_y = start_center
        end_x, end_y = end_center

        # Perform drag: mouse down at start, move to end, mouse up
        await bridge._cdp(
            target_tab,
            "Input.dispatchMouseEvent",
            {
                "type": "mousePressed",
                "x": start_x,
                "y": start_y,
                "button": "left",
                "clickCount": 1,
            },
        )
        await bridge._cdp(
            target_tab,
            "Input.dispatchMouseEvent",
            {
                "type": "mouseMoved",
                "x": end_x,
                "y": end_y,
                # Report the left button as held so the page sees a drag
                # rather than a plain hover move.
                "button": "left",
                "buttons": 1,
            },
        )
        await bridge._cdp(
            target_tab,
            "Input.dispatchMouseEvent",
            {
                "type": "mouseReleased",
                "x": end_x,
                "y": end_y,
                "button": "left",
                "clickCount": 1,
            },
        )

        return {
            "ok": True,
            "action": "drag",
            "from": start_selector,
            "to": end_selector,
            "fromCoords": {"x": start_x, "y": start_y},
            "toCoords": {"x": end_x, "y": end_y},
        }
    except Exception as e:
        # Tool boundary: report bridge/CDP failures as a result dict.
        return {"ok": False, "error": str(e)}
+142 -17
View File
@@ -1,10 +1,23 @@
"""
Browser lifecycle tools - start, stop, status.
These tools manage the browser context via the Beeline extension bridge.
No Playwright required - all operations go through the Chrome extension.
"""
from __future__ import annotations
import logging
from typing import Any
from fastmcp import FastMCP
from ..session import get_session
from ..bridge import get_bridge
logger = logging.getLogger(__name__)
# Track active contexts per profile
_contexts: dict[str, dict[str, Any]] = {}
def register_lifecycle_tools(mcp: FastMCP) -> None:
@@ -19,35 +32,118 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
profile: Browser profile name (default: "default")
Returns:
Dict with browser status (running, tabs count, active tab, persistent, cdp_port)
Dict with browser status
"""
session = get_session(profile)
return await session.status()
bridge = get_bridge()
if not bridge or not bridge.is_connected:
return {
"ok": False,
"error": "Browser extension not connected",
"connected": False,
}
profile_name = profile or "default"
ctx = _contexts.get(profile_name)
if ctx:
try:
tabs_result = await bridge.list_tabs(ctx.get("groupId"))
tabs = tabs_result.get("tabs", [])
return {
"ok": True,
"connected": True,
"profile": profile_name,
"running": True,
"groupId": ctx.get("groupId"),
"activeTab": ctx.get("activeTabId"),
"tabs": len(tabs),
}
except Exception as e:
return {
"ok": True,
"connected": True,
"profile": profile_name,
"running": False,
"error": str(e),
}
return {
"ok": True,
"connected": True,
"profile": profile_name,
"running": False,
"tabs": 0,
}
@mcp.tool()
async def browser_start(profile: str | None = None) -> dict:
    """
    Start a browser context for the given profile.

    Asks the extension bridge to create a context for the profile and
    records the returned group and tab IDs in the module-level
    ``_contexts`` registry. If the profile already has a registry entry,
    that entry is reported instead and no new context is created.

    Args:
        profile: Browser profile name (default: "default")

    Returns:
        Dict with start status including groupId and activeTabId
    """
    bridge = get_bridge()
    connected = bool(bridge) and bridge.is_connected
    if not connected:
        return {
            "ok": False,
            "error": (
                "Browser extension not connected. "
                "Install the Beeline extension and connect it."
            ),
        }

    name = profile or "default"

    # Report the recorded context instead of creating a second one.
    if name in _contexts:
        existing = _contexts[name]
        return {
            "ok": True,
            "status": "already_running",
            "profile": name,
            "groupId": existing.get("groupId"),
            "activeTabId": existing.get("activeTabId"),
        }

    try:
        created = await bridge.create_context(name)
        group_id, tab_id = created.get("groupId"), created.get("tabId")
        _contexts[name] = {"groupId": group_id, "activeTabId": tab_id}
        logger.info(
            "Started browser context '%s': groupId=%s, tabId=%s",
            name,
            group_id,
            tab_id,
        )
    except Exception as e:
        logger.exception("Failed to start browser context")
        return {"ok": False, "error": str(e)}

    return {
        "ok": True,
        "status": "started",
        "profile": name,
        "groupId": group_id,
        "activeTabId": tab_id,
    }
@mcp.tool()
async def browser_stop(profile: str | None = None) -> dict:
    """
    Stop the browser context and close all tabs in the group.

    The profile's entry is removed from the ``_contexts`` registry before
    the bridge is asked to destroy the group, so the entry stays removed
    even if that call fails.

    Args:
        profile: Browser profile name (default: "default")

    Returns:
        Dict with stop status
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    name = profile or "default"
    ctx = _contexts.pop(name, None)
    if not ctx:
        return {"ok": True, "status": "not_running", "profile": name}

    try:
        closed_tabs = 0
        group_id = ctx.get("groupId")
        # Without a recorded group there is nothing to destroy.
        if group_id is not None:
            destroyed = await bridge.destroy_context(group_id)
            closed_tabs = destroyed.get("closedTabs", 0)

        logger.info(
            "Stopped browser context '%s': closed %d tabs",
            name,
            closed_tabs,
        )
    except Exception as e:
        logger.exception("Failed to stop browser context")
        return {"ok": False, "error": str(e)}

    return {
        "ok": True,
        "status": "stopped",
        "profile": name,
        "closedTabs": closed_tabs,
    }
+92 -59
View File
@@ -1,14 +1,20 @@
"""
Browser navigation tools - navigate, go_back, go_forward, reload.
All operations go through the Beeline extension via CDP.
"""
from fastmcp import FastMCP
from playwright.async_api import (
Error as PlaywrightError,
TimeoutError as PlaywrightTimeout,
)
from __future__ import annotations
from ..session import DEFAULT_NAVIGATION_TIMEOUT_MS, get_session
import logging
from typing import Literal
from fastmcp import FastMCP
from ..bridge import get_bridge
from .tabs import _get_context
logger = logging.getLogger(__name__)
def register_navigation_tools(mcp: FastMCP) -> None:
@@ -17,117 +23,144 @@ def register_navigation_tools(mcp: FastMCP) -> None:
@mcp.tool()
async def browser_navigate(
    url: str,
    tab_id: int | None = None,
    profile: str | None = None,
    wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"] = "load",
) -> dict:
    """
    Navigate a tab to a URL.

    The ``wait_until`` condition is forwarded to the bridge's
    ``navigate`` call, whose result supplies the returned url and title.

    Args:
        url: URL to navigate to
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        wait_until: Wait condition - one of: commit, domcontentloaded,
            load (default), networkidle

    Returns:
        Dict with navigation result (tabId, url, title), or
        ``{"ok": False, "error": ...}`` on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Explicit None check so a supplied-but-falsy tab ID is not silently
    # swapped for the active tab.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab. Open a tab first with browser_open."}

    try:
        result = await bridge.navigate(target_tab, url, wait_until=wait_until)
        return {
            "ok": True,
            "tabId": target_tab,
            "url": result.get("url"),
            "title": result.get("title"),
        }
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_go_back(
    tab_id: int | None = None,
    profile: str | None = None,
) -> dict:
    """
    Navigate back in browser history.

    Args:
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")

    Returns:
        Dict with navigation result, or ``{"ok": False, "error": ...}``
        on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Explicit None check so a supplied-but-falsy tab ID is not silently
    # swapped for the active tab.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.go_back(target_tab)
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_go_forward(
    tab_id: int | None = None,
    profile: str | None = None,
) -> dict:
    """
    Navigate forward in browser history.

    Args:
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")

    Returns:
        Dict with navigation result, or ``{"ok": False, "error": ...}``
        on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Explicit None check so a supplied-but-falsy tab ID is not silently
    # swapped for the active tab.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.go_forward(target_tab)
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_reload(
    tab_id: int | None = None,
    profile: str | None = None,
) -> dict:
    """
    Reload the current page.

    Args:
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")

    Returns:
        Dict with reload result, or ``{"ok": False, "error": ...}``
        on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Explicit None check so a supplied-but-falsy tab ID is not silently
    # swapped for the active tab.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No active tab"}

    try:
        return await bridge.reload(target_tab)
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}
+173 -100
View File
@@ -1,14 +1,26 @@
"""
Browser tab management tools - tabs, open, close, focus.
All operations go through the Beeline extension - no Playwright required.
"""
from fastmcp import FastMCP
from playwright.async_api import (
Error as PlaywrightError,
TimeoutError as PlaywrightTimeout,
)
from __future__ import annotations
from ..session import get_session
import logging
from typing import Any
from fastmcp import FastMCP
from ..bridge import get_bridge
from .lifecycle import _contexts
logger = logging.getLogger(__name__)
def _get_context(profile: str | None = None) -> dict[str, Any] | None:
    """Look up the stored context for *profile*.

    A missing or empty profile name falls back to ``"default"``. Returns
    ``None`` when no context is registered under the resulting name.
    """
    return _contexts.get(profile if profile else "default")
def register_tab_tools(mcp: FastMCP) -> None:
@@ -17,170 +29,231 @@ def register_tab_tools(mcp: FastMCP) -> None:
@mcp.tool()
async def browser_tabs(profile: str | None = None) -> dict:
    """
    List all open browser tabs in the agent's tab group.

    Each tab includes:
    - ``id``: Chrome tab ID (integer)
    - ``url``: Current URL
    - ``title``: Page title
    - ``active``: Whether this is the active tab
    - ``groupId``: Chrome tab group ID

    Args:
        profile: Browser profile name (default: "default")

    Returns:
        Dict with the tab list, its length as ``total``, and the
        ``activeTabId`` recorded for the profile
    """
    bridge = get_bridge()
    if not (bridge and bridge.is_connected):
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    try:
        listing = await bridge.list_tabs(ctx.get("groupId"))
    except Exception as e:
        return {"ok": False, "error": str(e)}

    group_tabs = listing.get("tabs", [])
    return {
        "ok": True,
        "tabs": group_tabs,
        "total": len(group_tabs),
        "activeTabId": ctx.get("activeTabId"),
    }
@mcp.tool()
async def browser_open(
    url: str,
    background: bool = False,
    profile: str | None = None,
    wait_until: str = "load",
) -> dict:
    """
    Open a new browser tab and navigate to the given URL.

    The tab is created in the profile's tab group. The bridge's
    ``navigate`` call is then made with ``wait_until``, and its result
    supplies the returned url and title.

    Args:
        url: URL to navigate to
        background: Open in background without stealing focus (default: False)
        profile: Browser profile name (default: "default")
        wait_until: Wait condition - "commit", "domcontentloaded",
            "load" (default), or "networkidle"

    Returns:
        Dict with new tab info (tabId, url, title, background), or
        ``{"ok": False, "error": ...}`` on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    try:
        # Create tab in the group
        result = await bridge.create_tab(url=url, group_id=ctx.get("groupId"))
        tab_id = result.get("tabId")
        if tab_id is None:
            # Without an ID there is nothing to activate or navigate.
            return {"ok": False, "error": "Extension did not return a tab ID for the new tab"}

        # A foreground tab becomes the profile's active tab.
        if not background:
            ctx["activeTabId"] = tab_id
            await bridge.activate_tab(tab_id)

        # Honor the caller's wait condition instead of a fixed "load".
        nav_result = await bridge.navigate(tab_id, url, wait_until=wait_until)

        return {
            "ok": True,
            "tabId": tab_id,
            "url": nav_result.get("url", url),
            "title": nav_result.get("title", ""),
            "background": background,
        }
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_close(
    tab_id: int | None = None,
    profile: str | None = None,
) -> dict:
    """
    Close a browser tab.

    If the closed tab was the profile's active tab, the first tab still
    listed in the group (if any) becomes the new active tab.

    Args:
        tab_id: Chrome tab ID to close (default: active tab)
        profile: Browser profile name (default: "default")

    Returns:
        Dict with close status, or ``{"ok": False, "error": ...}`` on failure
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    # Use active tab if not specified. Explicit None check so a
    # supplied-but-falsy tab ID never closes the active tab by accident.
    target_tab = tab_id if tab_id is not None else ctx.get("activeTabId")
    if target_tab is None:
        return {"ok": False, "error": "No tab to close"}

    try:
        await bridge.close_tab(target_tab)

        # Update active tab if we closed it
        if ctx.get("activeTabId") == target_tab:
            result = await bridge.list_tabs(ctx.get("groupId"))
            tabs = result.get("tabs", [])
            ctx["activeTabId"] = tabs[0].get("id") if tabs else None

        return {"ok": True, "closed": target_tab}
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}
@mcp.tool()
async def browser_focus(tab_id: int, profile: str | None = None) -> dict:
    """
    Focus a browser tab.

    Activates the tab through the bridge and, on success, records it as
    the profile's active tab.

    Args:
        tab_id: Chrome tab ID to focus
        profile: Browser profile name (default: "default")

    Returns:
        Dict with focus status
    """
    bridge = get_bridge()
    if not (bridge and bridge.is_connected):
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    try:
        await bridge.activate_tab(tab_id)
    except Exception as e:
        return {"ok": False, "error": str(e)}

    # Only reached when activation succeeded.
    ctx["activeTabId"] = tab_id
    return {"ok": True, "tabId": tab_id}
async def _close_group_tabs(keep_active: bool, profile: str | None) -> dict:
    """Close the tabs in the profile's group; shared by both close tools.

    Kept as a plain coroutine so the tools below never have to call one
    another: depending on the FastMCP release, a ``@mcp.tool()``-decorated
    name may be a tool object rather than a callable function.
    """
    bridge = get_bridge()
    if not bridge or not bridge.is_connected:
        return {"ok": False, "error": "Browser extension not connected"}

    ctx = _get_context(profile)
    if not ctx:
        return {"ok": False, "error": "Browser not started. Call browser_start first."}

    try:
        listing = await bridge.list_tabs(ctx.get("groupId"))
        tabs = listing.get("tabs", [])
        active_tab_id = ctx.get("activeTabId")

        closed = 0
        for tab in tabs:
            tid = tab.get("id")
            if tid is None or (keep_active and tid == active_tab_id):
                continue
            try:
                await bridge.close_tab(tid)
            except Exception as exc:
                # Best effort: keep closing the rest, but leave a trace.
                logger.warning("Failed to close tab %s: %s", tid, exc)
            else:
                closed += 1

        # Update active tab
        if not keep_active:
            ctx["activeTabId"] = None
        else:
            listing = await bridge.list_tabs(ctx.get("groupId"))
            remaining_ids = [t.get("id") for t in listing.get("tabs", [])]
            # Keep the current active tab while it still exists; only fall
            # back to the first remaining tab when it is gone.
            if active_tab_id not in remaining_ids:
                ctx["activeTabId"] = remaining_ids[0] if remaining_ids else None

        return {
            "ok": True,
            "closed_count": closed,
            "remaining": len(tabs) - closed,
        }
    except Exception as e:
        # Tool boundary: report bridge failures as a result dict.
        return {"ok": False, "error": str(e)}

@mcp.tool()
async def browser_close_all(
    keep_active: bool = True,
    profile: str | None = None,
) -> dict:
    """
    Close all browser tabs in the agent's group, optionally keeping active.

    Tabs that fail to close are logged and skipped; they are not counted
    in ``closed_count``.

    Args:
        keep_active: If True (default), keep the active tab open.
            If False, try to close every tab in the group and clear the
            recorded active tab.
        profile: Browser profile name (default: "default")

    Returns:
        Dict with number of closed tabs and remaining count
    """
    return await _close_group_tabs(keep_active, profile)

@mcp.tool()
async def browser_close_finished(
    keep_active: bool = True,
    profile: str | None = None,
) -> dict:
    """
    Close all tabs except the active one.

    Behaves exactly like ``browser_close_all``; both tools run the same
    shared helper.

    Args:
        keep_active: If True (default), keep the active tab open.
        profile: Browser profile name (default: "default")

    Returns:
        Dict with closed_count and remaining tab count
    """
    return await _close_group_tabs(keep_active, profile)
+9 -1
View File
@@ -68,12 +68,20 @@ from gcu import register_gcu_tools # noqa: E402
@asynccontextmanager
async def _lifespan(server: FastMCP) -> AsyncIterator[dict]:
    """FastMCP lifespan hook: start the Beeline bridge, clean up on shutdown.

    On entry the bridge is created with ``init_bridge()`` and started on the
    port read from the ``HIVE_BRIDGE_PORT`` environment variable (default
    ``9229``). On exit - including when an exception is thrown into the
    context manager - browser sessions are shut down and the bridge is
    stopped.

    Args:
        server: The FastMCP server instance this lifespan is attached to
            (not used by the hook itself).

    Yields:
        An empty dict as the lifespan state.

    Raises:
        ValueError: If ``HIVE_BRIDGE_PORT`` is set to a non-integer value.
    """
    from gcu.browser.bridge import init_bridge

    bridge = init_bridge()
    bridge_port = int(os.getenv("HIVE_BRIDGE_PORT", "9229"))
    await bridge.start(port=bridge_port)

    try:
        yield {}
    finally:
        from gcu.browser.session import shutdown_all_browsers

        logger.info("Server shutting down, cleaning up browser sessions...")
        try:
            await shutdown_all_browsers()
        finally:
            # Stop the bridge even if session cleanup raised, so the bridge
            # is never left running after the server exits.
            await bridge.stop()
def _sync_shutdown() -> None:
Generated
+2 -99
View File
@@ -1145,58 +1145,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" },
]
[[package]]
name = "greenlet"
version = "3.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/8a/99/1cd3411c56a410994669062bd73dd58270c00cc074cac15f385a1fd91f8a/greenlet-3.3.1.tar.gz", hash = "sha256:41848f3230b58c08bb43dee542e74a2a2e34d3c59dc3076cec9151aeeedcae98", size = 184690, upload-time = "2026-01-23T15:31:02.076Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/e8/2e1462c8fdbe0f210feb5ac7ad2d9029af8be3bf45bd9fa39765f821642f/greenlet-3.3.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5fd23b9bc6d37b563211c6abbb1b3cab27db385a4449af5c32e932f93017080c", size = 274974, upload-time = "2026-01-23T15:31:02.891Z" },
{ url = "https://files.pythonhosted.org/packages/7e/a8/530a401419a6b302af59f67aaf0b9ba1015855ea7e56c036b5928793c5bd/greenlet-3.3.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f51496a0bfbaa9d74d36a52d2580d1ef5ed4fdfcff0a73730abfbbbe1403dd", size = 577175, upload-time = "2026-01-23T16:00:56.213Z" },
{ url = "https://files.pythonhosted.org/packages/8e/89/7e812bb9c05e1aaef9b597ac1d0962b9021d2c6269354966451e885c4e6b/greenlet-3.3.1-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb0feb07fe6e6a74615ee62a880007d976cf739b6669cce95daa7373d4fc69c5", size = 590401, upload-time = "2026-01-23T16:05:26.365Z" },
{ url = "https://files.pythonhosted.org/packages/70/ae/e2d5f0e59b94a2269b68a629173263fa40b63da32f5c231307c349315871/greenlet-3.3.1-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:67ea3fc73c8cd92f42467a72b75e8f05ed51a0e9b1d15398c913416f2dafd49f", size = 601161, upload-time = "2026-01-23T16:15:53.456Z" },
{ url = "https://files.pythonhosted.org/packages/5c/ae/8d472e1f5ac5efe55c563f3eabb38c98a44b832602e12910750a7c025802/greenlet-3.3.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:39eda9ba259cc9801da05351eaa8576e9aa83eb9411e8f0c299e05d712a210f2", size = 590272, upload-time = "2026-01-23T15:32:49.411Z" },
{ url = "https://files.pythonhosted.org/packages/a8/51/0fde34bebfcadc833550717eade64e35ec8738e6b097d5d248274a01258b/greenlet-3.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e2e7e882f83149f0a71ac822ebf156d902e7a5d22c9045e3e0d1daf59cee2cc9", size = 1550729, upload-time = "2026-01-23T16:04:20.867Z" },
{ url = "https://files.pythonhosted.org/packages/16/c9/2fb47bee83b25b119d5a35d580807bb8b92480a54b68fef009a02945629f/greenlet-3.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80aa4d79eb5564f2e0a6144fcc744b5a37c56c4a92d60920720e99210d88db0f", size = 1615552, upload-time = "2026-01-23T15:33:45.743Z" },
{ url = "https://files.pythonhosted.org/packages/1f/54/dcf9f737b96606f82f8dd05becfb8d238db0633dd7397d542a296fe9cad3/greenlet-3.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:32e4ca9777c5addcbf42ff3915d99030d8e00173a56f80001fb3875998fe410b", size = 226462, upload-time = "2026-01-23T15:36:50.422Z" },
{ url = "https://files.pythonhosted.org/packages/91/37/61e1015cf944ddd2337447d8e97fb423ac9bc21f9963fb5f206b53d65649/greenlet-3.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:da19609432f353fed186cc1b85e9440db93d489f198b4bdf42ae19cc9d9ac9b4", size = 225715, upload-time = "2026-01-23T15:33:17.298Z" },
{ url = "https://files.pythonhosted.org/packages/f9/c8/9d76a66421d1ae24340dfae7e79c313957f6e3195c144d2c73333b5bfe34/greenlet-3.3.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7e806ca53acf6d15a888405880766ec84721aa4181261cd11a457dfe9a7a4975", size = 276443, upload-time = "2026-01-23T15:30:10.066Z" },
{ url = "https://files.pythonhosted.org/packages/81/99/401ff34bb3c032d1f10477d199724f5e5f6fbfb59816ad1455c79c1eb8e7/greenlet-3.3.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d842c94b9155f1c9b3058036c24ffb8ff78b428414a19792b2380be9cecf4f36", size = 597359, upload-time = "2026-01-23T16:00:57.394Z" },
{ url = "https://files.pythonhosted.org/packages/2b/bc/4dcc0871ed557792d304f50be0f7487a14e017952ec689effe2180a6ff35/greenlet-3.3.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:20fedaadd422fa02695f82093f9a98bad3dab5fcda793c658b945fcde2ab27ba", size = 607805, upload-time = "2026-01-23T16:05:28.068Z" },
{ url = "https://files.pythonhosted.org/packages/3b/cd/7a7ca57588dac3389e97f7c9521cb6641fd8b6602faf1eaa4188384757df/greenlet-3.3.1-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c620051669fd04ac6b60ebc70478210119c56e2d5d5df848baec4312e260e4ca", size = 622363, upload-time = "2026-01-23T16:15:54.754Z" },
{ url = "https://files.pythonhosted.org/packages/cf/05/821587cf19e2ce1f2b24945d890b164401e5085f9d09cbd969b0c193cd20/greenlet-3.3.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14194f5f4305800ff329cbf02c5fcc88f01886cadd29941b807668a45f0d2336", size = 609947, upload-time = "2026-01-23T15:32:51.004Z" },
{ url = "https://files.pythonhosted.org/packages/a4/52/ee8c46ed9f8babaa93a19e577f26e3d28a519feac6350ed6f25f1afee7e9/greenlet-3.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7b2fe4150a0cf59f847a67db8c155ac36aed89080a6a639e9f16df5d6c6096f1", size = 1567487, upload-time = "2026-01-23T16:04:22.125Z" },
{ url = "https://files.pythonhosted.org/packages/8f/7c/456a74f07029597626f3a6db71b273a3632aecb9afafeeca452cfa633197/greenlet-3.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:49f4ad195d45f4a66a0eb9c1ba4832bb380570d361912fa3554746830d332149", size = 1636087, upload-time = "2026-01-23T15:33:47.486Z" },
{ url = "https://files.pythonhosted.org/packages/34/2f/5e0e41f33c69655300a5e54aeb637cf8ff57f1786a3aba374eacc0228c1d/greenlet-3.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:cc98b9c4e4870fa983436afa999d4eb16b12872fab7071423d5262fa7120d57a", size = 227156, upload-time = "2026-01-23T15:34:34.808Z" },
{ url = "https://files.pythonhosted.org/packages/c8/ab/717c58343cf02c5265b531384b248787e04d8160b8afe53d9eec053d7b44/greenlet-3.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:bfb2d1763d777de5ee495c85309460f6fd8146e50ec9d0ae0183dbf6f0a829d1", size = 226403, upload-time = "2026-01-23T15:31:39.372Z" },
{ url = "https://files.pythonhosted.org/packages/ec/ab/d26750f2b7242c2b90ea2ad71de70cfcd73a948a49513188a0fc0d6fc15a/greenlet-3.3.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:7ab327905cabb0622adca5971e488064e35115430cec2c35a50fd36e72a315b3", size = 275205, upload-time = "2026-01-23T15:30:24.556Z" },
{ url = "https://files.pythonhosted.org/packages/10/d3/be7d19e8fad7c5a78eeefb2d896a08cd4643e1e90c605c4be3b46264998f/greenlet-3.3.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:65be2f026ca6a176f88fb935ee23c18333ccea97048076aef4db1ef5bc0713ac", size = 599284, upload-time = "2026-01-23T16:00:58.584Z" },
{ url = "https://files.pythonhosted.org/packages/ae/21/fe703aaa056fdb0f17e5afd4b5c80195bbdab701208918938bd15b00d39b/greenlet-3.3.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7a3ae05b3d225b4155bda56b072ceb09d05e974bc74be6c3fc15463cf69f33fd", size = 610274, upload-time = "2026-01-23T16:05:29.312Z" },
{ url = "https://files.pythonhosted.org/packages/06/00/95df0b6a935103c0452dad2203f5be8377e551b8466a29650c4c5a5af6cc/greenlet-3.3.1-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:12184c61e5d64268a160226fb4818af4df02cfead8379d7f8b99a56c3a54ff3e", size = 624375, upload-time = "2026-01-23T16:15:55.915Z" },
{ url = "https://files.pythonhosted.org/packages/cb/86/5c6ab23bb3c28c21ed6bebad006515cfe08b04613eb105ca0041fecca852/greenlet-3.3.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6423481193bbbe871313de5fd06a082f2649e7ce6e08015d2a76c1e9186ca5b3", size = 612904, upload-time = "2026-01-23T15:32:52.317Z" },
{ url = "https://files.pythonhosted.org/packages/c2/f3/7949994264e22639e40718c2daf6f6df5169bf48fb038c008a489ec53a50/greenlet-3.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:33a956fe78bbbda82bfc95e128d61129b32d66bcf0a20a1f0c08aa4839ffa951", size = 1567316, upload-time = "2026-01-23T16:04:23.316Z" },
{ url = "https://files.pythonhosted.org/packages/8d/6e/d73c94d13b6465e9f7cd6231c68abde838bb22408596c05d9059830b7872/greenlet-3.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b065d3284be43728dd280f6f9a13990b56470b81be20375a207cdc814a983f2", size = 1636549, upload-time = "2026-01-23T15:33:48.643Z" },
{ url = "https://files.pythonhosted.org/packages/5e/b3/c9c23a6478b3bcc91f979ce4ca50879e4d0b2bd7b9a53d8ecded719b92e2/greenlet-3.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:27289986f4e5b0edec7b5a91063c109f0276abb09a7e9bdab08437525977c946", size = 227042, upload-time = "2026-01-23T15:33:58.216Z" },
{ url = "https://files.pythonhosted.org/packages/90/e7/824beda656097edee36ab15809fd063447b200cc03a7f6a24c34d520bc88/greenlet-3.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:2f080e028001c5273e0b42690eaf359aeef9cb1389da0f171ea51a5dc3c7608d", size = 226294, upload-time = "2026-01-23T15:30:52.73Z" },
{ url = "https://files.pythonhosted.org/packages/ae/fb/011c7c717213182caf78084a9bea51c8590b0afda98001f69d9f853a495b/greenlet-3.3.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:bd59acd8529b372775cd0fcbc5f420ae20681c5b045ce25bd453ed8455ab99b5", size = 275737, upload-time = "2026-01-23T15:32:16.889Z" },
{ url = "https://files.pythonhosted.org/packages/41/2e/a3a417d620363fdbb08a48b1dd582956a46a61bf8fd27ee8164f9dfe87c2/greenlet-3.3.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b31c05dd84ef6871dd47120386aed35323c944d86c3d91a17c4b8d23df62f15b", size = 646422, upload-time = "2026-01-23T16:01:00.354Z" },
{ url = "https://files.pythonhosted.org/packages/b4/09/c6c4a0db47defafd2d6bab8ddfe47ad19963b4e30f5bed84d75328059f8c/greenlet-3.3.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:02925a0bfffc41e542c70aa14c7eda3593e4d7e274bfcccca1827e6c0875902e", size = 658219, upload-time = "2026-01-23T16:05:30.956Z" },
{ url = "https://files.pythonhosted.org/packages/e2/89/b95f2ddcc5f3c2bc09c8ee8d77be312df7f9e7175703ab780f2014a0e781/greenlet-3.3.1-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3e0f3878ca3a3ff63ab4ea478585942b53df66ddde327b59ecb191b19dbbd62d", size = 671455, upload-time = "2026-01-23T16:15:57.232Z" },
{ url = "https://files.pythonhosted.org/packages/80/38/9d42d60dffb04b45f03dbab9430898352dba277758640751dc5cc316c521/greenlet-3.3.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34a729e2e4e4ffe9ae2408d5ecaf12f944853f40ad724929b7585bca808a9d6f", size = 660237, upload-time = "2026-01-23T15:32:53.967Z" },
{ url = "https://files.pythonhosted.org/packages/96/61/373c30b7197f9e756e4c81ae90a8d55dc3598c17673f91f4d31c3c689c3f/greenlet-3.3.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aec9ab04e82918e623415947921dea15851b152b822661cce3f8e4393c3df683", size = 1615261, upload-time = "2026-01-23T16:04:25.066Z" },
{ url = "https://files.pythonhosted.org/packages/fd/d3/ca534310343f5945316f9451e953dcd89b36fe7a19de652a1dc5a0eeef3f/greenlet-3.3.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:71c767cf281a80d02b6c1bdc41c9468e1f5a494fb11bc8688c360524e273d7b1", size = 1683719, upload-time = "2026-01-23T15:33:50.61Z" },
{ url = "https://files.pythonhosted.org/packages/52/cb/c21a3fd5d2c9c8b622e7bede6d6d00e00551a5ee474ea6d831b5f567a8b4/greenlet-3.3.1-cp314-cp314-win_amd64.whl", hash = "sha256:96aff77af063b607f2489473484e39a0bbae730f2ea90c9e5606c9b73c44174a", size = 228125, upload-time = "2026-01-23T15:32:45.265Z" },
{ url = "https://files.pythonhosted.org/packages/6a/8e/8a2db6d11491837af1de64b8aff23707c6e85241be13c60ed399a72e2ef8/greenlet-3.3.1-cp314-cp314-win_arm64.whl", hash = "sha256:b066e8b50e28b503f604fa538adc764a638b38cf8e81e025011d26e8a627fa79", size = 227519, upload-time = "2026-01-23T15:31:47.284Z" },
{ url = "https://files.pythonhosted.org/packages/28/24/cbbec49bacdcc9ec652a81d3efef7b59f326697e7edf6ed775a5e08e54c2/greenlet-3.3.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:3e63252943c921b90abb035ebe9de832c436401d9c45f262d80e2d06cc659242", size = 282706, upload-time = "2026-01-23T15:33:05.525Z" },
{ url = "https://files.pythonhosted.org/packages/86/2e/4f2b9323c144c4fe8842a4e0d92121465485c3c2c5b9e9b30a52e80f523f/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76e39058e68eb125de10c92524573924e827927df5d3891fbc97bd55764a8774", size = 651209, upload-time = "2026-01-23T16:01:01.517Z" },
{ url = "https://files.pythonhosted.org/packages/d9/87/50ca60e515f5bb55a2fbc5f0c9b5b156de7d2fc51a0a69abc9d23914a237/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c9f9d5e7a9310b7a2f416dd13d2e3fd8b42d803968ea580b7c0f322ccb389b97", size = 654300, upload-time = "2026-01-23T16:05:32.199Z" },
{ url = "https://files.pythonhosted.org/packages/7c/25/c51a63f3f463171e09cb586eb64db0861eb06667ab01a7968371a24c4f3b/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b9721549a95db96689458a1e0ae32412ca18776ed004463df3a9299c1b257ab", size = 662574, upload-time = "2026-01-23T16:15:58.364Z" },
{ url = "https://files.pythonhosted.org/packages/1d/94/74310866dfa2b73dd08659a3d18762f83985ad3281901ba0ee9a815194fb/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92497c78adf3ac703b57f1e3813c2d874f27f71a178f9ea5887855da413cd6d2", size = 653842, upload-time = "2026-01-23T15:32:55.671Z" },
{ url = "https://files.pythonhosted.org/packages/97/43/8bf0ffa3d498eeee4c58c212a3905dd6146c01c8dc0b0a046481ca29b18c/greenlet-3.3.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ed6b402bc74d6557a705e197d47f9063733091ed6357b3de33619d8a8d93ac53", size = 1614917, upload-time = "2026-01-23T16:04:26.276Z" },
{ url = "https://files.pythonhosted.org/packages/89/90/a3be7a5f378fc6e84abe4dcfb2ba32b07786861172e502388b4c90000d1b/greenlet-3.3.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:59913f1e5ada20fde795ba906916aea25d442abcc0593fba7e26c92b7ad76249", size = 1676092, upload-time = "2026-01-23T15:33:52.176Z" },
{ url = "https://files.pythonhosted.org/packages/e1/2b/98c7f93e6db9977aaee07eb1e51ca63bd5f779b900d362791d3252e60558/greenlet-3.3.1-cp314-cp314t-win_amd64.whl", hash = "sha256:301860987846c24cb8964bdec0e31a96ad4a2a801b41b4ef40963c1b44f33451", size = 233181, upload-time = "2026-01-23T15:33:00.29Z" },
]
[[package]]
name = "grpcio"
version = "1.78.0"
@@ -2261,37 +2209,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" },
]
[[package]]
name = "playwright"
version = "1.58.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "greenlet" },
{ name = "pyee" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/f8/c9/9c6061d5703267f1baae6a4647bfd1862e386fbfdb97d889f6f6ae9e3f64/playwright-1.58.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:96e3204aac292ee639edbfdef6298b4be2ea0a55a16b7068df91adac077cc606", size = 42251098, upload-time = "2026-01-30T15:09:24.028Z" },
{ url = "https://files.pythonhosted.org/packages/e0/40/59d34a756e02f8c670f0fee987d46f7ee53d05447d43cd114ca015cb168c/playwright-1.58.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:70c763694739d28df71ed578b9c8202bb83e8fe8fb9268c04dd13afe36301f71", size = 41039625, upload-time = "2026-01-30T15:09:27.558Z" },
{ url = "https://files.pythonhosted.org/packages/e1/ee/3ce6209c9c74a650aac9028c621f357a34ea5cd4d950700f8e2c4b7fe2c4/playwright-1.58.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:185e0132578733d02802dfddfbbc35f42be23a45ff49ccae5081f25952238117", size = 42251098, upload-time = "2026-01-30T15:09:30.461Z" },
{ url = "https://files.pythonhosted.org/packages/f1/af/009958cbf23fac551a940d34e3206e6c7eed2b8c940d0c3afd1feb0b0589/playwright-1.58.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:c95568ba1eda83812598c1dc9be60b4406dffd60b149bc1536180ad108723d6b", size = 46235268, upload-time = "2026-01-30T15:09:33.787Z" },
{ url = "https://files.pythonhosted.org/packages/d9/a6/0e66ad04b6d3440dae73efb39540c5685c5fc95b17c8b29340b62abbd952/playwright-1.58.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f9999948f1ab541d98812de25e3a8c410776aa516d948807140aff797b4bffa", size = 45964214, upload-time = "2026-01-30T15:09:36.751Z" },
{ url = "https://files.pythonhosted.org/packages/0e/4b/236e60ab9f6d62ed0fd32150d61f1f494cefbf02304c0061e78ed80c1c32/playwright-1.58.0-py3-none-win32.whl", hash = "sha256:1e03be090e75a0fabbdaeab65ce17c308c425d879fa48bb1d7986f96bfad0b99", size = 36815998, upload-time = "2026-01-30T15:09:39.627Z" },
{ url = "https://files.pythonhosted.org/packages/41/f8/5ec599c5e59d2f2f336a05b4f318e733077cd5044f24adb6f86900c3e6a7/playwright-1.58.0-py3-none-win_amd64.whl", hash = "sha256:a2bf639d0ce33b3ba38de777e08697b0d8f3dc07ab6802e4ac53fb65e3907af8", size = 36816005, upload-time = "2026-01-30T15:09:42.449Z" },
{ url = "https://files.pythonhosted.org/packages/c8/c4/cc0229fea55c87d6c9c67fe44a21e2cd28d1d558a5478ed4d617e9fb0c93/playwright-1.58.0-py3-none-win_arm64.whl", hash = "sha256:32ffe5c303901a13a0ecab91d1c3f74baf73b84f4bedbb6b935f5bc11cc98e1b", size = 33085919, upload-time = "2026-01-30T15:09:45.71Z" },
]
[[package]]
name = "playwright-stealth"
version = "2.0.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "playwright" },
]
sdist = { url = "https://files.pythonhosted.org/packages/65/f4/57d20b4c26b8639d87a72f241e7d3279ff627554d95fd1ff42f87db3c2f3/playwright_stealth-2.0.1.tar.gz", hash = "sha256:a36f735d61469c12bda179b58d5fc4228bbee61c9cf5b1343b1497a5fd51ec1a", size = 24897, upload-time = "2026-01-17T05:06:35.924Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4e/0a/1c4a6677dcf05daf28a911ecefedba33187c45a712409fc1474f38bfe724/playwright_stealth-2.0.1-py3-none-any.whl", hash = "sha256:3905776f45f175057dd9d7d1639280b8d639822580f15a01a2f9e7c35bff40af", size = 33206, upload-time = "2026-01-17T05:06:35.088Z" },
]
[[package]]
name = "pluggy"
version = "1.6.0"
@@ -2721,18 +2638,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/14/98/73427d065c067a99de6afbe24df3d90cf20d63152ceb42edff2b6e829d4c/pydocket-0.17.5-py3-none-any.whl", hash = "sha256:544d7c2625a33e52528ac24db25794841427dfc2cf30b9c558ac387c77746241", size = 93355, upload-time = "2026-01-30T18:44:37.972Z" },
]
[[package]]
name = "pyee"
version = "13.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/95/03/1fd98d5841cd7964a27d729ccf2199602fe05eb7a405c1462eb7277945ed/pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37", size = 31250, upload-time = "2025-03-17T18:53:15.955Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9b/4d/b9add7c84060d4c1906abe9a7e5359f2a60f7a9a4f67268b2766673427d8/pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498", size = 15730, upload-time = "2025-03-17T18:53:14.532Z" },
]
[[package]]
name = "pygments"
version = "2.19.2"
@@ -3498,8 +3403,6 @@ dependencies = [
{ name = "jsonpath-ng" },
{ name = "litellm" },
{ name = "pandas" },
{ name = "playwright" },
{ name = "playwright-stealth" },
{ name = "psycopg2-binary" },
{ name = "pydantic" },
{ name = "pypdf" },
@@ -3507,6 +3410,7 @@ dependencies = [
{ name = "requests" },
{ name = "resend" },
{ name = "stripe" },
{ name = "websockets" },
]
[package.optional-dependencies]
@@ -3582,8 +3486,6 @@ requires-dist = [
{ name = "pillow", marker = "extra == 'all'", specifier = ">=10.0.0" },
{ name = "pillow", marker = "extra == 'browser'", specifier = ">=10.0.0" },
{ name = "pillow", marker = "extra == 'ocr'", specifier = ">=10.0.0" },
{ name = "playwright", specifier = ">=1.40.0" },
{ name = "playwright-stealth", specifier = ">=1.0.5" },
{ name = "psycopg2-binary", specifier = ">=2.9.0" },
{ name = "pydantic", specifier = ">=2.0.0" },
{ name = "pypdf", specifier = ">=4.0.0" },
@@ -3597,6 +3499,7 @@ requires-dist = [
{ name = "restrictedpython", marker = "extra == 'all'", specifier = ">=7.0" },
{ name = "restrictedpython", marker = "extra == 'sandbox'", specifier = ">=7.0" },
{ name = "stripe", specifier = ">=14.3.0" },
{ name = "websockets", specifier = ">=12.0" },
]
provides-extras = ["dev", "sandbox", "browser", "ocr", "excel", "sql", "bigquery", "databricks", "all"]