feat: browser control tools improvement and debugger

This commit is contained in:
Richard Tang
2026-04-16 15:14:08 -07:00
parent 9051c443fb
commit 916803889f
5 changed files with 1407 additions and 78 deletions
+260
View File
@@ -0,0 +1,260 @@
"""
Browser Remote Control: acts as an agent that calls browser tools via a web UI.
Spawns its own GCU MCP server subprocess (same way a real agent does),
connects as an MCP client, and exposes the tools over HTTP for the web UI.
Usage:
uv run scripts/browser_remote.py # starts server + opens UI
uv run scripts/browser_remote.py --no-ui # API only, no browser open
Then use the UI at http://localhost:9250/ui or curl directly:
curl -X POST http://localhost:9250/browser_click \
-H 'Content-Type: application/json' \
-d '{"selector": "#login-btn"}'
"""
from __future__ import annotations
import argparse
import asyncio
import json
import logging
import os
import sys
import webbrowser
from pathlib import Path
from typing import Any
from aiohttp import web
# Add framework to path so we can use the existing MCPClient
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "core"))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "tools", "src"))
from framework.loader.mcp_client import MCPClient, MCPServerConfig
# Module-level logger; main() configures the root logging format and level.
logger = logging.getLogger("browser_remote")
# Port used when neither --port nor BROWSER_REMOTE_PORT is supplied (see main()).
DEFAULT_PORT = 9250
# Absolute path of the sibling "tools" directory; used as the working
# directory of the spawned GCU MCP server subprocess.
TOOLS_DIR = str((Path(__file__).parent.parent / "tools").resolve())
# ---------------------------------------------------------------------------
# MCP client — connects to GCU server exactly like an agent would
# ---------------------------------------------------------------------------
# Process-wide client instance, created lazily by get_mcp_client().
_mcp_client: MCPClient | None = None
def get_mcp_client() -> MCPClient:
    """Return the shared MCP client, creating and connecting it on first use.

    The client spawns the GCU MCP server as a stdio subprocess (``uv run
    python -m gcu.server --stdio --capabilities browser``) with ``TOOLS_DIR``
    as its working directory.  The bridge port is taken from the
    ``HIVE_BRIDGE_PORT`` environment variable (default ``"9229"``) and
    forwarded to the subprocess.

    Returns:
        The connected, process-wide ``MCPClient`` instance.

    Raises:
        Whatever ``MCPClient.connect()`` raises when the server cannot be
        started.  In that case nothing is cached, so the next call retries
        instead of handing out a client that never connected.
    """
    global _mcp_client
    if _mcp_client is not None:
        return _mcp_client

    bridge_port = os.environ.get("HIVE_BRIDGE_PORT", "9229")
    config = MCPServerConfig(
        name="gcu-tools",
        transport="stdio",
        command="uv",
        args=["run", "python", "-m", "gcu.server", "--stdio", "--capabilities", "browser"],
        cwd=TOOLS_DIR,
        env={"HIVE_BRIDGE_PORT": bridge_port},
    )
    client = MCPClient(config)
    client.connect()
    # Publish the client only after connect() succeeded; assigning earlier
    # would cache a dead client forever if the connection attempt failed.
    _mcp_client = client

    tools = client.get_tools()
    logger.info(
        "Connected to GCU server, %d tools available: %s",
        len(tools),
        [t.name for t in tools],
    )
    return client
# ---------------------------------------------------------------------------
# HTTP Handlers
# ---------------------------------------------------------------------------
async def handle_ui(request: web.Request) -> web.Response:
    """GET /ui — respond with the HTML page that sits next to this script."""
    script_dir = Path(__file__).parent
    return web.FileResponse(script_dir / "browser_remote_ui.html")
async def handle_index(request: web.Request) -> web.Response:
    """GET / — always redirect the client to the UI page at /ui."""
    redirect = web.HTTPFound("/ui")
    raise redirect
async def handle_status(request: web.Request) -> web.Response:
    """GET /status — report whether the MCP client is usable.

    Responds with ``{"connected": true, "tools_count": N}`` on success and
    ``{"connected": false, "error": "..."}`` when obtaining the client or
    listing its tools raises.
    """
    try:
        tool_count = len(get_mcp_client().get_tools())
    except Exception as exc:
        return web.json_response({"connected": False, "error": str(exc)})
    return web.json_response({"connected": True, "tools_count": tool_count})
def _param_type(pspec: dict[str, Any]) -> Any:
    """Return the JSON-schema type the UI should render for one property."""
    declared = pspec.get("type")
    if isinstance(declared, list):
        # "type": ["integer", "null"] — use the first non-null member.
        declared = next((t for t in declared if t != "null"), None)
    if declared is None:
        # Optional parameters are commonly emitted as an anyOf/oneOf union
        # with "null".  Without this lookup an optional integer or boolean
        # would be presented (and submitted) as free text.
        for union_key in ("anyOf", "oneOf"):
            for option in pspec.get(union_key) or ():
                if not isinstance(option, dict):
                    continue
                option_type = option.get("type")
                if isinstance(option_type, str) and option_type != "null":
                    return option_type
    return "string" if declared is None else declared


async def handle_tools(request: web.Request) -> web.Response:
    """GET /tools — list available tools with a simplified parameter schema.

    The response maps each tool name to ``{"description", "params"}``, where
    ``description`` is the first line of the tool's description and ``params``
    maps each parameter name to ``type`` plus, when present, ``required``,
    ``default``, ``enum`` and (for arrays) the ``items`` type.

    Any failure is reported as ``{"error": "..."}`` with HTTP status 500.
    """
    try:
        client = get_mcp_client()
        schemas: dict[str, Any] = {}
        for tool in client.get_tools():
            # Tolerate tools that declare no input schema at all.
            input_schema = tool.input_schema or {}
            props = input_schema.get("properties", {})
            required = input_schema.get("required", [])
            params: dict[str, Any] = {}
            for pname, pspec in props.items():
                ptype = _param_type(pspec)
                param_def: dict[str, Any] = {"type": ptype}
                if pname in required:
                    param_def["required"] = True
                if "default" in pspec:
                    param_def["default"] = pspec["default"]
                if "enum" in pspec:
                    param_def["enum"] = pspec["enum"]
                items = pspec.get("items")
                if ptype == "array" and isinstance(items, dict):
                    param_def["items"] = items.get("type", "string")
                params[pname] = param_def
            schemas[tool.name] = {
                "description": tool.description.split("\n")[0].strip() if tool.description else "",
                "params": params,
            }
        return web.json_response(schemas)
    except Exception as e:
        logger.exception("Failed to list tools")
        return web.json_response({"error": str(e)}, status=500)
async def handle_tool_call(request: web.Request) -> web.Response:
    """POST /<tool_name> — call a browser tool.

    The request body is an optional JSON object of tool parameters; an empty
    body (or JSON ``null``) means "no parameters".

    Responses:
        200 — the tool result as formatted by ``_format_mcp_result``.
        400 — the body is not valid JSON or is not a JSON object.
        500 — obtaining the client or calling the tool raised; the body is
              ``{"ok": false, "error": "..."}``.
    """
    tool_name = request.match_info["tool"]
    try:
        body = await request.read()
        params = json.loads(body) if body.strip() else {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        # json.loads() on bytes raises UnicodeDecodeError (not
        # JSONDecodeError) when the payload is not valid UTF-8/16/32.
        return web.json_response({"ok": False, "error": "Invalid JSON"}, status=400)
    if params is None:
        params = {}
    if not isinstance(params, dict):
        return web.json_response(
            {"ok": False, "error": "Tool parameters must be a JSON object"},
            status=400,
        )
    logger.info("=> %s %s", tool_name, json.dumps(params, default=str)[:200])
    try:
        client = get_mcp_client()
        # call_tool is synchronous (blocks on the stdio subprocess)
        # Run it in a thread so we don't block the event loop
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, client.call_tool, tool_name, params)
        # MCP returns a list of content blocks — extract text/image
        response = _format_mcp_result(result)
        ok = response.get("ok", True) if isinstance(response, dict) else True
        logger.info("<= %s ok=%s", tool_name, ok)
        return web.json_response(response)
    except Exception as e:
        logger.exception("<= %s error: %s", tool_name, e)
        return web.json_response({"ok": False, "error": str(e)}, status=500)
def _format_mcp_result(result: Any) -> dict:
"""Convert MCP tool result into a JSON-friendly dict."""
if result is None:
return {"ok": True}
# MCPClient.call_tool returns the raw result from the MCP SDK
# which could be a list of content blocks, a dict, or a string
if isinstance(result, dict):
return result
if isinstance(result, str):
try:
return json.loads(result)
except (json.JSONDecodeError, TypeError):
return {"ok": True, "text": result}
if isinstance(result, list):
# List of MCP content blocks (TextContent, ImageContent, etc.)
texts = []
images = []
for item in result:
if hasattr(item, "text"):
try:
parsed = json.loads(item.text)
if isinstance(parsed, dict):
return parsed # Tool returned structured JSON
except (json.JSONDecodeError, TypeError):
pass
texts.append(item.text)
elif hasattr(item, "data"):
images.append({"mime_type": getattr(item, "mime_type", "image/png"), "data": item.data})
response: dict[str, Any] = {"ok": True}
if texts:
response["text"] = "\n".join(texts)
if images:
response["images"] = images
return response
return {"ok": True, "result": str(result)}
# ---------------------------------------------------------------------------
# Server setup
# ---------------------------------------------------------------------------
@web.middleware
async def cors_middleware(request: web.Request, handler):
    """Attach permissive CORS headers to every response.

    ``OPTIONS`` requests are answered directly with an empty response so that
    browser preflight checks succeed without a dedicated route.  Responses
    that the handler signals by raising ``web.HTTPException`` (redirects,
    404/405, ...) receive the same headers before being re-raised; otherwise
    a cross-origin caller could not read those responses.

    NOTE(review): ``Access-Control-Allow-Origin: *`` lets any web page the
    user visits call this API.  Confirm that is acceptable for a server that
    drives the user's browser.
    """
    cors_headers = {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers": "Content-Type",
    }
    if request.method == "OPTIONS":
        resp = web.Response()
    else:
        try:
            resp = await handler(request)
        except web.HTTPException as exc:
            for header, value in cors_headers.items():
                exc.headers[header] = value
            raise
    for header, value in cors_headers.items():
        resp.headers[header] = value
    return resp
def create_app() -> web.Application:
    """Build the aiohttp application: CORS middleware plus all routes.

    Fixed GET routes are registered before the catch-all ``POST /{tool}``
    route that dispatches tool calls.
    """
    app = web.Application(middlewares=[cors_middleware])
    get_routes = (
        ("/", handle_index),
        ("/ui", handle_ui),
        ("/tools", handle_tools),
        ("/status", handle_status),
    )
    for path, handler in get_routes:
        app.router.add_get(path, handler)
    app.router.add_post("/{tool}", handle_tool_call)
    return app
def main() -> None:
    """Parse CLI options, connect to the GCU server and serve the HTTP API.

    Options:
        --port   listening port (default: ``BROWSER_REMOTE_PORT`` env var,
                 falling back to ``DEFAULT_PORT``)
        --no-ui  do not open the web UI in a browser on startup

    Exits with status 1 if the MCP client cannot be created.
    """
    default_port = int(os.environ.get("BROWSER_REMOTE_PORT", DEFAULT_PORT))
    parser = argparse.ArgumentParser(description="Browser Remote Control")
    parser.add_argument("--port", type=int, default=default_port)
    parser.add_argument("--no-ui", action="store_true", help="Don't auto-open the browser")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")

    # Connect eagerly: a broken GCU setup should abort before serving.
    try:
        get_mcp_client()
    except Exception as e:
        logger.error("Failed to connect to GCU server: %s", e)
        sys.exit(1)

    base_url = f"http://localhost:{args.port}"
    open_ui = not args.no_ui

    async def on_startup(app: web.Application) -> None:
        if open_ui:
            webbrowser.open(f"{base_url}/ui")

    app = create_app()
    app.on_startup.append(on_startup)

    banner = (
        f"Browser Remote Control on {base_url}",
        f" UI: {base_url}/ui",
        f" API: POST {base_url}/<tool>",
        "",
    )
    for line in banner:
        print(line)

    web.run_app(app, host="127.0.0.1", port=args.port, print=None)


if __name__ == "__main__":
    main()
+820
View File
@@ -0,0 +1,820 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Browser Remote Control</title>
<style>
:root {
--bg: #0d1117;
--surface: #161b22;
--surface2: #21262d;
--border: #30363d;
--text: #e6edf3;
--text2: #8b949e;
--accent: #58a6ff;
--accent-dim: #1f6feb;
--green: #3fb950;
--red: #f85149;
--orange: #d29922;
--radius: 8px;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
background: var(--bg);
color: var(--text);
line-height: 1.5;
padding: 0;
}
header {
background: var(--surface);
border-bottom: 1px solid var(--border);
padding: 16px 24px;
display: flex;
align-items: center;
justify-content: space-between;
position: sticky;
top: 0;
z-index: 100;
}
header h1 {
font-size: 18px;
font-weight: 600;
}
#status-badge {
font-size: 13px;
padding: 4px 12px;
border-radius: 20px;
font-weight: 500;
}
#status-badge.connected { background: rgba(63,185,80,0.15); color: var(--green); }
#status-badge.disconnected { background: rgba(248,81,73,0.15); color: var(--red); }
#status-badge.checking { background: rgba(210,153,34,0.15); color: var(--orange); }
.layout {
display: flex;
height: calc(100vh - 57px);
}
/* Sidebar */
.sidebar {
width: 240px;
min-width: 240px;
background: var(--surface);
border-right: 1px solid var(--border);
overflow-y: auto;
padding: 12px 0;
}
.sidebar-group {
margin-bottom: 8px;
}
.sidebar-group-label {
font-size: 11px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.5px;
color: var(--text2);
padding: 8px 16px 4px;
}
.sidebar-item {
display: block;
width: 100%;
text-align: left;
background: none;
border: none;
color: var(--text2);
font-size: 13px;
padding: 6px 16px 6px 24px;
cursor: pointer;
font-family: 'SF Mono', 'Fira Code', monospace;
transition: background 0.1s, color 0.1s;
}
.sidebar-item:hover {
background: var(--surface2);
color: var(--text);
}
.sidebar-item.active {
background: rgba(88,166,255,0.1);
color: var(--accent);
border-right: 2px solid var(--accent);
}
/* Main content */
.main {
flex: 1;
overflow-y: auto;
padding: 24px 32px;
}
.tools-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(420px, 1fr));
gap: 16px;
}
.tool-card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
overflow: hidden;
transition: border-color 0.15s;
}
.tool-card:hover { border-color: var(--accent-dim); }
.tool-card.active { border-color: var(--accent); }
.tool-card-header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 12px 16px;
border-bottom: 1px solid var(--border);
cursor: pointer;
user-select: none;
}
.tool-card-header:hover { background: var(--surface2); }
.tool-name {
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 13px;
font-weight: 600;
color: var(--accent);
}
.tool-desc {
font-size: 12px;
color: var(--text2);
margin-left: 8px;
}
.tool-card-body {
padding: 16px;
display: none;
}
.tool-card.open .tool-card-body { display: block; }
.chevron {
color: var(--text2);
transition: transform 0.2s;
font-size: 12px;
}
.tool-card.open .chevron { transform: rotate(90deg); }
/* Form fields */
.field {
margin-bottom: 12px;
}
.field:last-of-type { margin-bottom: 16px; }
.field label {
display: flex;
align-items: center;
gap: 6px;
font-size: 12px;
font-weight: 500;
color: var(--text2);
margin-bottom: 4px;
}
.field label .required {
color: var(--red);
font-size: 10px;
}
.field label .type-tag {
font-size: 10px;
padding: 1px 5px;
border-radius: 3px;
background: var(--surface2);
color: var(--text2);
font-family: 'SF Mono', 'Fira Code', monospace;
}
.field input, .field select, .field textarea {
width: 100%;
background: var(--bg);
border: 1px solid var(--border);
border-radius: 6px;
color: var(--text);
font-size: 13px;
padding: 8px 10px;
font-family: 'SF Mono', 'Fira Code', monospace;
outline: none;
transition: border-color 0.15s;
}
.field input:focus, .field select:focus, .field textarea:focus {
border-color: var(--accent);
}
.field textarea { min-height: 60px; resize: vertical; }
.field input[type="checkbox"] {
width: auto;
margin-right: 4px;
}
.checkbox-row {
display: flex;
align-items: center;
gap: 6px;
padding: 4px 0;
}
.checkbox-row label {
margin-bottom: 0;
cursor: pointer;
}
/* Buttons */
.btn-run {
display: inline-flex;
align-items: center;
gap: 6px;
background: var(--accent-dim);
color: #fff;
border: none;
border-radius: 6px;
padding: 8px 20px;
font-size: 13px;
font-weight: 600;
cursor: pointer;
transition: background 0.15s;
}
.btn-run:hover { background: var(--accent); }
.btn-run:disabled { opacity: 0.5; cursor: not-allowed; }
.btn-run.running { background: var(--orange); }
/* Result area */
.result-area {
margin-top: 12px;
display: none;
}
.result-area.visible { display: block; }
.result-header {
display: flex;
align-items: center;
gap: 8px;
margin-bottom: 6px;
}
.result-status {
font-size: 12px;
font-weight: 600;
padding: 2px 8px;
border-radius: 4px;
}
.result-status.ok { background: rgba(63,185,80,0.15); color: var(--green); }
.result-status.error { background: rgba(248,81,73,0.15); color: var(--red); }
.result-duration {
font-size: 11px;
color: var(--text2);
}
.result-json {
background: var(--bg);
border: 1px solid var(--border);
border-radius: 6px;
padding: 12px;
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 12px;
line-height: 1.6;
max-height: 300px;
overflow: auto;
white-space: pre-wrap;
word-break: break-word;
}
.result-screenshot {
max-width: 100%;
border: 1px solid var(--border);
border-radius: 6px;
margin-top: 8px;
}
/* History panel */
.history-panel {
width: 320px;
min-width: 320px;
background: var(--surface);
border-left: 1px solid var(--border);
overflow-y: auto;
padding: 12px;
}
.history-title {
font-size: 12px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.5px;
color: var(--text2);
padding: 4px 4px 8px;
border-bottom: 1px solid var(--border);
margin-bottom: 8px;
}
.history-item {
padding: 8px;
border-radius: 6px;
margin-bottom: 4px;
cursor: pointer;
transition: background 0.1s;
border: 1px solid transparent;
}
.history-item:hover {
background: var(--surface2);
}
.history-item-tool {
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 12px;
font-weight: 600;
}
.history-item-tool.ok { color: var(--green); }
.history-item-tool.error { color: var(--red); }
.history-item-time {
font-size: 11px;
color: var(--text2);
}
.history-item-params {
font-size: 11px;
color: var(--text2);
font-family: 'SF Mono', 'Fira Code', monospace;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
max-width: 280px;
}
.history-empty {
color: var(--text2);
font-size: 13px;
text-align: center;
padding: 24px 0;
}
.clear-history {
background: none;
border: none;
color: var(--text2);
font-size: 11px;
cursor: pointer;
float: right;
padding: 0;
}
.clear-history:hover { color: var(--red); }
/* View mode toggle */
.view-toggle {
display: flex;
gap: 4px;
background: var(--surface2);
border-radius: 6px;
padding: 2px;
}
.view-toggle button {
background: none;
border: none;
color: var(--text2);
font-size: 12px;
padding: 4px 12px;
border-radius: 4px;
cursor: pointer;
}
.view-toggle button.active {
background: var(--accent-dim);
color: #fff;
}
/* Scrollbar */
::-webkit-scrollbar { width: 8px; height: 8px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
::-webkit-scrollbar-thumb:hover { background: var(--text2); }
</style>
</head>
<body>
<header>
<div style="display:flex;align-items:center;gap:16px;">
<h1>Browser Remote Control</h1>
<div class="view-toggle">
<button class="active" onclick="setView('grid')">Grid</button>
<button onclick="setView('single')">Focus</button>
</div>
</div>
<div style="display:flex;align-items:center;gap:12px;">
<span id="context-info" style="font-size:12px;color:var(--text2)"></span>
<span id="status-badge" class="checking">checking...</span>
</div>
</header>
<div class="layout">
<nav class="sidebar" id="sidebar"></nav>
<main class="main" id="main-content"></main>
<aside class="history-panel" id="history-panel">
<div class="history-title">
History
<button class="clear-history" onclick="clearHistory()">clear</button>
</div>
<div id="history-list">
<div class="history-empty">No calls yet</div>
</div>
</aside>
</div>
<script>
// Base URL for API calls: the origin this page was loaded from.
const API_BASE = window.location.origin;
// Tool name -> { description, params } map, filled by loadTools() from GET /tools.
let toolSchemas = {};
// Most-recent-first list of tool calls made from this page (see addHistory()).
// NOTE(review): this top-level name appears to shadow window.history inside
// this script — confirm nothing here needs the browser History API.
let history = [];
// Current layout mode, 'grid' or 'single' (changed by setView()).
let currentView = 'grid';
// Tool categories for sidebar grouping
const CATEGORIES = {
'Interactions': ['browser_click', 'browser_click_coordinate', 'browser_type', 'browser_fill', 'browser_press', 'browser_press_at', 'browser_hover', 'browser_hover_coordinate', 'browser_select', 'browser_scroll'],
'Navigation': ['browser_navigate', 'browser_go_back', 'browser_go_forward', 'browser_reload'],
'Inspection': ['browser_screenshot', 'browser_snapshot', 'browser_get_text', 'browser_evaluate', 'browser_wait'],
};
async function init() {
  // Page bootstrap: show the connection state, build the tool UI, then keep
  // the status badge fresh by polling.
  const STATUS_POLL_MS = 5000;
  await checkStatus();
  await loadTools();
  setInterval(checkStatus, STATUS_POLL_MS);
}
async function checkStatus() {
  // Refresh the header badge and context line from GET /status.
  const badge = document.getElementById('status-badge');
  const ctx = document.getElementById('context-info');
  const show = (label, cls, info) => {
    badge.textContent = label;
    badge.className = cls;
    ctx.textContent = info;
  };
  try {
    const data = await (await fetch(`${API_BASE}/status`)).json();
    if (!(data.connected || data.bridge_connected)) {
      show('disconnected', 'disconnected', '');
      return;
    }
    // One "name: tab N" entry per reported context, if the server sent any.
    const summary = Object.entries(data.contexts || {})
      .map(([name, info]) => `${name}: tab ${info.activeTabId}`)
      .join(', ');
    show('connected', 'connected', summary || 'no active context');
  } catch {
    // Network failure, non-JSON reply or malformed payload.
    show('unreachable', 'disconnected', '');
  }
}
async function loadTools() {
  // Fetch the tool schemas from GET /tools and (re)build the sidebar and the
  // tool cards.  On failure, show the reason in the main panel instead.
  try {
    const res = await fetch(`${API_BASE}/tools`);
    const data = await res.json();
    // The server reports failures as { error: "..." } with a non-2xx status;
    // treating that payload as a schema map would render a bogus "error" tool.
    if (!res.ok || data === null || typeof data !== 'object' || typeof data.error === 'string') {
      const reason = data && typeof data.error === 'string' ? data.error : `HTTP ${res.status}`;
      throw new Error(reason);
    }
    toolSchemas = data;
    renderSidebar();
    renderToolCards();
  } catch (e) {
    // Build the message with textContent so the error text is never parsed as HTML.
    const box = document.createElement('div');
    box.style.cssText = 'color:var(--red);padding:40px;';
    box.textContent = `Failed to load tools: ${e.message}`;
    const main = document.getElementById('main-content');
    main.innerHTML = '';
    main.appendChild(box);
  }
}
function renderSidebar() {
  // Rebuild the sidebar: one labelled group per CATEGORIES entry, listing only
  // the tools the server actually exposes.
  const groups = Object.entries(CATEGORIES).map(([group, tools]) => {
    const buttons = tools
      .filter((tool) => toolSchemas[tool])
      .map((tool) => `<button class="sidebar-item" data-tool="${tool}" onclick="scrollToTool('${tool}')">${tool.replace('browser_', '')}</button>`)
      .join('');
    return `<div class="sidebar-group"><div class="sidebar-group-label">${group}</div>${buttons}</div>`;
  });
  document.getElementById('sidebar').innerHTML = groups.join('');
}
function renderToolCards() {
  // Replace the main panel with one card per entry in toolSchemas.
  const cards = Object.entries(toolSchemas)
    .map(([tool, schema]) => buildToolCard(tool, schema));
  document.getElementById('main-content').innerHTML =
    `<div class="tools-grid" id="tools-grid">${cards.join('')}</div>`;
}
function buildToolCard(tool, schema) {
  // Return the HTML for one collapsible tool card: header (short name and
  // description), a form with one field per parameter, and an empty result area.
  //
  // tool   — full tool name; used in element ids and inline handlers, so it is
  //          expected to be a plain identifier such as "browser_click".
  // schema — { description, params } entry from toolSchemas.
  const escapeHtml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  const shortName = tool.replace('browser_', '');
  // Tolerate a schema without params rather than throwing in Object.entries().
  const fieldsHtml = Object.entries(schema.params || {})
    .map(([param, spec]) => buildField(tool, param, spec))
    .join('');

  // Descriptions are free text and may contain characters such as "<" or "&";
  // escape them so they cannot break the card markup.
  return `
    <div class="tool-card" id="card-${tool}" data-tool="${tool}">
      <div class="tool-card-header" onclick="toggleCard('${tool}')">
        <div>
          <span class="tool-name">${escapeHtml(shortName)}</span>
          <span class="tool-desc">${escapeHtml(schema.description || '')}</span>
        </div>
        <span class="chevron">&#9654;</span>
      </div>
      <div class="tool-card-body">
        <form id="form-${tool}" onsubmit="runTool(event, '${tool}')">
          ${fieldsHtml}
          <button class="btn-run" type="submit" id="btn-${tool}">Run</button>
        </form>
        <div class="result-area" id="result-${tool}"></div>
      </div>
    </div>`;
}
// Return the HTML for a single form field of a tool card.
//   tool  — tool name; combined with the parameter name to form the input id
//   param — parameter name, also used as label text and placeholder
//   spec  — parameter entry from the /tools response (type, required,
//           default, enum)
// The control is chosen in this order: checkbox for booleans, <select> for
// enums, a JSON text input for arrays, a <textarea> for the parameters named
// "expression" or "text", otherwise a number or text <input>.
function buildField(tool, param, spec) {
// The id format must match the lookups in collectParams() and replayHistory().
const id = `${tool}__${param}`;
const required = spec.required ? '<span class="required">*</span>' : '';
const typeTag = `<span class="type-tag">${spec.type}</span>`;
// '' stands for "no default supplied".
const defaultVal = spec.default !== undefined ? spec.default : '';
if (spec.type === 'boolean') {
return `
<div class="field">
<div class="checkbox-row">
<input type="checkbox" id="${id}" ${defaultVal ? 'checked' : ''}>
<label for="${id}">${param} ${typeTag} ${required}</label>
</div>
</div>`;
}
if (spec.enum) {
// Pre-select the option equal to the default, if any.
const opts = spec.enum.map(v => `<option value="${v}" ${v === defaultVal ? 'selected' : ''}>${v}</option>`).join('');
return `
<div class="field">
<label for="${id}">${param} ${typeTag} ${required}</label>
<select id="${id}">${opts}</select>
</div>`;
}
if (spec.type === 'array') {
// Arrays are entered as JSON text; collectParams() parses the value.
return `
<div class="field">
<label for="${id}">${param} ${typeTag} ${required}
<span class="type-tag" style="margin-left:2px">JSON</span>
</label>
<input type="text" id="${id}" placeholder='["value1", "value2"]'>
</div>`;
}
// For expression / text that might be multiline
if (param === 'expression' || param === 'text') {
return `
<div class="field">
<label for="${id}">${param} ${typeTag} ${required}</label>
<textarea id="${id}" placeholder="${param}">${defaultVal}</textarea>
</div>`;
}
const inputType = (spec.type === 'integer' || spec.type === 'number') ? 'number' : 'text';
// step="any" lets a number input accept non-integer values.
const step = spec.type === 'number' ? ' step="any"' : '';
// A default is always shown as the placeholder; it is pre-filled as the
// value only when the parameter type is not 'string'.
return `
<div class="field">
<label for="${id}">${param} ${typeTag} ${required}</label>
<input type="${inputType}" id="${id}"${step} placeholder="${defaultVal !== '' ? defaultVal : param}" value="${defaultVal !== '' && spec.type !== 'string' ? defaultVal : ''}">
</div>`;
}
function toggleCard(tool) {
  // Expand or collapse the card for `tool` and mirror that in the sidebar.
  // In 'single' view every other open card is collapsed first.
  const card = document.getElementById(`card-${tool}`);
  const opening = !card.classList.contains('open');

  if (currentView === 'single') {
    for (const other of document.querySelectorAll('.tool-card.open')) {
      other.classList.remove('open');
    }
  }
  if (opening) {
    card.classList.add('open');
  } else {
    card.classList.remove('open');
  }

  // Only a card that was just opened is marked active in the sidebar.
  for (const item of document.querySelectorAll('.sidebar-item')) {
    item.classList.remove('active');
  }
  if (!opening) return;
  const match = document.querySelector(`.sidebar-item[data-tool="${tool}"]`);
  if (match) match.classList.add('active');
}
function scrollToTool(tool) {
  // Make sure the card for `tool` is open, scroll it into view and mark its
  // sidebar entry active.  Does nothing when no such card exists.
  const card = document.getElementById(`card-${tool}`);
  if (!card) return;

  const alreadyOpen = card.classList.contains('open');
  if (!alreadyOpen) {
    // 'single' view keeps at most one card expanded.
    if (currentView === 'single') {
      for (const other of document.querySelectorAll('.tool-card.open')) {
        other.classList.remove('open');
      }
    }
    card.classList.add('open');
  }

  card.scrollIntoView({ behavior: 'smooth', block: 'start' });

  for (const item of document.querySelectorAll('.sidebar-item')) {
    item.classList.remove('active');
  }
  const match = document.querySelector(`.sidebar-item[data-tool="${tool}"]`);
  if (match) match.classList.add('active');
}
function collectParams(tool) {
  // Read the form of `tool` and return its parameters as a plain object.
  // Booleans are always included; every other field is included only when
  // its trimmed value is non-empty.
  const collected = {};
  Object.entries(toolSchemas[tool].params).forEach(([param, spec]) => {
    const el = document.getElementById(`${tool}__${param}`);
    if (!el) return;

    if (spec.type === 'boolean') {
      collected[param] = el.checked;
      return;
    }

    const raw = (el.value || '').trim();
    if (!raw) return;

    switch (spec.type) {
      case 'array':
        // Prefer JSON; fall back to a comma-separated list of strings.
        try {
          collected[param] = JSON.parse(raw);
        } catch {
          collected[param] = raw.split(',').map((part) => part.trim());
        }
        break;
      case 'integer':
        collected[param] = parseInt(raw, 10);
        break;
      case 'number':
        collected[param] = parseFloat(raw);
        break;
      default:
        collected[param] = raw;
    }
  });
  return collected;
}
async function runTool(event, tool) {
  // Submit handler for a tool form: POST the collected parameters to
  // /<tool>, show status, duration, any returned images and the JSON result
  // in the card's result area, then record the call in the history panel.
  event.preventDefault();
  const btn = document.getElementById(`btn-${tool}`);
  const resultArea = document.getElementById(`result-${tool}`);
  const params = collectParams(tool);

  btn.textContent = 'Running...';
  btn.classList.add('running');
  btn.disabled = true;

  const startTime = Date.now();
  let result;
  try {
    const res = await fetch(`${API_BASE}/${tool}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(params),
    });
    result = await res.json();
  } catch (e) {
    result = { ok: false, error: e.message };
  }
  const elapsed = Date.now() - startTime;

  btn.textContent = 'Run';
  btn.classList.remove('running');
  btn.disabled = false;

  // A JSON scalar or null reply has no properties to inspect; wrap it so the
  // rendering below (and the history entry) can treat every result alike.
  if (result === null || typeof result !== 'object') {
    result = { ok: true, result: result };
  }

  const isOk = result.ok !== false;
  const duration = result._duration_ms ? `${result._duration_ms}ms` : `${elapsed}ms`;

  // Collect images to preview and build a display copy in which raw base64
  // payloads are replaced by a short size note.
  const images = [];
  let display = result;
  if (tool === 'browser_screenshot' && typeof result.data === 'string' && result.data) {
    images.push({ mime: 'image/png', data: result.data });
    display = { ...display, data: `[${result.data.length} chars base64]` };
  }
  if (Array.isArray(result.images)) {
    const hasData = (img) => img && typeof img.data === 'string';
    result.images.filter(hasData).forEach((img) => {
      const mime = typeof img.mime_type === 'string' && img.mime_type.startsWith('image/')
        ? img.mime_type
        : 'image/png';
      images.push({ mime, data: img.data });
    });
    display = {
      ...display,
      images: result.images.map((img) =>
        (hasData(img) ? { ...img, data: `[${img.data.length} chars base64]` } : img)),
    };
  }

  // Build the result with DOM nodes and textContent: tool output can contain
  // text taken from arbitrary web pages and must never be parsed as HTML.
  const header = document.createElement('div');
  header.className = 'result-header';
  const status = document.createElement('span');
  status.className = `result-status ${isOk ? 'ok' : 'error'}`;
  status.textContent = isOk ? 'OK' : 'ERROR';
  const took = document.createElement('span');
  took.className = 'result-duration';
  took.textContent = duration;
  header.appendChild(status);
  header.appendChild(took);

  resultArea.innerHTML = '';
  resultArea.appendChild(header);
  images.forEach(({ mime, data }) => {
    const img = document.createElement('img');
    img.className = 'result-screenshot';
    img.src = `data:${mime};base64,${data}`;
    resultArea.appendChild(img);
  });
  const pre = document.createElement('pre');
  pre.className = 'result-json';
  pre.textContent = JSON.stringify(display, null, 2);
  resultArea.appendChild(pre);
  resultArea.classList.add('visible');

  // Add to history
  addHistory(tool, params, result, duration);
}
function addHistory(tool, params, result, duration) {
  // Put a record of the call at the front of the history, keep at most 50
  // entries, and redraw the history panel.
  const HISTORY_LIMIT = 50;
  history.unshift({
    tool,
    params,
    result,
    duration,
    time: new Date().toLocaleTimeString(),
    ok: result.ok !== false,
  });
  // One entry is added per call, so this drops at most the oldest entry.
  history.splice(HISTORY_LIMIT);
  renderHistory();
}
function renderHistory() {
  // Redraw the history panel from the `history` array (newest first).  Each
  // row shows the short tool name, time, duration and the parameters used;
  // clicking a row loads its parameters back into the tool's form.
  const escapeHtml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  const list = document.getElementById('history-list');
  if (history.length === 0) {
    list.innerHTML = '<div class="history-empty">No calls yet</div>';
    return;
  }
  list.innerHTML = history.map((h, i) => {
    const shortName = h.tool.replace('browser_', '');
    // Parameters are user-entered text; escape them so values such as
    // "<b>" or "&" are shown literally instead of being parsed as markup.
    const paramsStr = escapeHtml(JSON.stringify(h.params));
    const statusCls = h.ok ? 'ok' : 'error';
    return `
      <div class="history-item" onclick="replayHistory(${i})" title="Click to load params">
        <div style="display:flex;justify-content:space-between;align-items:center;">
          <span class="history-item-tool ${statusCls}">${escapeHtml(shortName)}</span>
          <span class="history-item-time">${escapeHtml(h.time)} (${escapeHtml(h.duration)})</span>
        </div>
        <div class="history-item-params">${paramsStr}</div>
      </div>`;
  }).join('');
}
function replayHistory(idx) {
  // Load the parameters of history entry `idx` back into its tool's form.
  // Parameters the entry did not record are left untouched.
  const { tool, params: saved } = history[idx];

  // Open the card and scroll to it
  scrollToTool(tool);

  Object.entries(toolSchemas[tool].params).forEach(([param, spec]) => {
    const el = document.getElementById(`${tool}__${param}`);
    if (!el) return;
    const val = saved[param];
    if (val === undefined) return;

    switch (spec.type) {
      case 'boolean':
        el.checked = !!val;
        break;
      case 'array':
        // Array fields hold JSON text (see collectParams()).
        el.value = JSON.stringify(val);
        break;
      default:
        el.value = val;
    }
  });
}
function clearHistory() {
  // Forget every recorded call and show the empty-state message again.
  history.length = 0;
  renderHistory();
}
function setView(mode) {
  // Switch between 'grid' (multi-column) and 'single' (one column, one open
  // card at a time) layouts and highlight the matching toggle button.
  currentView = mode;

  document.querySelectorAll('.view-toggle button').forEach(b => b.classList.remove('active'));
  // The inline onclick handlers pass only the mode, so the clicked button is
  // taken from the current event.  Guard the lookup: when setView() is
  // called outside an event handler there is no event to read.
  const source = window.event ? window.event.target : null;
  const clicked = source && source.closest ? source.closest('.view-toggle button') : null;
  if (clicked) clicked.classList.add('active');

  // The grid only exists once tools were loaded successfully; without this
  // check, toggling the view after a failed load threw a TypeError.
  const grid = document.getElementById('tools-grid');
  if (!grid) return;
  grid.style.gridTemplateColumns = mode === 'single'
    ? '1fr'
    : 'repeat(auto-fill, minmax(420px, 1fr))';
}
init();
</script>
</body>
</html>
+145 -76
View File
@@ -930,12 +930,50 @@ class BeelineBridge:
)
await self.highlight_point(tab_id, x, y, label=f"click ({x},{y})")
return {"ok": True, "action": "click_coordinate", "x": x, "y": y}
# Query the focused element after the click
focused_info = None
try:
await self._try_enable_domain(tab_id, "Runtime")
result = await self.evaluate(
tab_id,
"""
(function() {
var el = document.activeElement;
if (!el || el === document.body) return null;
var rect = el.getBoundingClientRect();
var attrs = {};
for (var i = 0; i < el.attributes.length && i < 10; i++) {
attrs[el.attributes[i].name] = el.attributes[i].value.substring(0, 200);
}
return {
tag: el.tagName.toLowerCase(),
id: el.id || null,
className: el.className || null,
name: el.getAttribute('name') || null,
type: el.getAttribute('type') || null,
role: el.getAttribute('role') || null,
text: (el.innerText || '').substring(0, 200),
value: (el.value !== undefined ? String(el.value).substring(0, 200) : null),
attributes: attrs,
rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height }
};
})()
""",
)
focused_info = (result or {}).get("result")
except Exception:
pass
resp = {"ok": True, "action": "click_coordinate", "x": x, "y": y}
if focused_info:
resp["focused_element"] = focused_info
return resp
async def type_text(
self,
tab_id: int,
selector: str,
selector: str | None,
text: str,
clear_first: bool = True,
delay_ms: int = 0,
@@ -974,79 +1012,98 @@ class BeelineBridge:
await self._try_enable_domain(tab_id, "Input")
await self._try_enable_domain(tab_id, "Runtime")
# Find + scroll + (optionally) clear via JS. We still need the
# rect, and clearing via `.value = ''` / `.textContent = ''`
# is the most reliable way to reset pre-existing content.
focus_script = f"""
(function() {{
const el = document.querySelector({json.dumps(selector)});
if (!el) return null;
if selector is not None:
# Find + scroll + (optionally) clear via JS. We still need the
# rect, and clearing via `.value = ''` / `.textContent = ''`
# is the most reliable way to reset pre-existing content.
focus_script = f"""
(function() {{
const el = document.querySelector({json.dumps(selector)});
if (!el) return null;
// Scroll into view so the click lands in-viewport.
el.scrollIntoView({{ block: 'center' }});
// Scroll into view so the click lands in-viewport.
el.scrollIntoView({{ block: 'center' }});
// Clear if requested.
if ({str(clear_first).lower()}) {{
if (el.value !== undefined) {{
el.value = '';
// Nudge React's onChange — the framework reads
// .value via a setter hook, and without firing
// an input event the component state remains
// stale after our value assignment.
el.dispatchEvent(new Event('input', {{bubbles: true}}));
}} else if (el.isContentEditable) {{
el.textContent = '';
el.dispatchEvent(new Event('input', {{bubbles: true}}));
// Clear if requested.
if ({str(clear_first).lower()}) {{
if (el.value !== undefined) {{
el.value = '';
// Nudge React's onChange — the framework reads
// .value via a setter hook, and without firing
// an input event the component state remains
// stale after our value assignment.
el.dispatchEvent(new Event('input', {{bubbles: true}}));
}} else if (el.isContentEditable) {{
el.textContent = '';
el.dispatchEvent(new Event('input', {{bubbles: true}}));
}}
}}
}}
const r = el.getBoundingClientRect();
return {{
x: r.left + r.width / 2,
y: r.top + r.height / 2,
w: r.width,
h: r.height,
}};
}})();
"""
const r = el.getBoundingClientRect();
return {{
x: r.left + r.width / 2,
y: r.top + r.height / 2,
w: r.width,
h: r.height,
}};
}})();
"""
focus_result = await self.evaluate(tab_id, focus_script)
rect = (focus_result or {}).get("result")
if not rect:
# Element not found — wait + retry until timeout.
deadline = asyncio.get_event_loop().time() + timeout_ms / 1000
while asyncio.get_event_loop().time() < deadline:
result = await self.evaluate(tab_id, focus_script)
rect = (result or {}).get("result") if result else None
if rect:
break
await asyncio.sleep(0.1)
focus_result = await self.evaluate(tab_id, focus_script)
rect = (focus_result or {}).get("result")
if not rect:
return {"ok": False, "error": f"Element not found: {selector}"}
# Element not found — wait + retry until timeout.
deadline = asyncio.get_event_loop().time() + timeout_ms / 1000
while asyncio.get_event_loop().time() < deadline:
result = await self.evaluate(tab_id, focus_script)
rect = (result or {}).get("result") if result else None
if rect:
break
await asyncio.sleep(0.1)
if not rect.get("w") or not rect.get("h"):
return {
"ok": False,
"error": f"Element has zero dimensions, can't click to focus: {selector}",
}
if not rect:
return {"ok": False, "error": f"Element not found: {selector}"}
# Fire a real CDP pointer click at the element's center. This is
# what unblocks rich-text editors — JS el.focus() is not enough.
click_x = rect["x"]
click_y = rect["y"]
await self._cdp(
tab_id,
"Input.dispatchMouseEvent",
{"type": "mousePressed", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
)
await self._cdp(
tab_id,
"Input.dispatchMouseEvent",
{"type": "mouseReleased", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
)
await asyncio.sleep(0.15) # Let focus / editor-init animations settle.
if not rect.get("w") or not rect.get("h"):
return {
"ok": False,
"error": f"Element has zero dimensions, can't click to focus: {selector}",
}
# Fire a real CDP pointer click at the element's center. This is
# what unblocks rich-text editors — JS el.focus() is not enough.
click_x = rect["x"]
click_y = rect["y"]
await self._cdp(
tab_id,
"Input.dispatchMouseEvent",
{"type": "mousePressed", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
)
await self._cdp(
tab_id,
"Input.dispatchMouseEvent",
{"type": "mouseReleased", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
)
await asyncio.sleep(0.15) # Let focus / editor-init animations settle.
else:
# No selector — assume the caller already focused the target
# element (e.g. via browser_click_coordinate). Just clear the
# active element if requested, then insert text directly.
if clear_first:
await self.evaluate(tab_id, """
(function() {
const el = document.activeElement;
if (!el) return;
if (el.value !== undefined) {
el.value = '';
el.dispatchEvent(new Event('input', {bubbles: true}));
} else if (el.isContentEditable) {
el.textContent = '';
el.dispatchEvent(new Event('input', {bubbles: true}));
}
})();
""")
if use_insert_text and delay_ms <= 0:
# CDP Input.insertText is the most reliable way to insert
@@ -1086,16 +1143,28 @@ class BeelineBridge:
await asyncio.sleep(delay_ms / 1000)
# Highlight the element that was typed into
rect_result = await self.evaluate(
tab_id,
f"(function(){{const el=document.querySelector("
f"{json.dumps(selector)});if(!el)return null;"
f"const r=el.getBoundingClientRect();"
f"return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()",
)
rect = (rect_result or {}).get("result")
if rect:
await self.highlight_rect(tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector)
if selector is not None:
rect_result = await self.evaluate(
tab_id,
f"(function(){{const el=document.querySelector("
f"{json.dumps(selector)});if(!el)return null;"
f"const r=el.getBoundingClientRect();"
f"return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()",
)
rect = (rect_result or {}).get("result")
if rect:
await self.highlight_rect(tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector)
else:
# Highlight the active element when no selector was provided
rect_result = await self.evaluate(
tab_id,
"(function(){const el=document.activeElement;if(!el)return null;"
"const r=el.getBoundingClientRect();"
"return{x:r.left,y:r.top,w:r.width,h:r.height};})()",
)
rect = (rect_result or {}).get("result")
if rect:
await self.highlight_rect(tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label="active element")
return {"ok": True, "action": "type", "selector": selector, "length": len(text)}
# CDP Input.dispatchKeyEvent modifiers bitmask.
+175
View File
@@ -0,0 +1,175 @@
"""Tool schemas for the bridge remote HTTP API (port 9230)."""
def _param(kind: str, **extra: object) -> dict:
    """Build one parameter spec: ``type`` comes first, extras follow in call order."""
    return {"type": kind, **extra}


def _target() -> dict[str, dict]:
    """Return fresh ``tab_id`` / ``profile`` specs (every tool accepts both)."""
    return {"tab_id": _param("integer"), "profile": _param("string")}


def _point() -> dict[str, dict]:
    """Return fresh required ``x`` / ``y`` specs for coordinate-based tools."""
    return {"x": _param("number", required=True), "y": _param("number", required=True)}


def _tool(description: str, params: dict[str, dict]) -> dict:
    """Wrap a description and its parameter specs into one schema entry."""
    return {"description": description, "params": params}


# Maps tool name -> {"description": str, "params": {param name -> spec}}.
# Each spec carries a "type" and, where applicable, "required", "default",
# "enum" or "items". Insertion order of tools and of params is kept exactly
# as declared here; the helpers above always hand back new dict objects so
# no spec is shared between two tools.
TOOL_SCHEMAS: dict[str, dict] = {
    "browser_click": _tool(
        "Click an element on the page.",
        {
            "selector": _param("string", required=True),
            **_target(),
            "button": _param("string", default="left", enum=["left", "right", "middle"]),
            "double_click": _param("boolean", default=False),
            "timeout_ms": _param("integer", default=5000),
        },
    ),
    "browser_click_coordinate": _tool(
        "Click at specific viewport coordinates (CSS pixels).",
        {
            **_point(),
            **_target(),
            "button": _param("string", default="left"),
        },
    ),
    "browser_type": _tool(
        "Type text into an input element. Omit selector to type into the "
        "already-focused element (e.g. after browser_click_coordinate).",
        {
            "selector": _param("string"),
            "text": _param("string", required=True),
            **_target(),
            "delay_ms": _param("integer", default=0),
            "clear_first": _param("boolean", default=True),
            "timeout_ms": _param("integer", default=30000),
            "use_insert_text": _param("boolean", default=True),
        },
    ),
    "browser_fill": _tool(
        "Fill an input element (clears existing content first).",
        {
            "selector": _param("string", required=True),
            "value": _param("string", required=True),
            **_target(),
            "timeout_ms": _param("integer", default=30000),
        },
    ),
    "browser_press": _tool(
        "Press a keyboard key, optionally with modifiers.",
        {
            "key": _param("string", required=True),
            "selector": _param("string"),
            **_target(),
            "modifiers": _param("array", items="string"),
        },
    ),
    "browser_press_at": _tool(
        "Move mouse to coordinates then press a key.",
        {
            **_point(),
            "key": _param("string", required=True),
            **_target(),
        },
    ),
    "browser_navigate": _tool(
        "Navigate a tab to a URL.",
        {
            "url": _param("string", required=True),
            **_target(),
            "wait_until": _param("string", default="load"),
        },
    ),
    "browser_go_back": _tool("Navigate back in browser history.", _target()),
    "browser_go_forward": _tool("Navigate forward in browser history.", _target()),
    "browser_reload": _tool("Reload the current page.", _target()),
    "browser_scroll": _tool(
        "Scroll the page.",
        {
            "direction": _param("string", default="down", enum=["up", "down", "left", "right"]),
            "amount": _param("integer", default=500),
            **_target(),
        },
    ),
    "browser_hover": _tool(
        "Hover over an element.",
        {
            "selector": _param("string", required=True),
            **_target(),
            "timeout_ms": _param("integer", default=30000),
        },
    ),
    "browser_hover_coordinate": _tool(
        "Hover at CSS pixel coordinates.",
        {**_point(), **_target()},
    ),
    "browser_select": _tool(
        "Select option(s) in a dropdown.",
        {
            "selector": _param("string", required=True),
            "values": _param("array", required=True),
            **_target(),
        },
    ),
    "browser_screenshot": _tool(
        "Take a screenshot of the page (returns base64 PNG).",
        {
            **_target(),
            "full_page": _param("boolean", default=False),
        },
    ),
    "browser_snapshot": _tool(
        "Get the accessibility tree snapshot of the page.",
        _target(),
    ),
    "browser_evaluate": _tool(
        "Evaluate JavaScript in the page.",
        {
            "expression": _param("string", required=True),
            **_target(),
        },
    ),
    "browser_get_text": _tool(
        "Get text content of an element.",
        {
            "selector": _param("string", required=True),
            **_target(),
        },
    ),
    "browser_wait": _tool(
        "Wait for an element or text to appear on the page.",
        {
            "selector": _param("string"),
            "text": _param("string"),
            **_target(),
            "timeout_ms": _param("integer", default=30000),
        },
    ),
}
+7 -2
View File
@@ -175,7 +175,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
@mcp.tool()
async def browser_type(
selector: str,
selector: str | None,
text: str,
tab_id: int | None = None,
profile: str | None = None,
@@ -194,6 +194,10 @@ def register_interaction_tools(mcp: FastMCP) -> None:
submit buttons. See the gcu-browser skill for the full "click-
then-type" pattern.
When ``selector`` is omitted (None), types into the currently
focused element. This is useful after ``browser_click_coordinate``
has already focused the target.
By default uses CDP Input.insertText which is the most reliable
way to insert text into rich editors. Set
``use_insert_text=False`` to fall back to per-character
@@ -202,7 +206,8 @@ def register_interaction_tools(mcp: FastMCP) -> None:
is required).
Args:
selector: CSS selector for the input element
selector: CSS selector for the input element (None to type
into the already-focused element)
text: Text to type
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")