feat: separate type focus tool

This commit is contained in:
Richard Tang
2026-04-16 16:08:43 -07:00
parent 916803889f
commit 28cad2376c
4 changed files with 130 additions and 27 deletions
+13 -5
View File
@@ -62,10 +62,11 @@ def get_mcp_client() -> MCPClient:
)
_mcp_client = MCPClient(config)
_mcp_client.connect()
tools = _mcp_client.list_tools()
logger.info(
"Connected to GCU server, %d tools available: %s",
len(_mcp_client.get_tools()),
[t.name for t in _mcp_client.get_tools()],
len(tools),
[t.name for t in tools],
)
return _mcp_client
@@ -90,7 +91,7 @@ async def handle_status(request: web.Request) -> web.Response:
"""GET /status — connection status."""
try:
client = get_mcp_client()
tools = client.get_tools()
tools = client.list_tools()
return web.json_response({
"connected": True,
"tools_count": len(tools),
@@ -103,7 +104,7 @@ async def handle_tools(request: web.Request) -> web.Response:
"""GET /tools — list available tools with their schemas."""
try:
client = get_mcp_client()
tools = client.get_tools()
tools = client.list_tools()
schemas = {}
for tool in tools:
props = tool.input_schema.get("properties", {})
@@ -236,11 +237,18 @@ def main() -> None:
# Connect to GCU server eagerly so we fail fast if something is wrong
try:
get_mcp_client()
client = get_mcp_client()
except Exception as e:
logger.error("Failed to connect to GCU server: %s", e)
sys.exit(1)
# Auto-start browser context so tools work immediately
try:
result = client.call_tool("browser_start", {})
logger.info("browser_start: %s", result)
except Exception as e:
logger.warning("browser_start failed (may already be started): %s", e)
app = create_app()
async def on_startup(app: web.Application) -> None:
+31 -13
View File
@@ -457,9 +457,12 @@ let currentView = 'grid';
// Tool categories for sidebar grouping
const CATEGORIES = {
'Interactions': ['browser_click', 'browser_click_coordinate', 'browser_type', 'browser_fill', 'browser_press', 'browser_press_at', 'browser_hover', 'browser_hover_coordinate', 'browser_select', 'browser_scroll'],
'Lifecycle': ['browser_start', 'browser_stop', 'browser_status'],
'Tabs': ['browser_tabs', 'browser_open', 'browser_close', 'browser_focus'],
'Navigation': ['browser_navigate', 'browser_go_back', 'browser_go_forward', 'browser_reload'],
'Inspection': ['browser_screenshot', 'browser_snapshot', 'browser_get_text', 'browser_evaluate', 'browser_wait'],
'Interactions': ['browser_click', 'browser_click_coordinate', 'browser_type', 'browser_fill', 'browser_press', 'browser_press_at', 'browser_hover', 'browser_hover_coordinate', 'browser_select', 'browser_scroll'],
'Inspection': ['browser_screenshot', 'browser_snapshot', 'browser_console', 'browser_get_text', 'browser_evaluate', 'browser_wait'],
'Advanced': ['browser_resize', 'browser_upload', 'browser_dialog', 'browser_coords'],
};
async function init() {
@@ -474,15 +477,18 @@ async function checkStatus() {
try {
const res = await fetch(`${API_BASE}/status`);
const data = await res.json();
if (data.connected || data.bridge_connected) {
if (data.connected) {
badge.textContent = 'connected';
badge.className = 'connected';
const contexts = Object.entries(data.contexts || {});
if (contexts.length > 0) {
const parts = contexts.map(([k,v]) => `${k}: tab ${v.activeTabId}`);
ctx.textContent = parts.join(', ');
if (data.tools_count) {
ctx.textContent = `${data.tools_count} tools`;
} else if (data.contexts) {
const contexts = Object.entries(data.contexts);
ctx.textContent = contexts.length > 0
? contexts.map(([k,v]) => `${k}: tab ${v.activeTabId}`).join(', ')
: 'no active context';
} else {
ctx.textContent = 'no active context';
ctx.textContent = '';
}
} else {
badge.textContent = 'disconnected';
@@ -511,13 +517,25 @@ async function loadTools() {
function renderSidebar() {
const sidebar = document.getElementById('sidebar');
let html = '';
const categorized = new Set();
for (const [group, tools] of Object.entries(CATEGORIES)) {
const available = tools.filter(t => toolSchemas[t]);
if (available.length === 0) continue;
html += `<div class="sidebar-group"><div class="sidebar-group-label">${group}</div>`;
for (const tool of tools) {
if (toolSchemas[tool]) {
const shortName = tool.replace('browser_', '');
html += `<button class="sidebar-item" data-tool="${tool}" onclick="scrollToTool('${tool}')">${shortName}</button>`;
}
for (const tool of available) {
categorized.add(tool);
const shortName = tool.replace('browser_', '');
html += `<button class="sidebar-item" data-tool="${tool}" onclick="scrollToTool('${tool}')">${shortName}</button>`;
}
html += '</div>';
}
// Show any uncategorized tools from the server
const other = Object.keys(toolSchemas).filter(t => !categorized.has(t));
if (other.length > 0) {
html += `<div class="sidebar-group"><div class="sidebar-group-label">Other</div>`;
for (const tool of other) {
const shortName = tool.replace('browser_', '');
html += `<button class="sidebar-item" data-tool="${tool}" onclick="scrollToTool('${tool}')">${shortName}</button>`;
}
html += '</div>';
}
+13 -2
View File
@@ -23,9 +23,9 @@ TOOL_SCHEMAS: dict[str, dict] = {
},
},
"browser_type": {
"description": "Type text into an input element. Omit selector to type into the already-focused element (e.g. after browser_click_coordinate).",
"description": "Type text into an input element.",
"params": {
"selector": {"type": "string"},
"selector": {"type": "string", "required": True},
"text": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
@@ -45,6 +45,17 @@ TOOL_SCHEMAS: dict[str, dict] = {
"timeout_ms": {"type": "integer", "default": 30000},
},
},
"browser_type_focused": {
"description": "Type text into the already-focused element. Use after browser_click_coordinate has focused the target. Faster than browser_press for multi-character input.",
"params": {
"text": {"type": "string", "required": True},
"tab_id": {"type": "integer"},
"profile": {"type": "string"},
"delay_ms": {"type": "integer", "default": 0},
"clear_first": {"type": "boolean", "default": True},
"use_insert_text": {"type": "boolean", "default": True},
},
},
"browser_press": {
"description": "Press a keyboard key, optionally with modifiers.",
"params": {
+73 -7
View File
@@ -175,7 +175,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
@mcp.tool()
async def browser_type(
selector: str | None,
selector: str,
text: str,
tab_id: int | None = None,
profile: str | None = None,
@@ -194,10 +194,6 @@ def register_interaction_tools(mcp: FastMCP) -> None:
submit buttons. See the gcu-browser skill for the full "click-
then-type" pattern.
When ``selector`` is omitted (None), types into the currently
focused element useful after ``browser_click_coordinate``
has already focused the target.
By default uses CDP Input.insertText which is the most reliable
way to insert text into rich editors. Set
``use_insert_text=False`` to fall back to per-character
@@ -206,8 +202,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
is required).
Args:
selector: CSS selector for the input element (None to type
into the already-focused element)
selector: CSS selector for the input element
text: Text to type
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
@@ -298,6 +293,77 @@ def register_interaction_tools(mcp: FastMCP) -> None:
timeout_ms=timeout_ms,
)
@mcp.tool()
async def browser_type_focused(
    text: str,
    tab_id: int | None = None,
    profile: str | None = None,
    delay_ms: int = 0,
    clear_first: bool = True,
    use_insert_text: bool = True,
) -> dict:
    """
    Type text into whichever element currently holds focus.

    Meant to be called after browser_click_coordinate (or
    browser_click) has focused the target element. Text is inserted
    via CDP Input.insertText by default, which is much faster than
    repeated browser_press calls for multi-character input.

    Args:
        text: Text to type
        tab_id: Chrome tab ID (default: active tab)
        profile: Browser profile name (default: "default")
        delay_ms: Delay between keystrokes in ms (default: 0).
            Forces per-keystroke dispatch when > 0.
        clear_first: Clear existing text before typing (default: True)
        use_insert_text: Use CDP Input.insertText (default: True)

    Returns:
        Dict with type result. When the bridge is not connected, the
        browser context is missing, no tab can be resolved, or the
        bridge call raises, the dict is
        ``{"ok": False, "error": <message>}``.
    """
    tool_name = "browser_type_focused"
    started_at = time.perf_counter()
    # Only these three arguments are recorded in the tool-call log.
    call_args = {"text": text, "tab_id": tab_id, "profile": profile}

    def _reject(message: str) -> dict:
        # Shared early-exit path: log the failure (no duration) and return it.
        failure = {"ok": False, "error": message}
        log_tool_call(tool_name, call_args, result=failure)
        return failure

    def _elapsed_ms() -> float:
        return (time.perf_counter() - started_at) * 1000

    bridge = get_bridge()
    if not (bridge and bridge.is_connected):
        return _reject("Browser extension not connected")

    ctx = _get_context(profile)
    if not ctx:
        return _reject("Browser not started. Call browser_start first.")

    # A falsy tab_id (None or 0) falls back to the context's active tab.
    tab = tab_id or ctx.get("activeTabId")
    if tab is None:
        return _reject("No active tab")

    try:
        # None is passed in the second position (presumably the selector
        # slot -- confirm against bridge.type_text) so that no element is
        # looked up and the text goes to the focused element.
        outcome = await bridge.type_text(
            tab,
            None,
            text,
            clear_first=clear_first,
            delay_ms=delay_ms,
            use_insert_text=use_insert_text,
        )
        log_tool_call(
            tool_name,
            call_args,
            result=outcome,
            duration_ms=_elapsed_ms(),
        )
        return outcome
    except Exception as e:
        # Tool boundary: report any failure to the caller as an error dict
        # instead of letting it propagate.
        failure = {"ok": False, "error": str(e)}
        log_tool_call(
            tool_name,
            call_args,
            error=e,
            duration_ms=_elapsed_ms(),
        )
        return failure
@mcp.tool()
async def browser_press(
key: str,