fix: diagnostics
This commit is contained in:
@@ -57,7 +57,9 @@
|
||||
"mcp__gcu-tools__browser_type_focused",
|
||||
"mcp__gcu-tools__browser_wait",
|
||||
"Bash(python3 -c ' *)",
|
||||
"Bash(python3 scripts/debug_queen_prompt.py independent)"
|
||||
"Bash(python3 scripts/debug_queen_prompt.py independent)",
|
||||
"Bash(curl -s --max-time 2 http://127.0.0.1:9230/status)",
|
||||
"Bash(python3 -c \"import json, sys; print\\(json.loads\\(sys.stdin.read\\(\\)\\)['data']['content']\\)\")"
|
||||
],
|
||||
"additionalDirectories": [
|
||||
"/home/timothy/.hive/skills/writing-hive-skills",
|
||||
|
||||
@@ -211,6 +211,34 @@ chrome.runtime.onMessage.addListener((msg, _sender, sendResponse) => {
|
||||
chrome.runtime.onInstalled.addListener(ensureOffscreen);
|
||||
chrome.runtime.onStartup.addListener(ensureOffscreen);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CDP event forwarder — diagnostic channel
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// chrome.debugger.sendCommand (the cdp handler above) only responds to
|
||||
// requests. CDP also emits unsolicited EVENTS (Runtime.consoleAPICalled,
|
||||
// Page.frameResized, Target.targetInfoChanged, …) that the bridge doesn't
|
||||
// see today. Forward the narrow subset we're currently diagnosing so the
|
||||
// Python side can correlate viewport changes with page lifecycle events.
|
||||
// Filtered at the source to keep the wire slim.
|
||||
// Allow-list of unsolicited CDP event names relayed to the bridge. Anything
// not named here is dropped in the extension and never reaches the socket.
const FORWARDED_CDP_EVENTS = new Set([
  "Runtime.consoleAPICalled",
  "Page.lifecycleEvent",
  "Page.frameResized",
  "Page.frameNavigated",
  "Target.targetInfoChanged",
]);

// Relay allow-listed debugger events over the bridge socket as `cdp_event`
// messages. `params` is normalised to an empty object when Chrome supplies
// null/undefined so the receiver always sees an object.
chrome.debugger.onEvent.addListener(({ tabId }, method, params) => {
  if (FORWARDED_CDP_EVENTS.has(method)) {
    wsSend({ type: "cdp_event", tabId, method, params: params ?? {} });
  }
});
|
||||
|
||||
// Periodic alarm keeps the service worker from being garbage-collected and
|
||||
// recreates the offscreen document if it was evicted.
|
||||
chrome.alarms.create("keepAlive", { periodInMinutes: 0.4 });
|
||||
|
||||
@@ -166,6 +166,41 @@ _HIT_ELEMENT_JS = """
|
||||
"""
|
||||
|
||||
|
||||
# Diagnostic probe — installs viewport/visibility listeners on the page
|
||||
# and posts their observations through console.info so the CDP event
|
||||
# channel (Runtime.consoleAPICalled) forwards them to our telemetry.
|
||||
# Idempotent via ``window.__hive_vp_instrumented``.
|
||||
_HIVE_VP_PROBE_JS = """
|
||||
(function () {
|
||||
if (window.__hive_vp_instrumented) return;
|
||||
window.__hive_vp_instrumented = true;
|
||||
function sample(kind) {
|
||||
try {
|
||||
console.info('[hive_vp]', JSON.stringify({
|
||||
kind: kind,
|
||||
innerWidth: window.innerWidth,
|
||||
innerHeight: window.innerHeight,
|
||||
visualW: window.visualViewport && window.visualViewport.width,
|
||||
visualH: window.visualViewport && window.visualViewport.height,
|
||||
docHidden: document.hidden,
|
||||
visibilityState: document.visibilityState,
|
||||
scrollX: window.scrollX,
|
||||
scrollY: window.scrollY,
|
||||
dpr: window.devicePixelRatio,
|
||||
ts: Date.now()
|
||||
}));
|
||||
} catch (e) {}
|
||||
}
|
||||
sample('init');
|
||||
window.addEventListener('resize', function () { sample('resize'); });
|
||||
if (window.visualViewport) {
|
||||
window.visualViewport.addEventListener('resize', function () { sample('visualResize'); });
|
||||
}
|
||||
document.addEventListener('visibilitychange', function () { sample('visibility'); });
|
||||
})();
|
||||
"""
|
||||
|
||||
|
||||
_FOCUSED_ELEMENT_JS = """
|
||||
(function() {
|
||||
function describe(el) {
|
||||
@@ -368,6 +403,23 @@ class BeelineBridge:
|
||||
log_connection_event("hello", {"version": msg.get("version")})
|
||||
continue
|
||||
|
||||
if msg.get("type") == "cdp_event":
|
||||
# Unsolicited CDP event forwarded by the extension.
|
||||
# Narrow diagnostic channel — see FORWARDED_CDP_EVENTS
|
||||
# in browser-extension/background.js. We pick out
|
||||
# the [hive_vp] console probe as a structured
|
||||
# viewport_event telemetry entry and also log the
|
||||
# raw event for correlation with page lifecycle.
|
||||
try:
|
||||
self._handle_cdp_event(
|
||||
msg.get("tabId"),
|
||||
msg.get("method", ""),
|
||||
msg.get("params") or {},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
|
||||
msg_id = msg.get("id")
|
||||
if msg_id and msg_id in self._pending:
|
||||
fut = self._pending.pop(msg_id)
|
||||
@@ -392,6 +444,84 @@ class BeelineBridge:
|
||||
fut.cancel()
|
||||
self._pending.clear()
|
||||
|
||||
def _handle_cdp_event(self, tab_id: int | None, method: str, params: dict) -> None:
|
||||
"""Decode a CDP event forwarded from the extension and route it
|
||||
to telemetry. Keep this method sync and best-effort — a bad
|
||||
event must never break the bridge's read loop.
|
||||
|
||||
Runtime.consoleAPICalled with our ``[hive_vp]`` prefix is
|
||||
split off as a structured ``viewport_event`` entry so the
|
||||
reader can ``grep`` it without touching the raw console log.
|
||||
All other forwarded events are logged verbatim under
|
||||
``cdp_event`` so we can correlate viewport changes with
|
||||
lifecycle / resize / target-info events.
|
||||
"""
|
||||
from .telemetry import write_log
|
||||
|
||||
if method == "Runtime.consoleAPICalled":
|
||||
args = params.get("args") or []
|
||||
first = args[0].get("value") if args and isinstance(args[0], dict) else None
|
||||
payload = args[1].get("value") if len(args) >= 2 and isinstance(args[1], dict) else None
|
||||
|
||||
# Structured [hive_vp] viewport probe → viewport_event
|
||||
if first == "[hive_vp]" and isinstance(payload, str):
|
||||
try:
|
||||
parsed = json.loads(payload)
|
||||
except Exception:
|
||||
parsed = {"_raw": payload}
|
||||
write_log({
|
||||
"type": "viewport_event",
|
||||
"tab_id": tab_id,
|
||||
**parsed,
|
||||
})
|
||||
return
|
||||
|
||||
# Attach-time canary → attach_canary (proves extension
|
||||
# forwarder is alive end-to-end).
|
||||
if first == "[hive_attach_canary]" and isinstance(payload, str):
|
||||
try:
|
||||
parsed = json.loads(payload)
|
||||
except Exception:
|
||||
parsed = {"_raw": payload}
|
||||
write_log({
|
||||
"type": "attach_canary",
|
||||
"tab_id": tab_id,
|
||||
**parsed,
|
||||
})
|
||||
return
|
||||
|
||||
# Everything else — keep a compact row so we can tell
|
||||
# whether ANY console output is flowing through the
|
||||
# pipe. Truncate each arg so a chatty page can't flood
|
||||
# the log.
|
||||
compact = []
|
||||
for a in args[:4]:
|
||||
if not isinstance(a, dict):
|
||||
continue
|
||||
v = a.get("value")
|
||||
if isinstance(v, str):
|
||||
compact.append(v[:120])
|
||||
elif v is not None:
|
||||
compact.append(str(v)[:120])
|
||||
write_log({
|
||||
"type": "cdp_event",
|
||||
"tab_id": tab_id,
|
||||
"method": method,
|
||||
"level": params.get("type"),
|
||||
"args": compact,
|
||||
})
|
||||
return
|
||||
|
||||
# Other forwarded events (Page.lifecycleEvent, frameResized,
|
||||
# frameNavigated, Target.targetInfoChanged) are rare and high
|
||||
# signal — keep the full param dict but truncate strings.
|
||||
write_log({
|
||||
"type": "cdp_event",
|
||||
"tab_id": tab_id,
|
||||
"method": method,
|
||||
"params": params,
|
||||
})
|
||||
|
||||
# Default wait on a bridge command. Callers with known-slow ops
|
||||
# (full-page screenshots on slow networks, AX tree on huge pages)
|
||||
# can pass a longer value via _send(..., timeout=...). Using the
|
||||
@@ -594,12 +724,111 @@ class BeelineBridge:
|
||||
"""Attach CDP debugger to a tab.
|
||||
|
||||
Returns {"ok": bool}.
|
||||
|
||||
First-attach-per-tab triggers Chrome's "<extension> started
|
||||
debugging this browser" infobar, which shrinks the layout
|
||||
viewport by ~30–70 CSS px. The banner's commit is async from
|
||||
the attach return, so a screenshot taken immediately after
|
||||
can capture the pre-banner layout, leaving the viewport
|
||||
cache stale until the next screenshot or
|
||||
``_ensure_viewport_size`` call. We wait a short grace here
|
||||
and proactively prime the viewport cache with the settled
|
||||
(post-banner) dimensions, so the very first coord-conversion
|
||||
after attach already operates on the real frame.
|
||||
"""
|
||||
if tab_id in self._cdp_attached:
|
||||
return {"ok": True, "attached": False, "message": "Already attached"}
|
||||
result = await self._send("cdp.attach", tabId=tab_id)
|
||||
if result.get("ok"):
|
||||
self._cdp_attached.add(tab_id)
|
||||
if not result.get("ok"):
|
||||
return result
|
||||
self._cdp_attached.add(tab_id)
|
||||
# Prime the viewport cache so the first coord-conversion
|
||||
# after attach has a reasonable seed. Also install the
|
||||
# diagnostic viewport-change probe ([hive_vp] console
|
||||
# messages that stream through our CDP-event channel).
|
||||
# Failures are silent — cache will heal on next screenshot
|
||||
# or _ensure_viewport_size call.
|
||||
try:
|
||||
from .tools.inspection import _viewport_sizes
|
||||
|
||||
eval_res = await self._cdp(
|
||||
tab_id,
|
||||
"Runtime.evaluate",
|
||||
{
|
||||
"expression": "({w: window.innerWidth, h: window.innerHeight})",
|
||||
"returnByValue": True,
|
||||
},
|
||||
)
|
||||
inner = (eval_res or {}).get("result", {}).get("value") or {}
|
||||
cw = int(float(inner.get("w") or 0))
|
||||
ch = int(float(inner.get("h") or 0))
|
||||
if cw > 0 and ch > 0:
|
||||
_viewport_sizes[tab_id] = (cw, ch)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Runtime must be enabled for consoleAPICalled events to
|
||||
# fire; Page must be enabled for frame* / lifecycle events
|
||||
# to reach the extension. Page.setLifecycleEventsEnabled
|
||||
# is the critical one — without it Chrome withholds the
|
||||
# DOMContentLoaded / load / firstMeaningfulPaint stream.
|
||||
# Each wrapped in try so a failure on one domain doesn't
|
||||
# block the others.
|
||||
try:
|
||||
await self._cdp(tab_id, "Runtime.enable", {})
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
await self._cdp(tab_id, "Page.enable", {})
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
await self._cdp(tab_id, "Page.setLifecycleEventsEnabled", {"enabled": True})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# [hive_vp] probe — install resize / visibility listeners on
|
||||
# the page so Chrome tells us when the renderer sees a
|
||||
# viewport change. Uses console.info as a cheap transport
|
||||
# through CDP; filtered server-side by the cdp_event
|
||||
# handler. Idempotent via __hive_vp_instrumented.
|
||||
try:
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Runtime.evaluate",
|
||||
{
|
||||
"expression": _HIVE_VP_PROBE_JS,
|
||||
"returnByValue": True,
|
||||
"awaitPromise": False,
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Canary — emit a recognisable marker from the page so we
|
||||
# can verify end-to-end (page → CDP → extension → bridge →
|
||||
# telemetry) is wired. Should produce one ``cdp_event``
|
||||
# with method=Runtime.consoleAPICalled whose args start
|
||||
# ``[hive_attach_canary]``. Zero canary entries after a
|
||||
# run means the extension forwarder is stale and the user
|
||||
# needs to reload the Hive extension in chrome://extensions.
|
||||
try:
|
||||
await self._cdp(
|
||||
tab_id,
|
||||
"Runtime.evaluate",
|
||||
{
|
||||
"expression": (
|
||||
"console.info('[hive_attach_canary]', "
|
||||
"JSON.stringify({tabId: "
|
||||
+ str(tab_id) + ", ts: Date.now()}))"
|
||||
),
|
||||
"returnByValue": True,
|
||||
"awaitPromise": False,
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return result
|
||||
|
||||
async def cdp_detach(self, tab_id: int) -> dict:
|
||||
|
||||
@@ -46,16 +46,17 @@ _screenshot_scales: dict[int, float] = {}
|
||||
|
||||
|
||||
def clear_tab_state(tab_ids) -> None:
|
||||
"""Drop cached screenshot scales for the given tab_ids.
|
||||
"""Drop cached screenshot scales and viewport sizes for the given tab_ids.
|
||||
|
||||
Called when a tab closes or a profile's context is destroyed so stale
|
||||
scale values can't bleed into a later tab that Chrome happens to assign
|
||||
cache values can't bleed into a later tab that Chrome happens to assign
|
||||
the same id. Accepts a single id or any iterable.
|
||||
"""
|
||||
if isinstance(tab_ids, int):
|
||||
tab_ids = (tab_ids,)
|
||||
for tid in tab_ids:
|
||||
_screenshot_scales.pop(tid, None)
|
||||
_viewport_sizes.pop(tid, None)
|
||||
|
||||
|
||||
def _resize_and_annotate(
|
||||
@@ -195,34 +196,71 @@ def _resize_and_annotate(
|
||||
return data, 1.0
|
||||
|
||||
|
||||
async def _ensure_viewport_size(tab_id: int) -> tuple[int, int]:
|
||||
"""Return ``(cssWidth, cssHeight)`` for ``tab_id``, populating the
|
||||
cache via ``window.innerWidth`` / ``window.innerHeight`` on miss.
|
||||
async def _ensure_viewport_size(tab_id: int, _caller: str = "unknown") -> tuple[int, int]:
|
||||
"""Return ``(cssWidth, cssHeight)`` for ``tab_id``, always
|
||||
refreshing from ``window.innerWidth`` / ``window.innerHeight``.
|
||||
|
||||
Used by click / hover / press tools to turn fractional inputs
|
||||
(0..1) into CSS px, and by rect tools to turn CSS-px rects into
|
||||
fractions. Degrades to ``(1, 1)`` if the bridge can't be queried
|
||||
— that makes every coord an identity op, which is a safe no-op
|
||||
(and preferable to crashing).
|
||||
fractions.
|
||||
|
||||
Every call emits a ``viewport_sample`` telemetry entry so we
|
||||
can build a timeline of Chrome's reported viewport across an
|
||||
agent run — needed to diagnose the sessions where cssH changes
|
||||
silently (no visible layout shift) between screenshot and
|
||||
click. The entry records the live value, the cached value, and
|
||||
the delta so the transition point is trivial to locate in
|
||||
``~/.hive/browser-logs/browser-YYYY-MM-DD.jsonl``.
|
||||
|
||||
Falls back to the cached value on evaluate failure, then to
|
||||
``(1, 1)`` if there's no cache — identity-op is a safe no-op.
|
||||
"""
|
||||
cached = _viewport_sizes.get(tab_id)
|
||||
if cached is not None and cached[0] > 0 and cached[1] > 0:
|
||||
return cached
|
||||
bridge = get_bridge()
|
||||
cw = ch = 0
|
||||
evaluate_error: str | None = None
|
||||
try:
|
||||
result = await bridge.evaluate(tab_id, "({w: window.innerWidth, h: window.innerHeight})")
|
||||
inner = (result or {}).get("result") or {}
|
||||
cw = int(float(inner.get("w") or 0))
|
||||
ch = int(float(inner.get("h") or 0))
|
||||
except Exception:
|
||||
cw, ch = 0, 0
|
||||
except Exception as e:
|
||||
evaluate_error = str(e)
|
||||
cw = ch = 0
|
||||
|
||||
cached_before = _viewport_sizes.get(tab_id)
|
||||
|
||||
if cw <= 0 or ch <= 0:
|
||||
# Degraded: bridge didn't return viewport. Cache an identity
|
||||
# so we don't retry on every call; corrects itself after the
|
||||
# next successful browser_screenshot.
|
||||
cw, ch = 1, 1
|
||||
_viewport_sizes[tab_id] = (cw, ch)
|
||||
return cw, ch
|
||||
if cached_before is not None and cached_before[0] > 0 and cached_before[1] > 0:
|
||||
result_cw, result_ch = cached_before
|
||||
else:
|
||||
result_cw, result_ch = 1, 1
|
||||
else:
|
||||
result_cw, result_ch = cw, ch
|
||||
_viewport_sizes[tab_id] = (cw, ch)
|
||||
|
||||
try:
|
||||
from ..telemetry import write_log
|
||||
write_log({
|
||||
"type": "viewport_sample",
|
||||
"tab_id": tab_id,
|
||||
"caller": _caller,
|
||||
"live_w": cw,
|
||||
"live_h": ch,
|
||||
"cached_w": cached_before[0] if cached_before else None,
|
||||
"cached_h": cached_before[1] if cached_before else None,
|
||||
"deltaH_vs_cache": (
|
||||
(ch - cached_before[1])
|
||||
if (cached_before and ch > 0)
|
||||
else None
|
||||
),
|
||||
"returned_w": result_cw,
|
||||
"returned_h": result_ch,
|
||||
"evaluate_error": evaluate_error,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return result_cw, result_ch
|
||||
|
||||
|
||||
def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
@@ -475,7 +513,7 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
return result
|
||||
|
||||
rect = result["rect"]
|
||||
cw, ch = await _ensure_viewport_size(target_tab)
|
||||
cw, ch = await _ensure_viewport_size(target_tab, _caller="browser_shadow_query")
|
||||
cw_f = float(cw) if cw > 0 else 1.0
|
||||
ch_f = float(ch) if ch > 0 else 1.0
|
||||
return {
|
||||
@@ -538,7 +576,7 @@ def register_inspection_tools(mcp: FastMCP) -> None:
|
||||
return result
|
||||
|
||||
rect = result["rect"]
|
||||
cw, ch = await _ensure_viewport_size(target_tab)
|
||||
cw, ch = await _ensure_viewport_size(target_tab, _caller="browser_get_rect")
|
||||
cw_f = float(cw) if cw > 0 else 1.0
|
||||
ch_f = float(ch) if ch > 0 else 1.0
|
||||
return {
|
||||
|
||||
@@ -7,11 +7,13 @@ All operations go through the Beeline extension via CDP - no Playwright required
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Literal
|
||||
|
||||
from fastmcp import FastMCP
|
||||
from mcp.types import ImageContent, TextContent
|
||||
|
||||
from ..bridge import get_bridge
|
||||
from ..telemetry import log_tool_call
|
||||
@@ -28,6 +30,57 @@ _AUTO_SNAPSHOT_SETTLE_S = 0.5
|
||||
AutoSnapshotMode = Literal["default", "simple", "interactive", "off"]
|
||||
|
||||
|
||||
def _text_only(result: dict) -> list:
    """Return ``result`` as a one-element list holding a JSON text block.

    The coordinate interaction tools are declared to return a list, so
    their early-error paths serialise the error dict through this helper
    instead of returning the bare dict.
    """
    serialized = json.dumps(result)
    block = TextContent(type="text", text=serialized)
    return [block]
|
||||
|
||||
|
||||
async def _build_visual_response(result: dict, bridge, target_tab: int | None) -> list:
    """Wrap an interaction result and append an annotated post-action screenshot.

    The result dict is always serialised to a JSON ``TextContent`` block.
    When the action succeeded (``result["ok"]`` truthy) and both a bridge
    and a tab are available, a viewport screenshot is taken, passed through
    ``_resize_and_annotate`` together with the tab's entry in
    ``_interaction_highlights`` (if any), and appended as a JPEG
    ``ImageContent`` block.

    Any failure while producing the image degrades to the text-only
    response. The failure is recorded at debug level so a missing
    screenshot can be diagnosed, but it is never raised to the caller.

    Args:
        result: Interaction result dict returned by the bridge call.
        bridge: Bridge object exposing an awaitable ``screenshot`` method,
            or ``None``.
        target_tab: Tab id the interaction ran against, or ``None``.

    Returns:
        ``[TextContent]`` or ``[TextContent, ImageContent]``.
    """
    text_block = TextContent(type="text", text=json.dumps(result))
    if not result.get("ok") or target_tab is None or bridge is None:
        return [text_block]
    try:
        # Imported lazily inside the function, as in the original code.
        from ..bridge import _interaction_highlights
        from .inspection import _resize_and_annotate

        shot = await bridge.screenshot(target_tab, full_page=False)
        if not shot.get("ok"):
            return [text_block]
        highlights = (
            [_interaction_highlights[target_tab]]
            if target_tab in _interaction_highlights
            else None
        )
        # Image processing is pushed to a worker thread so it does not
        # run on the event loop.
        data, _ = await asyncio.to_thread(
            _resize_and_annotate,
            shot["data"],
            shot.get("cssWidth", 0),
            shot.get("devicePixelRatio", 1.0),
            highlights,
        )
        return [text_block, ImageContent(type="image", data=data, mimeType="image/jpeg")]
    except Exception:
        # Best-effort: the interaction already happened, so still return
        # its result — but leave a trace of why the image is missing.
        logging.getLogger(__name__).debug(
            "post-action screenshot failed for tab %s", target_tab, exc_info=True
        )
        return [text_block]
|
||||
|
||||
|
||||
async def _attach_snapshot(result: dict, bridge, target_tab: int, auto_snapshot_mode: str) -> dict:
|
||||
"""If the interaction succeeded and the caller opted into auto-snapshot,
|
||||
wait for the page to settle and attach an accessibility snapshot under
|
||||
@@ -139,7 +192,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
tab_id: int | None = None,
|
||||
profile: str | None = None,
|
||||
button: Literal["left", "right", "middle"] = "left",
|
||||
) -> dict:
|
||||
) -> list:
|
||||
"""
|
||||
Click at a FRACTION of the viewport (0..1, 0..1).
|
||||
|
||||
@@ -155,6 +208,22 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
tiles, etc.). Proportional positions survive every such
|
||||
transform; pixel coords do not.
|
||||
|
||||
Precision floor: visual coordinate picking from a screenshot
|
||||
is reliable to roughly **3 % of the viewport** (~25–50 CSS px
|
||||
on a 1280×800 window). The y-axis tends to drift more than x
|
||||
because vision models perceive vertical centres less
|
||||
accurately. For targets smaller than that — narrow buttons,
|
||||
checkboxes, dense rows, links — look up the rect with
|
||||
``browser_get_rect`` (selector-based) or ``browser_shadow_query``
|
||||
(web-component) and pass ``rect.cx`` / ``rect.cy`` directly.
|
||||
|
||||
The response is a 2-block list: a JSON text block with the
|
||||
click result, and a fresh annotated screenshot showing where
|
||||
the click landed (red marker at the dispatched coord). Use
|
||||
the screenshot to verify; if the marker is sitting on the
|
||||
wrong element, retry with the rect-derived centre instead of
|
||||
re-eyeballing.
|
||||
|
||||
Args:
|
||||
x: X fraction of the viewport (0..1).
|
||||
y: Y fraction of the viewport (0..1).
|
||||
@@ -163,9 +232,11 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
button: Mouse button to click (left, right, middle)
|
||||
|
||||
Returns:
|
||||
Dict with click result, including ``focused_element``
|
||||
describing what the click focused. ``focused_element.rect``
|
||||
is also in fractions.
|
||||
List with two content blocks: TextContent(JSON of the
|
||||
click result, including ``focused_element`` and its rect
|
||||
in fractions) and ImageContent(annotated post-click
|
||||
screenshot). Falls back to a single-block text-only
|
||||
response on any error.
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
params = {"x": x, "y": y, "tab_id": tab_id, "profile": profile, "button": button}
|
||||
@@ -174,19 +245,19 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
if not bridge or not bridge.is_connected:
|
||||
result = {"ok": False, "error": "Browser extension not connected"}
|
||||
log_tool_call("browser_click_coordinate", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
log_tool_call("browser_click_coordinate", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
target_tab = tab_id or ctx.get("activeTabId")
|
||||
if target_tab is None:
|
||||
result = {"ok": False, "error": "No active tab"}
|
||||
log_tool_call("browser_click_coordinate", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
# Pixel-input guard: legitimate fractions live in [0, 1]. Allow a
|
||||
# small overshoot tolerance for edge targets.
|
||||
@@ -202,12 +273,12 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
),
|
||||
}
|
||||
log_tool_call("browser_click_coordinate", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
try:
|
||||
from .inspection import _ensure_viewport_size
|
||||
|
||||
cw, ch = await _ensure_viewport_size(target_tab)
|
||||
cw, ch = await _ensure_viewport_size(target_tab, _caller="browser_click_coordinate")
|
||||
css_x = x * cw
|
||||
css_y = y * ch
|
||||
click_result = await bridge.click_coordinate(target_tab, css_x, css_y, button=button)
|
||||
@@ -217,7 +288,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
result={**click_result, "cssWidth": cw, "cssHeight": ch},
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return click_result
|
||||
return await _build_visual_response(click_result, bridge, target_tab)
|
||||
except Exception as e:
|
||||
result = {"ok": False, "error": str(e)}
|
||||
log_tool_call(
|
||||
@@ -226,7 +297,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
error=e,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_type(
|
||||
@@ -558,7 +629,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
y: float,
|
||||
tab_id: int | None = None,
|
||||
profile: str | None = None,
|
||||
) -> dict:
|
||||
) -> list:
|
||||
"""
|
||||
Hover at a FRACTION of the viewport (0..1, 0..1).
|
||||
|
||||
@@ -567,6 +638,10 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
``x`` / ``y`` are fractions of the viewport (``0.5`` = center);
|
||||
the tool converts to CSS px internally.
|
||||
|
||||
Same precision-floor caveat as ``browser_click_coordinate``:
|
||||
for sub-3 % targets, use rect-derived coords from
|
||||
``browser_get_rect`` / ``browser_shadow_query``.
|
||||
|
||||
Args:
|
||||
x: X fraction of the viewport (0..1).
|
||||
y: Y fraction of the viewport (0..1).
|
||||
@@ -574,7 +649,11 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with hover result
|
||||
List with two content blocks: TextContent(JSON of the
|
||||
hover result) and ImageContent(annotated post-hover
|
||||
screenshot showing the cursor marker). Useful for
|
||||
verifying tooltip / hover-state changes triggered. Falls
|
||||
back to text-only on error.
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
params = {"x": x, "y": y, "tab_id": tab_id, "profile": profile}
|
||||
@@ -583,19 +662,19 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
if not bridge or not bridge.is_connected:
|
||||
result = {"ok": False, "error": "Browser extension not connected"}
|
||||
log_tool_call("browser_hover_coordinate", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
log_tool_call("browser_hover_coordinate", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
target_tab = tab_id or ctx.get("activeTabId")
|
||||
if target_tab is None:
|
||||
result = {"ok": False, "error": "No active tab"}
|
||||
log_tool_call("browser_hover_coordinate", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
if x > 1.5 or y > 1.5 or x < -0.1 or y < -0.1:
|
||||
result = {
|
||||
@@ -603,12 +682,12 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
"error": (f"Coords ({x}, {y}) look like pixels. This tool expects fractions 0..1 of the viewport."),
|
||||
}
|
||||
log_tool_call("browser_hover_coordinate", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
try:
|
||||
from .inspection import _ensure_viewport_size
|
||||
|
||||
cw, ch = await _ensure_viewport_size(target_tab)
|
||||
cw, ch = await _ensure_viewport_size(target_tab, _caller="browser_hover_coordinate")
|
||||
hover_result = await bridge.hover_coordinate(target_tab, x * cw, y * ch)
|
||||
log_tool_call(
|
||||
"browser_hover_coordinate",
|
||||
@@ -616,7 +695,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
result=hover_result,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return hover_result
|
||||
return await _build_visual_response(hover_result, bridge, target_tab)
|
||||
except Exception as e:
|
||||
result = {"ok": False, "error": str(e)}
|
||||
log_tool_call(
|
||||
@@ -625,7 +704,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
error=e,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_press_at(
|
||||
@@ -634,7 +713,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
key: str,
|
||||
tab_id: int | None = None,
|
||||
profile: str | None = None,
|
||||
) -> dict:
|
||||
) -> list:
|
||||
"""
|
||||
Move mouse to a FRACTION of the viewport (0..1, 0..1), then press a key.
|
||||
|
||||
@@ -644,6 +723,10 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
``x`` / ``y`` are fractions of the viewport; the tool converts
|
||||
to CSS px internally.
|
||||
|
||||
Same precision-floor caveat as ``browser_click_coordinate``:
|
||||
for sub-3 % targets, use rect-derived coords from
|
||||
``browser_get_rect`` / ``browser_shadow_query``.
|
||||
|
||||
Args:
|
||||
x: X fraction of the viewport (0..1).
|
||||
y: Y fraction of the viewport (0..1).
|
||||
@@ -652,7 +735,10 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
profile: Browser profile name (default: "default")
|
||||
|
||||
Returns:
|
||||
Dict with press result
|
||||
List with two content blocks: TextContent(JSON of the
|
||||
press result) and ImageContent(annotated post-press
|
||||
screenshot showing where the key was dispatched). Falls
|
||||
back to text-only on error.
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
params = {"x": x, "y": y, "key": key, "tab_id": tab_id, "profile": profile}
|
||||
@@ -661,19 +747,19 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
if not bridge or not bridge.is_connected:
|
||||
result = {"ok": False, "error": "Browser extension not connected"}
|
||||
log_tool_call("browser_press_at", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
ctx = _get_context(profile)
|
||||
if not ctx:
|
||||
result = {"ok": False, "error": "Browser not started. Call browser_start first."}
|
||||
log_tool_call("browser_press_at", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
target_tab = tab_id or ctx.get("activeTabId")
|
||||
if target_tab is None:
|
||||
result = {"ok": False, "error": "No active tab"}
|
||||
log_tool_call("browser_press_at", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
if x > 1.5 or y > 1.5 or x < -0.1 or y < -0.1:
|
||||
result = {
|
||||
@@ -681,12 +767,12 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
"error": (f"Coords ({x}, {y}) look like pixels. This tool expects fractions 0..1 of the viewport."),
|
||||
}
|
||||
log_tool_call("browser_press_at", params, result=result)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
try:
|
||||
from .inspection import _ensure_viewport_size
|
||||
|
||||
cw, ch = await _ensure_viewport_size(target_tab)
|
||||
cw, ch = await _ensure_viewport_size(target_tab, _caller="browser_press_at")
|
||||
press_result = await bridge.press_key_at(target_tab, x * cw, y * ch, key)
|
||||
log_tool_call(
|
||||
"browser_press_at",
|
||||
@@ -694,7 +780,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
result=press_result,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return press_result
|
||||
return await _build_visual_response(press_result, bridge, target_tab)
|
||||
except Exception as e:
|
||||
result = {"ok": False, "error": str(e)}
|
||||
log_tool_call(
|
||||
@@ -703,7 +789,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
|
||||
error=e,
|
||||
duration_ms=(time.perf_counter() - start) * 1000,
|
||||
)
|
||||
return result
|
||||
return _text_only(result)
|
||||
|
||||
@mcp.tool()
|
||||
async def browser_select(
|
||||
|
||||
Reference in New Issue
Block a user