fix: y-offset inspection

This commit is contained in:
Timothy
2026-04-17 19:24:41 -07:00
parent e972112074
commit 2fd7e9172a
2 changed files with 146 additions and 0 deletions
+107
View File
@@ -101,6 +101,71 @@ def clear_tab_highlights(tab_ids) -> None:
# always sees {tag: "iframe"} and can't tell whether it hit the
# composer or something else inside the frame (e.g. a sidebar item
# in LinkedIn's #interop-outlet messaging overlay).
# Diagnostic probe for the Y-offset hunt. The string below is a
# JavaScript *function expression* taking the click point (x, y); it
# does nothing until the caller wraps it in parentheses and invokes
# it, e.g. `(<this string>)(x, y)`.
#
# The function returns an object with these keys:
#   clickPoint   - the (x, y) that was probed.
#   viewport     - window.innerWidth / innerHeight (w, h) and
#                  window.scrollX / scrollY (sx, sy).
#   hit          - description of document.elementFromPoint(x, y):
#                  tag, id, className (first 120 chars, only when it
#                  is a string), role, text (first 80 chars) and the
#                  element's bounding client rect. null if no element.
#   stack        - descriptions of at most the first 4 entries from
#                  document.elementsFromPoint(x, y); falls back to
#                  just the top element when that API is unavailable.
#   sweep        - tag + text (first 40 chars) of the element found at
#                  y-15, y-5, y+5 and y+15, keyed "y-15", "y-5",
#                  "y+5", "y+15".
#   offsetInRect - the click position relative to the hit rect, as
#                  fractions (xFrac, yFrac) and as pixel deltas from
#                  the rect centre (dxFromCenter, dyFromCenter). null
#                  when there is no hit or the rect has zero width or
#                  height.
#
# If clicks are landing on the wrong element or near a rect boundary,
# we'll see it in the log without having to ask the agent what it
# intended to click.
_HIT_ELEMENT_JS = """
(function(x, y) {
function describe(el) {
if (!el) return null;
var rect = el.getBoundingClientRect();
return {
tag: el.tagName ? el.tagName.toLowerCase() : null,
id: el.id || null,
className: typeof el.className === 'string' ? el.className.substring(0, 120) : null,
role: el.getAttribute ? el.getAttribute('role') : null,
text: ((el.innerText || el.textContent || '') + '').substring(0, 80),
rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height }
};
}
var topEl = document.elementFromPoint(x, y);
var stack = [];
if (typeof document.elementsFromPoint === 'function') {
var els = document.elementsFromPoint(x, y);
for (var i = 0; i < Math.min(els.length, 4); i++) {
stack.push(describe(els[i]));
}
} else {
stack.push(describe(topEl));
}
// Vertical-stripe sweep: query elementFromPoint at y±5 and y±15
// so we can detect "click is just barely outside the element a
// human would have hit". Records only tag+text for compactness.
function brief(el) {
if (!el) return null;
return {
tag: el.tagName ? el.tagName.toLowerCase() : null,
text: ((el.innerText || el.textContent || '') + '').substring(0, 40)
};
}
var sweep = {};
[-15, -5, 5, 15].forEach(function (dy) {
sweep['y' + (dy >= 0 ? '+' : '') + dy] = brief(document.elementFromPoint(x, y + dy));
});
var hit = describe(topEl);
var offsetInRect = null;
if (hit && hit.rect && hit.rect.width > 0 && hit.rect.height > 0) {
offsetInRect = {
xFrac: (x - hit.rect.x) / hit.rect.width,
yFrac: (y - hit.rect.y) / hit.rect.height,
dxFromCenter: x - (hit.rect.x + hit.rect.width / 2),
dyFromCenter: y - (hit.rect.y + hit.rect.height / 2)
};
}
return {
clickPoint: { x: x, y: y },
viewport: { w: window.innerWidth, h: window.innerHeight, sx: window.scrollX, sy: window.scrollY },
hit: hit,
stack: stack,
sweep: sweep,
offsetInRect: offsetInRect
};
})
"""
_FOCUSED_ELEMENT_JS = """
(function() {
function describe(el) {
@@ -1026,6 +1091,23 @@ class BeelineBridge:
y,
)
# Pre-click hit probe — log the element actually under (x, y)
# right before the dispatch so we can compare intended vs
# actual landing for the y-offset hunt. Best-effort, never
# blocks the click.
hit_probe = None
try:
# `return` prefix ensures evaluate() wraps as
# `(function(){ return (...)(x,y) })()` and the value
# actually comes back — without it the wrapper drops
# the result on the floor (returns undefined).
probe_result = await self.evaluate(
tab_id, f"return ({_HIT_ELEMENT_JS})({x}, {y})"
)
hit_probe = (probe_result or {}).get("result")
except Exception:
hit_probe = None
await self._cdp(
tab_id,
"Input.dispatchMouseEvent",
@@ -1043,6 +1125,25 @@ class BeelineBridge:
resp = {"ok": True, "action": "click_coordinate", "x": x, "y": y}
if focused_info:
resp["focused_element"] = focused_info
# Telemetry side-channel: record where the click actually
# landed so we can audit the y-axis offset. Kept out of the
# response payload to avoid bloating what the agent sees.
if hit_probe is not None:
try:
from .telemetry import write_log
write_log({
"type": "click_hit_probe",
"tab_id": tab_id,
"intended": {"x": x, "y": y},
"viewport": hit_probe.get("viewport"),
"hit": hit_probe.get("hit"),
"stack": hit_probe.get("stack"),
"sweep": hit_probe.get("sweep"),
"offsetInRect": hit_probe.get("offsetInRect"),
})
except Exception:
pass
return resp
async def type_text(
@@ -2526,6 +2627,12 @@ class BeelineBridge:
"devicePixelRatio": dpr,
"cssWidth": css_w,
"cssHeight": css_h,
# Raw PNG pixel dims so callers can compare against
# cssWidth/cssHeight × dpr and detect viewport ↔
# capture mismatches (e.g. devtools-attached infobar
# shifting one but not the other).
"pngWidth": png_w,
"pngHeight": png_h,
"data": data,
"mimeType": "image/png",
}
+39
View File
@@ -312,7 +312,46 @@ def register_inspection_tools(mcp: FastMCP) -> None:
data = screenshot_result.get("data")
css_width = screenshot_result.get("cssWidth", 0)
css_height_raw = screenshot_result.get("cssHeight", 0)
dpr = screenshot_result.get("devicePixelRatio", 1.0)
png_w = screenshot_result.get("pngWidth", 0)
png_h = screenshot_result.get("pngHeight", 0)
# Diagnostic for the y-axis offset hunt: clicks convert
# fractions through cssHeight, but the displayed image is
# resized using the PNG's aspect ratio. If png_h differs
# from cssHeight × dpr the two coordinate systems drift.
# X is checked too — should always read delta_x ≈ 0 since
# physical_scale is derived from pngWidth.
try:
from ..telemetry import write_log
expected_w = css_width * dpr
expected_h = css_height_raw * dpr
write_log({
"type": "screenshot_geometry",
"tab_id": target_tab,
"url": screenshot_result.get("url", ""),
"pngWidth": png_w,
"pngHeight": png_h,
"cssWidth": css_width,
"cssHeight": css_height_raw,
"dpr": dpr,
"expectedPngWidth": expected_w,
"expectedPngHeight": expected_h,
"deltaPngWidthPx": png_w - expected_w,
"deltaPngHeightPx": png_h - expected_h,
# If the PNG is taller than cssHeight×dpr (e.g. a
# devtools-attached banner adds rows above the page
# in the capture), clicks land BELOW intended at
# the top of the page and converge to 0 error at
# the bottom. Reverse signs if PNG is shorter.
# Worst-case error in CSS px at fy=0:
"yErrorAtTopCssPx": (
(png_h - expected_h) / dpr if dpr else 0
),
})
except Exception:
pass
# Collect highlights: last interaction from bridge + CDP already drew in browser
from ..bridge import _interaction_highlights