fix: tool credential filter

2026-04-11 12:54:26 -07:00
parent b5e05fefae
commit eeb46a2b3e
6 changed files with 561 additions and 93 deletions
@@ -19,11 +19,18 @@
      "Bash(find /home/timothy/aden/hive/core/framework -name \"*.py\" -type f -exec grep -l \"FileConversationStore\\\\|class.*ConversationStore\" {} \\\\;)",
      "Bash(find /home/timothy/aden/hive/core/framework -name \"*.py\" -exec grep -l \"run_parallel_workers\\\\|create_colony\" {} \\\\;)",
      "Bash(awk '/^    async def execute\\\\\\(self, ctx: AgentContext\\\\\\)/,/^    async def [a-z_]+/ {print NR\": \"$0}' /home/timothy/aden/hive/core/framework/agent_loop/agent_loop.py)",
-      "Bash(grep -r \"max_concurrent_workers\\\\|max_depth\\\\|recursion\\\\|spawn.*bomb\" /home/timothy/aden/hive/core/framework/host/*.py)"
+      "Bash(grep -r \"max_concurrent_workers\\\\|max_depth\\\\|recursion\\\\|spawn.*bomb\" /home/timothy/aden/hive/core/framework/host/*.py)",
+      "Bash(wc -l /home/timothy/aden/hive/tools/src/gcu/browser/*.py /home/timothy/aden/hive/tools/src/gcu/browser/tools/*.py)",
+      "Bash(file /tmp/gcu_verify/*.png)",
+      "Bash(ps -eo pid,cmd)",
+      "Bash(ps -o pid,lstart,cmd -p 746640)",
+      "Bash(kill 746636)",
+      "Bash(ps -eo pid,lstart,cmd)"
    ],
    "additionalDirectories": [
      "/home/timothy/.hive/skills/writing-hive-skills",
-      "/tmp"
+      "/tmp",
+      "/home/timothy/.hive/skills"
    ]
  },
  "hooks": {
@@ -85,6 +85,7 @@ from .template import TemplateResolver
 from .validation import (
    CredentialStatus,
    CredentialValidationResult,
+    compute_unavailable_tools,
    ensure_credential_key_env,
    validate_agent_credentials,
 )
@@ -150,6 +151,7 @@ __all__ = [
    # Validation
    "ensure_credential_key_env",
    "validate_agent_credentials",
+    "compute_unavailable_tools",
    "CredentialStatus",
    "CredentialValidationResult",
    # Interactive setup
@@ -236,6 +236,46 @@ def _presync_aden_tokens(credential_specs: dict, *, force: bool = False) -> None
            )


+def compute_unavailable_tools(nodes: list) -> tuple[set[str], list[str]]:
+    """Return ``(tool_names_to_drop, human_messages)`` for ``nodes``.

+    Runs credential validation *without* raising and collects every tool
+    bound to a failed credential (missing / invalid / Aden-not-connected),
+    so worker-spawn preflights can drop those tools instead of
+    hard-failing the whole spawn. MCP tools are expected to be gated
+    separately at registration time (``_build_mcp_admission_gate``).

+    If validation itself raises, the error is logged and ``(set(), [])``
+    is returned, i.e. nothing is dropped.
+    """
+    try:
+        result = validate_agent_credentials(nodes, verify=False, raise_on_error=False)
+    except Exception as exc:
+        # Best-effort, but this disables filtering entirely — log it visibly.
+        logger.warning("compute_unavailable_tools: validation raised: %s", exc)
+        return set(), []

+    drop: set[str] = set()
+    messages: list[str] = []
+    for status in result.failed:
+        tools = list(status.tools or ())  # materialise once; sliced below
+        if not tools:
+            continue
+        drop.update(tools)
+        if status.aden_not_connected:
+            reason = "aden_not_connected"
+        elif status.available and status.valid is False:
+            reason = "invalid"
+        else:
+            reason = "missing"
+        extra = f" +{len(tools) - 6} more" if len(tools) > 6 else ""
+        messages.append(
+            f"{status.env_var} ({reason}) → drops {len(tools)} tool(s): "
+            f"{', '.join(tools[:6])}{extra}"
+        )
+    return drop, messages
+
+
 def validate_agent_credentials(
    nodes: list,
    quiet: bool = False,
@@ -44,7 +44,7 @@ from typing import TYPE_CHECKING, Any

 from framework.credentials.models import CredentialError
 from framework.host.event_bus import AgentEvent, EventType
-from framework.loader.preload_validation import credential_errors_to_json, validate_credentials
+from framework.loader.preload_validation import credential_errors_to_json
 from framework.server.app import validate_agent_path
 from framework.tools.flowchart_utils import (
    FLOWCHART_TYPES,
@@ -3875,24 +3875,50 @@ def register_queen_lifecycle_tools(
            )

        try:
-            # Pre-flight: validate credentials and resync MCP servers.
-            # Still uses the legacy AgentHost handles because that's
-            # where credentials live; the actual run is via colony.
+            # Pre-flight: compute the set of tools whose credentials are
+            # NOT currently available, and resync MCP servers. We do NOT
+            # hard-fail on missing credentials anymore — instead we drop
+            # the affected tools from the worker's spawn_tools list a
+            # few lines below. Hard-failing here caused unrelated tools
+            # (e.g. GitHub tools leaking into a LinkedIn worker config)
+            # to block the whole spawn with a CredentialError; the fix
+            # is to treat unset credentials as "drop these tools" rather
+            # than "abort the worker".
+            #
+            # Note: the MCP admission gate (_build_mcp_admission_gate in
+            # tool_registry.py) already filters MCP tools at registration
+            # time. This preflight covers the non-MCP path — tools.py
+            # discoveries via discover_from_module — which has no
+            # credential gate of its own.
            loop = asyncio.get_running_loop()
+            unavailable_tools: set[str] = set()

            async def _preflight():
-                cred_error: CredentialError | None = None
+                nonlocal unavailable_tools
                try:
-                    await loop.run_in_executor(
+                    from framework.credentials.validation import compute_unavailable_tools
+
+                    drop, messages = await loop.run_in_executor(
                        None,
-                        lambda: validate_credentials(
-                            legacy.graph.nodes,
-                            interactive=False,
-                            skip=False,
-                        ),
+                        lambda: compute_unavailable_tools(legacy.graph.nodes),
+                    )
+                    unavailable_tools = drop
+                    if drop:
+                        logger.warning(
+                            "run_agent_with_input: dropping %d tool(s) with "
+                            "unavailable credentials from worker spawn: %s",
+                            len(drop),
+                            "; ".join(messages),
+                        )
+                except Exception as exc:
+                    # Validation itself failing (not a credential failure —
+                    # a code error in the validator) should not block the
+                    # spawn. Log and proceed as if nothing was dropped.
+                    logger.warning(
+                        "compute_unavailable_tools raised, proceeding without "
+                        "credential-based tool filtering: %s",
+                        exc,
                    )
-                except CredentialError as e:
-                    cred_error = e

                runner = getattr(session, "runner", None)
                if runner:
@@ -3904,9 +3930,6 @@ def register_queen_lifecycle_tools(
                    except Exception as e:
                        logger.warning("MCP resync failed: %s", e)

-                if cred_error is not None:
-                    raise cred_error
-
            try:
                await asyncio.wait_for(_preflight(), timeout=_START_PREFLIGHT_TIMEOUT)
            except TimeoutError:
@@ -3914,8 +3937,6 @@ def register_queen_lifecycle_tools(
                    "run_agent_with_input preflight timed out after %ds — proceeding",
                    _START_PREFLIGHT_TIMEOUT,
                )
-            except CredentialError:
-                raise  # handled below

            # Build a per-spawn AgentSpec that mirrors the loaded
            # worker's entry-node identity. This is what makes the
@@ -3944,6 +3965,24 @@ def register_queen_lifecycle_tools(
                else []
            )

+            # Drop any tool whose credential isn't available (GitHub
+            # tools when GITHUB_TOKEN is unset, etc). The preflight
+            # above populated ``unavailable_tools``; apply the filter
+            # HERE — before the AgentSpec is built — so the worker
+            # only sees tools it can actually run.
+            dropped_from_names: list[str] = []
+            if unavailable_tools:
+                original = worker_tool_names
+                worker_tool_names = [t for t in original if t not in unavailable_tools]
+                dropped_from_names = [t for t in original if t in unavailable_tools]
+                if dropped_from_names:
+                    logger.warning(
+                        "run_agent_with_input: dropped %d tool(s) from worker "
+                        "AgentSpec due to unavailable credentials: %s",
+                        len(dropped_from_names),
+                        ", ".join(dropped_from_names),
+                    )
+
            spawn_spec = AgentSpec(
                id=f"loaded_worker:{getattr(graph, 'id', 'unknown')}",
                name=getattr(graph, "id", "loaded_worker"),
@@ -3962,6 +4001,26 @@ def register_queen_lifecycle_tools(
            spawn_tools = list(getattr(legacy, "_tools", []) or [])
            spawn_tool_executor = getattr(legacy, "_tool_executor", None)

+            # Same credential-based filter on the live Tool objects
+            # passed to the worker. Without this the worker would still
+            # receive the GitHub tool definitions in its registry —
+            # it just wouldn't see them in its AgentSpec, so the LLM
+            # wouldn't know to use them. Dropping from both lists
+            # makes the filter complete.
+            if unavailable_tools:
+                before = len(spawn_tools)
+                spawn_tools = [
+                    t for t in spawn_tools
+                    if getattr(t, "name", None) not in unavailable_tools
+                ]
+                dropped_count = before - len(spawn_tools)
+                if dropped_count:
+                    logger.info(
+                        "run_agent_with_input: dropped %d tool object(s) from "
+                        "spawn_tools (unavailable credentials)",
+                        dropped_count,
+                    )
+
            worker_ids = await colony.spawn(
                task=task,
                count=1,
@@ -123,6 +123,21 @@ class BeelineBridge:
            logger.warning("Bridge status server could not start on port %d: %s", status_port, e)

    async def stop(self) -> None:
+        # Cancel in-flight bridge requests so any caller stuck in _send
+        # sees CancelledError immediately instead of waiting the full
+        # 30s timeout. Mirrors the cleanup in _handle_connection's
+        # disconnect branch so both exit paths behave the same.
+        for fut in self._pending.values():
+            if not fut.done():
+                fut.cancel()
+        self._pending.clear()
+        # Drop CDP attach cache — next run must re-attach fresh.
+        self._cdp_attached.clear()
+        # Drop highlight state — stale entries would otherwise carry
+        # over into a subsequent run and confuse screenshot annotation.
+        _interaction_highlights.clear()
+        self._ws = None
+
        if self._server:
            self._server.close()
            try:
@@ -222,7 +237,14 @@ class BeelineBridge:
                        fut.cancel()
                self._pending.clear()

-    async def _send(self, type_: str, **params) -> dict:
+    # Default wait on a bridge command. Callers with known-slow ops
+    # (full-page screenshots on slow networks, AX tree on huge pages)
+    # can pass a longer value via _send(..., timeout=...). Using the
+    # same default as the old hard-coded value so existing call sites
+    # don't regress.
+    _DEFAULT_SEND_TIMEOUT_S: float = 30.0
+
+    async def _send(self, type_: str, *, timeout: float | None = None, **params) -> dict:
        """Send a command to the extension and wait for the result."""
        if not self._ws:
            raise RuntimeError("Extension not connected")
@@ -231,27 +253,58 @@ class BeelineBridge:
        fut: asyncio.Future = asyncio.get_event_loop().create_future()
        self._pending[msg_id] = fut
        start = time.perf_counter()
+        effective_timeout = timeout if timeout is not None else self._DEFAULT_SEND_TIMEOUT_S

        log_bridge_message("send", type_, msg_id=msg_id, params=params)

        try:
            await self._ws.send(json.dumps({"id": msg_id, "type": type_, **params}))
-            result = await asyncio.wait_for(fut, timeout=30.0)
+            result = await asyncio.wait_for(fut, timeout=effective_timeout)
            duration_ms = (time.perf_counter() - start) * 1000
            log_bridge_message("send", type_, msg_id=msg_id, result=result, duration_ms=duration_ms)
            return result
        except TimeoutError:
            self._pending.pop(msg_id, None)
            log_bridge_message("send", type_, msg_id=msg_id, error="timeout")
-            raise RuntimeError(f"Bridge command '{type_}' timed out") from None
+            # Include which CDP method (if any) so the caller can see
+            # what actually hung — the generic 'cdp' type is useless
+            # when ten different CDP calls use the same type.
+            detail = f" method={params.get('method')}" if params.get("method") else ""
+            raise RuntimeError(
+                f"Bridge command '{type_}'{detail} timed out after {effective_timeout:.0f}s"
+            ) from None
        except BaseException:
            # CancelledError or any other exception — remove stale future so a late
            # response from the extension doesn't try to resolve a cancelled future.
            self._pending.pop(msg_id, None)
            raise

+    # Lowercase substrings (matched by _is_cdp_dead_session against the
+    # lowercased error text) meaning Chrome detached the debugger out from
+    # under us (tab closed, user opened DevTools, cross-origin nav). The
+    # _cdp_attached entry is then stale; re-attach instead of surfacing "Target not found".
+    _CDP_DEAD_SESSION_MARKERS = (
+        "target closed",
+        "target not found",
+        "not attached",
+        "session closed",
+        "inspector already attached",
+        "no target with given id",
+    )
+
+    def _is_cdp_dead_session(self, exc: BaseException) -> bool:
+        """Return True when ``exc``'s message contains a dead-session marker.
+
+        The exception text is lowercased and checked for any substring
+        listed in ``_CDP_DEAD_SESSION_MARKERS``.
+        """
+        lowered = str(exc).lower()
+        for marker in self._CDP_DEAD_SESSION_MARKERS:
+            if marker in lowered:
+                return True
+        return False
+
    async def _cdp(self, tab_id: int, method: str, params: dict | None = None) -> dict:
-        """Send a CDP command to a tab."""
+        """Send a CDP command to a tab.
+
+        On a dead-session error (Chrome detached externally — tab closed,
+        DevTools opened, cross-origin nav), evict the stale attach
+        cache entry, reattach, and retry once. Without this the Python
+        side would keep assuming it's attached and every subsequent call
+        would hit the same error until someone restarted the bridge.
+        """
        start = time.perf_counter()
        try:
            result = await self._send("cdp", tabId=tab_id, method=method, params=params or {})
@@ -261,6 +314,33 @@ class BeelineBridge:
        except Exception as e:
            duration_ms = (time.perf_counter() - start) * 1000
            log_cdp_command(tab_id, method, params, error=str(e), duration_ms=duration_ms)
+            if self._is_cdp_dead_session(e):
+                logger.info(
+                    "CDP session for tab %d looks dead (%s) — re-attaching and retrying",
+                    tab_id,
+                    str(e)[:120],
+                )
+                self._cdp_attached.discard(tab_id)
+                try:
+                    reattach = await self._send("cdp.attach", tabId=tab_id)
+                    if reattach.get("ok"):
+                        self._cdp_attached.add(tab_id)
+                        retry_start = time.perf_counter()
+                        result = await self._send(
+                            "cdp", tabId=tab_id, method=method, params=params or {}
+                        )
+                        log_cdp_command(
+                            tab_id,
+                            method,
+                            params,
+                            result,
+                            duration_ms=(time.perf_counter() - retry_start) * 1000,
+                        )
+                        return result
+                except Exception as retry_exc:
+                    logger.debug(
+                        "CDP reattach+retry for tab %d failed: %s", tab_id, retry_exc
+                    )
            raise

    async def _try_enable_domain(self, tab_id: int, domain: str) -> None:
@@ -311,7 +391,14 @@ class BeelineBridge:

    async def close_tab(self, tab_id: int) -> dict:
        """Close a tab by ID."""
-        return await self._send("tab.close", tabId=tab_id)
+        result = await self._send("tab.close", tabId=tab_id)
+        # Drop per-tab state — the id may be reused by Chrome much
+        # later, and carrying a stale highlight or "attached" flag
+        # forward would misannotate screenshots or skip a needed
+        # reattach on the reused id.
+        self._cdp_attached.discard(tab_id)
+        _interaction_highlights.pop(tab_id, None)
+        return result

    async def list_tabs(self, group_id: int | None = None) -> dict:
        """List tabs, optionally filtered by group.
@@ -361,6 +448,11 @@ class BeelineBridge:
        if wait_until not in VALID_WAIT_UNTIL:
            wait_until = "load"

+        # Drop the stale interaction highlight before loading a new
+        # page — otherwise the next screenshot will annotate the new
+        # page with a rect from the previous page's coordinate system.
+        _interaction_highlights.pop(tab_id, None)
+
        # Attach debugger if needed
        await self.cdp_attach(tab_id)

@@ -382,9 +474,11 @@ class BeelineBridge:
                    "Runtime.evaluate",
                    {"expression": "document.readyState", "returnByValue": True},
                )
-                ready_state = (
-                    (eval_result or {}).get("result", {}).get("result", {}).get("value", "")
-                )
+                # _cdp returns the CDP response body; Runtime.evaluate shape
+                # is {"result": {"type": ..., "value": ...}} — one "result"
+                # hop, not two. The extra hop was always returning "" and
+                # this entire lifecycle loop was running until the deadline.
+                ready_state = (eval_result or {}).get("result", {}).get("value", "")

                if wait_until == "domcontentloaded" and ready_state in ("interactive", "complete"):
                    break
@@ -416,17 +510,31 @@ class BeelineBridge:
        return {
            "ok": True,
            "tabId": tab_id,
-            "url": (url_result or {}).get("result", {}).get("result", {}).get("value", ""),
-            "title": (title_result or {}).get("result", {}).get("result", {}).get("value", ""),
+            "url": (url_result or {}).get("result", {}).get("value", ""),
+            "title": (title_result or {}).get("result", {}).get("value", ""),
        }

    async def go_back(self, tab_id: int) -> dict:
-        """Navigate back in history."""
+        """Navigate back in history.
+
+        Uses ``history.back()`` via Runtime.evaluate — modern Chrome CDP
+        no longer exposes ``Page.goBack`` / ``Page.goForward`` (removed
+        in favour of ``Page.navigateToHistoryEntry``, which requires
+        first fetching the history list). ``history.back()`` is simpler,
+        works across every Chrome version, and matches what the user
+        expects when they call ``browser_go_back``.
+        """
+        _interaction_highlights.pop(tab_id, None)
        await self.cdp_attach(tab_id)
        await self._cdp(tab_id, "Page.enable")
-        await self._cdp(tab_id, "Page.goBack")
-
-        # Get current URL
+        await self._cdp(
+            tab_id,
+            "Runtime.evaluate",
+            {"expression": "history.back()", "returnByValue": True},
+        )
+        # Give the browser a beat to commit the navigation before we
+        # read the new URL.
+        await asyncio.sleep(0.3)
        result = await self._cdp(
            tab_id,
            "Runtime.evaluate",
@@ -435,15 +543,20 @@ class BeelineBridge:
        return {
            "ok": True,
            "action": "back",
-            "url": (result or {}).get("result", {}).get("result", {}).get("value", ""),
+            "url": (result or {}).get("result", {}).get("value", ""),
        }

    async def go_forward(self, tab_id: int) -> dict:
-        """Navigate forward in history."""
+        """Navigate forward in history. See go_back() for why we use JS."""
+        _interaction_highlights.pop(tab_id, None)
        await self.cdp_attach(tab_id)
        await self._cdp(tab_id, "Page.enable")
-        await self._cdp(tab_id, "Page.goForward")
-
+        await self._cdp(
+            tab_id,
+            "Runtime.evaluate",
+            {"expression": "history.forward()", "returnByValue": True},
+        )
+        await asyncio.sleep(0.3)
        result = await self._cdp(
            tab_id,
            "Runtime.evaluate",
@@ -452,11 +565,12 @@ class BeelineBridge:
        return {
            "ok": True,
            "action": "forward",
-            "url": (result or {}).get("result", {}).get("result", {}).get("value", ""),
+            "url": (result or {}).get("result", {}).get("value", ""),
        }

    async def reload(self, tab_id: int) -> dict:
        """Reload the page."""
+        _interaction_highlights.pop(tab_id, None)
        await self.cdp_attach(tab_id)
        await self._cdp(tab_id, "Page.enable")
        await self._cdp(tab_id, "Page.reload")
@@ -469,7 +583,7 @@ class BeelineBridge:
        return {
            "ok": True,
            "action": "reload",
-            "url": (result or {}).get("result", {}).get("result", {}).get("value", ""),
+            "url": (result or {}).get("result", {}).get("value", ""),
        }

    # ── Interaction ────────────────────────────────────────────────────────────
@@ -759,75 +873,150 @@ class BeelineBridge:
        clear_first: bool = True,
        delay_ms: int = 0,
        timeout_ms: int = 30000,
+        use_insert_text: bool = True,
    ) -> dict:
        """Type text into an element.

-        Uses JavaScript focus for reliability, then CDP key events.
+        Routes through a real CDP pointer click on the target rect BEFORE
+        inserting text. This is critical for rich-text editors (Draft.js,
+        Lexical, ProseMirror, React-controlled contenteditable): those
+        frameworks only register input as "real" after seeing a native
+        focus event sourced from a real pointer interaction — a
+        JS-sourced ``el.focus()`` is ignored, and the submit button
+        stays disabled because the framework's internal state never
+        updates. Sending a CDP click first fires the real
+        pointerdown/pointerup/click/focus sequence that every modern
+        framework listens to.
+
+        After clicking, we insert text via ``Input.insertText`` by
+        default (``use_insert_text=True``). insertText is a dedicated
+        CDP method that asks the browser to commit text into the
+        focused element as if IME just committed it — it works
+        cleanly on rich editors where per-character keyDown events
+        would otherwise be eaten or mis-timed (empirically verified
+        against LinkedIn's Lexical message composer 2026-04-11).
+        Playwright uses the same approach under the hood.
+
+        Set ``use_insert_text=False`` to get the old per-character
+        keyDown/keyUp path when an editor needs precise keystroke
+        timing (autocomplete triggers, code editors that fire on
+        specific chars, ``delay_ms`` typing animations).
        """
        await self.cdp_attach(tab_id)
        await self._try_enable_domain(tab_id, "DOM")
        await self._try_enable_domain(tab_id, "Input")
        await self._try_enable_domain(tab_id, "Runtime")

-        # First, scroll into view and focus via JavaScript (more reliable than CDP)
+        # Find + scroll + (optionally) clear via JS. We still need the
+        # rect, and clearing via `.value = ''` / `.textContent = ''`
+        # is the most reliable way to reset pre-existing content.
        focus_script = f"""
            (function() {{
                const el = document.querySelector({json.dumps(selector)});
-                if (!el) return false;
+                if (!el) return null;

-                // Scroll into view
+                // Scroll into view so the click lands in-viewport.
                el.scrollIntoView({{ block: 'center' }});

-                // Focus the element
-                el.focus();
-
-                // Clear if requested
+                // Clear if requested.
                if ({str(clear_first).lower()}) {{
                    if (el.value !== undefined) {{
                        el.value = '';
+                        // Nudge React's onChange — the framework reads
+                        // .value via a setter hook, and without firing
+                        // an input event the component state remains
+                        // stale after our value assignment.
+                        el.dispatchEvent(new Event('input', {{bubbles: true}}));
                    }} else if (el.isContentEditable) {{
                        el.textContent = '';
+                        el.dispatchEvent(new Event('input', {{bubbles: true}}));
                    }}
                }}

-                return true;
+                const r = el.getBoundingClientRect();
+                return {{
+                    x: r.left + r.width / 2,
+                    y: r.top + r.height / 2,
+                    w: r.width,
+                    h: r.height,
+                }};
            }})();
        """

        focus_result = await self.evaluate(tab_id, focus_script)
-        success = (focus_result or {}).get("result", False)
+        rect = (focus_result or {}).get("result")

-        if not success:
-            # Element not found - wait and retry
+        if not rect:
+            # Element not found — wait + retry until timeout.
            deadline = asyncio.get_event_loop().time() + timeout_ms / 1000
            while asyncio.get_event_loop().time() < deadline:
                result = await self.evaluate(tab_id, focus_script)
-                if result and (result or {}).get("result", False):
-                    success = True
+                rect = (result or {}).get("result") if result else None
+                if rect:
                    break
                await asyncio.sleep(0.1)

-            if not success:
+            if not rect:
                return {"ok": False, "error": f"Element not found: {selector}"}

-        await asyncio.sleep(0.05)  # Wait for focus to take effect
+        if not rect.get("w") or not rect.get("h"):
+            return {
+                "ok": False,
+                "error": f"Element has zero dimensions, can't click to focus: {selector}",
+            }

-        # Type each character using CDP key events
-        for char in text:
-            # Dispatch key down
-            await self._cdp(
-                tab_id,
-                "Input.dispatchKeyEvent",
-                {"type": "keyDown", "text": char},
-            )
-            # Dispatch key up
-            await self._cdp(
-                tab_id,
-                "Input.dispatchKeyEvent",
-                {"type": "keyUp", "text": char},
-            )
-            if delay_ms > 0:
-                await asyncio.sleep(delay_ms / 1000)
+        # Fire a real CDP pointer click at the element's center. This is
+        # what unblocks rich-text editors — JS el.focus() is not enough.
+        click_x = rect["x"]
+        click_y = rect["y"]
+        await self._cdp(
+            tab_id,
+            "Input.dispatchMouseEvent",
+            {"type": "mousePressed", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
+        )
+        await self._cdp(
+            tab_id,
+            "Input.dispatchMouseEvent",
+            {"type": "mouseReleased", "x": click_x, "y": click_y, "button": "left", "clickCount": 1},
+        )
+        await asyncio.sleep(0.15)  # Let focus / editor-init animations settle.
+
+        if use_insert_text and delay_ms <= 0:
+            # CDP Input.insertText is the most reliable way to insert
+            # text into a rich-text editor. It bypasses the keyboard
+            # event pipeline entirely and commits text into the focused
+            # element as if IME just committed it. Works on plain
+            # <input>/<textarea>, contenteditable, Lexical, Draft.js,
+            # ProseMirror, Monaco textarea buffers — verified empirically
+            # against LinkedIn's message composer (Lexical) on 2026-04-11
+            # where the per-char keyDown path left the editor empty.
+            await self._cdp(tab_id, "Input.insertText", {"text": text})
+        else:
+            # Fallback path: per-character keyDown/keyUp with full key,
+            # code, and text fields. Used when the caller explicitly
+            # wants per-keystroke dispatch (autocomplete testing, code
+            # editors that fire on specific chars, animated typing
+            # with ``delay_ms``). Populating ``code`` for ASCII is
+            # needed so frameworks that branch on ``event.code`` see
+            # the right values.
+            for char in text:
+                key_params: dict[str, Any] = {
+                    "type": "keyDown",
+                    "text": char,
+                    "key": char,
+                }
+                if len(char) == 1 and char.isalpha():
+                    key_params["code"] = f"Key{char.upper()}"
+                elif len(char) == 1 and char.isdigit():
+                    key_params["code"] = f"Digit{char}"
+                await self._cdp(tab_id, "Input.dispatchKeyEvent", key_params)
+
+                key_up = {"type": "keyUp", "key": char}
+                if "code" in key_params:
+                    key_up["code"] = key_params["code"]
+                await self._cdp(tab_id, "Input.dispatchKeyEvent", key_up)
+                if delay_ms > 0:
+                    await asyncio.sleep(delay_ms / 1000)

        # Highlight the element that was typed into
        rect_result = await self.evaluate(
@@ -844,12 +1033,47 @@ class BeelineBridge:
            )
        return {"ok": True, "action": "type", "selector": selector, "length": len(text)}

-    async def press_key(self, tab_id: int, key: str, selector: str | None = None) -> dict:
-        """Press a keyboard key.
+    # Bits for the CDP Input.dispatchKeyEvent ``modifiers`` field, by lowercase name.
+    _CDP_MODIFIERS = {"alt": 1, "ctrl": 2, "control": 2, "meta": 4, "cmd": 4, "shift": 8}
+
+    # Fields Chrome expects when a modifier is sent as its OWN key event:
+    # ``key``, ``code``, and Windows virtual key code. press_key sends a
+    # keyDown for each requested modifier before the main key so Chrome
+    # sees it as "held" during the main event, which is what actually
+    # triggers browser shortcuts like Ctrl+A, Cmd+L, Shift+Tab.
+    _MODIFIER_KEYS = {
+        "alt":     {"key": "Alt",     "code": "AltLeft",     "windowsVirtualKeyCode": 18},
+        "ctrl":    {"key": "Control", "code": "ControlLeft", "windowsVirtualKeyCode": 17},
+        "control": {"key": "Control", "code": "ControlLeft", "windowsVirtualKeyCode": 17},
+        "meta":    {"key": "Meta",    "code": "MetaLeft",    "windowsVirtualKeyCode": 91},
+        "cmd":     {"key": "Meta",    "code": "MetaLeft",    "windowsVirtualKeyCode": 91},
+        "shift":   {"key": "Shift",   "code": "ShiftLeft",   "windowsVirtualKeyCode": 16},
+    }
+
+    def _cdp_modifier_mask(self, modifiers: list[str] | None) -> int:
+        """Combine the named modifiers into one CDP ``modifiers`` bitmask (0 if none)."""
+        # Lookup is case-insensitive; names missing from _CDP_MODIFIERS contribute 0.
+        bits = 0
+        for name in modifiers or ():
+            bits |= self._CDP_MODIFIERS.get(name.lower(), 0)
+        return bits
+
+    async def press_key(
+        self,
+        tab_id: int,
+        key: str,
+        selector: str | None = None,
+        modifiers: list[str] | None = None,
+    ) -> dict:
+        """Press a keyboard key, optionally with modifier keys held.

        Args:
            key: Key name like 'Enter', 'Tab', 'Escape', 'ArrowDown', etc.
            selector: Optional selector to focus first
+            modifiers: Optional list of modifier keys to hold while pressing
+                ``key``. Accepted values: "alt", "ctrl"/"control", "meta"/"cmd",
+                "shift". Example: ``modifiers=["ctrl"]`` → Ctrl+key, which
+                enables shortcuts like Ctrl+A, Ctrl+L, Cmd+Enter, Shift+Tab.
        """
        await self.cdp_attach(tab_id)
        await self._try_enable_domain(tab_id, "Input")
@@ -882,19 +1106,110 @@ class BeelineBridge:
        }

        text, key_name = key_map.get(key, (key, key))
+        mod_mask = self._cdp_modifier_mask(modifiers)

-        await self._cdp(
-            tab_id,
-            "Input.dispatchKeyEvent",
-            {"type": "keyDown", "key": key_name, "text": text if text else None},
-        )
-        await self._cdp(
-            tab_id,
-            "Input.dispatchKeyEvent",
-            {"type": "keyUp", "key": key_name, "text": text if text else None},
-        )
+        # With modifiers held, suppress the printable text so that
+        # e.g. Ctrl+A doesn't also type the character "a" into the
+        # focused field (CDP will still fire the shortcut).
+        effective_text = text if (text and mod_mask == 0) else None

-        return {"ok": True, "action": "press", "key": key}
+        # Compute ``code`` and ``windowsVirtualKeyCode`` for the main
+        # key. These are MANDATORY for Chrome's shortcut dispatcher —
+        # without them, Ctrl+A etc. reach the DOM with ``code=""`` and
+        # ``which=0`` and Chrome doesn't recognise them as shortcuts.
+        # Verified empirically on chrome 131 against a real input.
+        main_code: str | None = None
+        main_vk: int | None = None
+        special_vk = {
+            "Enter": (13, "Enter"),
+            "Tab": (9, "Tab"),
+            "Escape": (27, "Escape"),
+            "Backspace": (8, "Backspace"),
+            "Delete": (46, "Delete"),
+            "ArrowUp": (38, "ArrowUp"),
+            "ArrowDown": (40, "ArrowDown"),
+            "ArrowLeft": (37, "ArrowLeft"),
+            "ArrowRight": (39, "ArrowRight"),
+            "Home": (36, "Home"),
+            "End": (35, "End"),
+            "PageUp": (33, "PageUp"),
+            "PageDown": (34, "PageDown"),
+        }
+        if key_name in special_vk:
+            main_vk, main_code = special_vk[key_name]
+        elif len(key_name) == 1 and key_name.isalpha():
+            main_code = f"Key{key_name.upper()}"
+            main_vk = ord(key_name.upper())  # 'A' = 65 ... 'Z' = 90
+        elif len(key_name) == 1 and key_name.isdigit():
+            main_code = f"Digit{key_name}"
+            main_vk = ord(key_name)  # '0' = 48 ... '9' = 57
+
+        # Press each modifier as a separate keyDown BEFORE the main
+        # key. Sending ``modifiers: mask`` on the main key alone isn't
+        # enough — Chrome's shortcut dispatcher looks for a held
+        # modifier event, not just a flag. Matches the Playwright /
+        # Puppeteer sequence. Release modifiers in reverse order after
+        # the main key so the "held" state is correct throughout.
+        pressed_mods: list[dict] = []
+        if modifiers:
+            for m in modifiers:
+                spec = self._MODIFIER_KEYS.get(m.lower())
+                if spec is None:
+                    continue
+                await self._cdp(
+                    tab_id,
+                    "Input.dispatchKeyEvent",
+                    {
+                        "type": "keyDown",
+                        "key": spec["key"],
+                        "code": spec["code"],
+                        "windowsVirtualKeyCode": spec["windowsVirtualKeyCode"],
+                        "modifiers": mod_mask,
+                    },
+                )
+                pressed_mods.append(spec)
+
+        main_down: dict[str, Any] = {
+            # Use rawKeyDown when a modifier is held so Chrome skips
+            # text insertion and routes the event to the shortcut
+            # dispatcher. For plain press_key without modifiers we can
+            # use regular keyDown.
+            "type": "rawKeyDown" if mod_mask else "keyDown",
+            "key": key_name,
+            "text": effective_text,
+            "modifiers": mod_mask,
+        }
+        main_up: dict[str, Any] = {
+            "type": "keyUp",
+            "key": key_name,
+            "text": effective_text,
+            "modifiers": mod_mask,
+        }
+        if main_code is not None:
+            main_down["code"] = main_code
+            main_up["code"] = main_code
+        if main_vk is not None:
+            main_down["windowsVirtualKeyCode"] = main_vk
+            main_up["windowsVirtualKeyCode"] = main_vk
+
+        await self._cdp(tab_id, "Input.dispatchKeyEvent", main_down)
+        await self._cdp(tab_id, "Input.dispatchKeyEvent", main_up)
+
+        # Release modifiers in reverse order.
+        for spec in reversed(pressed_mods):
+            await self._cdp(
+                tab_id,
+                "Input.dispatchKeyEvent",
+                {
+                    "type": "keyUp",
+                    "key": spec["key"],
+                    "code": spec["code"],
+                    "windowsVirtualKeyCode": spec["windowsVirtualKeyCode"],
+                    "modifiers": 0,
+                },
+            )
+
+        return {"ok": True, "action": "press", "key": key, "modifiers": modifiers or []}

    # Shared JS snippet: shadow-piercing querySelector via ">>>" separator
    _SHADOW_QUERY_JS = """
@@ -916,9 +1231,15 @@ class BeelineBridge:
        Example: '#interop-outlet >>> #ember37 >>> p'
        """
        await self.cdp_attach(tab_id)
+        # IMPORTANT: the whole script must be a single IIFE so that
+        # bridge.evaluate() detects it as "already wrapped" and returns
+        # its value. If you let evaluate() re-wrap a script that
+        # starts with a function declaration, the outer wrapper
+        # discards the inner IIFE's return and you always get None —
+        # which is exactly the bug this code had until 2026-04-11.
        script = (
-            f"{self._SHADOW_QUERY_JS}"
            f"(function(){{"
+            f"{self._SHADOW_QUERY_JS}"
            f"const el=_shadowQuery({json.dumps(selector)});"
            f"if(!el)return null;"
            f"const r=el.getBoundingClientRect();"
@@ -1945,7 +2266,10 @@ class BeelineBridge:
                "Runtime.evaluate",
                {"expression": script, "returnByValue": True},
            )
-            found = (result or {}).get("result", {}).get("result", {}).get("value", False)
+            # One "result" hop — see navigate() comment. This was silently
+            # returning False on every poll, so wait_for_selector always
+            # reported "not found" after the full timeout.
+            found = (result or {}).get("result", {}).get("value", False)
            if found:
                return {"ok": True, "selector": selector}
            await asyncio.sleep(0.1)
@@ -1969,7 +2293,8 @@ class BeelineBridge:
                "Runtime.evaluate",
                {"expression": script, "returnByValue": True},
            )
-            found = (result or {}).get("result", {}).get("result", {}).get("value", False)
+            # Same unwrap bug as wait_for_selector.
+            found = (result or {}).get("result", {}).get("value", False)
            if found:
                return {"ok": True, "text": text}
            await asyncio.sleep(0.1)
@@ -178,18 +178,37 @@ def register_interaction_tools(mcp: FastMCP) -> None:
        delay_ms: int = 0,
        clear_first: bool = True,
        timeout_ms: int = 30000,
+        use_insert_text: bool = True,
    ) -> dict:
        """
        Type text into an input element.

+        Automatically routes through a real CDP pointer click on the
+        element before inserting text — so that rich-text editors like
+        Lexical (Gmail, LinkedIn DMs), Draft.js (X compose), and
+        ProseMirror (Reddit) see a native focus event and enable their
+        submit buttons. See the gcu-browser skill for the full "click-
+        then-type" pattern.
+
+        By default uses CDP Input.insertText which is the most reliable
+        way to insert text into rich editors. Set
+        ``use_insert_text=False`` to fall back to per-character
+        keyDown/keyUp events (needed only for code editors that fire
+        on specific keystrokes, or when ``delay_ms`` typing animation
+        is required).
+
        Args:
            selector: CSS selector for the input element
            text: Text to type
            tab_id: Chrome tab ID (default: active tab)
            profile: Browser profile name (default: "default")
-            delay_ms: Delay between keystrokes in ms (default: 0)
+            delay_ms: Delay between keystrokes in ms (default: 0).
+                      Forces the per-keystroke fallback when > 0.
            clear_first: Clear existing text before typing (default: True)
            timeout_ms: Timeout waiting for element (default: 30000)
+            use_insert_text: Use CDP Input.insertText (default: True) for
+                             reliable insertion into rich-text editors.
+                             Set False for per-keystroke dispatch.

        Returns:
            Dict with type result
@@ -223,6 +242,7 @@ def register_interaction_tools(mcp: FastMCP) -> None:
                clear_first=clear_first,
                delay_ms=delay_ms,
                timeout_ms=timeout_ms,
+                use_insert_text=use_insert_text,
            )
            log_tool_call(
                "browser_type",
@@ -277,21 +297,34 @@ def register_interaction_tools(mcp: FastMCP) -> None:
        selector: str | None = None,
        tab_id: int | None = None,
        profile: str | None = None,
+        modifiers: list[str] | None = None,
    ) -> dict:
        """
-        Press a keyboard key.
+        Press a keyboard key, optionally with modifier keys held.

        Args:
-            key: Key to press (e.g., 'Enter', 'Tab', 'Escape', 'ArrowDown')
+            key: Key to press (e.g., 'Enter', 'Tab', 'Escape', 'ArrowDown',
+                 or a character like 'a')
            selector: Focus element first (optional)
            tab_id: Chrome tab ID (default: active tab)
            profile: Browser profile name (default: "default")
+            modifiers: Hold these modifier keys while pressing ``key``. Accepted
+                values (case-insensitive): "alt", "ctrl"/"control", "meta"/"cmd",
+                "shift". Examples: ``modifiers=["ctrl"], key="a"`` = Ctrl+A
+                (select all); ``modifiers=["shift"], key="Tab"`` = Shift+Tab;
+                ``modifiers=["meta"], key="Enter"`` = Cmd+Enter.

        Returns:
            Dict with press result
        """
        start = time.perf_counter()
-        params = {"key": key, "selector": selector, "tab_id": tab_id, "profile": profile}
+        params = {
+            "key": key,
+            "selector": selector,
+            "tab_id": tab_id,
+            "profile": profile,
+            "modifiers": modifiers,
+        }

        bridge = get_bridge()
        if not bridge or not bridge.is_connected:
@@ -312,7 +345,9 @@ def register_interaction_tools(mcp: FastMCP) -> None:
            return result

        try:
-            press_result = await bridge.press_key(target_tab, key, selector=selector)
+            press_result = await bridge.press_key(
+                target_tab, key, selector=selector, modifiers=modifiers
+            )
            log_tool_call(
                "browser_press",
                params,