Compare commits

...

81 Commits

Author SHA1 Message Date
bryan 59ccbba810 fix: suppress typing flicker on queue auto-flush and dedup user bubble on bootstrap race 2026-04-20 15:30:01 -07:00
bryan 16ea9b52d3 feat: queue messages during queen turns in colony/queen chats 2026-04-20 12:45:38 -07:00
bryan dcbfd4ab01 feat: add pending-queue hook and Steer/Cancel UI in ChatPanel 2026-04-20 12:45:14 -07:00
bryan b762020793 refactor: carry executionId on user SSE events 2026-04-20 12:44:56 -07:00
Naresh Chandanbatve 199c3a235e feat(tool): add Prometheus tool support (#7047)
Adds prometheus_query (instant PromQL) and prometheus_query_range
(time-range) tools. Includes credential spec, /-/ready health check,
unit tests, and docs.

Optional Bearer token and Basic auth via env vars
(PROMETHEUS_TOKEN, PROMETHEUS_USERNAME/PASSWORD).

Fixes #6945.
2026-04-20 18:13:49 +08:00
Kavin a881fe68da fix(llm): ensure store=False is passed to Codex Responses API (#7089)
Forces store: false into the extra_body payload for Codex-style models
so that LiteLLM successfully passes it down to the ChatGPT Responses
API backend, fixing the BadRequestError.

Fixes #7056.

Original investigation and first PR by @Darshan174 (#7065).

Co-authored-by: Darshan174 <Darshan002321@gmail.com>
2026-04-20 17:54:41 +08:00
Hundao 6b9040477f fix(ci): unbreak main, ruff format browser and refresh test_model_catalog (#7095)
* chore: ruff format browser bridge and tools

* fix(tests): refresh test_model_catalog expectations after catalog drift
2026-04-20 17:23:26 +08:00
Richard Tang bb39424e99 chore: update model context config 2026-04-19 15:19:26 -07:00
Richard Tang b27c7a029e chore: update openrouter model selections 2026-04-19 15:10:36 -07:00
Timothy a3433f2c9e Merge branch 'main' into fix/image-coordinate-precision 2026-04-19 13:25:41 -07:00
Richard Tang 24ef2c247d chore: tidy editorconfig and gitattributes, drop unused reference 2026-04-19 13:24:34 -07:00
Richard Tang a8f9661626 chore: remove unused files 2026-04-19 13:19:01 -07:00
Timothy 3005bcaa96 chore: bump extension version to 1.0.1 2026-04-19 13:06:51 -07:00
Timothy 40c4591d65 fix: extension icons 2026-04-19 13:06:13 -07:00
Timothy e2bfb9d3af fix: frame resize 2026-04-19 13:02:12 -07:00
Timothy e55cea97ef fix: diagnostics 2026-04-19 12:52:04 -07:00
Timothy ddaafe0307 Merge remote-tracking branch 'origin/main' into fix/image-coordinate-precision 2026-04-18 23:32:28 -07:00
Richard Tang c17205a453 test: align stale tests with current behavior 2026-04-18 22:02:03 -07:00
Richard Tang 8e4468851c chore: ruff format 2026-04-18 21:45:34 -07:00
Richard Tang ccf4216841 fix: resolve merge conflict markers and ruff issues 2026-04-18 21:45:11 -07:00
Richard Tang 82ffcb17ac Merge remote-tracking branch 'origin/main' into fix/colony-skill-leak 2026-04-18 21:36:23 -07:00
Richard Tang 4da5bcc1e4 feat: queen bar in colony 2026-04-18 21:30:19 -07:00
Richard Tang 3df7194003 feat: worker tab by clicking on the worker 2026-04-18 21:21:22 -07:00
Richard Tang 6f1f27b6e9 feat: load table by colony 2026-04-18 20:55:20 -07:00
Richard Tang 7b52ed9fa7 fix: outdated jsonledger 2026-04-18 20:35:05 -07:00
Richard Tang 4d32526a29 feat: real available parallel size 2026-04-18 20:18:54 -07:00
Richard Tang 656401e199 feat: real snapshot after interaction 2026-04-18 19:51:52 -07:00
Richard Tang f2e51157dc feat: snapshot related prompts 2026-04-18 19:39:00 -07:00
Timothy 0d13c805b1 fix: colony skill leakage 2026-04-18 15:34:31 -07:00
Kowshik Mente b1ec64438c fix(runtime): prevent session restart until cancelled execution fully terminates (#7001)
* fix(runtime): prevent dual execution after forced cancel

- keep bookkeeping until task termination
- block restart while any execution task is still alive
- make execution registration atomic under lock
- avoid premature cleanup on cancel timeout
- add regression tests for forced-cancel restart scenarios

* chore: ruff format and import order

---------

Co-authored-by: kowshikmente <kowshikmente@kowshikmentes-MacBook-Pro.local>
Co-authored-by: hundao <alchemy_wimp@hotmail.com>
2026-04-18 19:36:50 +08:00
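The bullet points above describe a classic asyncio lifecycle guard: registration atomic under a lock, bookkeeping kept until the task truly terminates, restart refused while anything is still alive. A minimal sketch under assumed names (`ExecutionRegistry` is hypothetical; the real fix lives in the session runtime):

```python
import asyncio


class ExecutionRegistry:
    """Restart guard sketch for forced-cancel scenarios."""

    def __init__(self) -> None:
        self._lock = asyncio.Lock()
        self._tasks: dict[str, asyncio.Task] = {}

    async def register(self, execution_id: str, coro) -> asyncio.Task:
        async with self._lock:  # atomic check-and-register
            if any(not t.done() for t in self._tasks.values()):
                # A cancelled execution may still be unwinding; block the
                # restart rather than letting two executions run at once.
                raise RuntimeError("prior execution still terminating")
            task = asyncio.create_task(coro)
            self._tasks[execution_id] = task
            # Clean bookkeeping only when the task fully terminates —
            # cancellation alone is not enough.
            task.add_done_callback(lambda t: self._tasks.pop(execution_id, None))
            return task
```

Because the guard checks `t.done()` rather than "was cancel requested", a forced cancel that hangs in cleanup keeps blocking restarts, which is exactly the dual-execution window the commit closes.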
Hundao 90aadf247a fix(ci): unbreak main — ruff format, test_refs, test_model_catalog (#7084)
* fix(ci): apply ruff format to browser tool files

Refs #7083

* fix(ci): unbreak test_refs (img regression) and test_model_catalog

test_refs:
- Add `img` back to CONTENT_ROLES so named images get refs again. The
  recent `cc6ec97a feat: multiple modes browser snapshot tool` refactor
  renamed NAMED_CONTENT_ROLES → CONTENT_ROLES and accidentally dropped
  `img`, breaking `test_named_content_roles_get_refs`.
- Drop the `navigation` assertion from `test_skips_structural_roles`.
  That same refactor intentionally added landmark roles (navigation,
  main, listitem) to CONTENT_ROLES so AI agents can ref them by name,
  and the test was not updated to reflect that.

test_model_catalog:
- Add 5 openrouter models that were added to model_catalog.json by
  #7081 (UI/UX improvements) but not reflected in the test.

Refs #7083

* fix(ci): wait for event propagation in subagent report test on Windows

`test_worker_report_emits_subagent_report_event` waited only for
`worker.is_active` to flip to False, then immediately asserted on the
collected events. On Windows the event loop scheduling differs enough
that the SUBAGENT_REPORT subscriber callback can run a few ticks after
the worker is marked inactive, so the assertion fires against an empty
list. Wait for both conditions.

Refs #7083
2026-04-18 19:09:15 +08:00
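The Windows fix above boils down to polling for every postcondition instead of asserting after the first one flips. A hedged sketch of such a wait helper (`wait_until` is illustrative, not the repo's actual test utility):

```python
import asyncio
import time


async def wait_until(*conditions, timeout: float = 5.0, interval: float = 0.01) -> bool:
    """Poll until every condition callable is truthy, or time out.

    The flaky test waited only for `not worker.is_active`; on Windows the
    SUBAGENT_REPORT subscriber can fire a few ticks later, so the fix is
    to also wait for the collected events list to be non-empty.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if all(cond() for cond in conditions):
            return True
        await asyncio.sleep(interval)  # yield so subscriber callbacks can run
    return False
```

Usage in the test would then be roughly `assert await wait_until(lambda: not worker.is_active, lambda: events)` before any assertion on the events themselves.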
RichardTang-Aden 49317ac5f5 Merge pull request #7081 from vincentjiang777/feat/ui-ux-improvements
feat: UI/UX improvements across BYOK, org chart, profiles, and prompt…
2026-04-17 21:03:01 -07:00
Richard Tang 7216e9d9f0 chore: ruff lint and format
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 21:01:18 -07:00
Richard Tang 91b1070d80 Merge remote-tracking branch 'origin/main' into feat/ui-ux-improvements
# Conflicts:
#	core/frontend/src/components/SidebarQueenItem.tsx
2026-04-17 20:58:20 -07:00
Richard Tang 08aeffd977 chore: more create colony logs 2026-04-17 20:27:22 -07:00
Richard Tang 651b57b928 feat: hive open performance issue 2026-04-17 20:16:01 -07:00
Richard Tang 8c10fc2e1c fix: queen dm session loading 2026-04-17 20:11:48 -07:00
Richard Tang e3154ca0ee fix: colony session loading 2026-04-17 19:45:31 -07:00
Richard Tang 84a92af41b fix: patch the correct db path 2026-04-17 19:40:59 -07:00
Richard Tang 78fc62210a feat: table tab improvements 2026-04-17 19:25:15 -07:00
Timothy 2fd7e9172a fix: y-offset inspection 2026-04-17 19:24:41 -07:00
Richard Tang ca63fd9ee9 feat: create skill along with colony 2026-04-17 19:03:28 -07:00
Richard Tang b99f25c8d7 feat: DataGrid for colony side bar 2026-04-17 18:47:19 -07:00
Timothy e972112074 feature: merge sidebars with functionalities 2026-04-17 18:12:18 -07:00
Vincent Jiang 6e97191f21 feat: UI/UX improvements across BYOK, org chart, profiles, and prompt library
- BYOK: unified styling (remove purple, consistent grey headers), model selector opens settings modal directly, backend validates API keys before activation
- Org chart: queen profiles are now editable (name, title, about, skills, achievement) with changes persisted to YAML
- Avatars: upload profile pictures for queens and user with client-side compression, displayed across org chart, sidebar, chat, and header
- Colony deletion: await backend delete and re-fetch to prevent ghost colonies
- Prompt library: add pagination (24/page), custom prompt upload/delete with backend persistence
- Settings modal: performance cleanup (remove backdrop-blur, reduce transitions)
- Fix ensure_default_queens() overwriting user edits on every API call

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-17 14:21:18 -07:00
Richard Tang 023fb9b8d0 refactor: use SSE for worker and browser status 2026-04-17 14:11:19 -07:00
Richard Tang b7924b1ad0 feat: colony tab by group 2026-04-17 14:05:55 -07:00
Timothy b6640b8592 fix: prevent watcher from being GC'd 2026-04-17 13:13:39 -07:00
Timothy 43a1d5797c Merge branch 'fix/worker-tab-groups' into feature/clean-context 2026-04-17 12:35:09 -07:00
Timothy 5cb814f2dc fix: worker tab groups 2026-04-17 12:34:38 -07:00
Richard Tang f52c44821a feat: partial validation after typing 2026-04-17 12:16:13 -07:00
Richard Tang 97432ea08c feat: colony side bar 2026-04-17 11:52:49 -07:00
Timothy 0abd1125b7 fix: parallel execution 2026-04-17 11:20:06 -07:00
Timothy 803337ec74 feat: new queen phases 2026-04-17 06:19:15 -07:00
Timothy 2b055d4d42 fix: simplify system prompt 2026-04-17 04:47:51 -07:00
Timothy dde4dfaec9 Merge branch 'feature/colony-sqlite' into feature/clean-context 2026-04-17 04:12:35 -07:00
Timothy 6be026fcb1 fix: partial parts and system nudge 2026-04-17 04:06:59 -07:00
Richard Tang 3c2161aad5 chore: release v0.10.2
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-16 23:43:20 -07:00
Richard Tang e74ebe6835 feat: reduce gemini context window to improve reliability 2026-04-16 23:41:24 -07:00
Richard Tang d788e5b2f7 chore: ruff lint 2026-04-16 23:33:48 -07:00
Richard Tang 583a5b41b4 fix: unused reference 2026-04-16 23:23:38 -07:00
Richard Tang 83cc44bdef Merge branch 'feature/full-image-size' 2026-04-16 23:15:59 -07:00
Timothy 558813e7fa feat: fraction-based visual clicks 2026-04-16 22:36:41 -07:00
Timothy aba0ff07ba fix: model invariant screenshot 2026-04-16 20:29:05 -07:00
Timothy b50f237506 fix: screenshot skill diction 2026-04-16 15:16:22 -07:00
Timothy 59b1bc9338 fix: tool grouping logic 2026-04-16 12:55:10 -07:00
Timothy 37672c5581 fix: remove worker tool from dm 2026-04-16 12:23:19 -07:00
Timothy 7b0948cd62 Merge branch 'refactor/worker-message' into feature/colony-sqlite 2026-04-16 11:26:46 -07:00
Timothy 4aa5fd7a90 refactor: align worker display 2026-04-16 11:26:32 -07:00
Richard Tang d20b617008 feat: queen profile in message bubbles 2026-04-16 11:21:02 -07:00
Timothy c4ee12532f fix: worker message display 2026-04-16 11:20:17 -07:00
Richard Tang 36ebf27e3e feat: make sidebar size adjustable 2026-04-16 11:15:47 -07:00
Richard Tang ae1599c66a feat: queen profile side bar 2026-04-16 11:15:30 -07:00
Richard Tang 810cf5a6d3 Merge remote-tracking branch 'origin/main' into feature/colony-sqlite 2026-04-16 11:10:34 -07:00
Timothy 1ee0d5a2e8 feat: worker bubble display 2026-04-16 10:48:44 -07:00
Richard Tang be94c611bd fix: queen fail when no worker is running 2026-04-15 22:14:36 -07:00
Timothy 45df68c146 feat: ensure sqlite3 installation 2026-04-15 18:34:33 -07:00
Timothy 2231dc5742 fix: delete spilled skill 2026-04-15 18:14:10 -07:00
Timothy 446844b2ad fix: tighten worker with sqlite skills 2026-04-15 18:11:15 -07:00
Timothy e719523434 fix: remove conflicting tools 2026-04-15 17:38:05 -07:00
Timothy 79c5d43006 feat: colony sqlite and skills 2026-04-15 15:28:37 -07:00
143 changed files with 13766 additions and 5540 deletions
+19 -2
@@ -44,12 +44,29 @@
"WebFetch(domain:docs.litellm.ai)",
"Bash(cat /home/timothy/aden/hive/.venv/lib/python3.11/site-packages/litellm-*.dist-info/METADATA)",
"Bash(find \"/home/timothy/.hive/agents/queens/queen_brand_design/sessions/session_20260415_100751_d49f4c28/\" -type f -name \"*.json*\" -exec grep -l \"协日\" {} \\\\;)",
"Bash(grep -v ':0$')"
"Bash(grep -v ':0$')",
"Bash(curl -s -m 2 http://127.0.0.1:4002/sse -o /dev/null -w 'status=%{http_code} time=%{time_total}s\\\\n')",
"mcp__gcu-tools__browser_status",
"mcp__gcu-tools__browser_start",
"mcp__gcu-tools__browser_navigate",
"mcp__gcu-tools__browser_evaluate",
"mcp__gcu-tools__browser_screenshot",
"mcp__gcu-tools__browser_open",
"mcp__gcu-tools__browser_click_coordinate",
"mcp__gcu-tools__browser_get_rect",
"mcp__gcu-tools__browser_type_focused",
"mcp__gcu-tools__browser_wait",
"Bash(python3 -c ' *)",
"Bash(python3 scripts/debug_queen_prompt.py independent)",
"Bash(curl -s --max-time 2 http://127.0.0.1:9230/status)",
"Bash(python3 -c \"import json, sys; print\\(json.loads\\(sys.stdin.read\\(\\)\\)['data']['content']\\)\")",
"Bash(python3 -c \"import json; json.load\\(open\\('/home/timothy/aden/hive/tools/browser-extension/manifest.json'\\)\\)\")"
],
"additionalDirectories": [
"/home/timothy/.hive/skills/writing-hive-skills",
"/tmp",
"/home/timothy/.hive/skills"
"/home/timothy/.hive/skills",
"/home/timothy/aden/hive/core/frontend/src/components"
]
},
"hooks": {
-18
@@ -1,18 +0,0 @@
This project uses ruff for Python linting and formatting.
Rules:
- Line length: 100 characters
- Python target: 3.11+
- Use double quotes for strings
- Sort imports with isort (ruff I rules): stdlib, third-party, first-party (framework), local
- Combine as-imports
- Use type hints on all function signatures
- Use `from __future__ import annotations` for modern type syntax
- Raise exceptions with `from` in except blocks (B904)
- No unused imports (F401), no unused variables (F841)
- Prefer list/dict/set comprehensions over map/filter (C4)
Run `make lint` to auto-fix, `make check` to verify without modifying files.
Run `make format` to apply ruff formatting.
The ruff config lives in core/pyproject.toml under [tool.ruff].
-35
@@ -1,35 +0,0 @@
# Git
.git/
.gitignore
# Documentation
*.md
docs/
LICENSE
# IDE
.idea/
.vscode/
# Dependencies (rebuilt in container)
node_modules/
# Build artifacts
dist/
build/
coverage/
# Environment files
.env*
config.yaml
# Logs
*.log
logs/
# OS
.DS_Store
Thumbs.db
# GitHub
.github/
+3
@@ -22,3 +22,6 @@ indent_size = 2
[Makefile]
indent_style = tab
[*.{sh,ps1}]
end_of_line = lf
+5 -1
@@ -16,7 +16,6 @@
# Shell scripts (must use LF)
*.sh text eol=lf
quickstart.sh text eol=lf
# PowerShell scripts (Windows-friendly)
*.ps1 text eol=lf
@@ -122,3 +121,8 @@ CODE_OF_CONDUCT* text
*.db binary
*.sqlite binary
*.sqlite3 binary
# Lockfiles — mark generated so GitHub collapses them in PR diffs
*.lock linguist-generated=true -diff
package-lock.json linguist-generated=true -diff
uv.lock linguist-generated=true -diff
-3
@@ -1,3 +0,0 @@
{
"mcpServers": {}
}
+276 -43
@@ -2335,6 +2335,11 @@ class AgentLoop(AgentProtocol):
execution_id,
)
# Continue-nudge counter: how many times we've re-streamed within this
# _run_single_turn because the idle/TTFT watchdog fired. Caps to avoid
# nudging forever when the endpoint is genuinely dead.
_nudge_count_this_turn = 0
# Inner tool loop: stream may produce tool calls requiring re-invocation
while True:
# Pre-send guard: if context is at or over budget, compact before
@@ -2423,7 +2428,16 @@ class AgentLoop(AgentProtocol):
# Capture loop-scoped variables as defaults to satisfy B023.
# _stream_last_event_at is bumped on every event; the watchdog
# below uses it to detect silently hung HTTP connections.
_stream_last_event_at = time.monotonic()
_stream_start_at = time.monotonic()
_stream_last_event_at = _stream_start_at
# None until the first event arrives. Before first event, the
# watchdog uses the (much looser) TTFT budget — large-context
# local models legitimately take minutes to first token. Once
# any event has been observed, tight inter-event idle applies.
_first_event_at: float | None = None
# Partial tool_calls accumulated so far, as OpenAI-format dicts
# ready for persistence if the stream is cut short.
_partial_tc_dicts: list[dict[str, Any]] = []
async def _do_stream(
_msgs: list = messages, # noqa: B006
@@ -2432,8 +2446,10 @@ class AgentLoop(AgentProtocol):
_safe_names: set = _early_safe_names, # noqa: B006,B008
_tasks: dict = _early_tasks, # noqa: B006,B008
_exec_fn=_timed_execute,
_partial_dicts: list[dict[str, Any]] = _partial_tc_dicts, # noqa: B006,B008
) -> None:
nonlocal accumulated_text, _stream_error, _stream_last_event_at
nonlocal _first_event_at
_clean_snapshot = "" # visible-only text for the frontend
async for event in ctx.llm.stream(
@@ -2443,6 +2459,8 @@ class AgentLoop(AgentProtocol):
max_tokens=ctx.max_tokens,
):
_stream_last_event_at = time.monotonic()
if _first_event_at is None:
_first_event_at = _stream_last_event_at
if isinstance(event, TextDeltaEvent):
accumulated_text = event.snapshot
# Strip internal reasoning tags from the full
@@ -2462,9 +2480,46 @@ class AgentLoop(AgentProtocol):
iteration=iteration,
inner_turn=inner_turn,
)
# Checkpoint partial state so a watchdog cancel or
# crash doesn't discard whatever the model has
# produced so far. Cheap — one atomic file write.
try:
await conversation.checkpoint_partial_assistant(
accumulated_text,
_partial_dicts or None,
)
except Exception as _cp_err: # noqa: BLE001
logger.debug(
"[_run_single_turn] partial checkpoint failed: %s",
_cp_err,
)
elif isinstance(event, ToolCallEvent):
_tc.append(event)
_partial_dicts.append(
{
"id": event.tool_use_id,
"type": "function",
"function": {
"name": event.tool_name,
"arguments": json.dumps(event.tool_input),
},
}
)
# Checkpoint now that a tool call has landed —
# this is the important one: if the stream dies
# right after a tool call but before FinishEvent,
# we still have the intent recorded.
try:
await conversation.checkpoint_partial_assistant(
accumulated_text,
_partial_dicts or None,
)
except Exception as _cp_err: # noqa: BLE001
logger.debug(
"[_run_single_turn] partial checkpoint failed: %s",
_cp_err,
)
# Gap 1: start concurrency-safe tools immediately
# while the rest of the stream is still arriving,
# so read-heavy turns don't stall after the last
@@ -2492,55 +2547,91 @@ class AgentLoop(AgentProtocol):
_llm_stream_t0 = time.monotonic()
self._stream_task = asyncio.create_task(_do_stream())
logger.debug("[_run_single_turn] inner_turn=%d: Stream task created, waiting...", inner_turn)
_inactivity_limit = self._config.llm_stream_inactivity_timeout_seconds
# Watchdog budgets — see LoopConfig docstring for rationale.
_ttft_limit = self._config.llm_stream_ttft_timeout_seconds
_inter_event_limit = self._config.llm_stream_inter_event_idle_seconds
# Back-compat: if the legacy inactivity knob was overridden to
# a value below the new default, respect it as the inter-event
# budget (historic behaviour) so existing configs don't regress.
_legacy = self._config.llm_stream_inactivity_timeout_seconds
if _legacy and _legacy > 0 and _legacy < _inter_event_limit:
_inter_event_limit = _legacy
_watchdog_active = (_ttft_limit and _ttft_limit > 0) or (_inter_event_limit and _inter_event_limit > 0)
# Result of the watchdog: "ok" (stream finished), "ttft" (no first
# event in budget), "inactive" (silence after first event).
_watchdog_verdict: str = "ok"
_watchdog_elapsed: float = 0.0
_watchdog_limit: float = 0.0
try:
if _inactivity_limit and _inactivity_limit > 0:
# Heartbeat-aware wait: poll the task and cancel it if
# no stream event has been observed within the window.
# A silently dead HTTP connection otherwise hangs here
# forever — no exception, no delta, no timeout.
#
# Must use asyncio.wait (not wait_for) so we can tell
# "poll interval elapsed" apart from "task raised a
# TimeoutError of its own" — wait_for conflates them.
_check_interval = min(5.0, _inactivity_limit / 2)
if _watchdog_active:
# Poll cheapest-valid interval: at most every 5s, at least
# half the tighter budget. Must use asyncio.wait (not
# wait_for) so "poll interval elapsed" and "task raised
# TimeoutError of its own" stay distinguishable.
_tight = min(
_ttft_limit or float("inf"),
_inter_event_limit or float("inf"),
)
_check_interval = max(1.0, min(5.0, _tight / 2))
while True:
done, _pending = await asyncio.wait({self._stream_task}, timeout=_check_interval)
if self._stream_task in done:
# Let any exception the task raised propagate
# naturally via the outer ``await`` below.
break
idle = time.monotonic() - _stream_last_event_at
if idle >= _inactivity_limit:
logger.warning(
"[_run_single_turn] inner_turn=%d: "
"stream inactivity %.0fs >= %.0fs — "
"cancelling stream task",
inner_turn,
idle,
_inactivity_limit,
)
self._bump("stream_inactivity_watchdog")
self._stream_task.cancel()
try:
await self._stream_task
except BaseException:
pass
raise ConnectionError(
f"LLM stream idle for {idle:.0f}s "
f"(inactivity limit {_inactivity_limit:.0f}s) — "
"connection presumed dead"
) from None
now = time.monotonic()
if _first_event_at is None:
# TTFT phase — stream open but silent. Use the
# looser budget; don't confuse slow models with
# dead connections.
elapsed = now - _stream_start_at
if _ttft_limit and _ttft_limit > 0 and elapsed >= _ttft_limit:
_watchdog_verdict = "ttft"
_watchdog_elapsed = elapsed
_watchdog_limit = _ttft_limit
break
else:
# Post-first-event silence. A stream that produced
# events and then went quiet is a real stall.
idle = now - _stream_last_event_at
if _inter_event_limit and _inter_event_limit > 0 and idle >= _inter_event_limit:
_watchdog_verdict = "inactive"
_watchdog_elapsed = idle
_watchdog_limit = _inter_event_limit
break
# Still active — keep polling.
# Re-raise any exception the stream task stored. When the
# watchdog loop exited via ``break`` the task is done, and
# ``await`` is the cheapest way to surface its exception.
await self._stream_task
logger.debug("[_run_single_turn] inner_turn=%d: Stream task completed normally", inner_turn)
if _watchdog_verdict != "ok":
logger.warning(
"[_run_single_turn] inner_turn=%d: watchdog=%s %.0fs >= %.0fs — cancelling stream",
inner_turn,
_watchdog_verdict,
_watchdog_elapsed,
_watchdog_limit,
)
self._bump(f"stream_watchdog_{_watchdog_verdict}")
self._stream_task.cancel()
try:
await self._stream_task
except BaseException:
pass
else:
# Re-raise any exception the stream task stored. When the
# watchdog loop exited via ``break`` the task is done, and
# ``await`` is the cheapest way to surface its exception.
await self._stream_task
logger.debug(
"[_run_single_turn] inner_turn=%d: Stream task completed normally",
inner_turn,
)
except asyncio.CancelledError:
logger.debug("[_run_single_turn] inner_turn=%d: Stream task cancelled", inner_turn)
if accumulated_text:
await conversation.add_assistant_message(content=accumulated_text)
if accumulated_text or _partial_tc_dicts:
await conversation.add_assistant_message(
content=accumulated_text,
tool_calls=_partial_tc_dicts or None,
truncated=True,
)
# Gap 1: kill any early-dispatched tool tasks too.
# Without this, a safe tool started during streaming
# would leak past cancellation and keep running.
@@ -2568,6 +2659,100 @@ class AgentLoop(AgentProtocol):
raise
finally:
self._stream_task = None
# Continue-nudge recovery path. Runs AFTER the stream task is
# cleaned up so all state is consistent. We persist whatever
# partial text + tool-calls the model produced (as a truncated
# message so the model can see its own in-flight work on the
# next turn), cancel early tool tasks, append a terse
# continuation hint, and restart the stream.
if _watchdog_verdict != "ok":
# Kill any safe-tool tasks the stream dispatched early —
# their results would have had nowhere to land anyway
# because the assistant message was incomplete.
for _early in _early_tasks.values():
if not _early.done():
_early.cancel()
# Promote whatever we captured into a real truncated
# message. The partial checkpoint for this seq is cleared
# automatically when add_assistant_message persists.
if accumulated_text or _partial_tc_dicts:
await conversation.add_assistant_message(
content=accumulated_text,
tool_calls=_partial_tc_dicts or None,
truncated=True,
)
reason_label = (
"no tokens before TTFT budget"
if _watchdog_verdict == "ttft"
else "stream went silent after producing events"
)
if self._event_bus:
if _watchdog_verdict == "ttft":
await self._event_bus.emit_stream_ttft_exceeded(
stream_id=stream_id,
node_id=node_id,
ttft_seconds=_watchdog_elapsed,
limit_seconds=_watchdog_limit,
execution_id=execution_id,
)
else:
await self._event_bus.emit_stream_inactive(
stream_id=stream_id,
node_id=node_id,
idle_seconds=_watchdog_elapsed,
limit_seconds=_watchdog_limit,
execution_id=execution_id,
)
nudge_enabled = self._config.continue_nudge_enabled
nudge_cap = self._config.continue_nudge_max_per_turn
if nudge_enabled and _nudge_count_this_turn < nudge_cap:
_nudge_count_this_turn += 1
nudge_msg = (
f"[System: the previous stream stalled ({reason_label}, "
f"{_watchdog_elapsed:.0f}s). Continue from the last tool "
"result already in this conversation. Do NOT repeat tool "
"calls whose results are visible above — reuse them and "
"move to the next step.]"
)
await conversation.add_user_message(
nudge_msg,
is_system_nudge=True,
)
if self._event_bus:
await self._event_bus.emit_stream_nudge_sent(
stream_id=stream_id,
node_id=node_id,
reason=_watchdog_verdict,
nudge_count=_nudge_count_this_turn,
execution_id=execution_id,
)
logger.info(
"[%s] continue-nudge sent (count=%d/%d, reason=%s)",
node_id,
_nudge_count_this_turn,
nudge_cap,
_watchdog_verdict,
)
# Reset the outer _turn_t0 timer so the "LLM done in
# Xms" log line reflects real work not the nudge cycle.
_llm_stream_ms = int((time.monotonic() - _llm_stream_t0) * 1000)
logger.debug(
"[_run_single_turn] inner_turn=%d: nudge restart after %dms",
inner_turn,
_llm_stream_ms,
)
continue # restart the inner loop, re-fetches messages
# Nudge disabled or cap exhausted — fall back to the
# existing retry path so a truly dead endpoint eventually
# surfaces as an error.
raise ConnectionError(
f"LLM stream {_watchdog_verdict} for {_watchdog_elapsed:.0f}s "
f"(limit {_watchdog_limit:.0f}s) — nudge cap reached"
)
_llm_stream_ms = int((time.monotonic() - _llm_stream_t0) * 1000)
# If a recoverable stream error produced an empty response,
@@ -2667,6 +2852,12 @@ class AgentLoop(AgentProtocol):
results_by_id: dict[str, ToolResult] = {}
timing_by_id: dict[str, dict[str, Any]] = {} # tool_use_id -> {start_timestamp, duration_s}
pending_real: list[ToolCallEvent] = []
# Replay detector: per-turn map from tool_use_id -> steer prefix.
# Populated below when we detect that the model is re-emitting a
# tool call whose (name + canonical args) matches a prior success.
# Applied to the stored tool result content so the model sees the
# nudge on its next turn without losing the real execution output.
replay_prefixes_by_id: dict[str, str] = {}
for tc in tool_calls:
tool_call_count += 1
@@ -2939,6 +3130,39 @@ class AgentLoop(AgentProtocol):
)
results_by_id[tc.tool_use_id] = result
else:
# Replay detector: flag re-executions of recent
# successful calls. We still run the tool (some
# are legitimately repeated, e.g. screenshots and
# read-only evaluates) but prepend a terse steer
# onto the stored result so the model sees the
# signal on its next turn.
if self._config.replay_detector_enabled:
prior = conversation.find_completed_tool_call(
tc.tool_name,
tc.tool_input,
within_last_turns=self._config.replay_detector_within_last_turns,
)
if prior is not None:
logger.warning(
"[%s] replay detected: %s matches prior seq=%d — executing anyway",
node_id,
tc.tool_name,
prior.seq,
)
self._bump("tool_call_replay_detected")
if self._event_bus:
await self._event_bus.emit_tool_call_replay_detected(
stream_id=stream_id,
node_id=node_id,
tool_name=tc.tool_name,
prior_seq=prior.seq,
execution_id=execution_id,
)
replay_prefixes_by_id[tc.tool_use_id] = (
f"[Replay detected: {tc.tool_name} matches "
f"seq={prior.seq}. Result still produced below — "
"consider whether the retry was necessary.]\n"
)
pending_real.append(tc)
# Phase 2a: partition real tools by concurrency safety.
@@ -3136,9 +3360,18 @@ class AgentLoop(AgentProtocol):
)
image_content = None
# Apply replay-detector steer prefix if this call matched a
# recent successful invocation. Only applies to non-error
# results — an error already breaks the replay chain.
stored_content = result.content
if not result.is_error:
_prefix = replay_prefixes_by_id.get(tc.tool_use_id)
if _prefix:
stored_content = f"{_prefix}{stored_content or ''}"
await conversation.add_tool_result(
tool_use_id=tc.tool_use_id,
content=result.content,
content=stored_content,
is_error=result.is_error,
image_content=image_content,
is_skill_content=result.is_skill_content,
+160
@@ -48,6 +48,14 @@ class Message:
is_skill_content: bool = False
# Logical worker run identifier for shared-session persistence
run_id: str | None = None
# True when this is a framework-injected continuation hint (continue-nudge
# on stream stall). Stored as a user message for API compatibility, but
# the UI should render it as a compact system notice, not user speech.
is_system_nudge: bool = False
# True when this message is a partial/truncated assistant turn reconstructed
# from a crashed or watchdog-cancelled stream. Signals that the original
# turn never finished — the model may or may not choose to redo it.
truncated: bool = False
def to_llm_dict(self) -> dict[str, Any]:
"""Convert to OpenAI-format message dict."""
@@ -109,6 +117,10 @@ class Message:
d["image_content"] = self.image_content
if self.run_id is not None:
d["run_id"] = self.run_id
if self.is_system_nudge:
d["is_system_nudge"] = self.is_system_nudge
if self.truncated:
d["truncated"] = self.truncated
return d
@classmethod
@@ -126,6 +138,8 @@ class Message:
is_client_input=data.get("is_client_input", False),
image_content=data.get("image_content"),
run_id=data.get("run_id"),
is_system_nudge=data.get("is_system_nudge", False),
truncated=data.get("truncated", False),
)
@@ -317,6 +331,14 @@ class ConversationStore(Protocol):
async def delete_parts_before(self, seq: int, run_id: str | None = None) -> None: ...
async def write_partial(self, seq: int, data: dict[str, Any]) -> None: ...
async def read_partial(self, seq: int) -> dict[str, Any] | None: ...
async def read_all_partials(self) -> list[dict[str, Any]]: ...
async def clear_partial(self, seq: int) -> None: ...
async def close(self) -> None: ...
async def destroy(self) -> None: ...
@@ -462,6 +484,7 @@ class NodeConversation:
is_transition_marker: bool = False,
is_client_input: bool = False,
image_content: list[dict[str, Any]] | None = None,
is_system_nudge: bool = False,
) -> Message:
msg = Message(
seq=self._next_seq,
@@ -472,6 +495,7 @@ class NodeConversation:
is_transition_marker=is_transition_marker,
is_client_input=is_client_input,
image_content=image_content,
is_system_nudge=is_system_nudge,
)
self._messages.append(msg)
self._next_seq += 1
@@ -485,6 +509,8 @@ class NodeConversation:
self,
content: str,
tool_calls: list[dict[str, Any]] | None = None,
*,
truncated: bool = False,
) -> Message:
msg = Message(
seq=self._next_seq,
@@ -493,6 +519,7 @@ class NodeConversation:
tool_calls=tool_calls,
phase_id=self._current_phase,
run_id=self._run_id,
truncated=truncated,
)
self._messages.append(msg)
self._next_seq += 1
@@ -548,6 +575,59 @@ class NodeConversation:
# --- Query -------------------------------------------------------------
def find_completed_tool_call(
self,
name: str,
tool_input: dict[str, Any],
within_last_turns: int = 3,
) -> Message | None:
"""Return the most recent assistant message that issued a tool call
with the same (name + canonical-json args) AND received a non-error
tool result, within the last ``within_last_turns`` assistant turns.
Used by the replay detector to flag when the model is about to redo
a successful call: we prepend a steer onto the upcoming result but
still execute, so tools like browser_screenshot that are legitimately
repeated are not silently skipped.
"""
try:
target_canonical = json.dumps(tool_input, sort_keys=True, default=str)
except (TypeError, ValueError):
target_canonical = str(tool_input)
# Walk backwards over recent assistant messages
assistant_turns_seen = 0
for idx in range(len(self._messages) - 1, -1, -1):
m = self._messages[idx]
if m.role != "assistant":
continue
assistant_turns_seen += 1
if assistant_turns_seen > within_last_turns:
break
if not m.tool_calls:
continue
for tc in m.tool_calls:
func = tc.get("function", {}) if isinstance(tc, dict) else {}
tc_name = func.get("name")
if tc_name != name:
continue
args_str = func.get("arguments", "")
try:
parsed = json.loads(args_str) if isinstance(args_str, str) else args_str
canonical = json.dumps(parsed, sort_keys=True, default=str)
except (TypeError, ValueError):
canonical = str(args_str)
if canonical != target_canonical:
continue
# Found a match — now verify its result was not an error.
tc_id = tc.get("id")
for later in self._messages[idx + 1 :]:
if later.role == "tool" and later.tool_use_id == tc_id:
if not later.is_error:
return m
break
return None
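The matching above hinges on canonical JSON: argument dicts compare equal regardless of key order. A minimal standalone sketch of that canonicalization (the helper name is hypothetical, not from the codebase):

```python
import json

def canonical_args(args) -> str:
    # Sort keys so {"a": 1, "b": 2} and {"b": 2, "a": 1} canonicalize
    # identically; default=str keeps non-JSON values from raising.
    try:
        return json.dumps(args, sort_keys=True, default=str)
    except (TypeError, ValueError):
        # Mirror the detector's fallback for unserializable inputs.
        return str(args)

a = canonical_args({"url": "https://example.com", "wait": 3})
b = canonical_args({"wait": 3, "url": "https://example.com"})
```

Two calls with the same tool name whose arguments canonicalize to the same string are treated as a replay.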
def to_llm_messages(self) -> list[dict[str, Any]]:
"""Return messages as OpenAI-format dicts (system prompt excluded).
@@ -1365,6 +1445,45 @@ class NodeConversation:
await self._persist_meta()
await self._store.write_part(message.seq, message.to_storage_dict())
await self._write_next_seq()
# Any partial checkpoint for this seq is now superseded by the real
# part — clear it so a future restore doesn't resurrect stale text.
try:
await self._store.clear_partial(message.seq)
except AttributeError:
# Older stores may not implement partials; ignore.
pass
async def checkpoint_partial_assistant(
self,
accumulated_text: str,
tool_calls: list[dict[str, Any]] | None = None,
) -> None:
"""Write an in-flight assistant turn's state to disk under the next seq.
Called from the stream event loop. Safe to call repeatedly; each call
overwrites the prior checkpoint. Persisted via ``write_partial`` so it
does NOT appear in ``read_parts()`` and cannot be double-loaded. Cleared
automatically when ``add_assistant_message`` for this seq lands.
"""
if self._store is None:
return
if not self._meta_persisted:
await self._persist_meta()
payload: dict[str, Any] = {
"seq": self._next_seq,
"role": "assistant",
"content": accumulated_text,
"phase_id": self._current_phase,
"run_id": self._run_id,
"truncated": True,
}
if tool_calls:
payload["tool_calls"] = tool_calls
try:
await self._store.write_partial(self._next_seq, payload)
except AttributeError:
# Older stores may not implement partials; ignore.
pass
async def _persist_meta(self) -> None:
"""Lazily write conversation metadata to the store (called once).
@@ -1461,4 +1580,45 @@ class NodeConversation:
elif conv._messages:
conv._next_seq = conv._messages[-1].seq + 1
# Surface any leftover partial checkpoints as truncated messages so
# the next turn sees what the interrupted stream was in the middle
# of producing. Only partials whose seq is >= next_seq are meaningful;
# anything lower was already superseded by a real part.
try:
partials = await store.read_all_partials()
except AttributeError:
partials = []
for p in partials:
pseq = p.get("seq", -1)
if pseq < conv._next_seq:
# Stale — clean it up.
try:
await store.clear_partial(pseq)
except AttributeError:
pass
continue
# Only resurrect partials relevant to this run / phase.
if run_id and not is_legacy_run_id(run_id) and p.get("run_id") != run_id:
continue
if phase_id and p.get("phase_id") is not None and p.get("phase_id") != phase_id:
continue
# Reconstruct as a truncated assistant message.
msg = Message(
seq=pseq,
role="assistant",
content=p.get("content", "") or "",
tool_calls=p.get("tool_calls"),
phase_id=p.get("phase_id"),
run_id=p.get("run_id"),
truncated=True,
)
conv._messages.append(msg)
conv._next_seq = max(conv._next_seq, pseq + 1)
logger.info(
"restore: resurrected truncated partial seq=%d (text=%d chars, tool_calls=%d)",
pseq,
len(msg.content),
len(msg.tool_calls or []),
)
return conv
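The checkpoint/restore path above relies on a small store contract: `write_partial` overwrites per seq, `read_all_partials` returns whatever survived, and `clear_partial` is a silent no-op for absent seqs. A minimal in-memory sketch of that contract (the class name is hypothetical; real stores persist to disk):

```python
import asyncio

class InMemoryPartialStore:
    """Illustrative stand-in for the partial-checkpoint store."""

    def __init__(self) -> None:
        self._partials: dict[int, dict] = {}

    async def write_partial(self, seq: int, payload: dict) -> None:
        self._partials[seq] = payload  # repeated writes overwrite

    async def read_all_partials(self) -> list[dict]:
        return list(self._partials.values())

    async def clear_partial(self, seq: int) -> None:
        self._partials.pop(seq, None)  # absent seq: no-op

async def _demo() -> list[dict]:
    store = InMemoryPartialStore()
    await store.write_partial(5, {"seq": 5, "content": "partial tex"})
    await store.write_partial(5, {"seq": 5, "content": "partial text"})
    await store.clear_partial(4)  # stale/superseded seq cleanup
    return await store.read_all_partials()

partials = asyncio.run(_demo())
```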
+31 -6
@@ -131,14 +131,39 @@ class LoopConfig:
# Per-tool-call timeout.
tool_call_timeout_seconds: float = 60.0
# LLM stream inactivity watchdog. If no stream event (delta, tool call,
# finish) arrives within this many seconds, the stream task is cancelled
# and a transient error is raised so the retry loop can back off and
# reconnect. Prevents agents from hanging forever on a silently dead
# HTTP connection (no provider heartbeat, no exception, just silence).
# Set to 0 to disable.
# LLM stream inactivity watchdog. Split into two budgets so legitimate
# slow TTFT on large contexts doesn't get mistaken for a dead connection.
# - ttft: stream open -> first event. Large-context local models can
# legitimately take minutes before the first token arrives.
# - inter_event: last event -> now, ONLY after the first event. A stream
# that started producing and then went silent is a real stall.
# Whichever fires first cancels the stream. Set to 0 to disable that
# individual budget; set both to 0 to fully disable the watchdog.
llm_stream_ttft_timeout_seconds: float = 600.0
llm_stream_inter_event_idle_seconds: float = 120.0
# Deprecated alias — kept so existing configs keep working. If set to a
# non-default value it overrides inter_event_idle (historical behavior).
llm_stream_inactivity_timeout_seconds: float = 120.0
# Continue-nudge recovery. When the idle watchdog fires on a live but
# stuck stream, cancel the stream and append a short continuation
# hint to the conversation instead of raising a ConnectionError and
# re-running the whole turn. Preserves any partial text/tool-calls the
# stream emitted before the stall.
continue_nudge_enabled: bool = True
# Cap so a truly dead endpoint eventually falls back to the error path
# instead of nudging forever.
continue_nudge_max_per_turn: int = 3
# Tool-call replay detector. When the model emits a tool call whose
# (name + canonical-args) matches a prior successful call in the last
# K assistant turns, emit telemetry and prepend a short steer onto the
# tool result — but still execute. Weaker models legitimately repeat
# read-only calls (screenshot, evaluate), so silent skipping would
# cause surprising behavior.
replay_detector_enabled: bool = True
replay_detector_within_last_turns: int = 3
# Subagent delegation timeout (wall-clock max).
subagent_timeout_seconds: float = 3600.0
File diff suppressed because it is too large
+23 -3
@@ -1099,12 +1099,17 @@ def ensure_default_queens() -> None:
Safe to call multiple times; skips any profile that already has a file.
"""
created = 0
for queen_id, profile in DEFAULT_QUEENS.items():
queen_dir = QUEENS_DIR / queen_id
profile_path = queen_dir / "profile.yaml"
if profile_path.exists():
continue
queen_dir.mkdir(parents=True, exist_ok=True)
profile_path.write_text(yaml.safe_dump(profile, sort_keys=False, allow_unicode=True))
logger.info("Queen profiles ensured at %s", QUEENS_DIR)
created += 1
if created:
logger.info("Created %d default queen profile(s) at %s", created, QUEENS_DIR)
def list_queens() -> list[dict[str, str]]:
@@ -1143,6 +1148,10 @@ def load_queen_profile(queen_id: str) -> dict[str, Any]:
def update_queen_profile(queen_id: str, updates: dict[str, Any]) -> dict[str, Any]:
"""Merge partial updates into an existing queen profile and persist.
Performs a shallow merge at the top level, but deep-merges dict values
(e.g. world_lore, hidden_background) so partial sub-field updates don't
clobber sibling keys.
Returns the full updated profile.
Raises FileNotFoundError if the profile doesn't exist.
"""
@@ -1150,7 +1159,11 @@ def update_queen_profile(queen_id: str, updates: dict[str, Any]) -> dict[str, An
if not profile_path.exists():
raise FileNotFoundError(f"Queen profile not found: {queen_id}")
data = yaml.safe_load(profile_path.read_text())
data.update(updates)
for key, value in updates.items():
if isinstance(value, dict) and isinstance(data.get(key), dict):
data[key].update(value)
else:
data[key] = value
profile_path.write_text(yaml.safe_dump(data, sort_keys=False, allow_unicode=True))
return data
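The merge above is one level deep: dict values merge key-by-key, while scalars and lists replace wholesale. A standalone sketch showing why sibling keys survive (`merge_profile` and the profile values are illustrative, mirroring the loop above):

```python
def merge_profile(data: dict, updates: dict) -> dict:
    # One-level deep merge: dict values merge key-by-key, everything
    # else (strings, lists, numbers) is replaced wholesale.
    for key, value in updates.items():
        if isinstance(value, dict) and isinstance(data.get(key), dict):
            data[key].update(value)
        else:
            data[key] = value
    return data

profile = {"name": "Vex", "world_lore": {"era": "third", "capital": "Kel"}}
merge_profile(profile, {"world_lore": {"capital": "Mar"}})
# A shallow profile.update() would have dropped world_lore["era"] here.
```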
@@ -1160,7 +1173,7 @@ def update_queen_profile(queen_id: str, updates: dict[str, Any]) -> dict[str, An
# ---------------------------------------------------------------------------
def format_queen_identity_prompt(profile: dict[str, Any]) -> str:
def format_queen_identity_prompt(profile: dict[str, Any], *, max_examples: int | None = None) -> str:
"""Convert a queen profile into a high-dimensional character prompt.
Uses the 5-pillar character construction system: core identity,
@@ -1168,6 +1181,11 @@ def format_queen_identity_prompt(profile: dict[str, Any]) -> str:
behavior rules, and world lore. The hidden background and
psychological profile are never shown to the user but shape
every response.
``max_examples`` caps the roleplay_examples block: profiles ship
four worked examples (~2.4 KB), but one is enough at runtime to show
the internal-then-external pattern. Full rendering stays available
for profile authoring / eval playback by leaving ``max_examples=None``.
"""
name = profile.get("name", "the Queen")
title = profile.get("title", "Senior Advisor")
@@ -1248,6 +1266,8 @@ def format_queen_identity_prompt(profile: dict[str, Any]) -> str:
# Few-shot examples showing the full internal process
examples = profile.get("examples", [])
if examples and max_examples is not None:
examples = examples[:max_examples]
if examples:
example_parts: list[str] = []
for ex in examples:
@@ -25,7 +25,6 @@ All tools are prefixed with `browser_`:
- `browser_screenshot` — visual capture (annotated PNG)
<!-- /vision-only -->
- `browser_shadow_query`, `browser_get_rect` — locate elements (shadow-piercing via `>>>`)
- `browser_coords` — convert image pixels to CSS pixels (always use `css_x/y`, never `physical_x/y`)
- `browser_scroll`, `browser_wait` — navigation helpers
- `browser_evaluate` — run JavaScript
- `browser_close`, `browser_close_finished` — tab cleanup
@@ -34,20 +33,20 @@ All tools are prefixed with `browser_`:
**`browser_snapshot`** — compact accessibility tree of interactive elements. Fast, cheap, good for static or form-heavy pages where the DOM matches what's visually rendered (documentation, simple dashboards, search results, settings pages).
**`browser_screenshot`** — visual capture + metadata (`cssWidth`, `devicePixelRatio`, scale fields). **Use this on any complex SPA** — LinkedIn, Twitter/X, Reddit, Gmail, Notion, Slack, Discord, any site using shadow DOM, virtual scrolling, React reconciliation, or dynamic layout. On these pages, snapshot refs go stale in seconds, shadow contents aren't in the AX tree, and virtual-scrolled elements disappear from the tree entirely. Screenshot is the **only** reliable way to orient yourself.
**`browser_screenshot`** — visual capture + metadata (`cssWidth`, `devicePixelRatio`, scale fields). Use this when `browser_snapshot` does not show the thing you need, when refs look stale, or when visual position/layout matters. This often happens on complex SPAs — LinkedIn, Twitter/X, Reddit, Gmail, Notion, Slack, Discord and on sites using shadow DOM, virtual scrolling, React reconciliation, or dynamic layout.
Neither tool is "preferred" universally — they're for different jobs. Default to snapshot on text-heavy static pages, screenshot on SPAs and anything shadow-DOM-heavy. Activate the `browser-automation` skill for the full decision tree.
Neither tool is "preferred" universally — they're for different jobs. Start with snapshot for page structure and ordinary controls; use screenshot as the fallback when snapshot can't find or verify the visible target. Activate the `browser-automation` skill for the full decision tree.
## Coordinate rule: always CSS pixels
## Coordinate rule
Chrome DevTools Protocol `Input.dispatchMouseEvent` takes **CSS pixels**, not physical pixels. After a screenshot, use `browser_coords(image_x, image_y)` and feed the returned `css_x/y` (NOT `physical_x/y`) to `browser_click_coordinate`, `browser_hover_coordinate`, `browser_press_at`. Feeding physical pixels on a HiDPI display (DPR=1.6, 2, or 3) overshoots by `DPR×` and clicks land in the wrong place. `getBoundingClientRect()` already returns CSS pixels — pass through unchanged, no DPR multiplication.
Every browser tool that takes or returns coordinates operates in **fractions of the viewport (0..1 for both axes)**. Read a target's proportional position off `browser_screenshot` ("~35% from the left, ~20% from the top" → `(0.35, 0.20)`) and pass that to `browser_click_coordinate` / `browser_hover_coordinate` / `browser_press_at`. `browser_get_rect` and `browser_shadow_query` return `rect.cx` / `rect.cy` as fractions. The tools multiply by `cssWidth` / `cssHeight` internally — no scale awareness required. Fractions are used because every vision model (Claude, GPT-4o, Gemini, local VLMs) resizes/tiles images differently; proportions are invariant. Avoid raw `getBoundingClientRect()` via `browser_evaluate` for coord lookup; use `browser_get_rect` instead.
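The fraction-to-CSS conversion the tools perform internally is a single multiply per axis. A minimal sketch (hypothetical helper, assuming the viewport's `cssWidth`/`cssHeight` from screenshot metadata):

```python
def fraction_to_css(fx: float, fy: float, css_width: int, css_height: int) -> tuple[int, int]:
    # Clamp to the viewport, then scale: fractions stay valid no matter
    # how a vision model resized or tiled the screenshot.
    fx = min(max(fx, 0.0), 1.0)
    fy = min(max(fy, 0.0), 1.0)
    return round(fx * css_width), round(fy * css_height)

# "~35% from the left, ~20% from the top" on a 1280x800 CSS viewport:
point = fraction_to_css(0.35, 0.20, 1280, 800)
```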
## System prompt tips for browser nodes
```
1. On LinkedIn / X / Reddit / Gmail / any SPA — use browser_screenshot to orient,
not browser_snapshot. Shadow DOM and virtual scrolling make snapshots unreliable.
2. For static pages (docs, forms, search results), browser_snapshot is fine.
1. Start with browser_snapshot or the snapshot returned by the latest interaction.
2. If the target is missing, ambiguous, stale, or visibly present but absent from the tree,
use browser_screenshot to orient and then click by fractional coordinates.
3. Before typing into a rich-text editor (X compose, LinkedIn DM, Gmail, Reddit),
click the input area first with browser_click_coordinate so React / Draft.js /
Lexical register a native focus event, then use browser_type_focused(text=...)
@@ -67,7 +66,7 @@ Chrome DevTools Protocol `Input.dispatchMouseEvent` takes **CSS pixels**, not ph
"tools": {"policy": "all"},
"input_keys": ["search_url"],
"output_keys": ["profiles"],
"system_prompt": "Navigate to the search URL via browser_navigate(wait_until='load', timeout_ms=20000). Wait 3s for SPA hydration. On LinkedIn, use browser_screenshot to see the page — browser_snapshot misses shadow-DOM and virtual-scrolled content. Paginate through results by scrolling and screenshotting; extract each profile card by reading its visible layout..."
"system_prompt": "Navigate to the search URL via browser_navigate(wait_until='load', timeout_ms=20000). Wait 3s for SPA hydration. Use the returned snapshot to look for result cards first. If the cards are missing, stale, or visually present but absent from the tree, use browser_screenshot to orient; paginate through results by scrolling and use screenshots only when the snapshot cannot find or verify the visible cards..."
}
```
@@ -823,8 +823,8 @@ async def run_shutdown_reflection(
# ---------------------------------------------------------------------------
_LONG_REFLECT_INTERVAL = 5
_SHORT_REFLECT_TURN_INTERVAL = 2
_SHORT_REFLECT_COOLDOWN_SEC = 120.0
_SHORT_REFLECT_TURN_INTERVAL = 3
_SHORT_REFLECT_COOLDOWN_SEC = 300.0
async def subscribe_reflection_triggers(
+3 -3
@@ -1672,7 +1672,7 @@ class AgentHost:
entry_point_id: str,
execution_id: str,
graph_id: str | None = None,
) -> bool:
) -> str:
"""
Cancel a running execution.
@@ -1682,11 +1682,11 @@ class AgentHost:
graph_id: Graph to search (defaults to active graph)
Returns:
True if cancelled, False if not found
Cancellation outcome from the stream.
"""
stream = self._resolve_stream(entry_point_id, graph_id)
if stream is None:
return False
return "not_found"
return await stream.cancel_execution(execution_id)
# === QUERY OPERATIONS ===
+167 -3
@@ -14,6 +14,7 @@ from __future__ import annotations
import asyncio
import json
import logging
import os
import time
from collections import OrderedDict
from collections.abc import Callable
@@ -73,9 +74,28 @@ def _format_spawn_task_message(task: str, input_data: dict[str, Any]) -> str:
return "\n".join(lines)
def _env_int(name: str, default: int) -> int:
"""Read a positive int from env; fall back to default on missing/invalid."""
raw = os.environ.get(name)
if not raw:
return default
try:
value = int(raw)
except ValueError:
logger.warning("Invalid %s=%r; using default %d", name, raw, default)
return default
return value if value > 0 else default
# Laptop-safe default. Each worker is a full AgentLoop (Claude SDK session +
# tool catalog), so ~4 concurrent is the realistic ceiling on a dev machine.
# Override via HIVE_MAX_CONCURRENT_WORKERS for servers.
_DEFAULT_MAX_CONCURRENT_WORKERS = _env_int("HIVE_MAX_CONCURRENT_WORKERS", 4)
@dataclass
class ColonyConfig:
max_concurrent_workers: int = 100
max_concurrent_workers: int = _DEFAULT_MAX_CONCURRENT_WORKERS
cache_ttl: float = 60.0
batch_interval: float = 0.1
max_history: int = 1000
@@ -238,6 +258,13 @@ class ColonyRuntime:
self._timer_tasks: list[asyncio.Task] = []
self._timer_next_fire: dict[str, float] = {}
self._webhook_server: Any = None
# Background tasks owned by the runtime that aren't timers —
# e.g. the per-spawn soft/hard timeout watchers kicked off by
# run_parallel_workers. We hold strong references so asyncio
# does not garbage-collect them mid-sleep (Python's asyncio
# docs explicitly warn that create_task() needs a referenced
# handle).
self._background_tasks: set[asyncio.Task] = set()
# Idempotency
self._idempotency_keys: OrderedDict[str, str] = OrderedDict()
@@ -631,6 +658,44 @@ class ColonyRuntime:
spawn_tools = tools if tools is not None else self._tools
spawn_executor = tool_executor or self._tool_executor
# Colony progress tracker: when the caller supplied a db_path
# in input_data, this worker is part of a SQLite task queue
# and must see the hive.colony-progress-tracker skill body in
# its system prompt from turn 0. Rebuild the catalog with the
# skill pre-activated; falls back to the colony default when
# no db_path is present.
_spawn_catalog = self.skills_catalog_prompt
_spawn_skill_dirs = self.skill_dirs
if isinstance(input_data, dict) and input_data.get("db_path"):
try:
from framework.skills.config import SkillsConfig
from framework.skills.manager import SkillsManager, SkillsManagerConfig
_pre = SkillsManager(
SkillsManagerConfig(
skills_config=SkillsConfig.from_agent_vars(
skills=["hive.colony-progress-tracker"],
),
)
)
_pre.load()
_spawn_catalog = _pre.skills_catalog_prompt
_spawn_skill_dirs = (
list(_pre.allowlisted_dirs) if hasattr(_pre, "allowlisted_dirs") else self.skill_dirs
)
logger.info(
"spawn: pre-activated hive.colony-progress-tracker "
"(catalog %d -> %d chars) for worker with db_path=%s",
len(self.skills_catalog_prompt),
len(_spawn_catalog),
input_data.get("db_path"),
)
except Exception as exc:
logger.warning(
"spawn: failed to pre-activate colony-progress-tracker skill, falling back to base catalog: %s",
exc,
)
# Resolve the SSE stream_id once. When the caller didn't supply
# one we use the per-worker fan-out tag (filtered out by the
# SSE handler). When the caller passed an explicit value we
@@ -685,9 +750,9 @@ class ColonyRuntime:
llm=self._llm,
available_tools=list(spawn_tools),
accounts_prompt=self._accounts_prompt,
skills_catalog_prompt=self.skills_catalog_prompt,
skills_catalog_prompt=_spawn_catalog,
protocols_prompt=self.protocols_prompt,
skill_dirs=self.skill_dirs,
skill_dirs=_spawn_skill_dirs,
execution_id=worker_id,
stream_id=explicit_stream_id or f"worker:{worker_id}",
)
@@ -720,6 +785,8 @@ class ColonyRuntime:
async def spawn_batch(
self,
tasks: list[dict[str, Any]],
*,
tools_override: list[Any] | None = None,
) -> list[str]:
"""Spawn a batch of parallel workers, one per task spec.
@@ -732,6 +799,12 @@ class ColonyRuntime:
The overseer's ``run_parallel_workers`` tool is the usual
caller; it pairs ``spawn_batch`` + ``wait_for_worker_reports``
into a single fan-out/fan-in primitive.
When ``tools_override`` is supplied, every spawned worker
receives that tool list instead of the colony's default. Used
by ``run_parallel_workers`` to drop tools whose credentials
failed the pre-flight check (so the spawned workers don't
waste a startup trying to use them).
"""
worker_ids: list[str] = []
for spec in tasks:
@@ -743,6 +816,7 @@ class ColonyRuntime:
task=task_text,
count=1,
input_data=task_data or {"task": task_text},
tools=tools_override,
)
worker_ids.extend(ids)
return worker_ids
@@ -1054,6 +1128,96 @@ class ColonyRuntime:
return True
return False
def watch_batch_timeouts(
self,
worker_ids: list[str],
*,
soft_timeout: float,
hard_timeout: float,
warning_message: str | None = None,
) -> asyncio.Task:
"""Schedule a background task that enforces soft + hard timeouts.
Semantics:
* At ``t = soft_timeout`` every worker in ``worker_ids`` that is
still active AND hasn't already filed an ``_explicit_report``
receives ``warning_message`` via ``send_to_worker`` the inject
appears as a user turn at the next agent-loop boundary, so the
worker's LLM can see it and call ``report_to_parent`` with
partial results.
* At ``t = hard_timeout`` any worker still active is force-stopped
via ``stop_worker``. ``Worker.run`` still emits its
``SUBAGENT_REPORT`` on cancel (the explicit report survives,
if the worker reported just before the stop) so the queen
always sees a terminal inject for every spawned worker.
Returns the scheduled task so callers can await or cancel it.
Non-blocking for the caller; the watcher runs on the event loop
independently.
"""
if warning_message is None:
grace = max(0.0, hard_timeout - soft_timeout)
warning_message = (
f"[SOFT TIMEOUT] You've been running for {soft_timeout:.0f}s. "
"Wrap up now: call report_to_parent with whatever partial "
"results you have. You have "
f"~{grace:.0f}s more before a hard stop — anything not "
"reported by then will be lost."
)
async def _watch() -> None:
try:
await asyncio.sleep(soft_timeout)
for wid in worker_ids:
worker = self._workers.get(wid)
if worker is None or not worker.is_active:
continue
if getattr(worker, "_explicit_report", None) is not None:
continue
try:
await self.send_to_worker(wid, warning_message)
except Exception:
logger.warning(
"watch_batch_timeouts: soft-timeout inject failed for %s",
wid,
exc_info=True,
)
remaining = hard_timeout - soft_timeout
if remaining <= 0:
return
await asyncio.sleep(remaining)
for wid in worker_ids:
worker = self._workers.get(wid)
if worker is None or not worker.is_active:
continue
try:
await self.stop_worker(wid)
logger.info(
"watch_batch_timeouts: hard-stopped %s after %ss (no report)",
wid,
hard_timeout,
)
except Exception:
logger.warning(
"watch_batch_timeouts: hard-stop failed for %s",
wid,
exc_info=True,
)
except asyncio.CancelledError:
raise
except Exception:
logger.exception("watch_batch_timeouts: watcher crashed")
task = asyncio.create_task(_watch(), name=f"batch-timeout:{worker_ids[0] if worker_ids else '?'}")
# Hold a strong reference until completion. Without this the
# task can be garbage-collected during `await asyncio.sleep`,
# silently swallowing the soft-timeout inject (the exact bug
# surfaced by workers never seeing [SOFT TIMEOUT]).
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
return task
# ── Status & Query ──────────────────────────────────────────
def list_workers(self) -> list[WorkerInfo]:
+97
@@ -111,6 +111,15 @@ class EventType(StrEnum):
# Retry tracking
NODE_RETRY = "node_retry"
# Stream-health observability. Split from NODE_RETRY so the UI can
# distinguish "slow TTFT on a huge context" (healthy, just slow) from
# "stream went silent mid-generation" (probable stall) from "we nudged
# the model to continue" (recovery), which NODE_RETRY used to conflate.
STREAM_TTFT_EXCEEDED = "stream_ttft_exceeded"
STREAM_INACTIVE = "stream_inactive"
STREAM_NUDGE_SENT = "stream_nudge_sent"
TOOL_CALL_REPLAY_DETECTED = "tool_call_replay_detected"
# Worker agent lifecycle
WORKER_COMPLETED = "worker_completed"
WORKER_FAILED = "worker_failed"
@@ -1061,6 +1070,94 @@ class EventBus:
)
)
async def emit_stream_ttft_exceeded(
self,
stream_id: str,
node_id: str,
ttft_seconds: float,
limit_seconds: float,
execution_id: str | None = None,
) -> None:
"""Emit when a stream stayed silent past the TTFT budget (no first event)."""
await self.publish(
AgentEvent(
type=EventType.STREAM_TTFT_EXCEEDED,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={
"ttft_seconds": ttft_seconds,
"limit_seconds": limit_seconds,
},
)
)
async def emit_stream_inactive(
self,
stream_id: str,
node_id: str,
idle_seconds: float,
limit_seconds: float,
execution_id: str | None = None,
) -> None:
"""Emit when a stream that had produced events went silent past budget."""
await self.publish(
AgentEvent(
type=EventType.STREAM_INACTIVE,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={
"idle_seconds": idle_seconds,
"limit_seconds": limit_seconds,
},
)
)
async def emit_stream_nudge_sent(
self,
stream_id: str,
node_id: str,
reason: str,
nudge_count: int,
execution_id: str | None = None,
) -> None:
"""Emit when the continue-nudge was injected (recovery, not retry)."""
await self.publish(
AgentEvent(
type=EventType.STREAM_NUDGE_SENT,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={
"reason": reason,
"nudge_count": nudge_count,
},
)
)
async def emit_tool_call_replay_detected(
self,
stream_id: str,
node_id: str,
tool_name: str,
prior_seq: int,
execution_id: str | None = None,
) -> None:
"""Emit when the model is about to re-execute a prior successful call."""
await self.publish(
AgentEvent(
type=EventType.TOOL_CALL_REPLAY_DETECTED,
stream_id=stream_id,
node_id=node_id,
execution_id=execution_id,
data={
"tool_name": tool_name,
"prior_seq": prior_seq,
},
)
)
async def emit_worker_completed(
self,
stream_id: str,
+70 -39
@@ -16,7 +16,7 @@ from collections import OrderedDict
from collections.abc import Callable
from dataclasses import dataclass, field
from datetime import datetime
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Literal
from framework.host.event_bus import EventBus
from framework.host.shared_state import IsolationLevel, SharedBufferManager
@@ -48,6 +48,8 @@ class ExecutionAlreadyRunningError(RuntimeError):
logger = logging.getLogger(__name__)
CancelExecutionResult = Literal["cancelled", "cancelling", "not_found"]
class GraphScopedEventBus(EventBus):
"""Proxy that stamps ``graph_id`` on every published event.
@@ -130,7 +132,7 @@ class ExecutionContext:
run_id: str | None = None # Unique ID per trigger() invocation
started_at: datetime = field(default_factory=datetime.now)
completed_at: datetime | None = None
status: str = "pending" # pending, running, completed, failed, paused
status: str = "pending" # pending, running, cancelling, completed, failed, paused, cancelled
class ExecutionManager:
@@ -315,6 +317,22 @@ class ExecutionManager:
"""Return IDs of all currently active executions."""
return list(self._active_executions.keys())
def _get_blocking_execution_ids_locked(self) -> list[str]:
"""Return executions that still block a replacement from starting.
An execution continues to block replacement until its task has
terminated and the task's final cleanup has removed its bookkeeping.
This is intentional: a timed-out cancellation does not mean the old
task is harmless. If it is still alive, it can still write shared
session state, so letting a replacement start would guarantee
overlapping mutations on the same session.
"""
blocking_ids: list[str] = list(self._active_executions.keys())
for execution_id, task in self._execution_tasks.items():
if not task.done() and execution_id not in self._active_executions:
blocking_ids.append(execution_id)
return blocking_ids
@property
def agent_idle_seconds(self) -> float:
"""Seconds since the last agent activity (LLM call, tool call, node transition).
@@ -396,15 +414,22 @@ class ExecutionManager:
async def stop(self) -> None:
"""Stop the execution stream and cancel active executions."""
if not self._running:
return
async with self._lock:
if not self._running:
return
self._running = False
self._running = False
# Cancel all active executions
tasks_to_wait = []
for _, task in self._execution_tasks.items():
if not task.done():
# Cancel all active executions, but keep bookkeeping until each
# task reaches its own cleanup path.
tasks_to_wait: list[asyncio.Task] = []
for execution_id, task in self._execution_tasks.items():
if task.done():
continue
ctx = self._active_executions.get(execution_id)
if ctx is not None:
ctx.status = "cancelling"
self._cancel_reasons.setdefault(execution_id, "Execution cancelled")
task.cancel()
tasks_to_wait.append(task)
@@ -418,9 +443,6 @@ class ExecutionManager:
len(pending),
)
self._execution_tasks.clear()
self._active_executions.clear()
logger.info(f"ExecutionStream '{self.stream_id}' stopped")
# Emit stream stopped event
@@ -569,12 +591,16 @@ class ExecutionManager:
)
async with self._lock:
if not self._running:
raise RuntimeError(f"ExecutionStream '{self.stream_id}' is not running")
blocking_ids = self._get_blocking_execution_ids_locked()
if blocking_ids:
raise ExecutionAlreadyRunningError(self.stream_id, blocking_ids)
self._active_executions[execution_id] = ctx
self._completion_events[execution_id] = asyncio.Event()
# Start execution task
task = asyncio.create_task(self._run_execution(ctx))
self._execution_tasks[execution_id] = task
self._execution_tasks[execution_id] = asyncio.create_task(self._run_execution(ctx))
logger.debug(f"Queued execution {execution_id} for stream {self.stream_id}")
return execution_id
@@ -1183,7 +1209,7 @@ class ExecutionManager:
"""Get execution context."""
return self._active_executions.get(execution_id)
async def cancel_execution(self, execution_id: str, *, reason: str | None = None) -> bool:
async def cancel_execution(self, execution_id: str, *, reason: str | None = None) -> CancelExecutionResult:
"""
Cancel a running execution.
@@ -1194,33 +1220,38 @@ class ExecutionManager:
provided, defaults to "Execution cancelled".
Returns:
True if cancelled, False if not found
"cancelled" if the task fully exited within the grace period,
"cancelling" if cancellation was requested but the task is still
shutting down, or "not_found" if no active task exists.
"""
task = self._execution_tasks.get(execution_id)
if task and not task.done():
async with self._lock:
task = self._execution_tasks.get(execution_id)
if task is None or task.done():
return "not_found"
# Store the reason so the CancelledError handler can use it
# when emitting the pause/fail event.
self._cancel_reasons[execution_id] = reason or "Execution cancelled"
ctx = self._active_executions.get(execution_id)
if ctx is not None:
ctx.status = "cancelling"
task.cancel()
# Wait briefly for the task to finish. Don't block indefinitely —
# the task may be stuck in a long LLM API call that doesn't
# respond to cancellation quickly.
done, _ = await asyncio.wait({task}, timeout=5.0)
if not done:
# Task didn't finish within timeout — clean up bookkeeping now
# so the session doesn't think it still has running executions.
# The task will continue winding down in the background and its
# finally block will harmlessly pop already-removed keys.
logger.warning(
"Execution %s did not finish within cancel timeout; force-cleaning bookkeeping",
execution_id,
)
async with self._lock:
self._active_executions.pop(execution_id, None)
self._execution_tasks.pop(execution_id, None)
self._active_executors.pop(execution_id, None)
return True
return False
# Wait briefly for the task to finish. Don't block indefinitely —
# the task may be stuck in a long LLM API call that doesn't
# respond to cancellation quickly.
done, _ = await asyncio.wait({task}, timeout=5.0)
if not done:
# Keep bookkeeping in place until the task's own finally block runs.
# We intentionally do not add deferred cleanup keyed by execution_id
# here because resumed executions reuse the same id; a delayed pop
# could otherwise delete bookkeeping that belongs to the new run.
logger.warning(
"Execution %s did not finish within cancel timeout; leaving bookkeeping in place until task exit",
execution_id,
)
return "cancelling"
return "cancelled"
# === STATS AND MONITORING ===
+487
@@ -0,0 +1,487 @@
"""Per-colony SQLite task queue + progress ledger.
Every colony gets its own ``progress.db`` under ``~/.hive/colonies/{name}/data/``.
The DB holds the colony's task queue plus per-task step and SOP checklist
rows. Workers claim tasks atomically, write progress as they execute, and
verify SOP gates before marking a task done. This gives cross-run memory
that the existing per-iteration stall detectors don't have.
The DB is driven by agents via the ``sqlite3`` CLI through
``execute_command_tool``. This module handles framework-side lifecycle:
creation, migration, queen-side bulk seeding, stale-claim reclamation.
Concurrency model:
- WAL mode on from day one so 100 concurrent workers don't serialize.
- Workers hold NO long-running connection; they shell out to ``sqlite3`` per call,
which naturally releases locks between LLM turns.
- Atomic claim via ``BEGIN IMMEDIATE; UPDATE tasks SET status='claimed'
WHERE id=(SELECT ... LIMIT 1)``. The subquery-form UPDATE runs inside
the immediate transaction so racers either win the row or find zero
affected rows.
- Stale-claim reclaimer runs on host startup: claims older than
``stale_after_minutes`` get returned to ``pending`` and the row's
``retry_count`` increments. When ``retry_count >= max_retries`` the
row is moved to ``failed`` instead.
All writes go through ``BEGIN IMMEDIATE`` so racing readers see
consistent snapshots.
"""
from __future__ import annotations
import json
import logging
import sqlite3
import uuid
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
SCHEMA_VERSION = 1
_SCHEMA_V1 = """
CREATE TABLE IF NOT EXISTS tasks (
id TEXT PRIMARY KEY,
seq INTEGER,
priority INTEGER NOT NULL DEFAULT 0,
goal TEXT NOT NULL,
payload TEXT,
status TEXT NOT NULL DEFAULT 'pending',
worker_id TEXT,
claim_token TEXT,
claimed_at TEXT,
started_at TEXT,
completed_at TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
retry_count INTEGER NOT NULL DEFAULT 0,
max_retries INTEGER NOT NULL DEFAULT 3,
last_error TEXT,
parent_task_id TEXT REFERENCES tasks(id) ON DELETE SET NULL,
source TEXT
);
CREATE TABLE IF NOT EXISTS steps (
id TEXT PRIMARY KEY,
task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
seq INTEGER NOT NULL,
title TEXT NOT NULL,
detail TEXT,
status TEXT NOT NULL DEFAULT 'pending',
evidence TEXT,
worker_id TEXT,
started_at TEXT,
completed_at TEXT,
UNIQUE (task_id, seq)
);
CREATE TABLE IF NOT EXISTS sop_checklist (
id TEXT PRIMARY KEY,
task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
key TEXT NOT NULL,
description TEXT NOT NULL,
required INTEGER NOT NULL DEFAULT 1,
done_at TEXT,
done_by TEXT,
note TEXT,
UNIQUE (task_id, key)
);
CREATE TABLE IF NOT EXISTS colony_meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_tasks_claimable
ON tasks(status, priority DESC, seq, created_at)
WHERE status = 'pending';
CREATE INDEX IF NOT EXISTS idx_steps_task_seq
ON steps(task_id, seq);
CREATE INDEX IF NOT EXISTS idx_sop_required_open
ON sop_checklist(task_id, required, done_at);
CREATE INDEX IF NOT EXISTS idx_tasks_status
ON tasks(status, updated_at);
"""
_PRAGMAS = (
"PRAGMA journal_mode = WAL;",
"PRAGMA synchronous = NORMAL;",
"PRAGMA foreign_keys = ON;",
"PRAGMA busy_timeout = 5000;",
)
def _now_iso() -> str:
return datetime.now(UTC).isoformat(timespec="seconds")
def _new_id() -> str:
return str(uuid.uuid4())
def _connect(db_path: Path) -> sqlite3.Connection:
"""Open a connection with the standard pragmas applied.
WAL mode is sticky on the file once set, so re-applying on every
open is cheap. The other pragmas are per-connection and must be
set each time.
"""
con = sqlite3.connect(str(db_path), isolation_level=None, timeout=5.0)
for pragma in _PRAGMAS:
con.execute(pragma)
return con
def ensure_progress_db(colony_dir: Path) -> Path:
"""Create or migrate ``{colony_dir}/data/progress.db``.
Idempotent: safe to call on an already-initialized DB. Returns the
absolute path to the DB file.
Steps:
1. Ensure ``data/`` subdir exists.
2. Open the DB (creates the file if missing).
3. Apply WAL + pragmas.
4. Read ``PRAGMA user_version``; if < SCHEMA_VERSION, run the
schema block and bump user_version.
5. Reclaim any stale claims left from previous runs.
6. Patch every ``*.json`` worker config in the colony dir to
inject ``input_data.db_path`` and ``input_data.colony_id`` so
pre-existing colonies (forked before this feature landed) get
the tracker wiring on their next spawn.
"""
data_dir = Path(colony_dir) / "data"
data_dir.mkdir(parents=True, exist_ok=True)
db_path = data_dir / "progress.db"
con = _connect(db_path)
try:
current_version = con.execute("PRAGMA user_version").fetchone()[0]
if current_version < SCHEMA_VERSION:
con.executescript(_SCHEMA_V1)
con.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
con.execute(
"INSERT OR REPLACE INTO colony_meta(key, value, updated_at) VALUES (?, ?, ?)",
("schema_version", str(SCHEMA_VERSION), _now_iso()),
)
logger.info("progress_db: initialized schema v%d at %s", SCHEMA_VERSION, db_path)
reclaimed = _reclaim_stale_inner(con, stale_after_minutes=15)
if reclaimed:
logger.info(
"progress_db: reclaimed %d stale claims at startup (%s)",
reclaimed,
db_path,
)
finally:
con.close()
resolved_db_path = db_path.resolve()
_patch_worker_configs(Path(colony_dir), resolved_db_path)
return resolved_db_path
def _patch_worker_configs(colony_dir: Path, db_path: Path) -> int:
"""Inject ``input_data.db_path`` + ``input_data.colony_id`` +
``input_data.colony_data_dir`` into existing ``worker.json`` files
in a colony directory.
Runs on every ``ensure_progress_db`` call so colonies that were
forked before this feature landed get their worker spawn messages
patched in place. Idempotent: if ``input_data`` already contains
all three values, the file is not rewritten.
Returns the number of files that were actually modified (0 on
the common case of already-patched colonies).
Why ``colony_data_dir``? ``db_path`` alone points agents at
``progress.db``; for anything else (custom SQLite stores, JSON
ledgers, scraped artefacts) they need the *directory* so they
stop creating state under ``~/.hive/skills/``, which holds skill
*definitions*, not runtime data. See
``_default_skills/colony-storage-paths/SKILL.md``.
"""
colony_id = colony_dir.name
abs_db = str(db_path)
abs_data_dir = str(db_path.parent)
patched = 0
for worker_cfg in colony_dir.glob("*.json"):
# Only patch files that look like worker configs (have the
# worker_meta shape). ``metadata.json`` and ``triggers.json``
# are colony-level and must not be touched.
if worker_cfg.name in ("metadata.json", "triggers.json"):
continue
try:
data = json.loads(worker_cfg.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
continue
if not isinstance(data, dict) or "system_prompt" not in data:
# Not a worker config (lacks the worker_meta schema).
continue
input_data = data.get("input_data")
if not isinstance(input_data, dict):
input_data = {}
if (
input_data.get("db_path") == abs_db
and input_data.get("colony_id") == colony_id
and input_data.get("colony_data_dir") == abs_data_dir
):
continue # already patched
input_data["db_path"] = abs_db
input_data["colony_id"] = colony_id
input_data["colony_data_dir"] = abs_data_dir
data["input_data"] = input_data
try:
worker_cfg.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")
patched += 1
except OSError as e:
logger.warning("progress_db: failed to patch worker config %s: %s", worker_cfg, e)
if patched:
logger.info(
"progress_db: patched %d worker config(s) in colony '%s' with db_path + colony_data_dir",
patched,
colony_id,
)
return patched
def ensure_all_colony_dbs(colonies_root: Path | None = None) -> list[Path]:
"""Idempotently ensure every existing colony has a progress.db.
Called on framework host startup to backfill older colonies and
run the stale-claim reclaimer on all of them in one pass.
"""
if colonies_root is None:
colonies_root = Path.home() / ".hive" / "colonies"
if not colonies_root.is_dir():
return []
initialized: list[Path] = []
for entry in sorted(colonies_root.iterdir()):
if not entry.is_dir():
continue
try:
initialized.append(ensure_progress_db(entry))
except Exception as e:
logger.warning("progress_db: failed to ensure DB for colony '%s': %s", entry.name, e)
return initialized
def seed_tasks(
db_path: Path,
tasks: list[dict[str, Any]],
*,
source: str = "queen_create",
) -> list[str]:
"""Bulk-insert tasks (with optional nested steps + sop_items).
Each task dict accepts:
- goal: str (required)
- seq: int (optional ordering hint)
- priority: int (default 0)
- payload: dict | str | None (stored as JSON text)
- max_retries: int (default 3)
- parent_task_id: str | None
- steps: list[{"title": str, "detail"?: str}] (optional)
- sop_items: list[{"key": str, "description": str, "required"?: bool, "note"?: str}] (optional)
All rows are inserted in a single BEGIN IMMEDIATE transaction so
10k-row seeds finish in one disk flush. Returns the created task ids
in the same order as input.
"""
if not tasks:
return []
created_ids: list[str] = []
now = _now_iso()
con = _connect(Path(db_path))
try:
con.execute("BEGIN IMMEDIATE")
for idx, task in enumerate(tasks):
goal = task.get("goal")
if not goal:
raise ValueError(f"task[{idx}] missing required 'goal' field")
task_id = task.get("id") or _new_id()
payload = task.get("payload")
if payload is not None and not isinstance(payload, str):
payload = json.dumps(payload, ensure_ascii=False)
con.execute(
"""
INSERT INTO tasks (
id, seq, priority, goal, payload, status,
created_at, updated_at, max_retries, parent_task_id, source
) VALUES (?, ?, ?, ?, ?, 'pending', ?, ?, ?, ?, ?)
""",
(
task_id,
task.get("seq"),
int(task.get("priority", 0)),
goal,
payload,
now,
now,
int(task.get("max_retries", 3)),
task.get("parent_task_id"),
source,
),
)
for step_seq, step in enumerate(task.get("steps") or [], start=1):
if not step.get("title"):
raise ValueError(f"task[{idx}].steps[{step_seq - 1}] missing required 'title'")
con.execute(
"""
INSERT INTO steps (id, task_id, seq, title, detail, status)
VALUES (?, ?, ?, ?, ?, 'pending')
""",
(
_new_id(),
task_id,
step.get("seq", step_seq),
step["title"],
step.get("detail"),
),
)
for sop in task.get("sop_items") or []:
key = sop.get("key")
description = sop.get("description")
if not key or not description:
raise ValueError(f"task[{idx}].sop_items missing 'key' or 'description'")
con.execute(
"""
INSERT INTO sop_checklist
(id, task_id, key, description, required, note)
VALUES (?, ?, ?, ?, ?, ?)
""",
(
_new_id(),
task_id,
key,
description,
1 if sop.get("required", True) else 0,
sop.get("note"),
),
)
created_ids.append(task_id)
con.execute("COMMIT")
except Exception:
con.execute("ROLLBACK")
raise
finally:
con.close()
return created_ids
def enqueue_task(
db_path: Path,
goal: str,
*,
steps: list[dict[str, Any]] | None = None,
sop_items: list[dict[str, Any]] | None = None,
payload: Any = None,
priority: int = 0,
parent_task_id: str | None = None,
source: str = "enqueue_tool",
) -> str:
"""Append a single task to an existing queue. Thin wrapper over seed_tasks."""
ids = seed_tasks(
db_path,
[
{
"goal": goal,
"steps": steps,
"sop_items": sop_items,
"payload": payload,
"priority": priority,
"parent_task_id": parent_task_id,
}
],
source=source,
)
return ids[0]
def _reclaim_stale_inner(con: sqlite3.Connection, *, stale_after_minutes: int) -> int:
"""Reclaim stale claims. Runs inside an existing open connection.
Two-step:
1. Tasks past max_retries go to 'failed' with last_error populated.
2. Remaining stale claims return to 'pending', retry_count++.
"""
cutoff_expr = f"datetime('now', '-{int(stale_after_minutes)} minutes')"
con.execute("BEGIN IMMEDIATE")
try:
con.execute(
f"""
UPDATE tasks
SET status = 'failed',
last_error = COALESCE(last_error, 'exceeded max_retries after stale claim'),
completed_at = datetime('now'),
updated_at = datetime('now')
WHERE status IN ('claimed', 'in_progress')
AND claimed_at IS NOT NULL
AND claimed_at < {cutoff_expr}
AND retry_count >= max_retries
"""
)
cur = con.execute(
f"""
UPDATE tasks
SET status = 'pending',
worker_id = NULL,
claim_token = NULL,
claimed_at = NULL,
started_at = NULL,
retry_count = retry_count + 1,
updated_at = datetime('now')
WHERE status IN ('claimed', 'in_progress')
AND claimed_at IS NOT NULL
AND claimed_at < {cutoff_expr}
AND retry_count < max_retries
"""
)
reclaimed = cur.rowcount or 0
con.execute("COMMIT")
return reclaimed
except Exception:
con.execute("ROLLBACK")
raise
def reclaim_stale(db_path: Path, stale_after_minutes: int = 15) -> int:
"""Public wrapper that opens its own connection."""
con = _connect(Path(db_path))
try:
return _reclaim_stale_inner(con, stale_after_minutes=stale_after_minutes)
finally:
con.close()
__all__ = [
"SCHEMA_VERSION",
"ensure_progress_db",
"ensure_all_colony_dbs",
"seed_tasks",
"enqueue_task",
"reclaim_stale",
]
+40 -7
View File
@@ -145,6 +145,24 @@ class Worker:
self.status = WorkerStatus.RUNNING
self._started_at = time.monotonic()
# Scope browser profile (and any other CONTEXT_PARAMS) to this
# worker. asyncio.create_task() copies the parent's contextvars,
# so without this override every spawned worker inherits the
# queen's `profile=<queen_session_id>` and its browser_* tool
# calls end up driving the queen's Chrome tab group. Setting
# it here (inside the new Task's context) shadows the parent
# value without affecting the queen's ongoing calls.
try:
from framework.loader.tool_registry import ToolRegistry
ToolRegistry.set_execution_context(profile=self.id)
except Exception:
logger.debug(
"Worker %s: failed to scope browser profile",
self.id,
exc_info=True,
)
try:
result = await self._agent_loop.execute(self._context)
duration = time.monotonic() - self._started_at
@@ -170,13 +188,28 @@ class Worker:
except asyncio.CancelledError:
self.status = WorkerStatus.STOPPED
duration = time.monotonic() - self._started_at
self._result = WorkerResult(
error="Worker stopped by queen",
duration_seconds=duration,
status="stopped",
summary="Worker was cancelled before completion.",
)
await self._emit_terminal_events(None, force_status="stopped")
# Preserve any explicit report the worker's LLM already filed
# via ``report_to_parent`` before being cancelled — the caller
# cares about that payload even on a hard stop. Only fall back
# to the canned "stopped" message when no explicit report exists.
explicit = self._explicit_report
if explicit is not None:
self._result = WorkerResult(
error="Worker stopped by queen after reporting",
duration_seconds=duration,
status=explicit["status"],
summary=explicit["summary"],
data=explicit["data"],
)
await self._emit_terminal_events(None, force_status=explicit["status"])
else:
self._result = WorkerResult(
error="Worker stopped by queen",
duration_seconds=duration,
status="stopped",
summary="Worker was cancelled before completion.",
)
await self._emit_terminal_events(None, force_status="stopped")
return self._result
except Exception as exc:
+4
View File
@@ -1959,6 +1959,10 @@ class LiteLLMProvider(LLMProvider):
if self._codex_backend:
kwargs.pop("max_tokens", None)
kwargs.pop("stream_options", None)
# Pass store directly to OpenAI in case litellm drops it as unknown
if "extra_body" not in kwargs:
kwargs["extra_body"] = {}
kwargs["extra_body"]["store"] = False
request_summary = _summarize_request_for_log(kwargs)
logger.debug(
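The `store=False` injection reduces to a small merge rule: create `extra_body` only if absent, then set the one key, leaving any caller-supplied keys intact. A minimal sketch with a hypothetical helper name:

```python
def force_store_false(kwargs: dict) -> dict:
    # Hypothetical distillation of the hunk above: ensure
    # extra_body.store is False without clobbering caller-set keys.
    extra = kwargs.setdefault("extra_body", {})
    extra["store"] = False
    return kwargs

print(force_store_false({"model": "codex-mini"}))
print(force_store_false({"extra_body": {"reasoning_effort": "high"}}))
```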
+44 -23
View File
@@ -61,14 +61,14 @@
"label": "Gemini 3 Flash - Fast",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 900000
"max_context_tokens": 240000
},
{
"id": "gemini-3.1-pro-preview-customtools",
"label": "Gemini 3.1 Pro - Best quality",
"recommended": true,
"max_tokens": 32768,
"max_context_tokens": 900000
"max_context_tokens": 240000
}
]
},
@@ -115,13 +115,6 @@
"max_tokens": 40960,
"max_context_tokens": 131072
},
{
"id": "llama3.1-8b",
"label": "Llama 3.1 8B - Fastest production",
"recommended": false,
"max_tokens": 8192,
"max_context_tokens": 32768
},
{
"id": "zai-glm-4.7",
"label": "Z.ai GLM 4.7 - Strong coding preview",
@@ -145,15 +138,15 @@
"id": "MiniMax-M2.7",
"label": "MiniMax M2.7 - Best coding quality",
"recommended": true,
"max_tokens": 32768,
"max_context_tokens": 204800
"max_tokens": 40960,
"max_context_tokens": 180000
},
{
"id": "MiniMax-M2.5",
"label": "MiniMax M2.5 - Strong value",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 204800
"max_tokens": 40960,
"max_context_tokens": 180000
}
]
},
@@ -288,14 +281,14 @@
"label": "GPT-5.4 - Best overall",
"recommended": true,
"max_tokens": 128000,
"max_context_tokens": 922000
"max_context_tokens": 872000
},
{
"id": "anthropic/claude-sonnet-4.6",
"label": "Claude Sonnet 4.6 - Best coding balance",
"recommended": false,
"max_tokens": 64000,
"max_context_tokens": 936000
"max_context_tokens": 872000
},
{
"id": "anthropic/claude-opus-4.6",
@@ -309,14 +302,42 @@
"label": "Gemini 3.1 Pro Preview - Long-context reasoning",
"recommended": false,
"max_tokens": 32768,
"max_context_tokens": 1048576
"max_context_tokens": 872000
},
{
"id": "deepseek/deepseek-v3.2",
"label": "DeepSeek V3.2 - Best value",
"recommended": false,
"id": "qwen/qwen3.6-plus",
"label": "Qwen 3.6 Plus - Strong reasoning",
"recommended": true,
"max_tokens": 32768,
"max_context_tokens": 163840
"max_context_tokens": 240000
},
{
"id": "z-ai/glm-5v-turbo",
"label": "GLM-5V Turbo - Vision capable",
"recommended": true,
"max_tokens": 32768,
"max_context_tokens": 192000
},
{
"id": "z-ai/glm-5.1",
"label": "GLM-5.1 - Better but Slower",
"recommended": true,
"max_tokens": 40960,
"max_context_tokens": 192000
},
{
"id": "minimax/minimax-m2.7",
"label": "Minimax M2.7 - Minimax flagship",
"recommended": false,
"max_tokens": 40960,
"max_context_tokens": 180000
},
{
"id": "xiaomi/mimo-v2-pro",
"label": "MiMo V2 Pro - Xiaomi multimodal",
"recommended": true,
"max_tokens": 64000,
"max_context_tokens": 872000
}
]
}
@@ -347,8 +368,8 @@
"provider": "minimax",
"api_key_env_var": "MINIMAX_API_KEY",
"model": "MiniMax-M2.7",
"max_tokens": 32768,
"max_context_tokens": 204800,
"max_tokens": 40960,
"max_context_tokens": 180800,
"api_base": "https://api.minimax.io/v1"
},
"kimi_code": {
@@ -397,4 +418,4 @@
"api_base": "http://localhost:11434"
}
}
}
}
+12 -1
View File
@@ -1404,7 +1404,18 @@ class AgentLoader:
credential_store=credential_store,
)
runner._agent_default_skills = None
runner._agent_skills = None
# Colony workers attached to a SQLite task queue get the
# colony-progress-tracker skill pre-activated so its full
# claim / step / SOP-gate protocol lands in the system prompt
# on turn 0, bypassing the progressive-disclosure catalog
# lookup. Triggered by the presence of ``input_data.db_path``
# in worker.json (written by fork_session_into_colony and
# backfilled by ensure_progress_db for pre-existing colonies).
_preactivate: list[str] = []
_input_data = first_worker.get("input_data") or {}
if isinstance(_input_data, dict) and _input_data.get("db_path"):
_preactivate.append("hive.colony-progress-tracker")
runner._agent_skills = _preactivate or None
return runner
def register_tool(
+8 -1
View File
@@ -21,6 +21,7 @@ import os
import shutil
import subprocess
import sys
import threading
from pathlib import Path
from typing import Any
from urllib import error as urlerror, parse as urlparse, request as urlrequest
@@ -214,7 +215,13 @@ def cmd_serve(args: argparse.Namespace) -> int:
def cmd_open(args: argparse.Namespace) -> int:
"""Start the HTTP server and open the dashboard in the browser."""
_ping_hive_gateway_availability("hive-open")
# Don't block local startup on a best-effort analytics probe.
threading.Thread(
target=_ping_hive_gateway_availability,
args=("hive-open",),
daemon=True,
name="hive-open-gateway-ping",
).start()
args.open = True
return cmd_serve(args)
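The daemon-thread pattern keeps startup latency independent of the probe's network time. A self-contained sketch with a stand-in probe (names hypothetical):

```python
import threading
import time

def slow_probe(results: list[str]) -> None:
    # Stand-in for a best-effort gateway ping that may block on network.
    time.sleep(0.2)
    results.append("probed")

results: list[str] = []
start = time.monotonic()
t = threading.Thread(target=slow_probe, args=(results,), daemon=True,
                     name="gateway-ping")
t.start()
startup_latency = time.monotonic() - start
print(startup_latency < 0.1)  # startup did not wait for the probe
t.join()  # joined here only so this demo can observe the side effect
print(results)
```

`daemon=True` matters: if the process exits before the probe finishes, the thread is abandoned instead of keeping the CLI alive.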
+38 -8
View File
@@ -497,12 +497,22 @@ class ToolRegistry:
config["cwd"] = str(resolved_cwd)
return config
# For coder_tools_server, inject --project-root so writes go to the expected workspace
# For coder_tools_server, inject --project-root so reads land
# in the expected workspace (hive repo, for framework skills
# and docs), and inject --write-root so writes land under
# ~/.hive/workspace/ instead of polluting the git checkout
# with queen-authored skills, ledgers, and scripts. Without
# the split, every ``write_file`` call from the queen landed
# in the hive repo root.
if script_name and "coder_tools" in script_name:
project_root = str(resolved_cwd.parent.resolve())
args = list(args)
if "--project-root" not in args:
args.extend(["--project-root", project_root])
if "--write-root" not in args:
_write_root = Path.home() / ".hive" / "workspace"
_write_root.mkdir(parents=True, exist_ok=True)
args.extend(["--write-root", str(_write_root)])
config["args"] = args
if os.name == "nt":
@@ -571,8 +581,18 @@ class ToolRegistry:
tool_cap: int | None = None,
log_collisions: bool = False,
) -> tuple[bool, int, str | None]:
"""Register a single MCP server with one retry for transient failures."""
"""Register a single MCP server with one retry for transient failures.
When ``preserve_existing_tools=True`` and the server's tools are
already present from a prior registration, ``register_mcp_server``
returns ``count=0`` because every tool was shadowed. That's a
no-op success, not a failure: don't retry / warn in that case.
Otherwise a duplicate-init path (e.g. a worker spawn re-loading
the MCP servers the queen already registered) spams shadow
warnings, sleeps 2s, and retries for no reason.
"""
name = server_config.get("name", "unknown")
already_loaded = bool(self._mcp_server_tools.get(name))
last_error: str | None = None
for attempt in range(2):
@@ -585,6 +605,10 @@ class ToolRegistry:
)
if count > 0:
return True, count, None
if already_loaded and preserve_existing_tools:
# All tools shadowed by the prior registration of
# the same server — nothing to do, server is usable.
return True, 0, None
last_error = "registered 0 tools"
except Exception as exc:
last_error = str(exc)
@@ -752,12 +776,18 @@ class ToolRegistry:
if preserve_existing_tools and mcp_tool.name in self._tools:
if log_collisions:
origin_server = self._find_mcp_origin_server_for_tool(mcp_tool.name) or "<existing>"
logger.warning(
"MCP tool '%s' from '%s' shadowed by '%s' (loaded first)",
mcp_tool.name,
server_name,
origin_server,
)
# Don't warn when a server is being re-registered
# by itself — that's a redundant-init case (e.g.
# the same tool_registry seeing the same server
# twice via pooled reconnect), not a real
# cross-server shadow worth flagging.
if origin_server != server_name:
logger.warning(
"MCP tool '%s' from '%s' shadowed by '%s' (loaded first)",
mcp_tool.name,
server_name,
origin_server,
)
# Skip registration; do not update MCP tool bookkeeping for this server.
continue
+37 -32
View File
@@ -26,41 +26,47 @@ Follow these rules for reliable, efficient browser interaction.
- **`browser_snapshot`**: compact accessibility tree. Fast, cheap, good
for static / text-heavy pages where the DOM matches what's visually
rendered (docs, forms, search results, settings pages).
- **`browser_screenshot`**: visual capture + scale metadata. Use on any
complex SPA (LinkedIn, X / Twitter, Reddit, Gmail, Notion, Slack,
Discord) and on any site using shadow DOM or virtual scrolling. On
those pages, snapshot refs go stale in seconds, shadow contents
aren't in the AX tree, and virtual-scrolled elements disappear from
the tree entirely; screenshots are the only reliable way to orient.
- **`browser_screenshot`**: visual capture + scale metadata. Use when
the snapshot does not show the thing you need, when refs look stale,
or when you need visual position/layout to act. This is common on
complex SPAs (LinkedIn, X / Twitter, Reddit, Gmail, Notion, Slack,
Discord), shadow DOM, and virtual scrolling.
Neither tool is "preferred" universally; they're for different jobs.
Default to snapshot on static pages, screenshot on SPAs and
shadow-heavy sites. Interaction tools (click/type/fill/scroll) return
a snapshot automatically, so don't call `browser_snapshot` separately
after an interaction unless you need a fresh view.
Use snapshot first for structure and ordinary controls; switch to
screenshot when snapshot can't find or verify the target. Interaction
tools (`browser_click`, `browser_type`, `browser_type_focused`,
`browser_fill`, `browser_scroll`) wait 0.5 s for the page to settle
after a successful action, then attach a fresh snapshot under the
`snapshot` key of their result, so don't call `browser_snapshot`
separately after an interaction unless you need a newer view. Tune
with `auto_snapshot_mode`: `"default"` (full tree) is the default;
`"simple"` trims unnamed structural nodes; `"interactive"` returns
only controls (tightest token footprint); `"off"` skips the capture
entirely; use it when batching several interactions.
Only fall back to `browser_get_text` for extracting small elements by
CSS selector.
## Coordinates: always CSS pixels
## Coordinates
Chrome DevTools Protocol `Input.dispatchMouseEvent` takes **CSS
pixels**, not physical pixels. This is critical and often gotten wrong:
Every browser tool that takes or returns coordinates operates in
**fractions of the viewport (0..1 for both axes)**. Read a target's
proportional position off `browser_screenshot`: "this button is
~35% from the left, ~20% from the top" → pass `(0.35, 0.20)`.
`browser_get_rect` and `browser_shadow_query` return `rect.cx` /
`rect.cy` as fractions in the same space. The tools handle the
fraction CSS-px multiplication internally; you do not need to
track image pixels, DPR, or any scale factor.
| Tool | Unit |
|---|---|
| `browser_click_coordinate(x, y)` | **CSS pixels** |
| `browser_hover_coordinate(x, y)` | **CSS pixels** |
| `browser_press_at(x, y, key)` | **CSS pixels** |
| `getBoundingClientRect()` | already CSS pixels; pass straight through |
| `browser_coords(img_x, img_y)` | returns `css_x/y` (use this) and `physical_x/y` (debug only) |
Why fractions: every vision model (Claude, GPT-4o, Gemini, local
VLMs) resizes or tiles images differently before the model sees the
pixels. Proportions survive every such transform; pixel coordinates
only "work" per-model and break when you swap backends.
**Always use `css_x/y`** from `browser_coords`. Feeding `physical_x/y`
on a HiDPI display overshoots by `DPR×`: clicks land DPR times too
far right and down. On a DPR=1.6 display that's 60% off.
Never multiply `getBoundingClientRect()` by `devicePixelRatio`; it's
already in the right unit.
Avoid raw `browser_evaluate` + `getBoundingClientRect()` for coord
lookup: it returns CSS px, which will be wrong when fed to the click
tools. Prefer `browser_get_rect` / `browser_shadow_query`, which
return fractions.
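Reading a proportional position off a capture is plain arithmetic; a minimal sketch (the helper is illustrative, not a shipped tool):

```python
def to_fraction(img_x: int, img_y: int, img_w: int, img_h: int) -> tuple[float, float]:
    # Turn a position read off a screenshot into the 0..1
    # viewport-fraction space the click tools expect.
    return round(img_x / img_w, 3), round(img_y / img_h, 3)

# A button ~35% from the left, ~20% from the top of a 1280x800 capture:
print(to_fraction(448, 160, 1280, 800))
```

Because both axes are normalized by the capture's own dimensions, the result is the same whether the model saw the image resized, tiled, or at native resolution.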
## Rich-text editors (X, LinkedIn DMs, Gmail, Reddit, Slack, Discord)
@@ -88,11 +94,10 @@ reach shadow elements transparently.
**Shadow-heavy site workflow:**
1. `browser_screenshot()` → visual image
2. Identify target visually → image coordinate
3. `browser_coords(x, y)` → CSS px
4. `browser_click_coordinate(css_x, css_y)` lands via native hit
test; inputs get focused regardless of shadow depth
5. Type via `browser_type_focused` (no selector needed types into the
2. Identify target visually → fractional `(x, y)` read straight off the image
3. `browser_click_coordinate(x, y)` lands via native hit test;
inputs get focused regardless of shadow depth
4. Type via `browser_type_focused` (no selector needed types into the
already-focused element), or `browser_type` if you have a selector
For selector-style access when you know the shadow path:
@@ -0,0 +1,180 @@
"""Regression tests for forced cancellation overlap in ExecutionStream."""
from __future__ import annotations
import asyncio
from types import SimpleNamespace
from unittest.mock import MagicMock
import pytest
from framework.host.event_bus import AgentEvent, EventBus, EventType
from framework.host.execution_manager import (
EntryPointSpec,
ExecutionAlreadyRunningError,
ExecutionManager,
)
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.goal import Goal
from framework.orchestrator.orchestrator import ExecutionResult
def _build_stream(tmp_path, *, event_bus: EventBus | None = None) -> ExecutionManager:
graph = GraphSpec(
id="test-graph",
goal_id="goal-1",
version="1.0.0",
entry_node="start",
entry_points={"start": "start"},
terminal_nodes=[],
pause_nodes=[],
nodes=[],
edges=[],
)
goal = Goal(id="goal-1", name="goal-1", description="test goal")
entry_spec = EntryPointSpec(
id="webhook",
name="Webhook",
entry_node="start",
trigger_type="webhook",
isolation_level="shared",
max_concurrent=1,
)
storage = SimpleNamespace(base_path=tmp_path)
stream = ExecutionManager(
stream_id="webhook",
entry_spec=entry_spec,
graph=graph,
goal=goal,
state_manager=MagicMock(),
storage=storage,
outcome_aggregator=MagicMock(),
event_bus=event_bus,
)
stream._running = True
return stream
def _install_blocking_executor(monkeypatch, release: asyncio.Event) -> None:
class BlockingExecutor:
def __init__(self, *args, **kwargs):
self.node_registry = {}
async def execute(self, *args, **kwargs):
while True:
try:
await release.wait()
break
except asyncio.CancelledError:
continue
return ExecutionResult(success=True, output={"ok": True})
monkeypatch.setattr("framework.host.execution_manager.Orchestrator", BlockingExecutor)
@pytest.mark.asyncio
async def test_forced_cancel_timeout_keeps_stream_locked_until_task_exit(tmp_path, monkeypatch):
event_bus = EventBus()
stream = _build_stream(tmp_path, event_bus=event_bus)
release = asyncio.Event()
_install_blocking_executor(monkeypatch, release)
started_events: list[AgentEvent] = []
first_started = asyncio.Event()
second_started = asyncio.Event()
async def on_started(event: AgentEvent) -> None:
started_events.append(event)
if len(started_events) == 1:
first_started.set()
elif len(started_events) == 2:
second_started.set()
event_bus.subscribe(
event_types=[EventType.EXECUTION_STARTED],
handler=on_started,
filter_stream="webhook",
)
async def immediate_timeout(_tasks, timeout=None):
return set(), set(_tasks)
execution_id = await stream.execute({}, session_state={"resume_session_id": "session-1"})
await asyncio.wait_for(first_started.wait(), timeout=1)
old_task = stream._execution_tasks[execution_id]
monkeypatch.setattr("framework.host.execution_manager.asyncio.wait", immediate_timeout)
try:
cancelled = await stream.cancel_execution(execution_id, reason="forced timeout")
assert cancelled == "cancelling"
assert execution_id in stream._execution_tasks
assert execution_id in stream._active_executions
assert execution_id in stream._completion_events
assert stream._active_executions[execution_id].status == "cancelling"
assert not old_task.done()
with pytest.raises(ExecutionAlreadyRunningError):
await stream.execute({}, session_state={"resume_session_id": execution_id})
assert len(started_events) == 1
release.set()
await asyncio.wait_for(old_task, timeout=1)
restarted_id = await stream.execute({}, session_state={"resume_session_id": execution_id})
assert restarted_id == execution_id
await asyncio.wait_for(second_started.wait(), timeout=1)
finally:
release.set()
await asyncio.gather(*stream._execution_tasks.values(), return_exceptions=True)
@pytest.mark.asyncio
async def test_repeated_forced_restarts_do_not_accumulate_parallel_tasks(tmp_path, monkeypatch):
event_bus = EventBus()
stream = _build_stream(tmp_path, event_bus=event_bus)
release = asyncio.Event()
_install_blocking_executor(monkeypatch, release)
started_events: list[AgentEvent] = []
first_started = asyncio.Event()
async def on_started(event: AgentEvent) -> None:
started_events.append(event)
first_started.set()
event_bus.subscribe(
event_types=[EventType.EXECUTION_STARTED],
handler=on_started,
filter_stream="webhook",
)
async def immediate_timeout(_tasks, timeout=None):
return set(), set(_tasks)
monkeypatch.setattr("framework.host.execution_manager.asyncio.wait", immediate_timeout)
execution_id = await stream.execute({}, session_state={"resume_session_id": "session-1"})
await asyncio.wait_for(first_started.wait(), timeout=1)
first_task = stream._execution_tasks[execution_id]
try:
assert await stream.cancel_execution(execution_id, reason="restart-1") == "cancelling"
with pytest.raises(ExecutionAlreadyRunningError):
await stream.execute({}, session_state={"resume_session_id": execution_id})
with pytest.raises(ExecutionAlreadyRunningError):
await stream.execute({}, session_state={"resume_session_id": execution_id})
assert len(started_events) == 1
assert list(stream._execution_tasks) == [execution_id]
assert stream._execution_tasks[execution_id] is first_task
assert not first_task.done()
finally:
release.set()
await asyncio.wait_for(first_task, timeout=1)
+73 -45
@@ -173,11 +173,12 @@ async def handle_health(request: web.Request) -> web.Response:
)
async def handle_browser_status(request: web.Request) -> web.Response:
"""GET /api/browser/status — proxy the GCU bridge status check server-side.
async def _probe_browser_bridge() -> dict:
"""Probe the local GCU bridge and return ``{bridge, connected}``.
Checks http://127.0.0.1:9230/status so the browser never makes a
cross-origin request that would log ERR_CONNECTION_REFUSED in the console.
Shared by the one-shot ``GET /api/browser/status`` handler and the
``/api/browser/status/stream`` SSE feed so both see the same data
source.
"""
import asyncio
@@ -190,17 +191,66 @@ async def handle_browser_status(request: web.Request) -> web.Response:
await writer.drain()
raw = await asyncio.wait_for(reader.read(512), timeout=0.5)
writer.close()
# Parse JSON body after the blank line
if b"\r\n\r\n" in raw:
body = raw.split(b"\r\n\r\n", 1)[1]
import json
import json as _json
data = json.loads(body)
return web.json_response({"bridge": True, "connected": data.get("connected", False)})
data = _json.loads(body)
return {"bridge": True, "connected": bool(data.get("connected", False))}
except Exception:
pass
return {"bridge": False, "connected": False}
return web.json_response({"bridge": False, "connected": False})
async def handle_browser_status(request: web.Request) -> web.Response:
"""GET /api/browser/status — proxy the GCU bridge status check server-side.
Checks http://127.0.0.1:9230/status so the browser never makes a
cross-origin request that would log ERR_CONNECTION_REFUSED in the console.
"""
return web.json_response(await _probe_browser_bridge())
async def handle_browser_status_stream(request: web.Request) -> web.StreamResponse:
"""GET /api/browser/status/stream — SSE feed of bridge status.
Emits a ``status`` event immediately, then again only when the
probe result changes. Polls the local bridge every 3s, the same
cadence the frontend used before, but the polling now happens
server-side instead of the browser issuing a request each cycle.
"""
import asyncio
import json as _json
resp = web.StreamResponse(
status=200,
headers={
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache, no-transform",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
await resp.prepare(request)
async def _send(event: str, data: dict) -> None:
payload = f"event: {event}\ndata: {_json.dumps(data)}\n\n"
await resp.write(payload.encode("utf-8"))
last: tuple | None = None
try:
while True:
status = await _probe_browser_bridge()
signature = (status["bridge"], status["connected"])
if signature != last:
await _send("status", status)
last = signature
await asyncio.sleep(3.0)
except (asyncio.CancelledError, ConnectionResetError):
raise
except Exception as exc:
logger.warning("browser status stream error: %s", exc, exc_info=True)
return resp
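Both SSE handlers in this diff write ``event:``/``data:``-framed frames separated by a blank line. A minimal standalone sketch of parsing that framing on the consuming side (``parse_sse_frames`` is a hypothetical helper, not part of this PR):

```python
import json


def parse_sse_frames(raw: str) -> list[tuple[str, dict]]:
    """Split a text/event-stream payload into (event, data) pairs.

    Frames are separated by a blank line; each frame carries an
    ``event:`` line and a ``data:`` line holding JSON — the same
    shape the ``_send`` helpers above emit.
    """
    frames = []
    for block in raw.split("\n\n"):
        event, data = "message", None
        for line in block.splitlines():
            if line.startswith("event: "):
                event = line[len("event: "):]
            elif line.startswith("data: "):
                data = json.loads(line[len("data: "):])
        if data is not None:
            frames.append((event, data))
    return frames


raw = 'event: status\ndata: {"bridge": true, "connected": false}\n\n'
print(parse_sse_frames(raw))  # [('status', {'bridge': True, 'connected': False})]
```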
def create_app(model: str | None = None) -> web.Application:
@@ -233,49 +283,22 @@ def create_app(model: str | None = None) -> web.Application:
except Exception as exc:
logger.warning("Could not auto-persist HIVE_CREDENTIAL_KEY: %s", exc)
credential_store = CredentialStore.with_aden_sync()
# Local server startup should not wait on an eager Aden sync.
# The store can still fetch/refresh credentials on demand.
credential_store = CredentialStore.with_aden_sync(auto_sync=False)
except Exception:
logger.debug("Encrypted credential store unavailable, using in-memory fallback")
credential_store = CredentialStore.for_testing({})
app["credential_store"] = credential_store
# Pre-load queen MCP tools once at startup (cached for all sessions)
# This avoids rebuilding the tool registry for every queen session
from framework.loader.mcp_registry import MCPRegistry
from framework.loader.tool_registry import ToolRegistry
_queen_tool_registry: ToolRegistry | None = None
try:
_queen_tool_registry = ToolRegistry()
import framework.agents.queen as _queen_pkg
queen_pkg_dir = Path(_queen_pkg.__file__).parent
mcp_config = queen_pkg_dir / "mcp_servers.json"
if mcp_config.exists():
_queen_tool_registry.load_mcp_config(mcp_config)
logger.info("Pre-loaded queen MCP tools from %s", mcp_config)
registry = MCPRegistry()
registry.initialize()
registry.ensure_defaults()
if (queen_pkg_dir / "mcp_registry.json").is_file():
_queen_tool_registry.set_mcp_registry_agent_path(queen_pkg_dir)
registry_configs, selection_max_tools = registry.load_agent_selection(queen_pkg_dir)
if registry_configs:
_queen_tool_registry.load_registry_servers(
registry_configs,
preserve_existing_tools=True,
log_collisions=True,
max_tools=selection_max_tools,
)
logger.info("Pre-loaded queen tool registry with %d tools", len(_queen_tool_registry.get_tools()))
except Exception as e:
logger.warning("Failed to pre-load queen tool registry: %s", e)
app["queen_tool_registry"] = _queen_tool_registry
# Let queen sessions build their registry lazily on first use instead of
# paying the MCP discovery cost during `hive open`.
app["queen_tool_registry"] = None
app["manager"] = SessionManager(
model=model, credential_store=credential_store, queen_tool_registry=_queen_tool_registry
model=model,
credential_store=credential_store,
queen_tool_registry=None,
)
# Register shutdown hook
@@ -284,14 +307,17 @@ def create_app(model: str | None = None) -> web.Application:
# Health check
app.router.add_get("/api/health", handle_health)
app.router.add_get("/api/browser/status", handle_browser_status)
app.router.add_get("/api/browser/status/stream", handle_browser_status_stream)
# Register route modules
from framework.server.routes_colony_workers import register_routes as register_colony_worker_routes
from framework.server.routes_config import register_routes as register_config_routes
from framework.server.routes_credentials import register_routes as register_credential_routes
from framework.server.routes_events import register_routes as register_event_routes
from framework.server.routes_execution import register_routes as register_execution_routes
from framework.server.routes_logs import register_routes as register_log_routes
from framework.server.routes_messages import register_routes as register_message_routes
from framework.server.routes_prompts import register_routes as register_prompt_routes
from framework.server.routes_queens import register_routes as register_queen_routes
from framework.server.routes_sessions import register_routes as register_session_routes
from framework.server.routes_workers import register_routes as register_worker_routes
@@ -305,6 +331,8 @@ def create_app(model: str | None = None) -> web.Application:
register_worker_routes(app)
register_log_routes(app)
register_queen_routes(app)
register_colony_worker_routes(app)
register_prompt_routes(app)
# Static file serving — Option C production mode
# If frontend/dist/ exists, serve built frontend files on /
+99 -178
@@ -175,12 +175,10 @@ def _build_credentials_provider() -> Any:
adapter = CredentialStoreAdapter.default()
accounts = adapter.get_all_account_info()
tool_provider_map = adapter.get_tool_provider_map()
rendered = build_accounts_prompt(
accounts,
tool_provider_map=tool_provider_map,
node_tool_names=None,
)
# Compact form (no tool_provider_map) — tool schemas already
# surface function names; baking the full per-provider list
# into the system prompt on every turn was ~2 KB of redundancy.
rendered = build_accounts_prompt(accounts)
except Exception:
logger.debug("Failed to render ambient credentials block", exc_info=True)
rendered = ""
@@ -231,7 +229,7 @@ async def materialize_queen_identity(
phase_state.queen_id = queen_id
phase_state.queen_profile = queen_profile
phase_state.queen_identity_prompt = format_queen_identity_prompt(queen_profile)
phase_state.queen_identity_prompt = format_queen_identity_prompt(queen_profile, max_examples=1)
if event_bus is not None:
await event_bus.publish(
@@ -268,38 +266,19 @@ async def create_queen(
queen_loop_config as _base_loop_config,
)
from framework.agents.queen.nodes import (
_QUEEN_BUILDING_TOOLS,
_QUEEN_EDITING_TOOLS,
_QUEEN_INDEPENDENT_TOOLS,
_QUEEN_PLANNING_TOOLS,
_QUEEN_RUNNING_TOOLS,
_QUEEN_STAGING_TOOLS,
_appendices,
_building_knowledge,
_planning_knowledge,
_QUEEN_REVIEWING_TOOLS,
_QUEEN_WORKING_TOOLS,
_queen_behavior_always,
_queen_behavior_building,
_queen_behavior_editing,
_queen_behavior_independent,
_queen_behavior_planning,
_queen_behavior_running,
_queen_behavior_staging,
_queen_character_core,
_queen_identity_editing,
_queen_phase_7,
_queen_role_building,
_queen_role_independent,
_queen_role_planning,
_queen_role_running,
_queen_role_staging,
_queen_role_reviewing,
_queen_role_working,
_queen_style,
_queen_tools_building,
_queen_tools_editing,
_queen_tools_independent,
_queen_tools_planning,
_queen_tools_running,
_queen_tools_staging,
_shared_building_knowledge,
_queen_tools_reviewing,
_queen_tools_working,
finalize_queen_prompt,
)
from framework.host.event_bus import AgentEvent, EventType
@@ -348,7 +327,10 @@ async def create_queen(
logger.warning("Queen: MCP registry config failed to load", exc_info=True)
# ---- Phase state --------------------------------------------------
effective_phase = initial_phase or ("staging" if worker_identity else "planning")
# 3-phase model: caller supplies the phase directly (DM → independent,
# colony bootstrap → working). Fall back to independent when nothing
# is specified — there is no "staging"/"planning" bootstrap anymore.
effective_phase = initial_phase or ("working" if worker_identity else "independent")
phase_state = QueenPhaseState(phase=effective_phase, event_bus=session.event_bus)
session.phase_state = phase_state
@@ -360,28 +342,6 @@ async def create_queen(
# when the user adds/removes an integration.
phase_state.credentials_prompt_provider = _build_credentials_provider()
# ---- Track ask rounds during planning ----------------------------
# Increment planning_ask_rounds each time the queen requests user
# input (ask_user or ask_user_multiple) while in the planning phase.
async def _track_planning_asks(event: AgentEvent) -> None:
if phase_state.phase != "planning":
return
# Only count explicit ask_user / ask_user_multiple calls, not
# auto-block (text-only turns emit CLIENT_INPUT_REQUESTED with
# an empty prompt and no options/questions).
data = event.data or {}
has_prompt = bool(data.get("prompt"))
has_questions = bool(data.get("questions"))
has_options = bool(data.get("options"))
if has_prompt or has_questions or has_options:
phase_state.planning_ask_rounds += 1
session.event_bus.subscribe(
[EventType.CLIENT_INPUT_REQUESTED],
_track_planning_asks,
filter_stream="queen",
)
# ---- Lifecycle tools (always registered) --------------------------
register_queen_lifecycle_tools(
queen_registry,
@@ -417,33 +377,19 @@ async def create_queen(
session._queen_tool_executor = queen_tool_executor # type: ignore[attr-defined]
# ---- Partition tools by phase ------------------------------------
planning_names = set(_QUEEN_PLANNING_TOOLS)
building_names = set(_QUEEN_BUILDING_TOOLS)
staging_names = set(_QUEEN_STAGING_TOOLS)
running_names = set(_QUEEN_RUNNING_TOOLS)
editing_names = set(_QUEEN_EDITING_TOOLS)
independent_names = set(_QUEEN_INDEPENDENT_TOOLS)
working_names = set(_QUEEN_WORKING_TOOLS)
reviewing_names = set(_QUEEN_REVIEWING_TOOLS)
registered_names = {t.name for t in queen_tools}
missing_building = building_names - registered_names
if missing_building:
logger.warning(
"Queen: %d/%d building tools NOT registered: %s",
len(missing_building),
len(building_names),
sorted(missing_building),
)
logger.info("Queen: registered tools: %s", sorted(registered_names))
phase_state.planning_tools = [t for t in queen_tools if t.name in planning_names]
phase_state.building_tools = [t for t in queen_tools if t.name in building_names]
phase_state.staging_tools = [t for t in queen_tools if t.name in staging_names]
phase_state.running_tools = [t for t in queen_tools if t.name in running_names]
phase_state.editing_tools = [t for t in queen_tools if t.name in editing_names]
phase_state.working_tools = [t for t in queen_tools if t.name in working_names]
phase_state.reviewing_tools = [t for t in queen_tools if t.name in reviewing_names]
# Independent phase gets core tools + all MCP tools not claimed by any
# other phase (coder-tools file I/O, gcu-tools browser, etc.).
all_phase_names = planning_names | building_names | staging_names | running_names | editing_names
all_phase_names = independent_names | working_names | reviewing_names
mcp_tools = [t for t in queen_tools if t.name not in all_phase_names]
phase_state.independent_tools = [t for t in queen_tools if t.name in independent_names] + mcp_tools
logger.info(
@@ -466,81 +412,11 @@ async def create_queen(
# ---- Compose phase-specific prompts ------------------------------
from framework.agents.queen.nodes import queen_node as _orig_node
if worker_identity is None:
worker_identity = (
"\n\n# Worker Profile\n"
"No worker agent loaded. You are operating independently.\n"
"Design or build the agent to solve the user's problem "
"according to your current phase."
)
# Resolve vision-only prompt sections based on the session's LLM.
# session.llm is immutable for the session's lifetime, so this check
# is stable — prompts never need to be recomposed mid-session.
_has_vision = bool(session.llm and supports_image_tool_results(getattr(session.llm, "model", "")))
_planning_body = (
_queen_character_core
+ _queen_role_planning
+ _queen_style
+ _shared_building_knowledge
+ _queen_tools_planning
+ _queen_behavior_always
+ _queen_behavior_planning
+ _planning_knowledge
+ worker_identity
)
phase_state.prompt_planning = finalize_queen_prompt(_planning_body, _has_vision)
_building_body = (
_queen_character_core
+ _queen_role_building
+ _queen_style
+ _shared_building_knowledge
+ _queen_tools_building
+ _queen_behavior_always
+ _queen_behavior_building
+ _building_knowledge
+ _queen_phase_7
+ _appendices
+ worker_identity
)
phase_state.prompt_building = finalize_queen_prompt(_building_body, _has_vision)
phase_state.prompt_staging = finalize_queen_prompt(
(
_queen_character_core
+ _queen_role_staging
+ _queen_style
+ _queen_tools_staging
+ _queen_behavior_always
+ _queen_behavior_staging
+ worker_identity
),
_has_vision,
)
phase_state.prompt_running = finalize_queen_prompt(
(
_queen_character_core
+ _queen_role_running
+ _queen_style
+ _queen_tools_running
+ _queen_behavior_always
+ _queen_behavior_running
+ worker_identity
),
_has_vision,
)
phase_state.prompt_editing = finalize_queen_prompt(
(
_queen_identity_editing
+ _queen_style
+ _queen_tools_editing
+ _queen_behavior_always
+ _queen_behavior_editing
+ worker_identity
),
_has_vision,
)
phase_state.prompt_independent = finalize_queen_prompt(
(
_queen_character_core
@@ -552,6 +428,20 @@ async def create_queen(
),
_has_vision,
)
phase_state.prompt_working = finalize_queen_prompt(
(_queen_character_core + _queen_role_working + _queen_style + _queen_tools_working + _queen_behavior_always),
_has_vision,
)
phase_state.prompt_reviewing = finalize_queen_prompt(
(
_queen_character_core
+ _queen_role_reviewing
+ _queen_style
+ _queen_tools_reviewing
+ _queen_behavior_always
),
_has_vision,
)
# ---- Default skill protocols -------------------------------------
_queen_skill_dirs: list[str] = []
@@ -565,6 +455,10 @@ async def create_queen(
_queen_skills_mgr.load()
phase_state.protocols_prompt = _queen_skills_mgr.protocols_prompt
phase_state.skills_catalog_prompt = _queen_skills_mgr.skills_catalog_prompt
# Also store the manager so get_current_prompt() can render a
# phase-filtered catalog on each turn (skills with a `visibility`
# frontmatter that excludes the current phase are dropped).
phase_state.skills_manager = _queen_skills_mgr
_queen_skill_dirs = _queen_skills_mgr.allowlisted_dirs
except Exception:
logger.debug("Queen skill loading failed (non-fatal)", exc_info=True)
@@ -632,7 +526,7 @@ async def create_queen(
except FileNotFoundError:
logger.warning("Queen profile %s not found after selection", queen_id)
return None
identity_prompt = format_queen_identity_prompt(profile)
identity_prompt = format_queen_identity_prompt(profile, max_examples=1)
# Store on phase_state so identity persists across dynamic prompt refreshes
phase_state.queen_id = queen_id
phase_state.queen_profile = profile
@@ -822,44 +716,71 @@ async def create_queen(
phase_state.inject_notification = _inject_phase_notification
async def _on_worker_done(event):
async def _on_worker_report(event):
"""Inject [WORKER_REPORT] into queen as each worker finishes.
Subscribes to SUBAGENT_REPORT events which carry the worker's
real summary/data (preferring any explicit ``report_to_parent``
call). Every spawned worker emits exactly one success,
partial, failed, timeout, or stopped. The queen sees the
report as the next user turn and can react (reply to user,
kick off follow-up work, etc.) without being blocked by the
spawn call itself.
"""
if event.stream_id == "queen":
return
if phase_state.phase == "running":
if event.type == EventType.EXECUTION_COMPLETED:
session.worker_configured = True
output = event.data.get("output", {})
output_summary = ""
if output:
for key, value in output.items():
val_str = str(value)
if len(val_str) > 200:
val_str = val_str[:200] + "..."
output_summary += f"\n {key}: {val_str}"
_out = output_summary or " (no output keys set)"
notification = (
"[WORKER_TERMINAL] Worker finished successfully.\n"
f"Output:{_out}\n"
"Report this to the user. "
"Ask if they want to re-run with different input "
"or tweak the configuration."
)
else:
error = event.data.get("error", "Unknown error")
notification = (
"[WORKER_TERMINAL] Worker failed.\n"
f"Error: {error}\n"
"Report this to the user and help them troubleshoot. "
"You can re-run with different input or escalate to "
"building/planning if code changes are needed."
)
data = event.data or {}
worker_id = data.get("worker_id", event.node_id or "unknown")
status = data.get("status", "unknown")
summary = data.get("summary") or "(no summary)"
err = data.get("error")
payload_data = data.get("data") or {}
duration = data.get("duration_seconds")
await agent_loop.inject_event(notification)
await phase_state.switch_to_editing(source="auto")
lines = ["[WORKER_REPORT]", f"worker_id: {worker_id}", f"status: {status}"]
if duration is not None:
try:
lines.append(f"duration: {float(duration):.1f}s")
except (TypeError, ValueError):
pass
lines.append(f"summary: {summary}")
if err:
lines.append(f"error: {err}")
if payload_data:
# Compact JSON so the queen sees all keys without the
# indentation blowing up the turn's token count.
try:
import json as _json
lines.append("data: " + _json.dumps(payload_data, ensure_ascii=False, default=str))
except Exception:
lines.append(f"data: {payload_data!r}")
notification = "\n".join(lines)
await agent_loop.inject_event(notification)
session.worker_configured = True
# Only transition to reviewing once the batch has quieted —
# if other workers from a parallel spawn are still live, stay
# in working so the queen's tool access (run_parallel_workers,
# inject_message, stop_worker) remains available.
colony_runtime = getattr(session, "colony_runtime", None)
still_active = 0
if colony_runtime is not None:
try:
still_active = sum(
1
for w in colony_runtime._workers.values() # type: ignore[attr-defined]
if getattr(w, "is_active", False)
)
except Exception:
still_active = 0
if still_active == 0 and phase_state.phase in ("working", "running"):
await phase_state.switch_to_reviewing(source="auto")
session.event_bus.subscribe(
event_types=[EventType.EXECUTION_COMPLETED, EventType.EXECUTION_FAILED],
handler=_on_worker_done,
event_types=[EventType.SUBAGENT_REPORT],
handler=_on_worker_report,
)
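The ``[WORKER_REPORT]`` text built inside ``_on_worker_report`` is a pure transformation of the event payload. A self-contained sketch of the same line-building logic, extracted so it can be exercised without an event bus:

```python
import json


def render_worker_report(data: dict) -> str:
    """Build the [WORKER_REPORT] notification from a SUBAGENT_REPORT payload.

    Mirrors the handler above: worker_id/status always present,
    duration/error/data lines only when the payload carries them.
    """
    lines = [
        "[WORKER_REPORT]",
        f"worker_id: {data.get('worker_id', 'unknown')}",
        f"status: {data.get('status', 'unknown')}",
    ]
    duration = data.get("duration_seconds")
    if duration is not None:
        try:
            lines.append(f"duration: {float(duration):.1f}s")
        except (TypeError, ValueError):
            pass
    lines.append(f"summary: {data.get('summary') or '(no summary)'}")
    if data.get("error"):
        lines.append(f"error: {data['error']}")
    if data.get("data"):
        # Compact JSON keeps the injected turn's token count down.
        lines.append("data: " + json.dumps(data["data"], ensure_ascii=False, default=str))
    return "\n".join(lines)


report = render_worker_report(
    {"worker_id": "w1", "status": "success", "duration_seconds": 12.34,
     "summary": "done", "data": {"rows": 3}}
)
print(report)
```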
# ---- Colony-scoped worker escalation routing ----
@@ -0,0 +1,708 @@
"""Colony worker inspection routes.
These expose per-spawned-worker data (identified by worker_id) so the
frontend can render a colony-workers sidebar analogous to the queen
profile panel. Distinct from ``routes_workers.py``, which deals with
*graph nodes* inside a worker definition rather than live worker
instances.
Session-scoped (bound to a live session's runtime):
- GET /api/sessions/{session_id}/workers -- live + completed workers
- GET /api/sessions/{session_id}/colony/skills -- colony's shared skills catalog
- GET /api/sessions/{session_id}/colony/tools -- colony's default tools
Colony-scoped (bound to the on-disk colony directory, independent of any
live session; one colony has exactly one progress.db):
- GET /api/colonies/{colony_name}/progress/snapshot -- progress.db tasks/steps snapshot
- GET /api/colonies/{colony_name}/progress/stream -- SSE feed of upserts (polled)
- GET /api/colonies/{colony_name}/data/tables -- list user tables in progress.db
- GET /api/colonies/{colony_name}/data/tables/{table}/rows -- paginated rows
- PATCH /api/colonies/{colony_name}/data/tables/{table}/rows -- edit a row
"""
import asyncio
import json
import logging
import re
import sqlite3
from pathlib import Path
from aiohttp import web
from framework.server.app import resolve_session
# Same validation used by create_colony — keep them in sync. Blocks path
# traversal (``..``) and shell-special chars; the endpoint would 400 on
# anything else anyway, but validating early avoids a disk hit.
_COLONY_NAME_RE = re.compile(r"^[a-z0-9_]+$")
logger = logging.getLogger(__name__)
# Poll interval for the progress SSE stream. Progress rows flip on the
# order of seconds as workers finish LLM turns, so 1s feels live without
# hammering the DB.
_PROGRESS_POLL_INTERVAL = 1.0
def _worker_info_to_dict(info) -> dict:
"""Serialize a WorkerInfo dataclass to a JSON-friendly dict."""
result_dict = None
if info.result is not None:
r = info.result
result_dict = {
"status": r.status,
"summary": r.summary,
"error": r.error,
"tokens_used": r.tokens_used,
"duration_seconds": r.duration_seconds,
}
return {
"worker_id": info.id,
"task": info.task,
"status": str(info.status),
"started_at": info.started_at,
"result": result_dict,
}
async def handle_list_workers(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/workers -- list workers in a session's colony.
Returns two populations merged:
1. In-memory workers from the session's unified ColonyRuntime
(``session.colony._workers``). Includes live + just-finished
entries since ``_workers`` isn't pruned on termination.
2. Historical worker directories on disk under
``<session_dir>/workers/`` that are not in memory. Populated
from dir name / first user message / dir mtime. These appear
as ``status="historical"`` so the frontend can style them
distinctly from actives.
Falls back to the legacy ``session.colony_runtime`` for the
in-memory half when ``session.colony`` isn't set.
"""
session, err = resolve_session(request)
if err:
return err
runtime = getattr(session, "colony", None) or getattr(session, "colony_runtime", None)
workers: list[dict] = []
known_ids: set[str] = set()
storage_path: Path | None = None
if runtime is not None:
for info in runtime.list_workers():
workers.append(_worker_info_to_dict(info))
known_ids.add(info.id)
raw_storage = getattr(runtime, "_storage_path", None)
if raw_storage is not None:
storage_path = Path(raw_storage)
# Fall back to the session's directory if the runtime didn't expose one.
if storage_path is None:
session_dir = getattr(session, "queen_dir", None) or getattr(session, "session_dir", None)
if session_dir is not None:
storage_path = Path(session_dir)
if storage_path is not None:
workers.extend(await asyncio.to_thread(_walk_historical_workers, storage_path, known_ids))
return web.json_response({"workers": workers})
def _walk_historical_workers(storage_path: Path, known_ids: set[str]) -> list[dict]:
"""Scan ``<storage_path>/workers/`` for worker session dirs not already
in memory and return minimal ``WorkerSummary``-shaped entries.
We don't persist a standalone status file per worker, so the on-disk
entries get ``status="historical"`` and ``result=None``. The task is
reconstructed from the first non-boilerplate user message in the
worker's conversation parts.
"""
workers_dir = storage_path / "workers"
if not workers_dir.exists() or not workers_dir.is_dir():
return []
out: list[dict] = []
try:
entries = list(workers_dir.iterdir())
except OSError:
return []
# Newest dir first so recent runs surface first in the tab.
entries.sort(key=lambda p: _safe_mtime(p), reverse=True)
for entry in entries:
if not entry.is_dir():
continue
wid = entry.name
if wid in known_ids:
continue
out.append(
{
"worker_id": wid,
"task": _extract_historical_task(entry),
"status": "historical",
"started_at": _safe_mtime(entry),
"result": None,
}
)
return out
def _safe_mtime(path: Path) -> float:
try:
return path.stat().st_mtime
except OSError:
return 0.0
def _extract_historical_task(worker_dir: Path) -> str:
"""Pull the worker's initial task from its conversation parts.
seq 0 is a boilerplate "Hello" greeting in most flows; the real
task lands in an early user message (typically seq 1 or 2). Scan
the first few parts and return the first ``role="user"`` content
that isn't the greeting. Bounded at 5 parts to stay cheap on
directory listings containing hundreds of workers.
"""
parts_dir = worker_dir / "conversations" / "parts"
if not parts_dir.exists():
return ""
try:
for i in range(5):
p = parts_dir / f"{i:010d}.json"
if not p.exists():
break
data = json.loads(p.read_text(encoding="utf-8"))
if data.get("role") != "user":
continue
content = data.get("content", "")
if not isinstance(content, str):
continue
text = content.strip()
if not text or text.lower() == "hello":
continue
return text[:400]
except Exception:
return ""
return ""
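The parts-file scan can be exercised against a synthetic worker directory using the same ``{seq:010d}.json`` naming convention. A sketch (the directory layout below is fabricated for the demo; the scan logic mirrors ``_extract_historical_task``):

```python
import json
import tempfile
from pathlib import Path

worker_dir = Path(tempfile.mkdtemp())
parts = worker_dir / "conversations" / "parts"
parts.mkdir(parents=True)
# seq 0: boilerplate greeting; seq 1: the real task.
(parts / "0000000000.json").write_text(json.dumps({"role": "user", "content": "Hello"}))
(parts / "0000000001.json").write_text(json.dumps({"role": "user", "content": "Scrape leads"}))


def extract_task(worker_dir: Path) -> str:
    """Return the first non-greeting user message, truncated to 400 chars."""
    parts_dir = worker_dir / "conversations" / "parts"
    for i in range(5):  # bounded scan, as in the handler above
        p = parts_dir / f"{i:010d}.json"
        if not p.exists():
            break
        data = json.loads(p.read_text(encoding="utf-8"))
        content = data.get("content", "")
        if data.get("role") == "user" and isinstance(content, str):
            text = content.strip()
            if text and text.lower() != "hello":
                return text[:400]
    return ""


print(extract_task(worker_dir))  # Scrape leads
```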
# ── Skills & tools ─────────────────────────────────────────────────
def _parsed_skill_to_dict(skill) -> dict:
"""Serialize a ParsedSkill for the frontend."""
return {
"name": skill.name,
"description": skill.description,
"location": skill.location,
"base_dir": skill.base_dir,
"source_scope": skill.source_scope,
}
async def handle_list_colony_skills(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/colony/skills -- list skills the colony sees."""
session, err = resolve_session(request)
if err:
return err
runtime = session.colony_runtime
if runtime is None:
return web.json_response({"skills": []})
# Reach into the skills manager's catalog. There is no public
# iterator yet; we touch the private dict directly and defensively
# tolerate either shape (bare SkillsManager, or the
# from_precomputed variant which has no catalog).
catalog = getattr(runtime._skills_manager, "_catalog", None)
skills_dict = getattr(catalog, "_skills", None) if catalog is not None else None
if not isinstance(skills_dict, dict):
return web.json_response({"skills": []})
skills = [_parsed_skill_to_dict(s) for s in skills_dict.values()]
skills.sort(key=lambda s: s["name"])
return web.json_response({"skills": skills})
# Tools that ship with the framework and have no credential provider,
# but still deserve their own logical group. Surfaced to the frontend
# as ``provider="system"`` so the UI treats them exactly like a
# credential-backed group.
_SYSTEM_TOOLS: frozenset[str] = frozenset(
{
"get_account_info",
"get_current_time",
"bash_kill",
"bash_output",
"execute_command_tool",
"example_tool",
}
)
def _tool_to_dict(tool, provider_map: dict[str, str] | None) -> dict:
"""Serialize a Tool dataclass for the frontend.
``provider_map`` is the colony runtime's tool_name → credential
provider map (built by the CredentialResolver pipeline stage from
``CredentialStoreAdapter.get_tool_provider_map()``). Credential-
backed tools get a canonical provider key (e.g. ``"hubspot"``,
``"gmail"``); framework / core tools return ``None``, except for
the hand-picked entries in ``_SYSTEM_TOOLS`` which are tagged
``"system"``.
"""
name = getattr(tool, "name", "")
provider = (provider_map or {}).get(name)
if provider is None and name in _SYSTEM_TOOLS:
provider = "system"
return {
"name": name,
"description": getattr(tool, "description", ""),
"provider": provider,
}
async def handle_list_colony_tools(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/colony/tools -- list the colony's default tools."""
session, err = resolve_session(request)
if err:
return err
runtime = session.colony_runtime
if runtime is None:
return web.json_response({"tools": []})
provider_map = getattr(runtime, "_tool_provider_map", None)
tools = [_tool_to_dict(t, provider_map) for t in (runtime._tools or [])]
tools.sort(key=lambda t: t["name"])
return web.json_response({"tools": tools})
# ── Progress DB (tasks/steps) ──────────────────────────────────────
def _resolve_progress_db_by_name(colony_name: str) -> Path | None:
"""Resolve a colony's progress.db path by directory name.
Returns ``None`` when the name fails validation or the file does not
exist. Both conditions render as an empty Data tab in the UI rather
than a hard error so an operator can open the panel before any
workers have actually run.
"""
if not _COLONY_NAME_RE.match(colony_name):
return None
db_path = Path.home() / ".hive" / "colonies" / colony_name / "data" / "progress.db"
return db_path if db_path.exists() else None
def _read_progress_snapshot(db_path: Path, worker_id: str | None) -> dict:
"""Read tasks + steps from progress.db, optionally filtered by worker_id.
The worker_id filter applies to tasks (claimed by that worker) and
to steps (executed by that worker). If omitted, returns all rows.
"""
con = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=5.0)
try:
con.row_factory = sqlite3.Row
if worker_id:
task_rows = con.execute(
"SELECT * FROM tasks WHERE worker_id = ? ORDER BY updated_at DESC",
(worker_id,),
).fetchall()
step_rows = con.execute(
"SELECT * FROM steps WHERE worker_id = ? ORDER BY task_id, seq",
(worker_id,),
).fetchall()
else:
task_rows = con.execute("SELECT * FROM tasks ORDER BY updated_at DESC LIMIT 500").fetchall()
step_rows = con.execute("SELECT * FROM steps ORDER BY task_id, seq LIMIT 2000").fetchall()
return {
"tasks": [dict(r) for r in task_rows],
"steps": [dict(r) for r in step_rows],
}
finally:
con.close()
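Opening the database with ``mode=ro`` via a URI, as the snapshot reader does, makes accidental writes from the read path impossible: SELECTs succeed while any write raises ``sqlite3.OperationalError``. A self-contained sketch against a throwaway database:

```python
import os
import sqlite3
import tempfile

path = os.path.join(tempfile.mkdtemp(), "progress.db")
con = sqlite3.connect(path)
con.execute("CREATE TABLE tasks (id TEXT, updated_at TEXT)")
con.execute("INSERT INTO tasks VALUES ('t1', '2026-01-01T00:00:00')")
con.commit()
con.close()

# Read-only URI connection: SELECTs work, writes are rejected.
ro = sqlite3.connect(f"file:{path}?mode=ro", uri=True, timeout=5.0)
ro.row_factory = sqlite3.Row
rows = [dict(r) for r in ro.execute("SELECT * FROM tasks")]
print(rows)  # [{'id': 't1', 'updated_at': '2026-01-01T00:00:00'}]
try:
    ro.execute("INSERT INTO tasks VALUES ('t2', 'x')")
    write_rejected = False
except sqlite3.OperationalError:
    write_rejected = True
ro.close()
```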
async def handle_progress_snapshot(request: web.Request) -> web.Response:
"""GET /api/colonies/{colony_name}/progress/snapshot
Optional ?worker_id=... to filter to rows touched by a specific worker.
"""
colony_name = request.match_info["colony_name"]
db_path = _resolve_progress_db_by_name(colony_name)
if db_path is None:
return web.json_response({"tasks": [], "steps": []})
worker_id = request.query.get("worker_id") or None
snapshot = await asyncio.to_thread(_read_progress_snapshot, db_path, worker_id)
return web.json_response(snapshot)
def _read_progress_upserts(
db_path: Path,
worker_id: str | None,
since: str | None,
) -> tuple[list[dict], list[dict], str | None]:
"""Return task/step rows with ``updated_at`` (tasks) or a derived
timestamp (steps) newer than ``since``, plus the new high-water mark.
Steps don't carry an ``updated_at`` column — we use
``COALESCE(completed_at, started_at)`` as the change witness. A step
without either timestamp hasn't changed since the last poll and is
skipped.
``since`` is an ISO8601 string (as produced by progress_db._now_iso).
``None`` means "give me everything" and is used for the SSE priming frame.
"""
con = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=5.0)
try:
con.row_factory = sqlite3.Row
task_sql = "SELECT * FROM tasks"
step_sql = (
"SELECT *, COALESCE(completed_at, started_at) AS _ts "
"FROM steps WHERE COALESCE(completed_at, started_at) IS NOT NULL"
)
task_args: list = []
step_args: list = []
if since is not None:
task_sql += " WHERE updated_at > ?"
step_sql += " AND COALESCE(completed_at, started_at) > ?"
task_args.append(since)
step_args.append(since)
if worker_id:
joiner_t = " AND " if since is not None else " WHERE "
task_sql += joiner_t + "worker_id = ?"
step_sql += " AND worker_id = ?"
task_args.append(worker_id)
step_args.append(worker_id)
task_sql += " ORDER BY updated_at"
step_sql += " ORDER BY _ts"
task_rows = con.execute(task_sql, task_args).fetchall()
step_rows = con.execute(step_sql, step_args).fetchall()
tasks = [dict(r) for r in task_rows]
steps = [dict(r) for r in step_rows]
# High-water mark = max timestamp across both sets. Fall back to
# the previous ``since`` when nothing changed.
ts_values = [t["updated_at"] for t in tasks]
ts_values.extend(s["_ts"] for s in steps if s.get("_ts"))
new_since = max(ts_values) if ts_values else since
return tasks, steps, new_since
finally:
con.close()
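The COALESCE change-witness above can be exercised on a toy schema. This sketch (in-memory DB, invented timestamps, not the production `tasks`/`steps` layout) shows the priming poll, the high-water mark, and the quiet follow-up poll:

```python
import sqlite3

# Toy illustration of the high-water-mark poll: steps carry no
# updated_at, so COALESCE(completed_at, started_at) is the change
# witness, and rows with neither timestamp are skipped as unchanged.
con = sqlite3.connect(":memory:")
con.executescript(
    """
    CREATE TABLE steps (id INTEGER PRIMARY KEY, started_at TEXT, completed_at TEXT);
    INSERT INTO steps VALUES (1, '2026-04-20T10:00:00', NULL);
    INSERT INTO steps VALUES (2, '2026-04-20T10:00:00', '2026-04-20T10:05:00');
    INSERT INTO steps VALUES (3, NULL, NULL);
    """
)

def poll(since):
    sql = (
        "SELECT id, COALESCE(completed_at, started_at) AS _ts FROM steps "
        "WHERE COALESCE(completed_at, started_at) IS NOT NULL"
    )
    args = []
    if since is not None:
        sql += " AND COALESCE(completed_at, started_at) > ?"
        args.append(since)
    rows = con.execute(sql + " ORDER BY _ts", args).fetchall()
    ts = [r[1] for r in rows]
    return [r[0] for r in rows], (max(ts) if ts else since)

ids, mark = poll(None)   # priming poll: every row with a witness
assert ids == [1, 2] and mark == "2026-04-20T10:05:00"
ids, mark = poll(mark)   # nothing changed since the mark
assert ids == []
```

ISO8601 strings compare lexicographically in timestamp order, which is what makes the plain `>` comparison and `max()` high-water mark valid here.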
async def handle_progress_stream(request: web.Request) -> web.StreamResponse:
"""GET /api/colonies/{colony_name}/progress/stream
SSE feed that emits ``snapshot`` once (current state) followed by
``upsert`` events whenever a task/step row changes. Polls the DB
every ``_PROGRESS_POLL_INTERVAL`` seconds: the sqlite3 CLI path
workers use for writes doesn't fire SQLite's update hook on our
connection, so polling is the robust option.
"""
colony_name = request.match_info["colony_name"]
worker_id = request.query.get("worker_id") or None
resp = web.StreamResponse(
status=200,
headers={
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache, no-transform",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
await resp.prepare(request)
async def _send(event: str, data: dict) -> None:
payload = f"event: {event}\ndata: {json.dumps(data)}\n\n"
await resp.write(payload.encode("utf-8"))
db_path = _resolve_progress_db_by_name(colony_name)
if db_path is None:
await _send("snapshot", {"tasks": [], "steps": []})
await _send("end", {"reason": "no_progress_db"})
return resp
try:
snapshot = await asyncio.to_thread(_read_progress_snapshot, db_path, worker_id)
await _send("snapshot", snapshot)
since: str | None = None
# Initialize the high-water mark from the snapshot so we don't
# re-emit every row as "new" on the first poll.
ts_values: list[str] = [t.get("updated_at") for t in snapshot["tasks"] if t.get("updated_at")]
ts_values.extend(
s.get("completed_at") or s.get("started_at")
for s in snapshot["steps"]
if s.get("completed_at") or s.get("started_at")
)
if ts_values:
since = max(v for v in ts_values if v)
# The loop relies on client disconnect surfacing as
# ConnectionResetError from ``_send`` — no explicit alive check
# required.
while True:
await asyncio.sleep(_PROGRESS_POLL_INTERVAL)
tasks, steps, new_since = await asyncio.to_thread(_read_progress_upserts, db_path, worker_id, since)
if tasks or steps:
await _send("upsert", {"tasks": tasks, "steps": steps})
since = new_since
except (asyncio.CancelledError, ConnectionResetError):
# Client disconnected; clean exit.
raise
except Exception as exc:
logger.warning("progress stream error: %s", exc, exc_info=True)
try:
await _send("error", {"message": str(exc)})
except Exception:
pass
return resp
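For reference, the frames `_send` emits follow the standard SSE `event:`/`data:` fields, one blank line per frame. A hypothetical minimal parser (a real client would use `EventSource` or an SSE client library) shows the wire shape:

```python
import json

# Minimal parser for frames shaped like "event: <name>\ndata: <json>\n\n".
# Illustrative only; it assumes exactly one event and one data field
# per frame, which matches what _send() above produces.
def parse_sse(buf: str):
    events = []
    for frame in buf.split("\n\n"):
        if not frame.strip():
            continue
        fields = dict(line.split(": ", 1) for line in frame.splitlines())
        events.append((fields["event"], json.loads(fields["data"])))
    return events

wire = (
    'event: snapshot\ndata: {"tasks": [], "steps": []}\n\n'
    'event: upsert\ndata: {"tasks": [{"id": 1}], "steps": []}\n\n'
)
parsed = parse_sse(wire)
assert parsed[0] == ("snapshot", {"tasks": [], "steps": []})
assert parsed[1][1]["tasks"][0]["id"] == 1
```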
# ── Raw data grid (Airtable-style view/edit of progress.db tables) ─────
#
# The Data tab lets the operator inspect and hand-edit SQLite rows.
# Identifier-quoting note: SQLite params can only bind values, never
# identifiers, so we have to interpolate table/column names into SQL.
# Every name is *validated against sqlite_master / PRAGMA table_info*
# before use and then wrapped with ``_q()`` which escapes embedded
# quotes. Do NOT accept raw names from the request without running them
# through ``_validate_ident`` first.
def _q(ident: str) -> str:
"""Quote a SQLite identifier (table or column) safely."""
return '"' + ident.replace('"', '""') + '"'
def _list_user_tables(con: sqlite3.Connection) -> list[str]:
return [
r["name"]
for r in con.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name"
)
]
def _table_columns(con: sqlite3.Connection, table: str) -> list[dict]:
"""Return PRAGMA table_info rows as dicts. Empty list if no such table."""
return [
{
"name": r["name"],
"type": r["type"] or "",
"notnull": bool(r["notnull"]),
# pk>0 means the column is part of the primary key (ordinal);
# 0 means non-PK.
"pk": int(r["pk"]),
"dflt_value": r["dflt_value"],
}
for r in con.execute(f"PRAGMA table_info({_q(table)})")
]
def _read_tables_overview(db_path: Path) -> list[dict]:
"""List user tables with columns + row counts."""
con = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=5.0)
try:
con.row_factory = sqlite3.Row
out: list[dict] = []
for name in _list_user_tables(con):
cols = _table_columns(con, name)
count_row = con.execute(f"SELECT COUNT(*) AS c FROM {_q(name)}").fetchone()
out.append(
{
"name": name,
"columns": cols,
"row_count": int(count_row["c"]),
"primary_key": [c["name"] for c in cols if c["pk"] > 0],
}
)
return out
finally:
con.close()
def _validate_ident(name: str, known: set[str]) -> str | None:
"""Return ``name`` if present in ``known``, else ``None``."""
return name if name in known else None
def _read_table_rows(
db_path: Path,
table: str,
limit: int,
offset: int,
order_by: str | None,
order_dir: str,
) -> dict:
con = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=5.0)
try:
con.row_factory = sqlite3.Row
tables = set(_list_user_tables(con))
if _validate_ident(table, tables) is None:
return {"error": f"unknown table: {table}"}
cols = _table_columns(con, table)
col_names = {c["name"] for c in cols}
sql = f"SELECT * FROM {_q(table)}"
if order_by and order_by in col_names:
direction = "DESC" if order_dir.lower() == "desc" else "ASC"
sql += f" ORDER BY {_q(order_by)} {direction}"
sql += " LIMIT ? OFFSET ?"
rows = con.execute(sql, (int(limit), int(offset))).fetchall()
total = con.execute(f"SELECT COUNT(*) AS c FROM {_q(table)}").fetchone()["c"]
return {
"table": table,
"columns": cols,
"primary_key": [c["name"] for c in cols if c["pk"] > 0],
"rows": [dict(r) for r in rows],
"total": int(total),
"limit": int(limit),
"offset": int(offset),
}
finally:
con.close()
def _update_table_row(
db_path: Path,
table: str,
pk: dict,
updates: dict,
) -> dict:
"""Apply ``updates`` (column->value) to the row matching ``pk``.
Returns ``{"updated": n}`` with the number of rows affected (0 or 1),
or ``{"error": ...}`` on validation failure.
"""
if not updates:
return {"error": "no updates provided"}
con = sqlite3.connect(db_path, timeout=5.0)
try:
con.row_factory = sqlite3.Row
tables = set(_list_user_tables(con))
if _validate_ident(table, tables) is None:
return {"error": f"unknown table: {table}"}
cols = _table_columns(con, table)
col_names = {c["name"] for c in cols}
pk_cols = [c["name"] for c in cols if c["pk"] > 0]
if not pk_cols:
return {"error": f"table {table!r} has no primary key; cannot edit by row"}
# Validate that ``pk`` supplies every primary-key column.
missing = [p for p in pk_cols if p not in pk]
if missing:
return {"error": f"missing primary key columns: {missing}"}
# Validate update columns exist and aren't part of the primary key
# (changing a PK column would silently break joins/foreign refs).
bad = [c for c in updates if c not in col_names]
if bad:
return {"error": f"unknown columns: {bad}"}
pk_update = [c for c in updates if c in pk_cols]
if pk_update:
return {"error": f"cannot edit primary key columns: {pk_update}"}
set_sql = ", ".join(f"{_q(c)} = ?" for c in updates)
where_sql = " AND ".join(f"{_q(c)} = ?" for c in pk_cols)
sql = f"UPDATE {_q(table)} SET {set_sql} WHERE {where_sql}"
params = list(updates.values()) + [pk[c] for c in pk_cols]
cur = con.execute(sql, params)
con.commit()
return {"updated": cur.rowcount}
finally:
con.close()
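The statement assembly can be sketched in isolation. `build_update` below is a hypothetical standalone helper mirroring the `set_sql`/`where_sql` construction above (note one simplification: the real code orders the WHERE terms by PRAGMA's `pk_cols`, not by the caller's `pk` dict):

```python
# Sketch of the statement _update_table_row assembles for a PATCH body
# like {"pk": {"id": 3}, "updates": {"status": "done", "note": "ok"}}.
# Table and column names are toy examples; in the real handler they are
# validated against sqlite_master / PRAGMA table_info before this point.
def _q(ident: str) -> str:
    return '"' + ident.replace('"', '""') + '"'

def build_update(table, pk, updates):
    set_sql = ", ".join(f"{_q(c)} = ?" for c in updates)
    where_sql = " AND ".join(f"{_q(c)} = ?" for c in pk)
    sql = f"UPDATE {_q(table)} SET {set_sql} WHERE {where_sql}"
    return sql, list(updates.values()) + list(pk.values())

sql, params = build_update("tasks", {"id": 3}, {"status": "done", "note": "ok"})
assert sql == 'UPDATE "tasks" SET "status" = ?, "note" = ? WHERE "id" = ?'
assert params == ["done", "ok", 3]
```

Identifiers are interpolated (quoted) while every value rides a `?` placeholder, which is exactly the split the identifier-quoting note above prescribes.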
async def handle_list_tables(request: web.Request) -> web.Response:
"""GET /api/colonies/{colony_name}/data/tables"""
colony_name = request.match_info["colony_name"]
db_path = _resolve_progress_db_by_name(colony_name)
if db_path is None:
return web.json_response({"tables": []})
tables = await asyncio.to_thread(_read_tables_overview, db_path)
return web.json_response({"tables": tables})
async def handle_table_rows(request: web.Request) -> web.Response:
"""GET /api/colonies/{colony_name}/data/tables/{table}/rows"""
colony_name = request.match_info["colony_name"]
db_path = _resolve_progress_db_by_name(colony_name)
if db_path is None:
return web.json_response({"error": "no progress.db"}, status=404)
table = request.match_info["table"]
# Clamp limit: 500 is enough for the grid's virtualization window;
# a larger cap would make accidental full-table loads too easy.
try:
limit = max(1, min(500, int(request.query.get("limit", "100"))))
offset = max(0, int(request.query.get("offset", "0")))
except ValueError:
return web.json_response({"error": "invalid limit/offset"}, status=400)
order_by = request.query.get("order_by") or None
order_dir = request.query.get("order_dir", "asc")
result = await asyncio.to_thread(_read_table_rows, db_path, table, limit, offset, order_by, order_dir)
if "error" in result:
return web.json_response(result, status=400)
return web.json_response(result)
async def handle_update_row(request: web.Request) -> web.Response:
"""PATCH /api/colonies/{colony_name}/data/tables/{table}/rows
Body: ``{"pk": {col: value, ...}, "updates": {col: value, ...}}``.
"""
colony_name = request.match_info["colony_name"]
db_path = _resolve_progress_db_by_name(colony_name)
if db_path is None:
return web.json_response({"error": "no progress.db"}, status=404)
try:
body = await request.json()
except Exception:
return web.json_response({"error": "invalid JSON body"}, status=400)
pk = body.get("pk") or {}
updates = body.get("updates") or {}
if not isinstance(pk, dict) or not isinstance(updates, dict):
return web.json_response({"error": "pk and updates must be objects"}, status=400)
table = request.match_info["table"]
result = await asyncio.to_thread(_update_table_row, db_path, table, pk, updates)
if "error" in result:
return web.json_response(result, status=400)
return web.json_response(result)
def register_routes(app: web.Application) -> None:
"""Register colony worker routes."""
# Session-scoped — these read live runtime state from a session.
app.router.add_get("/api/sessions/{session_id}/workers", handle_list_workers)
app.router.add_get("/api/sessions/{session_id}/colony/skills", handle_list_colony_skills)
app.router.add_get("/api/sessions/{session_id}/colony/tools", handle_list_colony_tools)
# Colony-scoped — one progress.db per colony, no session indirection.
app.router.add_get(
"/api/colonies/{colony_name}/progress/snapshot",
handle_progress_snapshot,
)
app.router.add_get(
"/api/colonies/{colony_name}/progress/stream",
handle_progress_stream,
)
app.router.add_get("/api/colonies/{colony_name}/data/tables", handle_list_tables)
app.router.add_get(
"/api/colonies/{colony_name}/data/tables/{table}/rows",
handle_table_rows,
)
app.router.add_patch(
"/api/colonies/{colony_name}/data/tables/{table}/rows",
handle_update_row,
)
+151 -9
@@ -6,6 +6,7 @@ Routes:
- GET /api/config/models: curated provider models list
"""
import asyncio
import json
import logging
import os
@@ -301,6 +302,53 @@ def _hot_swap_sessions(request: web.Request, full_model: str, api_key: str | Non
return swapped
async def _validate_provider_key(
provider: str,
api_key: str,
api_base: str | None = None,
model: str | None = None,
) -> dict:
"""Validate an API key against the provider. Returns {"valid": bool, "message": str}.
Runs the check in a thread pool to avoid blocking the event loop.
"""
from scripts.check_llm_key import (
PROVIDERS as CHECK_PROVIDERS,
check_anthropic_compatible,
check_minimax,
check_openai_compatible,
check_openrouter,
check_openrouter_model,
)
def _check() -> dict:
pid = provider.lower()
try:
# Subscription providers with custom api_base
if pid == "openrouter" and model:
return check_openrouter_model(api_key, model=model, api_base=api_base or "https://openrouter.ai/api/v1")
if api_base and pid == "minimax":
return check_minimax(api_key, api_base)
if api_base and pid == "openrouter":
return check_openrouter(api_key, api_base)
if api_base and pid == "kimi":
return check_anthropic_compatible(api_key, api_base.rstrip("/") + "/v1/messages", "Kimi")
if api_base and pid == "hive":
return check_anthropic_compatible(api_key, api_base.rstrip("/") + "/v1/messages", "Hive")
if api_base:
endpoint = api_base.rstrip("/") + "/models"
name = {"zai": "ZAI"}.get(pid, "Custom provider")
return check_openai_compatible(api_key, endpoint, name)
if pid in CHECK_PROVIDERS:
return CHECK_PROVIDERS[pid](api_key)
# No check available — assume valid
return {"valid": True, "message": f"No health check for {pid}"}
except Exception as exc:
return {"valid": None, "message": f"Validation error: {exc}"}
return await asyncio.get_running_loop().run_in_executor(None, _check)
# ------------------------------------------------------------------
# Handlers
# ------------------------------------------------------------------
@@ -324,9 +372,9 @@ async def handle_get_llm_config(request: web.Request) -> web.Response:
if _resolve_api_key(pid, request) is not None:
connected.append(pid)
# Subscription detection
# Subscription detection — only include subscriptions whose tokens exist
active_subscription = _get_active_subscription(llm)
detected_subscriptions = _detect_subscriptions()
detected_subscriptions = [sid for sid in _detect_subscriptions() if _get_subscription_token(sid)]
return web.json_response(
{
@@ -369,6 +417,21 @@ async def handle_update_llm_config(request: web.Request) -> web.Response:
provider = sub["provider"]
api_base = sub.get("api_base")
# Validate the subscription token before committing
token = _get_subscription_token(subscription_id)
if not token:
return web.json_response(
{"error": f"No credential found for {sub['name']}. Please check your subscription or API key."},
status=400,
)
check = await _validate_provider_key(provider, token, api_base=api_base)
if check.get("valid") is False:
return web.json_response(
{"error": f"{sub['name']} key validation failed: {check.get('message', 'unknown error')}"},
status=400,
)
# Look up token limits from preset
max_tokens: int | None = None
max_context_tokens: int | None = None
@@ -399,8 +462,7 @@ async def handle_update_llm_config(request: web.Request) -> web.Response:
_write_config_atomic(config)
# Hot-swap with subscription token
token = _get_subscription_token(subscription_id)
# Hot-swap with subscription token (already validated above)
full_model = f"{provider}/{model}"
swapped = _hot_swap_sessions(request, full_model, api_key=token, api_base=api_base)
@@ -430,15 +492,36 @@ async def handle_update_llm_config(request: web.Request) -> web.Response:
if not provider or not model:
return web.json_response({"error": "Both 'provider' and 'model' are required"}, status=400)
# Look up token limits from catalogue
# Verify model exists in the catalogue
model_info = _find_model_info(provider, model)
max_tokens = model_info["max_tokens"] if model_info else 8192
max_context_tokens = model_info["max_context_tokens"] if model_info else 120000
if not model_info:
return web.json_response(
{"error": f"Model '{model}' is not available for provider '{provider}'."},
status=400,
)
max_tokens = model_info["max_tokens"]
max_context_tokens = model_info["max_context_tokens"]
# Determine env var and api_base
env_var = PROVIDER_ENV_VARS.get(provider.lower(), "")
api_base = _get_api_base_for_provider(provider)
# Validate the API key before committing
api_key = _resolve_api_key(provider, request)
if not api_key:
return web.json_response(
{"error": f"No API key found for {provider}. Please add one in Manage Keys."},
status=400,
)
check = await _validate_provider_key(provider, api_key, api_base=api_base, model=model)
if check.get("valid") is False:
return web.json_response(
{"error": f"API key validation failed for {provider}: {check.get('message', 'unknown error')}"},
status=400,
)
# Update ~/.hive/configuration.json
config = get_hive_config()
llm_section = config.setdefault("llm", {})
@@ -458,8 +541,7 @@ async def handle_update_llm_config(request: web.Request) -> web.Response:
_write_config_atomic(config)
# Hot-swap all running sessions
api_key = _resolve_api_key(provider, request)
# Hot-swap all running sessions (api_key already validated above)
full_model = f"{provider}/{model}"
swapped = _hot_swap_sessions(request, full_model, api_key=api_key, api_base=api_base)
@@ -594,6 +676,64 @@ async def handle_get_models(request: web.Request) -> web.Response:
return web.json_response({"models": MODELS_CATALOGUE})
# ------------------------------------------------------------------
# User avatar
# ------------------------------------------------------------------
MAX_AVATAR_BYTES = 2 * 1024 * 1024 # 2 MB
_ALLOWED_AVATAR_TYPES = {
"image/jpeg": ".jpg",
"image/png": ".png",
"image/webp": ".webp",
}
async def handle_upload_user_avatar(request: web.Request) -> web.Response:
"""POST /api/config/profile/avatar — upload user profile picture."""
reader = await request.multipart()
field = await reader.next()
if field is None or field.name != "avatar":
return web.json_response({"error": "Expected a file field named 'avatar'"}, status=400)
content_type = getattr(field, "content_type", None) or field.headers.get("Content-Type", "")
ext = _ALLOWED_AVATAR_TYPES.get(content_type)
if not ext:
return web.json_response(
{"error": f"Unsupported image type: {content_type}. Use JPEG, PNG, or WebP."},
status=400,
)
data = bytearray()
while True:
chunk = await field.read_chunk(8192)
if not chunk:
break
data.extend(chunk)
if len(data) > MAX_AVATAR_BYTES:
return web.json_response({"error": "Image too large. Maximum size is 2 MB."}, status=400)
if not data:
return web.json_response({"error": "Empty file"}, status=400)
# Remove existing avatar files
for existing in HIVE_CONFIG_FILE.parent.glob("avatar.*"):
existing.unlink(missing_ok=True)
avatar_path = HIVE_CONFIG_FILE.parent / f"avatar{ext}"
avatar_path.write_bytes(data)
logger.info("User avatar uploaded: %s (%d bytes)", avatar_path.name, len(data))
return web.json_response({"avatar_url": "/api/config/profile/avatar"})
async def handle_get_user_avatar(request: web.Request) -> web.Response:
"""GET /api/config/profile/avatar — serve user profile picture."""
for ext in _ALLOWED_AVATAR_TYPES.values():
avatar_path = HIVE_CONFIG_FILE.parent / f"avatar{ext}"
if avatar_path.exists():
return web.FileResponse(avatar_path, headers={"Cache-Control": "public, max-age=3600"})
return web.json_response({"error": "No avatar found"}, status=404)
# ------------------------------------------------------------------
# Route registration
# ------------------------------------------------------------------
@@ -606,3 +746,5 @@ def register_routes(app: web.Application) -> None:
app.router.add_get("/api/config/models", handle_get_models)
app.router.add_get("/api/config/profile", handle_get_profile)
app.router.add_put("/api/config/profile", handle_update_profile)
app.router.add_post("/api/config/profile/avatar", handle_upload_user_avatar)
app.router.add_get("/api/config/profile/avatar", handle_get_user_avatar)
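The avatar handler's chunked size gate can be sketched synchronously; `read_limited` is an illustrative stand-in for the handler's async `read_chunk` loop, not part of the codebase:

```python
import io

MAX_AVATAR_BYTES = 2 * 1024 * 1024  # 2 MB, matching the handler
_ALLOWED = {"image/jpeg": ".jpg", "image/png": ".png", "image/webp": ".webp"}

def read_limited(stream, limit=MAX_AVATAR_BYTES, chunk_size=8192):
    # Mirrors the handler's loop: accumulate fixed-size chunks and bail
    # the moment the running total crosses the cap, instead of buffering
    # an arbitrarily large upload first.
    data = bytearray()
    while chunk := stream.read(chunk_size):
        data.extend(chunk)
        if len(data) > limit:
            return None  # handler maps this to a 400 "Image too large"
    return bytes(data)

assert read_limited(io.BytesIO(b"x" * 100)) == b"x" * 100
assert read_limited(io.BytesIO(b"x" * (MAX_AVATAR_BYTES + 1))) is None
assert _ALLOWED.get("image/gif") is None  # unsupported type is rejected
```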
+40 -11
@@ -51,13 +51,18 @@ DEFAULT_EVENT_TYPES = [
# Keepalive interval in seconds
KEEPALIVE_INTERVAL = 15.0
# Phase 5 SSE filter: parallel-worker streams (stream_id="worker:{uuid}")
# publish high-frequency LLM deltas / tool calls that would flood the
# user's queen DM chat. We let only this small allowlist of worker
# events through to the queen-chat SSE so the frontend can render
# fan-out lifecycle and structured fan-in reports without seeing the
# raw worker chatter. Per-worker SSE panels (Phase 5b) bypass this
# filter via a dedicated /workers/{worker_id}/events route.
# Session-SSE worker filter: workers run outside the queen's DM
# chat. Worker activity is observable via the dedicated
# ``/api/workers/{worker_id}/events`` per-worker SSE route, not via
# the session chat. This keeps the queen↔user conversation clean of
# tool-call chatter regardless of whether the worker was spawned by
# ``run_agent_with_input`` (stream_id="worker") or
# ``run_parallel_workers`` (stream_id="worker:{uuid}").
#
# Lifecycle events the frontend needs for fan-in summaries
# (SUBAGENT_REPORT, EXECUTION_COMPLETED, EXECUTION_FAILED) are still
# allowed through so the queen can show "N workers done" surfaces
# without exposing the per-turn chatter.
_WORKER_EVENT_ALLOWLIST = {
EventType.SUBAGENT_REPORT.value,
EventType.EXECUTION_COMPLETED.value,
@@ -66,9 +71,17 @@ _WORKER_EVENT_ALLOWLIST = {
def _is_worker_noise(evt_dict: dict) -> bool:
"""True if the event is a parallel-worker event we should drop."""
"""True if the event belongs to a worker stream and should not
surface in the queen DM chat.
Matches any stream starting with ``worker``: both the bare
``"worker"`` tag used by single-worker spawns and the
``"worker:{uuid}"`` tag used by parallel fan-outs. The allowlist
carves out the three terminal/lifecycle events the UI still
needs to render fan-in summaries.
"""
stream_id = evt_dict.get("stream_id") or ""
if not stream_id.startswith("worker:"):
if not stream_id.startswith("worker"):
return False
return evt_dict.get("type") not in _WORKER_EVENT_ALLOWLIST
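A standalone sketch of the widened prefix match, with invented event-type strings standing in for the `EventType` enum values:

```python
# Simplified model of _is_worker_noise after the fix: the bare "worker"
# stream (single spawns) is now filtered alongside "worker:{uuid}"
# (parallel fan-outs), while allowlisted lifecycle events still pass.
_ALLOW = {"subagent_report", "execution_completed", "execution_failed"}

def is_worker_noise(evt: dict) -> bool:
    stream_id = evt.get("stream_id") or ""
    if not stream_id.startswith("worker"):
        return False
    return evt.get("type") not in _ALLOW

assert is_worker_noise({"stream_id": "worker", "type": "llm_delta"})
assert is_worker_noise({"stream_id": "worker:ab12", "type": "tool_call"})
assert not is_worker_noise({"stream_id": "worker:ab12", "type": "subagent_report"})
assert not is_worker_noise({"stream_id": "queen", "type": "llm_delta"})
```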
@@ -106,6 +119,22 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
event_bus = session.event_bus
event_types = _parse_event_types(request.query.get("types"))
# Worker-noise filter is phase-aware. In DM mode (queen phase
# "independent") the queen's chat should stay clean — workers
# are invisible. In colony mode (phase "working"/"reviewing")
# the user IS supervising the workers and wants to see the
# tool-call/text-delta chatter as it happens. Sample the phase
# once at SSE connect; if the queen later transitions, the
# frontend reconnects.
def _should_filter_worker_noise() -> bool:
phase_state = getattr(session, "phase_state", None)
if phase_state is None:
return True # unknown phase → be conservative, filter noise
phase = getattr(phase_state, "phase", "independent")
return phase == "independent"
filter_worker_noise = _should_filter_worker_noise()
# Per-client buffer queue
queue: asyncio.Queue = asyncio.Queue(maxsize=1000)
@@ -132,7 +161,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
return
evt_dict = event.to_dict()
if _is_worker_noise(evt_dict):
if filter_worker_noise and _is_worker_noise(evt_dict):
return
if evt_dict.get("type") in _CRITICAL_EVENTS:
try:
@@ -189,7 +218,7 @@ async def handle_events(request: web.Request) -> web.StreamResponse:
for past_event in event_bus._event_history:
if past_event.type.value in replay_types:
past_dict = past_event.to_dict()
if _is_worker_noise(past_dict):
if filter_worker_noise and _is_worker_noise(past_dict):
continue
try:
queue.put_nowait(past_dict)
+208 -47
@@ -10,6 +10,7 @@ from aiohttp import web
from framework.agent_loop.conversation import LEGACY_RUN_ID
from framework.credentials.validation import validate_agent_credentials
from framework.host.execution_manager import ExecutionAlreadyRunningError
from framework.server.app import resolve_session, safe_path_segment, sessions_dir
from framework.server.routes_sessions import _credential_error_response
@@ -53,12 +54,19 @@ _WORKER_INHERITED_TOOLS: frozenset[str] = frozenset(
# Queen-lifecycle tools that are registered into the queen's tool registry
# but NOT listed in any _QUEEN_*_TOOLS phase list (they're reachable only via
# explicit registration, not phase-based gating). These must still be stripped
# from forked worker configs.
# explicit registration or as frontend-visible helpers, not phase-based
# gating). These must still be stripped from forked / parallel-spawned
# worker tool inventories.
_QUEEN_LIFECYCLE_EXTRAS: frozenset[str] = frozenset(
{
"stop_worker_and_plan",
"stop_worker_and_review",
# Phase-transition wrappers (method variants are on QueenPhaseState
# but the queen also sees them as tools).
"switch_to_reviewing",
"switch_to_independent",
# Frontend helpers that live outside phase lists.
"list_credentials",
"get_worker_health_summary",
"enqueue_task",
}
)
@@ -78,28 +86,33 @@ def _resolve_queen_only_tools() -> frozenset[str]:
nodes package is loaded.
"""
from framework.agents.queen.nodes import (
_QUEEN_BUILDING_TOOLS,
_QUEEN_EDITING_TOOLS,
_QUEEN_INDEPENDENT_TOOLS,
_QUEEN_PLANNING_TOOLS,
_QUEEN_RUNNING_TOOLS,
_QUEEN_STAGING_TOOLS,
_QUEEN_REVIEWING_TOOLS,
_QUEEN_WORKING_TOOLS,
)
union: set[str] = set()
for tool_list in (
_QUEEN_PLANNING_TOOLS,
_QUEEN_BUILDING_TOOLS,
_QUEEN_STAGING_TOOLS,
_QUEEN_RUNNING_TOOLS,
_QUEEN_EDITING_TOOLS,
_QUEEN_INDEPENDENT_TOOLS,
_QUEEN_WORKING_TOOLS,
_QUEEN_REVIEWING_TOOLS,
):
union.update(tool_list)
derived = union - _WORKER_INHERITED_TOOLS
return frozenset(derived | _QUEEN_LIFECYCLE_EXTRAS)
def _execution_already_running_response(exc: ExecutionAlreadyRunningError) -> web.Response:
return web.json_response(
{
"error": str(exc),
"stream_id": exc.stream_id,
"active_execution_ids": exc.active_ids,
},
status=409,
)
async def handle_trigger(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/trigger — start an execution.
@@ -141,11 +154,14 @@ async def handle_trigger(request: web.Request) -> web.Response:
if "resume_session_id" not in session_state:
session_state["resume_session_id"] = session.id
execution_id = await session.colony_runtime.trigger(
entry_point_id,
input_data,
session_state=session_state,
)
try:
execution_id = await session.colony_runtime.trigger(
entry_point_id,
input_data,
session_state=session_state,
)
except ExecutionAlreadyRunningError as exc:
return _execution_already_running_response(exc)
# Cancel queen's in-progress LLM turn so it picks up the phase change cleanly
if session.queen_executor:
@@ -153,9 +169,9 @@ async def handle_trigger(request: web.Request) -> web.Response:
if node and hasattr(node, "cancel_current_turn"):
node.cancel_current_turn()
# Switch queen to running phase (mirrors run_agent_with_input tool behavior)
# Switch queen to working phase — workers just started from the UI.
if session.phase_state is not None:
await session.phase_state.switch_to_running(source="frontend")
await session.phase_state.switch_to_working(source="frontend")
return web.json_response({"execution_id": execution_id})
@@ -434,11 +450,14 @@ async def handle_resume(request: web.Request) -> web.Response:
input_data = state.get("input_data", {})
execution_id = await session.colony_runtime.trigger(
entry_points[0].id,
input_data=input_data,
session_state=resume_session_state,
)
try:
execution_id = await session.colony_runtime.trigger(
entry_points[0].id,
input_data=input_data,
session_state=resume_session_state,
)
except ExecutionAlreadyRunningError as exc:
return _execution_already_running_response(exc)
return web.json_response(
{
@@ -465,6 +484,7 @@ async def handle_pause(request: web.Request) -> web.Response:
runtime = session.colony_runtime
cancelled = []
cancelling = []
for colony_id in runtime.list_graphs():
reg = runtime.get_graph_registration(colony_id)
@@ -481,23 +501,27 @@ async def handle_pause(request: web.Request) -> web.Response:
for exec_id in list(stream.active_execution_ids):
try:
ok = await stream.cancel_execution(exec_id, reason="Execution paused by user")
if ok:
outcome = await stream.cancel_execution(exec_id, reason="Execution paused by user")
if outcome == "cancelled":
cancelled.append(exec_id)
elif outcome == "cancelling":
cancelling.append(exec_id)
except Exception:
pass
# Pause timers so the next tick doesn't restart execution
runtime.pause_timers()
# Switch to staging (agent still loaded, ready to re-run)
# Switch to reviewing — workers stopped, queen now helps the user
# interpret whatever they produced and decide next steps.
if session.phase_state is not None:
await session.phase_state.switch_to_staging(source="frontend")
await session.phase_state.switch_to_reviewing(source="frontend")
return web.json_response(
{
"stopped": bool(cancelled),
"stopped": bool(cancelled) and not cancelling,
"cancelled": cancelled,
"cancelling": cancelling,
"timers_paused": True,
}
)
@@ -534,24 +558,36 @@ async def handle_stop(request: web.Request) -> web.Response:
if hasattr(node, "cancel_current_turn"):
node.cancel_current_turn()
cancelled = await stream.cancel_execution(execution_id, reason="Execution stopped by user")
if cancelled:
outcome = await stream.cancel_execution(execution_id, reason="Execution stopped by user")
if outcome == "cancelled":
# Cancel queen's in-progress LLM turn
if session.queen_executor:
node = session.queen_executor.node_registry.get("queen")
if node and hasattr(node, "cancel_current_turn"):
node.cancel_current_turn()
# Switch to staging (agent still loaded, ready to re-run)
# Switch to reviewing — worker stopped, queen helps the user
# interpret what happened and decide next steps.
if session.phase_state is not None:
await session.phase_state.switch_to_staging(source="frontend")
await session.phase_state.switch_to_reviewing(source="frontend")
return web.json_response(
{
"stopped": True,
"cancelling": False,
"execution_id": execution_id,
}
)
if outcome == "cancelling":
return web.json_response(
{
"stopped": False,
"cancelling": True,
"execution_id": execution_id,
},
status=202,
)
return web.json_response({"stopped": False, "error": "Execution not found"}, status=404)
@@ -594,11 +630,14 @@ async def handle_replay(request: web.Request) -> web.Response:
"run_id": _load_checkpoint_run_id(cp_path),
}
execution_id = await session.colony_runtime.trigger(
entry_points[0].id,
input_data={},
session_state=replay_session_state,
)
try:
execution_id = await session.colony_runtime.trigger(
entry_points[0].id,
input_data={},
session_state=replay_session_state,
)
except ExecutionAlreadyRunningError as exc:
return _execution_already_running_response(exc)
return web.json_response(
{
@@ -644,6 +683,7 @@ async def handle_colony_spawn(request: web.Request) -> web.Response:
body = await request.json()
colony_name = body.get("colony_name", "").strip()
task = body.get("task", "").strip()
tasks = body.get("tasks")
if not colony_name:
return web.json_response({"error": "colony_name is required"}, status=400)
@@ -661,6 +701,7 @@ async def handle_colony_spawn(request: web.Request) -> web.Response:
session=session,
colony_name=colony_name,
task=task,
tasks=tasks if isinstance(tasks, list) else None,
)
except Exception as e:
logger.exception("colony_spawn fork failed")
@@ -674,6 +715,7 @@ async def fork_session_into_colony(
session: Any,
colony_name: str,
task: str,
tasks: list[dict] | None = None,
) -> dict:
"""Fork a queen session into a colony directory.
@@ -690,8 +732,14 @@ async def fork_session_into_colony(
the colony resumes with the queen's entire conversation history.
3. Multiple independent sessions can be created against the same colony,
giving parallel execution capacity without separate worker configs.
4. Initializes (or ensures) ``data/progress.db``, the colony's SQLite
task queue + progress ledger. When *tasks* is provided, the
queen-authored task batch is seeded into the queue in one transaction.
The absolute DB path is threaded into the worker's ``input_data``
so spawned workers see it in their first user message.
Returns ``{"colony_path", "colony_name", "queen_session_id", "is_new"}``.
Returns ``{"colony_path", "colony_name", "queen_session_id", "is_new",
"db_path", "task_ids"}``.
"""
import asyncio
import json
@@ -701,20 +749,115 @@ async def fork_session_into_colony(
from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
from framework.agent_loop.types import AgentContext
from framework.host.progress_db import ensure_progress_db, seed_tasks
from framework.server.session_manager import _queen_session_dir
queen_loop: AgentLoop = session.queen_executor.node_registry["queen"]
queen_ctx: AgentContext = getattr(queen_loop, "_last_ctx", None)
# Diagnostic capture: when the fork fails here we want to know which
# piece of queen state was missing (executor cleared vs. node missing
# vs. _last_ctx never stamped). Without this, callers only see
# "'NoneType' object has no attribute 'node_registry'" with no hint
# whether the queen loop exited, is mid-revive, or ran a different
# path that never ran AgentLoop._execute_impl.
queen_executor = getattr(session, "queen_executor", None)
queen_task = getattr(session, "queen_task", None)
phase_state_dbg = getattr(session, "phase_state", None)
logger.info(
"[fork_session_into_colony] session=%s colony=%s "
"queen_executor=%s queen_task=%s queen_task_done=%s "
"phase=%s queen_name=%s",
session.id,
colony_name,
queen_executor,
queen_task,
queen_task.done() if queen_task is not None else None,
getattr(phase_state_dbg, "phase", None),
getattr(session, "queen_name", None),
)
if queen_executor is None:
raise RuntimeError(
f"queen_executor is None for session {session.id!r} — the "
"queen loop isn't running right now. Wait for the queen to "
"come back (or send her a chat message to revive her) and "
"retry create_colony. The skill folder is already written, "
"so the retry is free."
)
node_registry = getattr(queen_executor, "node_registry", None)
if not isinstance(node_registry, dict) or "queen" not in node_registry:
raise RuntimeError(
f"queen node is missing from the executor's registry for "
f"session {session.id!r} (registry keys="
f"{list(node_registry.keys()) if isinstance(node_registry, dict) else type(node_registry).__name__}"
"). The queen loop is in an initialization or teardown "
"window; retry after a moment."
)
queen_loop: AgentLoop = node_registry["queen"]
queen_ctx: AgentContext = getattr(queen_loop, "_last_ctx", None)
if queen_ctx is None:
logger.warning(
"[fork_session_into_colony] queen_loop has no _last_ctx yet "
"(session=%s) — falling back to empty tool/skill snapshot; "
"the forked worker will inherit no tools.",
session.id,
)
# "is_new" keys off worker.json, not bare dir existence: the queen's
# create_colony tool now pre-creates colony_dir (so it can
# materialize the colony-scoped skill folder BEFORE the fork), which
# would wrongly flag every fresh colony as "already-exists" if we
# used ``not colony_dir.exists()``. A colony is "new" until its
# worker config has actually been written.
colony_dir = Path.home() / ".hive" / "colonies" / colony_name
is_new = not colony_dir.exists()
worker_name = "worker"
worker_config_path = colony_dir / f"{worker_name}.json"
is_new = not worker_config_path.exists()
colony_dir.mkdir(parents=True, exist_ok=True)
(colony_dir / "data").mkdir(exist_ok=True)
# Fixed worker name -- sessions are the unit of parallelism, not workers
worker_name = "worker"
# ── 0. Ensure the colony's progress DB exists and seed tasks ──
# Runs before worker.json is written so the DB path can be threaded
# into input_data. Idempotent on reruns of the same colony name.
db_path = await asyncio.to_thread(ensure_progress_db, colony_dir)
seeded_task_ids: list[str] = []
if tasks:
seeded_task_ids = await asyncio.to_thread(seed_tasks, db_path, tasks, source="queen_create")
logger.info(
"progress_db: seeded %d task(s) into colony '%s'",
len(seeded_task_ids),
colony_name,
)
elif task and task.strip():
# Phase 2 auto-seed: when the queen uses the simple single-task
# form of create_colony (no explicit ``tasks=[{...}]`` list),
# insert exactly one row so the first worker spawned into this
# colony has something to claim. Without this the queue is
# empty and the worker falls back to executing from the chat
# spawn message, defeating the cross-run durability the tracker
# exists for.
try:
seeded_task_ids = await asyncio.to_thread(
seed_tasks,
db_path,
[{"goal": task.strip()}],
source="create_colony_auto",
)
logger.info(
"progress_db: auto-seeded 1 task into colony '%s' (task_id=%s, from single-task create_colony form)",
colony_name,
seeded_task_ids[0] if seeded_task_ids else "?",
)
except Exception as exc:
logger.warning(
"progress_db: auto-seed failed for colony '%s' (continuing without a pre-seeded row): %s",
colony_name,
exc,
)
worker_config_path = colony_dir / f"{worker_name}.json"
# Fixed worker name and config path are already computed above so
# ``is_new`` can be derived from worker.json rather than the colony
# directory (see comment on the ``is_new`` block).
# ── 1. Gather queen state ─────────────────────────────────────
# Queen-lifecycle + agent-management tools are registered ONLY against
@@ -772,10 +915,26 @@ async def fork_session_into_colony(
# worker is not Charlotte / Alexandra / etc., it is a task executor.
# Inheriting the queen's persona made the worker greet the user in
# first person with no memory of the task it was actually given.
# Thread the first seeded task_id into input_data so the worker's
# first claim pins to a specific row (skill's assigned-task-id
# branch). When multiple tasks were seeded we only pin the first —
# subsequent workers (via run_agent_with_input or parallel spawns)
# get their own task_id assigned at spawn time.
_worker_input_data: dict[str, Any] = {
"db_path": str(db_path),
"colony_id": colony_name,
}
if seeded_task_ids:
_worker_input_data["task_id"] = seeded_task_ids[0]
worker_meta = {
"name": worker_name,
"version": "1.0.0",
"description": f"Worker clone from queen session {session.id}",
# Colony progress tracker: worker sees these in its first user
# message via _format_spawn_task_message. The colony-progress-
# tracker default skill teaches the worker how to use them.
"input_data": _worker_input_data,
"goal": {
"description": worker_task,
"success_criteria": [],
@@ -907,6 +1066,8 @@ async def fork_session_into_colony(
"colony_name": colony_name,
"queen_session_id": colony_session_id,
"is_new": is_new,
"db_path": str(db_path),
"task_ids": seeded_task_ids,
}
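The "is_new keys off worker.json" rule from the hunk above can be sketched in isolation; `colony_is_new` is a hypothetical helper for illustration, not part of the codebase:

```python
from pathlib import Path

def colony_is_new(colony_dir: Path, worker_name: str = "worker") -> bool:
    # The directory may be pre-created (e.g. to materialize the
    # colony-scoped skill folder before the fork), so bare directory
    # existence is the wrong test. A colony counts as "new" until its
    # worker config has actually been written.
    return not (colony_dir / f"{worker_name}.json").exists()
```

With this test, pre-creating `colony_dir` for the skill folder no longer flags a fresh colony as already existing.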
@@ -0,0 +1,87 @@
"""Custom user prompts — CRUD for user-uploaded prompts.
- GET /api/prompts list all custom prompts
- POST /api/prompts add a new custom prompt
- DELETE /api/prompts/{id} delete a custom prompt
"""
import json
import logging
import time
from aiohttp import web
from framework.config import HIVE_HOME
logger = logging.getLogger(__name__)
CUSTOM_PROMPTS_FILE = HIVE_HOME / "custom_prompts.json"
def _load_custom_prompts() -> list[dict]:
if not CUSTOM_PROMPTS_FILE.exists():
return []
try:
data = json.loads(CUSTOM_PROMPTS_FILE.read_text(encoding="utf-8"))
return data if isinstance(data, list) else []
except Exception:
return []
def _save_custom_prompts(prompts: list[dict]) -> None:
CUSTOM_PROMPTS_FILE.parent.mkdir(parents=True, exist_ok=True)
CUSTOM_PROMPTS_FILE.write_text(
json.dumps(prompts, indent=2, ensure_ascii=False) + "\n",
encoding="utf-8",
)
async def handle_list_prompts(request: web.Request) -> web.Response:
"""GET /api/prompts — list all custom prompts."""
return web.json_response({"prompts": _load_custom_prompts()})
async def handle_create_prompt(request: web.Request) -> web.Response:
"""POST /api/prompts — add a new custom prompt."""
try:
body = await request.json()
except Exception:
return web.json_response({"error": "Invalid JSON body"}, status=400)
title = (body.get("title") or "").strip()
category = (body.get("category") or "").strip()
content = (body.get("content") or "").strip()
if not title or not content:
return web.json_response({"error": "Title and content are required"}, status=400)
prompts = _load_custom_prompts()
new_prompt = {
"id": f"custom_{int(time.time() * 1000)}",
"title": title,
"category": category or "custom",
"content": content,
"custom": True,
}
prompts.append(new_prompt)
_save_custom_prompts(prompts)
logger.info("Custom prompt added: %s", title)
return web.json_response(new_prompt, status=201)
async def handle_delete_prompt(request: web.Request) -> web.Response:
"""DELETE /api/prompts/{prompt_id} — delete a custom prompt."""
prompt_id = request.match_info["prompt_id"]
prompts = _load_custom_prompts()
before = len(prompts)
prompts = [p for p in prompts if p.get("id") != prompt_id]
if len(prompts) == before:
return web.json_response({"error": "Prompt not found"}, status=404)
_save_custom_prompts(prompts)
return web.json_response({"deleted": prompt_id})
def register_routes(app: web.Application) -> None:
app.router.add_get("/api/prompts", handle_list_prompts)
app.router.add_post("/api/prompts", handle_create_prompt)
app.router.add_delete("/api/prompts/{prompt_id}", handle_delete_prompt)
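The load/append/save cycle behind these handlers amounts to a small JSON-file CRUD helper. A minimal sketch with the same `custom_{ms}` ID scheme (the function name is illustrative, not the module's API):

```python
import json
import time
from pathlib import Path

def add_prompt(path: Path, title: str, content: str, category: str = "") -> dict:
    """Append a prompt record to a JSON list file, creating it on first use."""
    prompts = json.loads(path.read_text(encoding="utf-8")) if path.exists() else []
    if not isinstance(prompts, list):
        prompts = []  # tolerate a corrupt file, as _load_custom_prompts does
    new = {
        "id": f"custom_{int(time.time() * 1000)}",  # ms timestamp as a cheap unique id
        "title": title,
        "category": category or "custom",
        "content": content,
        "custom": True,
    }
    prompts.append(new)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(
        json.dumps(prompts, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )
    return new
```

The millisecond-timestamp ID is collision-free in practice for a single-user prompt library, which is the scale this endpoint targets.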
@@ -3,6 +3,8 @@
- GET /api/queen/profiles -- list all queen profiles (id, name, title)
- GET /api/queen/{queen_id}/profile -- get full queen profile
- PATCH /api/queen/{queen_id}/profile -- update queen profile fields
- POST /api/queen/{queen_id}/avatar -- upload queen avatar image
- GET /api/queen/{queen_id}/avatar -- serve queen avatar image
- POST /api/queen/{queen_id}/session -- get or create a persistent session for a queen
- POST /api/queen/{queen_id}/session/select -- resume a specific session for a queen
- POST /api/queen/{queen_id}/session/new -- create a fresh session for a queen
@@ -166,6 +168,34 @@ async def handle_get_profile(request: web.Request) -> web.Response:
return web.json_response({"id": queen_id, **api_profile})
def _reverse_transform_for_yaml(body: dict) -> dict:
"""Map API-format fields back to YAML profile fields.
The API exposes a simplified view (summary, skills, signature_achievement)
that maps onto the underlying YAML structure (core_traits, hidden_background,
psychological_profile, world_lore, etc.).
"""
yaml_updates: dict[str, Any] = {}
if "name" in body:
yaml_updates["name"] = body["name"]
if "title" in body:
yaml_updates["title"] = body["title"]
if "summary" in body:
# Summary is displayed as core_traits + anti_stereotype joined by \n\n.
# Store the full text in core_traits for simplicity.
yaml_updates["core_traits"] = body["summary"]
if "skills" in body:
yaml_updates["skills"] = body["skills"]
if "signature_achievement" in body:
yaml_updates.setdefault("world_lore", {})["habitat"] = body["signature_achievement"]
return yaml_updates
async def handle_update_profile(request: web.Request) -> web.Response:
"""PATCH /api/queen/{queen_id}/profile — update queen profile fields."""
queen_id = request.match_info["queen_id"]
@@ -175,11 +205,18 @@ async def handle_update_profile(request: web.Request) -> web.Response:
return web.json_response({"error": "Invalid JSON body"}, status=400)
if not isinstance(body, dict):
return web.json_response({"error": "Body must be a JSON object"}, status=400)
yaml_updates = _reverse_transform_for_yaml(body)
if not yaml_updates:
return web.json_response({"error": "No valid fields to update"}, status=400)
try:
updated = update_queen_profile(queen_id, body)
updated = update_queen_profile(queen_id, yaml_updates)
except FileNotFoundError:
return web.json_response({"error": f"Queen '{queen_id}' not found"}, status=404)
return web.json_response({"id": queen_id, **updated})
api_profile = _transform_profile_for_api(updated)
return web.json_response({"id": queen_id, **api_profile})
async def handle_queen_session(request: web.Request) -> web.Response:
@@ -207,9 +244,12 @@ async def handle_queen_session(request: web.Request) -> web.Response:
initial_prompt = body.get("initial_prompt")
initial_phase = body.get("initial_phase")
# 1. Check for an existing live session bound to this queen.
# 1. Check for an existing live DM session bound to this queen.
# Skip colony sessions: a colony forked from this queen also carries
# queen_name == queen_id, but it has a worker loaded (colony_id /
# worker_path set) and is the colony's chat, not the queen's DM.
for session in manager.list_sessions():
if session.queen_name == queen_id:
if session.queen_name == queen_id and session.colony_id is None and session.worker_path is None:
return web.json_response(
{
"session_id": session.id,
@@ -318,7 +358,9 @@ async def handle_select_queen_session(request: web.Request) -> web.Response:
meta = _read_queen_session_meta(queen_id, target_session_id)
agent_path = meta.get("agent_path")
initial_phase = None if agent_path else "independent"
# Colony resume (agent loaded) → "working" (3-phase target).
# Standalone queen resume → "independent" (DM mode).
initial_phase = "working" if agent_path else "independent"
session = await _create_bound_queen_session(
manager,
queen_id,
@@ -363,11 +405,98 @@ async def handle_new_queen_session(request: web.Request) -> web.Response:
)
MAX_AVATAR_BYTES = 2 * 1024 * 1024 # 2 MB max after compression
_ALLOWED_AVATAR_TYPES = {
"image/jpeg": ".jpg",
"image/png": ".png",
"image/webp": ".webp",
}
async def handle_upload_avatar(request: web.Request) -> web.Response:
"""POST /api/queen/{queen_id}/avatar — upload queen avatar image.
Accepts multipart/form-data with a single file field named 'avatar'.
Stores as avatar.{ext} in the queen's profile directory.
"""
from framework.config import QUEENS_DIR
queen_id = request.match_info["queen_id"]
queen_dir = QUEENS_DIR / queen_id
if not (queen_dir / "profile.yaml").exists():
return web.json_response({"error": f"Queen '{queen_id}' not found"}, status=404)
reader = await request.multipart()
field = await reader.next()
if field is None or field.name != "avatar":
return web.json_response({"error": "Expected a file field named 'avatar'"}, status=400)
content_type = field.headers.get("Content-Type", "application/octet-stream")
# Prefer the field's parsed content_type attribute when available
if hasattr(field, "content_type"):
content_type = field.content_type or content_type
ext = _ALLOWED_AVATAR_TYPES.get(content_type)
if not ext:
return web.json_response(
{"error": f"Unsupported image type: {content_type}. Use JPEG, PNG, or WebP."},
status=400,
)
# Read the file data with size limit
data = bytearray()
while True:
chunk = await field.read_chunk(8192)
if not chunk:
break
data.extend(chunk)
if len(data) > MAX_AVATAR_BYTES:
return web.json_response(
{"error": f"Image too large. Maximum size is {MAX_AVATAR_BYTES // 1024 // 1024} MB."},
status=400,
)
if not data:
return web.json_response({"error": "Empty file"}, status=400)
# Remove any existing avatar files
for existing in queen_dir.glob("avatar.*"):
existing.unlink(missing_ok=True)
# Write the new avatar
avatar_path = queen_dir / f"avatar{ext}"
avatar_path.write_bytes(data)
logger.info("Avatar uploaded for queen %s: %s (%d bytes)", queen_id, avatar_path.name, len(data))
return web.json_response({"avatar_url": f"/api/queen/{queen_id}/avatar"})
async def handle_get_avatar(request: web.Request) -> web.Response:
"""GET /api/queen/{queen_id}/avatar — serve queen avatar image."""
from framework.config import QUEENS_DIR
queen_id = request.match_info["queen_id"]
queen_dir = QUEENS_DIR / queen_id
# Find avatar file with any supported extension
for ext in _ALLOWED_AVATAR_TYPES.values():
avatar_path = queen_dir / f"avatar{ext}"
if avatar_path.exists():
return web.FileResponse(
avatar_path,
headers={"Cache-Control": "public, max-age=3600"},
)
return web.json_response({"error": "No avatar found"}, status=404)
def register_routes(app: web.Application) -> None:
"""Register queen profile routes."""
app.router.add_get("/api/queen/profiles", handle_list_profiles)
app.router.add_get("/api/queen/{queen_id}/profile", handle_get_profile)
app.router.add_patch("/api/queen/{queen_id}/profile", handle_update_profile)
app.router.add_post("/api/queen/{queen_id}/avatar", handle_upload_avatar)
app.router.add_get("/api/queen/{queen_id}/avatar", handle_get_avatar)
app.router.add_post("/api/queen/{queen_id}/session", handle_queen_session)
app.router.add_post("/api/queen/{queen_id}/session/select", handle_select_queen_session)
app.router.add_post("/api/queen/{queen_id}/session/new", handle_new_queen_session)
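The size-capped chunked read in `handle_upload_avatar` generalizes to any streaming source; here is a minimal synchronous sketch of the technique (hypothetical helper, written against an iterator of byte chunks rather than aiohttp's multipart field):

```python
MAX_BYTES = 2 * 1024 * 1024  # mirrors the 2 MB avatar cap

def read_capped(chunks, max_bytes: int = MAX_BYTES) -> bytes:
    """Accumulate chunks, failing as soon as the running total exceeds the
    cap instead of buffering an arbitrarily large upload first."""
    data = bytearray()
    for chunk in chunks:
        data.extend(chunk)
        if len(data) > max_bytes:
            raise ValueError(f"payload exceeds {max_bytes} bytes")
    if not data:
        raise ValueError("empty payload")
    return bytes(data)
```

Checking the cap inside the loop is the point: a hostile client cannot force the server to hold more than `max_bytes` plus one chunk in memory.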
@@ -686,6 +686,10 @@ async def handle_session_colonies(request: web.Request) -> web.Response:
return web.json_response({"colonies": colonies})
_EVENTS_HISTORY_DEFAULT_LIMIT = 2000
_EVENTS_HISTORY_MAX_LIMIT = 10000
async def handle_session_events_history(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/events/history — persisted eventbus log.
@@ -693,17 +697,58 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
both live sessions and cold (post-server-restart) sessions. The frontend
replays these events through ``sseEventToChatMessage`` to fully reconstruct
the UI state on resume.
Query params:
limit: maximum number of events to return (default 2000, max 10000).
The TAIL of the file is returned, i.e. the most recent N events.
Older events are dropped and ``truncated`` is set to True.
Response shape::
{
"events": [...], # up to ``limit`` events, oldest-first
"session_id": "...",
"total": 12345, # total events in the file
"returned": 2000, # len(events)
"truncated": true, # total > returned
"limit": 2000, # the effective limit used
}
``events.jsonl`` is append-only chronological, so "last N lines" == "most
recent N events". Long-running colonies have produced files with 50k+
events; before this cap, restoring on page-mount shipped the whole thing
down the wire and blocked the UI for seconds.
"""
session_id = request.match_info["session_id"]
try:
limit = int(request.query.get("limit", str(_EVENTS_HISTORY_DEFAULT_LIMIT)))
except ValueError:
limit = _EVENTS_HISTORY_DEFAULT_LIMIT
limit = max(1, min(limit, _EVENTS_HISTORY_MAX_LIMIT))
from framework.server.session_manager import _find_queen_session_dir
queen_dir = _find_queen_session_dir(session_id)
events_path = queen_dir / "events.jsonl"
if not events_path.exists():
return web.json_response({"events": [], "session_id": session_id})
return web.json_response(
{
"events": [],
"session_id": session_id,
"total": 0,
"returned": 0,
"truncated": False,
"limit": limit,
}
)
events: list[dict] = []
# Tail the file using a bounded deque — O(limit) memory regardless
# of file size. No need to materialize the whole list only to slice it.
from collections import deque
tail: deque[dict] = deque(maxlen=limit)
total = 0
try:
with open(events_path, encoding="utf-8") as f:
for line in f:
@@ -711,13 +756,34 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
if not line:
continue
try:
events.append(json.loads(line))
evt = json.loads(line)
except json.JSONDecodeError:
continue
total += 1
tail.append(evt)
except OSError:
return web.json_response({"events": [], "session_id": session_id})
return web.json_response(
{
"events": [],
"session_id": session_id,
"total": 0,
"returned": 0,
"truncated": False,
"limit": limit,
}
)
return web.json_response({"events": events, "session_id": session_id})
events = list(tail)
return web.json_response(
{
"events": events,
"session_id": session_id,
"total": total,
"returned": len(events),
"truncated": total > len(events),
"limit": limit,
}
)
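The bounded-deque tailing used by the handler can be exercised on its own; `tail_events` below is an illustrative stand-in for the loop above, not the handler itself:

```python
import json
from collections import deque

def tail_events(lines, limit: int):
    """Keep only the last *limit* parseable JSON events -- O(limit) memory
    regardless of input size. Returns (events, total, truncated) matching
    the response fields."""
    tail: deque = deque(maxlen=limit)
    total = 0
    for line in lines:
        line = line.strip()
        if not line:
            continue
        try:
            evt = json.loads(line)
        except json.JSONDecodeError:
            continue  # skip corrupt lines rather than failing the whole read
        total += 1
        tail.append(evt)  # deque silently evicts the oldest once full
    events = list(tail)
    return events, total, total > len(events)
```

Because `deque(maxlen=limit)` evicts from the left automatically, there is no need to materialize all 50k+ events only to slice the last N.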
async def handle_session_history(request: web.Request) -> web.Response:
@@ -236,6 +236,217 @@ async def handle_node_tools(request: web.Request) -> web.Response:
return web.json_response({"tools": tools_out})
# ---------------------------------------------------------------------------
# Live worker control — list / stop a specific worker / stop all
# ---------------------------------------------------------------------------
def _active_colony(session):
"""Return the session's unified ColonyRuntime (``session.colony``) if present.
All spawned workers (queen-overseer + run_parallel_workers fan-outs)
are hosted here. ``session.colony_runtime`` is a different concept
(loaded agent graph) and doesn't hold the live worker registry we
need to enumerate / stop.
"""
return getattr(session, "colony", None)
def _build_live_workers_payload(colony) -> list[dict]:
"""Serialize the colony's current worker registry.
Extracted so both the one-shot ``GET /workers`` handler and the SSE
``/workers/stream`` handler render the exact same shape.
"""
if colony is None:
return []
now = time.monotonic()
payload: list[dict] = []
try:
workers = list(colony._workers.values()) # type: ignore[attr-defined]
except Exception:
workers = []
for w in workers:
started_at = getattr(w, "_started_at", 0.0) or 0.0
duration = (now - started_at) if started_at else 0.0
result = getattr(w, "_result", None)
payload.append(
{
"worker_id": w.id,
"task": (w.task or "")[:400],
"status": str(getattr(w, "status", "unknown")),
"is_active": bool(getattr(w, "is_active", False)),
"duration_seconds": round(duration, 1),
"explicit_report": getattr(w, "_explicit_report", None),
"result_status": (result.status if result else None),
"result_summary": (result.summary if result else None),
}
)
# Active workers first, then terminated, newest-started first within group.
payload.sort(key=lambda r: (not r["is_active"], -(r["duration_seconds"] or 0)))
return payload
def _payload_change_signature(payload: list[dict]) -> tuple:
"""Cheap fingerprint for change detection on the SSE stream.
We intentionally exclude ``duration_seconds`` it ticks every call
and would make every poll look like a change, defeating the "only
emit on change" optimisation. Everything else (status, result,
explicit_report) actually reflects worker state transitions.
"""
return tuple(
(
w["worker_id"],
w["status"],
w["is_active"],
w["result_status"],
w["result_summary"],
bool(w["explicit_report"]),
)
for w in payload
)
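The emit-only-on-change pattern this signature enables can be shown standalone. A simplified sketch (field names assumed from the payload above; the real stream compares more fields):

```python
def emit_on_change(snapshots):
    """Yield only those snapshots whose fingerprint differs from the last
    emitted one -- volatile fields like duration are deliberately left out
    of the fingerprint so identical states compare equal."""
    last = None
    for snap in snapshots:
        sig = tuple((w["worker_id"], w["status"], w["is_active"]) for w in snap)
        if sig != last:
            yield snap
            last = sig
```

This keeps the SSE feed quiet between real state transitions even though the runtime is polled every couple of seconds.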
async def handle_live_workers_stream(request: web.Request) -> web.StreamResponse:
"""GET /api/sessions/{session_id}/workers/stream — SSE feed.
Emits a ``snapshot`` event immediately, then re-emits every time
the worker registry changes (status transitions, new spawns, new
reports). Polls the runtime every 2s internally, since the colony's
``_workers`` dict is not observable otherwise. A client disconnect
bubbles up as ConnectionResetError from ``resp.write``.
"""
session, err = resolve_session(request)
if err:
return err
import asyncio
resp = web.StreamResponse(
status=200,
headers={
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache, no-transform",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
await resp.prepare(request)
async def _send(event: str, data) -> None:
payload = f"event: {event}\ndata: {json.dumps(data)}\n\n"
await resp.write(payload.encode("utf-8"))
last_signature: tuple | None = None
try:
while True:
colony = _active_colony(session)
workers = _build_live_workers_payload(colony)
signature = _payload_change_signature(workers)
if signature != last_signature:
await _send("snapshot", {"workers": workers})
last_signature = signature
await asyncio.sleep(2.0)
except (asyncio.CancelledError, ConnectionResetError):
raise
except Exception as exc:
logger.warning("live workers stream error: %s", exc, exc_info=True)
return resp
async def handle_stop_live_worker(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/workers/{worker_id}/stop — force-stop one worker.
Calls ``colony.stop_worker(worker_id)`` which cancels the worker's
background task. The worker's terminal SUBAGENT_REPORT still fires
(preserving any _explicit_report) so the queen sees a `[WORKER_REPORT]`
with ``status="stopped"``.
"""
session, err = resolve_session(request)
if err:
return err
worker_id = request.match_info.get("worker_id", "")
if not worker_id:
return web.json_response({"error": "worker_id required"}, status=400)
colony = _active_colony(session)
if colony is None:
return web.json_response({"error": "No active colony on this session"}, status=503)
worker = colony._workers.get(worker_id) # type: ignore[attr-defined]
if worker is None:
return web.json_response({"error": f"Worker '{worker_id}' not found"}, status=404)
if not worker.is_active:
return web.json_response(
{
"stopped": False,
"reason": "Worker already terminated",
"worker_id": worker_id,
"status": str(worker.status),
}
)
try:
await colony.stop_worker(worker_id)
except Exception as exc:
logger.exception("stop_worker failed for %s", worker_id)
return web.json_response(
{"stopped": False, "error": str(exc), "worker_id": worker_id},
status=500,
)
return web.json_response({"stopped": True, "worker_id": worker_id})
async def handle_stop_all_live_workers(request: web.Request) -> web.Response:
"""POST /api/sessions/{session_id}/workers/stop-all — force-stop every active worker.
The persistent overseer (if any) is skipped: it is the queen itself
and stopping it would end the session. Only ephemeral fan-out workers
are targeted.
"""
session, err = resolve_session(request)
if err:
return err
colony = _active_colony(session)
if colony is None:
return web.json_response({"stopped": [], "error": "No active colony on this session"})
stopped: list[str] = []
errors: list[dict] = []
try:
workers = list(colony._workers.values()) # type: ignore[attr-defined]
except Exception:
workers = []
for w in workers:
if not w.is_active:
continue
if getattr(w, "_persistent", False):
# The overseer — don't kill the queen.
continue
try:
await colony.stop_worker(w.id)
stopped.append(w.id)
except Exception as exc:
logger.warning("stop-all: failed to stop %s: %s", w.id, exc)
errors.append({"worker_id": w.id, "error": str(exc)})
return web.json_response(
{
"stopped": stopped,
"stopped_count": len(stopped),
"errors": errors if errors else None,
}
)
def register_routes(app: web.Application) -> None:
"""Register worker inspection routes."""
app.router.add_get("/api/sessions/{session_id}/colonies/{colony_id}/nodes", handle_list_nodes)
@@ -248,3 +459,18 @@ def register_routes(app: web.Application) -> None:
"/api/sessions/{session_id}/colonies/{colony_id}/nodes/{node_id}/tools",
handle_node_tools,
)
# Live worker control. The GET /workers list endpoint lives in
# routes_colony_workers.py — it reads from session.colony (the
# unified ColonyRuntime where run_parallel_workers-spawned workers
# actually live) and returns the WorkerSummary shape the frontend
# types against. Registering a duplicate here shadowed it in
# aiohttp's router and broke the Sessions tab.
app.router.add_get("/api/sessions/{session_id}/workers/stream", handle_live_workers_stream)
app.router.add_post(
"/api/sessions/{session_id}/workers/stop-all",
handle_stop_all_live_workers,
)
app.router.add_post(
"/api/sessions/{session_id}/workers/{worker_id}/stop",
handle_stop_live_worker,
)
@@ -139,6 +139,20 @@ class SessionManager:
except Exception:
logger.warning("v2 migration failed (non-fatal)", exc_info=True)
# Ensure every existing colony has an up-to-date progress.db
# (schema v1, WAL mode) and reclaim any stale claims left behind
# by crashed workers from the previous run. Idempotent and
# fast; runs synchronously because the event loop hasn't
# started yet at __init__ time.
from framework.host.progress_db import ensure_all_colony_dbs
try:
ensured = ensure_all_colony_dbs()
if ensured:
logger.info("progress_db: ensured %d colony DB(s) at startup", len(ensured))
except Exception:
logger.warning("progress_db: backfill at startup failed (non-fatal)", exc_info=True)
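The startup backfill above follows a "never let maintenance break boot" shape; as a generic sketch (hypothetical wrapper, not framework API):

```python
import logging

logger = logging.getLogger(__name__)

def best_effort(fn, default=None, label="backfill"):
    """Run an idempotent maintenance step; log failures, never raise.
    Safe to call synchronously before the event loop starts."""
    try:
        return fn()
    except Exception:
        logger.warning("%s failed (non-fatal)", label, exc_info=True)
        return default
```

Wrapping `ensure_all_colony_dbs` this way means a single corrupt colony DB degrades to a warning instead of preventing the SessionManager from constructing.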
def build_llm(self, model: str | None = None):
"""Construct an LLM provider using the server's configured defaults."""
from framework.config import RuntimeConfig, get_hive_config
@@ -1383,34 +1397,24 @@ class SessionManager:
)
# Auto-load worker on cold restore — the queen's conversation expects
# the agent to be loaded, but the new session has no worker.
# the colony to be loaded, but the new session has no worker.
if session.queen_resume_from and not session.colony_runtime:
meta_path = queen_dir / "meta.json"
if meta_path.exists():
try:
_meta = json.loads(meta_path.read_text(encoding="utf-8"))
_agent_path = _meta.get("agent_path")
_phase = _meta.get("phase")
if _agent_path and Path(_agent_path).exists():
if _phase in ("staging", "running", None):
# Agent fully built — load worker and resume
await self.load_colony(session.id, _agent_path)
if session.phase_state:
await session.phase_state.switch_to_staging(source="auto")
logger.info("Cold restore: auto-loaded worker from %s", _agent_path)
elif _phase == "building":
# Agent folder exists but incomplete — resume building
if session.phase_state:
session.phase_state.agent_path = _agent_path
await session.phase_state.switch_to_building(source="auto")
logger.info("Cold restore: resumed BUILDING phase for %s", _agent_path)
elif _phase == "planning":
if session.phase_state:
session.phase_state.agent_path = _agent_path
logger.info("Cold restore: PLANNING phase for %s", _agent_path)
await self.load_colony(session.id, _agent_path)
if session.phase_state:
# Restored colony session lands in reviewing — the
# queen summarises whatever the last run produced
# before the user decides what to do next.
await session.phase_state.switch_to_reviewing(source="auto")
logger.info("Cold restore: auto-loaded colony from %s", _agent_path)
except Exception:
logger.warning("Cold restore: failed to auto-load worker", exc_info=True)
logger.warning("Cold restore: failed to auto-load colony", exc_info=True)
# ------------------------------------------------------------------
# Phase 2: unified ColonyRuntime construction
@@ -14,6 +14,7 @@ from unittest.mock import AsyncMock, MagicMock
import pytest
from aiohttp.test_utils import TestClient, TestServer
from framework.host.execution_manager import ExecutionAlreadyRunningError
from framework.host.triggers import TriggerDefinition
from framework.llm.model_catalog import get_models_catalogue
from framework.server import (
@@ -89,8 +90,8 @@ class MockStream:
_active_executors: dict = field(default_factory=dict)
active_execution_ids: set = field(default_factory=set)
async def cancel_execution(self, execution_id: str, reason: str | None = None) -> bool:
return execution_id in self._execution_tasks
async def cancel_execution(self, execution_id: str, reason: str | None = None) -> str:
return "cancelled" if execution_id in self._execution_tasks else "not_found"
@dataclass
@@ -780,6 +781,21 @@ class TestExecution:
data = await resp.json()
assert data["execution_id"] == "exec_test_123"
@pytest.mark.asyncio
async def test_trigger_returns_409_when_execution_still_running(self):
session = _make_session()
session.colony_runtime.trigger = AsyncMock(side_effect=ExecutionAlreadyRunningError("default", ["session-1"]))
app = _make_app_with_session(session)
async with TestClient(TestServer(app)) as client:
resp = await client.post(
"/api/sessions/test_agent/trigger",
json={"entry_point_id": "default", "input_data": {"msg": "hi"}},
)
assert resp.status == 409
data = await resp.json()
assert data["stream_id"] == "default"
assert data["active_execution_ids"] == ["session-1"]
@pytest.mark.asyncio
async def test_trigger_not_found(self):
app = create_app()
@@ -918,6 +934,7 @@ class TestExecution:
data = await resp.json()
assert data["stopped"] is False
assert data["cancelled"] == []
assert data["cancelling"] == []
assert data["timers_paused"] is True
@pytest.mark.asyncio
@@ -1027,6 +1044,22 @@ class TestStop:
assert resp.status == 200
data = await resp.json()
assert data["stopped"] is True
assert data["cancelling"] is False
@pytest.mark.asyncio
async def test_stop_returns_accepted_while_execution_is_still_cancelling(self):
session = _make_session()
session.colony_runtime._mock_streams["default"].cancel_execution = AsyncMock(return_value="cancelling")
app = _make_app_with_session(session)
async with TestClient(TestServer(app)) as client:
resp = await client.post(
"/api/sessions/test_agent/stop",
json={"execution_id": "exec_abc"},
)
assert resp.status == 202
data = await resp.json()
assert data["stopped"] is False
assert data["cancelling"] is True
@pytest.mark.asyncio
async def test_stop_not_found(self):
@@ -1,24 +0,0 @@
---
name: hive.batch-ledger
description: Track per-item status when processing collections to prevent skipped or duplicated items.
metadata:
author: hive
type: default-skill
---
## Operational Protocol: Batch Progress Ledger
When processing a collection of items, maintain a batch ledger in `_batch_ledger`.
Initialize when you identify the batch:
- `_batch_total`: total item count
- `_batch_ledger`: JSON with per-item status
Per-item statuses: pending → in_progress → completed|failed|skipped
- Set `in_progress` BEFORE processing
- Set final status AFTER processing with 1-line result_summary
- Include error reason for failed/skipped items
- Update aggregate counts after each item
- NEVER remove items from the ledger
- If resuming, skip items already marked completed
@@ -12,33 +12,30 @@ metadata:
All GCU browser tools drive a real Chrome instance through the Beeline extension and Chrome DevTools Protocol (CDP). That means clicks, keystrokes, and screenshots are processed by the actual browser's native hit testing, focus, and layout engines — **not** a synthetic event layer. Understanding this unlocks strategies that make hard sites easy.
## Coordinates: always CSS pixels
## Coordinates
**Chrome DevTools Protocol `Input.dispatchMouseEvent` operates in CSS pixels, not physical pixels.**
When you call `browser_coords(image_x, image_y)` after a screenshot, the returned dict has both `css_x/y` and `physical_x/y`. **Always use `css_x/y` for clicks, hovers, and key presses.**
Every browser tool that takes or returns coordinates operates in **fractions of the viewport (0..1 for both axes)**. Read a target's proportional position off `browser_screenshot` — "this button is about 35% from the left and 20% from the top" → pass `(0.35, 0.20)`. Rect-returning tools (`browser_get_rect`, `browser_shadow_query`, and the `rect` inside `focused_element`) also return fractions. The tools convert to CSS pixels internally before dispatching to Chrome.
```
browser_screenshot() → image (downscaled to 800/900 px wide)
browser_coords(img_x, img_y) → {css_x, css_y, physical_x, physical_y}
browser_click_coordinate(css_x, css_y) ← USE css_x/y
browser_hover_coordinate(css_x, css_y) ← USE css_x/y
browser_press_at(css_x, css_y, key) ← USE css_x/y
browser_screenshot() → image + cssWidth/cssHeight in meta
browser_click_coordinate(x, y) → x, y are fractions 0..1
browser_hover_coordinate(x, y) → fractions
browser_press_at(x, y, key) → fractions
browser_get_rect(selector) → rect → rect.cx / rect.cy are fractions
browser_shadow_query(...) → rect → same
```
Feeding `physical_x/y` on a HiDPI display overshoots by DPR× — on a DPR=1.6 laptop, clicks land 60% too far right and down. The ratio between `physicalScale` and `cssScale` tells you the effective DPR.
**Why fractions:** every vision model (Claude ~1.15 MP target, GPT-4o 512-px tiles, Gemini, local VLMs) resizes or tiles images differently before the model sees the pixels. Proportions survive every such transform; pixel coordinates only "work" per-model and silently break when you swap backends. Four-decimal precision (`0.0001` ≈ 0.17 CSS px on a 1717-wide viewport) is more than enough for the tightest targets.
`getBoundingClientRect()` already returns CSS pixels — feed those values straight through to click/hover tools without any DPR multiplication.
**Exception for zoomed elements:** pages that use `zoom` or `transform: scale()` on a container (LinkedIn's `#interop-outlet`, some embedded iframes) render in a scaled local coordinate space. `getBoundingClientRect` there may not match CDP's hit space. Use `browser_shadow_query` which handles the math, or fall back to visually picking coordinates from a screenshot.
**Exception for zoomed elements:** pages that use `zoom` or `transform: scale()` on a container (LinkedIn's `#interop-outlet`, some embedded iframes) render in a scaled local coordinate space. `getBoundingClientRect` there may not match CDP's hit space. Prefer `browser_shadow_query` (which handles the math and returns fractions) or visually pick coordinates from a screenshot. Avoid raw `browser_evaluate` + `getBoundingClientRect()` for coord lookup — that returns CSS px and will be wrong when fed to click tools.
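The fraction model above reduces to one multiplication. A minimal sketch, assuming an illustrative 1717x960 CSS-px viewport (`to_css` is a hypothetical name; it mirrors what the tools do internally before dispatching to Chrome):

```python
def to_css(fx, fy, css_width, css_height):
    """Convert viewport fractions (0..1) to CSS pixels."""
    return fx * css_width, fy * css_height

# The same fraction is valid regardless of how the screenshot was scaled:
# pixel 280 in an 800-px-wide image encodes the fraction 0.35, and so
# would the corresponding pixel in any other resize of the same frame.
assert 280 / 800 == 0.35

x, y = to_css(0.35, 0.20, 1717, 960)
print(round(x, 2), round(y, 2))
```

Pixel coordinates, by contrast, only survive if the model saw the image at exactly the width you assume — which is why they silently break across backends.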
## Screenshot + coordinates is shadow-agnostic — prefer it on shadow-heavy sites
On sites that use Shadow DOM heavily (Reddit's faceplate Web Components, LinkedIn's `#interop-outlet` messaging overlay, some X custom elements), **coordinate-based operations reach elements that selector-based tools can't see.**
Start with `browser_snapshot` when you need to inspect the page structure or find ordinary controls. If the snapshot does not show the thing you need, shows stale or misleading refs, or cannot prove where a visible target is, take `browser_screenshot` and use the screenshot + coordinate path. This is especially useful on sites that use Shadow DOM heavily.
Why:
- **CDP hit testing walks shadow roots natively.** `browser_click_coordinate(css_x, css_y)` routes through Chrome's native hit tester, which traverses open shadow roots automatically. You don't need to know the shadow structure.
- **CDP hit testing walks shadow roots natively.** `browser_click_coordinate(x, y)` routes through Chrome's native hit tester, which traverses open shadow roots automatically. You don't need to know the shadow structure.
- **Keyboard dispatch follows focus** into shadow roots. After a click focuses an input (even one three shadow levels deep), `browser_press(...)` with no selector dispatches keys to `document.activeElement`'s computed focus target.
- **Screenshots render the real layout** regardless of DOM implementation.
@@ -46,18 +43,17 @@ Whereas `wait_for_selector`, `browser_click(selector=...)`, `browser_type(select
### Recommended workflow on shadow-heavy sites
1. `browser_screenshot()` → visual image
2. Identify the target visually → image pixel `(x, y)` (eyeball from the screenshot)
3. `browser_coords(x, y)` → convert to CSS px
4. `browser_click_coordinate(css_x, css_y)` → lands on the element via native hit testing; inputs get focused. **The response now includes `focused_element: {tag, id, role, contenteditable, rect, ...}`** — use it to verify you actually focused what you intended.
5. `browser_type_focused(text="...")` → dispatches CDP `Input.insertText` to `document.activeElement`. Shadow roots, iframes, Lexical, Draft.js, ProseMirror all just work. Use `browser_type(selector, text)` instead when you have a reliable CSS selector for a light-DOM element.
6. Verify via `browser_screenshot` OR `browser_get_attribute` on a known-reachable marker (e.g. check that the Send button's `aria-disabled` flipped to `false`).
1. `browser_screenshot()` → JPEG; meta includes `cssWidth`/`cssHeight` for reference.
2. Identify the target visually → estimate its proportional position `(fx, fy)` where each is in `0..1`.
3. `browser_click_coordinate(fx, fy)` → tool converts to CSS px and dispatches; CDP native hit testing focuses the element. **The response includes `focused_element: {tag, id, role, contenteditable, rect, inFrame?, ...}`** — use it to verify you actually focused what you intended. `rect` is in fractions (same space as your input). When focus is inside a same-origin iframe, the descriptor reports the inner element and adds `inFrame: [...]` breadcrumbs.
4. `browser_type_focused(text="...")` → inserts text into `document.activeElement` (traverses into same-origin iframes automatically). Shadow roots, iframes, Lexical, Draft.js, ProseMirror all just work. Use `browser_type(selector, text)` instead when you have a reliable CSS selector for a light-DOM element.
5. Verify via `browser_screenshot` OR `browser_get_attribute` on a known-reachable marker (e.g. check that the Send button's `aria-disabled` flipped to `false`).
### The click→type loop (canonical pattern)
1. Call `browser_click_coordinate(x, y)` to click the target element.
2. Check the `focused_element` field in the response — it tells you what actually received focus (tag, id, role, contenteditable, rect).
3. If the focused element is editable, call `browser_type_focused(text="...")` to insert text. Use tools to verify the text took effect.
3. If the focused element is editable, call `browser_type_focused(text="...")` to insert text. Use tools to verify the text took effect — prefer checking the underlying `.value` / `innerText` via `browser_evaluate` or confirming the submit button enabled. A screenshot alone can mislead: narrow input boxes visually clip long text, so only a portion may appear on screen even though the full string was accepted.
4. If it is NOT editable, your click landed on the wrong thing — refine coordinates and retry. Do NOT reach for `browser_evaluate` + `execCommand('insertText')` or shadow-root traversals. The problem is the click target, not the typing method.
`browser_click` (selector-based) also returns `focused_element`, so the same check works whether you clicked by selector or coordinate.
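The branch in steps 2-4 can be sketched as a small decision helper. The `focused_element` field names come from the tool responses described above; `next_action` itself and the exact editable-tag set are illustrative assumptions:

```python
EDITABLE_TAGS = {"input", "textarea", "select"}

def next_action(focused):
    """Decide the next step from a click's focused_element descriptor."""
    if focused is None:
        # Nothing got focused: the click landed on dead space.
        return "refine coordinates and retry"
    if focused.get("tag", "").lower() in EDITABLE_TAGS:
        return "browser_type_focused"
    if focused.get("contenteditable"):
        return "browser_type_focused"
    # Not editable: the click target was wrong. Do NOT fall back to
    # browser_evaluate + execCommand -- re-aim the click instead.
    return "refine coordinates and retry"

print(next_action({"tag": "TEXTAREA"}))
print(next_action({"tag": "DIV", "contenteditable": True}))
print(next_action({"tag": "A", "role": "link"}))
```

The point of the helper is the asymmetry: an editable descriptor means proceed to typing, while anything else means the *click* is the problem, never the typing method.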
@@ -65,6 +61,7 @@ Whereas `wait_for_selector`, `browser_click(selector=...)`, `browser_type(select
### Empirically verified (2026-04-11)
Tested against `https://www.reddit.com/r/programming/` whose search input lives at:
```
document > reddit-search-large [shadow]
> faceplate-search-input#search-input [shadow]
@@ -80,7 +77,7 @@ browser_shadow_query("reddit-search-large >>> #search-input")
browser_get_rect("#interop-outlet >>> #ember37 >>> p")
```
Returns the element's rect in **CSS pixels** (feed directly to click tools). Remember: `browser_type` and `wait_for_selector` do **not** support `>>>` — only shadow_query and get_rect do.
Returns the element's rect as **fractions of the viewport** (feed `rect.cx` / `rect.cy` directly to click tools). Remember: `browser_type` and `wait_for_selector` do **not** support `>>>` — only shadow_query and get_rect do.
## Navigation and waiting
@@ -99,13 +96,13 @@ All return real URLs and titles. On a fast page `navigate(wait_until="load")` re
### Timing expectations (measured against real sites)
| Site | Navigate load time |
|---|---|
| example.com | 100-400 ms |
| wikipedia.org | 200-500 ms |
| reddit.com | 1.5-2 s |
| x.com/twitter | 1.2-1.6 s |
| linkedin.com (logged in) | 4-5 s |
| Site | Navigate load time |
| ------------------------ | ------------------ |
| example.com              | 100-400 ms         |
| wikipedia.org            | 200-500 ms         |
| reddit.com               | 1.5-2 s            |
| x.com/twitter            | 1.2-1.6 s          |
| linkedin.com (logged in) | 4-5 s              |
For LinkedIn and other heavy SPAs, rely on `sleep()` after navigation to let the page hydrate.
@@ -116,8 +113,8 @@ Even after `wait_until="load"`, React/Vue SPAs often render their real chrome in
### Reading pages efficiently
- **Prefer `browser_snapshot` over `browser_get_text("body")`** — returns a compact ~15 KB accessibility tree vs 100+ KB of raw HTML.
- Interaction tools (`browser_click`, `browser_type`, `browser_type_focused`, `browser_fill`, `browser_scroll`, etc.) return a page snapshot automatically in their result. Use it to decide your next action — do NOT call `browser_snapshot` separately after every action. Only call `browser_snapshot` when you need a fresh view without performing an action, or after setting `auto_snapshot=false`.
- Complex pages (LinkedIn, Twitter/X, SPAs with virtual scrolling) have DOMs that don't match what's visually rendered — snapshot refs may be stale, missing, or misaligned with visible layout. On these pages, `browser_screenshot` is the only reliable way to orient yourself.
- Interaction tools `browser_click`, `browser_type`, `browser_type_focused`, `browser_fill`, and `browser_scroll` wait 0.5 s for the page to settle after a successful action, then attach a fresh accessibility snapshot under the `snapshot` key of their result. Use it to decide your next action — do NOT call `browser_snapshot` separately after every action. Tune the capture via `auto_snapshot_mode`: `"default"` (full tree, the default), `"simple"` (trims unnamed structural nodes), `"interactive"` (only controls — tightest token footprint), or `"off"` to skip the capture entirely (useful when batching several interactions and you don't need the intermediate trees). Call `browser_snapshot` explicitly only when you need a newer view or a different mode than what was auto-captured.
- Complex pages (LinkedIn, Twitter/X, SPAs with virtual scrolling) can have DOMs that don't match what's visually rendered — snapshot refs may be stale, missing, or misaligned with visible layout. Try the available snapshot first; when the target is not present in that snapshot or visual position matters, switch to `browser_screenshot` to orient yourself.
- Only fall back to `browser_get_text` for extracting specific small elements by CSS selector.
## Typing and keyboard input
@@ -128,7 +125,7 @@ Even after `wait_until="load"`, React/Vue SPAs often render their real chrome in
Why this is necessary:
- **React / Vue controlled components** don't trust JS-sourced `.focus()`. React uses event delegation and watches for *native* pointer/focus events — a `click` dispatched via CDP fires the real `pointerdown`/`pointerup`/`click`/`focus` sequence that React listens to, and updates its internal state. A JS-only `.focus()` sets `document.activeElement` but the framework's controlled state doesn't see it.
- **React / Vue controlled components** don't trust JS-sourced `.focus()`. React uses event delegation and watches for _native_ pointer/focus events — a `click` dispatched via CDP fires the real `pointerdown`/`pointerup`/`click`/`focus` sequence that React listens to, and updates its internal state. A JS-only `.focus()` sets `document.activeElement` but the framework's controlled state doesn't see it.
- **Draft.js** (X/Twitter compose) and **Lexical** (Gmail, LinkedIn DMs) use contenteditable divs with immutable editor state. They only enter "edit mode" after a real click on the editor surface. Typing at them without clicking routes keys to `document.body` or gets silently discarded.
- **Send/submit buttons are bound to framework state**, not DOM state. They're typically `disabled={!hasRealContent}` where `hasRealContent` is computed from React/Vue/Svelte state. The input field can have characters in the DOM but the button stays disabled because the framework never saw a real input event.
@@ -142,6 +139,8 @@ The symptom is always the same: **you type, the characters appear visually, and
3. **Verify** the submit button is enabled before clicking it. Use `browser_evaluate` to check the button's `disabled` or `aria-disabled` attribute. Do NOT trust that typing worked — always check state.
**Partial visibility is fine.** Small single-line inputs, chat boxes with fixed width, and search fields commonly clip or truncate long text visually — only the tail or head may be shown on screen. Don't treat that as failure. What matters is that the framework accepted the input: the submit button enabled, or `element.value` / `innerText` read via `browser_evaluate` contains the full string. If the visible pixels don't match what you typed but the button is enabled and the underlying value is correct, typing succeeded — proceed.
4. **Only click send if the button is enabled.** If the button is still disabled, try the recovery dance: click the textarea again, press `End`, press a space, press `Backspace` — this forces React to recompute `hasRealContent`. Then re-check the button state.
### Why `browser_type` uses `Input.insertText` by default
@@ -175,16 +174,16 @@ Always include an equivalent cleanup block in any script that types into a compo
### Verified site-specific quirks
| Site | Editor | Workaround |
|---|---|---|
| **X / Twitter** compose | Draft.js | Click `[data-testid='tweetTextarea_0']` first, then type with `delay_ms=20`. First 1-2 chars may be eaten — accept truncation or prepend a throwaway char. Verify `[data-testid='tweetButton']` has `disabled: false` before clicking. |
| **LinkedIn** messaging | contenteditable (inside `#interop-outlet` shadow root) | Use `browser_shadow_query` to find the rect, click-coordinate to focus, then `browser_type_focused(text=...)` (selector-based `browser_type` can't reach shadow). Send button is `.msg-form__send-button`. |
| **LinkedIn** feed post composer | Quill/LinkedIn custom | Click the "Start a post" trigger first, wait 1s for modal, click the textarea, type. |
| **Reddit** comment/post box | ProseMirror | Click the textarea, wait 0.5s for the toolbar to mount, then type. Submit is `button[slot="submit-button"]` inside a shreddit-composer. |
| **Gmail** compose | Lexical | Click the body first. Gmail has a visible `div[contenteditable=true][aria-label*='Message Body']` after opening a compose window. |
| **Slack** message box | contenteditable | Click first, then type. Send is a paper-plane button with `data-qa='texty_send_button'`. |
| **Discord** | Slate | Click first. Discord's send is implicit on Enter (no button), so just press Enter after typing. |
| **Monaco** editors (GitHub code review, CodeSandbox) | Monaco | Click first, type with `delay_ms=10`. Monaco listens for `textarea` input events on a hidden textarea — requires focus to be on that textarea. |
| Site | Editor | Workaround |
| ---------------------------------------------------- | ------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **X / Twitter** compose | Draft.js | Click `[data-testid='tweetTextarea_0']` first, then type with `delay_ms=20`. First 1-2 chars may be eaten — accept truncation or prepend a throwaway char. Verify `[data-testid='tweetButton']` has `disabled: false` before clicking. |
| **LinkedIn** messaging | contenteditable (inside `#interop-outlet` shadow root) | Use `browser_shadow_query` to find the rect, click-coordinate to focus, then `browser_type_focused(text=...)` (selector-based `browser_type` can't reach shadow). Send button is `.msg-form__send-button`. |
| **LinkedIn** feed post composer | Quill/LinkedIn custom | Click the "Start a post" trigger first, wait 1s for modal, click the textarea, type. |
| **Reddit** comment/post box | ProseMirror | Click the textarea, wait 0.5s for the toolbar to mount, then type. Submit is `button[slot="submit-button"]` inside a shreddit-composer. |
| **Gmail** compose | Lexical | Click the body first. Gmail has a visible `div[contenteditable=true][aria-label*='Message Body']` after opening a compose window. |
| **Slack** message box | contenteditable | Click first, then type. Send is a paper-plane button with `data-qa='texty_send_button'`. |
| **Discord** | Slate | Click first. Discord's send is implicit on Enter (no button), so just press Enter after typing. |
| **Monaco** editors (GitHub code review, CodeSandbox) | Monaco | Click first, type with `delay_ms=10`. Monaco listens for `textarea` input events on a hidden textarea — requires focus to be on that textarea. |
### Plain text into a real input
@@ -220,25 +219,15 @@ Recognized without modifiers: `Enter`, `Tab`, `Escape`, `Backspace`, `Delete`, `
## Screenshots
```
browser_screenshot() # viewport, 900 px wide by default
browser_screenshot(full_page=True) # full scrollable page
browser_screenshot() # viewport, 800 px wide JPEG
browser_screenshot(full_page=True) # full scrollable page (overview only — don't click off a full-page shot)
browser_screenshot(selector="#header") # clip to element's rect
```
Returns a PNG with automatic downscaling to a target width (default 900 px) plus a JSON metadata block containing `cssWidth`, `devicePixelRatio`, `physicalScale`, `cssScale`, and a `scaleHint` string. The image is also annotated with a highlight rectangle/dot showing the last interaction (click, hover, type) if one happened on this tab.
Returns a JPEG (quality 75, ~50-120 KB) at 800 px wide. The pixel width is purely a bandwidth choice; all tool coordinates are fractions of the viewport and are invariant to image size. Metadata includes `imageWidth` (800), `cssWidth`, `cssHeight` (for reference), and `physicalScale`. The image is annotated with a highlight rectangle/dot showing the last interaction (click, hover, type) if one happened on this tab.
The highlight overlay stays visible on the page for **10 seconds** after each interaction, then fades. If you want the highlight to appear in a screenshot, take the screenshot within 10 s of the click / hover / type.
### Anatomy of the scale fields
- `cssWidth` = `window.innerWidth` (CSS px)
- `devicePixelRatio` = `window.devicePixelRatio` (often 1.6, 2, or 3 on modern displays)
- `physicalScale = png_width / image_width` (how many physical-px per image-px)
- `cssScale = cssWidth / image_width` (how many CSS-px per image-px)
- Effective DPR = `physicalScale / cssScale` (should match `devicePixelRatio`)
When converting image coordinates for clicks, always use `cssScale`. The `physicalScale` field is there for debugging HiDPI displays, not for inputs.
## Scrolling
- Use large scroll amounts (~2000) when loading more content — sites like Twitter and LinkedIn have lazy loading for paging.
@@ -261,6 +250,7 @@ When converting image coordinates for clicks, always use `cssScale`. The `physic
- Popup appeared that you didn't need? Close it immediately
`browser_tabs` returns an `origin` field for each tab:
- `"agent"` — you opened it; you own it; close it when done
- `"popup"` — opened by a link or script; close after extracting what you need
- `"startup"` or `"user"` — leave these alone unless the task requires it
@@ -273,22 +263,22 @@ The bridge automatically evicts per-tab state (`_cdp_attached`, `_interaction_hi
### LinkedIn
| Target | Selector |
|---|---|
| Global search input | `input[data-testid='typeahead-input']` |
| Own profile link | `a[href*='linkedin.com/in/']` |
| Messaging overlay | `#interop-outlet >>> [aria-label]` (use shadow_query) |
| Target | Selector |
| ------------------- | ----------------------------------------------------- |
| Global search input | `input[data-testid='typeahead-input']` |
| Own profile link | `a[href*='linkedin.com/in/']` |
| Messaging overlay | `#interop-outlet >>> [aria-label]` (use shadow_query) |
LinkedIn enforces **strict Trusted Types CSP**. Any script you inject via `browser_evaluate` that uses `innerHTML = "<...>"` will be **silently dropped** — the wrapper element gets added but its content is empty, no console error. Always use `createElement` + `appendChild` + `setAttribute` for DOM injection on LinkedIn. `style.cssText`, `textContent`, and `.value` assignments are fine (they don't go through the Trusted Types sink).
### Reddit (new reddit / shreddit)
| Target | Selector |
|---|---|
| Target | Selector |
| --------------------- | ---------------------------------------------------------------------------- |
| Search input (shadow) | `reddit-search-large >>> #search-input` (rect only; type via click-to-focus) |
| Reddit logo (home) | `#reddit-logo` |
| Subreddit posts | `shreddit-post` custom elements |
| Create post button | `a[href*='/submit']` |
| Reddit logo (home) | `#reddit-logo` |
| Subreddit posts | `shreddit-post` custom elements |
| Create post button | `a[href*='/submit']` |
Reddit's search input lives **two shadow levels deep** inside `reddit-search-large > faceplate-search-input`. You cannot reach it with `browser_type(selector=)`. The working pattern:
@@ -299,15 +289,15 @@ Reddit's search input lives **two shadow levels deep** inside `reddit-search-lar
### X / Twitter
| Target | Selector |
|---|---|
| Main search input | `input[data-testid='SearchBox_Search_Input']` |
| Home nav link | `a[data-testid='AppTabBar_Home_Link']` |
| Post text area (compose) | `[data-testid='tweetTextarea_0']` |
| Reply buttons on feed | `[data-testid='reply']` |
| Post / Tweet submit button | `[data-testid='tweetButton']` |
| Caret (⋯) menu on a post | `[data-testid='caret']` |
| Confirmation sheet button | `[data-testid='confirmationSheetConfirm']` |
| Target | Selector |
| -------------------------- | --------------------------------------------- |
| Main search input | `input[data-testid='SearchBox_Search_Input']` |
| Home nav link | `a[data-testid='AppTabBar_Home_Link']` |
| Post text area (compose) | `[data-testid='tweetTextarea_0']` |
| Reply buttons on feed | `[data-testid='reply']` |
| Post / Tweet submit button | `[data-testid='tweetButton']` |
| Caret (⋯) menu on a post | `[data-testid='caret']` |
| Confirmation sheet button | `[data-testid='confirmationSheetConfirm']` |
**X uses Draft.js for the compose text editor**, which does NOT accept synthetic input reliably. Working workaround: `browser_type(selector='[data-testid="tweetTextarea_0"]', text="...", delay_ms=20)`. The delay gives Draft.js time to process each keystroke. The first 1-2 characters may still get eaten — accept minor truncation or prepend a throwaway character. After typing, check `[data-testid="tweetButton"]` has `disabled: false` before clicking submit.
@@ -363,7 +353,8 @@ Then pass the most specific selector that uniquely identifies the right input (e
- **Typing into a rich-text editor without clicking first → send button stays disabled.** Draft.js (X), Lexical (Gmail, LinkedIn DMs), ProseMirror (Reddit), and React-controlled `contenteditable` elements only register input as "real" when the element received a native focus event — JS-sourced `.focus()` is not enough. `browser_type` now does this automatically via a real CDP pointer click before inserting text, but always verify the submit button's `disabled` state before clicking send. See the "ALWAYS click before typing" section above.
- **Using per-character `keyDown` on Lexical / Draft.js editors → keys dispatch but text never appears.** Those editors intercept `beforeinput` and route insertion through their own state machine; raw keyDown events are silently dropped. `browser_type` now uses `Input.insertText` by default (the CDP IME-commit method) which these editors accept cleanly. Only set `use_insert_text=False` when you explicitly need per-keystroke dispatch.
- **Leaving a composer with text then trying to navigate → `beforeunload` dialog hangs the bridge.** LinkedIn and several other sites pop a native "unsent message" confirm. `browser_navigate` and `close_tab` both time out against this. Always strip `window.onbeforeunload = null` via `browser_evaluate` before any navigation after typing in a composer, or wrap your logic in a `try/finally` that runs the cleanup block.
- **Clicking at physical pixels.** CDP uses CSS px. `browser_coords` returns both for debugging, but always feed `css_x/y` to click tools.
- **Click landed in the wrong region (sidebar / header instead of target).** Check `focused_element` in the click response — it's ground truth for what actually got focused, including the `inFrame` breadcrumb when focus ends up inside a same-origin iframe. If it isn't the target (e.g. `className: "msg-conversation-listitem__link"` when you meant to hit a composer), adjust the fraction and retry. Coordinates you pass are fractions of the viewport; the tool multiplies by `cssWidth` / `cssHeight` internally, so a wrong result means your estimated proportion was off — not that any scale went sideways.
- **Accidentally passing pixels to click / hover / press_at.** The tools reject any coord outside `[-0.1, 1.5]` with a clear error. If you see that error, you passed a pixel (like 815) instead of a fraction (like 0.475). Use `browser_get_rect` to get exact fractional cx/cy, or read proportions off `browser_screenshot`.
- **Calling `wait_for_selector` on a shadow element.** It'll always time out. Use `browser_shadow_query` or the screenshot + coordinate strategy.
- **Relying on `innerHTML` in injected scripts on LinkedIn.** Silently discarded. Use `createElement` + `appendChild`.
- **Not waiting for SPA hydration.** `wait_until="load"` fires before React/Vue rendering on many sites. Add a 2-3 s sleep before querying for chrome elements.
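The pixel-vs-fraction guard mentioned in the pitfalls above can be sketched as follows — the `[-0.1, 1.5]` bounds are from the doc, while the `check_coord` helper and error wording are illustrative:

```python
def check_coord(value):
    """Reject coordinates outside [-0.1, 1.5]: almost certainly pixels, not fractions."""
    if not -0.1 <= value <= 1.5:
        raise ValueError(
            f"coordinate {value} looks like pixels, not a viewport fraction; "
            "pass e.g. 0.475 instead of 815"
        )
    return value

check_coord(0.475)   # a plausible fraction: accepted
try:
    check_coord(815)  # a pixel value passed by mistake
except ValueError as e:
    print("rejected:", e)
```

Slightly negative or >1 values are allowed on purpose, since a target can sit just off-viewport; only values far outside the unit range are unambiguous pixel mistakes.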
@@ -379,17 +370,35 @@ If Chrome detaches the debugger for its own reasons (tab closed, user opened Dev
If reattach also fails, you'll get the underlying CDP error string — that's a real problem, usually the tab is gone.
## When to reach for `browser_evaluate`
## `browser_evaluate` is a last-resort escape hatch
Use it when:
- You need to read state from inside a shadow root that `browser_get_rect` doesn't handle
- You need a one-shot JS snippet to trigger a site-specific action (scroll a specific container, open a menu, set a form field value directly)
- You need to walk an AX tree or measure layout that the standard tools don't expose
**Before using `browser_evaluate`, try these first — in this order:**
Avoid it when:
- A standard tool (`browser_click_coordinate`, `browser_type`, `browser_press`) already does what you need. Those go through CDP's native event pipeline, which real sites trust more than synthetic JS dispatch.
- You're on a strict-CSP site and want to inject DOM — stick to `createElement` + `appendChild`, never `innerHTML`.
- You need to trigger React / Vue / framework state changes — those frameworks watch for real browser events (`input`, `change`, `click`), not scripted `dispatchEvent` calls. Native-event tools are more reliable.
1. **`browser_screenshot` + `browser_click_coordinate`** — works on every site regardless of shadow DOM, iframes, obfuscated classes. This is the default path for "click a thing you can see."
2. **`browser_type(use_insert_text=True, text=...)`** — for typing into ANY input/contenteditable, including Lexical and Draft.js. Handles click-focus-insert with built-in retries. Do **not** call `document.execCommand('insertText')` via evaluate; this tool already does it correctly.
3. **`browser_shadow_query`** or **`browser_get_rect(selector)`** with the `>>>` shadow-piercing syntax — for selector-based lookups across shadow roots.
4. **`browser_get_text` / `browser_get_attribute`** — for reading element state by selector.
5. **`browser_snapshot`** — for dumping the accessibility tree of the page.
If all five of those fit your goal, **do not use `browser_evaluate`.** Each evaluate call is a small LLM round-trip of ~30-100 tokens of JS plus a JSON response; five of them burn more context than a single screenshot-and-coordinate does, with less reliability.
### Anti-patterns — stop immediately if you catch yourself doing these
- **Trying multiple `querySelectorAll` variants when the first returned `[]`.** Different selectors on the same page rarely work if the first guess failed — modern SPAs obfuscate class names at build time. After one empty result, switch to `browser_screenshot` + `browser_click_coordinate`. Do not write `.artdeco-list__item`, then `[data-test-incoming-invitation-card]`, then `[class*="invitation"]` — you are already on the wrong path.
- **Writing `walk(root)` recursive shadow-DOM traversal functions.** Use `browser_shadow_query` — it traverses at the CDP level (native C++), not by re-running a recursive JS function every call.
- **Calling `document.execCommand('insertText', ...)` to type into a contenteditable.** Use `browser_type(use_insert_text=True, text='...')`. The high-level tool handles the exact same Lexical/Draft.js case but with click-focus-retry logic built in.
- **Accessing `iframe.contentDocument`.** Rarely works (cross-origin, late hydration) and when it does, the code is brittle. Use `browser_screenshot` to see the iframe, then `browser_click_coordinate` to interact.
- **Using `innerHTML = "<...>"` on a Trusted Types site (LinkedIn, GitHub).** The assignment is silently dropped. Use `createElement` + `appendChild` if you must inject DOM — but first, ask whether you really need to.
- **Triggering React/Vue state via synthetic `dispatchEvent`.** Frameworks watch for real browser events. Use `browser_click_coordinate`, `browser_press`, or `browser_type` — all go through CDP's native event pipeline.
### Legitimate uses (when nothing semantic fits)
- Reading a computed style, `window.innerWidth/Height`, `document.scrollingElement.scrollTop`, or other layout values the tools don't expose.
- Firing a one-shot site-specific API call (analytics beacon, feature-flag toggle).
- Stripping `onbeforeunload` before navigating away from a page with an unsent draft (LinkedIn, Gmail).
- Detecting whether a specific shadow-root host exists before a follow-up screenshot.
In all of these cases the script is SHORT (< 10 lines) and the result is CONSUMED (read, then acted on), not further probed.
## Login & auth walls
@@ -0,0 +1,112 @@
---
name: hive.colony-progress-tracker
description: Claim tasks, record step progress, and verify SOP gates in the colony SQLite queue. Applies when your spawn message includes a db_path field.
metadata:
author: hive
type: default-skill
visibility: [worker]
---
## Operational Protocol: Colony Progress Tracker
**Applies when** your spawn message has `db_path:` and `colony_id:` fields. The DB is your durable working memory — it tells you what's done, what to skip, and which SOP gates you owe.
Access via `execute_command_tool` running `sqlite3 "<db_path>" "..."`. Tables: `tasks` (queue), `steps` (per-task decomposition), `sop_checklist` (hard gates).
### Claim: assigned task (check this FIRST)
If your spawn message includes a `task_id:` field, the queen pre-assigned a specific row to you. Claim that row by id — **do not** use the generic next-pending pattern below:
```bash
sqlite3 "<db_path>" <<'SQL'
UPDATE tasks SET status='claimed', worker_id='<worker-id>',
claim_token=lower(hex(randomblob(8))),
claimed_at=datetime('now'), updated_at=datetime('now')
WHERE id='<task_id>' AND status='pending'
RETURNING id, goal, payload;
SQL
```
Empty output → another worker raced you or the row is already done. Stop and report. Non-empty → that row is yours, proceed to "Load the plan".
### Claim: next pending (fallback when no task_id is assigned)
If your spawn message did NOT include `task_id:` — you are a generic fan-out worker racing on a shared queue. Use the generic next-pending claim:
```bash
sqlite3 "<db_path>" <<'SQL'
UPDATE tasks SET status='claimed', worker_id='<worker-id>',
claim_token=lower(hex(randomblob(8))),
claimed_at=datetime('now'), updated_at=datetime('now')
WHERE id=(SELECT id FROM tasks WHERE status='pending'
ORDER BY priority DESC, seq, created_at LIMIT 1)
RETURNING id, goal, payload;
SQL
```
Empty output → queue drained, exit. Otherwise the returned `id` is yours. **Never SELECT-then-UPDATE** — races.
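The same atomic-claim pattern can be sketched with Python's stdlib `sqlite3` module. This version checks `Cursor.rowcount` instead of `RETURNING` so it also runs on SQLite builds older than 3.35; the table and column names are the ones this protocol assumes:

```python
import sqlite3
import uuid

def claim_next_pending(conn: sqlite3.Connection, worker_id: str):
    """Atomically claim the highest-priority pending task, or return None."""
    token = uuid.uuid4().hex[:16]
    cur = conn.execute(
        """
        UPDATE tasks SET status='claimed', worker_id=?, claim_token=?,
               claimed_at=datetime('now'), updated_at=datetime('now')
        WHERE id=(SELECT id FROM tasks WHERE status='pending'
                  ORDER BY priority DESC, seq, created_at LIMIT 1)
        """,
        (worker_id, token),
    )
    conn.commit()
    if cur.rowcount == 0:  # queue drained, or another worker won the race
        return None
    # The random claim_token pins the SELECT to the row we just won.
    return conn.execute(
        "SELECT id, goal, payload FROM tasks WHERE worker_id=? AND claim_token=?",
        (worker_id, token),
    ).fetchone()
```

The single UPDATE is the whole claim — there is no SELECT-then-UPDATE window for another worker to slip through.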
### Load the plan
```bash
sqlite3 "<db_path>" "SELECT seq, id, title, status FROM steps WHERE task_id='<task-id>' ORDER BY seq;"
sqlite3 "<db_path>" "SELECT key, description, required, done_at FROM sop_checklist WHERE task_id='<task-id>';"
```
**Skip any step where status='done'.** That's the point — don't redo completed work.
### Execute a step
Before tool calls:
```bash
sqlite3 "<db_path>" "UPDATE steps SET status='in_progress', worker_id='<worker-id>', started_at=datetime('now') WHERE id='<step-id>';"
```
After success (one-line evidence: path, URL, key result):
```bash
sqlite3 "<db_path>" "UPDATE steps SET status='done', evidence='<what you did>', completed_at=datetime('now') WHERE id='<step-id>';"
```
### MANDATORY: SOP gate check before marking task done
```bash
sqlite3 "<db_path>" "SELECT key, description FROM sop_checklist WHERE task_id='<task-id>' AND required=1 AND done_at IS NULL;"
```
- Empty → proceed to "Mark task done".
- Non-empty → each row is work you still owe. Do it, then check it off:
```bash
sqlite3 "<db_path>" "UPDATE sop_checklist SET done_at=datetime('now'), done_by='<worker-id>', note='<why>' WHERE task_id='<task-id>' AND key='<key>';"
```
**Never mark a task done while this SELECT returns rows.** This gate exists specifically to stop you from declaring success while skipping required steps.
### Mark task done / failed
```bash
# Success:
sqlite3 "<db_path>" "UPDATE tasks SET status='done', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<worker-id>';"
# Unrecoverable failure:
sqlite3 "<db_path>" "UPDATE tasks SET status='failed', last_error='<one sentence>', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<worker-id>';"
```
The `AND worker_id=?` guard means a reclaimed row won't accept your write — treat zero rows affected as "your claim was revoked, stop."
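Checking `rowcount` makes the revoked-claim case explicit rather than silent; a sketch under the same assumed schema:

```python
import sqlite3

def mark_done(conn: sqlite3.Connection, task_id: str, worker_id: str) -> bool:
    """Mark a task done; returns False when the claim was revoked."""
    cur = conn.execute(
        "UPDATE tasks SET status='done', completed_at=datetime('now'), "
        "updated_at=datetime('now') WHERE id=? AND worker_id=?",
        (task_id, worker_id),
    )
    conn.commit()
    return cur.rowcount == 1  # 0 rows → someone else owns the row now; stop
```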
### Loop
After done/failed → claim the next task. Exit only when claim returns empty.
### Errors + debug
- **"database is locked"**: retry with 100ms → 1s backoff, max 5 attempts. `busy_timeout=5000` handles most contention silently.
- **Queue health**: `SELECT status, count(*) FROM tasks GROUP BY status;`
- **Your in-flight work**: `SELECT id, goal, status FROM tasks WHERE worker_id='<worker-id>';`
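The locked-retry rule above can be sketched as a small wrapper (delay values as stated; `busy_timeout` remains the first line of defense, this only covers what leaks past it):

```python
import sqlite3
import time

def execute_with_retry(conn, sql, params=(), attempts=5, base_delay=0.1):
    """Retry on 'database is locked', doubling the delay up to 1s."""
    delay = base_delay
    for attempt in range(attempts):
        try:
            return conn.execute(sql, params)
        except sqlite3.OperationalError as exc:
            # Re-raise anything that isn't lock contention, and the final try.
            if "locked" not in str(exc) or attempt == attempts - 1:
                raise
            time.sleep(delay)
            delay = min(delay * 2, 1.0)
```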
### Anti-patterns (will break the queue)
- Don't DDL (CREATE/ALTER/DROP).
- Don't DELETE — failed tasks stay as `failed` for audit.
- Don't skip the SOP gate check before marking done.
- Don't hold a task >15min without updates — the stale-claim reclaimer revokes your claim.
- Don't invent task IDs. Workers update existing rows; only the queen enqueues new ones.
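When a worker drives SQLite from Python rather than the `sqlite3` CLI, the DDL/DELETE anti-patterns can also be enforced mechanically with the stdlib authorizer hook. This is not part of the protocol — just an optional guard rail:

```python
import sqlite3

_FORBIDDEN = {
    sqlite3.SQLITE_DELETE,        # no DELETE — failed rows stay for audit
    sqlite3.SQLITE_CREATE_TABLE,  # no DDL
    sqlite3.SQLITE_ALTER_TABLE,
    sqlite3.SQLITE_DROP_TABLE,
}

def guard(conn: sqlite3.Connection) -> None:
    """Deny DDL and DELETE on every statement this connection runs."""
    conn.set_authorizer(
        lambda action, *_: sqlite3.SQLITE_DENY
        if action in _FORBIDDEN else sqlite3.SQLITE_OK
    )
```

SELECT and UPDATE pass through untouched; a denied statement fails at prepare time with a "not authorized" error instead of corrupting the queue.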
@@ -1,24 +1,24 @@
---
name: hive.context-preservation
description: Proactively preserve critical information before automatic context pruning destroys it.
description: Proactively extract critical values from tool results into working notes before automatic context pruning destroys them.
metadata:
author: hive
type: default-skill
visibility: [worker]
---
## Operational Protocol: Context Preservation
You operate under a finite context window. Important information WILL be pruned.
You operate under a finite context window. Older tool results WILL be pruned. Extract what you need while it's still in context.
Save-As-You-Go: After any tool call producing information you'll need later,
immediately extract key data into `_working_notes` or `_preserved_data`.
Do NOT rely on referring back to old tool results.
**Save-as-you-go.** After any tool call producing information you'll need later, immediately extract the key data into `_working_notes` or `_preserved_data`. Do not rely on referring back to old tool results — once they're pruned they're gone.
What to extract: URLs and key snippets (not full pages), relevant API fields
(not raw JSON), specific lines/values (not entire files), analysis results
(not raw data).
**What to extract:**
- URLs and key snippets (not full pages)
- Relevant API fields (not raw JSON blobs)
- Specific lines, values, or IDs (not entire files)
- Analysis conclusions (not raw data)
Before transitioning to the next phase/node, write a handoff summary to
`_handoff_context` with everything the next phase needs to know.
**Handoffs between tasks** happen through `progress.db`, not through shared-buffer handoff blobs. When you finish a task, any state the next worker needs goes into the task row itself (`steps.evidence`, `tasks.last_error`, `sop_checklist.note`) — see `hive.colony-progress-tracker`. Use `_working_notes` for things the DB schema doesn't cover.
You will receive an alert when context reaches {{warn_at_usage_ratio_pct}}% — preserve immediately.
@@ -1,18 +1,30 @@
---
name: hive.error-recovery
description: Follow a structured recovery protocol when tool calls fail instead of blindly retrying or giving up.
description: Follow a structured recovery decision tree when tool calls fail instead of blindly retrying or giving up.
metadata:
author: hive
type: default-skill
visibility: [worker]
---
## Operational Protocol: Error Recovery
When a tool call fails:
1. Diagnose — record error in notes, classify as transient or structural
2. Decide — transient: retry once. Structural fixable: fix and retry.
Structural unfixable: record as failed, move to next item.
Blocking all progress: record escalation note.
3. Adapt — if same tool failed {{max_retries_per_tool}}+ times, stop using it and find alternative.
Update plan in notes. Never silently drop the failed item.
1. **Diagnose** — classify the failure as *transient* (network blip, rate limit, timeout) or *structural* (wrong selector, missing auth, invalid schema, permission denied).
2. **Decide:**
- Transient → retry once.
- Structural + fixable → fix the input and retry.
- Structural + unfixable → record the failure and move to the next item.
- Blocking all progress → escalate.
3. **Adapt** — if the same tool has failed {{max_retries_per_tool}}+ times in a row, stop using it and find an alternative approach.
**Never silently drop a failed item.** If the item is a task in the colony queue, write the failure to the DB instead of an in-memory buffer:
```bash
sqlite3 "$DB_PATH" "UPDATE tasks SET status='failed', last_error='<one-sentence reason>', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<your-worker-id>';"
```
The `tasks.retry_count` column and the stale-claim reclaimer handle auto-retry for crashes; your job is the within-run decision tree above. See `hive.colony-progress-tracker` for the full queue protocol.
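The diagnose step can be sketched as a small message classifier. The keyword lists here are illustrative assumptions, not an exhaustive taxonomy:

```python
TRANSIENT_MARKERS = (
    "timeout", "timed out", "rate limit", "429",
    "connection reset", "temporarily unavailable",
)
STRUCTURAL_MARKERS = (
    "permission denied", "not found", "invalid",
    "unauthorized", "schema", "no such",
)

def classify(error_message: str) -> str:
    """Map an error message to 'transient', 'structural', or 'unknown'."""
    msg = error_message.lower()
    if any(m in msg for m in TRANSIENT_MARKERS):
        return "transient"
    if any(m in msg for m in STRUCTURAL_MARKERS):
        return "structural"
    return "unknown"  # treat as transient once, then escalate
```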
@@ -15,6 +15,28 @@ LinkedIn is the hardest mainstream site to automate because it combines **shadow
**Always activate `browser-automation` first.** This skill assumes you already know about CSS-px coordinates, `browser_type`/`browser_type_focused`, and `browser_shadow_query`. The guidance below is LinkedIn-specific; general browser rules are there.
## Rule #0: screenshot + coordinates, not selectors
LinkedIn changes class names aggressively and hides composers inside shadow roots AND iframes. **Selectors break constantly.** Your default strategy on every LinkedIn page should be:
1. `browser_screenshot()` — see the page visually
2. Pick the target's position from the image
3. `browser_coords(image_x, image_y)` → get CSS pixels
4. `browser_click_coordinate(css_x, css_y)` — reaches shadow DOM, iframes, and React elements alike
5. `browser_type(use_insert_text=True, text=...)` — types into whatever is focused, including Lexical composers
**If `browser_evaluate(...querySelectorAll...)` returns `[]` even once, do not try a different selector.** Stop, screenshot, and click. The "what if I try `.artdeco-list__item` next" instinct has burned ~50 tool calls in real sessions before the agent pivoted. Don't fall into that loop.
The selectors in the table below are **only** for when you already know the target is in the light DOM and you want a faster path than screenshot+coord. **When in doubt, default to coordinates.**
## Invitation manager — inline message button path is BROKEN
If the user asks to message a connection request **from the invitation manager page without accepting first**, the inline "Message" button opens a composer inside a nested **iframe overlay** (not a shadow root). The iframe's `contentDocument` is either cross-origin-blocked or not hydrated at access time. This path is **not reliably automatable today.**
**Redirect:** click the person's name/profile link on the card, go to the profile page, and use the standard Profile Message flow below. The profile flow is battle-tested; the inline-iframe flow isn't.
If you end up writing `document.activeElement.tagName === 'IFRAME'` inside a `browser_evaluate`, you've hit this trap. Stop and go to the profile page.
## Timing expectations
- `browser_navigate(wait_until="load")` — LinkedIn takes **4–5 seconds** to load the feed cold.
@@ -34,7 +56,7 @@ LinkedIn is the hardest mainstream site to automate because it combines **shadow
| Pending connection card | `.invitation-card, .invitations-card, [data-test-incoming-invitation-card]` | Filter out "invited you to follow" / "subscribe" cards |
| Accept button | `button[aria-label*="Accept"]` within the card scope | Per-card scoping is critical — there are many Accept buttons on the page |
LinkedIn changes class names aggressively. If a class-based selector breaks, fall back to **`browser_screenshot` → visual identification → `browser_coords` → `browser_click_coordinate`**. The screenshot + coord path works regardless of class-name churn and regardless of shadow DOM.
LinkedIn changes class names aggressively. If a class-based selector breaks, fall back to **`browser_screenshot` → visual identification → `browser_click_coordinate`** with the pixel you read straight off the image (screenshots are CSS-sized, no conversion). The screenshot + coord path works regardless of class-name churn and regardless of shadow DOM.
## Profile Message flow (verified end-to-end 2026-04-11)
@@ -359,24 +381,15 @@ is_logged_in = browser_evaluate("""
## Deduplication pattern
For any daily loop (connection acceptance, profile visits, DMs), maintain a ledger file:
Dedup is handled by the colony progress queue, not a separate JSON file. For any daily loop (connection acceptance, profile visits, DMs), the queen enqueues one row in the `tasks` table per `(profile_url, action)` pair; workers claim, act, and mark done. Already-`done` rows are skipped on the next claim — that's your crash-resume and cross-day dedup. See `hive.colony-progress-tracker` for the full claim/update protocol.
```
# data/linkedin_contacts.json
{
"contacts": [
{
"profile_url": "https://www.linkedin.com/in/username/",
"name": "First Last",
"action": "connection_accepted+message_sent",
"timestamp": "2026-04-13T09:30:00Z",
"message_preview": "first 50 chars of message sent"
}
]
}
```
If you need to check whether a given `(profile_url, action)` has already been handled in a prior run before enqueuing a new row, query the queue directly:
```bash
sqlite3 "<db_path>" "SELECT status FROM tasks WHERE payload LIKE '%\"profile_url\":\"<url>\"%' AND payload LIKE '%\"action\":\"<action>\"%';"
```
Before any action, check if the profile URL already has a recent entry for the same action. Skip if yes. Atomic-write the ledger after each success so crash-resume works.
Empty → not yet enqueued, safe to add. Otherwise honor the existing row's status.
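The `LIKE`-on-payload probe is sensitive to JSON key order and whitespace. When the SQLite build ships the `json1` functions (enabled by default in recent releases), a sturdier sketch is:

```python
import sqlite3

def prior_status(conn, profile_url: str, action: str):
    """Return the existing row's status for (profile_url, action), or None."""
    row = conn.execute(
        "SELECT status FROM tasks "
        "WHERE json_extract(payload, '$.profile_url')=? "
        "AND json_extract(payload, '$.action')=?",
        (profile_url, action),
    ).fetchone()
    return row[0] if row else None
```

`json_extract` matches on the parsed value, so formatting differences in the stored payload no longer cause false negatives.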
## See also
@@ -1,27 +1,29 @@
---
name: hive.note-taking
description: Maintain structured working notes throughout execution to prevent information loss during context pruning.
description: Maintain a free-form scratchpad of decisions, extracted values, and open questions so context pruning doesn't lose anything you still need.
metadata:
author: hive
type: default-skill
visibility: [worker]
---
## Operational Protocol: Structured Note-Taking
Maintain structured working notes in shared buffer key `_working_notes`.
Maintain free-form working notes in shared buffer key `_working_notes` for data that *you* need to remember but that isn't captured by the colony task queue.
**Do not duplicate the queue in here.** Per-task goal, ordered steps, and SOP gates live in `progress.db` — use `hive.colony-progress-tracker` for those. These notes are for things the DB schema doesn't cover.
Update at these checkpoints:
- After completing each discrete subtask or batch item
- After receiving new information that changes your plan
- Before any tool call that will produce substantial output
- After receiving new information that changes how you plan to approach the current step
- Before any tool call that will produce substantial output you'll need to reference later
- When you make a non-obvious decision whose *why* would be lost if the tool call history gets pruned
Structure:
### Objective — restate the goal
### Current Plan — numbered steps, mark completed with ✓
### Key Decisions — decisions made and WHY
### Working Data — intermediate results, extracted values
### Open Questions — uncertainties to verify
### Blockers — anything preventing progress
### Working Data — intermediate results, extracted values (URLs, IDs, key snippets — not full pages)
### Open Questions — uncertainties you plan to verify
### Blockers — anything preventing progress that isn't already captured in `tasks.last_error`
Update incrementally — do not rewrite from scratch each time.
@@ -4,6 +4,7 @@ description: Periodically self-assess output quality to catch degradation before
metadata:
author: hive
type: default-skill
visibility: [worker]
---
## Operational Protocol: Quality Self-Assessment
@@ -1,17 +0,0 @@
---
name: hive.task-decomposition
description: Decompose complex tasks into explicit subtasks before diving in.
metadata:
author: hive
type: default-skill
---
## Operational Protocol: Task Decomposition
Before starting a complex task:
1. Decompose — break into numbered subtasks in `_working_notes` Current Plan
2. Estimate — relative effort per subtask (small/medium/large)
3. Execute — work through in order, mark ✓ when complete
4. Budget — if running low on iterations, prioritize by impact
5. Verify — before declaring done, every subtask must be ✓, skipped (with reason), or blocked
@@ -21,10 +21,32 @@ Each skill is a directory containing a `SKILL.md`. At startup, only the frontmat
### Choosing where to put a new skill
- **Project-scoped**: put under `<project>/.hive/skills/` when the skill is tied to that codebase's APIs, conventions, or infra.
- **User-scoped**: put under `~/.hive/skills/` when the skill is reusable across projects for this machine/user.
- **Colony-scoped (via `create_colony`)**: when the skill is the operational protocol a single colony needs — its API auth, DOM selectors, DB schema, task-queue conventions — do NOT place it under `~/.hive/skills/` or `<project>/.hive/skills/` yourself. Those roots are SHARED and every colony on the machine will see it. Instead, pass the skill content INLINE to the `create_colony` tool (`skill_name`, `skill_description`, `skill_body`, optional `skill_files`). The tool materializes the folder under `~/.hive/colonies/<colony_name>/.hive/skills/<skill-name>/` where it is discovered as **project scope** by only that colony's workers. See the subsection below.
- **Project-scoped**: put under `<project>/.hive/skills/` when the skill is tied to that codebase's APIs, conventions, or infra and multiple agents in the project should share it.
- **User-scoped**: put under `~/.hive/skills/` when the skill is reusable across projects for this machine/user and all agents should see it.
- **Framework default**: add under `core/framework/skills/_default_skills/` AND register in `framework/skills/defaults.py::SKILL_REGISTRY` only when the skill is a universal operational protocol shipped with Hive. Default skills use the `hive.<name>` naming convention and include `type: default-skill` in metadata.
### Colony-scoped skills via `create_colony`
A colony-scoped skill is one that belongs to exactly ONE colony — e.g. it encodes the HoneyComb staging API the `honeycomb_research` colony polls, or the LinkedIn outbound flow the `linkedin_outbound_campaign` colony runs. Writing such a skill at `~/.hive/skills/` or `<project>/.hive/skills/` leaks it to every other colony, which will then see it at selection time.
**Do not reach for `write_file` to create the folder.** The `create_colony` tool takes the skill content INLINE and places it for you:
```
create_colony(
colony_name="honeycomb_research",
task="Build a daily honeycomb market report…",
skill_name="honeycomb-api-protocol",
skill_description="How to query the HoneyComb staging API…",
skill_body="## Operational Protocol\n\nAuth: …",
skill_files=[{"path": "scripts/fetch_tickers.py", "content": "…"}], # optional
)
```
The tool writes `~/.hive/colonies/honeycomb_research/.hive/skills/honeycomb-api-protocol/SKILL.md` (plus any `skill_files`), which `SkillDiscovery` picks up as project scope when that colony's workers start — and ONLY that colony's workers. No cross-colony leakage.
Do not write colony-bound skill folders by hand under `~/.hive/skills/`. A skill placed there is user-scoped and becomes visible to every colony on the machine — defeating the isolation you wanted.
### Directory layout
```
@@ -124,8 +146,8 @@ For Python scripts in a Hive project, prefer `uv run scripts/foo.py ...`.
### Creating a new skill — workflow
1. Pick a `<skill-name>` (lowercase-hyphenated).
2. Decide scope: project (`<project>/.hive/skills/`), user (`~/.hive/skills/`), or framework default (`core/framework/skills/_default_skills/` + registry entry).
3. Create the directory and write `SKILL.md` with frontmatter + body.
2. Decide scope: **colony** (pass content INLINE to `create_colony` — STOP here, do not hand-author the folder), project (`<project>/.hive/skills/`), user (`~/.hive/skills/`), or framework default (`core/framework/skills/_default_skills/` + registry entry).
3. For the non-colony scopes: create the directory and write `SKILL.md` with frontmatter + body.
4. Add `scripts/`, `references/`, `assets/` only if needed.
5. Validate the frontmatter: name matches dir, description is specific, no forbidden characters.
6. Validate using the Hive CLI:
@@ -203,7 +203,7 @@ for c in candidates:
else:
browser_click("[data-testid='tweetButton']")
sleep(2)
record_sent(c['preview'], reply_text) # append to ledger
# Mark the task done in progress.db — see hive.colony-progress-tracker
# Close the composer (press Escape or click the Close button)
browser_press("Escape")
@@ -307,24 +307,9 @@ If any of these appear, **stop the run, screenshot the state, and surface the is
## Deduplication pattern
Every daily loop should maintain a ledger file. Append after each successful reply/post, atomic-write to survive crashes.
Dedup is handled by the colony progress queue, not a separate JSON file. The queen enqueues one row in the `tasks` table per reply target (keyed by tweet URL); workers claim, reply, and mark done. Already-`done` rows are skipped on the next claim — that's your crash-resume and cross-day dedup, for free. See `hive.colony-progress-tracker` for the full claim/update protocol.
```
# data/x_replies_ledger.json
{
"replies": [
{
"tweet_url": "https://x.com/<author>/status/<id>",
"author": "username",
"original_preview": "first 100 chars of the tweet",
"reply_text": "what you sent",
"timestamp": "2026-04-13T09:30:00Z"
}
]
}
```
Extract the tweet URL via `browser_evaluate`:
Extract the tweet URL via `browser_evaluate` so the queen can use it as the task key:
```
url = browser_evaluate("""
@@ -337,7 +322,13 @@ url = browser_evaluate("""
""", article_index)
```
Before each reply, check if the URL already has a ledger entry. If yes, skip. This survives across runs and across days.
If you need to check whether a given tweet URL has already been replied to in a prior run (e.g., scanning live search results before enqueuing), query the queue directly:
```bash
sqlite3 "<db_path>" "SELECT status FROM tasks WHERE payload LIKE '%\"tweet_url\":\"<url>\"%';"
```
Empty → not yet enqueued, safe to add. Otherwise honor the existing row's status.
## Reply style guidelines
@@ -20,6 +20,11 @@ logger = logging.getLogger(__name__)
# visible. Preserving awareness of every skill beats truncating entries.
_COMPACT_THRESHOLD_CHARS = 5000
# Per-skill description cap. Descriptions often run 300–500 chars of
# context that's only useful once — the first sentence is enough to
# decide whether a skill applies. Truncated entries get a trailing "…".
_DESCRIPTION_CAP_CHARS = 140
_MANDATORY_HEADER_FULL = """## Skills (mandatory)
Before replying: scan <available_skills> <description> entries.
- If exactly one skill clearly applies: read its SKILL.md at <location> with `read_file`, then follow it.
@@ -88,18 +93,27 @@ class SkillCatalog:
"""All skill base directories for file access allowlisting."""
return [skill.base_dir for skill in self._skills.values()]
def to_prompt(self) -> str:
def to_prompt(self, *, phase: str | None = None) -> str:
"""Generate the catalog prompt for system prompt injection.
Returns empty string when no skills are present. Otherwise returns
a mandatory pre-reply checklist + decision rules + rate-limit note,
followed by the <available_skills> XML body.
When the full XML body exceeds ``_COMPACT_THRESHOLD_CHARS``, the
compact variant is emitted instead: <description> elements are
dropped so every skill stays visible before any gets truncated.
When ``phase`` is set, skills whose ``visibility`` list is present
and does not include that phase are filtered out. Skills with
``visibility=None`` always appear.
Descriptions are capped to the first sentence or
``_DESCRIPTION_CAP_CHARS`` (whichever is shorter) with a trailing
"" on truncation. When the full XML body still exceeds
``_COMPACT_THRESHOLD_CHARS`` the compact variant is emitted:
<description> elements are dropped so every skill stays visible
before any gets truncated.
"""
all_skills = sorted(self._skills.values(), key=lambda s: s.name)
if phase is not None:
all_skills = [s for s in all_skills if s.visibility is None or phase in s.visibility]
if not all_skills:
return ""
@@ -111,7 +125,25 @@ class SkillCatalog:
return f"{_MANDATORY_HEADER_COMPACT}\n\n{compact_xml}"
@staticmethod
def _render_xml(skills: list[ParsedSkill], *, compact: bool) -> str:
def _cap_description(description: str) -> str:
"""Return the first sentence or first ``_DESCRIPTION_CAP_CHARS`` chars."""
text = description.strip()
if not text:
return text
# First sentence boundary — look for '. ', '! ', '? '. Avoid matching
# decimals or abbreviations by requiring whitespace after the mark.
for i, ch in enumerate(text):
if ch in ".!?" and (i + 1 == len(text) or text[i + 1].isspace()):
sentence = text[: i + 1]
if len(sentence) <= _DESCRIPTION_CAP_CHARS:
return sentence
break
if len(text) <= _DESCRIPTION_CAP_CHARS:
return text
return text[: _DESCRIPTION_CAP_CHARS - 1].rstrip() + "…"
@classmethod
def _render_xml(cls, skills: list[ParsedSkill], *, compact: bool) -> str:
"""Render the `<available_skills>` block.
``compact=True`` drops `<description>` to preserve skill awareness
@@ -122,7 +154,8 @@ class SkillCatalog:
lines.append(" <skill>")
lines.append(f" <name>{escape(skill.name)}</name>")
if not compact:
lines.append(f" <description>{escape(skill.description)}</description>")
capped = cls._cap_description(skill.description)
lines.append(f" <description>{escape(capped)}</description>")
lines.append(f" <location>{escape(skill.location)}</location>")
lines.append(" </skill>")
lines.append("</available_skills>")
@@ -36,8 +36,8 @@ class SkillsConfig:
# Default skill configuration
default_skills = {
"hive.note-taking": {"enabled": True},
"hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
"hive.quality-monitor": {"enabled": False},
"hive.quality-monitor": {"enabled": False, "assessment_interval": 10},
"hive.error-recovery": {"max_retries_per_tool": 5},
}
"""
@@ -24,34 +24,21 @@ _SKILL_DEFAULTS: dict[str, dict[str, Any]] = {
"hive.quality-monitor": {"assessment_interval": 5},
"hive.error-recovery": {"max_retries_per_tool": 3},
"hive.context-preservation": {"warn_at_usage_ratio_pct": 45},
"hive.batch-ledger": {"checkpoint_every_n": 5},
}
# Keywords that indicate a batch processing scenario (DS-12)
_BATCH_KEYWORDS: tuple[str, ...] = (
"list of",
"collection of",
"set of",
"batch of",
"each item",
"for each",
"process all",
"records",
"entries",
"rows",
"items",
)
_BATCH_INIT_NUDGE = (
"Note: your input appears to describe a batch operation. "
"Initialize `_batch_ledger` with the total item count before processing."
)
def is_batch_scenario(text: str) -> bool:
"""Return True if *text* contains batch-processing indicators (DS-12)."""
lower = text.lower()
return any(kw in lower for kw in _BATCH_KEYWORDS)
"""Deprecated: batch auto-detection is no longer used.
Kept as a no-op so the agent_loop call site (which wraps it in an
``if ctx.default_skill_batch_nudge:`` guard that's also now always
empty) can stay unchanged until a broader cleanup. The old
``_batch_ledger`` shared-buffer feature was replaced by the
per-colony SQLite task queue (``hive.colony-progress-tracker``),
which lives in ``progress.db`` and is authoritative for batch
state across workers and runs.
"""
return False
def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> str:
@@ -67,40 +54,37 @@ def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> s
return body
# Ordered list of default skills (name → directory)
# Ordered list of default skills (name → directory).
#
# Removed on 2026-04-15 as part of the colony-progress-tracker rollout:
# - hive.task-decomposition — steps table in progress.db supersedes
# in-memory ``_working_notes → Current Plan`` decomposition.
# - hive.batch-ledger — tasks table in progress.db supersedes
# the ``_batch_ledger`` dict-shaped queue with its pending →
# in_progress → completed/failed/skipped state machine.
# Both were duplicating state that belongs in SQLite.
SKILL_REGISTRY: dict[str, str] = {
"hive.note-taking": "note-taking",
"hive.batch-ledger": "batch-ledger",
"hive.context-preservation": "context-preservation",
"hive.quality-monitor": "quality-monitor",
"hive.error-recovery": "error-recovery",
"hive.task-decomposition": "task-decomposition",
"hive.colony-progress-tracker": "colony-progress-tracker",
"hive.writing-hive-skills": "writing-hive-skills",
}
# All shared buffer keys used by default skills (for permission auto-inclusion)
# Shared buffer keys referenced by the remaining default skills (used
# for permission auto-inclusion). The dead keys for batch-ledger,
# task-decomposition, the handoff buffer, and the error-log buffers
# were removed when those features migrated to progress.db.
DATA_BUFFER_KEYS: list[str] = [
# note-taking
"_working_notes",
"_notes_updated_at",
# batch-ledger
"_batch_ledger",
"_batch_total",
"_batch_completed",
"_batch_failed",
# context-preservation
"_handoff_context",
"_preserved_data",
# quality-monitor
"_quality_log",
"_quality_degradation_count",
# error-recovery
"_error_log",
"_failed_tools",
"_escalation_needed",
# task-decomposition
"_subtasks",
"_iteration_budget_remaining",
]
@@ -252,16 +236,15 @@ class DefaultSkillManager:
@property
def batch_init_nudge(self) -> str | None:
"""Nudge text to prepend to system prompt when batch input detected (DS-12).
"""Deprecated: always returns None.
Returns None if ``hive.batch-ledger`` is disabled or auto_detect_batch is False.
The ``hive.batch-ledger`` default skill was removed when batch
tracking moved into ``progress.db`` (``hive.colony-progress-tracker``).
Callers in agent_host, colony_runtime, and orchestrator still read
this property; returning None keeps them functional with no
system-prompt nudge.
"""
if "hive.batch-ledger" not in self._skills:
return None
overrides = self._config.get_default_overrides("hive.batch-ledger")
if overrides.get("auto_detect_batch") is False:
return None
return _BATCH_INIT_NUDGE
return None
@property
def context_warn_ratio(self) -> float | None:
@@ -64,6 +64,7 @@ class SkillsManager:
def __init__(self, config: SkillsManagerConfig | None = None) -> None:
self._config = config or SkillsManagerConfig()
self._loaded = False
self._catalog: object = None # SkillCatalog, set after load()
self._catalog_prompt: str = ""
self._protocols_prompt: str = ""
self._allowlisted_dirs: list[str] = []
@@ -91,6 +92,7 @@ class SkillsManager:
mgr = cls.__new__(cls)
mgr._config = SkillsManagerConfig()
mgr._loaded = True # skip load()
mgr._catalog = None
mgr._catalog_prompt = skills_catalog_prompt
mgr._protocols_prompt = protocols_prompt
mgr._allowlisted_dirs = []
@@ -140,6 +142,7 @@ class SkillsManager:
)
catalog = SkillCatalog(discovered)
self._catalog = catalog
self._allowlisted_dirs = catalog.allowlisted_dirs
catalog_prompt = catalog.to_prompt()
@@ -271,6 +274,18 @@ class SkillsManager:
"""Community skills XML catalog for system prompt injection."""
return self._catalog_prompt
def skills_catalog_prompt_for_phase(self, phase: str | None) -> str:
"""Render the catalog filtered for the given queen phase.
Skills whose frontmatter ``visibility`` list is present and
excludes ``phase`` are dropped. Falls back to the cached
phase-agnostic prompt when no live catalog is available
(e.g. ``from_precomputed``).
"""
if self._catalog is None or phase is None:
return self._catalog_prompt
return self._catalog.to_prompt(phase=phase) # type: ignore[attr-defined]
@property
def protocols_prompt(self) -> str:
"""Default skill operational protocols for system prompt injection."""
@@ -37,6 +37,10 @@ class ParsedSkill:
compatibility: list[str] | None = None
metadata: dict[str, Any] | None = None
allowed_tools: list[str] | None = None
# List of queen phases in which this skill appears in the catalog.
# None = visible in all phases. Example: ["planning", "building"]
# hides a framework-authoring skill from the INDEPENDENT/DM prompt.
visibility: list[str] | None = None
def _try_fix_yaml(raw: str) -> str:
@@ -219,6 +223,19 @@ def parse_skill_md(path: Path, source_scope: str = "project") -> ParsedSkill | N
raw_tools = frontmatter.get("allowed-tools")
if isinstance(raw_tools, str):
raw_tools = [raw_tools]
# `visibility` lives under `metadata.visibility` so it stays inside
# the open `metadata` map (the skill-file schema used by the IDE
# and other tooling only allows a fixed set of top-level keys).
raw_metadata = frontmatter.get("metadata")
raw_visibility: Any = None
if isinstance(raw_metadata, dict):
raw_visibility = raw_metadata.get("visibility")
if isinstance(raw_visibility, str):
raw_visibility = [raw_visibility]
if isinstance(raw_visibility, list):
raw_visibility = [str(v).strip() for v in raw_visibility if str(v).strip()] or None
else:
raw_visibility = None
return ParsedSkill(
name=name,
@@ -231,4 +248,5 @@ def parse_skill_md(path: Path, source_scope: str = "project") -> ParsedSkill | N
compatibility=raw_compat,
metadata=frontmatter.get("metadata"),
allowed_tools=raw_tools,
visibility=raw_visibility,
)
@@ -43,6 +43,10 @@ class FileConversationStore:
def __init__(self, base_path: str | Path) -> None:
self._base = Path(base_path)
self._parts_dir = self._base / "parts"
# Partial checkpoints for in-flight assistant turns. Written on every
# stream event, deleted atomically when the final part lands. Kept
# in a sibling dir so the parts/ glob doesn't pick them up.
self._partials_dir = self._base / "partials"
# --- sync helpers --------------------------------------------------------
@@ -99,6 +103,44 @@ class FileConversationStore:
async def read_cursor(self) -> dict[str, Any] | None:
return await self._run(self._read_json, self._base / "cursor.json")
async def write_partial(self, seq: int, data: dict[str, Any]) -> None:
"""Checkpoint an in-flight assistant turn. Overwrites any prior partial
for the same seq. Callers are expected to call clear_partial() once
the real part has been written via write_part().
"""
path = self._partials_dir / f"{seq:010d}.json"
await self._run(self._write_json, path, data)
async def read_partial(self, seq: int) -> dict[str, Any] | None:
path = self._partials_dir / f"{seq:010d}.json"
return await self._run(self._read_json, path)
async def read_all_partials(self) -> list[dict[str, Any]]:
"""Return all partial checkpoints, sorted by seq. Used during restore
to surface any in-flight turn that the last process didn't finish.
"""
def _read_all() -> list[dict[str, Any]]:
if not self._partials_dir.exists():
return []
files = sorted(self._partials_dir.glob("*.json"))
partials: list[dict[str, Any]] = []
for f in files:
data = self._read_json(f)
if data is not None:
partials.append(data)
return partials
return await self._run(_read_all)
async def clear_partial(self, seq: int) -> None:
def _clear() -> None:
path = self._partials_dir / f"{seq:010d}.json"
if path.exists():
path.unlink()
await self._run(_clear)
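The checkpoint lifecycle these methods describe — overwrite per seq while streaming, clear when the final part lands, sweep survivors on restore — can be exercised synchronously against the same zero-padded naming scheme. A standalone sketch, not the store itself:

```python
import json
import tempfile
from pathlib import Path

# Standalone sketch of the partial-checkpoint lifecycle, using the same
# "{seq:010d}.json" naming so filename order matches seq order.
partials_dir = Path(tempfile.mkdtemp()) / "partials"
partials_dir.mkdir()

def write_partial(seq: int, data: dict) -> None:
    # Overwrites any prior checkpoint for the same seq.
    (partials_dir / f"{seq:010d}.json").write_text(json.dumps(data))

def clear_partial(seq: int) -> None:
    path = partials_dir / f"{seq:010d}.json"
    if path.exists():
        path.unlink()

def read_all_partials() -> list[dict]:
    return [json.loads(f.read_text()) for f in sorted(partials_dir.glob("*.json"))]

write_partial(7, {"seq": 7, "text": "hel"})
write_partial(7, {"seq": 7, "text": "hello"})  # later stream event overwrites
write_partial(12, {"seq": 12, "text": "wip"})
clear_partial(7)  # the final part landed for seq 7
# Restore now surfaces only the turn that never finished (seq 12).
print(read_all_partials())
```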
async def delete_parts_before(self, seq: int, run_id: str | None = None) -> None:
def _delete() -> None:
if not self._parts_dir.exists():
@@ -125,6 +167,10 @@ class FileConversationStore:
if self._parts_dir.exists():
for f in self._parts_dir.glob("*.json"):
f.unlink()
# Clear partial checkpoints
if self._partials_dir.exists():
for f in self._partials_dir.glob("*.json"):
f.unlink()
# Clear cursor
cursor_path = self._base / "cursor.json"
if cursor_path.exists():
File diff suppressed because it is too large
+7 -4
@@ -12,12 +12,13 @@ export class ApiError extends Error {
async function request<T>(path: string, options: RequestInit = {}): Promise<T> {
const url = `${API_BASE}${path}`;
const isFormData = options.body instanceof FormData;
const headers: Record<string, string> = isFormData
? {} // Let browser set Content-Type with boundary for multipart
: { "Content-Type": "application/json", ...options.headers as Record<string, string> };
const response = await fetch(url, {
...options,
headers: {
"Content-Type": "application/json",
...options.headers,
},
headers,
});
if (!response.ok) {
@@ -52,4 +53,6 @@ export const api = {
method: "PATCH",
body: body ? JSON.stringify(body) : undefined,
}),
upload: <T>(path: string, formData: FormData) =>
request<T>(path, { method: "POST", body: formData }),
};
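The FormData branch above matters because a multipart request's `Content-Type` must carry a boundary parameter that only the runtime knows; setting it manually breaks the upload. A small sketch of the same branch (note that, like the code above, it also drops custom headers for FormData bodies):

```typescript
// Sketch of the header selection in request<T>(): JSON bodies get an
// explicit Content-Type, FormData bodies get no headers at all so the
// runtime can emit "multipart/form-data; boundary=..." itself.
function buildHeaders(
  body: unknown,
  extra: Record<string, string> = {},
): Record<string, string> {
  return body instanceof FormData
    ? {}
    : { "Content-Type": "application/json", ...extra };
}

const jsonHeaders = buildHeaders(JSON.stringify({ a: 1 }));
const uploadHeaders = buildHeaders(new FormData());
console.log(jsonHeaders, uploadHeaders);
```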
+80
@@ -0,0 +1,80 @@
import { api } from "./client";
/** A SQLite cell value, constrained to JSON-serialisable types that
* Python maps into sqlite3 param placeholders without surprises. */
export type CellValue = string | number | boolean | null;
export interface ColumnInfo {
name: string;
/** SQLite declared type (e.g. "TEXT", "INTEGER"). May be empty string. */
type: string;
notnull: boolean;
/** >0 means part of the primary key (ordinal position). 0 = not PK. */
pk: number;
dflt_value: string | null;
}
export interface TableOverview {
name: string;
columns: ColumnInfo[];
row_count: number;
primary_key: string[];
}
export interface TableRowsResponse {
table: string;
columns: ColumnInfo[];
primary_key: string[];
rows: Record<string, CellValue>[];
total: number;
limit: number;
offset: number;
}
export interface UpdateRowRequest {
/** Primary key column(s) → value(s). All PK columns must be present. */
pk: Record<string, CellValue>;
/** Column(s) → new value(s). Cannot include PK columns. */
updates: Record<string, CellValue>;
}
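Given the constraints documented on `UpdateRowRequest`, a client can pre-validate before issuing the PATCH. A hypothetical helper — `validateUpdate` is not part of the API here; `primaryKey` would come from `TableOverview.primary_key`:

```typescript
type CellValue = string | number | boolean | null;

interface UpdateRowRequest {
  pk: Record<string, CellValue>;
  updates: Record<string, CellValue>;
}

// Hypothetical pre-flight check mirroring the documented contract:
// every PK column present in `pk`, and no PK column in `updates`.
function validateUpdate(req: UpdateRowRequest, primaryKey: string[]): string[] {
  const errors: string[] = [];
  for (const col of primaryKey) {
    if (!(col in req.pk)) errors.push(`missing PK column: ${col}`);
  }
  for (const col of Object.keys(req.updates)) {
    if (primaryKey.includes(col)) errors.push(`cannot update PK column: ${col}`);
  }
  return errors;
}
```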
export const colonyDataApi = {
/** List user tables in the colony's progress.db with row counts.
*
* Routed by colony directory name (not session) because progress.db
* is per-colony — one DB serves every session for that colony, and
* the data is reachable even when no session is live. */
listTables: (colonyName: string) =>
api.get<{ tables: TableOverview[] }>(
`/colonies/${encodeURIComponent(colonyName)}/data/tables`,
),
/** Paginated rows for a table. Server enforces limit ≤ 500. */
listRows: (
colonyName: string,
table: string,
opts: {
limit?: number;
offset?: number;
orderBy?: string | null;
orderDir?: "asc" | "desc";
} = {},
) => {
const params = new URLSearchParams();
if (opts.limit != null) params.set("limit", String(opts.limit));
if (opts.offset != null) params.set("offset", String(opts.offset));
if (opts.orderBy) params.set("order_by", opts.orderBy);
if (opts.orderDir) params.set("order_dir", opts.orderDir);
const qs = params.toString();
return api.get<TableRowsResponse>(
`/colonies/${encodeURIComponent(colonyName)}/data/tables/${encodeURIComponent(table)}/rows${qs ? `?${qs}` : ""}`,
);
},
/** Update a single row by primary key. Returns {updated: 0|1}. */
updateRow: (colonyName: string, table: string, body: UpdateRowRequest) =>
api.patch<{ updated: number }>(
`/colonies/${encodeURIComponent(colonyName)}/data/tables/${encodeURIComponent(table)}/rows`,
body,
),
};
+105
@@ -0,0 +1,105 @@
import { api } from "./client";
export interface WorkerResult {
status: string;
summary: string;
error: string | null;
tokens_used: number;
duration_seconds: number;
}
export interface WorkerSummary {
worker_id: string;
task: string;
status: string;
started_at: number;
result: WorkerResult | null;
}
export interface ColonySkill {
name: string;
description: string;
location: string;
base_dir: string;
source_scope: string;
}
export interface ColonyTool {
name: string;
description: string;
/** Canonical credential/provider key (e.g. "hubspot", "gmail") for
* tools bound to an Aden credential. ``null`` for framework/core
* tools that don't require a provider credential. */
provider: string | null;
}
export interface ProgressTask {
id: string;
seq: number | null;
priority: number;
goal: string;
payload: string | null;
status: string;
worker_id: string | null;
claim_token: string | null;
claimed_at: string | null;
started_at: string | null;
completed_at: string | null;
created_at: string;
updated_at: string;
retry_count: number;
max_retries: number;
last_error: string | null;
parent_task_id: string | null;
source: string | null;
}
export interface ProgressStep {
id: string;
task_id: string;
seq: number;
title: string;
detail: string | null;
status: string;
evidence: string | null;
worker_id: string | null;
started_at: string | null;
completed_at: string | null;
/** Present only on upsert events; not on snapshot rows. */
_ts?: string | null;
}
export interface ProgressSnapshot {
tasks: ProgressTask[];
steps: ProgressStep[];
}
export const colonyWorkersApi = {
/** List spawned workers (live + completed) for a colony session. */
list: (sessionId: string) =>
api.get<{ workers: WorkerSummary[] }>(`/sessions/${sessionId}/workers`),
/** List the colony's shared skills catalog. */
listSkills: (sessionId: string) =>
api.get<{ skills: ColonySkill[] }>(`/sessions/${sessionId}/colony/skills`),
/** List the colony's default tools. */
listTools: (sessionId: string) =>
api.get<{ tools: ColonyTool[] }>(`/sessions/${sessionId}/colony/tools`),
/** Snapshot of progress.db tasks + steps, optionally filtered by
* worker_id. Routed by colony directory name (not session) because
* progress.db is per-colony. */
progressSnapshot: (colonyName: string, workerId?: string) => {
const qs = workerId ? `?worker_id=${encodeURIComponent(workerId)}` : "";
return api.get<ProgressSnapshot>(
`/colonies/${encodeURIComponent(colonyName)}/progress/snapshot${qs}`,
);
},
/** Build the URL for the live progress SSE stream. */
progressStreamUrl: (colonyName: string, workerId?: string): string => {
const qs = workerId ? `?worker_id=${encodeURIComponent(workerId)}` : "";
return `/api/colonies/${encodeURIComponent(colonyName)}/progress/stream${qs}`;
},
};
+6
@@ -64,4 +64,10 @@ export const configApi = {
about,
...(theme ? { theme } : {}),
}),
uploadAvatar: (file: File) => {
const fd = new FormData();
fd.append("avatar", file);
return api.upload<{ avatar_url: string }>("/config/profile/avatar", fd);
},
};
+19
@@ -0,0 +1,19 @@
import { api } from "./client";
export interface CustomPrompt {
id: string;
title: string;
category: string;
content: string;
custom: true;
}
export const promptsApi = {
list: () => api.get<{ prompts: CustomPrompt[] }>("/prompts"),
create: (title: string, category: string, content: string) =>
api.post<CustomPrompt>("/prompts", { title, category, content }),
delete: (promptId: string) =>
api.delete<{ deleted: string }>(`/prompts/${promptId}`),
};
+7
@@ -31,6 +31,13 @@ export const queensApi = {
updateProfile: (queenId: string, updates: Partial<QueenProfile>) =>
api.patch<QueenProfile>(`/queen/${queenId}/profile`, updates),
/** Upload queen avatar image. */
uploadAvatar: (queenId: string, file: File) => {
const fd = new FormData();
fd.append("avatar", file);
return api.upload<{ avatar_url: string }>(`/queen/${queenId}/avatar`, fd);
},
/** Get or create a persistent session for a queen. */
getOrCreateSession: (queenId: string, initialPrompt?: string, initialPhase?: string) =>
api.post<QueenSessionResult>(`/queen/${queenId}/session`, {
+19 -3
@@ -87,9 +87,25 @@ export const sessionsApi = {
colonies: (sessionId: string) =>
api.get<{ colonies: string[] }>(`/sessions/${sessionId}/colonies`),
/** Get persisted eventbus log for a session (works for cold sessions — used for full UI replay). */
eventsHistory: (sessionId: string) =>
api.get<{ events: AgentEvent[]; session_id: string }>(`/sessions/${sessionId}/events/history`),
/** Get persisted eventbus log for a session (works for cold sessions — used for full UI replay).
*
* Returns the TAIL of the event log. Default limit 2000 (server
* clamps to [1, 10000]); older events get dropped and
* ``truncated: true`` is set so the UI can show an indicator.
*/
eventsHistory: (sessionId: string, limit?: number) =>
api.get<{
events: AgentEvent[];
session_id: string;
total: number;
returned: number;
truncated: boolean;
limit: number;
}>(
`/sessions/${sessionId}/events/history${
limit ? `?limit=${limit}` : ""
}`,
),
/** Open the session's data folder in the OS file manager. */
revealFolder: (sessionId: string) =>
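Since `eventsHistory` now returns the tail of the log, a UI can derive how many earlier events were omitted from the `total` and `returned` fields of the response type above:

```typescript
interface EventsHistoryMeta {
  total: number;
  returned: number;
  truncated: boolean;
}

// Count of older events cut off the front of the log — suitable for an
// "N earlier events not shown" indicator.
function droppedCount(meta: EventsHistoryMeta): number {
  return meta.truncated ? meta.total - meta.returned : 0;
}

console.log(droppedCount({ total: 5000, returned: 2000, truncated: true })); // → 3000
console.log(droppedCount({ total: 120, returned: 120, truncated: false })); // → 0
```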
+2 -2
@@ -12,8 +12,8 @@ export interface LiveSession {
loaded_at: number;
uptime_seconds: number;
intro_message?: string;
/** Queen operating phase — "planning", "building", "staging", or "running" */
queen_phase?: "planning" | "building" | "staging" | "running" | "independent";
/** Queen operating phase — "independent" (DM), "working" (workers running), or "reviewing" (workers done) */
queen_phase?: "independent" | "working" | "reviewing";
/** Whether the queen's LLM supports image content in messages */
queen_supports_images?: boolean;
/** Selected queen identity ID (e.g. "queen_technology") */
+38
@@ -1,6 +1,31 @@
import { api } from "./client";
import type { GraphTopology, NodeDetail, NodeCriteria, ToolInfo } from "./types";
export interface LiveWorker {
worker_id: string;
task: string;
status: string;
is_active: boolean;
duration_seconds: number;
explicit_report: Record<string, unknown> | null;
result_status: string | null;
result_summary: string | null;
}
export interface StopWorkerResult {
stopped: boolean;
worker_id?: string;
reason?: string;
status?: string;
error?: string;
}
export interface StopAllWorkersResult {
stopped: string[];
stopped_count: number;
errors?: { worker_id: string; error: string }[] | null;
}
export const workersApi = {
nodes: (sessionId: string, colonyId: string, workerSessionId?: string) =>
api.get<GraphTopology>(
@@ -26,4 +51,17 @@ export const workersApi = {
api.get<{ tools: ToolInfo[] }>(
`/sessions/${sessionId}/colonies/${colonyId}/nodes/${nodeId}/tools`,
),
// Live fan-out control
listLive: (sessionId: string) =>
api.get<{ workers: LiveWorker[] }>(`/sessions/${sessionId}/workers`),
stopLive: (sessionId: string, workerId: string) =>
api.post<StopWorkerResult>(
`/sessions/${sessionId}/workers/${workerId}/stop`,
{},
),
stopAllLive: (sessionId: string) =>
api.post<StopAllWorkersResult>(`/sessions/${sessionId}/workers/stop-all`, {}),
};
+56 -15
@@ -1,11 +1,31 @@
import { useState } from "react";
import { useState, useEffect } from "react";
import { useLocation } from "react-router-dom";
import { useColony } from "@/context/ColonyContext";
import { useHeaderActions } from "@/context/HeaderActionsContext";
import { useModel } from "@/context/ModelContext";
import { getQueenForAgent } from "@/lib/colony-registry";
import { Crown, KeyRound, Network } from "lucide-react";
import { Crown, KeyRound, Network, ChevronDown } from "lucide-react";
import SettingsModal from "@/components/SettingsModal";
import ModelSwitcher from "@/components/ModelSwitcher";
function UserAvatarButton({ initials, onClick, avatarVersion }: { initials: string; onClick: () => void; avatarVersion: number }) {
const [hasAvatar, setHasAvatar] = useState(true);
const url = `/api/config/profile/avatar?v=${avatarVersion}`;
// Reset hasAvatar when version changes (new upload)
useEffect(() => setHasAvatar(true), [avatarVersion]);
return (
<button
onClick={onClick}
className="w-7 h-7 rounded-full bg-primary/15 flex items-center justify-center hover:bg-primary/25 transition-colors overflow-hidden"
title="Profile settings"
>
{hasAvatar ? (
<img src={url} alt="" className="w-full h-full object-cover" onError={() => setHasAvatar(false)} />
) : (
<span className="text-[10px] font-bold text-primary">{initials || "U"}</span>
)}
</button>
);
}
interface AppHeaderProps {
onOpenQueenProfile?: (queenId: string) => void;
@@ -13,11 +33,23 @@ interface AppHeaderProps {
export default function AppHeader({ onOpenQueenProfile }: AppHeaderProps) {
const location = useLocation();
const { colonies, queens, queenProfiles, userProfile } = useColony();
const { colonies, queens, queenProfiles, userProfile, userAvatarVersion } = useColony();
const { actions } = useHeaderActions();
const { currentModel, currentProvider, availableModels, activeSubscription, subscriptions } = useModel();
const [settingsOpen, setSettingsOpen] = useState(false);
const [settingsSection, setSettingsSection] = useState<"profile" | "byok">("profile");
// Derive active model display label
const activeSubInfo = activeSubscription
? subscriptions.find((s) => s.id === activeSubscription)
: null;
const modelsProvider = activeSubInfo?.provider || currentProvider;
const models = availableModels[modelsProvider] || [];
const currentModelInfo = models.find((m) => m.id === currentModel);
const modelLabel = currentModelInfo
? currentModelInfo.label.split(" - ")[0]
: currentModel || "No model";
// Derive page title + icon from current route
const colonyMatch = location.pathname.match(/^\/colony\/(.+)/);
const queenMatch = location.pathname.match(/^\/queen\/(.+)/);
@@ -31,6 +63,15 @@ export default function AppHeader({ onOpenQueenProfile }: AppHeaderProps) {
const colonyId = colonyMatch[1];
const colony = colonies.find((c) => c.id === colonyId);
title = colony?.name ?? colonyId;
// Show queen profile button when the colony has a linked queen profile
if (colony?.queenProfileId) {
const profile = queenProfiles.find((q) => q.id === colony.queenProfileId);
if (profile) {
queenIdForProfile = profile.id;
queenTitle = profile.title ?? null;
icon = <Crown className="w-4 h-4 text-primary" />;
}
}
} else if (queenMatch) {
const queenId = queenMatch[1];
const profile = queenProfiles.find((q) => q.id === queenId);
@@ -86,24 +127,24 @@ export default function AppHeader({ onOpenQueenProfile }: AppHeaderProps) {
)}
<div className="flex items-center gap-2">
{actions}
<ModelSwitcher
onOpenSettings={() => {
<button
onClick={() => {
setSettingsSection("byok");
setSettingsOpen(true);
}}
/>
<button
className="flex items-center gap-1.5 px-2.5 py-1 rounded-md text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/40 transition-colors border border-transparent hover:border-border/40"
>
<span className="max-w-[120px] truncate">{modelLabel}</span>
<ChevronDown className="w-3 h-3" />
</button>
<UserAvatarButton
initials={initials}
avatarVersion={userAvatarVersion}
onClick={() => {
setSettingsSection("profile");
setSettingsOpen(true);
}}
className="w-7 h-7 rounded-full bg-primary/15 flex items-center justify-center hover:bg-primary/25 transition-colors"
title="Profile settings"
>
<span className="text-[10px] font-bold text-primary">
{initials || "U"}
</span>
</button>
/>
</div>
</div>
@@ -2,38 +2,34 @@ import { useState, useEffect } from "react";
type BridgeStatus = "checking" | "connected" | "disconnected" | "offline";
const BRIDGE_STATUS_URL = "/api/browser/status";
const POLL_INTERVAL_MS = 3000;
const BRIDGE_STATUS_STREAM_URL = "/api/browser/status/stream";
export default function BrowserStatusBadge() {
const [status, setStatus] = useState<BridgeStatus>("checking");
useEffect(() => {
let cancelled = false;
const es = new EventSource(BRIDGE_STATUS_STREAM_URL);
const check = async () => {
es.addEventListener("status", (e) => {
try {
const res = await fetch(BRIDGE_STATUS_URL, {
signal: AbortSignal.timeout(2000),
});
if (cancelled) return;
if (res.ok) {
const data = await res.json();
setStatus(data.connected ? "connected" : "disconnected");
} else {
setStatus("offline");
}
const data = JSON.parse((e as MessageEvent).data) as {
bridge: boolean;
connected: boolean;
};
if (!data.bridge) setStatus("offline");
else setStatus(data.connected ? "connected" : "disconnected");
} catch {
if (!cancelled) setStatus("offline");
setStatus("offline");
}
};
});
check();
const timer = setInterval(check, POLL_INTERVAL_MS);
return () => {
cancelled = true;
clearInterval(timer);
};
// EventSource auto-reconnects on transient errors; the next
// successful ``status`` event will overwrite this. We only flip
// to "offline" so the badge doesn't get stuck on "connected"
// after a backend restart.
es.onerror = () => setStatus("offline");
return () => es.close();
}, []);
if (status === "checking") return null;
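The `status` listener collapses each payload into one of the badge states; that mapping can be expressed on its own (payload shape as parsed in the listener above):

```typescript
type BridgeStatus = "checking" | "connected" | "disconnected" | "offline";

// Same decision table as the "status" event handler: no bridge process
// means offline; a bridge without a connected browser is "disconnected".
function mapStatus(data: { bridge: boolean; connected: boolean }): BridgeStatus {
  if (!data.bridge) return "offline";
  return data.connected ? "connected" : "disconnected";
}

console.log(mapStatus({ bridge: true, connected: true }));   // → "connected"
console.log(mapStatus({ bridge: true, connected: false }));  // → "disconnected"
console.log(mapStatus({ bridge: false, connected: false })); // → "offline"
```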
+366 -51
@@ -9,7 +9,10 @@ import {
Loader2,
Paperclip,
X,
Zap,
} from "lucide-react";
import WorkerRunBubble from "@/components/WorkerRunBubble";
import type { WorkerRunGroup } from "@/components/WorkerRunBubble";
export interface ImageContent {
type: "image_url";
@@ -25,12 +28,15 @@ export interface ContextUsageEntry {
import MarkdownContent from "@/components/MarkdownContent";
import QuestionWidget from "@/components/QuestionWidget";
import MultiQuestionWidget from "@/components/MultiQuestionWidget";
import { useQueenProfile } from "@/context/QueenProfileContext";
import { useColonyWorkers } from "@/context/ColonyWorkersContext";
import ParallelSubagentBubble, {
type SubagentGroup,
} from "@/components/ParallelSubagentBubble";
import {
formatMessageTime,
formatDayDividerLabel,
workerIdFromStreamId,
} from "@/lib/chat-helpers";
export interface ChatMessage {
@@ -53,13 +59,19 @@ export interface ChatMessage {
/** Epoch ms when this message was first created — used for ordering queen/worker interleaving */
createdAt?: number;
/** Queen phase active when this message was created */
phase?: "planning" | "building" | "staging" | "running" | "independent";
phase?: "independent" | "working" | "reviewing";
/** Images attached to a user message */
images?: ImageContent[];
/** Backend node_id that produced this message — used for subagent grouping */
nodeId?: string;
/** Backend execution_id for this message */
executionId?: string;
/** Backend stream_id — the per-worker identity used for grouping
* parallel-spawn workers into their own stacked WorkerRunBubble.
* "queen" for queen messages, "worker" for the single loaded
* worker (run_agent_with_input), or "worker:{uuid}" for each
* parallel worker spawned via run_parallel_workers. */
streamId?: string;
/** True when the message was sent while the queen was still processing */
queued?: boolean;
}
@@ -79,6 +91,12 @@ interface ChatPanelProps {
supportsImages?: boolean;
/** Called when user clicks the stop button to cancel the queen's current turn */
onCancel?: () => void;
/** Called when the user steers a queued message into the current turn —
* the message is sent to the backend immediately so it influences the
* agent after the next tool call completes. */
onSteer?: (messageId: string) => void;
/** Called when the user cancels a still-queued (not-yet-sent) message. */
onCancelQueued?: (messageId: string) => void;
/** Pending question from ask_user — replaces textarea when present */
pendingQuestion?: string | null;
/** Options for the pending question */
@@ -94,13 +112,21 @@ interface ChatPanelProps {
/** Called when user dismisses the pending question without answering */
onQuestionDismiss?: () => void;
/** Queen operating phase — shown as a tag on queen messages */
queenPhase?: "planning" | "building" | "staging" | "running" | "independent";
queenPhase?: "independent" | "working" | "reviewing";
/** When false, queen messages omit the phase badge */
showQueenPhaseBadge?: boolean;
/** Context window usage for queen and workers */
contextUsage?: Record<string, ContextUsageEntry>;
/** One-shot composer prefill. Applied to the textarea whenever the value changes. */
initialDraft?: string | null;
/** Queen profile this panel is attached to. When provided, clicking a
* queen avatar/name opens that queen's profile panel directly —
* no fragile name-based lookup against ``queenProfiles``. Nullable
* to tolerate pages that render the panel before the queen is
* resolved (e.g. new-chat bootstrap). */
queenProfileId?: string | null;
/** Queen ID — used to display the queen's avatar photo in messages */
queenId?: string;
}
const queenColor = "hsl(45,95%,58%)";
@@ -124,14 +150,14 @@ const TOOL_HEX = [
"#e5a820", // sunflower
];
function toolHex(name: string): string {
export function toolHex(name: string): string {
let hash = 0;
for (let i = 0; i < name.length; i++)
hash = (hash * 31 + name.charCodeAt(i)) | 0;
return TOOL_HEX[Math.abs(hash) % TOOL_HEX.length];
}
function ToolActivityRow({ content }: { content: string }) {
export function ToolActivityRow({ content }: { content: string }) {
let tools: { name: string; done: boolean }[] = [];
try {
const parsed = JSON.parse(content);
@@ -300,17 +326,21 @@ function InlineAskUserBubble({
onSend,
queenPhase,
showQueenPhaseBadge = true,
queenProfileId,
queenAvatarUrl,
}: {
msg: ChatMessage;
payload: AskUserInlinePayload;
activeThread: string;
queenAvatarUrl?: string | null;
onSend: (
message: string,
thread: string,
images?: ImageContent[],
) => void;
queenPhase?: "planning" | "building" | "staging" | "running" | "independent";
queenPhase?: "independent" | "working" | "reviewing";
showQueenPhaseBadge?: boolean;
queenProfileId?: string | null;
}) {
const [state, setState] = useState<"pending" | "submitted" | "dismissed">(
"pending",
@@ -328,6 +358,8 @@ function InlineAskUserBubble({
msg={msg}
queenPhase={queenPhase}
showQueenPhaseBadge={showQueenPhaseBadge}
queenProfileId={queenProfileId}
queenAvatarUrl={queenAvatarUrl}
/>
);
}
@@ -336,6 +368,27 @@ function InlineAskUserBubble({
const color = getColor(msg.agent, msg.role);
const thread = msg.thread || activeThread;
const { openQueenProfile } = useQueenProfile();
const { openColonyWorkers } = useColonyWorkers();
const resolvedQueenProfileId = isQueen ? queenProfileId ?? null : null;
const handleQueenClick = resolvedQueenProfileId
? () => openQueenProfile(resolvedQueenProfileId)
: undefined;
const workerId =
!isQueen && msg.role === "worker"
? workerIdFromStreamId(msg.streamId)
: null;
const handleWorkerClick =
msg.role === "worker"
? () => openColonyWorkers(workerId ?? undefined)
: undefined;
const handleAvatarClick = handleQueenClick ?? handleWorkerClick;
const avatarTitle = handleQueenClick
? `View ${msg.agent}'s profile`
: handleWorkerClick
? "Open worker in colony sidebar"
: undefined;
const handleSingle = (answer: string) => {
setState("submitted");
onSend(answer, thread);
@@ -355,15 +408,17 @@ function InlineAskUserBubble({
return (
<div className="flex gap-3">
<div
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
style={{
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center overflow-hidden${handleAvatarClick ? " cursor-pointer hover:opacity-80 transition-opacity" : ""}`}
style={isQueen && queenAvatarUrl ? undefined : {
backgroundColor: `${color}18`,
border: `1.5px solid ${color}35`,
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
}}
onClick={handleAvatarClick}
title={avatarTitle}
>
{isQueen ? (
<Crown className="w-4 h-4" style={{ color }} />
<QueenAvatarIcon url={queenAvatarUrl ?? null} size={9} />
) : (
<Cpu className="w-3.5 h-3.5" style={{ color }} />
)}
@@ -373,8 +428,9 @@ function InlineAskUserBubble({
>
<div className="flex items-center gap-2 mb-1">
<span
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}${handleQueenClick ? " cursor-pointer hover:underline" : ""}`}
style={{ color }}
onClick={handleQueenClick}
>
{msg.agent}
</span>
@@ -387,15 +443,11 @@ function InlineAskUserBubble({
}`}
>
{isQueen
? (msg.phase ?? queenPhase) === "independent"
? "independent"
: (msg.phase ?? queenPhase) === "running"
? "running"
: (msg.phase ?? queenPhase) === "staging"
? "staging"
: (msg.phase ?? queenPhase) === "planning"
? "planning"
: "building"
? (msg.phase ?? queenPhase) === "working"
? "working"
: (msg.phase ?? queenPhase) === "reviewing"
? "reviewing"
: "independent"
: "Worker"}
</span>
)}
@@ -421,20 +473,54 @@ function InlineAskUserBubble({
);
}
function QueenAvatarIcon({ url, size }: { url: string | null; size: number }) {
const [ok, setOk] = useState(!!url);
const dim = size === 9 ? "w-9 h-9" : "w-7 h-7";
if (ok && url) {
return <img src={url} alt="" className={`${dim} rounded-xl object-cover`} onError={() => setOk(false)} />;
}
return <Crown className={size === 9 ? "w-4 h-4" : "w-3.5 h-3.5"} style={{ color: queenColor }} />;
}
const MessageBubble = memo(
function MessageBubble({
msg,
queenPhase,
showQueenPhaseBadge = true,
queenProfileId,
queenAvatarUrl,
onSteer,
onCancelQueued,
}: {
msg: ChatMessage;
queenPhase?: "planning" | "building" | "staging" | "running" | "independent";
queenPhase?: "independent" | "working" | "reviewing";
showQueenPhaseBadge?: boolean;
queenProfileId?: string | null;
queenAvatarUrl?: string | null;
onSteer?: (messageId: string) => void;
onCancelQueued?: (messageId: string) => void;
}) {
const isUser = msg.type === "user";
const isQueen = msg.role === "queen";
const color = getColor(msg.agent, msg.role);
// Clicking a queen avatar/name opens the queen profile panel. The
// owning page passes its queenProfileId down — we don't fall back
// to a name-match against ``queenProfiles`` because display names
// aren't unique or stable (colony chat uses static QUEEN_REGISTRY
// labels, queen-dm uses user-editable profile names; matching by
// name silently breaks when the profile is renamed or not listed).
const { openQueenProfile } = useQueenProfile();
const { openColonyWorkers } = useColonyWorkers();
const resolvedQueenProfileId = isQueen ? queenProfileId ?? null : null;
// Worker messages: clicking the avatar opens the Colony Workers
// sidebar, pre-selecting this worker when its uuid is embedded in
// the streamId (parallel fan-out case).
const workerId =
!isQueen && msg.role === "worker"
? workerIdFromStreamId(msg.streamId)
: null;
if (msg.type === "run_divider") {
return (
<div className="flex items-center gap-3 py-2 my-1">
@@ -499,9 +585,9 @@ const MessageBubble = memo(
if (isUser) {
return (
<div className="flex justify-end">
<div className="flex flex-col items-end gap-1">
<div
className={`max-w-[75%] bg-primary text-primary-foreground text-sm leading-relaxed rounded-2xl rounded-br-md px-4 py-3${msg.queued ? " animate-pulse opacity-80" : ""}`}
className={`max-w-[75%] bg-primary text-primary-foreground text-sm leading-relaxed rounded-2xl rounded-br-md px-4 py-3${msg.queued ? " ring-1 ring-amber-500/50" : ""}`}
>
{msg.images && msg.images.length > 0 && (
<div className="flex flex-wrap gap-2 mb-2">
@@ -520,27 +606,74 @@ const MessageBubble = memo(
)}
{(msg.queued || msg.createdAt) && (
<div className="flex justify-end items-center gap-1.5 mt-1 text-[10px] opacity-60">
{msg.queued && <span>queued</span>}
{msg.queued && (
<span className="inline-flex items-center gap-1">
<span className="w-1 h-1 rounded-full bg-amber-400 animate-pulse" />
queued
</span>
)}
{msg.createdAt && <span>{formatMessageTime(msg.createdAt)}</span>}
</div>
)}
</div>
{msg.queued && (onSteer || onCancelQueued) && (
<div className="flex items-center gap-1.5">
{onSteer && (
<button
type="button"
onClick={() => onSteer(msg.id)}
className="inline-flex items-center gap-1 text-[11px] font-medium px-2 py-0.5 rounded-full bg-amber-500/15 text-amber-600 hover:bg-amber-500/25 border border-amber-500/30 transition-colors"
title="Send now — influence the current turn after the next tool call"
>
<Zap className="w-3 h-3" />
Steer
</button>
)}
{onCancelQueued && (
<button
type="button"
onClick={() => onCancelQueued(msg.id)}
className="inline-flex items-center gap-1 text-[11px] font-medium px-2 py-0.5 rounded-full bg-muted/60 text-muted-foreground hover:bg-muted border border-border transition-colors"
title="Remove this queued message"
>
<X className="w-3 h-3" />
Cancel
</button>
)}
</div>
)}
</div>
);
}
const handleQueenClick = resolvedQueenProfileId
? () => openQueenProfile(resolvedQueenProfileId)
: undefined;
const handleWorkerClick =
msg.role === "worker"
? () => openColonyWorkers(workerId ?? undefined)
: undefined;
const handleAvatarClick = handleQueenClick ?? handleWorkerClick;
const avatarTitle = handleQueenClick
? `View ${msg.agent}'s profile`
: handleWorkerClick
? "Open worker in colony sidebar"
: undefined;
return (
<div className="flex gap-3">
<div
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
style={{
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center overflow-hidden${handleAvatarClick ? " cursor-pointer hover:opacity-80 transition-opacity" : ""}`}
style={isQueen && queenAvatarUrl ? undefined : {
backgroundColor: `${color}18`,
border: `1.5px solid ${color}35`,
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
}}
onClick={handleAvatarClick}
title={avatarTitle}
>
{isQueen ? (
<Crown className="w-4 h-4" style={{ color }} />
<QueenAvatarIcon url={queenAvatarUrl ?? null} size={9} />
) : (
<Cpu className="w-3.5 h-3.5" style={{ color }} />
)}
@@ -550,8 +683,9 @@ const MessageBubble = memo(
>
<div className="flex items-center gap-2 mb-1">
<span
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}${handleQueenClick ? " cursor-pointer hover:underline" : ""}`}
style={{ color }}
onClick={handleQueenClick}
>
{msg.agent}
</span>
@@ -564,15 +698,11 @@ const MessageBubble = memo(
}`}
>
{isQueen
- ? (msg.phase ?? queenPhase) === "independent"
-   ? "independent"
-   : (msg.phase ?? queenPhase) === "running"
-     ? "running"
-     : (msg.phase ?? queenPhase) === "staging"
-       ? "staging"
-       : (msg.phase ?? queenPhase) === "planning"
-         ? "planning"
-         : "building"
+ ? (msg.phase ?? queenPhase) === "working"
+   ? "working"
+   : (msg.phase ?? queenPhase) === "reviewing"
+     ? "reviewing"
+     : "independent"
: "Worker"}
</span>
)}
@@ -597,8 +727,11 @@ const MessageBubble = memo(
prev.msg.id === next.msg.id &&
prev.msg.content === next.msg.content &&
prev.msg.phase === next.msg.phase &&
+ prev.msg.queued === next.msg.queued &&
prev.queenPhase === next.queenPhase &&
- prev.showQueenPhaseBadge === next.showQueenPhaseBadge,
+ prev.showQueenPhaseBadge === next.showQueenPhaseBadge &&
+ prev.onSteer === next.onSteer &&
+ prev.onCancelQueued === next.onCancelQueued,
);
export default function ChatPanel({
@@ -610,6 +743,8 @@ export default function ChatPanel({
activeThread,
disabled,
onCancel,
+ onSteer,
+ onCancelQueued,
pendingQuestion,
pendingOptions,
pendingQuestions,
@@ -621,6 +756,8 @@ export default function ChatPanel({
contextUsage,
supportsImages = true,
initialDraft,
+ queenProfileId,
+ queenId,
}: ChatPanelProps) {
const [input, setInput] = useState("");
const [pendingImages, setPendingImages] = useState<ImageContent[]>([]);
@@ -631,6 +768,7 @@ export default function ChatPanel({
const textareaRef = useRef<HTMLTextAreaElement>(null);
const fileInputRef = useRef<HTMLInputElement>(null);
const lastAppliedDraftRef = useRef<string | null | undefined>(undefined);
+ const queenAvatarUrl = queenId ? `/api/queen/${queenId}/avatar` : null;
useEffect(() => {
if (!initialDraft || initialDraft === lastAppliedDraftRef.current) return;
@@ -665,14 +803,157 @@ export default function ChatPanel({
type RenderItem =
| { kind: "message"; msg: ChatMessage }
| { kind: "parallel"; groupId: string; groups: SubagentGroup[] }
| {
kind: "worker_run";
runId: string;
group: WorkerRunGroup;
/** Optional short label shown next to the "Worker" badge.
* Only set when there are multiple parallel workers in the
* same run span (so users can tell them apart). */
label?: string;
}
| { kind: "day_divider"; key: string; createdAt: number };
/** Derive a short label from a parallel-worker stream id.
* `worker:abcdef12-3456-...` → `abcdef12` (first 8 chars of the
* uuid after the `worker:` prefix). Falls back to the first
* message's nodeId when the streamId isn't the expected shape. */
function deriveWorkerLabel(
streamKey: string,
msgs: ChatMessage[],
): string {
if (streamKey.startsWith("worker:")) {
const suffix = streamKey.slice("worker:".length);
// sessions are `session_YYYYMMDD_HHMMSS_<8-hex>` — show the
// trailing hex if present, else first 8 chars of the suffix.
const tail = suffix.match(/_[0-9a-f]{6,}$/i)?.[0]?.slice(1);
return tail ? tail.slice(0, 8) : suffix.slice(0, 8);
}
const nid = msgs.find((m) => m.nodeId)?.nodeId;
return nid || streamKey;
}
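For review purposes, the labeling rule is easiest to check in isolation. A standalone sketch follows; `Msg` is a hypothetical stand-in for the real `ChatMessage` type, which carries many more fields:

```typescript
// Minimal re-statement of the deriveWorkerLabel rules, for illustration only.
type Msg = { nodeId?: string };

function labelFor(streamKey: string, msgs: Msg[]): string {
  if (streamKey.startsWith("worker:")) {
    const suffix = streamKey.slice("worker:".length);
    // Prefer a trailing `_<hex>` session suffix; else the first 8 chars.
    const tail = suffix.match(/_[0-9a-f]{6,}$/i)?.[0]?.slice(1);
    return tail ? tail.slice(0, 8) : suffix.slice(0, 8);
  }
  // Fall back to the first message that carries a nodeId, then the raw key.
  return msgs.find((m) => m.nodeId)?.nodeId || streamKey;
}

// Session-style id → trailing hex:
// labelFor("worker:session_20260420_153000_ab12cd34", []) === "ab12cd34"
// UUID-style id → first 8 chars:
// labelFor("worker:abcdef12-3456-7890", []) === "abcdef12"
```

Both branches of the docstring (uuid prefix and session tail) fall out of the same two-step rule, which is why a single regex plus `slice(0, 8)` suffices.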
const renderItems = useMemo<RenderItem[]>(() => {
const items: RenderItem[] = [];
let i = 0;
while (i < threadMessages.length) {
const msg = threadMessages[i];
const isSubagent = msg.nodeId?.includes(":subagent:");
// Worker run grouping: collect consecutive WORKER-role
// messages (and worker tool_status pills) into a collapsible
// card. Queen tool_status pills (``role === "queen"``) are
// deliberately excluded — the queen's own tool calls are part
// of the queen↔user conversation and should render inline as
// ToolActivityRows, not fold into a "Worker" bubble. Without
// this guard, every queen run_command / read_file / etc. shows
// up under a misleading "Worker" label in the DM.
const isWorkerCandidate =
msg.role === "worker" ||
(msg.type === "tool_status" && msg.role !== "queen");
if (
!isSubagent &&
isWorkerCandidate &&
msg.type !== "user" &&
msg.type !== "run_divider"
) {
const workerMsgs: ChatMessage[] = [];
const firstWorkerMsg = msg;
while (i < threadMessages.length) {
const m = threadMessages[i];
// Hard boundary — stop the worker run group
if (m.type === "user" || m.type === "run_divider") break;
// Queen message with real text — boundary (queen is talking
// to the user, not just emitting a tool)
if (m.role === "queen" && m.content?.trim() && !m.type) break;
// Queen tool_status — NOT a worker activity, don't bucket
// it. Break so the grouping stops and the queen pill
// renders inline.
if (m.type === "tool_status" && m.role === "queen") break;
// Subagent message — different group type, stop here
if (m.nodeId?.includes(":subagent:")) break;
// Worker text messages and worker tool_status belong to the run
if (
m.role === "worker" ||
(m.type === "tool_status" && m.role !== "queen")
) {
workerMsgs.push(m);
i++;
continue;
}
// System message or other — include in the worker run
// group to preserve ordering (they'll render inside the
// expanded view)
workerMsgs.push(m);
i++;
}
if (workerMsgs.length > 0) {
// Parallel fan-out detection: if any message in this span
// is tagged with a parallel-worker streamId (``worker:{uuid}``),
// split the span by streamId and emit one ``worker_run``
// per worker — they render as stacked independent
// ``WorkerRunBubble``s. Un-tagged legacy messages and the
// single-worker ``streamId="worker"`` case fall through to
// the existing single-bubble behavior.
const hasParallel = workerMsgs.some(
(m) => !!m.streamId && /^worker:./.test(m.streamId),
);
if (hasParallel) {
const buckets = new Map<
string,
{ messages: ChatMessage[]; firstAt: number }
>();
// Messages with no streamId (system notes, orphans from
// old restore) attach to the most-recent keyed message's
// bucket so chronology is preserved.
let currentKey: string | null = null;
for (const m of workerMsgs) {
const key =
m.streamId && m.streamId.length > 0
? m.streamId
: currentKey;
if (!key) continue;
if (m.streamId && m.streamId.length > 0) currentKey = m.streamId;
let bucket = buckets.get(key);
if (!bucket) {
bucket = { messages: [], firstAt: m.createdAt ?? 0 };
buckets.set(key, bucket);
}
bucket.messages.push(m);
bucket.firstAt = Math.min(
bucket.firstAt,
m.createdAt ?? Number.POSITIVE_INFINITY,
);
}
const sorted = Array.from(buckets.entries()).sort(
([, a], [, b]) => a.firstAt - b.firstAt,
);
for (const [streamKey, { messages: bucketMsgs }] of sorted) {
items.push({
kind: "worker_run",
runId: `wrun-${firstWorkerMsg.id}-${streamKey}`,
group: { messages: bucketMsgs },
label: deriveWorkerLabel(streamKey, bucketMsgs),
});
}
} else {
items.push({
kind: "worker_run",
runId: `wrun-${firstWorkerMsg.id}`,
group: { messages: workerMsgs },
});
}
}
continue;
}
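The orphan-attachment bucketing above can be checked standalone. A minimal sketch, using a simplified `WMsg` shape and omitting the `firstAt` ordering the real code also tracks:

```typescript
type WMsg = { streamId?: string };

// Group messages by streamId; un-keyed messages (system notes, restored
// orphans) attach to the most recent keyed message's bucket so chronology
// is preserved.
function bucketByStream(msgs: WMsg[]): Map<string, WMsg[]> {
  const buckets = new Map<string, WMsg[]>();
  let currentKey: string | null = null;
  for (const m of msgs) {
    const key = m.streamId && m.streamId.length > 0 ? m.streamId : currentKey;
    if (!key) continue; // orphan before any keyed message: skipped
    if (m.streamId && m.streamId.length > 0) currentKey = m.streamId;
    const arr = buckets.get(key) ?? [];
    arr.push(m);
    buckets.set(key, arr);
  }
  return buckets;
}
```

Note the one asymmetry: orphans that arrive before any keyed message have no bucket to join and are dropped, matching the `if (!key) continue;` guard in the diff.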
if (!isSubagent) {
items.push({ kind: "message", msg });
i++;
@@ -872,6 +1153,17 @@ export default function ChatPanel({
</div>
);
}
+ if (item.kind === "worker_run") {
+   return (
+     <div key={item.runId}>
+       <WorkerRunBubble
+         runId={item.runId}
+         group={item.group}
+         label={item.label}
+       />
+     </div>
+   );
+ }
const msg = item.msg;
// Detect misformatted ask_user payloads emitted as plain text and
// substitute the nicer widget-based bubble. Only inspect regular
@@ -892,6 +1184,8 @@ export default function ChatPanel({
onSend={onSend}
queenPhase={queenPhase}
showQueenPhaseBadge={showQueenPhaseBadge}
+ queenProfileId={queenProfileId}
+ queenAvatarUrl={queenAvatarUrl}
/>
</div>
);
@@ -902,6 +1196,10 @@ export default function ChatPanel({
msg={msg}
queenPhase={queenPhase}
showQueenPhaseBadge={showQueenPhaseBadge}
+ queenProfileId={queenProfileId}
+ queenAvatarUrl={queenAvatarUrl}
+ onSteer={onSteer}
+ onCancelQueued={onCancelQueued}
/>
</div>
);
@@ -911,14 +1209,14 @@ export default function ChatPanel({
{(isWaiting || (disabled && threadMessages.length === 0)) && (
<div className="flex gap-3">
<div
- className="flex-shrink-0 w-9 h-9 rounded-xl flex items-center justify-center"
- style={{
+ className="flex-shrink-0 w-9 h-9 rounded-xl flex items-center justify-center overflow-hidden"
+ style={queenAvatarUrl ? undefined : {
backgroundColor: `${queenColor}18`,
border: `1.5px solid ${queenColor}35`,
boxShadow: `0 0 12px ${queenColor}20`,
}}
>
- <Crown className="w-4 h-4" style={{ color: queenColor }} />
+ <QueenAvatarIcon url={queenAvatarUrl} size={9} />
</div>
<div className="border border-primary/20 bg-primary/5 rounded-2xl rounded-tl-md px-4 py-3">
<div className="flex gap-1.5">
@@ -1135,30 +1433,47 @@ export default function ChatPanel({
}
}}
placeholder={
- disabled ? "Connecting to agent..." : "Message Queen Bee..."
+ disabled
+   ? "Connecting to agent..."
+   : isBusy
+     ? "Queue a message — or click Steer to inject now..."
+     : "Message Queen Bee..."
}
disabled={disabled}
className="flex-1 bg-transparent text-sm text-foreground outline-none placeholder:text-muted-foreground disabled:opacity-50 disabled:cursor-not-allowed resize-none overflow-y-auto"
/>
- {isBusy && onCancel ? (
+ {isBusy && onCancel && (
<button
type="button"
onClick={onCancel}
+ title="Stop the queen's current turn"
className="p-2 rounded-lg bg-amber-500/15 text-amber-400 border border-amber-500/40 hover:bg-amber-500/25 transition-colors"
>
<Square className="w-4 h-4" />
</button>
- ) : (
-   <button
-     type="submit"
-     disabled={
-       (!input.trim() && pendingImages.length === 0) || disabled
-     }
-     className="p-2 rounded-lg bg-primary text-primary-foreground disabled:opacity-30 hover:opacity-90 transition-opacity"
-   >
-     <Send className="w-4 h-4" />
-   </button>
)}
+ <button
+   type="submit"
+   disabled={
+     (!input.trim() && pendingImages.length === 0) || disabled
+   }
+   title={
+     isBusy
+       ? "Queue message — sent after the current turn, or click Steer on the bubble to send now"
+       : "Send"
+   }
+   className={`p-2 rounded-lg disabled:opacity-30 hover:opacity-90 transition-opacity ${
+     isBusy
+       ? "bg-amber-500/20 text-amber-600 border border-amber-500/40"
+       : "bg-primary text-primary-foreground"
+   }`}
+ >
+   {isBusy ? (
+     <Zap className="w-4 h-4" />
+   ) : (
+     <Send className="w-4 h-4" />
+   )}
+ </button>
</div>
</form>
)}
File diff suppressed because it is too large
@@ -1,7 +1,8 @@
import { useState, useRef, useEffect } from "react";
- import { ChevronDown, Check, Settings, ThumbsUp } from "lucide-react";
+ import { ChevronDown, Check, Settings, ThumbsUp, AlertCircle } from "lucide-react";
import { useModel, LLM_PROVIDERS } from "@/context/ModelContext";
import type { ModelOption } from "@/api/config";
import { ApiError } from "@/api/client";
interface ModelSwitcherProps {
onOpenSettings?: () => void;
@@ -22,6 +23,7 @@ export default function ModelSwitcher({ onOpenSettings }: ModelSwitcherProps) {
} = useModel();
const [open, setOpen] = useState(false);
const [error, setError] = useState<string | null>(null);
const ref = useRef<HTMLDivElement>(null);
// Close on click outside
@@ -55,26 +57,30 @@ export default function ModelSwitcher({ onOpenSettings }: ModelSwitcherProps) {
);
const handleSelectApiKey = async (provider: string, modelId: string) => {
- setOpen(false);
+ setError(null);
try {
await setModel(provider, modelId);
+ setOpen(false);
} catch (err) {
console.error("Failed to switch model:", err);
+ const msg = err instanceof ApiError ? err.message : "Failed to switch model";
+ setError(msg);
}
};
const handleSelectSubscription = async (subscriptionId: string) => {
- setOpen(false);
+ setError(null);
try {
await activateSubscription(subscriptionId);
+ setOpen(false);
} catch (err) {
console.error("Failed to activate subscription:", err);
+ const msg = err instanceof ApiError ? err.message : "Failed to activate subscription";
+ setError(msg);
}
};
- // Get detected but inactive subscriptions
- const availableSubscriptions = subscriptions.filter(
-   (sub) => detectedSubscriptions.has(sub.id) && activeSubscription !== sub.id
+ // All detected subscriptions (active ones shown with checkmark)
+ const detectedSubs = subscriptions.filter(
+   (sub) => detectedSubscriptions.has(sub.id)
);
const recommendedIcon = (
@@ -89,12 +95,12 @@ export default function ModelSwitcher({ onOpenSettings }: ModelSwitcherProps) {
</span>
);
- const hasAnyProvider = apiKeyProviders.length > 0 || availableSubscriptions.length > 0 || activeSubInfo;
+ const hasAnyProvider = apiKeyProviders.length > 0 || detectedSubs.length > 0;
return (
<div className="relative" ref={ref}>
<button
- onClick={() => setOpen(!open)}
+ onClick={() => { setOpen(!open); setError(null); }}
className="flex items-center gap-1.5 px-2.5 py-1 rounded-md text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/40 transition-colors border border-transparent hover:border-border/40"
>
<span className="max-w-[120px] truncate">{shortLabel}</span>
@@ -106,92 +112,98 @@ export default function ModelSwitcher({ onOpenSettings }: ModelSwitcherProps) {
{open && (
<div className="absolute right-0 top-full mt-1.5 w-[260px] bg-card border border-border/60 rounded-lg shadow-xl z-50 overflow-hidden">
<div className="max-h-[320px] overflow-y-auto">
{/* Active subscription */}
{activeSubInfo && (
<div className="px-3 py-2 bg-purple-500/5 border-b border-border/40">
<p className="text-[10px] font-semibold text-purple-400/80 uppercase tracking-wider mb-1">
Active Subscription
</p>
<div className="flex items-center gap-2">
<Check className="w-3 h-3 text-purple-400" />
<span className="text-xs font-medium text-foreground">
{activeSubInfo.name}
</span>
</div>
</div>
)}
{/* Available subscriptions */}
{availableSubscriptions.length > 0 && (
<div>
<p className="px-3 pt-2.5 pb-1 text-[10px] font-semibold text-purple-400/80 uppercase tracking-wider">
Available Subscriptions
</p>
{availableSubscriptions.map((sub) => (
<button
key={sub.id}
onClick={() => handleSelectSubscription(sub.id)}
className="w-full text-left px-3 py-1.5 text-xs flex items-center gap-2 transition-colors text-foreground hover:bg-muted/30"
>
<span className="w-3" />
<span className="truncate">{sub.name}</span>
</button>
))}
</div>
)}
{/* API key provider models */}
{!hasAnyProvider ? (
<p className="px-4 py-3 text-xs text-muted-foreground">
No providers available. Add an API key or subscription.
</p>
) : (
apiKeyProviders.length > 0 && (
<div>
<p className="px-3 pt-2.5 pb-1 text-[10px] font-semibold text-muted-foreground/60 uppercase tracking-wider">
API Key Providers
</p>
{apiKeyProviders.map((provider) => (
<div key={provider.id}>
<p className="px-3 pt-2.5 pb-1 text-[10px] font-semibold text-muted-foreground/60 uppercase tracking-wider">
{provider.name}
</p>
{(availableModels[provider.id] || []).map(
(model: ModelOption) => {
const isActive =
currentProvider === provider.id &&
currentModel === model.id &&
!activeSubscription;
return (
<button
key={model.id}
onClick={() => handleSelectApiKey(provider.id, model.id)}
className={`w-full text-left px-3 py-1.5 text-xs flex items-center gap-2 transition-colors ${
isActive
? "bg-primary/10 text-primary"
: "text-foreground hover:bg-muted/30"
}`}
>
{isActive ? (
<Check className="w-3 h-3 flex-shrink-0" />
) : (
<span className="w-3" />
)}
<span className="truncate">
{model.label.split(" - ")[0]}
</span>
{model.recommended && recommendedIcon}
</button>
);
},
)}
</div>
))}
</div>
)
<>
{/* Subscriptions */}
{detectedSubs.length > 0 && (
<div>
<p className="px-3 pt-2.5 pb-1 text-[10px] font-semibold text-muted-foreground/60 uppercase tracking-wider">
Subscriptions
</p>
{detectedSubs.map((sub) => {
const isActive = activeSubscription === sub.id;
return (
<button
key={sub.id}
onClick={() => handleSelectSubscription(sub.id)}
className={`w-full text-left px-3 py-1.5 text-xs flex items-center gap-2 transition-colors ${
isActive
? "bg-primary/10 text-primary"
: "text-foreground hover:bg-muted/30"
}`}
>
{isActive ? (
<Check className="w-3 h-3 flex-shrink-0" />
) : (
<span className="w-3" />
)}
<span className="truncate">{sub.name}</span>
</button>
);
})}
</div>
)}
{/* API Keys */}
{apiKeyProviders.length > 0 && (
<div>
<p className="px-3 pt-2.5 pb-1 text-[10px] font-semibold text-muted-foreground/60 uppercase tracking-wider">
API Keys
</p>
{apiKeyProviders.map((provider) => (
<div key={provider.id}>
<p className="px-3 pt-2 pb-0.5 text-xs font-medium text-foreground">
{provider.name}
</p>
{(availableModels[provider.id] || []).map(
(model: ModelOption) => {
const isActive =
currentProvider === provider.id &&
currentModel === model.id &&
!activeSubscription;
return (
<button
key={model.id}
onClick={() => handleSelectApiKey(provider.id, model.id)}
className={`w-full text-left pl-8 pr-3 py-1.5 text-xs flex items-center gap-2 transition-colors ${
isActive
? "bg-primary/10 text-primary"
: "text-foreground hover:bg-muted/30"
}`}
>
{isActive ? (
<Check className="w-3 h-3 flex-shrink-0" />
) : (
<span className="w-3" />
)}
<span className="truncate">
{model.label.split(" - ")[0]}
</span>
{model.recommended && recommendedIcon}
</button>
);
},
)}
</div>
))}
</div>
)}
</>
)}
</div>
{/* Validation error */}
{error && (
<div className="px-3 py-2 bg-destructive/10 border-t border-border/40 flex items-start gap-2">
<AlertCircle className="w-3 h-3 text-destructive flex-shrink-0 mt-0.5" />
<p className="text-[11px] text-destructive">{error}</p>
</div>
)}
{/* Footer link */}
{onOpenSettings && (
<div className="border-t border-border/40">
@@ -3,6 +3,7 @@ import { ChevronDown, ChevronUp, Cpu } from "lucide-react";
import type { ChatMessage, ContextUsageEntry } from "@/components/ChatPanel";
import MarkdownContent from "@/components/MarkdownContent";
import { cssVar } from "@/lib/graphUtils";
import { useColonyWorkers } from "@/context/ColonyWorkersContext";
// ---------------------------------------------------------------------------
// Shared helpers
@@ -317,6 +318,7 @@ const ParallelSubagentBubble = memo(
const [expanded, setExpanded] = useState(false);
const [zoomedIdx, setZoomedIdx] = useState<number | null>(null);
const mux = useMuxColors();
const { openColonyWorkers } = useColonyWorkers();
// Labels with instance numbers for duplicates
const labels: string[] = (() => {
@@ -371,16 +373,21 @@ const ParallelSubagentBubble = memo(
return (
<div className="flex gap-3">
- {/* Left icon */}
- <div
-   className="flex-shrink-0 w-7 h-7 rounded-xl flex items-center justify-center mt-1"
+ {/* Left icon — subagents aren't top-level colony workers, so the
+     click opens the sidebar without pre-selection. */}
+ <button
+   type="button"
+   onClick={() => openColonyWorkers()}
+   aria-label="Open colony workers sidebar"
+   title="Open colony workers sidebar"
+   className="flex-shrink-0 w-7 h-7 rounded-xl flex items-center justify-center mt-1 transition-opacity hover:opacity-80 cursor-pointer"
style={{
backgroundColor: `${workerColor}18`,
border: `1.5px solid ${workerColor}35`,
}}
>
<Cpu className="w-3.5 h-3.5" style={{ color: workerColor }} />
- </div>
+ </button>
<div className="flex-1 min-w-0 max-w-[90%]">
{/* Header */}
@@ -1,15 +1,9 @@
- import { useState, useEffect } from "react";
+ import { useState, useEffect, useCallback, useRef } from "react";
import { NavLink, useLocation, useNavigate } from "react-router-dom";
- import {
-   X,
-   MessageSquare,
-   Crown,
-   ChevronRight,
-   Briefcase,
-   Award,
- } from "lucide-react";
+ import { X, MessageSquare, Crown, ChevronRight, Briefcase, Award, Pencil, Check, Loader2, Camera } from "lucide-react";
import { useColony } from "@/context/ColonyContext";
import { queensApi, type QueenProfile } from "@/api/queens";
import { compressImage } from "@/lib/image-utils";
import type { Colony } from "@/types/colony";
interface QueenProfilePanelProps {
@@ -18,46 +12,192 @@ interface QueenProfilePanelProps {
onClose: () => void;
}
- export default function QueenProfilePanel({
-   queenId,
-   colonies,
-   onClose,
- }: QueenProfilePanelProps) {
+ function SectionHeader({ children, onEdit }: { children: React.ReactNode; onEdit?: () => void }) {
+   return (
+     <div className="flex items-center justify-between mb-2">
+       <h4 className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider">{children}</h4>
+       {onEdit && (
+         <button onClick={onEdit} className="p-0.5 rounded text-muted-foreground/40 hover:text-foreground" title="Edit">
+           <Pencil className="w-3 h-3" />
+         </button>
+       )}
+     </div>
+   );
+ }
+ export default function QueenProfilePanel({ queenId, colonies, onClose }: QueenProfilePanelProps) {
const navigate = useNavigate();
const location = useLocation();
- const { queenProfiles } = useColony();
+ const { queenProfiles, refresh } = useColony();
const summary = queenProfiles.find((q) => q.id === queenId);
const [profile, setProfile] = useState<QueenProfile | null>(null);
const [loading, setLoading] = useState(true);
const [editing, setEditing] = useState(false);
const [saving, setSaving] = useState(false);
// Avatar state
const [avatarUrl, setAvatarUrl] = useState<string | null>(null);
const [uploadingAvatar, setUploadingAvatar] = useState(false);
const fileInputRef = useRef<HTMLInputElement>(null);
// Edit form state
const [editName, setEditName] = useState("");
const [editTitle, setEditTitle] = useState("");
const [editSummary, setEditSummary] = useState("");
const [editSkills, setEditSkills] = useState("");
const [editAchievement, setEditAchievement] = useState("");
// Hide the "Message {name}" button when we're already in this queen's PM.
const alreadyInQueenPm = location.pathname === `/queen/${queenId}`;
useEffect(() => {
setLoading(true);
setProfile(null);
- queensApi
-   .getProfile(queenId)
-   .then(setProfile)
-   .catch(() => {})
-   .finally(() => setLoading(false));
+ setEditing(false);
+ // Set avatar URL with cache buster
+ setAvatarUrl(`/api/queen/${queenId}/avatar?t=${Date.now()}`);
+ queensApi.getProfile(queenId).then(setProfile).catch(() => {}).finally(() => setLoading(false));
}, [queenId]);
const startEditing = () => {
if (!profile) return;
setEditName(profile.name);
setEditTitle(profile.title);
setEditSummary(profile.summary || "");
setEditSkills(profile.skills || "");
setEditAchievement(profile.signature_achievement || "");
setEditing(true);
};
const cancelEditing = () => setEditing(false);
const handleSave = async () => {
setSaving(true);
try {
const updated = await queensApi.updateProfile(queenId, {
name: editName.trim(),
title: editTitle.trim(),
summary: editSummary.trim(),
skills: editSkills.trim(),
signature_achievement: editAchievement.trim(),
});
setProfile(updated);
setEditing(false);
refresh();
} catch (err) {
console.error("Failed to save profile:", err);
} finally {
setSaving(false);
}
};
const handleAvatarClick = () => fileInputRef.current?.click();
const handleAvatarUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
const file = e.target.files?.[0];
if (!file) return;
// Reset input so same file can be re-selected
e.target.value = "";
if (!file.type.startsWith("image/")) return;
setUploadingAvatar(true);
try {
const compressed = await compressImage(file);
await queensApi.uploadAvatar(queenId, compressed);
setAvatarUrl(`/api/queen/${queenId}/avatar?t=${Date.now()}`);
} catch (err) {
console.error("Failed to upload avatar:", err);
} finally {
setUploadingAvatar(false);
}
};
const name = profile?.name ?? summary?.name ?? "Queen";
const title = profile?.title ?? summary?.title ?? "";
// ── Resizable width ──────────────────────────────────────────────────
const MIN_WIDTH = 280;
const MAX_WIDTH = 600;
const [width, setWidth] = useState(340);
const dragging = useRef(false);
const startX = useRef(0);
const startWidth = useRef(0);
const onDragStart = useCallback((e: React.MouseEvent) => {
e.preventDefault();
dragging.current = true;
startX.current = e.clientX;
startWidth.current = width;
const onMove = (ev: MouseEvent) => {
if (!dragging.current) return;
// Panel is on the right, so dragging left (negative delta) grows it
const delta = startX.current - ev.clientX;
setWidth(Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, startWidth.current + delta)));
};
const onUp = () => {
dragging.current = false;
document.removeEventListener("mousemove", onMove);
document.removeEventListener("mouseup", onUp);
document.body.style.cursor = "";
document.body.style.userSelect = "";
};
document.addEventListener("mousemove", onMove);
document.addEventListener("mouseup", onUp);
document.body.style.cursor = "col-resize";
document.body.style.userSelect = "none";
}, [width]);
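The pointer math in the drag handler reduces to a single clamp. A standalone sketch with the same constants (`nextWidth` is an illustrative name, not a function in the diff):

```typescript
const MIN_WIDTH = 280;
const MAX_WIDTH = 600;

// The panel sits on the RIGHT edge, so moving the pointer left
// (clientX decreasing) should grow it: delta = startX - clientX.
function nextWidth(startWidth: number, startX: number, clientX: number): number {
  const delta = startX - clientX;
  return Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, startWidth + delta));
}

// Dragging 60px left from a 340px panel:
// nextWidth(340, 500, 440) === 400
```

The `max` then `min` order guarantees the result stays inside `[MIN_WIDTH, MAX_WIDTH]` no matter how far the pointer travels, which is why the handler never needs to stop tracking at the bounds.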
const inputCls = "w-full bg-muted/30 border border-border/50 rounded-lg px-3 py-2 text-sm text-foreground focus:outline-none focus:ring-1 focus:ring-primary/40";
const textareaCls = `${inputCls} resize-none`;
const avatarElement = (
<div className="relative group">
<div className="w-16 h-16 rounded-full bg-primary/15 flex items-center justify-center overflow-hidden">
{avatarUrl ? (
<img
src={avatarUrl}
alt={name}
className="w-full h-full object-cover"
onError={() => setAvatarUrl(null)}
/>
) : (
<span className="text-xl font-bold text-primary">{name.charAt(0)}</span>
)}
</div>
<button
onClick={handleAvatarClick}
disabled={uploadingAvatar}
className="absolute inset-0 w-16 h-16 rounded-full flex items-center justify-center bg-black/50 opacity-0 group-hover:opacity-100 cursor-pointer"
title="Change photo"
>
{uploadingAvatar ? (
<Loader2 className="w-4 h-4 text-white animate-spin" />
) : (
<Camera className="w-4 h-4 text-white" />
)}
</button>
<input ref={fileInputRef} type="file" accept="image/*" className="hidden" onChange={handleAvatarUpload} />
</div>
);
return (
- <aside className="w-[340px] flex-shrink-0 border-l border-border/60 bg-card overflow-y-auto">
+ <aside
+   className="flex-shrink-0 border-l border-border/60 bg-card overflow-y-auto overscroll-contain relative"
+   style={{ width }}
+ >
{/* Drag handle */}
<div
onMouseDown={onDragStart}
className="absolute top-0 left-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/50 transition-colors z-10"
/>
{/* Header */}
<div className="flex items-center justify-between px-5 py-3.5 border-b border-border/60">
<div className="flex items-center gap-2 text-sm font-semibold text-foreground">
<Crown className="w-4 h-4 text-primary" />
QUEEN PROFILE
</div>
- <button
-   onClick={onClose}
-   className="p-1 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/60 transition-colors"
- >
+ <button onClick={onClose} className="p-1 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/60">
<X className="w-4 h-4" />
</button>
</div>
@@ -67,70 +207,93 @@ export default function QueenProfilePanel({
<div className="flex justify-center py-10">
<div className="w-6 h-6 border-2 border-primary/30 border-t-primary rounded-full animate-spin" />
</div>
- ) : (
-   <>
-     {/* Avatar + name + title */}
-     <div className="flex flex-col items-center text-center mb-6">
-       <div className="w-16 h-16 rounded-full bg-primary/15 flex items-center justify-center mb-3">
-         <span className="text-xl font-bold text-primary">
-           {name.charAt(0)}
-         </span>
-       </div>
-       <h3 className="text-base font-semibold text-foreground">
-         {name}
-       </h3>
-       <p className="text-xs text-muted-foreground mt-0.5">{title}</p>
+ ) : editing ? (
/* ── Edit Mode ──────────────────────────────────────────── */
<div className="flex flex-col gap-5">
{/* Avatar */}
<div className="flex justify-center mb-1">
{avatarElement}
</div>
<div>
<label className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-1.5 block">Name</label>
<input type="text" value={editName} onChange={(e) => setEditName(e.target.value)} className={inputCls} />
</div>
<div>
<label className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-1.5 block">Title</label>
<input type="text" value={editTitle} onChange={(e) => setEditTitle(e.target.value)} className={inputCls} />
</div>
<div>
<label className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-1.5 block">About</label>
<textarea value={editSummary} onChange={(e) => setEditSummary(e.target.value)} rows={10} className={textareaCls} />
</div>
<div>
<label className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-1.5 block">Skills (comma-separated)</label>
<textarea value={editSkills} onChange={(e) => setEditSkills(e.target.value)} rows={3} className={textareaCls} />
</div>
<div>
<label className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-1.5 block">Signature Achievement</label>
<textarea value={editAchievement} onChange={(e) => setEditAchievement(e.target.value)} rows={5} className={textareaCls} />
</div>
<div className="flex items-center gap-2 pt-1">
<button onClick={handleSave} disabled={saving || !editName.trim() || !editTitle.trim()}
className="flex items-center gap-1.5 px-4 py-2 rounded-lg bg-primary text-primary-foreground text-sm font-medium hover:bg-primary/90 disabled:opacity-50 disabled:cursor-not-allowed">
{saving ? <Loader2 className="w-3.5 h-3.5 animate-spin" /> : <Check className="w-3.5 h-3.5" />}
{saving ? "Saving..." : "Save"}
</button>
<button onClick={cancelEditing} disabled={saving}
className="px-4 py-2 rounded-lg text-sm font-medium text-muted-foreground hover:text-foreground hover:bg-muted/30">
Cancel
</button>
</div>
</div>
) : (
/* ── View Mode ──────────────────────────────────────────── */
<>
{/* Avatar + name + title */}
<div className="flex flex-col items-center text-center mb-6 group relative">
<div className="mb-3">
{avatarElement}
</div>
<h3 className="text-base font-semibold text-foreground">{name}</h3>
<p className="text-xs text-muted-foreground mt-0.5">{title}</p>
<button onClick={startEditing}
className="absolute top-0 right-0 p-1 rounded text-muted-foreground/40 hover:text-foreground opacity-0 group-hover:opacity-100" title="Edit name & title">
<Pencil className="w-3 h-3" />
</button>
</div>
{/* Message button — hidden when already in this queen's PM */}
{!alreadyInQueenPm && (
- <button
-   onClick={() => {
-     navigate(`/queen/${queenId}`);
-     onClose();
-   }}
-   className="w-full flex items-center justify-center gap-2 rounded-lg border border-border/60 py-2.5 text-sm font-medium text-foreground hover:bg-muted/40 transition-colors mb-6"
- >
+ <button onClick={() => { navigate(`/queen/${queenId}`); onClose(); }}
+   className="w-full flex items-center justify-center gap-2 rounded-lg border border-border/60 py-2.5 text-sm font-medium text-foreground hover:bg-muted/40 mb-6">
<MessageSquare className="w-4 h-4" />
Message {name}
</button>
)}
{/* About */}
{profile?.summary && (
<div className="mb-6">
- <h4 className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-2">
-   About
- </h4>
- <p className="text-sm text-foreground/80 leading-relaxed">
-   {profile.summary}
- </p>
+ <SectionHeader onEdit={startEditing}>About</SectionHeader>
+ <p className="text-sm text-foreground/80 leading-relaxed">{profile.summary}</p>
</div>
)}
{/* Experience */}
{profile?.experience && profile.experience.length > 0 && (
<div className="mb-6">
- <h4 className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-2">
-   Experience
- </h4>
+ <SectionHeader onEdit={startEditing}>Experience</SectionHeader>
<div className="space-y-3">
{profile.experience.map((exp, i) => (
<div key={i} className="flex items-start gap-2">
<Briefcase className="w-3.5 h-3.5 text-muted-foreground mt-0.5 flex-shrink-0" />
<div>
- <p className="text-sm font-medium text-foreground">
-   {exp.role}
- </p>
+ <p className="text-sm font-medium text-foreground">{exp.role}</p>
<ul className="mt-1 space-y-0.5">
- {exp.details.map((d, j) => (
-   <li
-     key={j}
-     className="text-xs text-muted-foreground"
-   >
-     {d}
-   </li>
- ))}
+ {exp.details.map((d, j) => <li key={j} className="text-xs text-muted-foreground">{d}</li>)}
</ul>
</div>
</div>
@@ -139,54 +302,34 @@ export default function QueenProfilePanel({
</div>
)}
{/* Skills */}
{profile?.skills && (
<div className="mb-6">
- <h4 className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-2">
-   Skills
- </h4>
+ <SectionHeader onEdit={startEditing}>Skills</SectionHeader>
<div className="flex flex-wrap gap-1.5">
{profile.skills.split(",").map((skill, i) => (
- <span
-   key={i}
-   className="px-2 py-0.5 rounded-full bg-muted/60 text-xs text-muted-foreground"
- >
-   {skill.trim()}
- </span>
+ <span key={i} className="px-2 py-0.5 rounded-full bg-muted/60 text-xs text-muted-foreground">{skill.trim()}</span>
))}
</div>
</div>
)}
{/* Signature achievement */}
{profile?.signature_achievement && (
<div className="mb-6">
- <h4 className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-2">
-   Signature Achievement
- </h4>
+ <SectionHeader onEdit={startEditing}>Signature Achievement</SectionHeader>
<div className="flex items-start gap-2">
<Award className="w-3.5 h-3.5 text-primary mt-0.5 flex-shrink-0" />
- <p className="text-sm text-foreground/80">
-   {profile.signature_achievement}
- </p>
+ <p className="text-sm text-foreground/80">{profile.signature_achievement}</p>
</div>
</div>
)}
{/* Assigned colonies */}
{colonies.length > 0 && (
<div>
<h4 className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-2">Assigned Colonies</h4>
<div className="flex flex-col gap-1.5">
{colonies.map((colony) => (
<NavLink key={colony.id} to={`/colony/${colony.id}`} onClick={onClose}
className="flex items-center justify-between rounded-lg border border-primary/20 bg-primary/[0.04] px-3 py-2 text-sm text-primary hover:bg-primary/[0.08]">
<span className="font-medium">#{colony.id}</span>
<ChevronRight className="w-3.5 h-3.5" />
</NavLink>
@@ -1,10 +1,11 @@
import { useEffect, useRef, useState } from "react";
import { X, Eye, EyeOff, Check, Pencil, ChevronDown, Zap, ThumbsUp, Loader2, AlertCircle, Camera } from "lucide-react";
import { useColony } from "@/context/ColonyContext";
import { useTheme } from "@/context/ThemeContext";
import { useModel, LLM_PROVIDERS } from "@/context/ModelContext";
import { credentialsApi } from "@/api/credentials";
import { configApi, type ModelOption } from "@/api/config";
import { compressImage } from "@/lib/image-utils";
interface SettingsModalProps {
open: boolean;
@@ -12,58 +13,54 @@ interface SettingsModalProps {
initialSection?: "profile" | "byok";
}
function ValidationBadge({ state }: { state: "validating" | { valid: boolean | null; message: string } | undefined }) {
if (!state) return <StatusText icon={<Check className="w-3 h-3" />} color="green">Connected</StatusText>;
if (state === "validating") return <StatusText icon={<Loader2 className="w-3 h-3 animate-spin" />} color="muted">Verifying...</StatusText>;
if (state.valid === false) return <StatusText icon={<AlertCircle className="w-3 h-3" />} color="red" title={state.message}>Invalid key</StatusText>;
if (state.valid === true) return <StatusText icon={<Check className="w-3 h-3" />} color="green">Verified</StatusText>;
return <StatusText icon={<Check className="w-3 h-3" />} color="green">Connected</StatusText>;
}
function StatusText({ icon, color, title, children }: { icon: React.ReactNode; color: "green" | "red" | "muted"; title?: string; children: React.ReactNode }) {
const cls = color === "green" ? "text-green-500" : color === "red" ? "text-red-400" : "text-muted-foreground";
return <span className={`flex items-center gap-1 text-xs font-medium ${cls}`} title={title}>{icon}{children}</span>;
}
export default function SettingsModal({ open, onClose, initialSection }: SettingsModalProps) {
const { userProfile, setUserProfile, userAvatarVersion, bumpUserAvatar } = useColony();
const { theme, setTheme } = useTheme();
const {
currentProvider, currentModel, connectedProviders, availableModels,
setModel, saveProviderKey, subscriptions, detectedSubscriptions,
activeSubscription, activateSubscription,
} = useModel();
const [displayName, setDisplayName] = useState(userProfile.displayName);
const [about, setAbout] = useState(userProfile.about);
const [activeSection, setActiveSection] = useState<"profile" | "byok">(initialSection || "profile");
const [editingProvider, setEditingProvider] = useState<string | null>(null);
const [keyInput, setKeyInput] = useState("");
const [showKey, setShowKey] = useState(false);
const [saving, setSaving] = useState(false);
const [validation, setValidation] = useState<Record<string, "validating" | { valid: boolean | null; message: string }>>({});
const [modelDropdownOpen, setModelDropdownOpen] = useState(false);
const [themeDropdownOpen, setThemeDropdownOpen] = useState(false);
const avatarUrl = `/api/config/profile/avatar?v=${userAvatarVersion}`;
const [avatarFailed, setAvatarFailed] = useState(false);
const [uploadingAvatar, setUploadingAvatar] = useState(false);
const avatarInputRef = useRef<HTMLInputElement>(null);
const themeDropdownRef = useRef<HTMLDivElement>(null);
useEffect(() => {
if (!themeDropdownOpen) return;
const handler = (e: MouseEvent) => {
if (themeDropdownRef.current && !themeDropdownRef.current.contains(e.target as Node))
setThemeDropdownOpen(false);
};
document.addEventListener("mousedown", handler);
return () => document.removeEventListener("mousedown", handler);
}, [themeDropdownOpen]);
// Sync form fields when modal opens
useEffect(() => {
if (open) {
setDisplayName(userProfile.displayName);
@@ -79,51 +76,47 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
onClose();
};
const handleAvatarUpload = async (e: React.ChangeEvent<HTMLInputElement>) => {
const file = e.target.files?.[0];
if (!file || !file.type.startsWith("image/")) return;
e.target.value = "";
setUploadingAvatar(true);
try {
const compressed = await compressImage(file);
await configApi.uploadAvatar(compressed);
bumpUserAvatar();
setAvatarFailed(false);
} catch {}
setUploadingAvatar(false);
};
const clearValidation = (providerId: string) => {
setTimeout(() => setValidation((v) => { const next = { ...v }; delete next[providerId]; return next; }), 4000);
};
const handleSaveKey = async (providerId: string) => {
const trimmedKey = keyInput.trim();
if (!trimmedKey) return;
setSaving(true);
setValidation((v) => ({ ...v, [providerId]: "validating" }));
// Validate first — only persist the key if validation passes or is inconclusive.
const validateResult = await credentialsApi
.validateKey(providerId, trimmedKey)
.catch(() => ({ valid: null as boolean | null, message: "Could not verify key" }));
if (validateResult.valid === false) {
// Key is definitively invalid — don't save it.
setSaving(false);
setValidation((v) => ({ ...v, [providerId]: { valid: false, message: validateResult.message } }));
clearValidation(providerId);
return;
}
// Validation passed or was inconclusive — save the key.
try {
await saveProviderKey(providerId, trimmedKey);
} catch {
setSaving(false);
setValidation((v) => ({ ...v, [providerId]: { valid: false, message: "Failed to save key" } }));
clearValidation(providerId);
return;
}
@@ -131,128 +124,66 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
setEditingProvider(null);
setKeyInput("");
setShowKey(false);
setValidation((v) => ({ ...v, [providerId]: { valid: validateResult.valid, message: validateResult.message } }));
clearValidation(providerId);
};
const handleSelectModel = async (provider: string, modelId: string) => {
try { await setModel(provider, modelId); setModelDropdownOpen(false); } catch {}
};
const handleActivateSubscription = async (subId: string) => {
try { await activateSubscription(subId); } catch {}
};
const initials = displayName.trim().split(/\s+/).map((w) => w[0]).join("").toUpperCase().slice(0, 2);
const activeSubInfo = activeSubscription ? subscriptions.find((s) => s.id === activeSubscription) : null;
const providerForModels = activeSubInfo?.provider || currentProvider;
const modelsForLabel = availableModels[providerForModels] || [];
const currentModelLabel = modelsForLabel.find((m) => m.id === currentModel)?.label || currentModel || "Not configured";
const currentProviderName = activeSubscription
? (subscriptions.find((s) => s.id === activeSubscription)?.name || currentProvider)
: (LLM_PROVIDERS.find((p) => p.id === currentProvider)?.name || currentProvider);
// Models available for selection (only API key providers - subscriptions use fixed models)
const selectableProviders = LLM_PROVIDERS.filter(
(p) => connectedProviders.has(p.id) && availableModels[p.id]?.length,
);
const startEditing = (providerId: string) => {
setEditingProvider(providerId);
setKeyInput("");
setShowKey(false);
};
const cancelEditing = () => {
setEditingProvider(null);
setKeyInput("");
};
return (
<div className="fixed inset-0 z-50 flex items-center justify-center">
{/* Backdrop */}
<div className="absolute inset-0 bg-black/40" onClick={onClose} />
{/* Modal */}
<div className="relative bg-card border border-border/60 rounded-2xl shadow-2xl w-full max-w-[720px] h-[520px] max-h-[80vh] flex overflow-hidden">
{/* Sidebar */}
<div className="w-[180px] flex-shrink-0 border-r border-border/40 py-6 px-3 flex flex-col gap-6">
<h2 className="text-sm font-semibold text-foreground px-3">SETTINGS</h2>
<div className="flex flex-col gap-1">
<p className="text-[11px] font-semibold text-muted-foreground/60 uppercase tracking-wider px-3 mb-1">Account</p>
<button
onClick={() => setActiveSection("profile")}
className={`text-left text-sm px-3 py-1.5 rounded-md ${activeSection === "profile" ? "bg-primary/15 text-primary font-medium" : "text-muted-foreground hover:text-foreground hover:bg-muted/30"}`}
>
Profile
</button>
</div>
<div className="flex flex-col gap-1">
<p className="text-[11px] font-semibold text-muted-foreground/60 uppercase tracking-wider px-3 mb-1">System</p>
<button
onClick={() => setActiveSection("byok")}
className={`text-left text-sm px-3 py-1.5 rounded-md ${activeSection === "byok" ? "bg-primary/15 text-primary font-medium" : "text-muted-foreground hover:text-foreground hover:bg-muted/30"}`}
>
BYOK
</button>
@@ -261,89 +192,68 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
{/* Content */}
<div className="flex-1 flex flex-col min-h-0">
{/* Close button */}
<button onClick={onClose} className="absolute top-4 right-4 p-1.5 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/50">
<X className="w-4 h-4" />
</button>
<div className="flex-1 overflow-y-auto overscroll-contain px-8 py-6 flex flex-col gap-6">
{activeSection === "profile" && (
<>
{/* Display name */}
<div>
<label className="text-sm font-medium text-foreground mb-2 block">
Display <span className="text-primary">*</span>
</label>
<div className="flex items-center gap-3">
<div className="relative group flex-shrink-0">
<div className="w-10 h-10 rounded-full bg-primary/15 flex items-center justify-center overflow-hidden">
{!avatarFailed ? (
<img src={avatarUrl} alt="" className="w-full h-full object-cover" onError={() => setAvatarFailed(true)} />
) : (
<span className="text-xs font-bold text-primary">{initials || "?"}</span>
)}
</div>
<button
onClick={() => avatarInputRef.current?.click()}
disabled={uploadingAvatar}
className="absolute inset-0 w-10 h-10 rounded-full flex items-center justify-center bg-black/50 opacity-0 group-hover:opacity-100 cursor-pointer"
title="Change photo"
>
{uploadingAvatar ? <Loader2 className="w-3.5 h-3.5 text-white animate-spin" /> : <Camera className="w-3.5 h-3.5 text-white" />}
</button>
<input ref={avatarInputRef} type="file" accept="image/*" className="hidden" onChange={handleAvatarUpload} />
</div>
<input
type="text" value={displayName} onChange={(e) => setDisplayName(e.target.value)}
placeholder="Display name"
className="flex-1 bg-muted/30 border border-border/50 rounded-lg px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary/40"
/>
</div>
</div>
{/* About */}
<div>
<label className="text-sm font-medium text-foreground mb-2 block">About</label>
<textarea
value={about} onChange={(e) => setAbout(e.target.value)}
placeholder="Tell people about yourself or your organization" rows={4}
className="w-full bg-muted/30 border border-border/50 rounded-lg px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary/40 resize-none"
/>
</div>
{/* Theme */}
<div className="flex items-center justify-between">
<label className="text-sm font-medium text-foreground">Theme</label>
<div className="relative" ref={themeDropdownRef}>
<button onClick={() => setThemeDropdownOpen(!themeDropdownOpen)}
className="flex items-center gap-2 bg-muted/30 border border-border/50 rounded-lg px-3 py-1.5 text-sm text-foreground hover:bg-muted/40">
{theme === "light" ? "Light" : "Dark"}
<ChevronDown className={`w-3.5 h-3.5 text-muted-foreground ${themeDropdownOpen ? "rotate-180" : ""}`} />
</button>
{themeDropdownOpen && (
<div className="absolute right-0 top-full mt-1 bg-card border border-border/60 rounded-lg shadow-xl z-10 min-w-[120px]">
{(["light", "dark"] as const).map((option) => (
<button key={option} onClick={() => { setTheme(option); setThemeDropdownOpen(false); }}
className={`w-full text-left px-4 py-2 text-sm flex items-center gap-2 first:rounded-t-lg last:rounded-b-lg ${theme === option ? "bg-primary/10 text-primary" : "text-foreground hover:bg-muted/30"}`}>
{theme === option ? <Check className="w-3 h-3 flex-shrink-0" /> : <span className="w-3" />}
<span>{option === "light" ? "Light" : "Dark"}</span>
</button>
))}
</div>
@@ -351,79 +261,88 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
</div>
</div>
{/* Save button */}
<div className="flex justify-end mt-auto pt-4">
<button onClick={handleSave} className="px-5 py-2 rounded-lg bg-primary text-primary-foreground text-sm font-medium hover:bg-primary/90">Save</button>
</div>
</>
)}
{activeSection === "byok" && (
<>
{/* Header */}
<div>
<h3 className="text-lg font-semibold text-foreground">Bring Your Own Key</h3>
<p className="text-sm text-muted-foreground mt-1">
Use your own API keys for hosted model providers. Your keys are encrypted and never shared.
</p>
</div>
{/* Active Model */}
<div>
<p className="text-[11px] font-semibold text-muted-foreground/60 uppercase tracking-wider mb-3">Active Model</p>
<div className="relative">
<button onClick={() => setModelDropdownOpen(!modelDropdownOpen)}
className="w-full flex items-center justify-between bg-muted/30 border border-border/50 rounded-lg px-4 py-3 text-left hover:bg-muted/40">
<div>
<p className="text-sm font-medium text-foreground">{currentModelLabel}</p>
<p className="text-xs text-muted-foreground">{currentProviderName}</p>
</div>
<ChevronDown className={`w-4 h-4 text-muted-foreground ${modelDropdownOpen ? "rotate-180" : ""}`} />
</button>
{modelDropdownOpen && (
<div className="absolute top-full left-0 right-0 mt-1 bg-card border border-border/60 rounded-lg shadow-xl z-10 max-h-[280px] overflow-y-auto overscroll-contain">
{selectableProviders.length === 0 ? (
<p className="px-4 py-3 text-sm text-muted-foreground">Add an API key or enable a subscription to see available models.</p>
) : selectableProviders.map((provider) => (
<div key={provider.id}>
<p className="px-4 pt-3 pb-0.5 text-sm font-medium text-foreground">{provider.name}</p>
{(availableModels[provider.id] || []).map((model: ModelOption) => {
const isActive = currentProvider === provider.id && currentModel === model.id && !activeSubscription;
return (
<button key={model.id} onClick={() => handleSelectModel(provider.id, model.id)}
className={`w-full text-left pl-8 pr-4 py-2 text-sm flex items-center gap-2 ${isActive ? "bg-primary/10 text-primary" : "text-foreground hover:bg-muted/30"}`}>
{isActive ? <Check className="w-3 h-3 flex-shrink-0" /> : <span className="w-3" />}
<span>{model.label}</span>
{model.recommended && (
<span className="ml-auto inline-flex items-center justify-center rounded bg-primary/10 text-primary p-1 flex-shrink-0" title="Recommended">
<ThumbsUp className="w-3 h-3" />
</span>
)}
</button>
);
})}
</div>
))}
</div>
)}
</div>
</div>
{/* Subscriptions */}
{subscriptions.length > 0 && (
<div>
<p className="text-[11px] font-semibold text-muted-foreground/60 uppercase tracking-wider mb-3">Subscriptions</p>
<div className="flex flex-col gap-1">
{subscriptions.map((sub) => {
const isDetected = detectedSubscriptions.has(sub.id);
const isActive = activeSubscription === sub.id;
return (
<div key={sub.id} className="flex items-center gap-3 py-2.5 px-2 rounded-lg hover:bg-muted/20">
<div className="w-9 h-9 rounded-full bg-primary/10 flex items-center justify-center flex-shrink-0">
<Zap className="w-4 h-4 text-primary" />
</div>
{/* Info */}
<div className="flex-1 min-w-0">
<p className="text-sm font-medium text-foreground">{sub.name}</p>
<p className="text-xs text-muted-foreground truncate">{sub.description}</p>
</div>
{/* Status / Action */}
{isActive ? (
<StatusText icon={<Check className="w-3 h-3" />} color="green">Active</StatusText>
) : isDetected ? (
<button onClick={() => handleActivateSubscription(sub.id)}
className="px-3 py-1.5 rounded-md text-xs font-semibold bg-primary/15 text-primary border border-primary/30 hover:bg-primary/25">
Enable
</button>
) : (
<span className="text-xs text-muted-foreground/50">Not detected</span>
)}
</div>
);
@@ -432,147 +351,65 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
</div>
)}
{/* API Keys */}
<div>
<p className="text-[11px] font-semibold text-muted-foreground/60 uppercase tracking-wider mb-3">API Keys</p>
<div className="flex flex-col gap-1">
{LLM_PROVIDERS.map((provider) => {
const isConnected = connectedProviders.has(provider.id);
const isEditing = editingProvider === provider.id;
return (
<div key={provider.id}>
<div className="flex items-center gap-3 py-2.5 px-2 rounded-lg hover:bg-muted/20">
<div className="w-9 h-9 rounded-full bg-primary/10 flex items-center justify-center flex-shrink-0">
<span className="text-sm font-bold text-primary">{provider.initial}</span>
</div>
{/* Info */}
<div className="flex-1 min-w-0">
<p className="text-sm font-medium text-foreground">{provider.name}</p>
<p className="text-xs text-muted-foreground truncate">{provider.description}</p>
</div>
{/* Action */}
{isConnected && !isEditing ? (
<div className="flex items-center gap-2">
<ValidationBadge state={validation[provider.id]} />
<button onClick={() => startEditing(provider.id)} className="p-1 rounded text-muted-foreground/40 hover:text-foreground" title="Change key">
<Pencil className="w-3.5 h-3.5" />
</button>
</div>
) : !isEditing ? (
<button onClick={() => startEditing(provider.id)}
className="px-3 py-1.5 rounded-md text-xs font-semibold bg-primary text-primary-foreground hover:bg-primary/90">
Add Key
</button>
) : null}
</div>
{/* Inline key entry */}
{isEditing && (
<div className="ml-12 mr-2 mb-2 flex flex-col gap-1.5">
<div className="flex items-center gap-2">
<div className="relative flex-1">
<input
type={showKey ? "text" : "password"} value={keyInput}
onChange={(e) => setKeyInput(e.target.value)}
placeholder={`Enter ${provider.name} API key`} autoFocus
onKeyDown={(e) => { if (e.key === "Enter") handleSaveKey(provider.id); if (e.key === "Escape") cancelEditing(); }}
className="w-full bg-muted/30 border border-border/50 rounded-lg px-3 py-2 pr-9 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary/40 font-mono"
/>
<button onClick={() => setShowKey(!showKey)} className="absolute right-2.5 top-1/2 -translate-y-1/2 text-muted-foreground/50 hover:text-foreground">
{showKey ? <EyeOff className="w-3.5 h-3.5" /> : <Eye className="w-3.5 h-3.5" />}
</button>
</div>
<button onClick={() => handleSaveKey(provider.id)} disabled={!keyInput.trim() || saving}
className="px-3 py-2 rounded-lg bg-primary text-primary-foreground text-xs font-semibold hover:bg-primary/90 disabled:opacity-50 disabled:cursor-not-allowed">
{saving ? "..." : "Save"}
</button>
<button onClick={cancelEditing} className="px-3 py-2 rounded-lg text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/30">Cancel</button>
</div>
{/* Validation feedback inside editing mode */}
{validation[provider.id] === "validating" && (
<StatusText icon={<Loader2 className="w-3 h-3 animate-spin" />} color="muted">Verifying...</StatusText>
)}
{validation[provider.id] && typeof validation[provider.id] === "object" && (validation[provider.id] as { valid: boolean | null; message: string }).valid === false && (
<StatusText icon={<AlertCircle className="w-3 h-3" />} color="red">
{(validation[provider.id] as { message: string }).message}
</StatusText>
)}
</div>
)}
@@ -581,83 +418,6 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
})}
</div>
</div>
</>
)}
</div>
@@ -1,4 +1,4 @@
import { useState } from "react";
import { useState, useCallback, useRef } from "react";
import { useNavigate } from "react-router-dom";
import {
ChevronLeft,
@@ -22,6 +22,38 @@ export default function Sidebar() {
const [coloniesExpanded, setColoniesExpanded] = useState(true);
const [queensExpanded, setQueensExpanded] = useState(true);
// ── Resizable width ──────────────────────────────────────────────────
const MIN_WIDTH = 180;
const MAX_WIDTH = 400;
const [width, setWidth] = useState(240);
const dragging = useRef(false);
const startX = useRef(0);
const startWidth = useRef(0);
const onDragStart = useCallback((e: React.MouseEvent) => {
e.preventDefault();
dragging.current = true;
startX.current = e.clientX;
startWidth.current = width;
const onMove = (ev: MouseEvent) => {
if (!dragging.current) return;
const delta = ev.clientX - startX.current;
setWidth(Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, startWidth.current + delta)));
};
const onUp = () => {
dragging.current = false;
document.removeEventListener("mousemove", onMove);
document.removeEventListener("mouseup", onUp);
document.body.style.cursor = "";
document.body.style.userSelect = "";
};
document.addEventListener("mousemove", onMove);
document.addEventListener("mouseup", onUp);
document.body.style.cursor = "col-resize";
document.body.style.userSelect = "none";
}, [width]);
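As an aside, the width math in the drag handler above reduces to clamping the proposed width into [MIN_WIDTH, MAX_WIDTH]. A minimal standalone sketch of that clamp (`clampWidth` is an illustrative name, not part of the component; the constants mirror the ones declared in Sidebar):

```typescript
// Clamp a proposed sidebar width into the allowed range.
// MIN_WIDTH / MAX_WIDTH mirror the constants declared in Sidebar above.
const MIN_WIDTH = 180;
const MAX_WIDTH = 400;

function clampWidth(proposed: number): number {
  return Math.min(MAX_WIDTH, Math.max(MIN_WIDTH, proposed));
}

// During a drag the handler effectively calls
// setWidth(clampWidth(startWidth + delta)), so the sidebar can never
// be dragged narrower than 180px or wider than 400px.
```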
if (sidebarCollapsed) {
return (
<aside className="w-[52px] flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full">
@@ -50,7 +82,15 @@ export default function Sidebar() {
}
return (
<aside className="w-[240px] flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full">
<aside
className="flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full relative"
style={{ width }}
>
{/* Drag handle on right edge */}
<div
onMouseDown={onDragStart}
className="absolute top-0 right-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/50 transition-colors z-10"
/>
{/* Header */}
<div className="h-12 flex items-center justify-between px-4 border-b border-border/60">
<button
@@ -1,3 +1,4 @@
import { useState } from "react";
import { NavLink } from "react-router-dom";
import type { QueenProfileSummary } from "@/types/colony";
@@ -7,6 +8,9 @@ interface SidebarQueenItemProps {
}
export default function SidebarQueenItem({ queen, isActive }: SidebarQueenItemProps) {
const [hasAvatar, setHasAvatar] = useState(true);
const avatarUrl = `/api/queen/${queen.id}/avatar`;
return (
<NavLink
to={`/queen/${queen.id}`}
@@ -18,8 +22,14 @@ export default function SidebarQueenItem({ queen, isActive }: SidebarQueenItemPr
}`
}
>
<span className="relative flex-shrink-0 w-6 h-6 rounded-full bg-primary/15 flex items-center justify-center text-[10px] font-bold text-primary">
{queen.name.charAt(0)}
<span className="relative flex-shrink-0 w-6 h-6 rounded-full bg-primary/15 flex items-center justify-center">
<span className="w-full h-full rounded-full overflow-hidden flex items-center justify-center">
{hasAvatar ? (
<img src={avatarUrl} alt={queen.name} className="w-full h-full object-cover" onError={() => setHasAvatar(false)} />
) : (
<span className="text-[10px] font-bold text-primary">{queen.name.charAt(0)}</span>
)}
</span>
{isActive && (
<span
className="absolute -bottom-0.5 -right-0.5 w-2 h-2 rounded-full bg-emerald-500 ring-2 ring-sidebar-bg"
@@ -1,211 +0,0 @@
import { useState } from "react";
import { X, Webhook, Clock, Activity, ArrowRight, Zap, Play, Square, Loader2 } from "lucide-react";
import type { GraphNode } from "./graph-types";
import { cronToLabel } from "@/lib/graphUtils";
import { sessionsApi } from "@/api/sessions";
interface TriggerDetailPanelProps {
trigger: GraphNode;
sessionId: string;
onClose: () => void;
}
function TriggerIcon({ type }: { type?: string }) {
const cls = "w-4 h-4";
switch (type) {
case "webhook":
return <Webhook className={cls} />;
case "timer":
return <Clock className={cls} />;
case "api":
return <ArrowRight className={cls} />;
case "event":
return <Activity className={cls} />;
default:
return <Zap className={cls} />;
}
}
function formatCountdown(seconds: number): string {
const h = Math.floor(seconds / 3600);
const m = Math.floor((seconds % 3600) / 60);
const s = Math.floor(seconds % 60);
if (h > 0) return `${h}h ${String(m).padStart(2, "0")}m ${String(s).padStart(2, "0")}s`;
if (m > 0) return `${m}m ${String(s).padStart(2, "0")}s`;
return `${s}s`;
}
export default function TriggerDetailPanel({ trigger, sessionId, onClose }: TriggerDetailPanelProps) {
const [busy, setBusy] = useState(false);
const [error, setError] = useState<string | null>(null);
const isActive = trigger.status === "running" || trigger.status === "complete";
const config = (trigger.triggerConfig || {}) as Record<string, unknown>;
const cron = config.cron as string | undefined;
const interval = config.interval_minutes as number | undefined;
const nextFireIn = config.next_fire_in as number | undefined;
const triggerId = trigger.id.replace(/^__trigger_/, "");
const handleToggle = async () => {
if (!sessionId || busy) return;
setBusy(true);
setError(null);
try {
if (isActive) {
await sessionsApi.deactivateTrigger(sessionId, triggerId);
} else {
await sessionsApi.activateTrigger(sessionId, triggerId);
}
// The SSE TRIGGER_ACTIVATED / TRIGGER_DEACTIVATED event will flip
// the card status; we don't need to set local state here.
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
setError(msg);
} finally {
setBusy(false);
}
};
const schedule = cron
? cronToLabel(cron)
: interval != null
? interval >= 60
? `Every ${interval / 60}h`
: `Every ${interval}m`
: null;
// Hide noisy frontend-only fields so only the raw operator config shows
const displayEntries = Object.entries(config).filter(
([k]) => k !== "next_fire_in" && k !== "entry_node",
);
return (
<div className="flex flex-col h-full border-l border-border/40 bg-card/20 animate-in slide-in-from-right">
{/* Header */}
<div className="px-4 pt-4 pb-3 border-b border-border/30 flex items-start justify-between gap-2 flex-shrink-0">
<div className="flex items-start gap-3 min-w-0">
<div
className={[
"w-9 h-9 rounded-lg flex items-center justify-center flex-shrink-0",
isActive ? "bg-primary/15 text-primary" : "bg-muted/50 text-muted-foreground",
].join(" ")}
>
<TriggerIcon type={trigger.triggerType} />
</div>
<div className="min-w-0">
<h3 className="text-sm font-semibold text-foreground leading-tight truncate">
{trigger.label}
</h3>
<div className="flex items-center gap-2 mt-1">
<span
className={[
"text-[10px] font-medium px-1.5 py-0.5 rounded-full",
isActive
? "bg-emerald-500/15 text-emerald-400"
: "bg-muted/60 text-muted-foreground",
].join(" ")}
>
{isActive ? "active" : "inactive"}
</span>
{trigger.triggerType && (
<span className="text-[10px] text-muted-foreground uppercase tracking-wider">
{trigger.triggerType}
</span>
)}
</div>
</div>
</div>
<button
onClick={onClose}
className="p-1 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/50 transition-colors flex-shrink-0"
>
<X className="w-3.5 h-3.5" />
</button>
</div>
{/* Body */}
<div className="flex-1 overflow-auto px-4 py-4 space-y-4">
{schedule && (
<div>
<p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1.5">
Schedule
</p>
<div className="rounded-lg border border-border/30 bg-background/60 px-3 py-2.5">
<p className="text-xs text-foreground">{schedule}</p>
{cron && (
<p className="text-[10px] text-muted-foreground mt-1 font-mono">{cron}</p>
)}
</div>
</div>
)}
{isActive && nextFireIn != null && nextFireIn > 0 && (
<div>
<p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1.5">
Next fire
</p>
<div className="rounded-lg border border-border/30 bg-background/60 px-3 py-2.5">
<p className="text-xs text-foreground italic">in {formatCountdown(nextFireIn)}</p>
</div>
</div>
)}
{displayEntries.length > 0 && (
<div>
<p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1.5">
Config
</p>
<div className="rounded-lg border border-border/30 bg-background/60 px-3 py-2.5 space-y-1">
{displayEntries.map(([k, v]) => (
<div key={k} className="flex items-start justify-between gap-3 text-[11px]">
<span className="text-muted-foreground font-mono">{k}</span>
<span className="text-foreground font-mono text-right truncate">
{typeof v === "object" ? JSON.stringify(v) : String(v)}
</span>
</div>
))}
</div>
</div>
)}
<div>
<p className="text-[10px] font-medium text-muted-foreground uppercase tracking-wider mb-1.5">
Trigger ID
</p>
<div className="rounded-lg border border-border/30 bg-background/60 px-3 py-2.5">
<p className="text-[11px] text-muted-foreground font-mono break-all">
{triggerId}
</p>
</div>
</div>
</div>
{/* Footer with start/stop control */}
<div className="px-4 py-3 border-t border-border/30 flex-shrink-0 space-y-2">
{error && (
<p className="text-[10.5px] text-red-400 leading-snug">{error}</p>
)}
<button
type="button"
onClick={handleToggle}
disabled={busy || !sessionId}
className={[
"w-full flex items-center justify-center gap-1.5 px-3 py-2 rounded-lg text-xs font-medium transition-colors",
"disabled:opacity-50 disabled:cursor-not-allowed",
isActive
? "bg-muted/50 text-foreground hover:bg-muted/70 border border-border/30"
: "bg-primary/15 text-primary hover:bg-primary/25 border border-primary/30",
].join(" ")}
>
{busy ? (
<Loader2 className="w-3.5 h-3.5 animate-spin" />
) : isActive ? (
<Square className="w-3.5 h-3.5" />
) : (
<Play className="w-3.5 h-3.5" />
)}
{busy ? "Working…" : isActive ? "Stop trigger" : "Start trigger"}
</button>
</div>
</div>
);
}
@@ -1,143 +0,0 @@
import { Clock, Webhook, Zap, ArrowRight, Activity } from "lucide-react";
import type { GraphNode } from "./graph-types";
import { cronToLabel } from "@/lib/graphUtils";
interface TriggersPanelProps {
triggers: GraphNode[];
selectedId?: string | null;
onSelect?: (trigger: GraphNode) => void;
}
function TriggerIcon({ type }: { type?: string }) {
const cls = "w-3.5 h-3.5";
switch (type) {
case "webhook":
return <Webhook className={cls} />;
case "timer":
return <Clock className={cls} />;
case "api":
return <ArrowRight className={cls} />;
case "event":
return <Activity className={cls} />;
default:
return <Zap className={cls} />;
}
}
function scheduleLabel(config: Record<string, unknown> | undefined): string | null {
if (!config) return null;
const cron = config.cron as string | undefined;
if (cron) return cronToLabel(cron);
const interval = config.interval_minutes as number | undefined;
if (interval != null) {
if (interval >= 60) return `Every ${interval / 60}h`;
return `Every ${interval}m`;
}
return null;
}
function countdownLabel(nextFireIn: number | undefined): string | null {
if (nextFireIn == null || nextFireIn <= 0) return null;
const h = Math.floor(nextFireIn / 3600);
const m = Math.floor((nextFireIn % 3600) / 60);
const s = Math.floor(nextFireIn % 60);
return h > 0
? `next in ${h}h ${String(m).padStart(2, "0")}m`
: `next in ${m}m ${String(s).padStart(2, "0")}s`;
}
function TriggerCard({
trigger,
selected,
onClick,
}: {
trigger: GraphNode;
selected: boolean;
onClick?: () => void;
}) {
const isActive = trigger.status === "running" || trigger.status === "complete";
const schedule = scheduleLabel(trigger.triggerConfig);
const nextFireIn = trigger.triggerConfig?.next_fire_in as number | undefined;
const countdown = isActive ? countdownLabel(nextFireIn) : null;
return (
<button
type="button"
onClick={onClick}
className={[
"w-full text-left rounded-lg border px-3 py-2.5 transition-colors",
selected
? "bg-primary/10 border-primary/30"
: "bg-background/60 border-border/30 hover:bg-muted/40 hover:border-border/50",
].join(" ")}
>
<div className="flex items-center gap-2">
<span
className={[
"flex-shrink-0 w-6 h-6 rounded-full flex items-center justify-center",
isActive ? "bg-primary/15 text-primary" : "bg-muted/60 text-muted-foreground",
].join(" ")}
>
<TriggerIcon type={trigger.triggerType} />
</span>
<div className="min-w-0 flex-1">
<p className="text-xs font-medium text-foreground truncate">{trigger.label}</p>
{schedule && schedule !== trigger.label && (
<p className="text-[10.5px] text-muted-foreground truncate mt-0.5">{schedule}</p>
)}
</div>
<span
className={[
"flex-shrink-0 text-[10px] font-medium px-1.5 py-0.5 rounded-full",
isActive
? "bg-emerald-500/15 text-emerald-400"
: "bg-muted/60 text-muted-foreground",
].join(" ")}
>
{isActive ? "active" : "inactive"}
</span>
</div>
{countdown && (
<p className="text-[10px] text-muted-foreground mt-1.5 italic pl-8">{countdown}</p>
)}
</button>
);
}
export default function TriggersPanel({ triggers, selectedId, onSelect }: TriggersPanelProps) {
return (
<div className="flex flex-col h-full bg-card/30 border-l border-border/30">
<div className="px-4 py-3 border-b border-border/30 flex items-center gap-2">
<Clock className="w-3.5 h-3.5 text-muted-foreground" />
<h3 className="text-xs font-semibold text-foreground uppercase tracking-wide">
Triggers
</h3>
{triggers.length > 0 && (
<span className="ml-auto text-[10px] text-muted-foreground">
{triggers.length}
</span>
)}
</div>
<div className="flex-1 overflow-y-auto px-3 py-3 space-y-2">
{triggers.length === 0 ? (
<div className="text-center py-8">
<Clock className="w-6 h-6 mx-auto text-muted-foreground/40 mb-2" />
<p className="text-[11px] text-muted-foreground">No triggers configured</p>
<p className="text-[10px] text-muted-foreground/70 mt-1 px-2">
Ask the queen to set a schedule or webhook
</p>
</div>
) : (
triggers.map((t) => (
<TriggerCard
key={t.id}
trigger={t}
selected={selectedId === t.id}
onClick={onSelect ? () => onSelect(t) : undefined}
/>
))
)}
</div>
</div>
);
}
@@ -0,0 +1,317 @@
import { memo, useState, useRef, useEffect } from "react";
import { ChevronDown, ChevronUp, Cpu } from "lucide-react";
import type { ChatMessage } from "@/components/ChatPanel";
import { ToolActivityRow } from "@/components/ChatPanel";
import MarkdownContent from "@/components/MarkdownContent";
import { useColonyWorkers } from "@/context/ColonyWorkersContext";
import { workerIdFromStreamId } from "@/lib/chat-helpers";
const workerColor = "hsl(220,60%,55%)";
export interface WorkerRunGroup {
messages: ChatMessage[];
}
interface WorkerRunBubbleProps {
runId: string;
group: WorkerRunGroup;
/** Short identifier shown next to the "Worker" badge. Populated
* only when the parent grouping has multiple parallel workers
* in the same run span, so N stacked bubbles can be told apart
* at a glance. Omitted for single-worker runs. */
label?: string;
}
/** Parse a tool_status JSON blob into a list of tool entries. */
function parseToolStatus(content: string): { name: string; done: boolean }[] {
try {
const parsed = JSON.parse(content);
return parsed.tools || [];
} catch {
return [];
}
}
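For illustration, the parser above degrades gracefully on malformed or partial payloads. A self-contained sketch of that behavior (the sample JSON shapes here are assumptions for the example, not captured payloads):

```typescript
// Same logic as parseToolStatus in WorkerRunBubble above.
function parseToolStatus(content: string): { name: string; done: boolean }[] {
  try {
    const parsed = JSON.parse(content);
    return parsed.tools || [];
  } catch {
    return [];
  }
}

// Well-formed payload: the tools array comes through as-is.
const ok = parseToolStatus(
  '{"tools":[{"name":"grep","done":true},{"name":"read_file","done":false}]}',
);
// Malformed JSON and a missing "tools" key both fall back to an empty
// list, so a half-written SSE chunk never crashes the bubble.
const bad = parseToolStatus("not json");
const missing = parseToolStatus('{"other":1}');
```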
/**
* Strip markdown formatting so the collapsed preview is a single
* readable line instead of a scatter of code pills.
*
* MarkdownContent turns every backtick-wrapped fragment into its own
* visually-boxed inline-code pill. In a worker text message those
* pills can be coordinates, UUIDs, selectors, tool names — the
* collapsed preview ends up looking like confetti. We just want the
* plain prose, one line, truncated.
*/
function stripMarkdownToPreview(s: string, maxLen = 160): string {
const cleaned = s
.replace(/```[\s\S]*?```/g, " [code] ") // fenced code blocks
.replace(/`([^`]+)`/g, "$1") // inline code — keep the text, drop the backticks
.replace(/\*\*([^*]+)\*\*/g, "$1") // bold
.replace(/\*([^*]+)\*/g, "$1") // italic
.replace(/~~([^~]+)~~/g, "$1") // strikethrough
.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // links -> link text
.replace(/^#{1,6}\s+/gm, "") // ATX headers
.replace(/^[>\-*+]\s+/gm, "") // blockquote/list markers
.replace(/\s+/g, " ") // collapse whitespace
.trim();
if (cleaned.length <= maxLen) return cleaned;
return cleaned.slice(0, maxLen - 1).trimEnd() + "\u2026";
}
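A quick, self-contained illustration of the stripping rules described above (the sample strings are invented for the example):

```typescript
// Same logic as stripMarkdownToPreview in WorkerRunBubble above.
function stripMarkdownToPreview(s: string, maxLen = 160): string {
  const cleaned = s
    .replace(/```[\s\S]*?```/g, " [code] ")   // fenced code blocks
    .replace(/`([^`]+)`/g, "$1")              // inline code keeps its text
    .replace(/\*\*([^*]+)\*\*/g, "$1")        // bold
    .replace(/\*([^*]+)\*/g, "$1")            // italic
    .replace(/~~([^~]+)~~/g, "$1")            // strikethrough
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")  // links -> link text
    .replace(/^#{1,6}\s+/gm, "")              // ATX headers
    .replace(/^[>\-*+]\s+/gm, "")             // blockquote/list markers
    .replace(/\s+/g, " ")                     // collapse whitespace
    .trim();
  if (cleaned.length <= maxLen) return cleaned;
  return cleaned.slice(0, maxLen - 1).trimEnd() + "\u2026";
}

// Inline code keeps its text, bold and links collapse to plain prose,
// and a fenced block becomes a single "[code]" placeholder.
const preview = stripMarkdownToPreview(
  "Use `npm i` to **install** [docs](https://example.com)",
);
// → "Use npm i to install docs"
```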
/**
* Collapsible card that groups all worker messages from a single run
* (the span between the queen's `run_agent_with_input` call and the
* worker's final `set_output`/`escalate`/idle).
*
* Collapsed (default): header bar with tool count + latest text snippet.
* Expanded: scrollable list of every message and tool status in order.
*/
const WorkerRunBubble = memo(
function WorkerRunBubble({ group, label }: WorkerRunBubbleProps) {
const [expanded, setExpanded] = useState(false);
const bodyRef = useRef<HTMLDivElement>(null);
const { openColonyWorkers } = useColonyWorkers();
// Derive the colony worker id from the first message that carries
// a parallel-worker streamId (``worker:{uuid}``). Legacy single-worker
// bubbles (streamId="worker") have no uuid — the click still opens
// the sidebar, just without a preselection.
const workerId = (() => {
for (const m of group.messages) {
const id = workerIdFromStreamId(m.streamId);
if (id) return id;
}
return null;
})();
// Separate text messages from tool status
const textMsgs = group.messages.filter(
(m) => m.type !== "tool_status" && m.content?.trim()
);
const toolStatusMsgs = group.messages.filter(
(m) => m.type === "tool_status"
);
// Count total tool calls from tool_status messages
const allTools: { name: string; done: boolean }[] = [];
for (const m of toolStatusMsgs) {
for (const t of parseToolStatus(m.content)) {
allTools.push(t);
}
}
const toolCount = allTools.length;
const doneCount = allTools.filter((t) => t.done).length;
const isFinished = toolCount > 0 && doneCount === toolCount;
// Latest text from the worker (the last non-empty text message)
const latestText = textMsgs.length > 0
? textMsgs[textMsgs.length - 1].content
: "";
// Status label. We prefer concrete states over the vague
// "starting" fallback — if the worker has emitted any text or
// any tool, it's past the startup phase.
const statusLabel = isFinished
? "done"
: toolCount > 0
? "running"
: textMsgs.length > 0
? "active"
: "starting";
// Unique tool names for the summary (deduplicated, ordered by first appearance)
const uniqueToolNames: string[] = [];
const seen = new Set<string>();
for (const t of allTools) {
if (!seen.has(t.name)) {
seen.add(t.name);
uniqueToolNames.push(t.name);
}
}
// Auto-scroll body when expanded
useEffect(() => {
if (expanded && bodyRef.current) {
bodyRef.current.scrollTop = bodyRef.current.scrollHeight;
}
}, [expanded, group.messages.length]);
return (
<div className="flex gap-3">
{/* Left icon — clicking opens the Colony Workers sidebar and
pre-selects this worker if we can derive its id. */}
<button
type="button"
onClick={() => openColonyWorkers(workerId ?? undefined)}
aria-label="Open worker in colony sidebar"
title="Open worker in colony sidebar"
className="flex-shrink-0 w-7 h-7 rounded-xl flex items-center justify-center mt-1 transition-opacity hover:opacity-80 cursor-pointer"
style={{
backgroundColor: `${workerColor}18`,
border: `1.5px solid ${workerColor}35`,
}}
>
<Cpu className="w-3.5 h-3.5" style={{ color: workerColor }} />
</button>
<div className="flex-1 min-w-0 max-w-[90%]">
{/* Clickable header */}
<button
onClick={() => setExpanded((v) => !v)}
className="w-full flex items-center gap-2 mb-1 text-left cursor-pointer group"
>
<span className="font-medium text-xs" style={{ color: workerColor }}>
Worker
</span>
{label && (
<span className="text-[10px] font-mono text-muted-foreground/80 tabular-nums">
{label}
</span>
)}
<span
className={`text-[10px] font-medium px-1.5 py-0.5 rounded-md ${
isFinished
? "bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400"
: "bg-muted text-muted-foreground"
}`}
>
{statusLabel}
</span>
{toolCount > 0 && (
<span className="text-[10px] text-muted-foreground tabular-nums">
{doneCount}/{toolCount} tools
</span>
)}
<span className="ml-auto text-muted-foreground/60 group-hover:text-muted-foreground transition-colors p-0.5 rounded">
{expanded ? (
<ChevronUp className="w-3.5 h-3.5" />
) : (
<ChevronDown className="w-3.5 h-3.5" />
)}
</span>
</button>
{/* Card body — use Tailwind theme tokens so dark mode
gets a proper dark background instead of a glaring
near-white hardcoded hsl. Finished runs get a subtle
green tint that also respects theme. */}
<div
className={`rounded-2xl rounded-tl-md overflow-hidden border ${
isFinished
? "border-green-300/50 bg-green-50/50 dark:border-green-900/40 dark:bg-green-950/20"
: "border-border bg-muted/60"
}`}
>
{/* Collapsed: single-line plain-text preview of the
latest worker text, OR a tool-name chain when the
worker hasn't emitted any prose yet. MarkdownContent
is intentionally NOT used here — its inline-code
rendering turns every backtick-wrapped fragment into
a floating pill, which wrecks the preview. */}
{!expanded && (
<div className="px-4 py-2.5 text-sm text-muted-foreground">
{latestText ? (
<div className="truncate">
{stripMarkdownToPreview(latestText)}
</div>
) : uniqueToolNames.length > 0 ? (
<span className="text-xs font-mono truncate block">
{uniqueToolNames.slice(0, 5).join(" \u2192 ")}
{uniqueToolNames.length > 5 &&
` + ${uniqueToolNames.length - 5} more`}
</span>
) : (
<span className="text-xs text-muted-foreground/60 italic">
{"waiting for first action\u2026"}
</span>
)}
</div>
)}
{/* Expanded: chronological stream with tool bursts
coalesced into a single ToolActivityRow each.
Consecutive tool_status messages (no text between)
collapse to the LATEST snapshot — each snapshot is
cumulative within its turn, so the latest one tells
the whole story for that burst. Text messages break
the burst and render as markdown. */}
{expanded && (
<div
ref={bodyRef}
className="max-h-[400px] overflow-y-auto px-4 py-3 space-y-3"
>
{(() => {
type RenderRow =
| { kind: "tools"; content: string; key: string }
| { kind: "text"; msg: ChatMessage; key: string };
const rows: RenderRow[] = [];
let pendingTool: { content: string; id: string } | null = null;
const flushTool = () => {
if (pendingTool) {
rows.push({
kind: "tools",
content: pendingTool.content,
key: `tools-${pendingTool.id}`,
});
pendingTool = null;
}
};
for (let i = 0; i < group.messages.length; i++) {
const m = group.messages[i];
if (m.type === "tool_status") {
// Overwrite — latest snapshot in the burst wins
pendingTool = {
content: m.content,
id: m.id || `ts-${i}`,
};
continue;
}
if (m.content?.trim()) {
flushTool();
rows.push({
kind: "text",
msg: m,
key: m.id || `txt-${i}`,
});
}
}
flushTool();
return rows.map((row) => {
if (row.kind === "tools") {
// ToolActivityRow groups by tool name (×N), shows
// running pills (spinner) before done pills (check),
// and uses the per-tool color hash that matches
// the rest of the chat.
return (
<div key={row.key} className="-ml-10">
<ToolActivityRow content={row.content} />
</div>
);
}
return (
<div
key={row.key}
className="text-sm leading-relaxed"
>
<MarkdownContent content={row.msg.content} />
</div>
);
});
})()}
</div>
)}
</div>
</div>
</div>
);
},
(prev, next) =>
prev.runId === next.runId &&
prev.label === next.label &&
prev.group.messages.length === next.group.messages.length &&
prev.group.messages[prev.group.messages.length - 1]?.content ===
next.group.messages[next.group.messages.length - 1]?.content
);
export default WorkerRunBubble;
@@ -0,0 +1,158 @@
import { useCallback } from "react";
import { ArrowDown, ArrowUp, Loader2 } from "lucide-react";
import type { CellValue, ColumnInfo } from "@/api/colonyData";
import { EditableCell } from "./EditableCell";
export type SortDir = "asc" | "desc";
export interface DataGridProps {
columns: ColumnInfo[];
rows: Record<string, CellValue>[];
/** Columns that form the primary key — used to identify rows for
 * edits; their cells are rendered non-editable. */
primaryKey: string[];
orderBy: string | null;
orderDir: SortDir;
onSortChange: (column: string | null, dir: SortDir) => void;
/** If provided, non-PK cells become click-to-edit. The handler is
* called with the PK values for the row, the column name, and the
* parsed new value. A rejected promise surfaces as a cell-level
* error tooltip without dirtying the rest of the grid. */
onCellEdit?: (
pk: Record<string, CellValue>,
column: string,
newValue: CellValue,
) => Promise<void>;
loading?: boolean;
emptyMessage?: string;
}
/** Airtable-style editable grid. Self-contained — pass columns + rows
* and wire up sort/edit callbacks to drive server-side state. */
export function DataGrid({
columns,
rows,
primaryKey,
orderBy,
orderDir,
onSortChange,
onCellEdit,
loading = false,
emptyMessage = "No rows.",
}: DataGridProps) {
const handleHeaderClick = useCallback(
(col: string) => {
if (orderBy === col) {
// Same column: flip direction, then on the 3rd click clear sort.
if (orderDir === "asc") onSortChange(col, "desc");
else onSortChange(null, "asc");
} else {
onSortChange(col, "asc");
}
},
[orderBy, orderDir, onSortChange],
);
const pkSet = new Set(primaryKey);
const extractPk = (row: Record<string, CellValue>): Record<string, CellValue> => {
const out: Record<string, CellValue> = {};
for (const k of primaryKey) out[k] = row[k];
return out;
};
return (
<div className="relative border border-border/60 rounded-lg overflow-hidden">
{loading && (
<div className="absolute top-1.5 right-1.5 z-10 text-muted-foreground">
<Loader2 className="w-3.5 h-3.5 animate-spin" />
</div>
)}
<div className="overflow-auto max-h-[60vh]">
<table className="text-[11px] w-full border-collapse">
<thead className="sticky top-0 z-[1] bg-card/95 backdrop-blur-sm">
<tr>
{columns.map((c) => {
const isPk = pkSet.has(c.name);
const active = orderBy === c.name;
return (
<th
key={c.name}
onClick={() => handleHeaderClick(c.name)}
className="text-left font-semibold text-foreground/90 border-b border-border/60 px-2 py-1.5 cursor-pointer hover:bg-muted/40 select-none whitespace-nowrap"
title={`${c.name}${c.type ? ` (${c.type})` : ""}${isPk ? " — primary key" : ""}${c.notnull ? " — NOT NULL" : ""}`}
>
<span className="inline-flex items-center gap-1">
{isPk && (
<span className="text-[8px] uppercase tracking-wider bg-primary/15 text-primary px-1 rounded">
pk
</span>
)}
<span>{c.name}</span>
{active &&
(orderDir === "asc" ? (
<ArrowUp className="w-3 h-3 text-primary" />
) : (
<ArrowDown className="w-3 h-3 text-primary" />
))}
</span>
</th>
);
})}
</tr>
</thead>
<tbody>
{rows.length === 0 && !loading ? (
<tr>
<td
colSpan={Math.max(columns.length, 1)}
className="text-center text-muted-foreground py-6"
>
{emptyMessage}
</td>
</tr>
) : (
rows.map((row, i) => {
const pkValues = extractPk(row);
const key = primaryKey.length
? primaryKey.map((p) => String(row[p] ?? "")).join("|") || `row-${i}`
: `row-${i}`;
return (
<tr
key={key}
className="border-b border-border/30 hover:bg-muted/20"
>
{columns.map((c) => {
const isPk = pkSet.has(c.name);
const editable = !isPk && !!onCellEdit;
return (
<td
key={c.name}
className="align-top border-r border-border/20 last:border-r-0 p-0"
>
<EditableCell
value={row[c.name] ?? null}
column={c}
editable={editable}
onCommit={
editable && onCellEdit
? (v) => onCellEdit(pkValues, c.name, v)
: undefined
}
/>
</td>
);
})}
</tr>
);
})
)}
</tbody>
</table>
</div>
</div>
);
}
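The header-click behavior above cycles each column through ascending, then descending, then cleared. That state machine can be sketched as a pure function (`nextSort` is an illustrative name; the component inlines this logic in handleHeaderClick):

```typescript
type SortDir = "asc" | "desc";

// Pure sketch of DataGrid's header-click cycle:
// click a new column  -> sort it ascending;
// click it again      -> flip to descending;
// click a third time  -> clear the sort (orderBy = null).
function nextSort(
  orderBy: string | null,
  orderDir: SortDir,
  clicked: string,
): { orderBy: string | null; orderDir: SortDir } {
  if (orderBy === clicked) {
    return orderDir === "asc"
      ? { orderBy: clicked, orderDir: "desc" }
      : { orderBy: null, orderDir: "asc" };
  }
  return { orderBy: clicked, orderDir: "asc" };
}
```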
@@ -0,0 +1,160 @@
import { useEffect, useRef, useState } from "react";
import { Loader2 } from "lucide-react";
import type { CellValue, ColumnInfo } from "@/api/colonyData";
interface EditableCellProps {
value: CellValue;
column: ColumnInfo;
editable: boolean;
onCommit?: (newValue: CellValue) => Promise<void>;
}
/** Parse a textarea draft back to the typed column value. Empty input
* maps to NULL when the column is nullable; otherwise empty-string.
* Invalid numerics throw — caller surfaces as a cell error. */
function parseDraft(draft: string, column: ColumnInfo): CellValue {
const t = column.type.toUpperCase();
const trimmed = draft.trim();
if (trimmed === "") return column.notnull ? "" : null;
if (t.includes("INT")) {
const n = Number(trimmed);
if (!Number.isFinite(n) || !Number.isInteger(n)) {
throw new Error(`${column.name} expects an integer`);
}
return n;
}
if (t.includes("REAL") || t.includes("FLOA") || t.includes("DOUB") || t.includes("NUMERIC")) {
const n = Number(trimmed);
if (!Number.isFinite(n)) throw new Error(`${column.name} expects a number`);
return n;
}
if (t.includes("BOOL")) {
const lower = trimmed.toLowerCase();
if (lower === "true" || lower === "1") return true;
if (lower === "false" || lower === "0") return false;
throw new Error(`${column.name} expects true/false`);
}
// TEXT / unknown affinity — keep as-is.
return draft;
}
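To make the affinity rules above concrete, here is a self-contained version with minimal stand-ins for the imported types (the real CellValue and ColumnInfo live in @/api/colonyData; only the fields parseDraft reads are reproduced):

```typescript
// Minimal stand-ins for the types imported from @/api/colonyData.
type CellValue = string | number | boolean | null;
interface ColumnInfo { name: string; type: string; notnull: boolean }

// Same logic as parseDraft in EditableCell above.
function parseDraft(draft: string, column: ColumnInfo): CellValue {
  const t = column.type.toUpperCase();
  const trimmed = draft.trim();
  if (trimmed === "") return column.notnull ? "" : null;
  if (t.includes("INT")) {
    const n = Number(trimmed);
    if (!Number.isFinite(n) || !Number.isInteger(n)) {
      throw new Error(`${column.name} expects an integer`);
    }
    return n;
  }
  if (t.includes("REAL") || t.includes("FLOA") || t.includes("DOUB") || t.includes("NUMERIC")) {
    const n = Number(trimmed);
    if (!Number.isFinite(n)) throw new Error(`${column.name} expects a number`);
    return n;
  }
  if (t.includes("BOOL")) {
    const lower = trimmed.toLowerCase();
    if (lower === "true" || lower === "1") return true;
    if (lower === "false" || lower === "0") return false;
    throw new Error(`${column.name} expects true/false`);
  }
  return draft; // TEXT / unknown affinity: keep the raw string
}

// Typed round-trips: integers and booleans parse to their native types,
// and empty input on a nullable column becomes SQL NULL, not "".
const age: ColumnInfo = { name: "age", type: "INTEGER", notnull: true };
const note: ColumnInfo = { name: "note", type: "TEXT", notnull: false };
```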
function formatValue(v: CellValue): string {
if (v == null) return "";
if (typeof v === "boolean") return v ? "true" : "false";
return String(v);
}
export function EditableCell({ value, column, editable, onCommit }: EditableCellProps) {
const [editing, setEditing] = useState(false);
const [draft, setDraft] = useState<string>(formatValue(value));
const [saving, setSaving] = useState(false);
const [error, setError] = useState<string | null>(null);
const inputRef = useRef<HTMLTextAreaElement | null>(null);
// Reset local draft whenever the upstream value changes (e.g. after
// a row refresh). Skipping this leaves stale drafts visible.
useEffect(() => {
if (!editing) setDraft(formatValue(value));
}, [value, editing]);
useEffect(() => {
if (editing && inputRef.current) {
inputRef.current.focus();
inputRef.current.select();
}
}, [editing]);
const startEdit = () => {
if (!editable || saving) return;
setError(null);
setDraft(formatValue(value));
setEditing(true);
};
const cancel = () => {
setEditing(false);
setError(null);
setDraft(formatValue(value));
};
const commit = async () => {
if (!onCommit) {
setEditing(false);
return;
}
let parsed: CellValue;
try {
parsed = parseDraft(draft, column);
} catch (err) {
setError(err instanceof Error ? err.message : String(err));
return;
}
// No-op if value didn't change.
if (parsed === value || (parsed === "" && value == null)) {
setEditing(false);
return;
}
setSaving(true);
setError(null);
try {
await onCommit(parsed);
setEditing(false);
} catch (err) {
setError(err instanceof Error ? err.message : String(err));
} finally {
setSaving(false);
}
};
const display = formatValue(value);
const isNull = value === null;
if (editing) {
return (
<div className="relative">
<textarea
ref={inputRef}
value={draft}
onChange={(e) => setDraft(e.target.value)}
onBlur={commit}
onKeyDown={(e) => {
if (e.key === "Escape") {
e.preventDefault();
cancel();
} else if (e.key === "Enter" && !e.shiftKey) {
e.preventDefault();
commit();
}
}}
rows={1}
className="w-full min-w-[120px] bg-background text-foreground text-[11px] font-mono border-2 border-primary/60 outline-none px-1.5 py-1 resize-none"
disabled={saving}
/>
{saving && (
<span className="absolute right-1 top-1 text-muted-foreground">
<Loader2 className="w-3 h-3 animate-spin" />
</span>
)}
{error && (
<div className="absolute z-20 top-full left-0 mt-0.5 bg-destructive text-destructive-foreground text-[10px] px-1.5 py-0.5 rounded whitespace-nowrap max-w-[300px] truncate shadow-lg">
{error}
</div>
)}
</div>
);
}
return (
<div
onClick={startEdit}
onDoubleClick={startEdit}
className={`min-w-[80px] max-w-[280px] px-1.5 py-1 font-mono truncate ${
editable ? "cursor-text hover:bg-muted/40" : "cursor-default"
} ${isNull ? "text-muted-foreground/60 italic" : "text-foreground/90"}`}
title={isNull ? "NULL" : display}
>
{isNull ? "NULL" : display || "\u00A0"}
</div>
);
}
@@ -0,0 +1,3 @@
export { DataGrid } from "./DataGrid";
export type { DataGridProps, SortDir } from "./DataGrid";
export { EditableCell } from "./EditableCell";
@@ -61,6 +61,9 @@ interface ColonyContextValue {
deleteColony: (colonyId: string) => Promise<void>;
/** Refresh colony data from the server */
refresh: () => void;
/** Cache-busting version for user avatar — bump after upload */
userAvatarVersion: number;
bumpUserAvatar: () => void;
}
const ColonyContext = createContext<ColonyContextValue | null>(null);
@@ -88,6 +91,9 @@ export function ColonyProvider({ children }: { children: ReactNode }) {
loadJson(LAST_VISIT_KEY, {}),
);
const [userAvatarVersion, setUserAvatarVersion] = useState(0);
const bumpUserAvatar = useCallback(() => setUserAvatarVersion((v) => v + 1), []);
const coloniesRef = useRef<Colony[]>(colonies);
useEffect(() => {
coloniesRef.current = colonies;
@@ -264,9 +270,14 @@ export function ColonyProvider({ children }: { children: ReactNode }) {
// Optimistically remove from UI
setColonies((prev) => prev.filter((c) => c.id !== colonyId));
setQueens((prev) => prev.filter((q) => q.colonyId !== colonyId));
// Delete on backend (fire-and-forget)
agentsApi.deleteAgent(colony.agentPath).catch(() => {});
}, []);
// Delete on backend, then re-fetch to confirm it's gone
try {
await agentsApi.deleteAgent(colony.agentPath);
} catch {
// Deletion failed — re-fetch to restore the colony in the UI
}
fetchColonies();
}, [fetchColonies]);
const refresh = useCallback(() => {
fetchColonies();
@@ -312,6 +323,8 @@ export function ColonyProvider({ children }: { children: ReactNode }) {
markVisited,
deleteColony,
refresh,
userAvatarVersion,
bumpUserAvatar,
}}
>
{children}
@@ -0,0 +1,110 @@
import {
createContext,
useCallback,
useContext,
useState,
type ReactNode,
} from "react";
import type { GraphNode } from "@/components/graph-types";
interface ColonyWorkersContextValue {
/** The colony session the tabbed panel should attach to. Set by
* whichever page owns a colony session (colony-chat today). The
* panel auto-renders whenever this is non-null AND the user hasn't
* dismissed it for the current session. */
sessionId: string | null;
setSessionId: (sessionId: string | null) => void;
/** The colony directory name (e.g. ``linkedin_honeycomb_messaging``)
* the panel is attached to. Comes from ``LiveSession.colony_id`` —
* legacy naming, but it's the on-disk directory under
* ``~/.hive/colonies/`` and the URL segment for the colony-scoped
* endpoints (progress + data). Required separately from sessionId
* because the URL slug is mangled by ``slugToColonyId`` and can't
* be reverse-derived. */
colonyName: string | null;
setColonyName: (colonyName: string | null) => void;
/** User dismissal: flipped by the panel's close button. Reset when
* sessionId changes (so the panel re-opens on the next colony visit
* / tab-switch) or when the header toggle re-requests it. */
dismissed: boolean;
/** Toggles the panel. When the panel is currently visible we dismiss
* it; when hidden we un-dismiss. The flip has no visible effect without
* an active sessionId: the panel only renders inside a colony room,
* which is the only place the header button appears. */
toggleColonyWorkers: () => void;
/** Worker the Sessions tab should auto-select on the next render.
* Set by ``openColonyWorkers(workerId)`` when a chat avatar is
* clicked; cleared by the panel after it consumes the value. */
focusWorkerId: string | null;
setFocusWorkerId: (workerId: string | null) => void;
/** Open the panel and optionally pre-select a worker. Un-dismisses
* the panel even if it was previously closed. Passing no workerId
* just opens the panel without changing selection. */
openColonyWorkers: (workerId?: string) => void;
/** Current session's triggers, pushed from whichever page is active
* (colony-chat today). ``ColonyWorkersPanel`` reads these to render
* its Triggers tab without having to re-subscribe to SSE itself. */
triggers: GraphNode[];
setTriggers: (triggers: GraphNode[]) => void;
}
const ColonyWorkersContext = createContext<ColonyWorkersContextValue | null>(null);
export function ColonyWorkersProvider({ children }: { children: ReactNode }) {
const [sessionId, setSessionIdState] = useState<string | null>(null);
const [colonyName, setColonyName] = useState<string | null>(null);
const [dismissed, setDismissed] = useState(false);
const [focusWorkerId, setFocusWorkerId] = useState<string | null>(null);
const [triggers, setTriggers] = useState<GraphNode[]>([]);
const setSessionId = useCallback((next: string | null) => {
setSessionIdState((prev) => {
// Reset dismissal whenever the active session changes so entering
// a new colony opens the panel again even if the user closed it
// in the previous room.
if (prev !== next) setDismissed(false);
return next;
});
}, []);
const toggleColonyWorkers = useCallback(() => {
setDismissed((d) => !d);
}, []);
const openColonyWorkers = useCallback((workerId?: string) => {
setDismissed(false);
setFocusWorkerId(workerId ?? null);
}, []);
return (
<ColonyWorkersContext.Provider
value={{
sessionId,
setSessionId,
colonyName,
setColonyName,
dismissed,
toggleColonyWorkers,
focusWorkerId,
setFocusWorkerId,
openColonyWorkers,
triggers,
setTriggers,
}}
>
{children}
</ColonyWorkersContext.Provider>
);
}
export function useColonyWorkers() {
const ctx = useContext(ColonyWorkersContext);
if (!ctx)
throw new Error("useColonyWorkers must be used within ColonyWorkersProvider");
return ctx;
}
@@ -0,0 +1,31 @@
import { createContext, useContext, useCallback, type ReactNode } from "react";
interface QueenProfileContextValue {
openQueenProfile: (queenId: string) => void;
}
const QueenProfileContext = createContext<QueenProfileContextValue | null>(null);
export function QueenProfileProvider({
onOpen,
children,
}: {
onOpen: (queenId: string) => void;
children: ReactNode;
}) {
const openQueenProfile = useCallback(
(queenId: string) => onOpen(queenId),
[onOpen],
);
return (
<QueenProfileContext.Provider value={{ openQueenProfile }}>
{children}
</QueenProfileContext.Provider>
);
}
export function useQueenProfile() {
const ctx = useContext(QueenProfileContext);
if (!ctx) throw new Error("useQueenProfile must be used within QueenProfileProvider");
return ctx;
}
@@ -0,0 +1,99 @@
import { useCallback, useRef } from "react";
import type { Dispatch, SetStateAction } from "react";
import type { ChatMessage, ImageContent } from "@/components/ChatPanel";
interface QueuedPayload {
text: string;
images?: ImageContent[];
}
interface UsePendingQueueArgs {
/** Sends a message to the backend. Must handle its own errors. */
sendToBackend: (text: string, images?: ImageContent[]) => void;
/** Setter for the chat message list — used to flip/strip the `queued` flag. */
setMessages: Dispatch<SetStateAction<ChatMessage[]>>;
/** Fires once per flush, before any message is sent. Typically sets
* isTyping/queenIsTyping so the UI reflects that the queen is busy again. */
onFlushStart?: () => void;
}
/**
* Client-side queue for user messages typed while the queen is mid-turn.
*
* - `enqueue` stores a message locally keyed by its optimistic UI id.
* - `steer` pulls one message out and sends it now — backend injects at the
* next iteration boundary.
* - `cancelQueued` drops a queued message entirely (no backend call).
* - `flushNext` pops and sends one; wire this to `llm_turn_complete` (the
* real per-turn boundary — execution_completed only fires at session
* shutdown because the queen's loop parks in _await_user_input between
* turns). Do NOT call on pause / cancel / fail.
*
* `flushRef` exposes the latest `flush` for capture-once SSE handlers.
*/
export function usePendingQueue({
sendToBackend,
setMessages,
onFlushStart,
}: UsePendingQueueArgs) {
const queueRef = useRef<Map<string, QueuedPayload>>(new Map());
const enqueue = useCallback(
(messageId: string, payload: QueuedPayload) => {
queueRef.current.set(messageId, payload);
},
[],
);
const steer = useCallback(
(messageId: string) => {
const pending = queueRef.current.get(messageId);
if (!pending) return;
queueRef.current.delete(messageId);
setMessages((prev) =>
prev.map((m) => (m.id === messageId ? { ...m, queued: false } : m)),
);
sendToBackend(pending.text, pending.images);
},
[sendToBackend, setMessages],
);
const cancelQueued = useCallback(
(messageId: string) => {
if (!queueRef.current.has(messageId)) return;
queueRef.current.delete(messageId);
setMessages((prev) => prev.filter((m) => m.id !== messageId));
},
[setMessages],
);
// Drop every queued payload without sending. Call on route-level resets
// (queen switch, colony switch) — the hook outlives those transitions,
// so without this, stale queue entries flush into the new session.
const clear = useCallback(() => {
queueRef.current.clear();
}, []);
// Pop and send the oldest queued message (Map iteration is insertion
// order in JS). One-at-a-time semantics: used for both the Stop-button
// path (cancel current turn, send next) and the natural-turn-end path
// (on `llm_turn_complete`, pick up the next queued message).
const flushNext = useCallback(() => {
const first = queueRef.current.entries().next();
if (first.done) return;
const [firstId, payload] = first.value;
queueRef.current.delete(firstId);
setMessages((prev) =>
prev.map((m) => (m.id === firstId ? { ...m, queued: false } : m)),
);
onFlushStart?.();
sendToBackend(payload.text, payload.images);
}, [sendToBackend, setMessages, onFlushStart]);
// Ref to the latest flushNext so SSE handlers captured with narrow deps
// can still invoke the up-to-date closure.
const flushNextRef = useRef(flushNext);
flushNextRef.current = flushNext;
return { enqueue, steer, cancelQueued, flushNext, flushNextRef, clear };
}
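The queue semantics above boil down to a Map keyed by optimistic message id, flushed one entry at a time in insertion (FIFO) order. A framework-free sketch of that contract, with illustrative names (the class and `sent` array are stand-ins, not the hook's API):

```typescript
// Minimal sketch of usePendingQueue's queue semantics without React:
// enqueue keyed by message id, steer removes a specific entry and sends
// it now, flushNext pops the OLDEST entry (Map iterates insertion order).
type QueuedPayload = { text: string };

class PendingQueue {
  private queue = new Map<string, QueuedPayload>();
  sent: string[] = []; // stand-in for sendToBackend

  enqueue(id: string, payload: QueuedPayload): void {
    this.queue.set(id, payload);
  }

  // Steer: pull a specific queued message out and send it immediately.
  steer(id: string): void {
    const pending = this.queue.get(id);
    if (!pending) return;
    this.queue.delete(id);
    this.sent.push(pending.text);
  }

  // flushNext: pop and send the oldest remaining entry, if any.
  flushNext(): void {
    const first = this.queue.entries().next();
    if (first.done) return;
    const [id, payload] = first.value;
    this.queue.delete(id);
    this.sent.push(payload.text);
  }
}

const q = new PendingQueue();
q.enqueue("m1", { text: "first" });
q.enqueue("m2", { text: "second" });
q.steer("m2");   // user clicked Steer on the second message
q.flushNext();   // turn boundary picks up the remaining oldest entry
console.log(q.sent); // ["second", "first"]
```

Note that steering can reorder delivery relative to typing order, which is exactly why the reconciler elsewhere in this diff matches optimistic bubbles by content rather than assuming the last one.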
@@ -1,16 +1,24 @@
import { useEffect, useState } from "react";
import { useEffect, useState, useCallback, type ReactNode } from "react";
import { Outlet, useLocation } from "react-router-dom";
import Sidebar from "@/components/Sidebar";
import AppHeader from "@/components/AppHeader";
import QueenProfilePanel from "@/components/QueenProfilePanel";
import ColonyWorkersPanel from "@/components/ColonyWorkersPanel";
import { ColonyProvider, useColony } from "@/context/ColonyContext";
import { HeaderActionsProvider } from "@/context/HeaderActionsContext";
import { QueenProfileProvider } from "@/context/QueenProfileContext";
import {
ColonyWorkersProvider,
useColonyWorkers,
} from "@/context/ColonyWorkersContext";
export default function AppLayout() {
return (
<ColonyProvider>
<HeaderActionsProvider>
<AppLayoutInner />
<ColonyWorkersProvider>
<AppLayoutInner />
</ColonyWorkersProvider>
</HeaderActionsProvider>
</ColonyProvider>
);
@@ -21,17 +29,48 @@ function AppLayoutInner() {
const location = useLocation();
const [openQueenId, setOpenQueenId] = useState<string | null>(null);
// Close the profile panel whenever the route changes so it doesn't
// bleed across pages (the panel state lives at the layout level).
// Queen profile closes on route change (it's a per-queen view).
useEffect(() => {
setOpenQueenId(null);
}, [location.pathname]);
const handleOpenQueenProfile = useCallback(
(queenId: string) => setOpenQueenId((prev) => (prev === queenId ? null : queenId)),
[],
);
return (
<QueenProfileProvider onOpen={handleOpenQueenProfile}>
<LayoutShell
openQueenId={openQueenId}
onCloseQueenProfile={() => setOpenQueenId(null)}
onOpenQueenProfile={handleOpenQueenProfile}
colonies={colonies}
/>
</QueenProfileProvider>
);
}
function LayoutShell({
openQueenId,
onCloseQueenProfile,
onOpenQueenProfile,
colonies,
}: {
openQueenId: string | null;
onCloseQueenProfile: () => void;
onOpenQueenProfile: (queenId: string) => void;
colonies: ReturnType<typeof useColony>["colonies"];
}) {
const { sessionId, colonyName, dismissed, toggleColonyWorkers } =
useColonyWorkers();
const showWorkersPanel = Boolean(sessionId && !dismissed);
return (
<div className="flex h-screen bg-background overflow-hidden">
<Sidebar />
<div className="flex-1 min-w-0 flex flex-col">
<AppHeader onOpenQueenProfile={setOpenQueenId} />
<AppHeader onOpenQueenProfile={onOpenQueenProfile} />
<div className="flex-1 min-h-0 flex">
<main className="flex-1 min-w-0 flex flex-col">
<Outlet />
@@ -39,10 +78,15 @@ function AppLayoutInner() {
{openQueenId && (
<QueenProfilePanel
queenId={openQueenId}
colonies={colonies.filter(
(c) => c.queenProfileId === openQueenId,
)}
onClose={() => setOpenQueenId(null)}
colonies={colonies.filter((c) => c.queenProfileId === openQueenId)}
onClose={onCloseQueenProfile}
/>
)}
{showWorkersPanel && sessionId && (
<ColonyWorkersPanel
sessionId={sessionId}
colonyName={colonyName}
onClose={toggleColonyWorkers}
/>
)}
</div>
@@ -50,3 +94,7 @@ function AppLayoutInner() {
</div>
);
}
// Re-exported so tsc sees React used (removes import-only warning when
// the file compiles down to JSX-less output).
export type { ReactNode };
@@ -15,6 +15,23 @@ import type { AgentEvent } from "@/api/types";
* "inbox-management" → "Inbox Management"
* "job_hunter" → "Job Hunter"
*/
/**
* Extract the colony worker uuid from a parallel-worker ``streamId``.
*
* Worker messages tag their ``streamId`` as either ``"worker"`` (single-worker
* legacy case) or ``"worker:{uuid}"`` (parallel fan-out). The uuid half is
* the colony worker id — the same identifier the Colony Workers sidebar uses
* to key its Sessions cards. Returns null for the legacy single-worker case
* or any other stream kind.
*/
export function workerIdFromStreamId(
streamId: string | null | undefined,
): string | null {
if (!streamId) return null;
const m = /^worker:(.+)$/.exec(streamId);
return m ? m[1] : null;
}
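For reference, the three streamId shapes the docstring mentions behave like this (a standalone copy of the parser, duplicated here only for illustration):

```typescript
// Mirrors the regex above: only "worker:{uuid}" yields a worker id.
function workerIdFromStreamId(
  streamId: string | null | undefined,
): string | null {
  if (!streamId) return null;
  const m = /^worker:(.+)$/.exec(streamId);
  return m ? m[1] : null;
}

console.log(workerIdFromStreamId("worker:3f2a-uuid")); // "3f2a-uuid"
console.log(workerIdFromStreamId("worker")); // null (legacy single-worker)
console.log(workerIdFromStreamId("queen"));  // null (other stream kind)
```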
export function formatAgentDisplayName(raw: string): string {
// Take the last path segment (in case it's a path like "examples/templates/foo")
const base = raw.split("/").pop() || raw;
@@ -119,6 +136,7 @@ export function sseEventToChatMessage(
createdAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: event.stream_id || undefined,
};
}
@@ -138,6 +156,10 @@ export function sseEventToChatMessage(
type: "user",
thread,
createdAt,
// Carrying execution_id here lets the optimistic-message reconciler
// distinguish server-echoed user bubbles from still-unflushed ones.
executionId: event.execution_id || undefined,
streamId: event.stream_id || undefined,
};
}
@@ -158,6 +180,7 @@ export function sseEventToChatMessage(
createdAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: event.stream_id || undefined,
};
}
@@ -172,6 +195,7 @@ export function sseEventToChatMessage(
type: "system",
thread,
createdAt,
streamId: event.stream_id || undefined,
};
}
@@ -186,6 +210,7 @@ export function sseEventToChatMessage(
type: "system",
thread,
createdAt,
streamId: event.stream_id || undefined,
};
}
@@ -194,6 +219,188 @@ export function sseEventToChatMessage(
}
}
// ---------------------------------------------------------------------------
// Stateful event replay — produces tool_status pills + regular messages
// ---------------------------------------------------------------------------
/**
* State maintained while replaying an event stream. Tracks per-stream turn
* counters, the set of active tool calls (so tool_status pill content
* reflects "tool A done, tool B running" correctly), and a tool_use_id →
* pill_msg_id map so deferred `tool_call_completed` events can find the
* pill they belong to after the turn counter moves on.
*/
export interface ReplayState {
turnCounters: Record<string, number>;
activeToolCalls: Record<
string,
{ name: string; done: boolean; streamId: string }
>;
toolUseToPill: Record<string, { msgId: string; name: string }>;
}
export function newReplayState(): ReplayState {
return { turnCounters: {}, activeToolCalls: {}, toolUseToPill: {} };
}
/**
* Process a single event and emit zero or more ChatMessage upserts.
*
* Why this exists: `sseEventToChatMessage` is stateless — one event in, at
* most one message out. But the chat's tool_status pill is a SYNTHESIZED
* message: each tool_call_started adds to an accumulating pill, and each
* tool_call_completed flips one of its tools from running to done. Live
* SSE handlers in colony-chat and queen-dm already do this synthesis
* against React refs. Cold-restore from events.jsonl used to skip
* tool_call_* events entirely, so refreshed sessions looked completely
* different from live ones — no tool activity visible, just prose.
*
* This function centralizes the synthesis so cold-restore and live paths
* can use the exact same state machine. The caller treats the returned
* messages as upserts (by id) — a later event in the same replay may
* emit the same pill id with updated content, which should REPLACE the
* earlier row in the caller's message list.
*/
export function replayEvent(
state: ReplayState,
event: AgentEvent,
thread: string,
agentDisplayName: string | undefined,
): ChatMessage[] {
const streamId = event.stream_id;
const isQueen = streamId === "queen";
const role: "queen" | "worker" = isQueen ? "queen" : "worker";
const turnKey = streamId;
const currentTurn = state.turnCounters[turnKey] ?? 0;
const eventCreatedAt = event.timestamp
? new Date(event.timestamp).getTime()
: Date.now();
const out: ChatMessage[] = [];
// Update state machine BEFORE the generic converter runs so the
// regular message emitted for this event sees the post-update
// counter (matches live handler ordering at colony-chat.tsx:525).
switch (event.type) {
case "execution_started":
state.turnCounters[turnKey] = currentTurn + 1;
// New execution for a worker resets its active tools, mirroring
// the live handler's setAgentState at colony-chat.tsx:566.
if (!isQueen) {
const keepActive: typeof state.activeToolCalls = {};
for (const [k, v] of Object.entries(state.activeToolCalls)) {
if (v.streamId !== streamId) keepActive[k] = v;
}
state.activeToolCalls = keepActive;
}
break;
case "llm_turn_complete":
state.turnCounters[turnKey] = currentTurn + 1;
break;
case "tool_call_started": {
if (!event.node_id) break;
const toolName = (event.data?.tool_name as string) || "unknown";
const toolUseId = (event.data?.tool_use_id as string) || "";
state.activeToolCalls[toolUseId] = {
name: toolName,
done: false,
streamId,
};
const pillId = `tool-pill-${streamId}-${event.execution_id || "exec"}-${currentTurn}`;
if (toolUseId) {
state.toolUseToPill[toolUseId] = { msgId: pillId, name: toolName };
}
const tools = Object.values(state.activeToolCalls)
.filter((t) => t.streamId === streamId)
.map((t) => ({ name: t.name, done: t.done }));
const allDone = tools.length > 0 && tools.every((t) => t.done);
out.push({
id: pillId,
agent: agentDisplayName || event.node_id || "Agent",
agentColor: "",
content: JSON.stringify({ tools, allDone }),
timestamp: "",
type: "tool_status",
role,
thread,
createdAt: eventCreatedAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: streamId || undefined,
});
break;
}
case "tool_call_completed": {
if (!event.node_id) break;
const toolUseId = (event.data?.tool_use_id as string) || "";
const tracked = state.toolUseToPill[toolUseId];
if (toolUseId) delete state.toolUseToPill[toolUseId];
if (toolUseId && state.activeToolCalls[toolUseId]) {
state.activeToolCalls[toolUseId].done = true;
}
if (!tracked) break;
const tools = Object.values(state.activeToolCalls)
.filter((t) => t.streamId === streamId)
.map((t) => ({ name: t.name, done: t.done }));
const allDone = tools.length > 0 && tools.every((t) => t.done);
// Re-emit the SAME pill id with updated content. Caller upserts
// by id, so this replaces the row from tool_call_started.
out.push({
id: tracked.msgId,
agent: agentDisplayName || event.node_id || "Agent",
agentColor: "",
content: JSON.stringify({ tools, allDone }),
timestamp: "",
type: "tool_status",
role,
thread,
createdAt: eventCreatedAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: streamId || undefined,
});
break;
}
}
// Regular stateless conversion (prose, user input, system notes).
const msg = sseEventToChatMessage(
event,
thread,
agentDisplayName,
state.turnCounters[turnKey] ?? 0,
);
if (msg) {
if (isQueen) msg.role = "queen";
out.push(msg);
}
return out;
}
/**
* Replay an entire event array and return a deduplicated, chronologically
* sorted ChatMessage list. Used by cold-restore paths so refreshed
* sessions match the live stream exactly.
*/
export function replayEventsToMessages(
events: AgentEvent[],
thread: string,
agentDisplayName: string | undefined,
): ChatMessage[] {
const state = newReplayState();
// Upsert by id — later emissions for the same pill replace earlier ones.
const byId = new Map<string, ChatMessage>();
for (const evt of events) {
for (const m of replayEvent(state, evt, thread, agentDisplayName)) {
byId.set(m.id, m);
}
}
return Array.from(byId.values()).sort(
(a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0),
);
}
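The upsert-by-id contract the replay relies on can be shown with a plain Map; the message shape here is trimmed down for illustration and is not the real `ChatMessage` type:

```typescript
// A later emission with the same id replaces the earlier row (this is
// how a tool_call_completed event updates its pill), and the final list
// is sorted by createdAt.
interface MiniMsg {
  id: string;
  content: string;
  createdAt: number;
}

function dedupeAndSort(emissions: MiniMsg[]): MiniMsg[] {
  const byId = new Map<string, MiniMsg>();
  for (const m of emissions) byId.set(m.id, m); // later wins
  return Array.from(byId.values()).sort((a, b) => a.createdAt - b.createdAt);
}

const out = dedupeAndSort([
  { id: "pill-1", content: '{"tools":[{"name":"grep","done":false}]}', createdAt: 5 },
  { id: "msg-1", content: "searching the repo", createdAt: 6 },
  { id: "pill-1", content: '{"tools":[{"name":"grep","done":true}]}', createdAt: 5 },
]);
// out has 2 rows; pill-1 carries the done:true content and sorts first
```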
type QueenPhase = "planning" | "building" | "staging" | "running" | "independent";
const VALID_PHASES = new Set<string>(["planning", "building", "staging", "running", "independent"]);
@@ -0,0 +1,58 @@
import { describe, expect, it } from "vitest";
import {
resolveInitialColonyPhase,
shouldUsePrefetchedColonyRestore,
} from "./colony-session-restore";
describe("shouldUsePrefetchedColonyRestore", () => {
it("reuses the cold prefetch when the backend restored that same session", () => {
expect(
shouldUsePrefetchedColonyRestore("session_forked", "session_forked"),
).toBe(true);
});
it("drops the cold prefetch when the backend restored a different session", () => {
expect(
shouldUsePrefetchedColonyRestore("session_source", "session_forked"),
).toBe(false);
});
});
describe("resolveInitialColonyPhase", () => {
it("keeps the prefetched phase when the prefetched session is still current", () => {
expect(
resolveInitialColonyPhase({
prefetchedSessionId: "session_forked",
resolvedSessionId: "session_forked",
prefetchedPhase: "independent",
serverPhase: "reviewing",
hasWorker: true,
}),
).toBe("independent");
});
it("ignores stale prefetched phase when the backend corrected the session", () => {
expect(
resolveInitialColonyPhase({
prefetchedSessionId: "session_source",
resolvedSessionId: "session_forked",
prefetchedPhase: "independent",
serverPhase: "reviewing",
hasWorker: true,
}),
).toBe("reviewing");
});
it("falls back to worker state when neither restore nor server phase is present", () => {
expect(
resolveInitialColonyPhase({
prefetchedSessionId: undefined,
resolvedSessionId: "session_forked",
prefetchedPhase: null,
serverPhase: undefined,
hasWorker: true,
}),
).toBe("working");
});
});
@@ -0,0 +1,30 @@
export type ColonyRestorePhase = "independent" | "working" | "reviewing";
export function shouldUsePrefetchedColonyRestore(
prefetchedSessionId: string | undefined,
resolvedSessionId: string,
): boolean {
return !!prefetchedSessionId && prefetchedSessionId === resolvedSessionId;
}
export function resolveInitialColonyPhase({
prefetchedSessionId,
resolvedSessionId,
prefetchedPhase,
serverPhase,
hasWorker,
}: {
prefetchedSessionId: string | undefined;
resolvedSessionId: string;
prefetchedPhase: ColonyRestorePhase | null;
serverPhase: ColonyRestorePhase | undefined;
hasWorker: boolean;
}): ColonyRestorePhase {
const restoredPhase = shouldUsePrefetchedColonyRestore(
prefetchedSessionId,
resolvedSessionId,
)
? prefetchedPhase
: null;
return restoredPhase || serverPhase || (hasWorker ? "working" : "reviewing");
}
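The precedence chain (a still-valid prefetch beats the server phase, which beats the worker-derived fallback) can be condensed into one expression. This is an illustrative standalone copy, not the exported function:

```typescript
// Condensed copy of resolveInitialColonyPhase's precedence for illustration.
type Phase = "independent" | "working" | "reviewing";

function resolvePhase(
  prefetchedSessionId: string | undefined,
  resolvedSessionId: string,
  prefetchedPhase: Phase | null,
  serverPhase: Phase | undefined,
  hasWorker: boolean,
): Phase {
  const restored =
    !!prefetchedSessionId && prefetchedSessionId === resolvedSessionId
      ? prefetchedPhase
      : null;
  return restored || serverPhase || (hasWorker ? "working" : "reviewing");
}

console.log(resolvePhase("s1", "s1", "independent", "reviewing", true)); // "independent"
console.log(resolvePhase("s0", "s1", "independent", "reviewing", true)); // "reviewing"
console.log(resolvePhase(undefined, "s1", null, undefined, true));       // "working"
```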
@@ -0,0 +1,39 @@
const MAX_IMAGE_SIZE = 512;
const MAX_FILE_BYTES = 2 * 1024 * 1024;
/** Compress an image file using canvas. Returns a JPEG blob under 2 MB. */
export async function compressImage(file: File): Promise<File> {
if (file.size <= MAX_FILE_BYTES && (file.type === "image/jpeg" || file.type === "image/webp")) {
return file;
}
return new Promise((resolve, reject) => {
const img = new Image();
img.onload = () => {
const canvas = document.createElement("canvas");
let { width, height } = img;
if (width > MAX_IMAGE_SIZE || height > MAX_IMAGE_SIZE) {
const scale = MAX_IMAGE_SIZE / Math.max(width, height);
width = Math.round(width * scale);
height = Math.round(height * scale);
}
canvas.width = width;
canvas.height = height;
const ctx = canvas.getContext("2d")!;
ctx.drawImage(img, 0, 0, width, height);
canvas.toBlob(
(blob) => {
if (!blob) return reject(new Error("Compression failed"));
resolve(new File([blob], file.name.replace(/\.\w+$/, ".jpg"), { type: "image/jpeg" }));
},
"image/jpeg",
0.85,
);
};
img.onerror = () => reject(new Error("Failed to load image"));
img.src = URL.createObjectURL(file);
});
}
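The resize arithmetic inside `compressImage` (longest edge capped at 512, aspect ratio preserved) can be checked without a browser. The helper name below is illustrative, not part of this file:

```typescript
// Pure version of the downscale math used by compressImage: scale so the
// longest edge is at most maxEdge, rounding each dimension.
function targetDims(
  width: number,
  height: number,
  maxEdge = 512,
): { width: number; height: number } {
  if (width <= maxEdge && height <= maxEdge) return { width, height };
  const scale = maxEdge / Math.max(width, height);
  return {
    width: Math.round(width * scale),
    height: Math.round(height * scale),
  };
}

console.log(targetDims(2048, 1024)); // { width: 512, height: 256 }
console.log(targetDims(300, 200));   // unchanged: { width: 300, height: 200 }
```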
@@ -1,9 +1,7 @@
import { useState, useCallback, useRef, useEffect, useMemo } from "react";
import { useParams, useLocation } from "react-router-dom";
import { Loader2, WifiOff, KeyRound, FolderOpen, X } from "lucide-react";
import { Loader2, WifiOff, KeyRound, FolderOpen, X, Users } from "lucide-react";
import type { GraphNode, NodeStatus } from "@/components/graph-types";
import TriggersPanel from "@/components/TriggersPanel";
import TriggerDetailPanel from "@/components/TriggerDetailPanel";
import ChatPanel, { type ChatMessage, type ImageContent } from "@/components/ChatPanel";
import CredentialsModal, {
type Credential,
@@ -12,12 +10,22 @@ import CredentialsModal, {
import { executionApi } from "@/api/execution";
import { sessionsApi } from "@/api/sessions";
import { useMultiSSE } from "@/hooks/use-sse";
import { usePendingQueue } from "@/hooks/use-pending-queue";
import type { LiveSession, AgentEvent } from "@/api/types";
import { sseEventToChatMessage, formatAgentDisplayName } from "@/lib/chat-helpers";
import {
sseEventToChatMessage,
formatAgentDisplayName,
replayEventsToMessages,
} from "@/lib/chat-helpers";
import {
resolveInitialColonyPhase,
shouldUsePrefetchedColonyRestore,
} from "@/lib/colony-session-restore";
import { cronToLabel } from "@/lib/graphUtils";
import { ApiError } from "@/api/client";
import { useColony } from "@/context/ColonyContext";
import { useHeaderActions } from "@/context/HeaderActionsContext";
import { useColonyWorkers } from "@/context/ColonyWorkersContext";
import { agentSlug, getQueenForAgent } from "@/lib/colony-registry";
import BrowserStatusBadge from "@/components/BrowserStatusBadge";
@@ -40,7 +48,9 @@ function truncate(s: string, max: number): string {
type SessionRestoreResult = {
messages: ChatMessage[];
restoredPhase: "planning" | "building" | "staging" | "running" | "independent" | null;
restoredPhase: "independent" | "working" | "reviewing" | null;
truncated: boolean;
droppedCount: number;
};
async function restoreSessionMessages(
@@ -49,40 +59,80 @@ async function restoreSessionMessages(
agentDisplayName: string,
): Promise<SessionRestoreResult> {
try {
const { events } = await sessionsApi.eventsHistory(sessionId);
const { events, truncated, total, returned } =
await sessionsApi.eventsHistory(sessionId);
if (events.length > 0) {
const messages: ChatMessage[] = [];
// Walk events twice:
// 1. Extract the trailing queen phase (unchanged logic).
// 2. Run the full state-machine replay so tool_status pills
// are synthesized just like the live SSE handler does.
// Without (2), refreshed sessions showed zero tool activity
// because tool_call_started/completed events are ignored by
// the stateless converter.
let runningPhase: ChatMessage["phase"] = undefined;
for (const evt of events) {
const p =
evt.type === "queen_phase_changed"
? (evt.data?.phase as string)
: evt.type === "node_loop_iteration"
? (evt.data?.phase as string | undefined)
: undefined;
if (p && ["planning", "building", "staging", "running"].includes(p)) {
? (evt.data?.phase as string | undefined)
: undefined;
if (p && ["independent", "working", "reviewing"].includes(p)) {
runningPhase = p as ChatMessage["phase"];
}
const msg = sseEventToChatMessage(evt, thread, agentDisplayName);
if (!msg) continue;
if (evt.stream_id === "queen") {
msg.role = "queen";
msg.phase = runningPhase;
}
messages.push(msg);
}
return { messages, restoredPhase: runningPhase ?? null };
const messages = replayEventsToMessages(events, thread, agentDisplayName);
// Stamp the latest phase on every queen message so the UI's
// phase-badge rendering matches what the live path would have
// displayed at the time of the refresh.
if (runningPhase) {
for (const m of messages) {
if (m.role === "queen") m.phase = runningPhase;
}
}
// Prepend a run_divider banner when the server truncated older
// events so the user knows how many are hidden.
const droppedCount = Math.max(0, total - returned);
if (truncated && droppedCount > 0) {
const firstTs = events[0]?.timestamp;
const bannerCreatedAt = firstTs ? new Date(firstTs).getTime() - 1 : 0;
messages.unshift({
id: `restore-truncated-${sessionId}`,
agent: "System",
agentColor: "",
type: "run_divider",
content: `${droppedCount.toLocaleString()} older event${droppedCount === 1 ? "" : "s"} not shown (showing last ${returned.toLocaleString()})`,
timestamp: firstTs ?? new Date().toISOString(),
thread,
createdAt: bannerCreatedAt,
});
}
return {
messages,
restoredPhase: runningPhase ?? null,
truncated,
droppedCount,
};
}
} catch {
// Event log not available
}
return { messages: [], restoredPhase: null };
return { messages: [], restoredPhase: null, truncated: false, droppedCount: 0 };
}
// ── Agent backend state ──────────────────────────────────────────────────────
interface AgentState {
sessionId: string | null;
/** Colony directory name (e.g. ``linkedin_honeycomb_messaging``) —
* the value used for the colony-scoped progress + data endpoints.
* Comes from ``LiveSession.colony_id`` (the legacy field name; it's
* the on-disk directory under ``~/.hive/colonies/``). Distinct from
* the URL's ``colonyId`` route param, which is a display-mangled
* slug. Null for queen-DM sessions not bound to a colony. */
colonyDirName: string | null;
loading: boolean;
ready: boolean;
queenReady: boolean;
@@ -90,7 +140,7 @@ interface AgentState {
displayName: string | null;
awaitingInput: boolean;
workerInputMessageId: string | null;
queenPhase: "planning" | "building" | "staging" | "running" | "independent";
queenPhase: "independent" | "working" | "reviewing";
agentPath: string | null;
currentRunId: string | null;
nodeLogs: Record<string, string[]>;
@@ -121,6 +171,7 @@ interface AgentState {
function defaultAgentState(): AgentState {
return {
sessionId: null,
colonyDirName: null,
loading: true,
ready: false,
queenReady: false,
@@ -128,7 +179,7 @@ function defaultAgentState(): AgentState {
displayName: null,
awaitingInput: false,
workerInputMessageId: null,
queenPhase: "planning",
queenPhase: "independent",
agentPath: null,
currentRunId: null,
nodeLogs: {},
@@ -156,6 +207,7 @@ export default function ColonyChat() {
const location = useLocation();
const { colonies, markVisited, refresh: refreshColonies } = useColony();
const { setActions } = useHeaderActions();
const { toggleColonyWorkers } = useColonyWorkers();
// Route state from home page (new chat flow)
const routeState = (location.state || {}) as {
@@ -202,7 +254,6 @@ export default function ColonyChat() {
const [credentialsOpen, setCredentialsOpen] = useState(false);
const [credentialAgentPath, setCredentialAgentPath] = useState<string | null>(null);
const [dismissedBanner, setDismissedBanner] = useState<string | null>(null);
const [selectedNode, setSelectedNode] = useState<GraphNode | null>(null);
// ── Header actions (Credentials, Data, Browser) ─────────────────────────
useEffect(() => {
@@ -225,11 +276,21 @@ export default function ColonyChat() {
Data
</button>
)}
{agentState.sessionId && (
<button
onClick={() => toggleColonyWorkers()}
className="flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/50 transition-colors flex-shrink-0"
title="Show / hide the colony workers panel"
>
<Users className="w-3.5 h-3.5" />
Workers
</button>
)}
<BrowserStatusBadge />
</>,
);
return () => setActions(null);
}, [agentState.sessionId, setActions]);
}, [agentState.sessionId, setActions, toggleColonyWorkers]);
// Refs for SSE callback stability
const messagesRef = useRef(messages);
@@ -244,8 +305,12 @@ export default function ColonyChat() {
const toolUseToPillRef = useRef<
Record<string, { msgId: string; name: string }>
>({});
const queenPhaseRef = useRef<string>("planning");
const queenPhaseRef = useRef<string>("independent");
const queenIterTextRef = useRef<Record<string, Record<number, string>>>({});
// Flipped true by the auto-flush path; consumed by the next empty-prompt
// client_input_requested so we don't flicker the typing bubble off while
// the queen is about to resume on the flushed input.
const queenAboutToResumeRef = useRef(false);
const suppressIntroRef = useRef(false);
const loadingRef = useRef(false);
@@ -265,16 +330,18 @@ export default function ColonyChat() {
);
}
if (options?.reconcileOptimisticUser && chatMsg.type === "user" && prev.length > 0) {
const lastIdx = prev.length - 1;
const lastMsg = prev[lastIdx];
const incomingTs = chatMsg.createdAt ?? Date.now();
const lastTs = lastMsg.createdAt ?? incomingTs;
if (
lastMsg.type === "user" &&
lastMsg.content === chatMsg.content &&
Math.abs(incomingTs - lastTs) <= 15000
) {
return prev.map((m, i) => (i === lastIdx ? { ...m, id: chatMsg.id } : m));
// Optimistic user bubbles have no executionId; server echoes do.
// Match the oldest unreconciled optimistic with the same content —
// that's the FIFO-correct pick for both auto-flush and Steer.
const idx = prev.findIndex(
(m) => m.type === "user" && !m.executionId && m.content === chatMsg.content,
);
if (idx !== -1) {
return prev.map((m, i) =>
i === idx
? { ...m, id: chatMsg.id, executionId: chatMsg.executionId }
: m,
);
}
}
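The FIFO reconciliation above can be modeled in isolation. This is a minimal sketch, not the component's actual state update; the `Msg` shape and `reconcile` name are illustrative, keeping only the fields the matching rule depends on:

```typescript
// Model of the reconcile step: a server echo (which carries an executionId)
// adopts the OLDEST optimistic user bubble with the same content, so
// repeated identical sends reconcile in FIFO order.
interface Msg {
  id: string;
  type: "user" | "system";
  content: string;
  executionId?: string;
}

function reconcile(prev: Msg[], echo: Msg): Msg[] {
  const idx = prev.findIndex(
    (m) => m.type === "user" && !m.executionId && m.content === echo.content,
  );
  if (idx === -1) return [...prev, echo]; // nothing to adopt — append
  return prev.map((m, i) =>
    i === idx ? { ...m, id: echo.id, executionId: echo.executionId } : m,
  );
}

// Two identical optimistic sends; the echo adopts the oldest one.
const before: Msg[] = [
  { id: "opt-1", type: "user", content: "hi" },
  { id: "opt-2", type: "user", content: "hi" },
];
const after = reconcile(before, {
  id: "srv-1",
  type: "user",
  content: "hi",
  executionId: "e1",
});
// after[0] is now srv-1/e1; after[1] is still the unreconciled opt-2.
```

Matching on content alone is safe here because only optimistic bubbles lack an `executionId`; once adopted, a bubble can never be matched again.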
// Insert in sorted position by createdAt so tool pills and queen
@@ -344,7 +411,7 @@ export default function ColonyChat() {
updateState({
sessionId: session.session_id,
displayName: "New Chat",
queenPhase: "planning",
queenPhase: "independent",
loading: false,
ready: true,
});
@@ -363,6 +430,7 @@ export default function ColonyChat() {
let liveSession: LiveSession | undefined;
let isResumedSession = false;
let coldRestoreId: string | undefined;
let prefetchedRestore: SessionRestoreResult | null = null;
// Check for existing live session for this agent
try {
@@ -389,43 +457,33 @@ export default function ColonyChat() {
}
}
let restoredPhase: "planning" | "building" | "staging" | "running" | "independent" | null = null;
let restoredPhase: "independent" | "working" | "reviewing" | null = null;
if (!liveSession) {
// Pre-fetch messages from cold session
let preRestoredMsgs: ChatMessage[] = [];
if (coldRestoreId) {
const displayName = formatAgentDisplayName(agentPath);
const restored = await restoreSessionMessages(coldRestoreId, agentPath, displayName);
preRestoredMsgs = restored.messages;
restoredPhase = restored.restoredPhase;
prefetchedRestore = await restoreSessionMessages(
coldRestoreId,
agentPath,
displayName,
);
}
if (coldRestoreId || preRestoredMsgs.length > 0) {
if (coldRestoreId || (prefetchedRestore?.messages.length ?? 0) > 0) {
suppressIntroRef.current = true;
}
// Create new session (pass coldRestoreId for resume)
liveSession = await sessionsApi.create(agentPath, undefined, undefined, undefined, coldRestoreId ?? undefined);
if (preRestoredMsgs.length > 0) {
preRestoredMsgs.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
setMessages(preRestoredMsgs);
}
}
const session = liveSession!;
const displayName = formatAgentDisplayName(session.colony_name || agentPath);
const initialPhase =
restoredPhase || session.queen_phase || (session.has_worker ? "staging" : "planning");
queenPhaseRef.current = initialPhase;
updateState({
sessionId: session.session_id,
displayName,
queenPhase: initialPhase,
queenSupportsImages: session.queen_supports_images !== false,
});
let restoredMessages: ChatMessage[] = [];
const reusePrefetchedRestore = shouldUsePrefetchedColonyRestore(
coldRestoreId,
session.session_id,
);
// Restore messages for live resume
if (isResumedSession) {
@@ -435,17 +493,50 @@ export default function ColonyChat() {
displayName,
);
if (restored.messages.length > 0) {
restored.messages.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
setMessages(restored.messages);
restoredMessages = restored.messages;
}
restoredPhase = restored.restoredPhase;
} else if (prefetchedRestore) {
if (reusePrefetchedRestore) {
restoredMessages = prefetchedRestore.messages;
restoredPhase = prefetchedRestore.restoredPhase;
} else {
// The backend corrected the resume target to the colony's forked
// session. Reload from that session so the first paint doesn't show
// the source queen DM or its stale independent phase.
const restored = await restoreSessionMessages(
session.session_id,
agentPath,
displayName,
);
restoredMessages = restored.messages;
restoredPhase = restored.restoredPhase;
}
}
if (restoredMessages.length > 0) {
restoredMessages.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
setMessages(restoredMessages);
}
const initialPhase = resolveInitialColonyPhase({
prefetchedSessionId: coldRestoreId,
resolvedSessionId: session.session_id,
prefetchedPhase: restoredPhase,
serverPhase: session.queen_phase,
hasWorker: session.has_worker,
});
queenPhaseRef.current = initialPhase;
const hasRestoredContent = isResumedSession || !!coldRestoreId;
if (!hasRestoredContent) suppressIntroRef.current = false;
updateState({
sessionId: session.session_id,
colonyDirName: session.colony_id,
displayName,
queenPhase: initialPhase,
queenSupportsImages: session.queen_supports_images !== false,
ready: true,
loading: false,
queenReady: hasRestoredContent,
@@ -475,7 +566,7 @@ export default function ColonyChat() {
setAgentState(defaultAgentState());
turnCounterRef.current = {};
toolUseToPillRef.current = {};
queenPhaseRef.current = "planning";
queenPhaseRef.current = "independent";
queenIterTextRef.current = {};
suppressIntroRef.current = false;
loadingRef.current = false;
@@ -568,6 +659,17 @@ export default function ColonyChat() {
}
break;
case "llm_turn_complete":
// Flush one queued message per queen LLM-turn boundary. Workers'
// LLM turns don't drain the queen queue. execution_completed
// fires only at session shutdown (the queen's loop parks in
// _await_user_input between turns), so this is the real "turn
// ended" signal. Mid-tool-call boundaries count too.
if (isQueen) {
flushNextPendingRef.current();
}
break;
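The drain policy described in the comment — at most one queued message per queen turn boundary, worker turns ignored — can be sketched as a tiny standalone queue. The `PendingQueue` class and its method names are illustrative, not the real `usePendingQueue` hook:

```typescript
// Each queen llm_turn_complete drains exactly one queued item; boundaries
// from worker turns leave the queue untouched.
class PendingQueue<T> {
  private items: T[] = [];

  enqueue(item: T): void {
    this.items.push(item);
  }

  // Called on every turn boundary; sends at most one item per queen turn.
  flushNext(isQueen: boolean, send: (item: T) => void): boolean {
    if (!isQueen || this.items.length === 0) return false;
    send(this.items.shift()!);
    return true;
  }

  get size(): number {
    return this.items.length;
  }
}

const q = new PendingQueue<string>();
q.enqueue("first");
q.enqueue("second");
const sent: string[] = [];
q.flushNext(false, (m) => sent.push(m)); // worker boundary — no drain
q.flushNext(true, (m) => sent.push(m)); // queen boundary — drains "first"
// sent === ["first"]; "second" stays queued for the next queen turn.
```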
case "execution_paused":
case "execution_failed":
case "client_output_delta":
@@ -633,16 +735,29 @@ export default function ColonyChat() {
: null;
if (isQueen) {
const prompt = (event.data?.prompt as string) || "";
updateState({
awaitingInput: true,
isTyping: false,
isStreaming: false,
queenIsTyping: false,
pendingQuestion: prompt || null,
pendingOptions: options,
pendingQuestions: questions,
pendingQuestionSource: "queen",
});
// An empty-prompt client_input_requested means the queen parked
// in auto-wait. If we just auto-flushed a queued message, our
// inject will unblock her in a moment — skip flipping isTyping
// off so the thinking bubble doesn't flicker.
if (
queenAboutToResumeRef.current &&
!prompt &&
!options &&
!questions
) {
queenAboutToResumeRef.current = false;
} else {
updateState({
awaitingInput: true,
isTyping: false,
isStreaming: false,
queenIsTyping: false,
pendingQuestion: prompt || null,
pendingOptions: options,
pendingQuestions: questions,
pendingQuestionSource: "queen",
});
}
}
}
@@ -816,6 +931,7 @@ export default function ColonyChat() {
createdAt: eventCreatedAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: sid || undefined,
});
return { ...prev, isStreaming: false, activeToolCalls: newActive };
});
@@ -946,15 +1062,11 @@ export default function ColonyChat() {
const rawPhase = event.data?.phase as string;
const eventAgentPath = (event.data?.agent_path as string) || null;
const newPhase: AgentState["queenPhase"] =
rawPhase === "independent"
? "independent"
: rawPhase === "running"
? "running"
: rawPhase === "staging"
? "staging"
: rawPhase === "planning"
? "planning"
: "building";
rawPhase === "working"
? "working"
: rawPhase === "reviewing"
? "reviewing"
: "independent";
queenPhaseRef.current = newPhase;
updateState({
queenPhase: newPhase,
@@ -1086,19 +1198,71 @@ export default function ColonyChat() {
// ── Action handlers ────────────────────────────────────────────────────
// Core backend send — bypasses queue logic. Used both for the normal path
// (agent idle) and for Steer / auto-flush paths.
const sendToBackend = useCallback(
(text: string, images?: ImageContent[]) => {
if (!agentState.sessionId || !agentState.ready) return;
executionApi.chat(agentState.sessionId, text, images).catch((err: unknown) => {
const errMsg = err instanceof Error ? err.message : String(err);
upsertMessage({
id: makeId(),
agent: "System",
agentColor: "",
content: `Failed to send message: ${errMsg}`,
timestamp: "",
type: "system",
thread: agentPath,
createdAt: Date.now(),
});
updateState({ isTyping: false, isStreaming: false, queenIsTyping: false });
});
},
[agentPath, agentState.sessionId, agentState.ready, updateState, upsertMessage],
);
const {
enqueue: enqueuePending,
steer: handleSteer,
cancelQueued: handleCancelQueued,
flushNext: flushNextPending,
flushNextRef: flushNextPendingRef,
clear: clearPendingQueue,
} = usePendingQueue({
sendToBackend,
setMessages,
onFlushStart: useCallback(() => {
updateState({ isTyping: true, queenIsTyping: true });
queenAboutToResumeRef.current = true;
}, [updateState]),
});
// Reset the queue whenever we navigate to a different colony (or to
// new-chat). The hook outlives the route change, so without this, a
// message queued in colony A would auto-flush into colony B's next
// execution_completed.
useEffect(() => {
clearPendingQueue();
}, [agentPath, isNewChat, clearPendingQueue]);
const handleCancelQueen = useCallback(async () => {
if (!agentState.sessionId) return;
try {
await executionApi.cancelQueen(agentState.sessionId);
updateState({ isTyping: false, isStreaming: false, queenIsTyping: false });
// After cancelling the current turn, immediately send the oldest
// queued message (if any). The remaining queued messages stay put
// so the user can review them or Steer/Cancel individually.
flushNextPending();
} catch {
// fire-and-forget
}
}, [agentState.sessionId, updateState]);
}, [agentState.sessionId, updateState, flushNextPending]);
const handleSend = useCallback(
(text: string, _thread: string, images?: ImageContent[]) => {
if (agentState.pendingQuestionSource === "queen") {
const answeringQuestion = agentState.pendingQuestionSource === "queen";
if (answeringQuestion) {
updateState({
pendingQuestion: null,
pendingOptions: null,
@@ -1107,8 +1271,15 @@ export default function ColonyChat() {
});
}
// Queue when the queen is mid-turn — unless the user is answering an
// ask_user prompt, in which case we send immediately so the loop can
// resume. Queued messages are held locally (not sent to the backend)
// until the user clicks Steer or the queen goes idle.
const shouldQueue = !answeringQuestion && (agentState.queenIsTyping ?? false);
const msgId = makeId();
const userMsg: ChatMessage = {
id: makeId(),
id: msgId,
agent: "You",
agentColor: "",
content: text,
@@ -1117,29 +1288,27 @@ export default function ColonyChat() {
thread: agentPath,
createdAt: Date.now(),
images,
queued: shouldQueue,
};
setMessages((prev) => [...prev, userMsg]);
suppressIntroRef.current = false;
updateState({ isTyping: true, queenIsTyping: true });
if (agentState.sessionId && agentState.ready) {
executionApi.chat(agentState.sessionId, text, images).catch((err: unknown) => {
const errMsg = err instanceof Error ? err.message : String(err);
upsertMessage({
id: makeId(),
agent: "System",
agentColor: "",
content: `Failed to send message: ${errMsg}`,
timestamp: "",
type: "system",
thread: agentPath,
createdAt: Date.now(),
});
updateState({ isTyping: false, isStreaming: false, queenIsTyping: false });
});
if (shouldQueue) {
enqueuePending(msgId, { text, images });
return;
}
updateState({ isTyping: true, queenIsTyping: true });
sendToBackend(text, images);
},
[agentPath, agentState.sessionId, agentState.ready, agentState.pendingQuestionSource, updateState, upsertMessage],
[
agentPath,
agentState.queenIsTyping,
agentState.pendingQuestionSource,
updateState,
sendToBackend,
enqueuePending,
],
);
const handleQueenQuestionAnswer = useCallback(
@@ -1184,16 +1353,44 @@ export default function ColonyChat() {
.catch(() => {});
}, [agentState.sessionId, agentState.pendingQuestion, updateState]);
// ── Resolved selected node (sync with live graph updates) ──────────────
const liveSelectedNode = selectedNode && graphNodes.find((n) => n.id === selectedNode.id);
const resolvedSelectedNode = liveSelectedNode || selectedNode;
const triggers = useMemo(
() => graphNodes.filter((n) => n.nodeType === "trigger"),
[graphNodes],
);
// Mirror live triggers into the shared context so the tabbed
// ColonyWorkersPanel (rendered at the layout level) can render the
// Triggers tab without having to re-subscribe to the session SSE.
const {
setTriggers: setCtxTriggers,
setSessionId: setCtxSessionId,
setColonyName: setCtxColonyName,
} = useColonyWorkers();
useEffect(() => {
setCtxTriggers(triggers);
return () => setCtxTriggers([]);
}, [triggers, setCtxTriggers]);
// Publish the live colony session id to the context. The AppLayout
// renders ``ColonyWorkersPanel`` whenever this is non-null AND the
// user hasn't dismissed it (via the X button). Cleanup clears it so
// the panel closes when we leave the colony room.
useEffect(() => {
setCtxSessionId(agentState.sessionId ?? null);
return () => setCtxSessionId(null);
}, [agentState.sessionId, setCtxSessionId]);
// Publish the colony directory name (e.g. ``linkedin_honeycomb_messaging``)
// alongside the session id. The panel's progress + data tabs route by
// colony name, not session — one progress.db per colony, independent
// of which session is open. Comes from ``LiveSession.colony_id`` (the
// on-disk directory) rather than the URL slug, which is mangled by
// ``slugToColonyId``.
useEffect(() => {
setCtxColonyName(agentState.colonyDirName ?? null);
return () => setCtxColonyName(null);
}, [agentState.colonyDirName, setCtxColonyName]);
// ── Render ─────────────────────────────────────────────────────────────
if (!colony && !isNewChat && !agentState.loading) {
@@ -1269,6 +1466,8 @@ export default function ColonyChat() {
messages={messages}
onSend={handleSend}
onCancel={handleCancelQueen}
onSteer={handleSteer}
onCancelQueued={handleCancelQueued}
activeThread={agentPath}
isWaiting={(agentState.queenIsTyping && !agentState.isStreaming) ?? false}
isWorkerWaiting={(agentState.workerIsTyping && !agentState.isStreaming) ?? false}
@@ -1283,32 +1482,14 @@ export default function ColonyChat() {
onQuestionDismiss={handleQuestionDismiss}
contextUsage={agentState.contextUsage}
supportsImages={agentState.queenSupportsImages}
queenProfileId={colony?.queenProfileId ?? null}
/>
</div>
{/* Triggers sidebar — only rendered when the colony actually has triggers */}
{triggers.length > 0 && (
<div className="w-[260px] flex-shrink-0">
<TriggersPanel
triggers={triggers}
selectedId={resolvedSelectedNode?.id ?? null}
onSelect={(trigger) =>
setSelectedNode((prev) => (prev?.id === trigger.id ? null : trigger))
}
/>
</div>
)}
{/* Trigger detail panel */}
{resolvedSelectedNode && resolvedSelectedNode.nodeType === "trigger" && (
<div className="w-[380px] min-w-[320px] flex-shrink-0">
<TriggerDetailPanel
trigger={resolvedSelectedNode}
sessionId={agentState.sessionId || ""}
onClose={() => setSelectedNode(null)}
/>
</div>
)}
{/* Workers / Triggers / Skills / Tools now live in the tabbed
ColonyWorkersPanel rendered by AppLayout. Trigger data is
pushed up via ColonyWorkersContext (see the useEffect that
mirrors `triggers` into context.setTriggers). */}
</div>
<CredentialsModal
+38 -15
@@ -1,4 +1,4 @@
import { useState, useCallback, useRef } from "react";
import { useState, useCallback, useRef, useEffect } from "react";
import { NavLink } from "react-router-dom";
import { User } from "lucide-react";
import { useColony } from "@/context/ColonyContext";
@@ -6,6 +6,25 @@ import type { QueenProfileSummary, Colony } from "@/types/colony";
import { getColonyIcon } from "@/lib/colony-registry";
import QueenProfilePanel from "@/components/QueenProfilePanel";
/* ── User avatar (CEO card) ──────────────────────────────────────────── */
function UserAvatar({ initials, avatarVersion }: { initials: string; avatarVersion: number }) {
const [hasAvatar, setHasAvatar] = useState(true);
const url = `/api/config/profile/avatar?v=${avatarVersion}`;
useEffect(() => setHasAvatar(true), [avatarVersion]);
return (
<div className="w-12 h-12 rounded-full bg-primary/15 mx-auto mb-3 flex items-center justify-center overflow-hidden">
{hasAvatar ? (
<img src={url} alt="" className="w-full h-full object-cover" onError={() => setHasAvatar(false)} />
) : initials ? (
<span className="text-sm font-bold text-primary">{initials}</span>
) : (
<User className="w-5 h-5 text-primary" />
)}
</div>
);
}
/* ── Colony tag (clickable link to colony chat) ───────────────────────── */
function ColonyTag({ colony }: { colony: Colony }) {
@@ -23,6 +42,20 @@ function ColonyTag({ colony }: { colony: Colony }) {
/* ── Queen card in the org grid ───────────────────────────────────────── */
function QueenAvatar({ queenId, name, size = "w-11 h-11" }: { queenId: string; name: string; size?: string }) {
const [hasAvatar, setHasAvatar] = useState(true);
const url = `/api/queen/${queenId}/avatar`;
return (
<div className={`${size} rounded-full bg-primary/15 flex items-center justify-center overflow-hidden`}>
{hasAvatar ? (
<img src={url} alt={name} className="w-full h-full object-cover" onError={() => setHasAvatar(false)} />
) : (
<span className="text-sm font-bold text-primary">{name.charAt(0)}</span>
)}
</div>
);
}
function QueenCard({
queen,
colonies,
@@ -48,10 +81,8 @@ function QueenCard({
: "border-border/60 hover:border-primary/30 hover:bg-primary/[0.03]"
}`}
>
<div className="w-11 h-11 rounded-full bg-primary/15 flex items-center justify-center mb-2.5">
<span className="text-sm font-bold text-primary">
{queen.name.charAt(0)}
</span>
<div className="mb-2.5">
<QueenAvatar queenId={queen.id} name={queen.name} />
</div>
<span className="text-sm font-semibold text-foreground group-hover:text-primary transition-colors">
{queen.name}
@@ -79,7 +110,7 @@ function QueenCard({
/* ── Main org chart page ──────────────────────────────────────────────── */
export default function OrgChart() {
const { queenProfiles, colonies, userProfile } = useColony();
const { queenProfiles, colonies, userProfile, userAvatarVersion } = useColony();
const [selectedQueenId, setSelectedQueenId] = useState<string | null>(null);
// Pan & zoom state
@@ -172,15 +203,7 @@ export default function OrgChart() {
<div className="min-w-max px-6 pt-16 pb-10 mx-auto flex flex-col items-center">
{/* CEO card */}
<div className="rounded-xl border border-border/60 bg-card px-8 py-5 text-center">
<div className="w-12 h-12 rounded-full bg-primary/15 mx-auto mb-3 flex items-center justify-center">
{initials ? (
<span className="text-sm font-bold text-primary">
{initials}
</span>
) : (
<User className="w-5 h-5 text-primary" />
)}
</div>
<UserAvatar initials={initials} avatarVersion={userAvatarVersion} />
<div className="font-semibold text-sm text-foreground">
{userProfile.displayName || "You"}
</div>
+217 -70
@@ -1,12 +1,24 @@
import { useState, useMemo } from "react";
import { useState, useMemo, useEffect, useCallback } from "react";
import { useNavigate } from "react-router-dom";
import { Search, Copy, Check, Sparkles, MessageSquarePlus } from "lucide-react";
import { prompts, promptCategories, categoryToQueen, queenNames } from "@/data/prompts";
import { Search, Copy, Check, Sparkles, MessageSquarePlus, Plus, X, Trash2, ChevronLeft, ChevronRight } from "lucide-react";
import { prompts, promptCategories, categoryToQueen, queenNames, type Prompt } from "@/data/prompts";
import { promptsApi, type CustomPrompt } from "@/api/prompts";
function PromptCard({ prompt, onUse }: { prompt: typeof prompts[0]; onUse: (content: string, category: string) => void }) {
const PAGE_SIZE = 24;
function PromptCard({
prompt,
onUse,
onDelete,
}: {
prompt: Prompt | CustomPrompt;
onUse: (content: string, category: string) => void;
onDelete?: () => void;
}) {
const [copied, setCopied] = useState(false);
const queenId = categoryToQueen[prompt.category];
const queenName = queenNames[queenId] || "Queen";
const isCustom = "custom" in prompt && prompt.custom;
const handleCopy = async () => {
await navigator.clipboard.writeText(prompt.content);
@@ -15,27 +27,29 @@ function PromptCard({ prompt, onUse }: { prompt: typeof prompts[0]; onUse: (cont
};
return (
<div className="group rounded-lg border border-border/60 bg-card p-4 hover:border-primary/30 hover:shadow-sm transition-all">
<div className="flex items-start justify-between gap-3 mb-2">
<h3 className="text-sm font-medium text-foreground line-clamp-1">
{prompt.title}
</h3>
<div className="flex items-center gap-1 opacity-0 group-hover:opacity-100 transition-opacity">
<button
onClick={handleCopy}
className="p-1.5 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/60 transition-colors"
title="Copy prompt"
>
<div className="group rounded-lg border border-border/60 bg-card p-4 hover:border-primary/30 hover:shadow-sm transition-all flex flex-col">
<div className="flex items-start justify-between gap-2 mb-2">
<div className="flex items-center gap-2 min-w-0 flex-1">
<h3 className="text-sm font-medium text-foreground line-clamp-1">{prompt.title}</h3>
{isCustom && (
<span className="flex-shrink-0 px-1.5 py-0.5 rounded text-[10px] font-medium bg-primary/10 text-primary">My Prompt</span>
)}
</div>
<div className="flex items-center gap-0.5 flex-shrink-0 opacity-0 group-hover:opacity-100">
<button onClick={handleCopy} className="p-1.5 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/60" title="Copy prompt">
{copied ? <Check className="w-3.5 h-3.5 text-emerald-500" /> : <Copy className="w-3.5 h-3.5" />}
</button>
{isCustom && onDelete && (
<button onClick={onDelete} className="p-1.5 rounded-md text-muted-foreground hover:text-destructive hover:bg-destructive/10" title="Delete prompt">
<Trash2 className="w-3.5 h-3.5" />
</button>
)}
</div>
</div>
<p className="text-xs text-muted-foreground line-clamp-3 leading-relaxed mb-3">
{prompt.content}
</p>
<p className="text-xs text-muted-foreground line-clamp-3 leading-relaxed mb-3 flex-1">{prompt.content}</p>
<button
onClick={() => onUse(prompt.content, prompt.category)}
className="w-full flex items-center justify-center gap-1.5 rounded-md border border-primary/20 bg-primary/[0.04] py-1.5 text-xs font-medium text-primary hover:bg-primary/[0.08] transition-colors"
className="w-full flex items-center justify-center gap-1.5 rounded-md border border-primary/20 bg-primary/[0.04] py-1.5 text-xs font-medium text-primary hover:bg-primary/[0.08]"
>
<MessageSquarePlus className="w-3.5 h-3.5" />
Ask {queenName}
@@ -44,61 +58,158 @@ function PromptCard({ prompt, onUse }: { prompt: typeof prompts[0]; onUse: (cont
);
}
function AddPromptModal({ open, onClose, onSave }: { open: boolean; onClose: () => void; onSave: (title: string, category: string, content: string) => Promise<void> }) {
const [title, setTitle] = useState("");
const [category, setCategory] = useState("");
const [content, setContent] = useState("");
const [saving, setSaving] = useState(false);
if (!open) return null;
const handleSubmit = async () => {
if (!title.trim() || !content.trim()) return;
setSaving(true);
await onSave(title.trim(), category.trim(), content.trim());
setSaving(false);
setTitle("");
setCategory("");
setContent("");
onClose();
};
return (
<div className="fixed inset-0 z-50 flex items-center justify-center">
<div className="absolute inset-0 bg-black/40" onClick={onClose} />
<div className="relative bg-card border border-border/60 rounded-2xl shadow-2xl w-full max-w-[520px] p-6">
<div className="flex items-center justify-between mb-5">
<h3 className="text-lg font-semibold text-foreground">Add Custom Prompt</h3>
<button onClick={onClose} className="p-1 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/50">
<X className="w-4 h-4" />
</button>
</div>
<div className="flex flex-col gap-4">
<div>
<label className="text-sm font-medium text-foreground mb-1.5 block">Title <span className="text-primary">*</span></label>
<input type="text" value={title} onChange={(e) => setTitle(e.target.value)} placeholder="e.g. Weekly Report Generator"
className="w-full bg-muted/30 border border-border/50 rounded-lg px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary/40" />
</div>
<div>
<label className="text-sm font-medium text-foreground mb-1.5 block">Category</label>
<select value={category} onChange={(e) => setCategory(e.target.value)}
className="w-full bg-muted/30 border border-border/50 rounded-lg px-3 py-2 text-sm text-foreground focus:outline-none focus:ring-1 focus:ring-primary/40">
<option value="">Custom</option>
{promptCategories.map((cat) => (
<option key={cat.id} value={cat.id}>{cat.name}</option>
))}
</select>
</div>
<div>
<label className="text-sm font-medium text-foreground mb-1.5 block">Prompt Content <span className="text-primary">*</span></label>
<textarea value={content} onChange={(e) => setContent(e.target.value)} rows={8}
placeholder="Enter your prompt..."
className="w-full bg-muted/30 border border-border/50 rounded-lg px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground/50 focus:outline-none focus:ring-1 focus:ring-primary/40 resize-none" />
</div>
<div className="flex justify-end gap-2 pt-1">
<button onClick={onClose} className="px-4 py-2 rounded-lg text-sm font-medium text-muted-foreground hover:text-foreground hover:bg-muted/30">Cancel</button>
<button onClick={handleSubmit} disabled={saving || !title.trim() || !content.trim()}
className="px-4 py-2 rounded-lg bg-primary text-primary-foreground text-sm font-medium hover:bg-primary/90 disabled:opacity-50 disabled:cursor-not-allowed">
{saving ? "Saving..." : "Add Prompt"}
</button>
</div>
</div>
</div>
</div>
);
}
export default function PromptLibrary() {
const navigate = useNavigate();
const [searchQuery, setSearchQuery] = useState("");
const [selectedCategory, setSelectedCategory] = useState<string | null>(null);
const inactiveCategoryClass =
"bg-muted/60 text-foreground/75 hover:bg-muted/80 hover:text-foreground";
const [page, setPage] = useState(0);
const [addModalOpen, setAddModalOpen] = useState(false);
const [customPrompts, setCustomPrompts] = useState<CustomPrompt[]>([]);
const inactiveCategoryClass = "bg-muted/60 text-foreground/75 hover:bg-muted/80 hover:text-foreground";
useEffect(() => {
promptsApi.list().then((r) => setCustomPrompts(r.prompts)).catch(() => {});
}, []);
// Merge built-in + custom prompts
const allPrompts = useMemo(() => [...customPrompts, ...prompts], [customPrompts]);
const filteredPrompts = useMemo(() => {
let result = prompts;
if (selectedCategory) {
let result = allPrompts;
if (selectedCategory === "custom") {
result = result.filter((p) => "custom" in p && p.custom);
} else if (selectedCategory) {
result = result.filter((p) => p.category === selectedCategory);
}
if (searchQuery.trim()) {
const query = searchQuery.toLowerCase();
result = result.filter(
(p) =>
p.title.toLowerCase().includes(query) ||
p.content.toLowerCase().includes(query)
(p) => p.title.toLowerCase().includes(query) || p.content.toLowerCase().includes(query),
);
}
return result;
}, [searchQuery, selectedCategory]);
}, [allPrompts, searchQuery, selectedCategory]);
// Reset page when filters change
useEffect(() => setPage(0), [searchQuery, selectedCategory]);
const totalPages = Math.max(1, Math.ceil(filteredPrompts.length / PAGE_SIZE));
const pagedPrompts = filteredPrompts.slice(page * PAGE_SIZE, (page + 1) * PAGE_SIZE);
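The paging arithmetic here, and the ellipsis page strip rendered in the footer below, can be sketched as pure helpers. `pageWindow` and `pageItems` are assumed names for illustration; `PAGE_SIZE` mirrors the constant above:

```typescript
const PAGE_SIZE = 24;

// Slice the filtered list down to the current zero-based page.
function pageWindow<T>(items: T[], page: number): T[] {
  return items.slice(page * PAGE_SIZE, (page + 1) * PAGE_SIZE);
}

// Page-number strip: keep first, last, and current±1, then collapse the
// gaps to "..." — the same shape the pagination footer renders.
function pageItems(totalPages: number, page: number): (number | "...")[] {
  return Array.from({ length: totalPages }, (_, i) => i)
    .filter((i) => i === 0 || i === totalPages - 1 || Math.abs(i - page) <= 1)
    .reduce<(number | "...")[]>((acc, i) => {
      const last = acc[acc.length - 1];
      if (typeof last === "number" && i - last > 1) acc.push("...");
      acc.push(i);
      return acc;
    }, []);
}

// e.g. 10 pages, current page 4 → [0, "...", 3, 4, 5, "...", 9]
```

Because adjacent kept indices never differ by more than one near the edges, a gap of exactly one page (e.g. page 2 between 0 and 3) still collapses to "..." — acceptable for a compact strip.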
const handleUsePrompt = (content: string, category: string) => {
const queenId = categoryToQueen[category];
if (!queenId) return;
sessionStorage.setItem(`queenFirstMessage:${queenId}`, content);
navigate(`/queen/${queenId}?new=1`);
};
const handleAddPrompt = useCallback(async (title: string, category: string, content: string) => {
const created = await promptsApi.create(title, category, content);
setCustomPrompts((prev) => [created, ...prev]);
}, []);
const handleDeletePrompt = useCallback(async (id: string) => {
await promptsApi.delete(id);
setCustomPrompts((prev) => prev.filter((p) => p.id !== id));
}, []);
const customCount = customPrompts.length;
return (
<div className="flex-1 flex overflow-hidden">
{/* Main content */}
<div className="flex-1 flex flex-col min-w-0">
{/* Header */}
<div className="px-6 py-4 border-b border-border/60">
<div className="flex items-baseline gap-3 mb-4">
<h2 className="text-lg font-semibold text-foreground flex items-center gap-2">
<Sparkles className="w-5 h-5 text-primary" />
Prompt Library
</h2>
<span className="text-xs text-muted-foreground">
{prompts.length} prompts across {promptCategories.length} categories
</span>
<div className="flex items-center justify-between mb-4">
<div className="flex items-baseline gap-3">
<h2 className="text-lg font-semibold text-foreground flex items-center gap-2">
<Sparkles className="w-5 h-5 text-primary" />
Prompt Library
</h2>
<span className="text-xs text-muted-foreground">
{allPrompts.length} prompts across {promptCategories.length + (customCount > 0 ? 1 : 0)} categories
</span>
</div>
<button onClick={() => setAddModalOpen(true)}
className="flex items-center gap-1.5 px-3 py-1.5 rounded-lg bg-primary text-primary-foreground text-xs font-medium hover:bg-primary/90">
<Plus className="w-3.5 h-3.5" />
Add Prompt
</button>
</div>
{/* Search bar */}
<div className="relative">
<Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-muted-foreground" />
<input
type="text"
placeholder="Search prompts by title or content..."
value={searchQuery}
type="text" placeholder="Search prompts by title or content..." value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
className="w-full pl-9 pr-4 py-2 rounded-lg border border-border/60 bg-background text-sm focus:outline-none focus:border-primary/40 focus:ring-1 focus:ring-primary/20"
/>
@@ -108,28 +219,20 @@ export default function PromptLibrary() {
{/* Category filter */}
<div className="px-6 py-3 border-b border-border/60 bg-muted/20">
<div className="flex items-center gap-2 flex-wrap">
<button
onClick={() => setSelectedCategory(null)}
className={`px-3 py-1.5 rounded-full text-xs font-medium transition-colors ${
selectedCategory === null
? "bg-primary text-primary-foreground"
: inactiveCategoryClass
}`}
>
<button onClick={() => setSelectedCategory(null)}
className={`px-3 py-1.5 rounded-full text-xs font-medium ${selectedCategory === null ? "bg-primary text-primary-foreground" : inactiveCategoryClass}`}>
All Categories
</button>
{customCount > 0 && (
<button onClick={() => setSelectedCategory("custom")}
className={`px-3 py-1.5 rounded-full text-xs font-medium ${selectedCategory === "custom" ? "bg-primary text-primary-foreground" : inactiveCategoryClass}`}>
My Prompts <span className="ml-1.5 opacity-60">({customCount})</span>
</button>
)}
{promptCategories.map((cat) => (
<button
key={cat.id}
onClick={() => setSelectedCategory(cat.id)}
className={`px-3 py-1.5 rounded-full text-xs font-medium transition-colors ${
selectedCategory === cat.id
? "bg-primary text-primary-foreground"
: inactiveCategoryClass
}`}
>
{cat.name}
<span className="ml-1.5 opacity-60">({cat.count})</span>
<button key={cat.id} onClick={() => setSelectedCategory(cat.id)}
className={`px-3 py-1.5 rounded-full text-xs font-medium ${selectedCategory === cat.id ? "bg-primary text-primary-foreground" : inactiveCategoryClass}`}>
{cat.name} <span className="ml-1.5 opacity-60">({cat.count})</span>
</button>
))}
</div>
@@ -137,23 +240,67 @@ export default function PromptLibrary() {
{/* Prompts grid */}
<div className="flex-1 overflow-y-auto p-6">
{filteredPrompts.length > 0 ? (
<div className="grid grid-cols-1 gap-6 md:grid-cols-2 lg:grid-cols-3">
{filteredPrompts.map((prompt) => (
<PromptCard key={prompt.id} prompt={prompt} onUse={handleUsePrompt} />
{pagedPrompts.length > 0 ? (
<div className="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3">
{pagedPrompts.map((prompt) => (
<PromptCard
key={typeof prompt.id === "string" ? prompt.id : `builtin-${prompt.id}`}
prompt={prompt}
onUse={handleUsePrompt}
onDelete={"custom" in prompt && prompt.custom ? () => handleDeletePrompt(prompt.id as string) : undefined}
/>
))}
</div>
) : (
<div className="flex flex-col items-center justify-center h-full text-center">
<Sparkles className="w-10 h-10 text-muted-foreground/30 mb-3" />
<p className="text-sm text-muted-foreground">No prompts found</p>
<p className="text-xs text-muted-foreground/60 mt-1">
Try adjusting your search or category filter
</p>
<p className="text-xs text-muted-foreground/60 mt-1">Try adjusting your search or category filter</p>
</div>
)}
</div>
{/* Pagination */}
{totalPages > 1 && (
<div className="px-6 py-3 border-t border-border/60 flex items-center justify-between">
<span className="text-xs text-muted-foreground">
{page * PAGE_SIZE + 1}–{Math.min((page + 1) * PAGE_SIZE, filteredPrompts.length)} of {filteredPrompts.length}
</span>
<div className="flex items-center gap-1">
<button onClick={() => setPage((p) => Math.max(0, p - 1))} disabled={page === 0}
className="p-1.5 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/60 disabled:opacity-30 disabled:cursor-not-allowed">
<ChevronLeft className="w-4 h-4" />
</button>
{Array.from({ length: totalPages }, (_, i) => i)
.filter((i) => i === 0 || i === totalPages - 1 || Math.abs(i - page) <= 1)
.reduce<(number | "...")[]>((acc, i) => {
if (acc.length > 0) {
const last = acc[acc.length - 1];
if (typeof last === "number" && i - last > 1) acc.push("...");
}
acc.push(i);
return acc;
}, [])
.map((item, idx) =>
item === "..." ? (
<span key={`ellipsis-${idx}`} className="px-1 text-xs text-muted-foreground">...</span>
) : (
<button key={item} onClick={() => setPage(item as number)}
className={`min-w-[28px] h-7 rounded-md text-xs font-medium ${page === item ? "bg-primary text-primary-foreground" : "text-muted-foreground hover:text-foreground hover:bg-muted/60"}`}>
{(item as number) + 1}
</button>
),
)}
<button onClick={() => setPage((p) => Math.min(totalPages - 1, p + 1))} disabled={page >= totalPages - 1}
className="p-1.5 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/60 disabled:opacity-30 disabled:cursor-not-allowed">
<ChevronRight className="w-4 h-4" />
</button>
</div>
</div>
)}
</div>
<AddPromptModal open={addModalOpen} onClose={() => setAddModalOpen(false)} onSave={handleAddPrompt} />
</div>
);
}
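The page-button windowing above (always show the first page, the last page, and the current page ±1, with "..." filling any gap) is a pure function of `page` and `totalPages`. As an illustrative sketch only — a Python translation of the filter/reduce chain, not the component's actual code:

```python
def page_window(page: int, total_pages: int) -> list:
    """First page, last page, and current +/-1, with "..." marking gaps."""
    kept = [
        i for i in range(total_pages)
        if i == 0 or i == total_pages - 1 or abs(i - page) <= 1
    ]
    out = []
    for i in kept:
        # Insert an ellipsis marker whenever we skip over hidden pages.
        if out and isinstance(out[-1], int) and i - out[-1] > 1:
            out.append("...")
        out.append(i)
    return out
```

For example, with 10 pages and the current page index 4, the window is `[0, "...", 3, 4, 5, "...", 9]`, matching the rendered button row.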
+176 -57
@@ -10,8 +10,12 @@ import { executionApi } from "@/api/execution";
import { sessionsApi } from "@/api/sessions";
import { queensApi } from "@/api/queens";
import { useMultiSSE } from "@/hooks/use-sse";
import { usePendingQueue } from "@/hooks/use-pending-queue";
import type { AgentEvent, HistorySession } from "@/api/types";
import { sseEventToChatMessage } from "@/lib/chat-helpers";
import {
sseEventToChatMessage,
replayEventsToMessages,
} from "@/lib/chat-helpers";
import { useColony } from "@/context/ColonyContext";
import { useHeaderActions } from "@/context/HeaderActionsContext";
import { getQueenForAgent, slugToColonyId } from "@/lib/colony-registry";
@@ -65,8 +69,12 @@ export default function QueenDM() {
Record<string, { msgId: string; name: string }>
>({});
const queenIterTextRef = useRef<Record<string, Record<number, string>>>({});
// Flipped true by the auto-flush path; consumed by the next empty-prompt
// client_input_requested so we don't flicker the typing bubble off while
// the queen is about to resume on the flushed input.
const queenAboutToResumeRef = useRef(false);
const [queenPhase, setQueenPhase] = useState<
"planning" | "building" | "staging" | "running" | "independent"
"independent" | "working" | "reviewing"
>("independent");
const resetViewState = useCallback(() => {
@@ -90,17 +98,34 @@ export default function QueenDM() {
const restoreMessages = useCallback(
async (sid: string, cancelled: () => boolean) => {
try {
const { events } = await sessionsApi.eventsHistory(sid);
const { events, truncated, total, returned } =
await sessionsApi.eventsHistory(sid);
if (cancelled()) return;
const restored: ChatMessage[] = [];
for (const evt of events) {
const msg = sseEventToChatMessage(evt, "queen-dm", queenName);
if (!msg) continue;
if (evt.stream_id === "queen") msg.role = "queen";
restored.push(msg);
// Use the stateful replay so tool_status pills are synthesized
// the same way the live SSE handler does — without this the
// refreshed queen DM shows zero tool activity.
const restored = replayEventsToMessages(events, "queen-dm", queenName);
// Show a banner if the server truncated older events.
const droppedCount = Math.max(0, total - returned);
if (truncated && droppedCount > 0) {
const firstTs = events[0]?.timestamp;
const bannerCreatedAt = firstTs
? new Date(firstTs).getTime() - 1
: 0;
restored.unshift({
id: `restore-truncated-${sid}`,
agent: "System",
agentColor: "",
type: "run_divider",
content: `${droppedCount.toLocaleString()} older event${droppedCount === 1 ? "" : "s"} not shown (showing last ${returned.toLocaleString()})`,
timestamp: firstTs ?? new Date().toISOString(),
thread: "queen-dm",
createdAt: bannerCreatedAt,
});
}
if (restored.length > 0 && !cancelled()) {
restored.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
setMessages(restored);
// Only clear typing if the history contains a completed execution;
// during bootstrap the queen is still processing.
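The truncation banner above is driven entirely by the `total`/`returned`/`truncated` fields on the history response. A minimal sketch of that decision, with field names mirroring the hunk (illustrative, not the actual frontend code):

```python
def truncation_banner(total: int, returned: int, truncated: bool):
    """Return the banner text when the server dropped older events, else None."""
    dropped = max(0, total - returned)
    if not (truncated and dropped > 0):
        return None
    plural = "" if dropped == 1 else "s"
    return f"{dropped:,} older event{plural} not shown (showing last {returned:,})"
```

The banner is unshifted onto the restored list with a `createdAt` one millisecond before the first event, so the subsequent sort keeps it pinned at the top.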
@@ -137,18 +162,18 @@ export default function QueenDM() {
(async () => {
try {
let bootstrapSessionId: string | null = null;
if (isBootstrap) {
// Pass the pending message as initial_prompt so the queen
// processes it immediately (no phantom "Hello" greeting).
await queensApi.createNewSession(
const bootstrapResult = await queensApi.createNewSession(
queenId,
pendingFirstMessage ?? undefined,
"independent",
);
bootstrapSessionId = bootstrapResult.session_id;
} else if (selectedSessionParam) {
await queensApi.selectSession(queenId, selectedSessionParam);
} else {
await queensApi.getOrCreateSession(queenId, undefined, "independent");
}
if (cancelled) return;
let sid: string;
@@ -186,14 +211,20 @@ export default function QueenDM() {
setSearchParams({ session: sid }, { replace: true });
}
} else {
// No session specified - get or create one
const result = await queensApi.getOrCreateSession(
queenId,
undefined,
"independent",
);
if (cancelled) return;
sid = result.session_id;
// Bootstrap uses the session id from createNewSession directly so a
// stale live session for this queen can't steal the flow. Otherwise
// fall back to get-or-create.
if (bootstrapSessionId) {
sid = bootstrapSessionId;
} else {
const result = await queensApi.getOrCreateSession(
queenId,
undefined,
"independent",
);
if (cancelled) return;
sid = result.session_id;
}
setSessionId(sid);
setQueenReady(true);
@@ -398,11 +429,11 @@ export default function QueenDM() {
setQueenReady(true);
setActiveToolCalls({});
toolUseToPillRef.current = {};
// Clear queued flag on all user messages now that the queen is processing
setMessages((prev) => {
if (!prev.some((m) => m.queued)) return prev;
return prev.map((m) => (m.queued ? { ...m, queued: undefined } : m));
});
// Do NOT clear `queued` on user messages here. The pending queue
// hook owns that flag — it's cleared on steer / cancel / flush.
// If the user has queued messages that haven't been flushed yet,
// the queen starting a new turn (e.g. from a steer or from the
// flush itself) shouldn't hide the still-queued ones.
break;
case "execution_completed":
@@ -413,6 +444,13 @@ export default function QueenDM() {
case "llm_turn_complete":
turnCounterRef.current++;
setActiveToolCalls({});
// Flush one queued message per LLM turn boundary. This is the
// real "turn ended" signal in a queen DM — execution_completed
// only fires at session shutdown because the event loop parks in
// _await_user_input between turns. Mid-tool-call boundaries
// count too: sending now lets the queen pick up the message on
// her next drain, same as clicking Steer.
flushNextPendingRef.current();
break;
case "client_output_delta":
@@ -477,6 +515,19 @@ export default function QueenDM() {
options?: string[];
}[])
: null;
// An empty-prompt client_input_requested means the queen parked
// in auto-wait. If we just auto-flushed a queued message, our
// inject will unblock her in a moment — skip flipping isTyping
// off so the thinking bubble doesn't flicker.
if (
queenAboutToResumeRef.current &&
!prompt &&
!options &&
!questions
) {
queenAboutToResumeRef.current = false;
break;
}
setAwaitingInput(true);
setIsTyping(false);
setIsStreaming(false);
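The flicker-suppression condition above is a single boolean gate: skip the `client_input_requested` handling only when the auto-flush flag is set and the request carries no prompt, options, or questions. Sketched as a standalone predicate (assumed names, for illustration):

```python
def should_skip_input_request(about_to_resume: bool, prompt, options, questions) -> bool:
    """Swallow an empty-prompt input request right after an auto-flush,
    so the typing bubble doesn't flicker off and back on."""
    return about_to_resume and not prompt and not options and not questions
```

Any real prompt, option list, or question set still falls through to the normal awaiting-input path, so only the parked auto-wait is suppressed.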
@@ -495,17 +546,40 @@ export default function QueenDM() {
);
if (chatMsg) {
setMessages((prev) => {
// Reconcile optimistic user message
// Dedup by id first. During ?new=1 bootstrap, restoreMessages
// and SSE replay race to land the same server-echoed bubble
// (both derive id from the event timestamp). Without this,
// if restoreMessages lands first the reconciler below misses
// (executionId is already set) and SSE appends a duplicate.
const existingIdx = prev.findIndex((m) => m.id === chatMsg.id);
if (existingIdx !== -1) {
return prev.map((m, i) =>
i === existingIdx
? { ...chatMsg, createdAt: m.createdAt ?? chatMsg.createdAt }
: m,
);
}
// Reconcile an optimistic user bubble with the server echo.
// Optimistics have no executionId; server echoes do. Match the
// oldest unreconciled optimistic with the same content — that's
// the FIFO-correct pick for both auto-flush and Steer.
if (chatMsg.type === "user" && prev.length > 0) {
const last = prev[prev.length - 1];
if (
last.type === "user" &&
last.content === chatMsg.content &&
Math.abs((chatMsg.createdAt ?? 0) - (last.createdAt ?? 0)) <=
15000
) {
const idx = prev.findIndex(
(m) =>
m.type === "user" &&
!m.executionId &&
m.content === chatMsg.content,
);
if (idx !== -1) {
return prev.map((m, i) =>
i === prev.length - 1 ? { ...m, id: chatMsg.id } : m,
i === idx
? {
...m,
id: chatMsg.id,
executionId: chatMsg.executionId,
queued: undefined,
}
: m,
);
}
}
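The reconciliation above has two layers: dedup by server id first (to survive the bootstrap race), then adopt the oldest optimistic bubble — one with no `executionId` yet — whose content matches. A simplified Python sketch of that merge, using dicts in place of `ChatMessage` (assumed shape, for illustration):

```python
def reconcile(messages: list, echo: dict) -> list:
    """Merge a server-echoed user message into the local message list.

    Dedup by id first; otherwise adopt the oldest optimistic bubble
    (no executionId yet) with matching content, FIFO-style.
    """
    if any(m["id"] == echo["id"] for m in messages):
        return messages  # already landed via the replay path
    for m in messages:
        if (
            m.get("type") == "user"
            and not m.get("executionId")
            and m["content"] == echo["content"]
        ):
            # Adopt the server identity; the bubble is no longer queued.
            m.update(id=echo["id"], executionId=echo["executionId"], queued=None)
            return messages
    return messages + [echo]
```

Matching the oldest unreconciled optimistic (rather than the last message) is what keeps auto-flush and Steer FIFO-correct when several identical messages are queued.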
@@ -519,10 +593,8 @@ export default function QueenDM() {
const rawPhase = event.data?.phase as string;
if (
rawPhase === "independent" ||
rawPhase === "planning" ||
rawPhase === "building" ||
rawPhase === "staging" ||
rawPhase === "running"
rawPhase === "working" ||
rawPhase === "reviewing"
) {
setQueenPhase(rawPhase);
}
@@ -601,6 +673,7 @@ export default function QueenDM() {
createdAt: eventCreatedAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: sid || undefined,
};
setMessages((prevMsgs) => {
const idx = prevMsgs.findIndex((m) => m.id === msgId);
@@ -709,18 +782,61 @@ export default function QueenDM() {
useMultiSSE({ sessions: sseSessions, onEvent: handleSSEEvent });
// Send handler
// Core backend send — used both for immediate sends and for Steer /
// auto-flush paths out of the pending queue.
const sendToBackend = useCallback(
(text: string, images?: ImageContent[]) => {
if (!sessionId) return;
executionApi.chat(sessionId, text, images).catch(() => {
setIsTyping(false);
setIsStreaming(false);
});
},
[sessionId],
);
const {
enqueue: enqueuePending,
steer: handleSteer,
cancelQueued: handleCancelQueued,
flushNext: flushNextPending,
flushNextRef: flushNextPendingRef,
clear: clearPendingQueue,
} = usePendingQueue({
sendToBackend,
setMessages,
onFlushStart: useCallback(() => {
setIsTyping(true);
queenAboutToResumeRef.current = true;
}, []),
});
// Reset the queue whenever we navigate to a different queen. The hook
// outlives the route change (same component instance), so without this,
// a message queued for Queen A would auto-flush into Queen B's session
// on B's next execution_completed.
useEffect(() => {
clearPendingQueue();
}, [queenId, clearPendingQueue]);
// Send handler. Queues when the queen is mid-turn (unless the user is
// answering an ask_user prompt, which must send immediately to unblock
// the loop). Queued messages are held locally until Steer, Cancel, or
// the next `execution_completed` auto-flush.
const handleSend = useCallback(
(text: string, _thread: string, images?: ImageContent[]) => {
if (awaitingInput) {
const answeringQuestion = awaitingInput;
if (answeringQuestion) {
setAwaitingInput(false);
setPendingQuestion(null);
setPendingOptions(null);
}
const isQueenBusy = isTyping;
const shouldQueue = !answeringQuestion && isTyping;
const msgId = makeId();
const userMsg: ChatMessage = {
id: makeId(),
id: msgId,
agent: "You",
agentColor: "",
content: text,
@@ -729,19 +845,19 @@ export default function QueenDM() {
thread: "queen-dm",
createdAt: Date.now(),
images,
queued: isQueenBusy || undefined,
queued: shouldQueue,
};
setMessages((prev) => [...prev, userMsg]);
setIsTyping(true);
if (sessionId) {
executionApi.chat(sessionId, text, images).catch(() => {
setIsTyping(false);
setIsStreaming(false);
});
if (shouldQueue) {
enqueuePending(msgId, { text, images });
return;
}
setIsTyping(true);
sendToBackend(text, images);
},
[sessionId, awaitingInput, isTyping],
[awaitingInput, isTyping, sendToBackend, enqueuePending],
);
const handleQuestionAnswer = useCallback(
@@ -776,15 +892,14 @@ export default function QueenDM() {
setIsStreaming(false);
setActiveToolCalls({});
toolUseToPillRef.current = {};
// Clear queued flags since the queen is now idle
setMessages((prev) => {
if (!prev.some((m) => m.queued)) return prev;
return prev.map((m) => (m.queued ? { ...m, queued: undefined } : m));
});
// After cancelling the current turn, immediately send the oldest
// queued message (if any). The remaining queued messages stay put
// so the user can review them or Steer/Cancel individually.
flushNextPending();
} catch {
// ignore
}
}, [sessionId]);
}, [sessionId, flushNextPending]);
return (
<div className="flex flex-col h-full">
@@ -807,6 +922,8 @@ export default function QueenDM() {
messages={messages}
onSend={handleSend}
onCancel={handleCancelQueen}
onSteer={handleSteer}
onCancelQueued={handleCancelQueued}
activeThread="queen-dm"
isWaiting={isTyping && !isStreaming}
isBusy={isTyping}
@@ -825,6 +942,8 @@ export default function QueenDM() {
}}
supportsImages={true}
initialDraft={initialDraft}
queenProfileId={queenId ?? null}
queenId={queenId}
/>
</div>
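The pending-queue contract the hook exposes (enqueue while busy, cancel individually, flush exactly one per turn boundary) can be sketched as a small FIFO structure. This is an illustrative Python model of the behavior described above, not the hook's implementation:

```python
class PendingQueue:
    """FIFO queue of messages typed while the agent is mid-turn."""

    def __init__(self, send):
        self._send = send   # callback that actually hits the backend
        self._items = []    # (msg_id, text) in arrival order

    def enqueue(self, msg_id, text):
        self._items.append((msg_id, text))

    def cancel(self, msg_id):
        """Drop a single queued message without sending it."""
        self._items = [it for it in self._items if it[0] != msg_id]

    def flush_next(self):
        """On a turn boundary, send exactly one queued message (oldest first)."""
        if not self._items:
            return None
        msg_id, text = self._items.pop(0)
        self._send(text)
        return msg_id

    def clear(self):
        """Reset on navigation so Queen A's queue can't flush into Queen B."""
        self._items.clear()
```

Flushing one item per boundary — rather than draining the whole queue — keeps the remaining messages visible so the user can still Steer or Cancel them individually.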
+4
@@ -63,6 +63,10 @@ lint.isort.section-order = [
"local-folder",
]
[tool.pytest.ini_options]
addopts = "-m 'not live'"
markers = [
"live: Tests that call real external APIs (require credentials, never run in CI)",
]
filterwarnings = [
"ignore::DeprecationWarning:litellm.*"
]
+7 -4
@@ -45,10 +45,13 @@ def _has_any_llm_key() -> bool:
return any(os.environ.get(k) for k in _LLM_KEY_ENV_VARS)
pytestmark = pytest.mark.skipif(
not _has_any_llm_key(),
reason="No LLM API key set; skipping live integration test",
)
pytestmark = [
pytest.mark.live,
pytest.mark.skipif(
not _has_any_llm_key(),
reason="No LLM API key set; skipping live integration test",
),
]
# ---------------------------------------------------------------------------
+5 -2
@@ -274,9 +274,12 @@ class TestReportToParent:
worker = colony.get_worker(worker_ids[0])
assert worker is not None
# Wait for the worker's background task to finish
# Wait for the worker to finish AND for the SUBAGENT_REPORT event
# to propagate. On Windows the event loop scheduling differs from
# POSIX, so a worker can be marked inactive a few ticks before the
# subscriber callback runs. Waiting on both avoids that race.
deadline = asyncio.get_event_loop().time() + 5.0
while worker.is_active and asyncio.get_event_loop().time() < deadline:
while (worker.is_active or len(reports) == 0) and asyncio.get_event_loop().time() < deadline:
await asyncio.sleep(0.05)
assert not worker.is_active, "Worker did not finish within timeout"
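The fix above generalizes to a reusable pattern: poll until *all* conditions hold, not just the first one to flip, with a hard deadline. A minimal asyncio sketch (assumed helper, not from the codebase):

```python
import asyncio

async def wait_until(conditions, timeout=5.0, interval=0.05):
    """Poll until every condition holds or the deadline passes.

    Waiting on all conditions (e.g. worker inactive AND report received)
    avoids races where event-loop scheduling order differs by platform.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while not all(c() for c in conditions) and loop.time() < deadline:
        await asyncio.sleep(interval)
    return all(c() for c in conditions)
```

The boolean return lets the caller assert with a meaningful message instead of asserting each condition separately after an arbitrary sleep.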
+252 -126
@@ -1,14 +1,17 @@
"""Tests for the queen-side ``create_colony`` tool.
New contract (two-step flow):
Contract (atomic inline-skill flow):
1. The queen authors a skill folder out-of-band (via write_file etc.)
containing a SKILL.md with YAML frontmatter {name, description} and
an optional body.
2. The queen calls ``create_colony(colony_name, task, skill_path)``
pointing at that folder. The tool validates the folder, installs it
under ``~/.hive/skills/{name}/`` if it's not already there, and
forks the session into a colony.
The queen calls ``create_colony(colony_name, task, skill_name,
skill_description, skill_body, skill_files?, tasks?)`` in a single
call. The tool materializes
``~/.hive/colonies/{colony_name}/.hive/skills/{skill_name}/`` from the
inline content (writing SKILL.md and any supporting files), then forks
the queen session into that colony. The skill is **colony-scoped**:
it is discovered at project scope by that colony's workers and is
invisible to every other colony on the machine. Reusing an existing
skill name inside the colony simply replaces the old skill; the queen
owns her skill namespace inside the colony.
We monkeypatch ``fork_session_into_colony`` so the test doesn't need a
real queen / session directory. We also redirect ``$HOME`` so the test's
@@ -62,23 +65,43 @@ async def _call(executor, **inputs) -> dict:
@pytest.fixture
def patched_home(tmp_path, monkeypatch):
"""Redirect $HOME so ~/.hive/skills/ lands in tmp_path."""
"""Redirect $HOME so ~/.hive/colonies/ lands in tmp_path."""
monkeypatch.setenv("HOME", str(tmp_path))
return tmp_path
def _colony_skill_path(home: Path, colony_name: str, skill_name: str) -> Path:
"""Where the tool now materializes the skill (colony-scoped project dir)."""
return home / ".hive" / "colonies" / colony_name / ".hive" / "skills" / skill_name
@pytest.fixture
def patched_fork(monkeypatch):
"""Stub out fork_session_into_colony so we don't need a real queen."""
calls: list[dict] = []
async def _stub_fork(*, session: Any, colony_name: str, task: str) -> dict:
calls.append({"session": session, "colony_name": colony_name, "task": task})
async def _stub_fork(
*,
session: Any,
colony_name: str,
task: str,
tasks: list[dict] | None = None,
) -> dict:
calls.append(
{
"session": session,
"colony_name": colony_name,
"task": task,
"tasks": tasks,
}
)
return {
"colony_path": f"/tmp/fake_colonies/{colony_name}",
"colony_name": colony_name,
"queen_session_id": "session_fake_fork_id",
"is_new": True,
"db_path": f"/tmp/fake_colonies/{colony_name}/data/progress.db",
"task_ids": [],
}
monkeypatch.setattr(
@@ -88,23 +111,11 @@ def patched_fork(monkeypatch):
return calls
def _write_skill(
root: Path,
*,
dir_name: str,
fm_name: str,
description: str = "Default test skill description with enough text.",
body: str = "## Body\n\nOperational details go here.\n",
) -> Path:
"""Write a valid skill folder under ``root`` and return its path."""
skill_dir = root / dir_name
skill_dir.mkdir(parents=True, exist_ok=True)
skill_md = skill_dir / "SKILL.md"
skill_md.write_text(
f'---\nname: {fm_name}\ndescription: "{description}"\n---\n\n{body}',
encoding="utf-8",
)
return skill_dir
_DEFAULT_BODY = (
"## Operational Protocol\n\n"
"Auth: Bearer token from ~/.hive/credentials/honeycomb.json.\n"
"Pagination: ?page=1&page_size=50 (max 50 per page).\n"
)
# ---------------------------------------------------------------------------
@@ -113,9 +124,7 @@ def _write_skill(
@pytest.mark.asyncio
async def test_happy_path_emits_colony_created_event(
tmp_path: Path, patched_home: Path, patched_fork: list[dict]
) -> None:
async def test_happy_path_emits_colony_created_event(patched_home: Path, patched_fork: list[dict]) -> None:
"""Successful create_colony must publish a COLONY_CREATED event."""
from framework.host.event_bus import AgentEvent, EventType
@@ -131,53 +140,41 @@ async def test_happy_path_emits_colony_created_event(
handler=_on_colony_created,
)
skill_src = _write_skill(tmp_path / "scratch", dir_name="my-skill", fm_name="my-skill")
skill_src.parent.mkdir(parents=True, exist_ok=True)
# Re-create after parent mkdir
skill_src = _write_skill(tmp_path / "scratch", dir_name="my-skill", fm_name="my-skill")
payload = await _call(
executor,
colony_name="event_check",
task="t",
skill_path=str(skill_src),
skill_name="my-skill",
skill_description="My test skill for event-check happy path.",
skill_body=_DEFAULT_BODY,
)
assert payload.get("status") == "created", payload
assert payload["skill_replaced"] is False
assert len(received) == 1
ev = received[0]
assert ev.type == EventType.COLONY_CREATED
assert ev.data.get("colony_name") == "event_check"
assert ev.data.get("skill_name") == "my-skill"
assert ev.data.get("skill_replaced") is False
assert ev.data.get("is_new") is True
@pytest.mark.asyncio
async def test_happy_path_external_folder_is_copied_into_skills_root(
tmp_path: Path, patched_home: Path, patched_fork: list[dict]
) -> None:
"""Skill authored outside ~/.hive/skills/ is copied in on install."""
async def test_happy_path_materializes_skill_under_colony_dir(patched_home: Path, patched_fork: list[dict]) -> None:
"""Inline skill content is written to ~/.hive/colonies/{colony}/.hive/skills/{name}/."""
executor, session = _make_executor()
# Queen authors skill in a scratch dir, not under ~/.hive/skills/
scratch = tmp_path / "scratch"
scratch.mkdir()
skill_src = _write_skill(
scratch,
dir_name="honeycomb-api-protocol",
fm_name="honeycomb-api-protocol",
description=(
"How to query the HoneyComb staging API for ticker, pool, "
"and trade data. Covers auth, pagination, pool detail "
"shape. Use when fetching market data."
),
body=(
"## HoneyComb API Operational Protocol\n\n"
"Auth: Bearer token from ~/.hive/credentials/honeycomb.json.\n"
"Pagination: ?page=1&page_size=50 (max 50 per page).\n"
"Endpoints:\n"
"- /api/ticker — list tickers\n"
"- /api/ticker/{id} — pool detail\n"
),
description = (
"How to query the HoneyComb staging API for ticker, pool, "
"and trade data. Covers auth, pagination, pool detail shape."
)
body = (
"## HoneyComb API Operational Protocol\n\n"
"Auth: Bearer token from ~/.hive/credentials/honeycomb.json.\n"
"Pagination: ?page=1&page_size=50 (max 50 per page).\n"
"Endpoints:\n"
"- /api/ticker — list tickers\n"
"- /api/ticker/{id} — pool detail\n"
)
payload = await _call(
@@ -187,17 +184,27 @@ async def test_happy_path_external_folder_is_copied_into_skills_root(
"Build a daily honeycomb market report covering top gainers, "
"losers, volume leaders, and category breakdowns."
),
skill_path=str(skill_src),
skill_name="honeycomb-api-protocol",
skill_description=description,
skill_body=body,
)
assert payload.get("status") == "created", f"Tool error: {payload}"
assert payload["colony_name"] == "honeycomb_research"
assert payload["skill_name"] == "honeycomb-api-protocol"
assert payload["skill_replaced"] is False
# The skill was installed under ~/.hive/skills/
installed = patched_home / ".hive" / "skills" / "honeycomb-api-protocol" / "SKILL.md"
installed = _colony_skill_path(patched_home, "honeycomb_research", "honeycomb-api-protocol") / "SKILL.md"
assert installed.exists()
assert "HoneyComb API Operational Protocol" in installed.read_text(encoding="utf-8")
text = installed.read_text(encoding="utf-8")
assert text.startswith("---\n")
assert "name: honeycomb-api-protocol" in text
assert f"description: {description}" in text
assert "HoneyComb API Operational Protocol" in text
# Critically: the skill must NOT land in the shared user-scope dir —
# that was the leak we are fixing.
assert not (patched_home / ".hive" / "skills" / "honeycomb-api-protocol").exists()
# Fork was called with the right args
assert len(patched_fork) == 1
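The assertions above pin down the materialized `SKILL.md` layout: a `---`-delimited YAML frontmatter block with unquoted `name:` and `description:` lines, a blank line, then the body. A sketch of that write step under assumed names (the real tool also validates inputs and handles `skill_files`):

```python
from pathlib import Path

def materialize_skill(colony_root: Path, name: str, description: str, body: str) -> Path:
    """Write SKILL.md with YAML frontmatter under the colony-scoped skills dir."""
    skill_dir = colony_root / ".hive" / "skills" / name
    skill_dir.mkdir(parents=True, exist_ok=True)
    md = skill_dir / "SKILL.md"
    md.write_text(
        f"---\nname: {name}\ndescription: {description}\n---\n\n{body}",
        encoding="utf-8",
    )
    return md
```

Writing under `colonies/{colony}/.hive/skills/` rather than the shared `~/.hive/skills/` is precisely the isolation the test below verifies.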
@@ -207,31 +214,104 @@ async def test_happy_path_external_folder_is_copied_into_skills_root(
@pytest.mark.asyncio
async def test_happy_path_in_place_authored_skill(patched_home: Path, patched_fork: list[dict]) -> None:
"""Skill authored directly at ~/.hive/skills/{name}/ is accepted in-place."""
async def test_two_colonies_do_not_share_skill_namespace(patched_home: Path, patched_fork: list[dict]) -> None:
"""A skill authored via create_colony is invisible to other colonies' worker dirs.
This is the core isolation guarantee: colony A's create_colony call
must NOT plant files under colony B's project root or under the
user-global skills dir.
"""
executor, _ = _make_executor()
skills_root = patched_home / ".hive" / "skills"
skills_root.mkdir(parents=True)
skill_src = _write_skill(
skills_root,
dir_name="in-place-skill",
fm_name="in-place-skill",
description="An in-place skill.",
body="Contents that are already at the right location." * 3,
payload_a = await _call(
executor,
colony_name="alpha",
task="t",
skill_name="alpha-only-skill",
skill_description="Only the alpha colony should see this skill.",
skill_body=_DEFAULT_BODY,
)
assert payload_a.get("status") == "created", payload_a
payload_b = await _call(
executor,
colony_name="bravo",
task="t",
skill_name="bravo-only-skill",
skill_description="Only the bravo colony should see this skill.",
skill_body=_DEFAULT_BODY,
)
assert payload_b.get("status") == "created", payload_b
alpha_dir = patched_home / ".hive" / "colonies" / "alpha" / ".hive" / "skills"
bravo_dir = patched_home / ".hive" / "colonies" / "bravo" / ".hive" / "skills"
user_skills = patched_home / ".hive" / "skills"
# Each colony only contains its own skill
assert (alpha_dir / "alpha-only-skill" / "SKILL.md").exists()
assert not (alpha_dir / "bravo-only-skill").exists()
assert (bravo_dir / "bravo-only-skill" / "SKILL.md").exists()
assert not (bravo_dir / "alpha-only-skill").exists()
# Nothing landed in the shared user-global dir.
assert not user_skills.exists() or not any(user_skills.iterdir())
@pytest.mark.asyncio
async def test_skill_files_are_written_alongside_skill_md(patched_home: Path, patched_fork: list[dict]) -> None:
"""skill_files entries land at the right relative paths."""
executor, _ = _make_executor()
payload = await _call(
executor,
colony_name="in_place_colony",
task="task text",
skill_path=str(skill_src),
colony_name="fancy_skill",
task="t",
skill_name="fancy-skill",
skill_description="Has supporting scripts and references.",
skill_body=_DEFAULT_BODY,
skill_files=[
{"path": "scripts/run.sh", "content": "#!/bin/sh\necho hi\n"},
{"path": "references/shapes.md", "content": "# Shapes\nfoo\n"},
],
)
assert payload.get("status") == "created", payload
skill_dir = _colony_skill_path(patched_home, "fancy_skill", "fancy-skill")
assert (skill_dir / "SKILL.md").exists()
assert (skill_dir / "scripts" / "run.sh").read_text() == "#!/bin/sh\necho hi\n"
assert (skill_dir / "references" / "shapes.md").read_text() == "# Shapes\nfoo\n"
@pytest.mark.asyncio
async def test_existing_skill_is_replaced(patched_home: Path, patched_fork: list[dict]) -> None:
"""Reusing a skill_name within the same colony replaces the old skill."""
executor, _ = _make_executor()
skill_root = _colony_skill_path(patched_home, "replier_colony", "x-job-market-replier")
skill_root.mkdir(parents=True)
(skill_root / "SKILL.md").write_text(
"---\nname: x-job-market-replier\ndescription: stale\n---\n\nold body\n",
encoding="utf-8",
)
(skill_root / "stale.txt").write_text("leftover from prior version", encoding="utf-8")
payload = await _call(
executor,
colony_name="replier_colony",
task="t",
skill_name="x-job-market-replier",
skill_description="Reply to job-market posts on X.",
skill_body="## New procedure\nUse this instead.\n",
)
assert payload.get("status") == "created", payload
installed = skills_root / "in-place-skill" / "SKILL.md"
assert installed.exists()
assert len(patched_fork) == 1
assert payload["skill_replaced"] is True
fresh = (skill_root / "SKILL.md").read_text(encoding="utf-8")
assert "stale" not in fresh
assert "New procedure" in fresh
# Old sidecar files from the prior version must be gone.
assert not (skill_root / "stale.txt").exists()
# ---------------------------------------------------------------------------
@@ -240,117 +320,111 @@ async def test_happy_path_in_place_authored_skill(patched_home: Path, patched_fo
@pytest.mark.asyncio
async def test_missing_skill_path_rejected(patched_home, patched_fork) -> None:
async def test_missing_skill_name_rejected(patched_home, patched_fork) -> None:
executor, _ = _make_executor()
payload = await _call(
executor,
colony_name="ok_name",
task="t",
skill_path=str(patched_home / "does_not_exist"),
skill_name="",
skill_description="desc",
skill_body=_DEFAULT_BODY,
)
assert "error" in payload
assert "does not exist" in payload["error"]
assert "skill_name" in payload["error"]
assert len(patched_fork) == 0
@pytest.mark.asyncio
async def test_skill_path_is_file_not_directory_rejected(tmp_path, patched_home, patched_fork) -> None:
async def test_invalid_skill_name_characters_rejected(patched_home, patched_fork) -> None:
executor, _ = _make_executor()
bogus = tmp_path / "not-a-dir.md"
bogus.write_text("hi", encoding="utf-8")
payload = await _call(
executor,
colony_name="ok_name",
task="t",
skill_path=str(bogus),
skill_name="Bad_Name",
skill_description="desc",
skill_body=_DEFAULT_BODY,
)
assert "error" in payload
assert "must be a directory" in payload["error"]
assert "[a-z0-9-]" in payload["error"]
assert len(patched_fork) == 0
@pytest.mark.asyncio
async def test_skill_missing_skill_md_rejected(tmp_path, patched_home, patched_fork) -> None:
async def test_skill_name_with_double_hyphen_rejected(patched_home, patched_fork) -> None:
executor, _ = _make_executor()
folder = tmp_path / "no-skill-md"
folder.mkdir()
payload = await _call(
executor,
colony_name="ok_name",
task="t",
skill_path=str(folder),
skill_name="bad--name",
skill_description="desc",
skill_body=_DEFAULT_BODY,
)
assert "error" in payload
assert "SKILL.md" in payload["error"]
assert "hyphen" in payload["error"]
assert len(patched_fork) == 0
@pytest.mark.asyncio
async def test_skill_md_missing_frontmatter_marker_rejected(tmp_path, patched_home, patched_fork) -> None:
async def test_missing_skill_description_rejected(patched_home, patched_fork) -> None:
executor, _ = _make_executor()
folder = tmp_path / "broken-fm"
folder.mkdir()
(folder / "SKILL.md").write_text("no frontmatter here, just body\n", encoding="utf-8")
payload = await _call(
executor,
colony_name="ok_name",
task="t",
skill_path=str(folder),
skill_name="ok-skill",
skill_description="",
skill_body=_DEFAULT_BODY,
)
assert "error" in payload
assert "frontmatter" in payload["error"]
assert "skill_description" in payload["error"]
assert len(patched_fork) == 0
@pytest.mark.asyncio
async def test_skill_md_missing_description_rejected(tmp_path, patched_home, patched_fork) -> None:
async def test_multiline_description_rejected(patched_home, patched_fork) -> None:
executor, _ = _make_executor()
folder = tmp_path / "no-description"
folder.mkdir()
(folder / "SKILL.md").write_text(
"---\nname: no-description\n---\n\nbody\n",
encoding="utf-8",
)
payload = await _call(
executor,
colony_name="ok_name",
task="t",
skill_path=str(folder),
skill_name="ok-skill",
skill_description="line one\nline two",
skill_body=_DEFAULT_BODY,
)
assert "error" in payload
assert "description" in payload["error"]
assert "single line" in payload["error"]
assert len(patched_fork) == 0
@pytest.mark.asyncio
async def test_directory_name_mismatch_with_frontmatter_rejected(tmp_path, patched_home, patched_fork) -> None:
async def test_empty_skill_body_rejected(patched_home, patched_fork) -> None:
executor, _ = _make_executor()
folder = tmp_path / "wrong-dir-name"
folder.mkdir()
(folder / "SKILL.md").write_text(
'---\nname: correct-name\ndescription: "d"\n---\n\nbody\n',
encoding="utf-8",
)
payload = await _call(
executor,
colony_name="ok_name",
task="t",
skill_path=str(folder),
skill_name="ok-skill",
skill_description="desc",
skill_body=" \n ",
)
assert "error" in payload
assert "does not match" in payload["error"]
assert "skill_body" in payload["error"]
assert len(patched_fork) == 0
@pytest.mark.asyncio
async def test_invalid_colony_name_rejected(tmp_path, patched_home, patched_fork) -> None:
async def test_invalid_colony_name_rejected(patched_home, patched_fork) -> None:
executor, _ = _make_executor()
skill_src = _write_skill(tmp_path, dir_name="valid-skill", fm_name="valid-skill")
payload = await _call(
executor,
colony_name="NotValid-Colony",
task="t",
skill_path=str(skill_src),
skill_name="valid-skill",
skill_description="desc",
skill_body=_DEFAULT_BODY,
)
assert "error" in payload
assert "colony_name" in payload["error"]
@@ -358,8 +432,59 @@ async def test_invalid_colony_name_rejected(tmp_path, patched_home, patched_fork
@pytest.mark.asyncio
-async def test_fork_failure_keeps_installed_skill(tmp_path, patched_home, monkeypatch) -> None:
-    """If the fork raises, the installed skill stays under ~/.hive/skills/."""
+async def test_skill_files_reject_absolute_path(patched_home, patched_fork) -> None:
executor, _ = _make_executor()
payload = await _call(
executor,
colony_name="ok_name",
task="t",
skill_name="ok-skill",
skill_description="desc",
skill_body=_DEFAULT_BODY,
skill_files=[{"path": "/etc/passwd", "content": "evil"}],
)
assert "error" in payload
assert "relative" in payload["error"]
assert len(patched_fork) == 0
@pytest.mark.asyncio
async def test_skill_files_reject_parent_traversal(patched_home, patched_fork) -> None:
executor, _ = _make_executor()
payload = await _call(
executor,
colony_name="ok_name",
task="t",
skill_name="ok-skill",
skill_description="desc",
skill_body=_DEFAULT_BODY,
skill_files=[{"path": "../escape.txt", "content": "evil"}],
)
assert "error" in payload
assert "relative" in payload["error"]
assert len(patched_fork) == 0
@pytest.mark.asyncio
async def test_skill_files_reject_skill_md_override(patched_home, patched_fork) -> None:
executor, _ = _make_executor()
payload = await _call(
executor,
colony_name="ok_name",
task="t",
skill_name="ok-skill",
skill_description="desc",
skill_body=_DEFAULT_BODY,
skill_files=[{"path": "SKILL.md", "content": "sneaky"}],
)
assert "error" in payload
assert "SKILL.md" in payload["error"]
assert len(patched_fork) == 0
@pytest.mark.asyncio
+async def test_fork_failure_keeps_materialized_skill(patched_home, monkeypatch) -> None:
+    """If the fork raises, the materialized skill stays under ~/.hive/skills/."""
async def _failing_fork(**kwargs):
raise RuntimeError("simulated fork crash")
@@ -370,17 +495,18 @@ async def test_fork_failure_keeps_installed_skill(tmp_path, patched_home, monkey
)
executor, _ = _make_executor()
skill_src = _write_skill(tmp_path, dir_name="durable-skill", fm_name="durable-skill")
payload = await _call(
executor,
colony_name="will_fail",
task="t",
skill_path=str(skill_src),
skill_name="durable-skill",
skill_description="desc",
skill_body=_DEFAULT_BODY,
)
assert "error" in payload
assert "fork failed" in payload["error"]
assert "skill_installed" in payload
-installed = patched_home / ".hive" / "skills" / "durable-skill" / "SKILL.md"
+installed = _colony_skill_path(patched_home, "will_fail", "durable-skill") / "SKILL.md"
assert installed.exists()
assert "hint" in payload
+28 -42
@@ -17,10 +17,10 @@ _DEFAULT_SKILLS_DIR = Path(__file__).resolve().parent.parent / "framework" / "sk
class TestDefaultSkillFiles:
-"""Verify all 7 built-in SKILL.md files parse correctly."""
+"""Verify all built-in SKILL.md files parse correctly."""
-def test_all_seven_skills_exist(self):
-    assert len(SKILL_REGISTRY) == 7
+def test_all_skills_exist(self):
+    assert len(SKILL_REGISTRY) == 6
@pytest.mark.parametrize("skill_name,dir_name", list(SKILL_REGISTRY.items()))
def test_skill_parses(self, skill_name, dir_name):
@@ -35,7 +35,13 @@ class TestDefaultSkillFiles:
assert parsed.source_scope == "framework"
def test_combined_token_budget(self):
-"""All default skill bodies combined should be under 3000 tokens (~12000 chars)."""
+"""All default skill bodies combined should stay within the protocols budget.
+Ceiling is 5000 tokens (~20000 chars): the prompt-injection path
+appends every registered skill body to the system prompt, so
+uncontrolled growth would balloon every LLM call. 5000 gives
+headroom over today's ~3500 while still catching obvious bloat.
+"""
total_chars = 0
for dir_name in SKILL_REGISTRY.values():
path = _DEFAULT_SKILLS_DIR / dir_name / "SKILL.md"
@@ -44,9 +50,9 @@ class TestDefaultSkillFiles:
total_chars += len(parsed.body)
approx_tokens = total_chars // 4
-assert approx_tokens < 3000, (
+assert approx_tokens < 5000, (
f"Combined default skill bodies are ~{approx_tokens} tokens "
-f"({total_chars} chars), exceeding the 3000 token budget"
+f"({total_chars} chars), exceeding the 5000 token budget"
)
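The budget assertion above relies on a rough chars-per-token heuristic (~4 characters per token) rather than a real tokenizer. A tiny standalone illustration of the arithmetic, with placeholder bodies instead of the real SKILL.md files:

```python
# Pretend skill bodies: 8000 + 6000 = 14000 chars total.
bodies = ["x" * 8000, "y" * 6000]

total_chars = sum(len(body) for body in bodies)
approx_tokens = total_chars // 4  # ~4 chars per token heuristic

# 14000 // 4 == 3500, comfortably under the 5000-token ceiling.
assert approx_tokens == 3500
assert approx_tokens < 5000
```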
def test_data_buffer_keys_all_prefixed(self):
@@ -60,7 +66,7 @@ class TestDefaultSkillManager:
manager = DefaultSkillManager()
manager.load()
-assert len(manager.active_skill_names) == 7
+assert len(manager.active_skill_names) == len(SKILL_REGISTRY)
for name in SKILL_REGISTRY:
assert name in manager.active_skill_names
@@ -97,7 +103,7 @@ class TestDefaultSkillManager:
manager.load()
assert "hive.quality-monitor" not in manager.active_skill_names
-assert len(manager.active_skill_names) == 6
+assert len(manager.active_skill_names) == len(SKILL_REGISTRY) - 1
def test_disable_all_via_convention(self):
config = SkillsConfig.from_agent_vars(default_skills={"_all": {"enabled": False}})
@@ -136,7 +142,7 @@ class TestSkillsConfig:
def test_explicit_disable(self):
config = SkillsConfig(default_skills={"hive.note-taking": DefaultSkillConfig(enabled=False)})
assert config.is_default_enabled("hive.note-taking") is False
-assert config.is_default_enabled("hive.batch-ledger") is True
+assert config.is_default_enabled("hive.quality-monitor") is True
def test_all_disabled_flag(self):
config = SkillsConfig(all_defaults_disabled=True)
@@ -166,11 +172,11 @@ class TestSkillsConfig:
def test_get_default_overrides(self):
config = SkillsConfig.from_agent_vars(
default_skills={
-"hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
+"hive.quality-monitor": {"enabled": True, "assessment_interval": 10},
}
)
-overrides = config.get_default_overrides("hive.batch-ledger")
-assert overrides == {"checkpoint_every_n": 10}
+overrides = config.get_default_overrides("hive.quality-monitor")
+assert overrides == {"assessment_interval": 10}
def test_get_default_overrides_empty(self):
config = SkillsConfig()
@@ -244,40 +250,20 @@ class TestConfigOverrideSubstitution:
assert "{{" not in cleaned
-class TestBatchAutoDetection:
-    """DS-12: is_batch_scenario() and batch_init_nudge property."""
+class TestBatchDeprecatedNoOps:
+    """batch-ledger skill was removed; is_batch_scenario() and batch_init_nudge
+    are deprecated no-ops that return False / None unconditionally. They are
+    kept in-tree to avoid touching every orchestrator/execution_manager call
+    site that still reads the nudge through the config plumbing."""
-def test_detects_list_of(self):
-    assert is_batch_scenario("process a list of 100 leads") is True
+def test_is_batch_scenario_always_false(self):
+    assert is_batch_scenario("process a list of 100 leads") is False
+    assert is_batch_scenario("for each record, send an email") is False
+    assert is_batch_scenario("write a summary") is False
-def test_detects_collection_of(self):
-    assert is_batch_scenario("a collection of invoices") is True
-def test_detects_items(self):
-    assert is_batch_scenario("go through all items in the spreadsheet") is True
-def test_detects_for_each(self):
-    assert is_batch_scenario("for each record, send an email") is True
-def test_no_match_single_task(self):
-    assert is_batch_scenario("write a summary of the quarterly report") is False
-def test_batch_nudge_active_by_default(self):
+def test_batch_init_nudge_always_none(self):
manager = DefaultSkillManager()
manager.load()
-    assert manager.batch_init_nudge is not None
-    assert "_batch_ledger" in manager.batch_init_nudge
-def test_batch_nudge_none_when_skill_disabled(self):
-    config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"enabled": False}})
-    manager = DefaultSkillManager(config)
-    manager.load()
-    assert manager.batch_init_nudge is None
-def test_batch_nudge_none_when_auto_detect_disabled(self):
-    config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"auto_detect_batch": False}})
-    manager = DefaultSkillManager(config)
-    manager.load()
-    assert manager.batch_init_nudge is None
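The class docstring in this hunk describes keeping removed features as deprecated no-ops so every orchestrator/execution_manager call site keeps compiling. A minimal sketch of that pattern (signatures inferred from the tests, not the project's actual source):

```python
import warnings


def is_batch_scenario(task: str) -> bool:
    """Deprecated no-op: batch-ledger was removed, so nothing is a
    batch scenario anymore. Kept so existing call sites need no edits."""
    warnings.warn(
        "is_batch_scenario is deprecated and always returns False",
        DeprecationWarning,
        stacklevel=2,
    )
    return False


# Every input — even ones the old detector matched — now returns False.
assert is_batch_scenario("process a list of 100 leads") is False
assert is_batch_scenario("write a summary") is False
```

Callers that branch on the result simply never take the batch path, which is exactly what the rewritten tests assert.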
+126
@@ -2109,3 +2109,129 @@ class TestToolConcurrencyPartition:
# Both tools must have run: soft errors don't cascade.
assert executed == ["call_1", "call_2"]
# ===========================================================================
# Replay detector (warn + execute)
# ===========================================================================
class TestReplayDetector:
@pytest.mark.asyncio
async def test_replay_emits_event_and_prefixes_result(self, tmp_path, runtime, node_spec, buffer):
"""Re-emitting a tool call whose prior result succeeded fires the
TOOL_CALL_REPLAY_DETECTED event and prepends a steer onto the stored
result, but still executes the call (warn + execute)."""
node_spec.output_keys = []
async def tool_exec(tool_use: ToolUse) -> ToolResult:
return ToolResult(
tool_use_id=tool_use.id,
content=f"fresh result for {tool_use.id}",
is_error=False,
)
# Turn 1: model calls browser_setup with id=call_1
# Turn 2: model calls browser_setup AGAIN with id=call_2 (the replay)
# Turn 3: text stop
llm = MockStreamingLLM(
scenarios=[
tool_call_scenario("browser_setup", {}, tool_use_id="call_1"),
tool_call_scenario("browser_setup", {}, tool_use_id="call_2"),
text_scenario("done"),
]
)
tools = [Tool(name="browser_setup", description="", parameters={})]
# Capture events from the bus.
captured: list[Any] = []
bus = EventBus()
async def _collect(evt):
captured.append(evt)
bus.subscribe([EventType.TOOL_CALL_REPLAY_DETECTED], _collect)
ctx = build_ctx(
runtime,
node_spec,
buffer,
llm,
tools=tools,
is_subagent_mode=True,
)
store = FileConversationStore(tmp_path / "conv")
node = EventLoopNode(
tool_executor=tool_exec,
conversation_store=store,
event_bus=bus,
config=LoopConfig(max_iterations=5),
)
await node.execute(ctx)
# Exactly one replay-detected event fired for the second call.
assert len(captured) == 1
assert captured[0].data["tool_name"] == "browser_setup"
# The stored tool result for the replay carries the steer prefix,
# and the real execution output is preserved.
parts = await store.read_parts()
tool_msgs = [p for p in parts if p.get("role") == "tool" and p.get("tool_use_id") == "call_2"]
assert len(tool_msgs) == 1
assert tool_msgs[0]["content"].startswith("[Replay detected: browser_setup")
assert "fresh result for call_2" in tool_msgs[0]["content"]
# The first call's result is untouched.
first = [p for p in parts if p.get("role") == "tool" and p.get("tool_use_id") == "call_1"]
assert first[0]["content"] == "fresh result for call_1"
@pytest.mark.asyncio
async def test_replay_with_error_prior_does_not_fire(self, tmp_path, runtime, node_spec, buffer):
"""A prior call that errored does not count as a successful completion,
so re-emitting it is legitimate (not a replay)."""
node_spec.output_keys = []
async def tool_exec(tool_use: ToolUse) -> ToolResult:
is_err = tool_use.id == "call_1"
return ToolResult(
tool_use_id=tool_use.id,
content=("boom" if is_err else "ok"),
is_error=is_err,
)
llm = MockStreamingLLM(
scenarios=[
tool_call_scenario("flaky", {}, tool_use_id="call_1"),
tool_call_scenario("flaky", {}, tool_use_id="call_2"),
text_scenario("recovered"),
]
)
tools = [Tool(name="flaky", description="", parameters={})]
captured: list[Any] = []
bus = EventBus()
async def _collect(evt):
captured.append(evt)
bus.subscribe([EventType.TOOL_CALL_REPLAY_DETECTED], _collect)
ctx = build_ctx(
runtime,
node_spec,
buffer,
llm,
tools=tools,
is_subagent_mode=True,
)
store = FileConversationStore(tmp_path / "conv")
node = EventLoopNode(
tool_executor=tool_exec,
conversation_store=store,
event_bus=bus,
config=LoopConfig(max_iterations=5),
)
await node.execute(ctx)
assert captured == []
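The two tests above pin down the warn-and-execute contract: a call counts as a replay only when an identical earlier call completed without error, and even then the tool still runs. A standalone sketch of that rule (class and method names here are assumptions for illustration, not the project's real API):

```python
import json


class ReplayDetector:
    """Tracks (tool_name, canonicalized args) calls that succeeded."""

    def __init__(self) -> None:
        self._completed: set[tuple[str, str]] = set()

    def _key(self, tool_name: str, args: dict) -> tuple[str, str]:
        # Canonicalize args so {"a": 1, "b": 2} and {"b": 2, "a": 1} match.
        return (tool_name, json.dumps(args, sort_keys=True))

    def is_replay(self, tool_name: str, args: dict) -> bool:
        # Warn + execute: the caller emits a warning event and prefixes
        # the stored result, but still runs the tool either way.
        return self._key(tool_name, args) in self._completed

    def record(self, tool_name: str, args: dict, is_error: bool) -> None:
        # Errored calls don't count as completions: retrying is legitimate.
        if not is_error:
            self._completed.add(self._key(tool_name, args))


det = ReplayDetector()
det.record("browser_setup", {}, is_error=False)
assert det.is_replay("browser_setup", {}) is True  # prior success -> replay

det.record("flaky", {}, is_error=True)
assert det.is_replay("flaky", {}) is False  # prior error -> not a replay
```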

Some files were not shown because too many files have changed in this diff.