fix: compaction issues

Timothy
2026-04-28 10:43:54 -07:00
parent ccb6556a41
commit 4ef951447d
19 changed files with 1075 additions and 134 deletions
+12 -4
@@ -486,10 +486,18 @@ class ColonyRuntime:
if colony_name:
colony_home = COLONIES_DIR / colony_name
colony_overrides_path = colony_home / "skills_overrides.json"
# Colony-scope SKILL.md dir is the project-scope from discovery's
# point of view (colony_dir is the project_root). Add it also as
# a tagged ``colony_ui`` scope so UI-created entries resolve with
# correct provenance.
# Surface both the new flat ``skills/`` (where new skills are
# written) and the legacy nested ``.hive/skills/`` (left intact
# for pre-flatten colonies) as tagged ``colony_ui`` scopes, so
# UI-created entries resolve with correct provenance regardless
# of which on-disk layout the colony has.
extras.append(
ExtraScope(
directory=colony_home / "skills",
label="colony_ui",
priority=3,
)
)
extras.append(
ExtraScope(
directory=colony_home / ".hive" / "skills",
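For orientation, the two on-disk colony skill layouts these hunks juggle. The paths come straight from the commit; the tree rendering itself is just illustrative:

```text
~/.hive/colonies/{colony_name}/
├── skills/                  # new flat layout: all new skills are written here
│   └── {skill_name}/SKILL.md
└── .hive/skills/            # legacy nested layout: left intact for pre-flatten colonies
    └── {skill_name}/SKILL.md
```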
+14
@@ -1503,6 +1503,7 @@ class AgentLoader:
from framework.pipeline.stages.mcp_registry import McpRegistryStage
from framework.pipeline.stages.skill_registry import SkillRegistryStage
from framework.skills.config import SkillsConfig
from framework.skills.discovery import ExtraScope
configure_logging(level="INFO", format="auto")
@@ -1545,6 +1546,19 @@ class AgentLoader:
default_skills=getattr(self, "_agent_default_skills", None),
skills=getattr(self, "_agent_skills", None),
),
# Surface the colony's flat ``skills/`` directory as a
# ``colony_ui`` extra scope so SKILL.md files written there
# by ``create_colony`` (or the HTTP routes) are picked up
# with correct provenance. The legacy nested
# ``<colony>/.hive/skills/`` path is still picked up via
# project-scope auto-discovery (project_root above).
extra_scope_dirs=[
ExtraScope(
directory=self.agent_path / "skills",
label="colony_ui",
priority=3,
)
],
),
]
@@ -26,11 +26,15 @@ class SkillRegistryStage(PipelineStage):
project_root: str | Path | None = None,
interactive: bool = True,
skills_config: Any = None,
extra_scope_dirs: list[Any] | None = None,
**kwargs: Any,
) -> None:
self._project_root = Path(project_root) if project_root else None
self._interactive = interactive
self._skills_config = skills_config
# Optional list of ExtraScope entries layered between user and
# project scope (e.g. ``colony_ui`` for a colony agent's skills/).
self._extra_scope_dirs = list(extra_scope_dirs) if extra_scope_dirs else []
self.skills_manager: Any = None
async def initialize(self) -> None:
@@ -41,6 +45,7 @@ class SkillRegistryStage(PipelineStage):
skills_config=self._skills_config or SkillsConfig(),
project_root=self._project_root,
interactive=self._interactive,
extra_scope_dirs=self._extra_scope_dirs,
)
self.skills_manager = SkillsManager(config)
self.skills_manager.load()
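For reference, the ``ExtraScope`` shape these call sites rely on, inferred from its usage in this commit. The real definition lives in ``framework.skills.discovery`` and may carry more fields; this is a sketch, not the shipped class:

```python
# Sketch only: field set inferred from the call sites in this commit.
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ExtraScope:
    directory: Path  # directory scanned for SKILL.md skill folders
    label: str       # provenance tag, e.g. "colony_ui" or "queen_ui"
    priority: int    # layering order between user and project scope
```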
+12
@@ -335,6 +335,18 @@ def create_app(model: str | None = None) -> web.Application:
queen_tool_registry=None,
)
# Clear orphaned compaction markers from prior server crashes. Without
# this, any session whose compaction was interrupted would block the
# next colony cold-load for the full await_completion timeout (180s)
# before falling through. See compaction_status.sweep_stale_in_progress.
try:
from framework.config import QUEENS_DIR
from framework.server import compaction_status
compaction_status.sweep_stale_in_progress(QUEENS_DIR)
except Exception:
logger.debug("compaction_status: startup sweep skipped", exc_info=True)
# Register shutdown hook
app.on_shutdown.append(_on_shutdown)
@@ -147,3 +147,55 @@ async def await_completion(
)
return last
await asyncio.sleep(poll)
def sweep_stale_in_progress(queens_root: Path) -> int:
"""Rewrite any orphaned ``in_progress`` markers under ``queens_root`` to
``failed``. Returns the count of rewritten markers.
Whatever process owned the original compaction is gone (server crash,
SIGKILL, etc.), so leaving the marker at ``in_progress`` would cause every
subsequent colony cold-load for that queen session to wait the full
``await_completion`` timeout (default 180s) before falling through.
Called once during server bootstrap. Best-effort: any per-file failure is
logged and skipped; the sweep should never prevent the server from
coming up.
"""
if not queens_root.exists():
return 0
cleaned = 0
try:
for queen_dir in queens_root.iterdir():
if not queen_dir.is_dir():
continue
sessions_dir = queen_dir / "sessions"
if not sessions_dir.exists():
continue
try:
for session_dir in sessions_dir.iterdir():
if not session_dir.is_dir():
continue
status = get_status(session_dir)
if status is None or status.get("status") != "in_progress":
continue
mark_failed(session_dir, "server restarted while compaction was in progress")
cleaned += 1
except OSError:
logger.debug(
"compaction_status: sweep failed under %s",
sessions_dir,
exc_info=True,
)
except OSError:
logger.debug(
"compaction_status: sweep failed under %s",
queens_root,
exc_info=True,
)
if cleaned:
logger.info(
"compaction_status: cleared %d stale 'in_progress' marker(s) at startup",
cleaned,
)
return cleaned
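The sweep leans on ``get_status`` and ``mark_failed`` from the same module, neither of which appears in this diff. A minimal sketch of their contract, assuming the marker is a ``compaction_status.json`` file in the session directory (the shape the new tests seed):

```python
# Sketch of the marker helpers' contract, not the shipped implementation.
import json
from pathlib import Path

_MARKER = "compaction_status.json"


def get_status(session_dir: Path) -> dict | None:
    """Parsed marker dict, or None when missing or corrupt."""
    try:
        return json.loads((session_dir / _MARKER).read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return None


def mark_failed(session_dir: Path, reason: str) -> None:
    """Rewrite the marker to a terminal 'failed' state with a reason."""
    (session_dir / _MARKER).write_text(
        json.dumps({"status": "failed", "error": reason}), encoding="utf-8"
    )
```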
+109 -19
@@ -798,6 +798,110 @@ async def handle_session_colonies(request: web.Request) -> web.Response:
_EVENTS_HISTORY_DEFAULT_LIMIT = 2000
_EVENTS_HISTORY_MAX_LIMIT = 10000
# Files at or below this size use the simple forward-scan path (cheap enough
# that the seek-backward dance isn't worth it). Above this threshold we read
# the tail directly from end-of-file so a 50 MB log doesn't have to be paged
# through entirely just to surface the last 2000 lines.
_EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES = 1 << 20 # 1 MB
_EVENTS_HISTORY_REVERSE_TAIL_CHUNK_BYTES = 64 * 1024
def _read_events_tail(events_path: Path, limit: int) -> tuple[list[dict], int, bool]:
"""Read the tail of an append-only JSONL events log.
Returns ``(events, total, truncated)``. ``events`` is at most ``limit``
lines, oldest-first. ``total`` is the total number of non-blank lines in
the file (exact for the small-file path, exact for the large-file path
too; we do a separate fast newline-count pass).
Two paths:
- Small files (< ~1 MB): forward scan. Cheap; gives an exact total for
free. Defers ``json.loads`` to the bounded deque so we never parse a
line that's about to be dropped.
- Large files: seek to EOF and read backward in 64 KB chunks until we have
at least ``limit`` complete lines. Parses only the tail. ``total`` is
counted by a separate forward byte-scan that just counts newlines (no
JSON parse), so it stays cheap even for huge files.
Without these optimizations, mounting the chat for a long-running queen
with a ~50k-event log used to spend most of its time inside ``json.loads``
on the server thread (and block the event loop while doing it).
"""
from collections import deque
file_size = events_path.stat().st_size
if file_size <= _EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES:
tail_raw: deque[str] = deque(maxlen=limit)
total = 0
with open(events_path, encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line:
continue
total += 1
tail_raw.append(line)
events: list[dict] = []
for raw in tail_raw:
try:
events.append(json.loads(raw))
except json.JSONDecodeError:
continue
return events, total, total > len(events)
# Large-file path: read backward until we have enough lines.
import os as _os
chunk_size = _EVENTS_HISTORY_REVERSE_TAIL_CHUNK_BYTES
pieces: list[bytes] = []
newline_count = 0
with open(events_path, "rb") as fb:
fb.seek(0, _os.SEEK_END)
pos = fb.tell()
while pos > 0 and newline_count <= limit:
read_size = min(chunk_size, pos)
pos -= read_size
fb.seek(pos)
chunk = fb.read(read_size)
newline_count += chunk.count(b"\n")
pieces.append(chunk)
pieces.reverse()
blob = b"".join(pieces)
# Drop the leading partial line unless we read from offset 0.
raw_lines = blob.split(b"\n")
if pos > 0 and raw_lines:
raw_lines = raw_lines[1:]
decoded = [ln.decode("utf-8", errors="replace").strip() for ln in raw_lines]
decoded = [ln for ln in decoded if ln]
if len(decoded) > limit:
decoded = decoded[-limit:]
events = []
for raw in decoded:
try:
events.append(json.loads(raw))
except json.JSONDecodeError:
continue
# Separate fast pass for total: count newlines only, no JSON parse.
total = 0
with open(events_path, "rb") as fb:
while True:
chunk = fb.read(1 << 20)
if not chunk:
break
total += chunk.count(b"\n")
# File may end without a trailing newline; if so, the last non-empty line
# was missed. Count it.
if file_size > 0:
with open(events_path, "rb") as fb:
fb.seek(-1, _os.SEEK_END)
if fb.read(1) != b"\n":
total += 1
return events, total, total > len(events)
async def handle_session_events_history(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/events/history — persisted eventbus log.
@@ -827,6 +931,9 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
recent N events". Long-running colonies have produced files with 50k+
events; before this cap, restoring on page-mount shipped the whole thing
down the wire and blocked the UI for seconds.
The actual file read runs in a worker thread via ``asyncio.to_thread`` so
it doesn't block the event loop while other requests are in flight.
"""
session_id = request.match_info["session_id"]
@@ -852,24 +959,8 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
}
)
# Tail the file using a bounded deque — O(limit) memory regardless
# of file size. No need to materialize the whole list only to slice it.
from collections import deque
tail: deque[dict] = deque(maxlen=limit)
total = 0
try:
with open(events_path, encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line:
continue
try:
evt = json.loads(line)
except json.JSONDecodeError:
continue
total += 1
tail.append(evt)
events, total, truncated = await asyncio.to_thread(_read_events_tail, events_path, limit)
except OSError:
return web.json_response(
{
@@ -882,14 +973,13 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
}
)
events = list(tail)
return web.json_response(
{
"events": events,
"session_id": session_id,
"total": total,
"returned": len(events),
"truncated": total > len(events),
"truncated": truncated,
"limit": limit,
}
)
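A hedged usage sketch of the tail reader outside the handler; the import path matches the test module added in this commit, the file path is illustrative:

```python
import asyncio
from pathlib import Path

from framework.server.routes_sessions import _read_events_tail


async def main() -> None:
    # Same call shape as the handler: run the blocking read off the loop.
    events, total, truncated = await asyncio.to_thread(
        _read_events_tail, Path("events.jsonl"), 2000
    )
    print(f"returned={len(events)} of {total}, truncated={truncated}")


asyncio.run(main())
```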
+23 -10
@@ -180,7 +180,7 @@ def _colony_scope(manager: Any, colony_name: str) -> SkillScope | None:
overrides_path = colony_home / "skills_overrides.json"
store = SkillOverrideStore.load(overrides_path, scope_label=f"colony:{colony_name}")
write_dir = colony_home / ".hive" / "skills"
write_dir = colony_home / "skills"
admin_manager = _build_admin_manager(queen_id=queen_id, colony_name=colony_name)
@@ -210,12 +210,13 @@ def _build_admin_manager(
"""Build a read-only SkillsManager for GET when no live session exists.
Intentionally leaves ``project_root`` unset even for a colony: the
colony's ``.hive/skills/`` directory is surfaced via the ``colony_ui``
extra scope. Also routing it through ``project_root`` would double-
scan the same dir, and last-wins collision resolution would retag the
skills as ``source_scope="project"`` which flips the provenance
fallback to ``PROJECT_DROPPED`` and drops ``editable`` to ``False``
for anything without an explicit override-store entry.
colony's ``skills/`` directory (and the legacy ``.hive/skills/`` for
pre-flatten colonies) is surfaced via the ``colony_ui`` extra scope.
Routing it through ``project_root`` would double-scan the same dir,
and last-wins collision resolution would retag the skills as
``source_scope="project"`` which flips the provenance fallback to
``PROJECT_DROPPED`` and drops ``editable`` to ``False`` for anything
without an explicit override-store entry.
"""
extras: list[ExtraScope] = []
queen_overrides_path: Path | None = None
@@ -227,6 +228,10 @@ def _build_admin_manager(
if colony_name:
colony_home = COLONIES_DIR / colony_name
colony_overrides_path = colony_home / "skills_overrides.json"
# Surface both the new flat path (where new skills are written) and
# the legacy nested path (left intact for pre-flatten colonies). UI
# writes always target the flat path; reads see both.
extras.append(ExtraScope(directory=colony_home / "skills", label="colony_ui", priority=3))
extras.append(ExtraScope(directory=colony_home / ".hive" / "skills", label="colony_ui", priority=3))
cfg = SkillsManagerConfig(
queen_id=queen_id,
@@ -442,10 +447,18 @@ async def handle_list_all_skills(request: web.Request) -> web.Response:
extras.append(ExtraScope(directory=QUEENS_DIR / qid / "skills", label="queen_ui", priority=2))
# We intentionally don't plumb every colony's project_root into one
# manager — discovery only allows a single project_root. For the
# aggregator we scan every colony's .hive/skills/ as a tagged extra
# scope instead. That keeps the xml-catalog-per-scope invariant
# intact without requiring N managers.
# aggregator we scan every colony's skills/ (and the legacy nested
# .hive/skills/ for pre-flatten colonies) as tagged extra scopes
# instead. That keeps the xml-catalog-per-scope invariant intact
# without requiring N managers.
for cn in colony_names:
extras.append(
ExtraScope(
directory=COLONIES_DIR / cn / "skills",
label="colony_ui",
priority=3,
)
)
extras.append(
ExtraScope(
directory=COLONIES_DIR / cn / ".hive" / "skills",
+26 -61
@@ -1918,73 +1918,38 @@ class SessionManager:
if meta.get("colony_fork"):
continue
# Build a quick preview of the last human/assistant exchange.
# We read all conversation parts, filter to client-facing messages,
# and return the last assistant message content as a snippet.
# Preview of the last client-facing exchange. Cached in
# ``summary.json`` next to ``meta.json`` so the sidebar doesn't
# have to rescan every part on each list call. The cache is
# written incrementally by FileConversationStore.write_part; if
# missing or stale (parts dir mtime newer than the summary file)
# we do a one-time full rebuild and write a fresh summary.
#
# NOTE on activity timestamps: the session directory's own mtime
# is NOT reliable as a "last activity" marker — POSIX dir mtime
# only updates when direct entries change, and conversation
# parts live under conversations/parts/, so writing a new part
# does not bubble up to the session dir.
from framework.storage import session_summary
last_message: str | None = None
message_count: int = 0
# Last-activity timestamp — mtime of the latest client-facing message.
# Falls back to session creation time for empty sessions. NOTE: the
# session directory's own mtime is NOT reliable here — POSIX dir mtime
# only updates when direct entries change, and conversation parts are
# nested under conversations/parts/, so writing a new part does not
# bubble up to the session dir.
last_active_at: float = float(created_at) if isinstance(created_at, (int, float)) else 0.0
convs_dir = d / "conversations"
summary: dict | None = None
if convs_dir.exists():
try:
all_parts: list[dict] = []
if session_summary.is_stale(d):
summary = session_summary.rebuild_summary(d)
else:
summary = session_summary.read_summary(d)
def _collect_parts(parts_dir: Path, _dest: list[dict] = all_parts) -> None:
if not parts_dir.exists():
return
for part_file in sorted(parts_dir.iterdir()):
if part_file.suffix != ".json":
continue
try:
part = json.loads(part_file.read_text(encoding="utf-8"))
part.setdefault("created_at", part_file.stat().st_mtime)
_dest.append(part)
except (json.JSONDecodeError, OSError):
continue
# Flat layout: conversations/parts/*.json
_collect_parts(convs_dir / "parts")
# Node-based layout: conversations/<node_id>/parts/*.json
for node_dir in convs_dir.iterdir():
if not node_dir.is_dir() or node_dir.name == "parts":
continue
_collect_parts(node_dir / "parts")
# Filter to client-facing messages only
client_msgs = [
p
for p in all_parts
if not p.get("is_transition_marker")
and p.get("role") != "tool"
and not (p.get("role") == "assistant" and p.get("tool_calls"))
]
client_msgs.sort(key=lambda m: m.get("created_at", m.get("seq", 0)))
message_count = len(client_msgs)
# Take the latest message's timestamp as the activity marker.
# _collect_parts sets created_at via setdefault to the part
# file's mtime, so this is always a valid float.
if client_msgs:
latest_ts = client_msgs[-1].get("created_at")
if isinstance(latest_ts, (int, float)) and latest_ts > last_active_at:
last_active_at = float(latest_ts)
# Last assistant message as preview snippet
for msg in reversed(client_msgs):
content = msg.get("content") or ""
if isinstance(content, list):
# Anthropic-style content blocks
content = " ".join(
b.get("text", "") for b in content if isinstance(b, dict) and b.get("type") == "text"
)
if content and msg.get("role") == "assistant":
last_message = content[:120].strip()
break
except OSError:
pass
if summary is not None:
message_count = int(summary.get("message_count") or 0)
last_message = summary.get("last_message")
cached_active = summary.get("last_active_at")
if isinstance(cached_active, (int, float)) and cached_active > last_active_at:
last_active_at = float(cached_active)
# Derive queen_id from directory structure: queens/{queen_id}/sessions/{session_id}
queen_id = d.parent.parent.name if d.parent.name == "sessions" else None
+1 -1
@@ -146,7 +146,7 @@ def write_skill(
``target_root`` is the parent scope dir (e.g.
``~/.hive/agents/queens/{id}/skills`` or
``{colony_dir}/.hive/skills``). The function creates it if needed.
``{colony_dir}/skills``). The function creates it if needed.
Returns ``(installed_path, error, replaced)``. On success ``error`` is
``None``; on failure ``installed_path`` is ``None`` and the target is
+248
@@ -0,0 +1,248 @@
"""Sidecar summary cache for cold-session listings.
Each queen session directory grows a ``summary.json`` file that mirrors the
expensive-to-recompute fields surfaced by ``SessionManager.list_cold_sessions``:
``message_count``, ``last_message`` snippet, and ``last_active_at``.
Without this cache the queen-history sidebar reads **every** part file of
**every** session on the disk for each list request. That cost grows with
total messages across all sessions, not just the one being opened, and is
visible whenever the user navigates to the session list.
Update path: ``FileConversationStore.write_part`` calls ``update_summary``
after each successful part write; best-effort, it never blocks the caller
on failure.
Read path: ``list_cold_sessions`` reads ``summary.json`` and only falls back
to a full part scan when the file is missing or stale (parts dir mtime newer
than the summary). The rebuild path also writes a fresh summary, so the
slow path is paid at most once per session per upgrade.
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Any
from framework.utils.io import atomic_write
logger = logging.getLogger(__name__)
_SUMMARY_FILENAME = "summary.json"
_LAST_MESSAGE_MAX_CHARS = 120
def is_client_facing(part: dict[str, Any]) -> bool:
"""Whether this part appears in the client-visible chat list.
Mirrors the predicate in ``SessionManager.list_cold_sessions`` so the
cached counts agree with a full rebuild.
"""
if part.get("is_transition_marker"):
return False
role = part.get("role")
if role == "tool":
return False
if role == "assistant" and part.get("tool_calls"):
return False
return True
def _extract_text(content: Any) -> str:
"""Render a part's ``content`` field as a flat string for the snippet."""
if isinstance(content, str):
return content
if isinstance(content, list):
# Anthropic-style content blocks: [{"type": "text", "text": "..."}]
return " ".join(b.get("text", "") for b in content if isinstance(b, dict) and b.get("type") == "text")
return ""
def _summary_path(session_dir: Path) -> Path:
return session_dir / _SUMMARY_FILENAME
def read_summary(session_dir: Path) -> dict | None:
"""Return the cached summary dict, or ``None`` if missing/corrupt."""
path = _summary_path(session_dir)
if not path.exists():
return None
try:
return json.loads(path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
return None
def is_stale(session_dir: Path) -> bool:
"""True when the summary is missing or older than the latest part write.
Compares ``summary.json`` mtime against ``conversations/parts/`` (and
any node-based ``conversations/<node>/parts/``) directory mtime.
POSIX dir mtime updates whenever entries are added, so a new part flush
bumps the parts-dir mtime above the summary's.
"""
summary_path = _summary_path(session_dir)
if not summary_path.exists():
return True
try:
summary_mtime = summary_path.stat().st_mtime
except OSError:
return True
convs_dir = session_dir / "conversations"
if not convs_dir.exists():
return False
candidate_dirs: list[Path] = [convs_dir / "parts"]
try:
for child in convs_dir.iterdir():
if child.is_dir() and child.name != "parts":
candidate_dirs.append(child / "parts")
except OSError:
return False
for d in candidate_dirs:
if not d.exists():
continue
try:
if d.stat().st_mtime > summary_mtime + 0.001:
return True
except OSError:
continue
return False
def _write_summary(session_dir: Path, data: dict) -> None:
path = _summary_path(session_dir)
try:
path.parent.mkdir(parents=True, exist_ok=True)
with atomic_write(path) as f:
json.dump(data, f)
except OSError:
logger.debug("session_summary: failed to write %s", path, exc_info=True)
def update_summary(session_dir: Path, part: dict[str, Any]) -> None:
"""Incrementally fold ``part`` into the cached summary.
Best-effort; swallows errors so the part-write path is never broken by
a summary failure. Reads the prior summary, mutates a few fields, and
writes back atomically.
Only client-facing parts (see :func:`is_client_facing`) bump the count
and the ``last_message`` snippet; tool calls and transition markers
are persisted but not surfaced in the sidebar.
"""
try:
if not is_client_facing(part):
return
existing = read_summary(session_dir) or {}
message_count = int(existing.get("message_count") or 0) + 1
last_active_at = float(existing.get("last_active_at") or 0.0)
last_message = existing.get("last_message")
# Prefer an explicit timestamp on the part; fall back to the current
# summary's most-recent activity. Parts also carry ``seq`` which is
# monotonic per-session, but seq is not a wall-clock — keep both.
part_ts = part.get("created_at")
if isinstance(part_ts, (int, float)) and part_ts > last_active_at:
last_active_at = float(part_ts)
# Update the snippet with the latest assistant message; user messages
# don't replace it, matching the existing list_cold_sessions behavior
# (it scans backward for the last assistant message).
if part.get("role") == "assistant":
text = _extract_text(part.get("content")).strip()
if text:
last_message = text[:_LAST_MESSAGE_MAX_CHARS]
last_part_seq = part.get("seq")
if last_part_seq is None:
last_part_seq = existing.get("last_part_seq")
_write_summary(
session_dir,
{
"message_count": message_count,
"last_message": last_message,
"last_active_at": last_active_at,
"last_part_seq": last_part_seq,
},
)
except Exception:
logger.debug("session_summary: update_summary failed", exc_info=True)
def rebuild_summary(session_dir: Path) -> dict | None:
"""Full-scan rebuild — reads every part file and recomputes the summary.
Returns the rebuilt dict and writes it to ``summary.json``. Returns
``None`` when the conversations directory is absent (no parts yet).
Used by ``list_cold_sessions`` as the migration / fallback path when
the cache is missing or stale.
"""
convs_dir = session_dir / "conversations"
if not convs_dir.exists():
return None
all_parts: list[dict] = []
def _collect(parts_dir: Path) -> None:
if not parts_dir.exists():
return
try:
for part_file in sorted(parts_dir.iterdir()):
if part_file.suffix != ".json":
continue
try:
part = json.loads(part_file.read_text(encoding="utf-8"))
part.setdefault("created_at", part_file.stat().st_mtime)
all_parts.append(part)
except (json.JSONDecodeError, OSError):
continue
except OSError:
return
_collect(convs_dir / "parts")
try:
for node_dir in convs_dir.iterdir():
if node_dir.is_dir() and node_dir.name != "parts":
_collect(node_dir / "parts")
except OSError:
pass
client_msgs = [p for p in all_parts if is_client_facing(p)]
client_msgs.sort(key=lambda m: m.get("created_at", m.get("seq", 0)))
last_active_at = 0.0
last_message: str | None = None
if client_msgs:
latest_ts = client_msgs[-1].get("created_at")
if isinstance(latest_ts, (int, float)):
last_active_at = float(latest_ts)
for msg in reversed(client_msgs):
if msg.get("role") != "assistant":
continue
text = _extract_text(msg.get("content")).strip()
if text:
last_message = text[:_LAST_MESSAGE_MAX_CHARS]
break
last_part_seq = None
if all_parts:
seqs = [p.get("seq") for p in all_parts if isinstance(p.get("seq"), int)]
if seqs:
last_part_seq = max(seqs)
summary = {
"message_count": len(client_msgs),
"last_message": last_message,
"last_active_at": last_active_at,
"last_part_seq": last_part_seq,
}
_write_summary(session_dir, summary)
return summary
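The module docstring says ``FileConversationStore.write_part`` folds each part into this cache, but the store itself is not part of the diff. A minimal sketch of that hook, with the class, method signature, and flat parts layout assumed for illustration; only the ``update_summary`` call is grounded in this commit:

```python
import json
from pathlib import Path

from framework.storage import session_summary


class FileConversationStore:
    """Illustrative stand-in for the real store."""

    def __init__(self, session_dir: Path) -> None:
        self._session_dir = session_dir
        self._parts_dir = session_dir / "conversations" / "parts"

    def write_part(self, seq: int, part: dict) -> None:
        # Durable part write first (flat layout: conversations/parts/*.json).
        self._parts_dir.mkdir(parents=True, exist_ok=True)
        path = self._parts_dir / f"{seq:010d}.json"
        path.write_text(json.dumps(part), encoding="utf-8")
        # Then fold into the sidecar cache. update_summary is best-effort
        # and swallows its own errors, so a cache failure never breaks
        # the part write.
        session_summary.update_summary(self._session_dir, part)
```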
+13 -12
@@ -1532,10 +1532,10 @@ def register_queen_lifecycle_tools(
# the skill content INLINE as tool arguments (skill_name,
# skill_description, skill_body, and optional skill_files for
# supporting scripts/references). The tool materializes the skill
# folder under ``~/.hive/colonies/{colony_name}/.hive/skills/{name}/``
# itself — colony-scoped, discovered as project scope by the
# colony's worker and invisible to every other colony on the
# machine — then forks.
# folder under ``~/.hive/colonies/{colony_name}/skills/{name}/``
# itself — colony-scoped (surfaced as ``colony_ui`` to that
# colony's workers, invisible to every other colony on the
# machine) — then forks.
#
# Why inline instead of a pre-authored folder path: earlier versions
# required the queen to write SKILL.md with her own write_file tool
@@ -1653,7 +1653,7 @@ def register_queen_lifecycle_tools(
The queen passes skill content inline: ``skill_name``,
``skill_description``, ``skill_body``, and optional
``skill_files`` (supporting scripts/references). The tool
writes ``~/.hive/colonies/{colony_name}/.hive/skills/{skill_name}/``
writes ``~/.hive/colonies/{colony_name}/skills/{skill_name}/``
(colony-scoped, only this colony's workers see it), then forks
the queen session into that colony directory and stores the
task in ``worker.json``. NOTHING RUNS after fork.
@@ -1739,7 +1739,7 @@ def register_queen_lifecycle_tools(
installed_skill, write_err, skill_replaced = write_skill(
draft,
target_root=colony_dir / ".hive" / "skills",
target_root=colony_dir / "skills",
replace_existing=True,
)
if write_err is not None or installed_skill is None:
@@ -1803,9 +1803,9 @@ def register_queen_lifecycle_tools(
# Fork the queen session into the colony directory. The fork
# copies conversations + writes worker.json + metadata.json.
# NO worker runs after this call. The new colony's worker
# inherits ~/.hive/skills/ on first run (whenever the user
# actually starts it), so the freshly installed skill is
# discoverable then.
# picks up its colony-scoped ``skills/`` directory (where we
# just wrote the skill) on first run via the ``colony_ui``
# extra scope, plus the usual user-scope ~/.hive/skills/.
try:
from framework.server.routes_execution import fork_session_into_colony
except Exception as e:
@@ -1965,8 +1965,8 @@ def register_queen_lifecycle_tools(
"ATOMIC CALL: you pass the skill content INLINE as "
"arguments (skill_name, skill_description, skill_body, "
"optional skill_files). The tool writes the folder at "
"~/.hive/colonies/{colony_name}/.hive/skills/{skill_name}/ "
"— scoped to THIS colony only (project scope); no other "
"~/.hive/colonies/{colony_name}/skills/{skill_name}/ "
"— scoped to THIS colony only; no other "
"colony on the machine can see it. Do NOT write the folder "
"yourself with write_file; folders hand-authored at "
"~/.hive/skills/ are user-scoped and LEAK to every colony. "
@@ -1979,7 +1979,8 @@ def register_queen_lifecycle_tools(
"worker.json. No worker is started. The user navigates to "
"the new colony when they're ready (or wires up a "
"trigger); at that point the worker reads the task from "
"worker.json and the skill from ~/.hive/skills/, and "
"worker.json and the skill from "
"~/.hive/colonies/{colony_name}/skills/, and "
"starts informed instead of clueless.\n\n"
"WHY THE SKILL IS REQUIRED: a fresh worker running "
"unattended has zero memory of your chat with the user. "
+121
@@ -3,6 +3,7 @@ import {
extractLastPhase,
sseEventToChatMessage,
formatAgentDisplayName,
newTokenAccumulator,
replayEventsToMessages,
} from "./chat-helpers";
import type { AgentEvent } from "@/api/types";
@@ -675,6 +676,126 @@ describe("formatAgentDisplayName", () => {
// extractLastPhase
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// TokenAccumulator (folded into replayEventsToMessages)
// ---------------------------------------------------------------------------
describe("replayEventsToMessages tokenAccumulator", () => {
it("sums llm_turn_complete payloads in a single pass", () => {
const events = [
makeEvent({
type: "llm_turn_complete",
stream_id: "queen",
node_id: "queen",
execution_id: "exec-1",
data: {
input_tokens: 100,
output_tokens: 50,
cached_tokens: 10,
cache_creation_tokens: 5,
cost_usd: 0.0015,
},
}),
makeEvent({
type: "llm_turn_complete",
stream_id: "queen",
node_id: "queen",
execution_id: "exec-2",
data: {
input_tokens: 200,
output_tokens: 75,
cached_tokens: 20,
cache_creation_tokens: 0,
cost_usd: 0.003,
},
}),
];
const tokens = newTokenAccumulator();
replayEventsToMessages(
events,
"queen-dm",
"Alexandra",
undefined,
undefined,
tokens,
);
expect(tokens.input).toBe(300);
expect(tokens.output).toBe(125);
expect(tokens.cached).toBe(30);
expect(tokens.cacheCreated).toBe(5);
expect(tokens.costUsd).toBeCloseTo(0.0045, 5);
});
it("does not mutate the accumulator when no llm_turn_complete events", () => {
const events = [
makeEvent({
type: "execution_started",
stream_id: "queen",
node_id: "queen",
execution_id: "exec-1",
}),
];
const tokens = newTokenAccumulator();
replayEventsToMessages(
events,
"queen-dm",
"Alexandra",
undefined,
undefined,
tokens,
);
expect(tokens.input).toBe(0);
expect(tokens.costUsd).toBe(0);
});
it("treats missing token fields as zero", () => {
const events = [
makeEvent({
type: "llm_turn_complete",
stream_id: "queen",
node_id: "queen",
execution_id: "exec-1",
data: { input_tokens: 50 }, // only one field set
}),
];
const tokens = newTokenAccumulator();
replayEventsToMessages(
events,
"queen-dm",
"Alexandra",
undefined,
undefined,
tokens,
);
expect(tokens.input).toBe(50);
expect(tokens.output).toBe(0);
expect(tokens.cached).toBe(0);
expect(tokens.cacheCreated).toBe(0);
expect(tokens.costUsd).toBe(0);
});
it("is a no-op when accumulator is omitted", () => {
const events = [
makeEvent({
type: "llm_turn_complete",
stream_id: "queen",
node_id: "queen",
execution_id: "exec-1",
data: { input_tokens: 100 },
}),
];
// Should not throw, and should return messages normally.
const restored = replayEventsToMessages(events, "queen-dm", "Alexandra");
expect(Array.isArray(restored)).toBe(true);
});
});
// ---------------------------------------------------------------------------
// extractLastPhase
// ---------------------------------------------------------------------------
describe("extractLastPhase", () => {
it("keeps incubating as a valid queen phase", () => {
expect(
+33
@@ -282,6 +282,26 @@ export function newReplayState(): ReplayState {
};
}
/**
* Token / cost accumulator for cold-restore.
*
* Folded into ``replayEventsToMessages`` so callers don't need a second
* pass over the event array just to sum ``llm_turn_complete`` payloads.
* The accumulator object is mutated in place — pass a fresh one in,
* read its fields out after the call.
*/
export interface TokenAccumulator {
input: number;
output: number;
cached: number;
cacheCreated: number;
costUsd: number;
}
export function newTokenAccumulator(): TokenAccumulator {
return { input: 0, output: 0, cached: 0, cacheCreated: 0, costUsd: 0 };
}
function toolLookupKey(
streamId: string,
executionId: string | null | undefined,
@@ -470,6 +490,7 @@ export function replayEventsToMessages(
agentDisplayName: string | undefined,
queenDisplayName?: string,
state: ReplayState = newReplayState(),
tokenAccumulator?: TokenAccumulator,
): ChatMessage[] {
// Upsert by id — later emissions for the same pill replace earlier ones.
const byId = new Map<string, ChatMessage>();
@@ -482,6 +503,18 @@ export function replayEventsToMessages(
const inheritedIds = new Set<string>();
for (const evt of events) {
// Fold the token-usage sum into this same loop so cold-restore
// doesn't need a second pass over the event array. SSE does not
// replay llm_turn_complete (see routes_events.py _REPLAY_TYPES) so
// there's no double-count risk against later live updates.
if (tokenAccumulator && evt.type === "llm_turn_complete" && evt.data) {
const d = evt.data as Record<string, unknown>;
tokenAccumulator.input += (d.input_tokens as number) || 0;
tokenAccumulator.output += (d.output_tokens as number) || 0;
tokenAccumulator.cached += (d.cached_tokens as number) || 0;
tokenAccumulator.cacheCreated += (d.cache_creation_tokens as number) || 0;
tokenAccumulator.costUsd += (d.cost_usd as number) || 0;
}
if (evt.type === "colony_fork_marker") {
if (markerEvent === null) {
markerEvent = evt;
+10 -16
@@ -14,6 +14,7 @@ import { usePendingQueue } from "@/hooks/use-pending-queue";
import type { AgentEvent, HistorySession } from "@/api/types";
import {
newReplayState,
newTokenAccumulator,
replayEvent,
replayEventsToMessages,
} from "@/lib/chat-helpers";
@@ -196,32 +197,25 @@ export default function QueenDM() {
// Use the stateful replay so tool_status pills are synthesized
// the same way the live SSE handler does — without this the
// refreshed queen DM shows zero tool activity.
// refreshed queen DM shows zero tool activity. The token
// accumulator folds the llm_turn_complete sum into the same
// pass so we don't iterate the (potentially large) event array
// twice. SSE does not replay llm_turn_complete (see
// routes_events.py _REPLAY_TYPES), so no double-count risk —
// live SSE deltas that may have already landed are kept via the
// functional merge below.
const replayState = newReplayState();
const seed = newTokenAccumulator();
const restored = replayEventsToMessages(
events,
"queen-dm",
queenName,
undefined,
replayState,
seed,
);
replayStateRef.current = replayState;
// Sum historical llm_turn_complete events so Tokens/Cost carry over
// across resume. SSE does not replay llm_turn_complete (see
// routes_events.py _REPLAY_TYPES), so no double-count risk — live
// SSE deltas that may have already landed are kept via functional
// merge below.
const seed = { input: 0, output: 0, cached: 0, cacheCreated: 0, costUsd: 0 };
for (const evt of events) {
if (evt.type !== "llm_turn_complete" || !evt.data) continue;
const d = evt.data as Record<string, unknown>;
seed.input += (d.input_tokens as number) || 0;
seed.output += (d.output_tokens as number) || 0;
seed.cached += (d.cached_tokens as number) || 0;
seed.cacheCreated += (d.cache_creation_tokens as number) || 0;
seed.costUsd += (d.cost_usd as number) || 0;
}
if (!cancelled()) {
setTokenUsage((prev) => ({
input: prev.input + seed.input,
+65
@@ -0,0 +1,65 @@
"""Tests for ``compaction_status.sweep_stale_in_progress``.
The sweep runs at server boot and rewrites orphaned ``in_progress`` markers
to ``failed``. Without it, any colony whose compaction was interrupted by a
server crash would block its first cold-load for the full ``await_completion``
timeout before falling through.
"""
from __future__ import annotations
import json
from pathlib import Path
from framework.server import compaction_status
def _seed(queens_root: Path, queen: str, session: str, status: str) -> Path:
sd = queens_root / queen / "sessions" / session
sd.mkdir(parents=True)
(sd / "compaction_status.json").write_text(
json.dumps({"status": status}),
encoding="utf-8",
)
return sd
def test_sweep_missing_root_is_noop(tmp_path: Path) -> None:
cleaned = compaction_status.sweep_stale_in_progress(tmp_path / "nope")
assert cleaned == 0
def test_sweep_clears_in_progress_markers(tmp_path: Path) -> None:
sd = _seed(tmp_path, "alpha", "session_1", "in_progress")
cleaned = compaction_status.sweep_stale_in_progress(tmp_path)
assert cleaned == 1
final = compaction_status.get_status(sd)
assert final is not None
assert final["status"] == "failed"
assert "server restarted" in final.get("error", "")
def test_sweep_leaves_done_and_failed_alone(tmp_path: Path) -> None:
done_dir = _seed(tmp_path, "alpha", "s_done", "done")
failed_dir = _seed(tmp_path, "alpha", "s_failed", "failed")
cleaned = compaction_status.sweep_stale_in_progress(tmp_path)
assert cleaned == 0
assert compaction_status.get_status(done_dir)["status"] == "done"
assert compaction_status.get_status(failed_dir)["status"] == "failed"
def test_sweep_handles_multiple_queens(tmp_path: Path) -> None:
_seed(tmp_path, "alpha", "s_a", "in_progress")
_seed(tmp_path, "beta", "s_b", "in_progress")
_seed(tmp_path, "gamma", "s_c", "done")
cleaned = compaction_status.sweep_stale_in_progress(tmp_path)
assert cleaned == 2
def test_sweep_skips_sessions_without_marker(tmp_path: Path) -> None:
# Session dir exists but no compaction_status.json
sd = tmp_path / "alpha" / "sessions" / "s_clean"
sd.mkdir(parents=True)
cleaned = compaction_status.sweep_stale_in_progress(tmp_path)
assert cleaned == 0
assert not (sd / "compaction_status.json").exists()
+11 -11
@@ -5,13 +5,13 @@ Contract (atomic inline-skill flow):
The queen calls ``create_colony(colony_name, task, skill_name,
skill_description, skill_body, skill_files?, tasks?)`` in a single
call. The tool materializes
``~/.hive/colonies/{colony_name}/.hive/skills/{skill_name}/`` from the
``~/.hive/colonies/{colony_name}/skills/{skill_name}/`` from the
inline content (writing SKILL.md and any supporting files), then forks
the queen session into that colony. The skill is **colony-scoped**:
discovered as project scope by that colony's workers, invisible to
every other colony on the machine. Reusing an existing skill name
inside the colony simply replaces the old skill; the queen owns her
skill namespace inside the colony.
surfaced via the ``colony_ui`` extra scope to that colony's workers,
invisible to every other colony on the machine. Reusing an existing
skill name inside the colony simply replaces the old skill; the queen
owns her skill namespace inside the colony.
We monkeypatch ``fork_session_into_colony`` so the test doesn't need a
real queen / session directory. We also redirect ``$HOME`` so the test's
@@ -72,8 +72,8 @@ def patched_home(tmp_path, monkeypatch):
def _colony_skill_path(home: Path, colony_name: str, skill_name: str) -> Path:
"""Where the tool now materializes the skill (colony-scoped project dir)."""
return home / ".hive" / "colonies" / colony_name / ".hive" / "skills" / skill_name
"""Where the tool now materializes the skill (colony-scoped, flat layout)."""
return home / ".hive" / "colonies" / colony_name / "skills" / skill_name
@pytest.fixture
@@ -218,7 +218,7 @@ async def test_colony_inherits_queen_override_state(patched_home: Path, patched_
@pytest.mark.asyncio
async def test_happy_path_materializes_skill_under_colony_dir(patched_home: Path, patched_fork: list[dict]) -> None:
"""Inline skill content is written to ~/.hive/colonies/{colony}/.hive/skills/{name}/."""
"""Inline skill content is written to ~/.hive/colonies/{colony}/skills/{name}/."""
executor, session = _make_executor()
description = (
@@ -314,8 +314,8 @@ async def test_two_colonies_do_not_share_skill_namespace(patched_home: Path, pat
)
assert payload_b.get("status") == "created", payload_b
alpha_dir = patched_home / ".hive" / "colonies" / "alpha" / ".hive" / "skills"
bravo_dir = patched_home / ".hive" / "colonies" / "bravo" / ".hive" / "skills"
alpha_dir = patched_home / ".hive" / "colonies" / "alpha" / "skills"
bravo_dir = patched_home / ".hive" / "colonies" / "bravo" / "skills"
user_skills = patched_home / ".hive" / "skills"
# Each colony only contains its own skill
@@ -676,7 +676,7 @@ async def test_triggers_invalid_cron_fails_before_fork(patched_home: Path, patch
assert "cron" in payload["error"]
# Fork was not called, skill not materialized.
assert len(patched_fork) == 0
assert not (patched_home / ".hive" / "colonies" / "bad_cron" / ".hive" / "skills" / "skill").exists()
assert not (patched_home / ".hive" / "colonies" / "bad_cron" / "skills" / "skill").exists()
@pytest.mark.asyncio
+135
@@ -0,0 +1,135 @@
"""Tests for ``_read_events_tail`` — the events.jsonl tail reader.
Covers both the small-file forward-scan path and the large-file
reverse-tail path. Verifies tail correctness, total count, and the
``truncated`` flag.
"""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from framework.server.routes_sessions import (
_EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES,
_read_events_tail,
)
def _write_jsonl(path: Path, count: int, *, line_padding: int = 0) -> None:
"""Write ``count`` JSON objects to ``path``, one per line.
Each object is ``{"i": <index>}`` plus optional padding to control file
size for testing the path threshold.
"""
pad = "x" * line_padding
with open(path, "w", encoding="utf-8") as f:
for i in range(count):
obj = {"i": i, "pad": pad} if pad else {"i": i}
f.write(json.dumps(obj) + "\n")
def test_empty_file(tmp_path: Path) -> None:
p = tmp_path / "events.jsonl"
p.write_text("", encoding="utf-8")
events, total, truncated = _read_events_tail(p, limit=2000)
assert events == []
assert total == 0
assert truncated is False
def test_small_file_under_limit(tmp_path: Path) -> None:
p = tmp_path / "events.jsonl"
_write_jsonl(p, count=5)
events, total, truncated = _read_events_tail(p, limit=2000)
assert [e["i"] for e in events] == [0, 1, 2, 3, 4]
assert total == 5
assert truncated is False
def test_small_file_over_limit_returns_tail(tmp_path: Path) -> None:
p = tmp_path / "events.jsonl"
_write_jsonl(p, count=100)
events, total, truncated = _read_events_tail(p, limit=10)
assert [e["i"] for e in events] == list(range(90, 100))
assert total == 100
assert truncated is True
def test_blank_lines_ignored(tmp_path: Path) -> None:
p = tmp_path / "events.jsonl"
with open(p, "w", encoding="utf-8") as f:
f.write('{"i": 0}\n')
f.write("\n")
f.write('{"i": 1}\n')
f.write(" \n")
f.write('{"i": 2}\n')
events, total, _ = _read_events_tail(p, limit=2000)
assert [e["i"] for e in events] == [0, 1, 2]
assert total == 3
def test_no_trailing_newline_small(tmp_path: Path) -> None:
p = tmp_path / "events.jsonl"
with open(p, "w", encoding="utf-8") as f:
f.write('{"i": 0}\n{"i": 1}')
events, total, _ = _read_events_tail(p, limit=2000)
assert [e["i"] for e in events] == [0, 1]
assert total == 2
def test_large_file_uses_reverse_tail(tmp_path: Path) -> None:
p = tmp_path / "events.jsonl"
# Pad each line so the file exceeds the reverse-tail threshold even with
# a modest event count. This forces the reverse-tail code path.
bytes_per_line = 4096
line_count = (_EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES // bytes_per_line) + 50
_write_jsonl(p, count=line_count, line_padding=bytes_per_line - 64)
assert p.stat().st_size > _EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES
events, total, truncated = _read_events_tail(p, limit=10)
assert [e["i"] for e in events] == list(range(line_count - 10, line_count))
assert total == line_count
assert truncated is True
def test_large_file_no_trailing_newline(tmp_path: Path) -> None:
p = tmp_path / "events.jsonl"
bytes_per_line = 4096
line_count = (_EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES // bytes_per_line) + 5
pad = "x" * (bytes_per_line - 64)
with open(p, "w", encoding="utf-8") as f:
for i in range(line_count - 1):
f.write(json.dumps({"i": i, "pad": pad}) + "\n")
# Last line, no trailing newline.
f.write(json.dumps({"i": line_count - 1, "pad": pad}))
assert p.stat().st_size > _EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES
events, total, _ = _read_events_tail(p, limit=3)
assert [e["i"] for e in events] == [line_count - 3, line_count - 2, line_count - 1]
assert total == line_count
def test_large_file_limit_larger_than_file(tmp_path: Path) -> None:
p = tmp_path / "events.jsonl"
bytes_per_line = 4096
line_count = (_EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES // bytes_per_line) + 3
_write_jsonl(p, count=line_count, line_padding=bytes_per_line - 64)
assert p.stat().st_size > _EVENTS_HISTORY_REVERSE_TAIL_THRESHOLD_BYTES
events, total, truncated = _read_events_tail(p, limit=line_count + 100)
assert [e["i"] for e in events] == list(range(line_count))
assert total == line_count
assert truncated is False
@pytest.mark.parametrize("limit", [1, 2, 7])
def test_small_path_various_limits(tmp_path: Path, limit: int) -> None:
p = tmp_path / "events.jsonl"
_write_jsonl(p, count=20)
events, total, truncated = _read_events_tail(p, limit=limit)
assert [e["i"] for e in events] == list(range(20 - limit, 20))
assert total == 20
assert truncated is True
+36
@@ -189,6 +189,42 @@ async def test_upload_zip_bundle_places_in_queen_scope(client: TestClient, _seed
assert (skill_dir / "scripts" / "helper.py").exists()
async def test_create_colony_skill_writes_to_flat_path(client: TestClient, _seed_colony) -> None:
"""POSTing a new colony skill must write to the flat ``colonies/{name}/skills/``
layout, not the legacy nested ``.hive/skills/`` path.
"""
payload = {
"name": "new-flat-skill",
"description": "Created via UI",
"body": "## Body\nstuff\n",
"enabled": True,
}
resp = await client.post("/api/colonies/research_one/skills", json=payload)
assert resp.status == 201
flat_md = _seed_colony / "skills" / "new-flat-skill" / "SKILL.md"
nested_md = _seed_colony / ".hive" / "skills" / "new-flat-skill" / "SKILL.md"
assert flat_md.exists(), "new colony skill should land in flat skills/ dir"
assert not nested_md.exists(), "new colony skill must NOT land in legacy nested .hive/skills/"
async def test_legacy_nested_colony_skill_still_lists(client: TestClient, _seed_colony) -> None:
"""Pre-flatten colonies keep their skills under ``colonies/{name}/.hive/skills/``.
They must continue to surface in GET responses.
"""
skill_dir = _seed_colony / ".hive" / "skills" / "legacy-flat-test"
skill_dir.mkdir(parents=True)
(skill_dir / "SKILL.md").write_text(
"---\nname: legacy-flat-test\ndescription: Legacy nested\n---\n\nbody\n",
encoding="utf-8",
)
resp = await client.get("/api/colonies/research_one/skills")
assert resp.status == 200
rows = {r["name"]: r for r in (await resp.json())["skills"]}
assert "legacy-flat-test" in rows
async def test_patch_does_not_mislabel_legacy_colony_skill_as_framework(client: TestClient, _seed_colony) -> None:
"""Regression: toggling a legacy colony skill (no ledger entry yet)
must not stamp provenance=FRAMEWORK on the new entry. Before the fix,
+149
@@ -0,0 +1,149 @@
"""Tests for the session summary sidecar cache."""
from __future__ import annotations
import json
import time
from pathlib import Path
from framework.storage import session_summary
def _make_session_dir(tmp_path: Path) -> Path:
d = tmp_path / "session_x"
(d / "conversations" / "parts").mkdir(parents=True)
return d
def _write_part(session_dir: Path, seq: int, data: dict) -> None:
parts_dir = session_dir / "conversations" / "parts"
p = parts_dir / f"{seq:010d}.json"
p.write_text(json.dumps(data), encoding="utf-8")
def test_is_client_facing() -> None:
assert session_summary.is_client_facing({"role": "user", "content": "hi"})
assert session_summary.is_client_facing({"role": "assistant", "content": "ok"})
assert not session_summary.is_client_facing({"role": "tool", "content": "x"})
assert not session_summary.is_client_facing(
{"role": "assistant", "content": "", "tool_calls": [{"id": "1"}]}
)
assert not session_summary.is_client_facing({"is_transition_marker": True})
def test_rebuild_empty_session(tmp_path: Path) -> None:
d = _make_session_dir(tmp_path)
summary = session_summary.rebuild_summary(d)
assert summary is not None
assert summary["message_count"] == 0
assert summary["last_message"] is None
assert summary["last_active_at"] == 0.0
# Persisted to disk
assert (d / "summary.json").exists()
def test_rebuild_counts_only_client_facing(tmp_path: Path) -> None:
d = _make_session_dir(tmp_path)
_write_part(d, 0, {"role": "user", "content": "hello", "seq": 0, "created_at": 1.0})
_write_part(d, 1, {"role": "assistant", "content": "", "tool_calls": [{"id": "1"}], "seq": 1, "created_at": 2.0})
_write_part(d, 2, {"role": "tool", "content": "result", "seq": 2, "created_at": 3.0})
_write_part(d, 3, {"role": "assistant", "content": "answer", "seq": 3, "created_at": 4.0})
summary = session_summary.rebuild_summary(d)
assert summary["message_count"] == 2 # user + final assistant
assert summary["last_message"] == "answer"
assert summary["last_active_at"] == 4.0
assert summary["last_part_seq"] == 3
def test_rebuild_truncates_long_message(tmp_path: Path) -> None:
d = _make_session_dir(tmp_path)
long = "a" * 500
_write_part(d, 0, {"role": "assistant", "content": long, "seq": 0, "created_at": 1.0})
summary = session_summary.rebuild_summary(d)
assert summary["last_message"] is not None
assert len(summary["last_message"]) == 120
def test_rebuild_handles_anthropic_content_blocks(tmp_path: Path) -> None:
d = _make_session_dir(tmp_path)
_write_part(
d,
0,
{
"role": "assistant",
"content": [
{"type": "text", "text": "block one"},
{"type": "text", "text": "block two"},
],
"seq": 0,
"created_at": 1.0,
},
)
summary = session_summary.rebuild_summary(d)
assert summary["last_message"] == "block one block two"
def test_update_summary_increments(tmp_path: Path) -> None:
d = _make_session_dir(tmp_path)
session_summary.update_summary(d, {"role": "user", "content": "hi", "seq": 0, "created_at": 1.0})
s = session_summary.read_summary(d)
assert s["message_count"] == 1
assert s["last_active_at"] == 1.0
session_summary.update_summary(d, {"role": "assistant", "content": "ok", "seq": 1, "created_at": 2.0})
s = session_summary.read_summary(d)
assert s["message_count"] == 2
assert s["last_message"] == "ok"
assert s["last_active_at"] == 2.0
def test_update_summary_skips_non_client_parts(tmp_path: Path) -> None:
d = _make_session_dir(tmp_path)
session_summary.update_summary(d, {"role": "tool", "content": "x", "seq": 0, "created_at": 1.0})
session_summary.update_summary(
d, {"role": "assistant", "content": "", "tool_calls": [{"id": "x"}], "seq": 1, "created_at": 2.0}
)
# Neither part bumps the count or creates a summary file
assert session_summary.read_summary(d) is None
def test_update_summary_user_message_keeps_prior_assistant_snippet(tmp_path: Path) -> None:
d = _make_session_dir(tmp_path)
session_summary.update_summary(d, {"role": "assistant", "content": "answer", "seq": 0, "created_at": 1.0})
session_summary.update_summary(d, {"role": "user", "content": "next q", "seq": 1, "created_at": 2.0})
s = session_summary.read_summary(d)
assert s["message_count"] == 2
# Last assistant snippet preserved through a user message
assert s["last_message"] == "answer"
assert s["last_active_at"] == 2.0
def test_is_stale_when_missing(tmp_path: Path) -> None:
d = _make_session_dir(tmp_path)
assert session_summary.is_stale(d) is True
def test_is_stale_after_part_write(tmp_path: Path) -> None:
d = _make_session_dir(tmp_path)
# Create a summary, then add a part with a newer mtime.
session_summary.update_summary(d, {"role": "assistant", "content": "x", "seq": 0, "created_at": 1.0})
assert session_summary.is_stale(d) is False
time.sleep(0.05)
_write_part(d, 1, {"role": "assistant", "content": "y", "seq": 1})
assert session_summary.is_stale(d) is True
def test_rebuild_picks_up_node_based_layout(tmp_path: Path) -> None:
d = tmp_path / "sess"
(d / "conversations" / "node_a" / "parts").mkdir(parents=True)
(d / "conversations" / "node_b" / "parts").mkdir(parents=True)
p1 = d / "conversations" / "node_a" / "parts" / "0000000000.json"
p1.write_text(json.dumps({"role": "user", "content": "hi", "seq": 0, "created_at": 1.0}))
p2 = d / "conversations" / "node_b" / "parts" / "0000000001.json"
p2.write_text(json.dumps({"role": "assistant", "content": "yo", "seq": 1, "created_at": 2.0}))
summary = session_summary.rebuild_summary(d)
assert summary["message_count"] == 2
assert summary["last_message"] == "yo"