Merge branch 'feature/colony-sqlite' into feature/clean-context

This commit is contained in:
Timothy
2026-04-17 04:12:35 -07:00
44 changed files with 3414 additions and 492 deletions
+11
View File
@@ -0,0 +1,11 @@
"""Append one handled reply to the X rapid-post ledger."""
import json

LEDGER = '/home/timothy/aden/hive/x_rapid_ledger.json'

with open(LEDGER, 'r') as fh:
    ledger = json.load(fh)

# Record the tweet preview so the duplicate-check script can recognise
# this tweet as already answered.
ledger['replies'].append({
    'original_preview': 'Alright, I give in. Heres my picture with the boss, courtesy of @johnkrausphotos. Oh, and hook em!'
})

with open(LEDGER, 'w') as fh:
    json.dump(ledger, fh, indent=2)
+11
View File
@@ -0,0 +1,11 @@
"""Print YES if the given tweet preview is already in the ledger, else NO.

Usage: script.py "<original_preview text>"
"""
import json, sys

with open('/home/timothy/aden/hive/x_rapid_ledger.json', 'r') as fh:
    ledger = json.load(fh)

needle = sys.argv[1]
found = any(
    entry.get('original_preview') == needle for entry in ledger['replies']
)
print("YES" if found else "NO")
+10 -2
View File
@@ -184,8 +184,16 @@ _QUEEN_INDEPENDENT_TOOLS = [
"search_files",
"run_command",
"undo_changes",
# Parallel fan-out (Phase 4 unified ColonyRuntime)
"run_parallel_workers",
# NOTE (2026-04-16): ``run_parallel_workers`` was removed from the
# independent phase. The queen's pure DM mode is for conversation
# with the user; spawning workers from here puts their activity
# into a chat surface that's supposed to stay queen↔user only.
# Users who want to fan out parallel work should (a) use
# ``create_colony`` to fork into a persistent colony (where
# worker activity has its own page), or (b) load an agent via
# build/stage and use ``run_parallel_workers`` in the running
# phase where a worker context already exists.
#
# Fork this session into a persistent colony for headless /
# recurring / background work that needs to keep running in
# parallel to (or after) this chat.
+48 -2
View File
@@ -631,6 +631,43 @@ class ColonyRuntime:
spawn_tools = tools if tools is not None else self._tools
spawn_executor = tool_executor or self._tool_executor
# Colony progress tracker: when the caller supplied a db_path
# in input_data, this worker is part of a SQLite task queue
# and must see the hive.colony-progress-tracker skill body in
# its system prompt from turn 0. Rebuild the catalog with the
# skill pre-activated; falls back to the colony default when
# no db_path is present.
_spawn_catalog = self.skills_catalog_prompt
_spawn_skill_dirs = self.skill_dirs
if isinstance(input_data, dict) and input_data.get("db_path"):
try:
from framework.skills.config import SkillsConfig
from framework.skills.manager import SkillsManager, SkillsManagerConfig
_pre = SkillsManager(
SkillsManagerConfig(
skills_config=SkillsConfig.from_agent_vars(
skills=["hive.colony-progress-tracker"],
),
)
)
_pre.load()
_spawn_catalog = _pre.skills_catalog_prompt
_spawn_skill_dirs = list(_pre.allowlisted_dirs) if hasattr(_pre, "allowlisted_dirs") else self.skill_dirs
logger.info(
"spawn: pre-activated hive.colony-progress-tracker "
"(catalog %d→%d chars) for worker with db_path=%s",
len(self.skills_catalog_prompt),
len(_spawn_catalog),
input_data.get("db_path"),
)
except Exception as exc:
logger.warning(
"spawn: failed to pre-activate colony-progress-tracker "
"skill, falling back to base catalog: %s",
exc,
)
# Resolve the SSE stream_id once. When the caller didn't supply
# one we use the per-worker fan-out tag (filtered out by the
# SSE handler). When the caller passed an explicit value we
@@ -685,9 +722,9 @@ class ColonyRuntime:
llm=self._llm,
available_tools=list(spawn_tools),
accounts_prompt=self._accounts_prompt,
skills_catalog_prompt=self.skills_catalog_prompt,
skills_catalog_prompt=_spawn_catalog,
protocols_prompt=self.protocols_prompt,
skill_dirs=self.skill_dirs,
skill_dirs=_spawn_skill_dirs,
execution_id=worker_id,
stream_id=explicit_stream_id or f"worker:{worker_id}",
)
@@ -720,6 +757,8 @@ class ColonyRuntime:
async def spawn_batch(
self,
tasks: list[dict[str, Any]],
*,
tools_override: list[Any] | None = None,
) -> list[str]:
"""Spawn a batch of parallel workers, one per task spec.
@@ -732,6 +771,12 @@ class ColonyRuntime:
The overseer's ``run_parallel_workers`` tool is the usual
caller; it pairs ``spawn_batch`` + ``wait_for_worker_reports``
into a single fan-out/fan-in primitive.
When ``tools_override`` is supplied, every spawned worker
receives that tool list instead of the colony's default. Used
by ``run_parallel_workers`` to drop tools whose credentials
failed the pre-flight check (so the spawned workers don't
waste a startup trying to use them).
"""
worker_ids: list[str] = []
for spec in tasks:
@@ -743,6 +788,7 @@ class ColonyRuntime:
task=task_text,
count=1,
input_data=task_data or {"task": task_text},
tools=tools_override,
)
worker_ids.extend(ids)
return worker_ids
+491
View File
@@ -0,0 +1,491 @@
"""Per-colony SQLite task queue + progress ledger.
Every colony gets its own ``progress.db`` under ``~/.hive/colonies/{name}/data/``.
The DB holds the colony's task queue plus per-task step and SOP checklist
rows. Workers claim tasks atomically, write progress as they execute, and
verify SOP gates before marking a task done. This gives cross-run memory
that the existing per-iteration stall detectors don't have.
The DB is driven by agents via the ``sqlite3`` CLI through
``execute_command_tool``. This module handles framework-side lifecycle:
creation, migration, queen-side bulk seeding, stale-claim reclamation.
Concurrency model:
- WAL mode on from day one so 100 concurrent workers don't serialize.
- Workers hold NO long-running connection — they ``sqlite3`` per call,
which naturally releases locks between LLM turns.
- Atomic claim via ``BEGIN IMMEDIATE; UPDATE tasks SET status='claimed'
WHERE id=(SELECT ... LIMIT 1)``. The subquery-form UPDATE runs inside
the immediate transaction so racers either win the row or find zero
affected rows.
- Stale-claim reclaimer runs on host startup: claims older than
``stale_after_minutes`` get returned to ``pending`` and the row's
``retry_count`` increments. When ``retry_count >= max_retries`` the
row is moved to ``failed`` instead.
All writes go through ``BEGIN IMMEDIATE`` so racing readers see
consistent snapshots.
"""
from __future__ import annotations
import json
import logging
import sqlite3
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
SCHEMA_VERSION = 1
_SCHEMA_V1 = """
CREATE TABLE IF NOT EXISTS tasks (
id TEXT PRIMARY KEY,
seq INTEGER,
priority INTEGER NOT NULL DEFAULT 0,
goal TEXT NOT NULL,
payload TEXT,
status TEXT NOT NULL DEFAULT 'pending',
worker_id TEXT,
claim_token TEXT,
claimed_at TEXT,
started_at TEXT,
completed_at TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
retry_count INTEGER NOT NULL DEFAULT 0,
max_retries INTEGER NOT NULL DEFAULT 3,
last_error TEXT,
parent_task_id TEXT REFERENCES tasks(id) ON DELETE SET NULL,
source TEXT
);
CREATE TABLE IF NOT EXISTS steps (
id TEXT PRIMARY KEY,
task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
seq INTEGER NOT NULL,
title TEXT NOT NULL,
detail TEXT,
status TEXT NOT NULL DEFAULT 'pending',
evidence TEXT,
worker_id TEXT,
started_at TEXT,
completed_at TEXT,
UNIQUE (task_id, seq)
);
CREATE TABLE IF NOT EXISTS sop_checklist (
id TEXT PRIMARY KEY,
task_id TEXT NOT NULL REFERENCES tasks(id) ON DELETE CASCADE,
key TEXT NOT NULL,
description TEXT NOT NULL,
required INTEGER NOT NULL DEFAULT 1,
done_at TEXT,
done_by TEXT,
note TEXT,
UNIQUE (task_id, key)
);
CREATE TABLE IF NOT EXISTS colony_meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_tasks_claimable
ON tasks(status, priority DESC, seq, created_at)
WHERE status = 'pending';
CREATE INDEX IF NOT EXISTS idx_steps_task_seq
ON steps(task_id, seq);
CREATE INDEX IF NOT EXISTS idx_sop_required_open
ON sop_checklist(task_id, required, done_at);
CREATE INDEX IF NOT EXISTS idx_tasks_status
ON tasks(status, updated_at);
"""
_PRAGMAS = (
"PRAGMA journal_mode = WAL;",
"PRAGMA synchronous = NORMAL;",
"PRAGMA foreign_keys = ON;",
"PRAGMA busy_timeout = 5000;",
)
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat(timespec="seconds")
def _new_id() -> str:
return str(uuid.uuid4())
def _connect(db_path: Path) -> sqlite3.Connection:
"""Open a connection with the standard pragmas applied.
WAL mode is sticky on the file once set, so re-applying on every
open is cheap. The other pragmas are per-connection and must be
set each time.
"""
con = sqlite3.connect(str(db_path), isolation_level=None, timeout=5.0)
for pragma in _PRAGMAS:
con.execute(pragma)
return con
def ensure_progress_db(colony_dir: Path) -> Path:
    """Create or migrate ``{colony_dir}/data/progress.db``.

    Safe to call repeatedly on an already-initialized colony. Returns
    the absolute path of the DB file.

    What happens on each call:
      1. The ``data/`` subdirectory is created if missing.
      2. The DB is opened (creating the file on first use) with WAL +
         standard pragmas via ``_connect``.
      3. ``PRAGMA user_version`` is compared to ``SCHEMA_VERSION``; an
         out-of-date DB gets the (idempotent) schema script, a version
         bump, and a ``colony_meta`` bookkeeping row.
      4. Stale worker claims left by previous runs are reclaimed.
      5. Worker ``*.json`` configs in the colony directory are patched
         with ``input_data.db_path`` / ``input_data.colony_id`` so
         colonies forked before this feature landed get the tracker
         wiring on their next spawn.
    """
    colony_root = Path(colony_dir)
    data_dir = colony_root / "data"
    data_dir.mkdir(parents=True, exist_ok=True)
    db_path = data_dir / "progress.db"
    con = _connect(db_path)
    try:
        (version,) = con.execute("PRAGMA user_version").fetchone()
        if version < SCHEMA_VERSION:
            # All DDL is IF NOT EXISTS, so a partially created DB
            # converges to the full schema here.
            con.executescript(_SCHEMA_V1)
            con.execute(f"PRAGMA user_version = {SCHEMA_VERSION}")
            con.execute(
                "INSERT OR REPLACE INTO colony_meta(key, value, updated_at) "
                "VALUES (?, ?, ?)",
                ("schema_version", str(SCHEMA_VERSION), _now_iso()),
            )
            logger.info(
                "progress_db: initialized schema v%d at %s", SCHEMA_VERSION, db_path
            )
        stale_count = _reclaim_stale_inner(con, stale_after_minutes=15)
        if stale_count:
            logger.info(
                "progress_db: reclaimed %d stale claims at startup (%s)",
                stale_count,
                db_path,
            )
    finally:
        con.close()
    resolved = db_path.resolve()
    _patch_worker_configs(colony_root, resolved)
    return resolved
def _patch_worker_configs(colony_dir: Path, db_path: Path) -> int:
"""Inject ``input_data.db_path`` + ``input_data.colony_id`` into
existing ``worker.json`` files in a colony directory.
Runs on every ``ensure_progress_db`` call so colonies that were
forked before this feature landed get their worker spawn messages
patched in place. Idempotent: if ``input_data`` already contains
the correct ``db_path``, the file is not rewritten.
Returns the number of files that were actually modified (0 on
the common case of already-patched colonies).
"""
colony_id = colony_dir.name
abs_db = str(db_path)
patched = 0
for worker_cfg in colony_dir.glob("*.json"):
# Only patch files that look like worker configs (have the
# worker_meta shape). ``metadata.json`` and ``triggers.json``
# are colony-level and must not be touched.
if worker_cfg.name in ("metadata.json", "triggers.json"):
continue
try:
data = json.loads(worker_cfg.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
continue
if not isinstance(data, dict) or "system_prompt" not in data:
# Not a worker config (lacks the worker_meta schema).
continue
input_data = data.get("input_data")
if not isinstance(input_data, dict):
input_data = {}
if (
input_data.get("db_path") == abs_db
and input_data.get("colony_id") == colony_id
):
continue # already patched
input_data["db_path"] = abs_db
input_data["colony_id"] = colony_id
data["input_data"] = input_data
try:
worker_cfg.write_text(
json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
)
patched += 1
except OSError as e:
logger.warning(
"progress_db: failed to patch worker config %s: %s", worker_cfg, e
)
if patched:
logger.info(
"progress_db: patched %d worker config(s) in colony '%s' with db_path",
patched,
colony_id,
)
return patched
def ensure_all_colony_dbs(colonies_root: Path | None = None) -> list[Path]:
    """Ensure a ``progress.db`` exists for every colony under *colonies_root*.

    Runs at framework host startup: backfills DBs for older colonies
    and triggers the stale-claim reclaimer on each in a single pass.
    Defaults to ``~/.hive/colonies`` when no root is given; a missing
    root yields an empty list. Per-colony failures are logged and do
    not stop the sweep.
    """
    root = (
        Path.home() / ".hive" / "colonies"
        if colonies_root is None
        else colonies_root
    )
    if not root.is_dir():
        return []
    ensured: list[Path] = []
    for colony_dir in sorted(root.iterdir()):
        if not colony_dir.is_dir():
            continue
        try:
            ensured.append(ensure_progress_db(colony_dir))
        except Exception as e:
            logger.warning(
                "progress_db: failed to ensure DB for colony '%s': %s",
                colony_dir.name,
                e,
            )
    return ensured
def seed_tasks(
    db_path: Path,
    tasks: list[dict[str, Any]],
    *,
    source: str = "queen_create",
) -> list[str]:
    """Bulk-insert tasks (with optional nested steps + sop_items).

    Each task dict accepts:
    - goal: str (required)
    - seq: int (optional ordering hint)
    - priority: int (default 0)
    - payload: dict | str | None (stored as JSON text)
    - max_retries: int (default 3)
    - parent_task_id: str | None
    - steps: list[{"title": str, "detail"?: str}] (optional)
    - sop_items: list[{"key": str, "description": str, "required"?: bool, "note"?: str}] (optional)

    All rows are inserted in a single BEGIN IMMEDIATE transaction so
    10k-row seeds finish in one disk flush. Returns the created task ids
    in the same order as input. Raises ValueError on a malformed task
    dict, in which case the entire batch is rolled back (all-or-nothing).
    """
    # Fast path: don't open a connection for an empty seed.
    if not tasks:
        return []
    created_ids: list[str] = []
    # One timestamp for the whole batch so created_at/updated_at agree
    # across every row of a single seed call.
    now = _now_iso()
    con = _connect(Path(db_path))
    try:
        # The connection is in autocommit mode (isolation_level=None),
        # so this explicit BEGIN IMMEDIATE both groups the batch into
        # one transaction and takes the write lock up front.
        con.execute("BEGIN IMMEDIATE")
        for idx, task in enumerate(tasks):
            goal = task.get("goal")
            if not goal:
                # Aborts the whole batch — rolled back in the except
                # handler below.
                raise ValueError(f"task[{idx}] missing required 'goal' field")
            # A caller-supplied id enables idempotent re-seeds;
            # otherwise each row gets a fresh UUID.
            task_id = task.get("id") or _new_id()
            payload = task.get("payload")
            if payload is not None and not isinstance(payload, str):
                # Dict/list payloads are serialized to JSON text.
                payload = json.dumps(payload, ensure_ascii=False)
            con.execute(
                """
                INSERT INTO tasks (
                    id, seq, priority, goal, payload, status,
                    created_at, updated_at, max_retries, parent_task_id, source
                ) VALUES (?, ?, ?, ?, ?, 'pending', ?, ?, ?, ?, ?)
                """,
                (
                    task_id,
                    task.get("seq"),
                    int(task.get("priority", 0)),
                    goal,
                    payload,
                    now,
                    now,
                    int(task.get("max_retries", 3)),
                    task.get("parent_task_id"),
                    source,
                ),
            )
            # Nested steps: seq defaults to 1-based position unless the
            # step dict provides its own (UNIQUE(task_id, seq) applies).
            for step_seq, step in enumerate(task.get("steps") or [], start=1):
                if not step.get("title"):
                    raise ValueError(
                        f"task[{idx}].steps[{step_seq - 1}] missing required 'title'"
                    )
                con.execute(
                    """
                    INSERT INTO steps (id, task_id, seq, title, detail, status)
                    VALUES (?, ?, ?, ?, ?, 'pending')
                    """,
                    (
                        _new_id(),
                        task_id,
                        step.get("seq", step_seq),
                        step["title"],
                        step.get("detail"),
                    ),
                )
            # SOP checklist rows; ``required`` defaults to True and is
            # stored as 1/0 for SQLite.
            for sop in task.get("sop_items") or []:
                key = sop.get("key")
                description = sop.get("description")
                if not key or not description:
                    raise ValueError(
                        f"task[{idx}].sop_items missing 'key' or 'description'"
                    )
                con.execute(
                    """
                    INSERT INTO sop_checklist
                        (id, task_id, key, description, required, note)
                    VALUES (?, ?, ?, ?, ?, ?)
                    """,
                    (
                        _new_id(),
                        task_id,
                        key,
                        description,
                        1 if sop.get("required", True) else 0,
                        sop.get("note"),
                    ),
                )
            created_ids.append(task_id)
        con.execute("COMMIT")
    except Exception:
        # Any failure undoes the whole batch (all-or-nothing seeding).
        con.execute("ROLLBACK")
        raise
    finally:
        con.close()
    return created_ids
def enqueue_task(
    db_path: Path,
    goal: str,
    *,
    steps: list[dict[str, Any]] | None = None,
    sop_items: list[dict[str, Any]] | None = None,
    payload: Any = None,
    priority: int = 0,
    parent_task_id: str | None = None,
    source: str = "enqueue_tool",
) -> str:
    """Append a single task to an existing queue.

    Thin convenience wrapper over ``seed_tasks``: builds a one-element
    batch and returns the id of the created row. See ``seed_tasks`` for
    field semantics.
    """
    spec = {
        "goal": goal,
        "steps": steps,
        "sop_items": sop_items,
        "payload": payload,
        "priority": priority,
        "parent_task_id": parent_task_id,
    }
    created = seed_tasks(db_path, [spec], source=source)
    return created[0]
def _reclaim_stale_inner(
con: sqlite3.Connection, *, stale_after_minutes: int
) -> int:
"""Reclaim stale claims. Runs inside an existing open connection.
Two-step:
1. Tasks past max_retries go to 'failed' with last_error populated.
2. Remaining stale claims return to 'pending', retry_count++.
"""
cutoff_expr = f"datetime('now', '-{int(stale_after_minutes)} minutes')"
con.execute("BEGIN IMMEDIATE")
try:
con.execute(
f"""
UPDATE tasks
SET status = 'failed',
last_error = COALESCE(last_error, 'exceeded max_retries after stale claim'),
completed_at = datetime('now'),
updated_at = datetime('now')
WHERE status IN ('claimed', 'in_progress')
AND claimed_at IS NOT NULL
AND claimed_at < {cutoff_expr}
AND retry_count >= max_retries
"""
)
cur = con.execute(
f"""
UPDATE tasks
SET status = 'pending',
worker_id = NULL,
claim_token = NULL,
claimed_at = NULL,
started_at = NULL,
retry_count = retry_count + 1,
updated_at = datetime('now')
WHERE status IN ('claimed', 'in_progress')
AND claimed_at IS NOT NULL
AND claimed_at < {cutoff_expr}
AND retry_count < max_retries
"""
)
reclaimed = cur.rowcount or 0
con.execute("COMMIT")
return reclaimed
except Exception:
con.execute("ROLLBACK")
raise
def reclaim_stale(db_path: Path, stale_after_minutes: int = 15) -> int:
    """Open a connection, reclaim stale claims, and close it again.

    Public wrapper around ``_reclaim_stale_inner`` for callers that do
    not already hold a connection. Returns the number of tasks returned
    to ``'pending'``.
    """
    connection = _connect(Path(db_path))
    try:
        return _reclaim_stale_inner(
            connection, stale_after_minutes=stale_after_minutes
        )
    finally:
        connection.close()
# Public API: framework-side lifecycle helpers only. Agents drive the
# DB itself through the sqlite3 CLI, so row-level operations are not
# exported here.
__all__ = [
    "SCHEMA_VERSION",
    "ensure_progress_db",
    "ensure_all_colony_dbs",
    "seed_tasks",
    "enqueue_task",
    "reclaim_stale",
]
+12 -1
View File
@@ -1404,7 +1404,18 @@ class AgentLoader:
credential_store=credential_store,
)
runner._agent_default_skills = None
runner._agent_skills = None
# Colony workers attached to a SQLite task queue get the
# colony-progress-tracker skill pre-activated so its full
# claim / step / SOP-gate protocol lands in the system prompt
# on turn 0, bypassing the progressive-disclosure catalog
# lookup. Triggered by the presence of ``input_data.db_path``
# in worker.json (written by fork_session_into_colony and
# backfilled by ensure_progress_db for pre-existing colonies).
_preactivate: list[str] = []
_input_data = first_worker.get("input_data") or {}
if isinstance(_input_data, dict) and _input_data.get("db_path"):
_preactivate.append("hive.colony-progress-tracker")
runner._agent_skills = _preactivate or None
return runner
def register_tool(
+11 -1
View File
@@ -497,12 +497,22 @@ class ToolRegistry:
config["cwd"] = str(resolved_cwd)
return config
# For coder_tools_server, inject --project-root so writes go to the expected workspace
# For coder_tools_server, inject --project-root so reads land
# in the expected workspace (hive repo, for framework skills
# and docs), and inject --write-root so writes land under
# ~/.hive/workspace/ instead of polluting the git checkout
# with queen-authored skills, ledgers, and scripts. Without
# the split, every ``write_file`` call from the queen landed
# in the hive repo root.
if script_name and "coder_tools" in script_name:
project_root = str(resolved_cwd.parent.resolve())
args = list(args)
if "--project-root" not in args:
args.extend(["--project-root", project_root])
if "--write-root" not in args:
_write_root = Path.home() / ".hive" / "workspace"
_write_root.mkdir(parents=True, exist_ok=True)
args.extend(["--write-root", str(_write_root)])
config["args"] = args
if os.name == "nt":
+22 -9
View File
@@ -51,13 +51,18 @@ DEFAULT_EVENT_TYPES = [
# Keepalive interval in seconds
KEEPALIVE_INTERVAL = 15.0
# Phase 5 SSE filter: parallel-worker streams (stream_id="worker:{uuid}")
# publish high-frequency LLM deltas / tool calls that would flood the
# user's queen DM chat. We let only this small allowlist of worker
# events through to the queen-chat SSE so the frontend can render
# fan-out lifecycle and structured fan-in reports without seeing the
# raw worker chatter. Per-worker SSE panels (Phase 5b) bypass this
# filter via a dedicated /workers/{worker_id}/events route.
# Session-SSE worker filter: workers run outside the queen's DM
# chat. Worker activity is observable via the dedicated
# ``/api/workers/{worker_id}/events`` per-worker SSE route, not via
# the session chat. This keeps the queen↔user conversation clean of
# tool-call chatter regardless of whether the worker was spawned by
# ``run_agent_with_input`` (stream_id="worker") or
# ``run_parallel_workers`` (stream_id="worker:{uuid}").
#
# Lifecycle events the frontend needs for fan-in summaries
# (SUBAGENT_REPORT, EXECUTION_COMPLETED, EXECUTION_FAILED) are still
# allowed through so the queen can show "N workers done" surfaces
# without exposing the per-turn chatter.
_WORKER_EVENT_ALLOWLIST = {
EventType.SUBAGENT_REPORT.value,
EventType.EXECUTION_COMPLETED.value,
@@ -66,9 +71,17 @@ _WORKER_EVENT_ALLOWLIST = {
def _is_worker_noise(evt_dict: dict) -> bool:
"""True if the event is a parallel-worker event we should drop."""
"""True if the event belongs to a worker stream and should not
surface in the queen DM chat.
Matches any stream starting with ``worker`` both the bare
``"worker"`` tag used by single-worker spawns and the
``"worker:{uuid}"`` tag used by parallel fan-outs. The allowlist
carves out the three terminal/lifecycle events the UI still
needs to render fan-in summaries.
"""
stream_id = evt_dict.get("stream_id") or ""
if not stream_id.startswith("worker:"):
if not stream_id.startswith("worker"):
return False
return evt_dict.get("type") not in _WORKER_EVENT_ALLOWLIST
+73 -2
View File
@@ -644,6 +644,7 @@ async def handle_colony_spawn(request: web.Request) -> web.Response:
body = await request.json()
colony_name = body.get("colony_name", "").strip()
task = body.get("task", "").strip()
tasks = body.get("tasks")
if not colony_name:
return web.json_response({"error": "colony_name is required"}, status=400)
@@ -661,6 +662,7 @@ async def handle_colony_spawn(request: web.Request) -> web.Response:
session=session,
colony_name=colony_name,
task=task,
tasks=tasks if isinstance(tasks, list) else None,
)
except Exception as e:
logger.exception("colony_spawn fork failed")
@@ -674,6 +676,7 @@ async def fork_session_into_colony(
session: Any,
colony_name: str,
task: str,
tasks: list[dict] | None = None,
) -> dict:
"""Fork a queen session into a colony directory.
@@ -690,8 +693,14 @@ async def fork_session_into_colony(
the colony resumes with the queen's entire conversation history.
3. Multiple independent sessions can be created against the same colony,
giving parallel execution capacity without separate worker configs.
4. Initializes (or ensures) ``data/progress.db`` — the colony's SQLite
task queue + progress ledger. When *tasks* is provided, the queen-
authored task batch is seeded into the queue in one transaction.
The absolute DB path is threaded into the worker's ``input_data``
so spawned workers see it in their first user message.
Returns ``{"colony_path", "colony_name", "queen_session_id", "is_new"}``.
Returns ``{"colony_path", "colony_name", "queen_session_id", "is_new",
"db_path", "task_ids"}``.
"""
import asyncio
import json
@@ -700,7 +709,8 @@ async def fork_session_into_colony(
from pathlib import Path
from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
from framework.agent_loop.types import AgentContext
from framework.agent_loop.types import AgentContext, AgentSpec
from framework.host.progress_db import ensure_progress_db, seed_tasks
from framework.server.session_manager import _queen_session_dir
queen_loop: AgentLoop = session.queen_executor.node_registry["queen"]
@@ -711,6 +721,49 @@ async def fork_session_into_colony(
colony_dir.mkdir(parents=True, exist_ok=True)
(colony_dir / "data").mkdir(exist_ok=True)
# ── 0. Ensure the colony's progress DB exists and seed tasks ──
# Runs before worker.json is written so the DB path can be threaded
# into input_data. Idempotent on reruns of the same colony name.
db_path = await asyncio.to_thread(ensure_progress_db, colony_dir)
seeded_task_ids: list[str] = []
if tasks:
seeded_task_ids = await asyncio.to_thread(
seed_tasks, db_path, tasks, source="queen_create"
)
logger.info(
"progress_db: seeded %d task(s) into colony '%s'",
len(seeded_task_ids),
colony_name,
)
elif task and task.strip():
# Phase 2 auto-seed: when the queen uses the simple single-task
# form of create_colony (no explicit ``tasks=[{...}]`` list),
# insert exactly one row so the first worker spawned into this
# colony has something to claim. Without this the queue is
# empty and the worker falls back to executing from the chat
# spawn message, defeating the cross-run durability the tracker
# exists for.
try:
seeded_task_ids = await asyncio.to_thread(
seed_tasks,
db_path,
[{"goal": task.strip()}],
source="create_colony_auto",
)
logger.info(
"progress_db: auto-seeded 1 task into colony '%s' "
"(task_id=%s, from single-task create_colony form)",
colony_name,
seeded_task_ids[0] if seeded_task_ids else "?",
)
except Exception as exc:
logger.warning(
"progress_db: auto-seed failed for colony '%s' (continuing "
"without a pre-seeded row): %s",
colony_name,
exc,
)
# Fixed worker name -- sessions are the unit of parallelism, not workers
worker_name = "worker"
@@ -772,10 +825,26 @@ async def fork_session_into_colony(
# worker is not Charlotte / Alexandra / etc., it is a task executor.
# Inheriting the queen's persona made the worker greet the user in
# first person with no memory of the task it was actually given.
# Thread the first seeded task_id into input_data so the worker's
# first claim pins to a specific row (skill's assigned-task-id
# branch). When multiple tasks were seeded we only pin the first —
# subsequent workers (via run_agent_with_input or parallel spawns)
# get their own task_id assigned at spawn time.
_worker_input_data: dict[str, Any] = {
"db_path": str(db_path),
"colony_id": colony_name,
}
if seeded_task_ids:
_worker_input_data["task_id"] = seeded_task_ids[0]
worker_meta = {
"name": worker_name,
"version": "1.0.0",
"description": f"Worker clone from queen session {session.id}",
# Colony progress tracker: worker sees these in its first user
# message via _format_spawn_task_message. The colony-progress-
# tracker default skill teaches the worker how to use them.
"input_data": _worker_input_data,
"goal": {
"description": worker_task,
"success_criteria": [],
@@ -907,6 +976,8 @@ async def fork_session_into_colony(
"colony_name": colony_name,
"queen_session_id": colony_session_id,
"is_new": is_new,
"db_path": str(db_path),
"task_ids": seeded_task_ids,
}
+71 -5
View File
@@ -686,6 +686,10 @@ async def handle_session_colonies(request: web.Request) -> web.Response:
return web.json_response({"colonies": colonies})
_EVENTS_HISTORY_DEFAULT_LIMIT = 2000
_EVENTS_HISTORY_MAX_LIMIT = 10000
async def handle_session_events_history(request: web.Request) -> web.Response:
"""GET /api/sessions/{session_id}/events/history — persisted eventbus log.
@@ -693,17 +697,58 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
both live sessions and cold (post-server-restart) sessions. The frontend
replays these events through ``sseEventToChatMessage`` to fully reconstruct
the UI state on resume.
Query params:
limit: maximum number of events to return (default 2000, max 10000).
The TAIL of the file is returned — i.e. the most recent N events.
Older events are dropped and ``truncated`` is set to True.
Response shape::
{
"events": [...], # up to ``limit`` events, oldest-first
"session_id": "...",
"total": 12345, # total events in the file
"returned": 2000, # len(events)
"truncated": true, # total > returned
"limit": 2000, # the effective limit used
}
``events.jsonl`` is append-only chronological, so "last N lines" == "most
recent N events". Long-running colonies have produced files with 50k+
events; before this cap, restoring on page-mount shipped the whole thing
down the wire and blocked the UI for seconds.
"""
session_id = request.match_info["session_id"]
try:
limit = int(request.query.get("limit", str(_EVENTS_HISTORY_DEFAULT_LIMIT)))
except ValueError:
limit = _EVENTS_HISTORY_DEFAULT_LIMIT
limit = max(1, min(limit, _EVENTS_HISTORY_MAX_LIMIT))
from framework.server.session_manager import _find_queen_session_dir
queen_dir = _find_queen_session_dir(session_id)
events_path = queen_dir / "events.jsonl"
if not events_path.exists():
return web.json_response({"events": [], "session_id": session_id})
return web.json_response(
{
"events": [],
"session_id": session_id,
"total": 0,
"returned": 0,
"truncated": False,
"limit": limit,
}
)
events: list[dict] = []
# Tail the file using a bounded deque — O(limit) memory regardless
# of file size. No need to materialize the whole list only to slice it.
from collections import deque
tail: deque[dict] = deque(maxlen=limit)
total = 0
try:
with open(events_path, encoding="utf-8") as f:
for line in f:
@@ -711,13 +756,34 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
if not line:
continue
try:
events.append(json.loads(line))
evt = json.loads(line)
except json.JSONDecodeError:
continue
total += 1
tail.append(evt)
except OSError:
return web.json_response({"events": [], "session_id": session_id})
return web.json_response(
{
"events": [],
"session_id": session_id,
"total": 0,
"returned": 0,
"truncated": False,
"limit": limit,
}
)
return web.json_response({"events": events, "session_id": session_id})
events = list(tail)
return web.json_response(
{
"events": events,
"session_id": session_id,
"total": total,
"returned": len(events),
"truncated": total > len(events),
"limit": limit,
}
)
async def handle_session_history(request: web.Request) -> web.Response:
+18
View File
@@ -139,6 +139,24 @@ class SessionManager:
except Exception:
logger.warning("v2 migration failed (non-fatal)", exc_info=True)
# Ensure every existing colony has an up-to-date progress.db
# (schema v1, WAL mode) and reclaim any stale claims left behind
# by crashed workers from the previous run. Idempotent and
# fast; runs synchronously because the event loop hasn't
# started yet at __init__ time.
from framework.host.progress_db import ensure_all_colony_dbs
try:
ensured = ensure_all_colony_dbs()
if ensured:
logger.info(
"progress_db: ensured %d colony DB(s) at startup", len(ensured)
)
except Exception:
logger.warning(
"progress_db: backfill at startup failed (non-fatal)", exc_info=True
)
def build_llm(self, model: str | None = None):
"""Construct an LLM provider using the server's configured defaults."""
from framework.config import RuntimeConfig, get_hive_config
@@ -1,24 +0,0 @@
---
name: hive.batch-ledger
description: Track per-item status when processing collections to prevent skipped or duplicated items.
metadata:
author: hive
type: default-skill
---
## Operational Protocol: Batch Progress Ledger
When processing a collection of items, maintain a batch ledger in `_batch_ledger`.
Initialize when you identify the batch:
- `_batch_total`: total item count
- `_batch_ledger`: JSON with per-item status
Per-item statuses: pending → in_progress → completed|failed|skipped
- Set `in_progress` BEFORE processing
- Set final status AFTER processing with 1-line result_summary
- Include error reason for failed/skipped items
- Update aggregate counts after each item
- NEVER remove items from the ledger
- If resuming, skip items already marked completed
@@ -61,6 +61,7 @@ Whereas `wait_for_selector`, `browser_click(selector=...)`, `browser_type(select
### Empirically verified (2026-04-11)
Tested against `https://www.reddit.com/r/programming/` whose search input lives at:
```
document > reddit-search-large [shadow]
> faceplate-search-input#search-input [shadow]
@@ -95,13 +96,13 @@ All return real URLs and titles. On a fast page `navigate(wait_until="load")` re
### Timing expectations (measured against real sites)
| Site | Navigate load time |
|---|---|
| example.com | 100–400 ms |
| wikipedia.org | 200–500 ms |
| reddit.com | 1.5–2 s |
| x.com/twitter | 1.2–1.6 s |
| linkedin.com (logged in) | 4–5 s |
| Site | Navigate load time |
| ------------------------ | ------------------ |
| example.com              | 100–400 ms         |
| wikipedia.org            | 200–500 ms         |
| reddit.com               | 1.5–2 s            |
| x.com/twitter            | 1.2–1.6 s          |
| linkedin.com (logged in) | 4–5 s              |
For LinkedIn and other heavy SPAs, rely on `sleep()` after navigation to let the page hydrate.
@@ -124,7 +125,7 @@ Even after `wait_until="load"`, React/Vue SPAs often render their real chrome in
Why this is necessary:
- **React / Vue controlled components** don't trust JS-sourced `.focus()`. React uses event delegation and watches for *native* pointer/focus events — a `click` dispatched via CDP fires the real `pointerdown`/`pointerup`/`click`/`focus` sequence that React listens to, and updates its internal state. A JS-only `.focus()` sets `document.activeElement` but the framework's controlled state doesn't see it.
- **React / Vue controlled components** don't trust JS-sourced `.focus()`. React uses event delegation and watches for _native_ pointer/focus events — a `click` dispatched via CDP fires the real `pointerdown`/`pointerup`/`click`/`focus` sequence that React listens to, and updates its internal state. A JS-only `.focus()` sets `document.activeElement` but the framework's controlled state doesn't see it.
- **Draft.js** (X/Twitter compose) and **Lexical** (Gmail, LinkedIn DMs) use contenteditable divs with immutable editor state. They only enter "edit mode" after a real click on the editor surface. Typing at them without clicking routes keys to `document.body` or gets silently discarded.
- **Send/submit buttons are bound to framework state**, not DOM state. They're typically `disabled={!hasRealContent}` where `hasRealContent` is computed from React/Vue/Svelte state. The input field can have characters in the DOM but the button stays disabled because the framework never saw a real input event.
@@ -171,16 +172,16 @@ Always include an equivalent cleanup block in any script that types into a compo
### Verified site-specific quirks
| Site | Editor | Workaround |
| --- | --- | --- |
| **X / Twitter** compose | Draft.js | Click `[data-testid='tweetTextarea_0']` first, then type with `delay_ms=20`. First 1–2 chars may be eaten — accept truncation or prepend a throwaway char. Verify `[data-testid='tweetButton']` has `disabled: false` before clicking. |
| **LinkedIn** messaging | contenteditable (inside `#interop-outlet` shadow root) | Use `browser_shadow_query` to find the rect, click-coordinate to focus, then `browser_type_focused(text=...)` (selector-based `browser_type` can't reach shadow). Send button is `.msg-form__send-button`. |
| **LinkedIn** feed post composer | Quill/LinkedIn custom | Click the "Start a post" trigger first, wait 1s for modal, click the textarea, type. |
| **Reddit** comment/post box | ProseMirror | Click the textarea, wait 0.5s for the toolbar to mount, then type. Submit is `button[slot="submit-button"]` inside a shreddit-composer. |
| **Gmail** compose | Lexical | Click the body first. Gmail has a visible `div[contenteditable=true][aria-label*='Message Body']` after opening a compose window. |
| **Slack** message box | contenteditable | Click first, then type. Send is a paper-plane button with `data-qa='texty_send_button'`. |
| **Discord** | Slate | Click first. Discord's send is implicit on Enter (no button), so just press Enter after typing. |
| **Monaco** editors (GitHub code review, CodeSandbox) | Monaco | Click first, type with `delay_ms=10`. Monaco listens for `textarea` input events on a hidden textarea — requires focus to be on that textarea. |
### Plain text into a real input
@@ -247,6 +248,7 @@ The highlight overlay stays visible on the page for **10 seconds** after each in
- Popup appeared that you didn't need? Close it immediately
`browser_tabs` returns an `origin` field for each tab:
- `"agent"` — you opened it; you own it; close it when done
- `"popup"` — opened by a link or script; close after extracting what you need
- `"startup"` or `"user"` — leave these alone unless the task requires it
@@ -259,22 +261,22 @@ The bridge automatically evicts per-tab state (`_cdp_attached`, `_interaction_hi
### LinkedIn
| Target              | Selector                                              |
| ------------------- | ----------------------------------------------------- |
| Global search input | `input[data-testid='typeahead-input']`                |
| Own profile link    | `a[href*='linkedin.com/in/']`                         |
| Messaging overlay   | `#interop-outlet >>> [aria-label]` (use shadow_query) |
LinkedIn enforces **strict Trusted Types CSP**. Any script you inject via `browser_evaluate` that uses `innerHTML = "<...>"` will be **silently dropped** — the wrapper element gets added but its content is empty, no console error. Always use `createElement` + `appendChild` + `setAttribute` for DOM injection on LinkedIn. `style.cssText`, `textContent`, and `.value` assignments are fine (they don't go through the Trusted Types sink).
### Reddit (new reddit / shreddit)
| Target                | Selector                                                                     |
| --------------------- | ---------------------------------------------------------------------------- |
| Search input (shadow) | `reddit-search-large >>> #search-input` (rect only; type via click-to-focus) |
| Reddit logo (home)    | `#reddit-logo`                                                               |
| Subreddit posts       | `shreddit-post` custom elements                                              |
| Create post button    | `a[href*='/submit']`                                                         |
Reddit's search input lives **two shadow levels deep** inside `reddit-search-large > faceplate-search-input`. You cannot reach it with `browser_type(selector=)`. The working pattern:
@@ -285,15 +287,15 @@ Reddit's search input lives **two shadow levels deep** inside `reddit-search-lar
### X / Twitter
| Target                     | Selector                                      |
| -------------------------- | --------------------------------------------- |
| Main search input          | `input[data-testid='SearchBox_Search_Input']` |
| Home nav link              | `a[data-testid='AppTabBar_Home_Link']`        |
| Post text area (compose)   | `[data-testid='tweetTextarea_0']`             |
| Reply buttons on feed      | `[data-testid='reply']`                       |
| Post / Tweet submit button | `[data-testid='tweetButton']`                 |
| Caret (⋯) menu on a post   | `[data-testid='caret']`                       |
| Confirmation sheet button  | `[data-testid='confirmationSheetConfirm']`    |
**X uses Draft.js for the compose text editor**, which does NOT accept synthetic input reliably. Working workaround: `browser_type(selector='[data-testid="tweetTextarea_0"]', text="...", delay_ms=20)`. The delay gives Draft.js time to process each keystroke. The first 1–2 characters may still get eaten — accept minor truncation or prepend a throwaway character. After typing, check `[data-testid="tweetButton"]` has `disabled: false` before clicking submit.
@@ -366,17 +368,35 @@ If Chrome detaches the debugger for its own reasons (tab closed, user opened Dev
If reattach also fails, you'll get the underlying CDP error string — that's a real problem, usually the tab is gone.
## When to reach for `browser_evaluate`
## `browser_evaluate` is a last-resort escape hatch
Use it when:
- You need to read state from inside a shadow root that `browser_get_rect` doesn't handle
- You need a one-shot JS snippet to trigger a site-specific action (scroll a specific container, open a menu, set a form field value directly)
- You need to walk an AX tree or measure layout that the standard tools don't expose
**Before using `browser_evaluate`, try these first — in this order:**
Avoid it when:
- A standard tool (`browser_click_coordinate`, `browser_type`, `browser_press`) already does what you need. Those go through CDP's native event pipeline, which real sites trust more than synthetic JS dispatch.
- You're on a strict-CSP site and want to inject DOM — stick to `createElement` + `appendChild`, never `innerHTML`.
- You need to trigger React / Vue / framework state changes — those frameworks watch for real browser events (`input`, `change`, `click`), not scripted `dispatchEvent` calls. Native-event tools are more reliable.
1. **`browser_screenshot` + `browser_click_coordinate`** — works on every site regardless of shadow DOM, iframes, obfuscated classes. This is the default path for "click a thing you can see."
2. **`browser_type(use_insert_text=True, text=...)`** — for typing into ANY input/contenteditable, including Lexical and Draft.js. Handles click-focus-insert with built-in retries. Do **not** call `document.execCommand('insertText')` via evaluate; this tool already does it correctly.
3. **`browser_shadow_query`** or **`browser_get_rect(selector)`** with the `>>>` shadow-piercing syntax — for selector-based lookups across shadow roots.
4. **`browser_get_text` / `browser_get_attribute`** — for reading element state by selector.
5. **`browser_snapshot`** — for dumping the accessibility tree of the page.
If all five of those fit your goal, **do not use `browser_evaluate`.** Each evaluate call is a small LLM round-trip of ~30-100 tokens of JS plus a JSON response; five of them burn more context than a single screenshot-and-coordinate does, with less reliability.
### Anti-patterns — stop immediately if you catch yourself doing these
- **Trying multiple `querySelectorAll` variants when the first returned `[]`.** Different selectors on the same page rarely work if the first guess failed — modern SPAs obfuscate class names at build time. After one empty result, switch to `browser_screenshot` + `browser_click_coordinate`. Do not write `.artdeco-list__item`, then `[data-test-incoming-invitation-card]`, then `[class*="invitation"]` — you are already on the wrong path.
- **Writing `walk(root)` recursive shadow-DOM traversal functions.** Use `browser_shadow_query` — it traverses at the CDP level (native C++), not by re-running a recursive JS function every call.
- **Calling `document.execCommand('insertText', ...)` to type into a contenteditable.** Use `browser_type(use_insert_text=True, text='...')`. The high-level tool handles the exact same Lexical/Draft.js case but with click-focus-retry logic built in.
- **Accessing `iframe.contentDocument`.** Rarely works (cross-origin, late hydration) and when it does, the code is brittle. Use `browser_screenshot` to see the iframe, then `browser_click_coordinate` to interact.
- **Using `innerHTML = "<...>"` on a Trusted Types site (LinkedIn, GitHub).** The assignment is silently dropped. Use `createElement` + `appendChild` if you must inject DOM — but first, ask whether you really need to.
- **Triggering React/Vue state via synthetic `dispatchEvent`.** Frameworks watch for real browser events. Use `browser_click_coordinate`, `browser_press`, or `browser_type` — all go through CDP's native event pipeline.
### Legitimate uses (when nothing semantic fits)
- Reading a computed style, `window.innerWidth/Height`, `document.scrollingElement.scrollTop`, or other layout values the tools don't expose.
- Firing a one-shot site-specific API call (analytics beacon, feature-flag toggle).
- Stripping `onbeforeunload` before navigating away from a page with an unsent draft (LinkedIn, Gmail).
- Detecting whether a specific shadow-root host exists before a follow-up screenshot.
In all of these cases the script is SHORT (< 10 lines) and the result is CONSUMED (read, then acted on), not further probed.
## Login & auth walls
@@ -0,0 +1,111 @@
---
name: hive.colony-progress-tracker
description: Claim tasks, record step progress, and verify SOP gates in the colony SQLite queue. Applies when your spawn message includes a db_path field.
metadata:
author: hive
type: default-skill
---
## Operational Protocol: Colony Progress Tracker
**Applies when** your spawn message has `db_path:` and `colony_id:` fields. The DB is your durable working memory — tells you what's done, what to skip, which SOP gates you owe.
Access via `execute_command_tool` running `sqlite3 "<db_path>" "..."`. Tables: `tasks` (queue), `steps` (per-task decomposition), `sop_checklist` (hard gates).
### Claim: assigned task (check this FIRST)
If your spawn message includes a `task_id:` field, the queen pre-assigned a specific row to you. Claim that row by id — **do not** use the generic next-pending pattern below:
```bash
sqlite3 "<db_path>" <<'SQL'
UPDATE tasks SET status='claimed', worker_id='<worker-id>',
claim_token=lower(hex(randomblob(8))),
claimed_at=datetime('now'), updated_at=datetime('now')
WHERE id='<task_id>' AND status='pending'
RETURNING id, goal, payload;
SQL
```
Empty output → another worker raced you or the row is already done. Stop and report. Non-empty → that row is yours, proceed to "Load the plan".
### Claim: next pending (fallback when no task_id is assigned)
If your spawn message did NOT include `task_id:` — you are a generic fan-out worker racing on a shared queue. Use the generic next-pending claim:
```bash
sqlite3 "<db_path>" <<'SQL'
UPDATE tasks SET status='claimed', worker_id='<worker-id>',
claim_token=lower(hex(randomblob(8))),
claimed_at=datetime('now'), updated_at=datetime('now')
WHERE id=(SELECT id FROM tasks WHERE status='pending'
ORDER BY priority DESC, seq, created_at LIMIT 1)
RETURNING id, goal, payload;
SQL
```
Empty output → queue drained, exit. Otherwise the returned `id` is yours. **Never SELECT-then-UPDATE** — races.
### Load the plan
```bash
sqlite3 "<db_path>" "SELECT seq, id, title, status FROM steps WHERE task_id='<task-id>' ORDER BY seq;"
sqlite3 "<db_path>" "SELECT key, description, required, done_at FROM sop_checklist WHERE task_id='<task-id>';"
```
**Skip any step where status='done'.** That's the point — don't redo completed work.
### Execute a step
Before tool calls:
```bash
sqlite3 "<db_path>" "UPDATE steps SET status='in_progress', worker_id='<worker-id>', started_at=datetime('now') WHERE id='<step-id>';"
```
After success (one-line evidence: path, URL, key result):
```bash
sqlite3 "<db_path>" "UPDATE steps SET status='done', evidence='<what you did>', completed_at=datetime('now') WHERE id='<step-id>';"
```
### MANDATORY: SOP gate check before marking task done
```bash
sqlite3 "<db_path>" "SELECT key, description FROM sop_checklist WHERE task_id='<task-id>' AND required=1 AND done_at IS NULL;"
```
- Empty → proceed to "Mark task done".
- Non-empty → each row is work you still owe. Do it, then check it off:
```bash
sqlite3 "<db_path>" "UPDATE sop_checklist SET done_at=datetime('now'), done_by='<worker-id>', note='<why>' WHERE task_id='<task-id>' AND key='<key>';"
```
**Never mark a task done while this SELECT returns rows.** This gate exists specifically to stop you from declaring success while skipping required steps.
### Mark task done / failed
```bash
# Success:
sqlite3 "<db_path>" "UPDATE tasks SET status='done', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<worker-id>';"
# Unrecoverable failure:
sqlite3 "<db_path>" "UPDATE tasks SET status='failed', last_error='<one sentence>', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<worker-id>';"
```
The `AND worker_id=?` guard means a reclaimed row won't accept your write — treat zero rows affected as "your claim was revoked, stop."
### Loop
After done/failed → claim the next task. Exit only when claim returns empty.
### Errors + debug
- **"database is locked"**: retry with 100ms → 1s backoff, max 5 attempts. `busy_timeout=5000` handles most contention silently.
- **Queue health**: `SELECT status, count(*) FROM tasks GROUP BY status;`
- **Your in-flight work**: `SELECT id, goal, status FROM tasks WHERE worker_id='<worker-id>';`
### Anti-patterns (will break the queue)
- Don't DDL (CREATE/ALTER/DROP).
- Don't DELETE — failed tasks stay as `failed` for audit.
- Don't skip the MANDATORY SOP gate check before marking a task done.
- Don't hold a task >15min without updates — the stale-claim reclaimer revokes your claim.
- Don't invent task IDs. Workers update existing rows; only the queen enqueues new ones.
@@ -1,6 +1,6 @@
---
name: hive.context-preservation
description: Proactively preserve critical information before automatic context pruning destroys it.
description: Proactively extract critical values from tool results into working notes before automatic context pruning destroys them.
metadata:
author: hive
type: default-skill
@@ -8,17 +8,16 @@ metadata:
## Operational Protocol: Context Preservation
You operate under a finite context window. Important information WILL be pruned.
You operate under a finite context window. Older tool results WILL be pruned. Extract what you need while it's still in context.
Save-As-You-Go: After any tool call producing information you'll need later,
immediately extract key data into `_working_notes` or `_preserved_data`.
Do NOT rely on referring back to old tool results.
**Save-as-you-go.** After any tool call producing information you'll need later, immediately extract the key data into `_working_notes` or `_preserved_data`. Do not rely on referring back to old tool results — once they're pruned they're gone.
What to extract: URLs and key snippets (not full pages), relevant API fields
(not raw JSON), specific lines/values (not entire files), analysis results
(not raw data).
**What to extract:**
- URLs and key snippets (not full pages)
- Relevant API fields (not raw JSON blobs)
- Specific lines, values, or IDs (not entire files)
- Analysis conclusions (not raw data)
Before transitioning to the next phase/node, write a handoff summary to
`_handoff_context` with everything the next phase needs to know.
**Handoffs between tasks** happen through `progress.db`, not through shared-buffer handoff blobs. When you finish a task, any state the next worker needs goes into the task row itself (`steps.evidence`, `tasks.last_error`, `sop_checklist.note`) — see `hive.colony-progress-tracker`. Use `_working_notes` for things the DB schema doesn't cover.
You will receive an alert when context reaches {{warn_at_usage_ratio_pct}}% — preserve immediately.
@@ -1,6 +1,6 @@
---
name: hive.error-recovery
description: Follow a structured recovery protocol when tool calls fail instead of blindly retrying or giving up.
description: Follow a structured recovery decision tree when tool calls fail instead of blindly retrying or giving up.
metadata:
author: hive
type: default-skill
@@ -10,9 +10,20 @@ metadata:
When a tool call fails:
1. Diagnose — record error in notes, classify as transient or structural
2. Decide — transient: retry once. Structural fixable: fix and retry.
Structural unfixable: record as failed, move to next item.
Blocking all progress: record escalation note.
3. Adapt — if same tool failed {{max_retries_per_tool}}+ times, stop using it and find alternative.
Update plan in notes. Never silently drop the failed item.
1. **Diagnose** — classify the failure as *transient* (network blip, rate limit, timeout) or *structural* (wrong selector, missing auth, invalid schema, permission denied).
2. **Decide:**
- Transient → retry once.
- Structural + fixable → fix the input and retry.
- Structural + unfixable → record the failure and move to the next item.
- Blocking all progress → escalate.
3. **Adapt** — if the same tool has failed {{max_retries_per_tool}}+ times in a row, stop using it and find an alternative approach.
**Never silently drop a failed item.** If the item is a task in the colony queue, write the failure to the DB instead of an in-memory buffer:
```bash
sqlite3 "$DB_PATH" "UPDATE tasks SET status='failed', last_error='<one-sentence reason>', completed_at=datetime('now'), updated_at=datetime('now') WHERE id='<task-id>' AND worker_id='<your-worker-id>';"
```
The `tasks.retry_count` column and the stale-claim reclaimer handle auto-retry for crashes; your job is the within-run decision tree above. See `hive.colony-progress-tracker` for the full queue protocol.
@@ -15,6 +15,28 @@ LinkedIn is the hardest mainstream site to automate because it combines **shadow
**Always activate `browser-automation` first.** This skill assumes you already know about CSS-px coordinates, `browser_type`/`browser_type_focused`, and `browser_shadow_query`. The guidance below is LinkedIn-specific; general browser rules are there.
## Rule #0: screenshot + coordinates, not selectors
LinkedIn changes class names aggressively and hides composers inside shadow roots AND iframes. **Selectors break constantly.** Your default strategy on every LinkedIn page should be:
1. `browser_screenshot()` — see the page visually
2. Pick the target's position from the image
3. `browser_coords(image_x, image_y)` → get CSS pixels
4. `browser_click_coordinate(css_x, css_y)` — reaches shadow DOM, iframes, and React elements indifferently
5. `browser_type(use_insert_text=True, text=...)` — types into whatever is focused, including Lexical composers
**If `browser_evaluate(...querySelectorAll...)` returns `[]` even once, do not try a different selector.** Stop, screenshot, and click. The "what if I try `.artdeco-list__item` next" instinct has burned ~50 tool calls in real sessions before the agent pivoted. Don't fall into that loop.
The selectors in the table below are **only** for when you already know the target is in the light DOM and you want a faster path than screenshot+coord. **When in doubt, default to coordinates.**
## Invitation manager — inline message button path is BROKEN
If the user asks to message a connection request **from the invitation manager page without accepting first**, the inline "Message" button opens a composer inside a nested **iframe overlay** (not a shadow root). The iframe's `contentDocument` is either cross-origin-blocked or not hydrated at access time. This path is **not reliably automatable today.**
**Redirect:** click the person's name/profile link on the card, go to the profile page, and use the standard Profile Message flow below. The profile flow is battle-tested; the inline-iframe flow isn't.
If you end up writing `document.activeElement.tagName === 'IFRAME'` inside a `browser_evaluate`, you've hit this trap. Stop and go to the profile page.
## Timing expectations
- `browser_navigate(wait_until="load")` — LinkedIn takes **4–5 seconds** to load the feed cold.
@@ -1,6 +1,6 @@
---
name: hive.note-taking
description: Maintain structured working notes throughout execution to prevent information loss during context pruning.
description: Maintain a free-form scratchpad of decisions, extracted values, and open questions so context pruning doesn't lose anything you still need.
metadata:
author: hive
type: default-skill
@@ -8,20 +8,21 @@ metadata:
## Operational Protocol: Structured Note-Taking
Maintain structured working notes in shared buffer key `_working_notes`.
Maintain free-form working notes in shared buffer key `_working_notes` for data that *you* need to remember but that isn't captured by the colony task queue.
**Do not duplicate the queue in here.** Per-task goal, ordered steps, and SOP gates live in `progress.db` — use `hive.colony-progress-tracker` for those. These notes are for things the DB schema doesn't cover.
Update at these checkpoints:
- After completing each discrete subtask or batch item
- After receiving new information that changes your plan
- Before any tool call that will produce substantial output
- After receiving new information that changes how you plan to approach the current step
- Before any tool call that will produce substantial output you'll need to reference later
- When you make a non-obvious decision whose *why* would be lost if the tool call history gets pruned
Structure:
### Objective — restate the goal
### Current Plan — numbered steps, mark completed with ✓
### Key Decisions — decisions made and WHY
### Working Data — intermediate results, extracted values
### Open Questions — uncertainties to verify
### Blockers — anything preventing progress
### Working Data — intermediate results, extracted values (URLs, IDs, key snippets — not full pages)
### Open Questions — uncertainties you plan to verify
### Blockers — anything preventing progress that isn't already captured in `tasks.last_error`
Update incrementally — do not rewrite from scratch each time.
@@ -1,17 +0,0 @@
---
name: hive.task-decomposition
description: Decompose complex tasks into explicit subtasks before diving in.
metadata:
author: hive
type: default-skill
---
## Operational Protocol: Task Decomposition
Before starting a complex task:
1. Decompose — break into numbered subtasks in `_working_notes` Current Plan
2. Estimate — relative effort per subtask (small/medium/large)
3. Execute — work through in order, mark ✓ when complete
4. Budget — if running low on iterations, prioritize by impact
5. Verify — before declaring done, every subtask must be ✓, skipped (with reason), or blocked
+2 -2
View File
@@ -36,8 +36,8 @@ class SkillsConfig:
# Default skill configuration
default_skills = {
"hive.note-taking": {"enabled": True},
"hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
"hive.quality-monitor": {"enabled": False},
"hive.quality-monitor": {"enabled": False, "assessment_interval": 10},
"hive.error-recovery": {"max_retries_per_tool": 5},
}
"""
+32 -49
View File
@@ -24,34 +24,21 @@ _SKILL_DEFAULTS: dict[str, dict[str, Any]] = {
"hive.quality-monitor": {"assessment_interval": 5},
"hive.error-recovery": {"max_retries_per_tool": 3},
"hive.context-preservation": {"warn_at_usage_ratio_pct": 45},
"hive.batch-ledger": {"checkpoint_every_n": 5},
}
# Keywords that indicate a batch processing scenario (DS-12)
_BATCH_KEYWORDS: tuple[str, ...] = (
"list of",
"collection of",
"set of",
"batch of",
"each item",
"for each",
"process all",
"records",
"entries",
"rows",
"items",
)
_BATCH_INIT_NUDGE = (
"Note: your input appears to describe a batch operation. "
"Initialize `_batch_ledger` with the total item count before processing."
)
def is_batch_scenario(text: str) -> bool:
    """Deprecated: batch auto-detection is no longer used.

    Kept as a no-op so the agent_loop call site (which wraps it in an
    ``if ctx.default_skill_batch_nudge:`` guard that's also now always
    empty) can stay unchanged until a broader cleanup. The old
    ``_batch_ledger`` shared-buffer feature was replaced by the
    per-colony SQLite task queue (``hive.colony-progress-tracker``),
    which lives in ``progress.db`` and is authoritative for batch
    state across workers and runs.

    Args:
        text: Ignored. Previously scanned (lower-cased) for batch
            keywords such as "list of" / "for each".

    Returns:
        Always ``False``.
    """
    # The superseded keyword-scanning body was left in front of this
    # docstring by a bad merge, which made the deprecation docstring a
    # dead expression statement and this return unreachable. Only the
    # no-op remains.
    return False
def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> str:
@@ -67,40 +54,37 @@ def _apply_overrides(skill_name: str, body: str, overrides: dict[str, Any]) -> s
return body
# Ordered list of default skills (name → directory).
#
# Removed on 2026-04-15 as part of the colony-progress-tracker rollout:
#   - hive.task-decomposition — steps table in progress.db supersedes
#     in-memory ``_working_notes → Current Plan`` decomposition.
#   - hive.batch-ledger — tasks table in progress.db supersedes
#     the ``_batch_ledger`` dict-shaped queue with its pending →
#     in_progress → completed/failed/skipped state machine.
# Both were duplicating state that belongs in SQLite.
SKILL_REGISTRY: dict[str, str] = {
    "hive.note-taking": "note-taking",
    "hive.context-preservation": "context-preservation",
    "hive.quality-monitor": "quality-monitor",
    "hive.error-recovery": "error-recovery",
    "hive.colony-progress-tracker": "colony-progress-tracker",
    "hive.writing-hive-skills": "writing-hive-skills",
}
# Shared buffer keys referenced by the remaining default skills (used
# for permission auto-inclusion). The dead keys for batch-ledger
# (``_batch_*``), task-decomposition (``_subtasks``,
# ``_iteration_budget_remaining``), the handoff buffer
# (``_handoff_context``), and the error-log buffers (``_error_log``,
# ``_failed_tools``, ``_escalation_needed``) were removed when those
# features migrated to progress.db.
DATA_BUFFER_KEYS: list[str] = [
    # note-taking
    "_working_notes",
    "_notes_updated_at",
    # context-preservation
    "_preserved_data",
    # quality-monitor
    "_quality_log",
    "_quality_degradation_count",
]
@@ -252,16 +236,15 @@ class DefaultSkillManager:
@property
def batch_init_nudge(self) -> str | None:
"""Nudge text to prepend to system prompt when batch input detected (DS-12).
"""Deprecated: always returns None.
Returns None if ``hive.batch-ledger`` is disabled or auto_detect_batch is False.
The ``hive.batch-ledger`` default skill was removed when batch
tracking moved into ``progress.db`` (``hive.colony-progress-
tracker``). Callers in agent_host, colony_runtime, and
orchestrator still read this property; returning None keeps
them functional with no system-prompt nudge.
"""
if "hive.batch-ledger" not in self._skills:
return None
overrides = self._config.get_default_overrides("hive.batch-ledger")
if overrides.get("auto_detect_batch") is False:
return None
return _BATCH_INIT_NUDGE
return None
@property
def context_warn_ratio(self) -> float | None:
+541 -58
View File
@@ -903,10 +903,76 @@ def register_queen_lifecycle_tools(
# ``start_worker`` was removed in the Phase 4 unification — its
# bare-bones spawn duplicated ``run_agent_with_input`` (which has
# credential preflight, concurrency guard, and phase tracking on
# top). The shared preflight timeout below is still used by
# ``run_agent_with_input``.
# top). The shared preflight timeout below is used by both
# ``run_agent_with_input`` and ``run_parallel_workers``.
_START_PREFLIGHT_TIMEOUT = 15 # seconds
async def _preflight_credentials(
    legacy: Any,
    *,
    tool_label: str,
) -> set[str]:
    """Compute tools whose credentials are missing and resync MCP servers.

    Shared between ``run_agent_with_input`` (single spawn) and
    ``run_parallel_workers`` (batch spawn). Returns the set of
    tool names whose credentials failed validation; the caller
    filters these out of the spawn's tool lists.

    Exceptions (including validator bugs) are logged and treated
    as "no tools dropped" so a broken validator can't block a
    spawn. Wall-clock bound at ``_START_PREFLIGHT_TIMEOUT`` so
    slow credential HTTP health checks can't stall the LLM turn.

    Args:
        legacy: Runtime object whose ``graph.nodes`` are scanned by
            ``compute_unavailable_tools``.
        tool_label: Caller name used to prefix log messages
            (e.g. ``"run_parallel_workers"``).
    """
    unavailable: set[str] = set()

    async def _run() -> None:
        nonlocal unavailable
        try:
            from framework.credentials.validation import compute_unavailable_tools

            loop = asyncio.get_running_loop()
            # Credential validation does blocking work (lookups /
            # health checks) — keep it off the event loop.
            drop, messages = await loop.run_in_executor(
                None,
                lambda: compute_unavailable_tools(legacy.graph.nodes),
            )
            unavailable = drop
            if drop:
                logger.warning(
                    "%s: dropping %d tool(s) with unavailable credentials: %s",
                    tool_label,
                    len(drop),
                    "; ".join(messages),
                )
        except Exception as exc:
            # A bug in the validator itself (not a credential failure)
            # must not block the spawn — proceed as if nothing dropped.
            logger.warning(
                "%s: compute_unavailable_tools raised, proceeding without "
                "credential-based tool filtering: %s",
                tool_label,
                exc,
            )

        # Resync MCP servers inside _run so the resync is also covered
        # by the wall-clock timeout below. Best-effort: failure is
        # logged, never raised.
        runner = getattr(session, "runner", None)
        if runner is not None:
            try:
                loop = asyncio.get_running_loop()
                await loop.run_in_executor(
                    None,
                    lambda: runner._tool_registry.resync_mcp_servers_if_needed(),
                )
            except Exception as exc:
                logger.warning("%s: MCP resync failed: %s", tool_label, exc)

    try:
        await asyncio.wait_for(_run(), timeout=_START_PREFLIGHT_TIMEOUT)
    except TimeoutError:
        # NOTE(review): asyncio.wait_for raises asyncio.TimeoutError,
        # which aliases builtin TimeoutError only on Python 3.11+ —
        # confirm the project's minimum Python version.
        logger.warning(
            "%s: credential preflight timed out after %ds — proceeding",
            tool_label,
            _START_PREFLIGHT_TIMEOUT,
        )
    return unavailable
# --- stop_worker -----------------------------------------------------------
async def stop_worker(*, reason: str = "Stopped by queen") -> str:
@@ -1078,6 +1144,105 @@ def register_queen_lifecycle_tools(
}
)
# Credential preflight — mirrors the one run_agent_with_input
# performs. Without this, missing credentials (e.g. stale
# GITHUB_TOKEN) fail once PER spawned worker, yielding N
# duplicate error reports for a single fixable issue. Catch
# once upfront, build a filtered tool list, and pass it to
# every spawn via tools_override.
legacy_for_preflight = _get_runtime()
unavailable_tools_parallel: set[str] = set()
tools_override_parallel: list[Any] | None = None
if legacy_for_preflight is not None:
try:
unavailable_tools_parallel = await _preflight_credentials(
legacy_for_preflight, tool_label="run_parallel_workers"
)
except CredentialError as e:
# Structured credential failure: publish the
# CREDENTIALS_REQUIRED event so the frontend's modal
# can fire, and return the same shape the single-path
# tool returns on the same failure.
error_payload = credential_errors_to_json(e)
error_payload["agent_path"] = str(getattr(session, "worker_path", "") or "")
bus = getattr(session, "event_bus", None)
if bus is not None:
await bus.publish(
AgentEvent(
type=EventType.CREDENTIALS_REQUIRED,
stream_id="queen",
data=error_payload,
)
)
return json.dumps(error_payload)
if unavailable_tools_parallel:
colony_tools = list(getattr(colony, "_tools", []) or [])
before = len(colony_tools)
tools_override_parallel = [
t
for t in colony_tools
if getattr(t, "name", None) not in unavailable_tools_parallel
]
logger.info(
"run_parallel_workers: dropped %d tool object(s) from spawn_tools (unavailable credentials)",
before - len(tools_override_parallel),
)
# Colony progress tracker wiring: if the session's loaded
# worker points at a colony directory that has a progress.db,
# inject db_path + colony_id into every per-task ``data``
# dict so each spawned worker sees them in its first user
# message and can claim rows from the queue. ColonyRuntime.
# spawn() detects db_path in input_data and pre-activates
# hive.colony-progress-tracker into the catalog prompt.
_colony_db_path: str | None = None
_colony_id: str | None = None
_worker_path = getattr(session, "worker_path", None)
if _worker_path:
from pathlib import Path as _Path
_wp = _Path(_worker_path)
_pdb = _wp / "data" / "progress.db"
if _pdb.exists():
_colony_db_path = str(_pdb.resolve())
_colony_id = _wp.name
# Phase 2: enqueue each task into progress.db BEFORE building
# spawn specs so every parallel worker has a pre-assigned row
# to claim. Without this the queue stays empty and each
# worker's claim UPDATE affects zero rows, silently falling
# back to executing from its spawn message.
_enqueued_task_ids: list[str | None] = [None] * len(tasks)
if _colony_db_path:
from pathlib import Path as _PathP
from framework.host.progress_db import (
enqueue_task as _enqueue_task_fn,
)
_pdb_path_obj = _PathP(_colony_db_path)
for _i, _spec in enumerate(tasks):
if not isinstance(_spec, dict):
continue
_task_text_pre = str(_spec.get("task", "")).strip()
if not _task_text_pre:
continue
try:
_enqueued_task_ids[_i] = await asyncio.to_thread(
_enqueue_task_fn,
_pdb_path_obj,
_task_text_pre,
source="run_parallel_workers",
)
except Exception as _enqueue_exc:
logger.warning(
"run_parallel_workers: failed to enqueue tasks[%d] "
"(spawn proceeding without pinned task_id): %s",
_i,
_enqueue_exc,
)
# Normalise: each entry must have a non-empty "task" string.
normalised: list[dict] = []
for i, spec in enumerate(tasks):
@@ -1086,18 +1251,58 @@ def register_queen_lifecycle_tools(
task_text = str(spec.get("task", "")).strip()
if not task_text:
return json.dumps({"error": f"tasks[{i}].task is empty"})
spec_data = spec.get("data") if isinstance(spec.get("data"), dict) else {}
if _colony_db_path:
spec_data = {
**spec_data,
"db_path": _colony_db_path,
"colony_id": _colony_id,
}
if _enqueued_task_ids[i]:
spec_data["task_id"] = _enqueued_task_ids[i]
normalised.append(
{
"task": task_text,
"data": spec.get("data") if isinstance(spec.get("data"), dict) else None,
"data": spec_data or None,
}
)
if _colony_db_path:
_pinned = sum(1 for tid in _enqueued_task_ids if tid)
logger.info(
"run_parallel_workers: attached progress_db context to "
"%d spawn(s) (colony_id=%s, %d pinned task_ids)",
len(normalised),
_colony_id,
_pinned,
)
try:
worker_ids = await colony.spawn_batch(normalised)
worker_ids = await colony.spawn_batch(
normalised,
tools_override=tools_override_parallel,
)
except Exception as e:
return json.dumps({"error": f"spawn_batch failed: {e}"})
# Phase transition — mirrors run_agent_with_input. With the
# batch now spawned, the queen is semantically "running" until
# wait_for_worker_reports returns, so phase-gated running
# tools (inject_message, reply_to_worker, ...) should be
# available. Without this change run_parallel_workers left
# the queen in whatever phase she was in (typically staging).
if phase_state is not None:
try:
await phase_state.switch_to_running()
_update_meta_json(
session_manager, manager_session_id, {"phase": "running"}
)
except Exception as exc:
logger.warning(
"run_parallel_workers: phase transition to 'running' failed (non-fatal): %s",
exc,
)
try:
reports = await colony.wait_for_worker_reports(
worker_ids,
@@ -1322,6 +1527,35 @@ def register_queen_lifecycle_tools(
except OSError as e:
return None, f"failed to install skill into {target}: {e}"
# Cleanup the source directory after a successful install so
# the authored skill doesn't linger as debris in the agent
# workspace (or — pre-sandbox-split — in the hive git
# checkout). Only removes paths that are OUTSIDE
# ``~/.hive/skills/`` so we never nuke the canonical install
# target or user-owned skill dirs.
try:
src_resolved = src.resolve()
skills_root_resolved = target_root.resolve()
try:
src_resolved.relative_to(skills_root_resolved)
_under_skills_root = True
except ValueError:
_under_skills_root = False
if not _under_skills_root:
_shutil.rmtree(src_resolved)
logger.info(
"create_colony: cleaned up authored skill source at %s "
"(installed to %s)",
src_resolved,
target,
)
except OSError as e:
logger.warning(
"create_colony: failed to clean up skill source at %s (non-fatal): %s",
src,
e,
)
return target, None
async def create_colony(
@@ -1329,6 +1563,7 @@ def register_queen_lifecycle_tools(
colony_name: str,
task: str,
skill_path: str,
tasks: list[dict] | None = None,
) -> str:
"""Create a colony after installing a pre-authored skill folder.
@@ -1338,6 +1573,13 @@ def register_queen_lifecycle_tools(
they're ready to start the worker — at that point the worker
reads the task from ``worker.json`` and the skill from
``~/.hive/skills/`` and starts informed.
When *tasks* is provided, each entry is seeded into the
colony's ``progress.db`` task queue in a single transaction.
Workers then claim rows from the queue using the
``hive.colony-progress-tracker`` default skill. Each task dict
accepts: ``goal`` (required), optional ``steps``,
``sop_items``, ``priority``, ``payload``, ``parent_task_id``.
"""
if session is None:
return json.dumps({"error": "No session bound to this tool registry."})
@@ -1392,6 +1634,7 @@ def register_queen_lifecycle_tools(
session=session,
colony_name=cn,
task=(task or "").strip(),
tasks=tasks if isinstance(tasks, list) else None,
)
except Exception as e:
logger.exception("create_colony: fork failed after installing skill")
@@ -1444,6 +1687,8 @@ def register_queen_lifecycle_tools(
"is_new": fork_result.get("is_new", True),
"skill_installed": str(installed_skill),
"skill_name": installed_skill.name if installed_skill else None,
"db_path": fork_result.get("db_path"),
"tasks_seeded": len(fork_result.get("task_ids") or []),
}
)
@@ -1541,6 +1786,57 @@ def register_queen_lifecycle_tools(
"protocol'."
),
},
"tasks": {
"type": "array",
"description": (
"Optional pre-seeded task queue for the colony. "
"When the colony is a fan-out of many similar "
"units of work (e.g. 'process record #1234', "
"'scrape profile X'), pass them here as an "
"array and workers will claim rows atomically "
"from the SQLite queue using the "
"hive.colony-progress-tracker skill. Each task "
"needs a 'goal' string; optionally include "
"'steps' (ordered subtasks), 'sop_items' "
"(required checklist gates), 'priority' "
"(higher runs first), and 'payload' "
"(task-specific parameters). Can be hundreds "
"or thousands of entries — the bulk insert "
"runs in a single transaction."
),
"items": {
"type": "object",
"properties": {
"goal": {"type": "string"},
"priority": {"type": "integer"},
"payload": {},
"steps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"detail": {"type": "string"},
},
"required": ["title"],
},
},
"sop_items": {
"type": "array",
"items": {
"type": "object",
"properties": {
"key": {"type": "string"},
"description": {"type": "string"},
"required": {"type": "boolean"},
},
"required": ["key", "description"],
},
},
},
"required": ["goal"],
},
},
},
"required": ["colony_name", "task", "skill_path"],
},
@@ -1552,6 +1848,158 @@ def register_queen_lifecycle_tools(
)
tools_registered += 1
# --- enqueue_task ------------------------------------------------------------
async def enqueue_task_tool(
    *,
    colony_name: str,
    goal: str,
    steps: list[dict] | None = None,
    sop_items: list[dict] | None = None,
    payload: Any = None,
    priority: int = 0,
    parent_task_id: str | None = None,
) -> str:
    """Append a single task to an existing colony's progress.db queue.

    Use this when the colony is already created and more work
    needs to be fanned out (webhook-driven, follow-up requests,
    worker-generated subtasks). The colony's workers pick it up
    on their next claim cycle.

    Args:
        colony_name: Target colony (validated against ``_COLONY_NAME_RE``).
        goal: Human-readable task description; must be non-empty.
        steps: Optional ordered subtask dicts.
        sop_items: Optional hard-gate checklist item dicts.
        payload: Optional task-specific parameters (stored as JSON).
        priority: Higher values run first.
        parent_task_id: Optional audit-only parent task reference.

    Returns:
        JSON string — ``{"status": "enqueued", ...}`` on success,
        ``{"error": ...}`` on validation or insert failure.
    """
    cn = (colony_name or "").strip()
    if not _COLONY_NAME_RE.match(cn):
        return json.dumps(
            {"error": "colony_name must be lowercase alphanumeric with underscores"}
        )
    # Reject empty goals up front — mirrors run_parallel_workers'
    # "tasks[i].task is empty" check. Without this, an empty row lands
    # in the queue and a worker claims it with nothing to act on.
    goal_text = (goal or "").strip()
    if not goal_text:
        return json.dumps({"error": "goal must be a non-empty string"})

    from pathlib import Path as _Path

    from framework.host.progress_db import (
        enqueue_task as _enqueue_task,
        ensure_progress_db as _ensure_db,
    )

    colony_dir = _Path.home() / ".hive" / "colonies" / cn
    if not colony_dir.is_dir():
        return json.dumps({"error": f"colony '{cn}' not found"})
    try:
        # SQLite access is blocking — run both calls off the event loop.
        db_path = await asyncio.to_thread(_ensure_db, colony_dir)
        task_id = await asyncio.to_thread(
            _enqueue_task,
            db_path,
            goal_text,
            steps=steps,
            sop_items=sop_items,
            payload=payload,
            priority=priority,
            parent_task_id=parent_task_id,
        )
    except Exception as e:
        logger.exception("enqueue_task: failed to insert row")
        return json.dumps({"error": f"enqueue_task failed: {e}"})
    return json.dumps(
        {
            "status": "enqueued",
            "colony_name": cn,
            "task_id": task_id,
            "db_path": str(db_path),
        }
    )
_enqueue_task_tool = Tool(
name="enqueue_task",
description=(
"Append a single task to an existing colony's progress.db "
"queue. Use this after create_colony when more work needs "
"to be fanned out — e.g. a webhook fired, the user asked "
"for a follow-up run, or a worker spawned a subtask. The "
"colony's workers pick it up on their next claim cycle "
"(atomic UPDATE … WHERE status='pending'). For bulk "
"authoring at colony creation time, pass the 'tasks' "
"array to create_colony instead."
),
parameters={
"type": "object",
"properties": {
"colony_name": {
"type": "string",
"description": "Target colony name (lowercase + underscores).",
},
"goal": {
"type": "string",
"description": (
"Human-readable task description. Self-contained — "
"the worker has no context beyond this string plus "
"any steps/sop_items/payload you attach."
),
},
"steps": {
"type": "array",
"description": (
"Optional ordered subtasks the worker should "
"check off as it executes. Each step needs a "
"'title'; optional 'detail' for longer "
"instructions."
),
"items": {
"type": "object",
"properties": {
"title": {"type": "string"},
"detail": {"type": "string"},
},
"required": ["title"],
},
},
"sop_items": {
"type": "array",
"description": (
"Optional hard-gate checklist items the worker "
"MUST address before marking the task done. "
"Each item needs a 'key' (slug) and "
"'description'; 'required' defaults to true."
),
"items": {
"type": "object",
"properties": {
"key": {"type": "string"},
"description": {"type": "string"},
"required": {"type": "boolean"},
},
"required": ["key", "description"],
},
},
"payload": {
"description": (
"Optional task-specific parameters. Stored as "
"JSON in the 'payload' column."
),
},
"priority": {
"type": "integer",
"description": "Higher values run first. Default 0.",
},
"parent_task_id": {
"type": "string",
"description": (
"Optional reference to an existing task this "
"one was spawned from (audit only; no blocking "
"dependency resolver today)."
),
},
},
"required": ["colony_name", "goal"],
},
)
registry.register(
"enqueue_task",
_enqueue_task_tool,
lambda inputs: enqueue_task_tool(**inputs),
)
tools_registered += 1
# --- switch_to_reviewing ----------------------------------------------------
async def switch_to_reviewing_tool() -> str:
@@ -2969,7 +3417,8 @@ def register_queen_lifecycle_tools(
if preamble.get("pending_question"):
result["pending_question"] = preamble["pending_question"]
result["agent_idle_seconds"] = round(runtime.agent_idle_seconds, 1)
_idle = runtime.agent_idle_seconds
result["agent_idle_seconds"] = round(_idle, 1) if _idle != float("inf") else -1
for key in ("current_node", "current_iteration"):
if key in preamble:
@@ -3713,6 +4162,33 @@ def register_queen_lifecycle_tools(
task,
)
# Concurrency budget check — mirrors run_parallel_workers so a
# queen in a loop can't silently exceed max_concurrent_workers
# by hammering run_agent_with_input. Per-call count is 1, so
# the check is ``active + 1 > max_concurrent``.
colony_cfg = getattr(colony, "_config", None) or getattr(colony, "config", None)
max_concurrent = getattr(colony_cfg, "max_concurrent_workers", None)
if max_concurrent and max_concurrent > 0:
active = 0
try:
workers = getattr(colony, "_workers", {}) or {}
for w in workers.values():
handle = getattr(w, "_task_handle", None)
if handle is not None and not handle.done():
active += 1
except Exception:
active = 0
if active + 1 > max_concurrent:
return json.dumps(
{
"error": (
f"run_agent_with_input would exceed max_concurrent_workers "
f"({active} active + 1 new > {max_concurrent}). "
"Wait for an existing worker to finish or stop one."
)
}
)
try:
# Pre-flight: compute the set of tools whose credentials are
# NOT currently available, and resync MCP servers. We do NOT
@@ -3723,58 +4199,9 @@ def register_queen_lifecycle_tools(
# to block the whole spawn with a CredentialError; the fix
# is to treat unset credentials as "drop these tools" rather
# than "abort the worker".
#
# Note: the MCP admission gate (_build_mcp_admission_gate in
# tool_registry.py) already filters MCP tools at registration
# time. This preflight covers the non-MCP path — tools.py
# discoveries via discover_from_module — which has no
# credential gate of its own.
loop = asyncio.get_running_loop()
unavailable_tools: set[str] = set()
async def _preflight():
nonlocal unavailable_tools
try:
from framework.credentials.validation import compute_unavailable_tools
drop, messages = await loop.run_in_executor(
None,
lambda: compute_unavailable_tools(legacy.graph.nodes),
)
unavailable_tools = drop
if drop:
logger.warning(
"run_agent_with_input: dropping %d tool(s) with "
"unavailable credentials from worker spawn: %s",
len(drop),
"; ".join(messages),
)
except Exception as exc:
# Validation itself failing (not a credential failure —
# a code error in the validator) should not block the
# spawn. Log and proceed as if nothing was dropped.
logger.warning(
"compute_unavailable_tools raised, proceeding without credential-based tool filtering: %s",
exc,
)
runner = getattr(session, "runner", None)
if runner:
try:
await loop.run_in_executor(
None,
lambda: runner._tool_registry.resync_mcp_servers_if_needed(),
)
except Exception as e:
logger.warning("MCP resync failed: %s", e)
try:
await asyncio.wait_for(_preflight(), timeout=_START_PREFLIGHT_TIMEOUT)
except TimeoutError:
logger.warning(
"run_agent_with_input preflight timed out after %ds — proceeding",
_START_PREFLIGHT_TIMEOUT,
)
unavailable_tools = await _preflight_credentials(
legacy, tool_label="run_agent_with_input"
)
# Build a per-spawn AgentSpec that mirrors the loaded
# worker's entry-node identity. This is what makes the
@@ -3848,10 +4275,66 @@ def register_queen_lifecycle_tools(
dropped_count,
)
# Colony progress tracker wiring: if the loaded worker
# lives under ~/.hive/colonies/{name}/ and has a
# progress.db, inject db_path + colony_id into input_data
# so the spawned worker sees them in its first user
# message and can use the hive.colony-progress-tracker
# skill to claim tasks from the queue.
_spawn_input_data: dict[str, Any] = {"user_request": task}
_worker_path = getattr(session, "worker_path", None)
if _worker_path:
from pathlib import Path as _Path
_worker_path_p = _Path(_worker_path)
_progress_db = _worker_path_p / "data" / "progress.db"
if _progress_db.exists():
_spawn_input_data["db_path"] = str(_progress_db.resolve())
_spawn_input_data["colony_id"] = _worker_path_p.name
logger.info(
"run_agent_with_input: attached progress_db context "
"(colony_id=%s, db_path=%s)",
_worker_path_p.name,
_progress_db,
)
# Phase 2: enqueue the task into progress.db BEFORE
# spawning so the worker has a concrete row to
# claim. Without this the queue is empty and the
# worker's claim UPDATE affects zero rows, so it
# silently falls back to executing from the chat
# spawn message. Any enqueue failure is logged and
# the spawn proceeds without a pinned task_id
# (degrades to the pre-Phase-2 behavior).
try:
from framework.host.progress_db import (
enqueue_task as _enqueue_task_fn,
)
_task_id = await asyncio.to_thread(
_enqueue_task_fn,
_progress_db,
task,
source="run_agent_with_input",
)
_spawn_input_data["task_id"] = _task_id
logger.info(
"run_agent_with_input: enqueued task %s into %s",
_task_id,
_progress_db,
)
except Exception as _enqueue_exc:
logger.warning(
"run_agent_with_input: failed to enqueue task "
"into progress.db (spawn proceeding without "
"pinned task_id): %s",
_enqueue_exc,
)
worker_ids = await colony.spawn(
task=task,
count=1,
input_data={"user_request": task},
input_data=_spawn_input_data,
agent_spec=spawn_spec,
tools=spawn_tools,
tool_executor=spawn_tool_executor,
+19 -3
View File
@@ -87,9 +87,25 @@ export const sessionsApi = {
colonies: (sessionId: string) =>
api.get<{ colonies: string[] }>(`/sessions/${sessionId}/colonies`),
/** Get persisted eventbus log for a session (works for cold sessions — used for full UI replay). */
eventsHistory: (sessionId: string) =>
api.get<{ events: AgentEvent[]; session_id: string }>(`/sessions/${sessionId}/events/history`),
/** Get persisted eventbus log for a session (works for cold sessions — used for full UI replay).
*
* Returns the TAIL of the event log. Default limit 2000 (server
* clamps to [1, 10000]); older events get dropped and
* ``truncated: true`` is set so the UI can show an indicator.
*/
eventsHistory: (sessionId: string, limit?: number) =>
api.get<{
events: AgentEvent[];
session_id: string;
total: number;
returned: number;
truncated: boolean;
limit: number;
}>(
`/sessions/${sessionId}/events/history${
limit ? `?limit=${limit}` : ""
}`,
),
/** Open the session's data folder in the OS file manager. */
revealFolder: (sessionId: string) =>
@@ -31,6 +31,15 @@ export default function AppHeader({ onOpenQueenProfile }: AppHeaderProps) {
const colonyId = colonyMatch[1];
const colony = colonies.find((c) => c.id === colonyId);
title = colony?.name ?? colonyId;
// Show queen profile button when the colony has a linked queen profile
if (colony?.queenProfileId) {
const profile = queenProfiles.find((q) => q.id === colony.queenProfileId);
if (profile) {
queenIdForProfile = profile.id;
queenTitle = profile.title ?? null;
icon = <Crown className="w-4 h-4 text-primary" />;
}
}
} else if (queenMatch) {
const queenId = queenMatch[1];
const profile = queenProfiles.find((q) => q.id === queenId);
+196 -6
View File
@@ -10,6 +10,8 @@ import {
Paperclip,
X,
} from "lucide-react";
import WorkerRunBubble from "@/components/WorkerRunBubble";
import type { WorkerRunGroup } from "@/components/WorkerRunBubble";
export interface ImageContent {
type: "image_url";
@@ -25,6 +27,8 @@ export interface ContextUsageEntry {
import MarkdownContent from "@/components/MarkdownContent";
import QuestionWidget from "@/components/QuestionWidget";
import MultiQuestionWidget from "@/components/MultiQuestionWidget";
import { useColony } from "@/context/ColonyContext";
import { useQueenProfile } from "@/context/QueenProfileContext";
import ParallelSubagentBubble, {
type SubagentGroup,
} from "@/components/ParallelSubagentBubble";
@@ -60,6 +64,12 @@ export interface ChatMessage {
nodeId?: string;
/** Backend execution_id for this message */
executionId?: string;
/** Backend stream_id — the per-worker identity used for grouping
* parallel-spawn workers into their own stacked WorkerRunBubble.
* "queen" for queen messages, "worker" for the single loaded
* worker (run_agent_with_input), or "worker:{uuid}" for each
* parallel worker spawned via run_parallel_workers. */
streamId?: string;
/** True when the message was sent while the queen was still processing */
queued?: boolean;
}
@@ -124,14 +134,14 @@ const TOOL_HEX = [
"#e5a820", // sunflower
];
function toolHex(name: string): string {
/** Deterministically map a tool name to a palette color: a 31-based
 * rolling hash over the string's UTF-16 code units, truncated to a
 * 32-bit int, then used to index TOOL_HEX. */
export function toolHex(name: string): string {
  const acc = name
    .split("")
    .reduce((h, unit) => (h * 31 + unit.charCodeAt(0)) | 0, 0);
  return TOOL_HEX[Math.abs(acc) % TOOL_HEX.length];
}
function ToolActivityRow({ content }: { content: string }) {
export function ToolActivityRow({ content }: { content: string }) {
let tools: { name: string; done: boolean }[] = [];
try {
const parsed = JSON.parse(content);
@@ -336,6 +346,15 @@ function InlineAskUserBubble({
const color = getColor(msg.agent, msg.role);
const thread = msg.thread || activeThread;
const { queenProfiles } = useColony();
const { openQueenProfile } = useQueenProfile();
const queenProfileId = isQueen
? queenProfiles.find((q) => q.name === msg.agent)?.id ?? null
: null;
const handleQueenClick = queenProfileId
? () => openQueenProfile(queenProfileId)
: undefined;
const handleSingle = (answer: string) => {
setState("submitted");
onSend(answer, thread);
@@ -355,12 +374,14 @@ function InlineAskUserBubble({
return (
<div className="flex gap-3">
<div
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center${handleQueenClick ? " cursor-pointer hover:opacity-80 transition-opacity" : ""}`}
style={{
backgroundColor: `${color}18`,
border: `1.5px solid ${color}35`,
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
}}
onClick={handleQueenClick}
title={handleQueenClick ? `View ${msg.agent}'s profile` : undefined}
>
{isQueen ? (
<Crown className="w-4 h-4" style={{ color }} />
@@ -373,8 +394,9 @@ function InlineAskUserBubble({
>
<div className="flex items-center gap-2 mb-1">
<span
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}${handleQueenClick ? " cursor-pointer hover:underline" : ""}`}
style={{ color }}
onClick={handleQueenClick}
>
{msg.agent}
</span>
@@ -435,6 +457,13 @@ const MessageBubble = memo(
const isQueen = msg.role === "queen";
const color = getColor(msg.agent, msg.role);
// Resolve queen profile ID so clicking avatar/name opens the profile panel
const { queenProfiles } = useColony();
const { openQueenProfile } = useQueenProfile();
const queenProfileId = isQueen
? queenProfiles.find((q) => q.name === msg.agent)?.id ?? null
: null;
if (msg.type === "run_divider") {
return (
<div className="flex items-center gap-3 py-2 my-1">
@@ -529,15 +558,21 @@ const MessageBubble = memo(
);
}
const handleQueenClick = queenProfileId
? () => openQueenProfile(queenProfileId)
: undefined;
return (
<div className="flex gap-3">
<div
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center`}
className={`flex-shrink-0 ${isQueen ? "w-9 h-9" : "w-7 h-7"} rounded-xl flex items-center justify-center${handleQueenClick ? " cursor-pointer hover:opacity-80 transition-opacity" : ""}`}
style={{
backgroundColor: `${color}18`,
border: `1.5px solid ${color}35`,
boxShadow: isQueen ? `0 0 12px ${color}20` : undefined,
}}
onClick={handleQueenClick}
title={handleQueenClick ? `View ${msg.agent}'s profile` : undefined}
>
{isQueen ? (
<Crown className="w-4 h-4" style={{ color }} />
@@ -550,8 +585,9 @@ const MessageBubble = memo(
>
<div className="flex items-center gap-2 mb-1">
<span
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}`}
className={`font-medium ${isQueen ? "text-sm" : "text-xs"}${handleQueenClick ? " cursor-pointer hover:underline" : ""}`}
style={{ color }}
onClick={handleQueenClick}
>
{msg.agent}
</span>
@@ -665,14 +701,157 @@ export default function ChatPanel({
type RenderItem =
| { kind: "message"; msg: ChatMessage }
| { kind: "parallel"; groupId: string; groups: SubagentGroup[] }
| {
kind: "worker_run";
runId: string;
group: WorkerRunGroup;
/** Optional short label shown next to the "Worker" badge.
* Only set when there are multiple parallel workers in the
* same run span (so users can tell them apart). */
label?: string;
}
| { kind: "day_divider"; key: string; createdAt: number };
/** Derive a short label from a parallel-worker stream id.
* `worker:abcdef12-3456-...` → `abcdef12` (first 8 chars of the
* uuid after the `worker:` prefix). Falls back to the first
* message's nodeId when the streamId isn't the expected shape. */
/** Derive a short label from a parallel-worker stream id.
 * `worker:abcdef12-3456-...` → `abcdef12`; session-style suffixes
 * (`session_YYYYMMDD_HHMMSS_<hex>`) yield the trailing hex run.
 * Non-`worker:` keys fall back to the first message's nodeId, or
 * the raw key itself. */
function deriveWorkerLabel(
  streamKey: string,
  msgs: ChatMessage[],
): string {
  const PREFIX = "worker:";
  if (!streamKey.startsWith(PREFIX)) {
    // Not a parallel-worker stream: use the first tagged message.
    for (const m of msgs) {
      if (m.nodeId) return m.nodeId;
    }
    return streamKey;
  }
  const suffix = streamKey.slice(PREFIX.length);
  // Prefer a trailing `_hexhexhex...` run (session-id shape); else
  // just truncate the suffix.
  const hexTail = suffix.match(/_[0-9a-f]{6,}$/i);
  const label = hexTail ? hexTail[0].slice(1) : suffix;
  return label.slice(0, 8);
}
const renderItems = useMemo<RenderItem[]>(() => {
const items: RenderItem[] = [];
let i = 0;
while (i < threadMessages.length) {
const msg = threadMessages[i];
const isSubagent = msg.nodeId?.includes(":subagent:");
// Worker run grouping: collect consecutive WORKER-role
// messages (and worker tool_status pills) into a collapsible
// card. Queen tool_status pills (``role === "queen"``) are
// deliberately excluded — the queen's own tool calls are part
// of the queen↔user conversation and should render inline as
// ToolActivityRows, not fold into a "Worker" bubble. Without
// this guard, every queen run_command / read_file / etc. shows
// up under a misleading "Worker" label in the DM.
const isWorkerCandidate =
msg.role === "worker" ||
(msg.type === "tool_status" && msg.role !== "queen");
if (
!isSubagent &&
isWorkerCandidate &&
msg.type !== "user" &&
msg.type !== "run_divider"
) {
const workerMsgs: ChatMessage[] = [];
const firstWorkerMsg = msg;
while (i < threadMessages.length) {
const m = threadMessages[i];
// Hard boundary — stop the worker run group
if (m.type === "user" || m.type === "run_divider") break;
// Queen message with real text — boundary (queen is talking
// to the user, not just emitting a tool)
if (m.role === "queen" && m.content?.trim() && !m.type) break;
// Queen tool_status — NOT a worker activity, don't bucket
// it. Break so the grouping stops and the queen pill
// renders inline.
if (m.type === "tool_status" && m.role === "queen") break;
// Subagent message — different group type, stop here
if (m.nodeId?.includes(":subagent:")) break;
// Worker text messages and worker tool_status belong to the run
if (
m.role === "worker" ||
(m.type === "tool_status" && m.role !== "queen")
) {
workerMsgs.push(m);
i++;
continue;
}
// System message or other — include in the worker run
// group to preserve ordering (they'll render inside the
// expanded view)
workerMsgs.push(m);
i++;
}
if (workerMsgs.length > 0) {
// Parallel fan-out detection: if any message in this span
// is tagged with a parallel-worker streamId (``worker:{uuid}``),
// split the span by streamId and emit one ``worker_run``
// per worker — they render as stacked independent
// ``WorkerRunBubble``s. Un-tagged legacy messages and the
// single-worker ``streamId="worker"`` case fall through to
// the existing single-bubble behavior.
const hasParallel = workerMsgs.some(
(m) => !!m.streamId && /^worker:./.test(m.streamId),
);
if (hasParallel) {
const buckets = new Map<
string,
{ messages: ChatMessage[]; firstAt: number }
>();
// Messages with no streamId (system notes, orphans from
// old restore) attach to the most-recent keyed message's
// bucket so chronology is preserved.
let currentKey: string | null = null;
for (const m of workerMsgs) {
const key =
m.streamId && m.streamId.length > 0
? m.streamId
: currentKey;
if (!key) continue;
if (m.streamId && m.streamId.length > 0) currentKey = m.streamId;
let bucket = buckets.get(key);
if (!bucket) {
bucket = { messages: [], firstAt: m.createdAt ?? 0 };
buckets.set(key, bucket);
}
bucket.messages.push(m);
bucket.firstAt = Math.min(
bucket.firstAt,
m.createdAt ?? Number.POSITIVE_INFINITY,
);
}
const sorted = Array.from(buckets.entries()).sort(
([, a], [, b]) => a.firstAt - b.firstAt,
);
for (const [streamKey, { messages: bucketMsgs }] of sorted) {
items.push({
kind: "worker_run",
runId: `wrun-${firstWorkerMsg.id}-${streamKey}`,
group: { messages: bucketMsgs },
label: deriveWorkerLabel(streamKey, bucketMsgs),
});
}
} else {
items.push({
kind: "worker_run",
runId: `wrun-${firstWorkerMsg.id}`,
group: { messages: workerMsgs },
});
}
}
continue;
}
if (!isSubagent) {
items.push({ kind: "message", msg });
i++;
@@ -872,6 +1051,17 @@ export default function ChatPanel({
</div>
);
}
if (item.kind === "worker_run") {
return (
<div key={item.runId}>
<WorkerRunBubble
runId={item.runId}
group={item.group}
label={item.label}
/>
</div>
);
}
const msg = item.msg;
// Detect misformatted ask_user payloads emitted as plain text and
// substitute the nicer widget-based bubble. Only inspect regular
@@ -1,4 +1,4 @@
import { useState, useEffect } from "react";
import { useState, useEffect, useCallback, useRef } from "react";
import { NavLink, useLocation, useNavigate } from "react-router-dom";
import {
X,
@@ -46,8 +46,49 @@ export default function QueenProfilePanel({
const name = profile?.name ?? summary?.name ?? "Queen";
const title = profile?.title ?? summary?.title ?? "";
// ── Resizable width ──────────────────────────────────────────────────
const MIN_WIDTH = 280;
const MAX_WIDTH = 600;
const [width, setWidth] = useState(340);
const dragging = useRef(false);
const startX = useRef(0);
const startWidth = useRef(0);
const onDragStart = useCallback((e: React.MouseEvent) => {
  e.preventDefault();
  dragging.current = true;
  startX.current = e.clientX;
  startWidth.current = width;
  const handleMove = (ev: MouseEvent) => {
    if (!dragging.current) return;
    // Panel sits on the right edge, so moving the pointer LEFT
    // (smaller clientX) should grow it: width follows startX - x.
    const grownBy = startX.current - ev.clientX;
    const next = startWidth.current + grownBy;
    setWidth(Math.max(MIN_WIDTH, Math.min(MAX_WIDTH, next)));
  };
  const handleUp = () => {
    dragging.current = false;
    document.removeEventListener("mousemove", handleMove);
    document.removeEventListener("mouseup", handleUp);
    document.body.style.cursor = "";
    document.body.style.userSelect = "";
  };
  // Listen on document (not the handle) so fast drags that leave the
  // 1px hit area keep tracking; cursor/select are forced globally for
  // the duration of the drag.
  // NOTE(review): if the component unmounts mid-drag these document
  // listeners are never removed — confirm whether that can happen here.
  document.addEventListener("mousemove", handleMove);
  document.addEventListener("mouseup", handleUp);
  document.body.style.cursor = "col-resize";
  document.body.style.userSelect = "none";
}, [width]);
return (
<aside className="w-[340px] flex-shrink-0 border-l border-border/60 bg-card overflow-y-auto">
<aside
className="flex-shrink-0 border-l border-border/60 bg-card overflow-y-auto relative"
style={{ width }}
>
{/* Drag handle */}
<div
onMouseDown={onDragStart}
className="absolute top-0 left-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/50 transition-colors z-10"
/>
{/* Header */}
<div className="flex items-center justify-between px-5 py-3.5 border-b border-border/60">
<div className="flex items-center gap-2 text-sm font-semibold text-foreground">
+42 -2
View File
@@ -1,4 +1,4 @@
import { useState } from "react";
import { useState, useCallback, useRef } from "react";
import { useNavigate } from "react-router-dom";
import {
ChevronLeft,
@@ -22,6 +22,38 @@ export default function Sidebar() {
const [coloniesExpanded, setColoniesExpanded] = useState(true);
const [queensExpanded, setQueensExpanded] = useState(true);
// ── Resizable width ──────────────────────────────────────────────────
const MIN_WIDTH = 180;
const MAX_WIDTH = 400;
const [width, setWidth] = useState(240);
const dragging = useRef(false);
const startX = useRef(0);
const startWidth = useRef(0);
const onDragStart = useCallback((e: React.MouseEvent) => {
  e.preventDefault();
  dragging.current = true;
  startX.current = e.clientX;
  startWidth.current = width;
  const handleMove = (ev: MouseEvent) => {
    if (!dragging.current) return;
    // Sidebar is on the left with the handle on its right edge, so
    // moving the pointer RIGHT (larger clientX) grows it.
    const grownBy = ev.clientX - startX.current;
    const next = startWidth.current + grownBy;
    setWidth(Math.max(MIN_WIDTH, Math.min(MAX_WIDTH, next)));
  };
  const handleUp = () => {
    dragging.current = false;
    document.removeEventListener("mousemove", handleMove);
    document.removeEventListener("mouseup", handleUp);
    document.body.style.cursor = "";
    document.body.style.userSelect = "";
  };
  // Document-level listeners so the drag keeps tracking when the
  // pointer leaves the narrow handle; global cursor/select override
  // for the duration of the drag.
  document.addEventListener("mousemove", handleMove);
  document.addEventListener("mouseup", handleUp);
  document.body.style.cursor = "col-resize";
  document.body.style.userSelect = "none";
}, [width]);
if (sidebarCollapsed) {
return (
<aside className="w-[52px] flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full">
@@ -50,7 +82,15 @@ export default function Sidebar() {
}
return (
<aside className="w-[240px] flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full">
<aside
className="flex-shrink-0 flex flex-col bg-sidebar-bg border-r border-sidebar-border h-full relative"
style={{ width }}
>
{/* Drag handle on right edge */}
<div
onMouseDown={onDragStart}
className="absolute top-0 right-0 w-1 h-full cursor-col-resize hover:bg-primary/30 active:bg-primary/50 transition-colors z-10"
/>
{/* Header */}
<div className="h-12 flex items-center justify-between px-4 border-b border-border/60">
<button
@@ -0,0 +1,297 @@
import { memo, useState, useRef, useEffect } from "react";
import { ChevronDown, ChevronUp, Cpu } from "lucide-react";
import type { ChatMessage } from "@/components/ChatPanel";
import { ToolActivityRow } from "@/components/ChatPanel";
import MarkdownContent from "@/components/MarkdownContent";
const workerColor = "hsl(220,60%,55%)";
export interface WorkerRunGroup {
messages: ChatMessage[];
}
interface WorkerRunBubbleProps {
runId: string;
group: WorkerRunGroup;
/** Short identifier shown next to the "Worker" badge. Populated
* only when the parent grouping has multiple parallel workers
* in the same run span, so N stacked bubbles can be told apart
* at a glance. Omitted for single-worker runs. */
label?: string;
}
/**
 * Parse a tool_status JSON blob into a list of tool entries.
 *
 * Always returns a real array: malformed JSON, a non-object payload,
 * a missing `tools` key, or a `tools` value of the wrong type (a
 * corrupted message could carry e.g. a string) all yield `[]`.
 * Callers iterate the result directly, so the declared array return
 * type must hold unconditionally.
 */
function parseToolStatus(content: string): { name: string; done: boolean }[] {
  try {
    const parsed = JSON.parse(content);
    // Guard the shape — `parsed.tools || []` would leak a truthy
    // non-array value straight to callers.
    return Array.isArray(parsed?.tools) ? parsed.tools : [];
  } catch {
    return [];
  }
}
/**
 * Strip markdown formatting so the collapsed preview is a single
 * readable line instead of a scatter of code pills.
 *
 * MarkdownContent renders every backtick-wrapped fragment as its own
 * visually-boxed inline-code pill; in worker prose those fragments
 * are coordinates, UUIDs, selectors, tool names — the collapsed
 * preview ends up looking like confetti. Here the markdown is reduced
 * to plain prose, squashed onto one line, and truncated with an
 * ellipsis.
 */
function stripMarkdownToPreview(s: string, maxLen = 160): string {
  // Ordered (pattern, replacement) passes. Fenced blocks go first so
  // their contents never reach the inline-code pass; whitespace is
  // collapsed last, after line-anchored header/list passes have run.
  const passes: [RegExp, string][] = [
    [/```[\s\S]*?```/g, " [code] "], // fenced code blocks
    [/`([^`]+)`/g, "$1"], // inline code — keep the text, drop backticks
    [/\*\*([^*]+)\*\*/g, "$1"], // bold
    [/\*([^*]+)\*/g, "$1"], // italic
    [/~~([^~]+)~~/g, "$1"], // strikethrough
    [/\[([^\]]+)\]\([^)]+\)/g, "$1"], // links -> link text
    [/^#{1,6}\s+/gm, ""], // ATX headers
    [/^[>\-*+]\s+/gm, ""], // blockquote/list markers
    [/\s+/g, " "], // collapse whitespace
  ];
  let text = s;
  for (const [pattern, replacement] of passes) {
    text = text.replace(pattern, replacement);
  }
  text = text.trim();
  if (text.length > maxLen) {
    text = text.slice(0, maxLen - 1).trimEnd() + "\u2026";
  }
  return text;
}
/**
 * Collapsible card that groups all worker messages from a single run
 * (the span between the queen's `run_agent_with_input` call and the
 * worker's final `set_output`/`escalate`/idle).
 *
 * Collapsed (default): header bar with tool count + latest text snippet.
 * Expanded: scrollable list of every message and tool status in order.
 *
 * Re-render contract: tool_status messages are UPSERTED by id — the
 * same pill message is re-emitted with updated content as tools
 * finish — so the memo comparator must inspect every message, not
 * just the tail (see the comparator below).
 */
const WorkerRunBubble = memo(
  function WorkerRunBubble({ group, label }: WorkerRunBubbleProps) {
    const [expanded, setExpanded] = useState(false);
    const bodyRef = useRef<HTMLDivElement>(null);
    // Separate text messages from tool status
    const textMsgs = group.messages.filter(
      (m) => m.type !== "tool_status" && m.content?.trim()
    );
    const toolStatusMsgs = group.messages.filter(
      (m) => m.type === "tool_status"
    );
    // Count total tool calls from tool_status messages
    const allTools: { name: string; done: boolean }[] = [];
    for (const m of toolStatusMsgs) {
      for (const t of parseToolStatus(m.content)) {
        allTools.push(t);
      }
    }
    const toolCount = allTools.length;
    const doneCount = allTools.filter((t) => t.done).length;
    const isFinished = toolCount > 0 && doneCount === toolCount;
    // Latest text from the worker (the last non-empty text message)
    const latestText = textMsgs.length > 0
      ? textMsgs[textMsgs.length - 1].content
      : "";
    // Status label. We prefer concrete states over the vague
    // "starting" fallback — if the worker has emitted any text or
    // any tool, it's past the startup phase.
    const statusLabel = isFinished
      ? "done"
      : toolCount > 0
      ? "running"
      : textMsgs.length > 0
      ? "active"
      : "starting";
    // Unique tool names for the summary (deduplicated, ordered by first appearance)
    const uniqueToolNames: string[] = [];
    const seen = new Set<string>();
    for (const t of allTools) {
      if (!seen.has(t.name)) {
        seen.add(t.name);
        uniqueToolNames.push(t.name);
      }
    }
    // Auto-scroll body when expanded
    useEffect(() => {
      if (expanded && bodyRef.current) {
        bodyRef.current.scrollTop = bodyRef.current.scrollHeight;
      }
    }, [expanded, group.messages.length]);
    return (
      <div className="flex gap-3">
        {/* Left icon */}
        <div
          className="flex-shrink-0 w-7 h-7 rounded-xl flex items-center justify-center mt-1"
          style={{
            backgroundColor: `${workerColor}18`,
            border: `1.5px solid ${workerColor}35`,
          }}
        >
          <Cpu className="w-3.5 h-3.5" style={{ color: workerColor }} />
        </div>
        <div className="flex-1 min-w-0 max-w-[90%]">
          {/* Clickable header */}
          <button
            onClick={() => setExpanded((v) => !v)}
            className="w-full flex items-center gap-2 mb-1 text-left cursor-pointer group"
          >
            <span className="font-medium text-xs" style={{ color: workerColor }}>
              Worker
            </span>
            {label && (
              <span className="text-[10px] font-mono text-muted-foreground/80 tabular-nums">
                {label}
              </span>
            )}
            <span
              className={`text-[10px] font-medium px-1.5 py-0.5 rounded-md ${
                isFinished
                  ? "bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400"
                  : "bg-muted text-muted-foreground"
              }`}
            >
              {statusLabel}
            </span>
            {toolCount > 0 && (
              <span className="text-[10px] text-muted-foreground tabular-nums">
                {doneCount}/{toolCount} tools
              </span>
            )}
            <span className="ml-auto text-muted-foreground/60 group-hover:text-muted-foreground transition-colors p-0.5 rounded">
              {expanded ? (
                <ChevronUp className="w-3.5 h-3.5" />
              ) : (
                <ChevronDown className="w-3.5 h-3.5" />
              )}
            </span>
          </button>
          {/* Card body — use Tailwind theme tokens so dark mode
              gets a proper dark background instead of a glaring
              near-white hardcoded hsl. Finished runs get a subtle
              green tint that also respects theme. */}
          <div
            className={`rounded-2xl rounded-tl-md overflow-hidden border ${
              isFinished
                ? "border-green-300/50 bg-green-50/50 dark:border-green-900/40 dark:bg-green-950/20"
                : "border-border bg-muted/60"
            }`}
          >
            {/* Collapsed: single-line plain-text preview of the
                latest worker text, OR a tool-name chain when the
                worker hasn't emitted any prose yet. MarkdownContent
                is intentionally NOT used here — its inline-code
                rendering turns every backtick-wrapped fragment into
                a floating pill, which wrecks the preview. */}
            {!expanded && (
              <div className="px-4 py-2.5 text-sm text-muted-foreground">
                {latestText ? (
                  <div className="truncate">
                    {stripMarkdownToPreview(latestText)}
                  </div>
                ) : uniqueToolNames.length > 0 ? (
                  <span className="text-xs font-mono truncate block">
                    {uniqueToolNames.slice(0, 5).join(" \u2192 ")}
                    {uniqueToolNames.length > 5 &&
                      ` + ${uniqueToolNames.length - 5} more`}
                  </span>
                ) : (
                  <span className="text-xs text-muted-foreground/60 italic">
                    {"waiting for first action\u2026"}
                  </span>
                )}
              </div>
            )}
            {/* Expanded: chronological stream with tool bursts
                coalesced into a single ToolActivityRow each.
                Consecutive tool_status messages (no text between)
                collapse to the LATEST snapshot — each snapshot is
                cumulative within its turn, so the latest one tells
                the whole story for that burst. Text messages break
                the burst and render as markdown. */}
            {expanded && (
              <div
                ref={bodyRef}
                className="max-h-[400px] overflow-y-auto px-4 py-3 space-y-3"
              >
                {(() => {
                  type RenderRow =
                    | { kind: "tools"; content: string; key: string }
                    | { kind: "text"; msg: ChatMessage; key: string };
                  const rows: RenderRow[] = [];
                  let pendingTool: { content: string; id: string } | null = null;
                  const flushTool = () => {
                    if (pendingTool) {
                      rows.push({
                        kind: "tools",
                        content: pendingTool.content,
                        key: `tools-${pendingTool.id}`,
                      });
                      pendingTool = null;
                    }
                  };
                  for (let i = 0; i < group.messages.length; i++) {
                    const m = group.messages[i];
                    if (m.type === "tool_status") {
                      // Overwrite — latest snapshot in the burst wins
                      pendingTool = {
                        content: m.content,
                        id: m.id || `ts-${i}`,
                      };
                      continue;
                    }
                    if (m.content?.trim()) {
                      flushTool();
                      rows.push({
                        kind: "text",
                        msg: m,
                        key: m.id || `txt-${i}`,
                      });
                    }
                  }
                  flushTool();
                  return rows.map((row) => {
                    if (row.kind === "tools") {
                      // ToolActivityRow groups by tool name (×N), shows
                      // running pills (spinner) before done pills (check),
                      // and uses the per-tool color hash that matches
                      // the rest of the chat.
                      return (
                        <div key={row.key} className="-ml-10">
                          <ToolActivityRow content={row.content} />
                        </div>
                      );
                    }
                    return (
                      <div
                        key={row.key}
                        className="text-sm leading-relaxed"
                      >
                        <MarkdownContent content={row.msg.content} />
                      </div>
                    );
                  });
                })()}
              </div>
            )}
          </div>
        </div>
      </div>
    );
  },
  // Re-render guard. Compare the content of EVERY message, not just
  // the last one: tool_status pills are upserted in place (same id,
  // new content) as tools complete, so an interior message can change
  // while the array length and the trailing text message stay
  // identical. Comparing only the tail skipped those updates and left
  // pills frozen at "running".
  (prev, next) =>
    prev.runId === next.runId &&
    prev.label === next.label &&
    prev.group.messages.length === next.group.messages.length &&
    prev.group.messages.every(
      (m, i) => m.content === next.group.messages[i].content,
    )
);
export default WorkerRunBubble;
@@ -0,0 +1,31 @@
import { createContext, useContext, useCallback, type ReactNode } from "react";
// Imperative bridge: lets any descendant (e.g. a queen chip deep in the
// tree) open the queen-profile side panel without prop-drilling the
// open handler through every layer.
interface QueenProfileContextValue {
  openQueenProfile: (queenId: string) => void;
}
// Default is null (not a no-op) so calling the hook outside the
// provider is detectable — useQueenProfile throws in that case.
const QueenProfileContext = createContext<QueenProfileContextValue | null>(null);
/**
 * Provides the queen-profile opener to the subtree. `onOpen` receives
 * the queenId of the profile to show; the provider wraps it in a
 * callback whose identity is stable for a given `onOpen`, so memoized
 * consumers keyed on the function don't churn on unrelated re-renders.
 */
export function QueenProfileProvider({
  onOpen,
  children,
}: {
  onOpen: (queenId: string) => void;
  children: ReactNode;
}) {
  const openQueenProfile = useCallback(
    (queenId: string) => onOpen(queenId),
    [onOpen],
  );
  const contextValue = { openQueenProfile };
  return (
    <QueenProfileContext.Provider value={contextValue}>
      {children}
    </QueenProfileContext.Provider>
  );
}
/**
 * Accessor for the queen-profile opener. Must be called under a
 * QueenProfileProvider; throws otherwise so a missing provider fails
 * loudly instead of silently dropping clicks.
 */
export function useQueenProfile() {
  const value = useContext(QueenProfileContext);
  if (value === null) {
    throw new Error("useQueenProfile must be used within QueenProfileProvider");
  }
  return value;
}
+27 -19
View File
@@ -1,10 +1,11 @@
import { useEffect, useState } from "react";
import { useEffect, useState, useCallback } from "react";
import { Outlet, useLocation } from "react-router-dom";
import Sidebar from "@/components/Sidebar";
import AppHeader from "@/components/AppHeader";
import QueenProfilePanel from "@/components/QueenProfilePanel";
import { ColonyProvider, useColony } from "@/context/ColonyContext";
import { HeaderActionsProvider } from "@/context/HeaderActionsContext";
import { QueenProfileProvider } from "@/context/QueenProfileContext";
export default function AppLayout() {
return (
@@ -27,26 +28,33 @@ function AppLayoutInner() {
setOpenQueenId(null);
}, [location.pathname]);
const handleOpenQueenProfile = useCallback(
(queenId: string) => setOpenQueenId((prev) => (prev === queenId ? null : queenId)),
[],
);
return (
<div className="flex h-screen bg-background overflow-hidden">
<Sidebar />
<div className="flex-1 min-w-0 flex flex-col">
<AppHeader onOpenQueenProfile={setOpenQueenId} />
<div className="flex-1 min-h-0 flex">
<main className="flex-1 min-w-0 flex flex-col">
<Outlet />
</main>
{openQueenId && (
<QueenProfilePanel
queenId={openQueenId}
colonies={colonies.filter(
(c) => c.queenProfileId === openQueenId,
)}
onClose={() => setOpenQueenId(null)}
/>
)}
<QueenProfileProvider onOpen={handleOpenQueenProfile}>
<div className="flex h-screen bg-background overflow-hidden">
<Sidebar />
<div className="flex-1 min-w-0 flex flex-col">
<AppHeader onOpenQueenProfile={handleOpenQueenProfile} />
<div className="flex-1 min-h-0 flex">
<main className="flex-1 min-w-0 flex flex-col">
<Outlet />
</main>
{openQueenId && (
<QueenProfilePanel
queenId={openQueenId}
colonies={colonies.filter(
(c) => c.queenProfileId === openQueenId,
)}
onClose={() => setOpenQueenId(null)}
/>
)}
</div>
</div>
</div>
</div>
</QueenProfileProvider>
);
}
+187
View File
@@ -119,6 +119,7 @@ export function sseEventToChatMessage(
createdAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: event.stream_id || undefined,
};
}
@@ -138,6 +139,7 @@ export function sseEventToChatMessage(
type: "user",
thread,
createdAt,
streamId: event.stream_id || undefined,
};
}
@@ -158,6 +160,7 @@ export function sseEventToChatMessage(
createdAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: event.stream_id || undefined,
};
}
@@ -172,6 +175,7 @@ export function sseEventToChatMessage(
type: "system",
thread,
createdAt,
streamId: event.stream_id || undefined,
};
}
@@ -186,6 +190,7 @@ export function sseEventToChatMessage(
type: "system",
thread,
createdAt,
streamId: event.stream_id || undefined,
};
}
@@ -194,6 +199,188 @@ export function sseEventToChatMessage(
}
}
// ---------------------------------------------------------------------------
// Stateful event replay — produces tool_status pills + regular messages
// ---------------------------------------------------------------------------
/**
* State maintained while replaying an event stream. Tracks per-stream turn
* counters, the set of active tool calls (so tool_status pill content
* reflects "tool A done, tool B running" correctly), and a tool_use_id →
* pill_msg_id map so deferred `tool_call_completed` events can find the
* pill they belong to after the turn counter moves on.
*/
export interface ReplayState {
turnCounters: Record<string, number>;
activeToolCalls: Record<
string,
{ name: string; done: boolean; streamId: string }
>;
toolUseToPill: Record<string, { msgId: string; name: string }>;
}
/** Fresh replay state: no turns seen, no in-flight tools, no pill map. */
export function newReplayState(): ReplayState {
  const state: ReplayState = {
    turnCounters: {},
    activeToolCalls: {},
    toolUseToPill: {},
  };
  return state;
}
/**
* Process a single event and emit zero or more ChatMessage upserts.
*
* Why this exists: `sseEventToChatMessage` is stateless — one event in, at
* most one message out. But the chat's tool_status pill is a SYNTHESIZED
* message: each tool_call_started adds to an accumulating pill, and each
* tool_call_completed flips one of its tools from running to done. Live
* SSE handlers in colony-chat and queen-dm already do this synthesis
* against React refs. Cold-restore from events.jsonl used to skip
* tool_call_* events entirely, so refreshed sessions looked completely
* different from live ones — no tool activity visible, just prose.
*
* This function centralizes the synthesis so cold-restore and live paths
* can use the exact same state machine. The caller treats the returned
* messages as upserts (by id) — a later event in the same replay may
* emit the same pill id with updated content, which should REPLACE the
* earlier row in the caller's message list.
*/
export function replayEvent(
  state: ReplayState,
  event: AgentEvent,
  thread: string,
  agentDisplayName: string | undefined,
): ChatMessage[] {
  // stream_id distinguishes the queen stream ("queen") from worker
  // streams; it also keys the per-stream turn counters below.
  const streamId = event.stream_id;
  const isQueen = streamId === "queen";
  const role: "queen" | "worker" = isQueen ? "queen" : "worker";
  const turnKey = streamId;
  // Turn counter as it stood BEFORE this event — pill ids for
  // tool_call_started use this pre-increment value.
  const currentTurn = state.turnCounters[turnKey] ?? 0;
  const eventCreatedAt = event.timestamp
    ? new Date(event.timestamp).getTime()
    : Date.now();
  const out: ChatMessage[] = [];
  // Update state machine BEFORE the generic converter runs so the
  // regular message emitted for this event sees the post-update
  // counter (matches live handler ordering at colony-chat.tsx:525).
  switch (event.type) {
    case "execution_started":
      state.turnCounters[turnKey] = currentTurn + 1;
      // New execution for a worker resets its active tools, mirroring
      // the live handler's setAgentState at colony-chat.tsx:566.
      if (!isQueen) {
        const keepActive: typeof state.activeToolCalls = {};
        for (const [k, v] of Object.entries(state.activeToolCalls)) {
          if (v.streamId !== streamId) keepActive[k] = v;
        }
        state.activeToolCalls = keepActive;
      }
      break;
    case "llm_turn_complete":
      state.turnCounters[turnKey] = currentTurn + 1;
      break;
    case "tool_call_started": {
      if (!event.node_id) break;
      const toolName = (event.data?.tool_name as string) || "unknown";
      const toolUseId = (event.data?.tool_use_id as string) || "";
      state.activeToolCalls[toolUseId] = {
        name: toolName,
        done: false,
        streamId,
      };
      // One pill per (stream, execution, turn): every tool started in
      // the same turn re-emits the same message id, so the UI shows a
      // single accumulating pill.
      const pillId = `tool-pill-${streamId}-${event.execution_id || "exec"}-${currentTurn}`;
      if (toolUseId) {
        state.toolUseToPill[toolUseId] = { msgId: pillId, name: toolName };
      }
      // Pill content reflects only THIS stream's tools — other
      // parallel workers keep their own pills.
      const tools = Object.values(state.activeToolCalls)
        .filter((t) => t.streamId === streamId)
        .map((t) => ({ name: t.name, done: t.done }));
      const allDone = tools.length > 0 && tools.every((t) => t.done);
      out.push({
        id: pillId,
        agent: agentDisplayName || event.node_id || "Agent",
        agentColor: "",
        content: JSON.stringify({ tools, allDone }),
        timestamp: "",
        type: "tool_status",
        role,
        thread,
        createdAt: eventCreatedAt,
        nodeId: event.node_id || undefined,
        executionId: event.execution_id || undefined,
        streamId: streamId || undefined,
      });
      break;
    }
    case "tool_call_completed": {
      if (!event.node_id) break;
      const toolUseId = (event.data?.tool_use_id as string) || "";
      const tracked = state.toolUseToPill[toolUseId];
      if (toolUseId) delete state.toolUseToPill[toolUseId];
      if (toolUseId && state.activeToolCalls[toolUseId]) {
        state.activeToolCalls[toolUseId].done = true;
      }
      // Completion for a tool we never saw start (e.g. truncated
      // history) has no pill to update — drop it silently.
      if (!tracked) break;
      const tools = Object.values(state.activeToolCalls)
        .filter((t) => t.streamId === streamId)
        .map((t) => ({ name: t.name, done: t.done }));
      const allDone = tools.length > 0 && tools.every((t) => t.done);
      // Re-emit the SAME pill id with updated content. Caller upserts
      // by id, so this replaces the row from tool_call_started.
      out.push({
        id: tracked.msgId,
        agent: agentDisplayName || event.node_id || "Agent",
        agentColor: "",
        content: JSON.stringify({ tools, allDone }),
        timestamp: "",
        type: "tool_status",
        role,
        thread,
        createdAt: eventCreatedAt,
        nodeId: event.node_id || undefined,
        executionId: event.execution_id || undefined,
        streamId: streamId || undefined,
      });
      break;
    }
  }
  // Regular stateless conversion (prose, user input, system notes).
  const msg = sseEventToChatMessage(
    event,
    thread,
    agentDisplayName,
    state.turnCounters[turnKey] ?? 0,
  );
  if (msg) {
    if (isQueen) msg.role = "queen";
    out.push(msg);
  }
  return out;
}
/**
 * Replay an entire event array and return a deduplicated, chronologically
 * sorted ChatMessage list. Used by cold-restore paths so refreshed
 * sessions match the live stream exactly.
 */
export function replayEventsToMessages(
  events: AgentEvent[],
  thread: string,
  agentDisplayName: string | undefined,
): ChatMessage[] {
  const replay = newReplayState();
  // Last write wins per message id — a pill re-emitted with updated
  // content replaces its earlier snapshot.
  const upserted = new Map<string, ChatMessage>();
  for (const event of events) {
    const emitted = replayEvent(replay, event, thread, agentDisplayName);
    for (const message of emitted) {
      upserted.set(message.id, message);
    }
  }
  const ordered = [...upserted.values()];
  ordered.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
  return ordered;
}
type QueenPhase = "planning" | "building" | "staging" | "running" | "independent";
const VALID_PHASES = new Set<string>(["planning", "building", "staging", "running", "independent"]);
+54 -14
View File
@@ -13,7 +13,11 @@ import { executionApi } from "@/api/execution";
import { sessionsApi } from "@/api/sessions";
import { useMultiSSE } from "@/hooks/use-sse";
import type { LiveSession, AgentEvent } from "@/api/types";
import { sseEventToChatMessage, formatAgentDisplayName } from "@/lib/chat-helpers";
import {
sseEventToChatMessage,
formatAgentDisplayName,
replayEventsToMessages,
} from "@/lib/chat-helpers";
import { cronToLabel } from "@/lib/graphUtils";
import { ApiError } from "@/api/client";
import { useColony } from "@/context/ColonyContext";
@@ -41,6 +45,8 @@ function truncate(s: string, max: number): string {
type SessionRestoreResult = {
messages: ChatMessage[];
restoredPhase: "planning" | "building" | "staging" | "running" | "independent" | null;
truncated: boolean;
droppedCount: number;
};
async function restoreSessionMessages(
@@ -49,34 +55,67 @@ async function restoreSessionMessages(
agentDisplayName: string,
): Promise<SessionRestoreResult> {
try {
const { events } = await sessionsApi.eventsHistory(sessionId);
const { events, truncated, total, returned } =
await sessionsApi.eventsHistory(sessionId);
if (events.length > 0) {
const messages: ChatMessage[] = [];
// Walk events twice:
// 1. Extract the trailing queen phase (unchanged logic).
// 2. Run the full state-machine replay so tool_status pills
// are synthesized just like the live SSE handler does.
// Without (2), refreshed sessions showed zero tool activity
// because tool_call_started/completed events are ignored by
// the stateless converter.
let runningPhase: ChatMessage["phase"] = undefined;
for (const evt of events) {
const p =
evt.type === "queen_phase_changed"
? (evt.data?.phase as string)
: evt.type === "node_loop_iteration"
? (evt.data?.phase as string | undefined)
: undefined;
? (evt.data?.phase as string | undefined)
: undefined;
if (p && ["planning", "building", "staging", "running"].includes(p)) {
runningPhase = p as ChatMessage["phase"];
}
const msg = sseEventToChatMessage(evt, thread, agentDisplayName);
if (!msg) continue;
if (evt.stream_id === "queen") {
msg.role = "queen";
msg.phase = runningPhase;
}
messages.push(msg);
}
return { messages, restoredPhase: runningPhase ?? null };
const messages = replayEventsToMessages(events, thread, agentDisplayName);
// Stamp the latest phase on every queen message so the UI's
// phase-badge rendering matches what the live path would have
// displayed at the time of the refresh.
if (runningPhase) {
for (const m of messages) {
if (m.role === "queen") m.phase = runningPhase;
}
}
// Prepend a run_divider banner when the server truncated older
// events so the user knows how many are hidden.
const droppedCount = Math.max(0, total - returned);
if (truncated && droppedCount > 0) {
const firstTs = events[0]?.timestamp;
const bannerCreatedAt = firstTs ? new Date(firstTs).getTime() - 1 : 0;
messages.unshift({
id: `restore-truncated-${sessionId}`,
agent: "System",
agentColor: "",
type: "run_divider",
content: `${droppedCount.toLocaleString()} older event${droppedCount === 1 ? "" : "s"} not shown (showing last ${returned.toLocaleString()})`,
timestamp: firstTs ?? new Date().toISOString(),
thread,
createdAt: bannerCreatedAt,
});
}
return {
messages,
restoredPhase: runningPhase ?? null,
truncated,
droppedCount,
};
}
} catch {
// Event log not available
}
return { messages: [], restoredPhase: null };
return { messages: [], restoredPhase: null, truncated: false, droppedCount: 0 };
}
// ── Agent backend state ──────────────────────────────────────────────────────
@@ -816,6 +855,7 @@ export default function ColonyChat() {
createdAt: eventCreatedAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: sid || undefined,
});
return { ...prev, isStreaming: false, activeToolCalls: newActive };
});
+30 -9
View File
@@ -11,7 +11,10 @@ import { sessionsApi } from "@/api/sessions";
import { queensApi } from "@/api/queens";
import { useMultiSSE } from "@/hooks/use-sse";
import type { AgentEvent, HistorySession } from "@/api/types";
import { sseEventToChatMessage } from "@/lib/chat-helpers";
import {
sseEventToChatMessage,
replayEventsToMessages,
} from "@/lib/chat-helpers";
import { useColony } from "@/context/ColonyContext";
import { useHeaderActions } from "@/context/HeaderActionsContext";
import { getQueenForAgent, slugToColonyId } from "@/lib/colony-registry";
@@ -90,17 +93,34 @@ export default function QueenDM() {
const restoreMessages = useCallback(
async (sid: string, cancelled: () => boolean) => {
try {
const { events } = await sessionsApi.eventsHistory(sid);
const { events, truncated, total, returned } =
await sessionsApi.eventsHistory(sid);
if (cancelled()) return;
const restored: ChatMessage[] = [];
for (const evt of events) {
const msg = sseEventToChatMessage(evt, "queen-dm", queenName);
if (!msg) continue;
if (evt.stream_id === "queen") msg.role = "queen";
restored.push(msg);
// Use the stateful replay so tool_status pills are synthesized
// the same way the live SSE handler does — without this the
// refreshed queen DM shows zero tool activity.
const restored = replayEventsToMessages(events, "queen-dm", queenName);
// Show a banner if the server truncated older events.
const droppedCount = Math.max(0, total - returned);
if (truncated && droppedCount > 0) {
const firstTs = events[0]?.timestamp;
const bannerCreatedAt = firstTs
? new Date(firstTs).getTime() - 1
: 0;
restored.unshift({
id: `restore-truncated-${sid}`,
agent: "System",
agentColor: "",
type: "run_divider",
content: `${droppedCount.toLocaleString()} older event${droppedCount === 1 ? "" : "s"} not shown (showing last ${returned.toLocaleString()})`,
timestamp: firstTs ?? new Date().toISOString(),
thread: "queen-dm",
createdAt: bannerCreatedAt,
});
}
if (restored.length > 0 && !cancelled()) {
restored.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
setMessages(restored);
// Only clear typing if the history contains a completed execution;
// during bootstrap the queen is still processing.
@@ -601,6 +621,7 @@ export default function QueenDM() {
createdAt: eventCreatedAt,
nodeId: event.node_id || undefined,
executionId: event.execution_id || undefined,
streamId: sid || undefined,
};
setMessages((prevMsgs) => {
const idx = prevMsgs.findIndex((m) => m.id === msgId);
+17 -2
View File
@@ -72,13 +72,28 @@ def patched_fork(monkeypatch):
"""Stub out fork_session_into_colony so we don't need a real queen."""
calls: list[dict] = []
async def _stub_fork(*, session: Any, colony_name: str, task: str) -> dict:
calls.append({"session": session, "colony_name": colony_name, "task": task})
async def _stub_fork(
    *,
    session: Any,
    colony_name: str,
    task: str,
    tasks: list[dict] | None = None,
) -> dict:
    """Record the fork arguments and return a canned fork result."""
    # Tests inspect ``calls`` to assert on what was forked.
    recorded = {
        "session": session,
        "colony_name": colony_name,
        "task": task,
        "tasks": tasks,
    }
    calls.append(recorded)
    return {
        "colony_path": f"/tmp/fake_colonies/{colony_name}",
        "colony_name": colony_name,
        "queen_session_id": "session_fake_fork_id",
        "is_new": True,
        "db_path": f"/tmp/fake_colonies/{colony_name}/data/progress.db",
        "task_ids": [],
    }
monkeypatch.setattr(
+28 -42
View File
@@ -17,10 +17,10 @@ _DEFAULT_SKILLS_DIR = Path(__file__).resolve().parent.parent / "framework" / "sk
class TestDefaultSkillFiles:
"""Verify all 7 built-in SKILL.md files parse correctly."""
"""Verify all built-in SKILL.md files parse correctly."""
def test_all_seven_skills_exist(self):
assert len(SKILL_REGISTRY) == 7
def test_all_skills_exist(self):
assert len(SKILL_REGISTRY) == 6
@pytest.mark.parametrize("skill_name,dir_name", list(SKILL_REGISTRY.items()))
def test_skill_parses(self, skill_name, dir_name):
@@ -35,7 +35,13 @@ class TestDefaultSkillFiles:
assert parsed.source_scope == "framework"
def test_combined_token_budget(self):
"""All default skill bodies combined should be under 3000 tokens (~12000 chars)."""
"""All default skill bodies combined should stay within the protocols budget.
Ceiling is 5000 tokens (~20000 chars): the prompt-injection path
appends every registered skill body to the system prompt, so
uncontrolled growth would balloon every LLM call. 5000 gives
headroom over today's ~3500 while still catching obvious bloat.
"""
total_chars = 0
for dir_name in SKILL_REGISTRY.values():
path = _DEFAULT_SKILLS_DIR / dir_name / "SKILL.md"
@@ -44,9 +50,9 @@ class TestDefaultSkillFiles:
total_chars += len(parsed.body)
approx_tokens = total_chars // 4
assert approx_tokens < 3000, (
assert approx_tokens < 5000, (
f"Combined default skill bodies are ~{approx_tokens} tokens "
f"({total_chars} chars), exceeding the 3000 token budget"
f"({total_chars} chars), exceeding the 5000 token budget"
)
def test_data_buffer_keys_all_prefixed(self):
@@ -60,7 +66,7 @@ class TestDefaultSkillManager:
manager = DefaultSkillManager()
manager.load()
assert len(manager.active_skill_names) == 7
assert len(manager.active_skill_names) == len(SKILL_REGISTRY)
for name in SKILL_REGISTRY:
assert name in manager.active_skill_names
@@ -97,7 +103,7 @@ class TestDefaultSkillManager:
manager.load()
assert "hive.quality-monitor" not in manager.active_skill_names
assert len(manager.active_skill_names) == 6
assert len(manager.active_skill_names) == len(SKILL_REGISTRY) - 1
def test_disable_all_via_convention(self):
config = SkillsConfig.from_agent_vars(default_skills={"_all": {"enabled": False}})
@@ -136,7 +142,7 @@ class TestSkillsConfig:
def test_explicit_disable(self):
config = SkillsConfig(default_skills={"hive.note-taking": DefaultSkillConfig(enabled=False)})
assert config.is_default_enabled("hive.note-taking") is False
assert config.is_default_enabled("hive.batch-ledger") is True
assert config.is_default_enabled("hive.quality-monitor") is True
def test_all_disabled_flag(self):
config = SkillsConfig(all_defaults_disabled=True)
@@ -166,11 +172,11 @@ class TestSkillsConfig:
def test_get_default_overrides(self):
config = SkillsConfig.from_agent_vars(
default_skills={
"hive.batch-ledger": {"enabled": True, "checkpoint_every_n": 10},
"hive.quality-monitor": {"enabled": True, "assessment_interval": 10},
}
)
overrides = config.get_default_overrides("hive.batch-ledger")
assert overrides == {"checkpoint_every_n": 10}
overrides = config.get_default_overrides("hive.quality-monitor")
assert overrides == {"assessment_interval": 10}
def test_get_default_overrides_empty(self):
config = SkillsConfig()
@@ -244,40 +250,20 @@ class TestConfigOverrideSubstitution:
assert "{{" not in cleaned
class TestBatchAutoDetection:
"""DS-12: is_batch_scenario() and batch_init_nudge property."""
class TestBatchDeprecatedNoOps:
"""batch-ledger skill was removed; is_batch_scenario() and batch_init_nudge
are deprecated no-ops that return False / None unconditionally. They are
kept in-tree to avoid touching every orchestrator/execution_manager call
site that still reads the nudge through the config plumbing."""
def test_detects_list_of(self):
assert is_batch_scenario("process a list of 100 leads") is True
def test_is_batch_scenario_always_false(self):
assert is_batch_scenario("process a list of 100 leads") is False
assert is_batch_scenario("for each record, send an email") is False
assert is_batch_scenario("write a summary") is False
def test_detects_collection_of(self):
assert is_batch_scenario("a collection of invoices") is True
def test_detects_items(self):
assert is_batch_scenario("go through all items in the spreadsheet") is True
def test_detects_for_each(self):
assert is_batch_scenario("for each record, send an email") is True
def test_no_match_single_task(self):
assert is_batch_scenario("write a summary of the quarterly report") is False
def test_batch_nudge_active_by_default(self):
def test_batch_init_nudge_always_none(self):
manager = DefaultSkillManager()
manager.load()
assert manager.batch_init_nudge is not None
assert "_batch_ledger" in manager.batch_init_nudge
def test_batch_nudge_none_when_skill_disabled(self):
config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"enabled": False}})
manager = DefaultSkillManager(config)
manager.load()
assert manager.batch_init_nudge is None
def test_batch_nudge_none_when_auto_detect_disabled(self):
config = SkillsConfig.from_agent_vars(default_skills={"hive.batch-ledger": {"auto_detect_batch": False}})
manager = DefaultSkillManager(config)
manager.load()
assert manager.batch_init_nudge is None
+590
View File
@@ -0,0 +1,590 @@
"""Tests for framework.host.progress_db — per-colony task queue."""
from __future__ import annotations
import sqlite3
import threading
import time
from pathlib import Path
import pytest
from framework.host.progress_db import (
SCHEMA_VERSION,
ensure_all_colony_dbs,
ensure_progress_db,
enqueue_task,
reclaim_stale,
seed_tasks,
)
# ----------------------------------------------------------------------
# Schema / init
# ----------------------------------------------------------------------
def test_ensure_progress_db_fresh(tmp_path: Path) -> None:
    """A fresh colony dir gets data/progress.db in WAL mode with the full schema."""
    db_path = ensure_progress_db(tmp_path / "c")
    assert db_path.exists()
    assert db_path.name == "progress.db"
    assert db_path.parent.name == "data"
    conn = sqlite3.connect(str(db_path))
    try:
        journal_mode = conn.execute("PRAGMA journal_mode").fetchone()[0]
        assert journal_mode.lower() == "wal"
        assert conn.execute("PRAGMA user_version").fetchone()[0] == SCHEMA_VERSION
        table_names = {row[0] for row in conn.execute("SELECT name FROM sqlite_master WHERE type='table'")}
        assert {"tasks", "steps", "sop_checklist", "colony_meta"} <= table_names
        index_names = {row[0] for row in conn.execute("SELECT name FROM sqlite_master WHERE type='index'")}
        # Named indexes we declared
        for expected_index in (
            "idx_tasks_claimable",
            "idx_steps_task_seq",
            "idx_sop_required_open",
            "idx_tasks_status",
        ):
            assert expected_index in index_names
    finally:
        conn.close()
def test_ensure_progress_db_idempotent(tmp_path: Path) -> None:
    """Calling ensure_progress_db twice yields the same path and schema version."""
    colony_dir = tmp_path / "c"
    first = ensure_progress_db(colony_dir)
    second = ensure_progress_db(colony_dir)
    assert first == second
    conn = sqlite3.connect(str(first))
    try:
        stored_version = conn.execute("PRAGMA user_version").fetchone()[0]
        assert stored_version == SCHEMA_VERSION
    finally:
        conn.close()
def test_ensure_all_colony_dbs_backfill(tmp_path: Path) -> None:
    """Backfill initializes a DB for every colony directory and skips plain files."""
    colonies_root = tmp_path / "colonies"
    for colony_name in ("alpha", "beta"):
        (colonies_root / colony_name).mkdir(parents=True)
    (colonies_root / "gamma_not_dir").touch()  # should be ignored
    initialized = ensure_all_colony_dbs(colonies_root)
    colony_names = {db.parent.parent.name for db in initialized}
    assert colony_names == {"alpha", "beta"}
    assert all(db.exists() for db in initialized)
def test_ensure_all_colony_dbs_missing_root(tmp_path: Path) -> None:
    """A nonexistent colonies root returns an empty list rather than raising."""
    assert ensure_all_colony_dbs(tmp_path / "nonexistent") == []
# ----------------------------------------------------------------------
# Seeding / enqueue
# ----------------------------------------------------------------------
def test_seed_tasks_basic(tmp_path: Path) -> None:
    """Seeding two tasks persists rows plus their child steps and SOP items.

    Also verifies stored defaults: status starts 'pending', source is
    'queen_create', and the payload dict is JSON-serialized into the row.
    """
    db = ensure_progress_db(tmp_path / "c")
    ids = seed_tasks(
        db,
        [
            {
                "goal": "task one",
                "priority": 5,
                "payload": {"url": "https://example.com"},
                "steps": [
                    {"title": "open page"},
                    {"title": "extract data", "detail": "selector .content"},
                ],
                "sop_items": [
                    {"key": "captcha_handled", "description": "Verify no CAPTCHA blocks"},
                    {"key": "soft_hint", "description": "optional", "required": False},
                ],
            },
            {"goal": "task two"},
        ],
    )
    assert len(ids) == 2
    con = sqlite3.connect(str(db))
    try:
        # ORDER BY goal makes "task one" land deterministically at index 0.
        rows = list(con.execute("SELECT id, goal, priority, status, source, payload FROM tasks ORDER BY goal"))
        assert len(rows) == 2
        assert rows[0][1] == "task one"
        assert rows[0][2] == 5
        assert rows[0][3] == "pending"
        assert rows[0][4] == "queen_create"
        # Payload is stored as a JSON string, so the key survives round-trip.
        assert '"url"' in rows[0][5]
        step_count = con.execute(
            "SELECT count(*) FROM steps WHERE task_id=?", (ids[0],)
        ).fetchone()[0]
        assert step_count == 2
        # ``required`` is stored as 1 by default and 0 when explicitly False.
        sop_rows = list(con.execute(
            "SELECT key, required FROM sop_checklist WHERE task_id=? ORDER BY key", (ids[0],)
        ))
        assert sop_rows == [("captcha_handled", 1), ("soft_hint", 0)]
    finally:
        con.close()
def test_seed_tasks_rejects_missing_goal(tmp_path: Path) -> None:
    """A task dict without a goal key is rejected with ValueError."""
    db_path = ensure_progress_db(tmp_path / "c")
    bad_rows = [{"priority": 1}]
    with pytest.raises(ValueError):
        seed_tasks(db_path, bad_rows)
def test_seed_tasks_empty_is_noop(tmp_path: Path) -> None:
    """Seeding an empty batch returns no ids."""
    db_path = ensure_progress_db(tmp_path / "c")
    result = seed_tasks(db_path, [])
    assert result == []
def test_seed_tasks_rollback_on_partial_failure(tmp_path: Path) -> None:
    """A bad row mid-batch must roll back the whole transaction."""
    db_path = ensure_progress_db(tmp_path / "c")
    batch = [
        {"goal": "good one"},
        {"priority": 1},  # missing goal -> boom
        {"goal": "never inserted"},
    ]
    with pytest.raises(ValueError):
        seed_tasks(db_path, batch)
    conn = sqlite3.connect(str(db_path))
    try:
        # Neither the good row before nor the one after may survive.
        total = conn.execute("SELECT count(*) FROM tasks").fetchone()[0]
        assert total == 0
    finally:
        conn.close()
def test_enqueue_task(tmp_path: Path) -> None:
    """enqueue_task appends one task (with steps/SOP) tagged source=enqueue_tool."""
    db_path = ensure_progress_db(tmp_path / "c")
    task_id = enqueue_task(
        db_path,
        "appended",
        steps=[{"title": "s1"}],
        sop_items=[{"key": "k", "description": "d"}],
        priority=3,
    )
    assert task_id
    conn = sqlite3.connect(str(db_path))
    try:
        task_row = conn.execute(
            "SELECT goal, priority, source FROM tasks WHERE id=?", (task_id,)
        ).fetchone()
        assert task_row == ("appended", 3, "enqueue_tool")
        step_total = conn.execute(
            "SELECT count(*) FROM steps WHERE task_id=?", (task_id,)
        ).fetchone()[0]
        assert step_total == 1
    finally:
        conn.close()
def test_enqueue_task_custom_source(tmp_path: Path) -> None:
    """enqueue_task must accept a custom source value (e.g. run_agent_with_input).

    Phase 2 wiring adds source values: create_colony_auto,
    run_agent_with_input, run_parallel_workers. Verify the source
    column stores them verbatim.
    """
    db_path = ensure_progress_db(tmp_path / "c")
    task_id = enqueue_task(db_path, "chat task", source="run_agent_with_input")
    conn = sqlite3.connect(str(db_path))
    try:
        stored = conn.execute("SELECT goal, source FROM tasks WHERE id=?", (task_id,)).fetchone()
        assert stored == ("chat task", "run_agent_with_input")
    finally:
        conn.close()
def test_claim_by_assigned_id(tmp_path: Path) -> None:
    """Worker protocol: claim a specific row by id (not the generic next-pending).

    The Phase 2 fix threads ``task_id`` into ``input_data`` when the
    queen pre-assigns a row. The worker must be able to claim THAT
    row atomically with an ``UPDATE ... WHERE id=? AND status='pending'``
    pattern, and a second claim on the same id must return 0 rows.
    """
    db = ensure_progress_db(tmp_path / "c")
    [tid] = seed_tasks(db, [{"goal": "pinned task"}])
    # isolation_level=None puts the connection in autocommit mode, so
    # each UPDATE below is its own implicit transaction.
    con = sqlite3.connect(str(db), isolation_level=None, timeout=5.0)
    try:
        cur = con.execute(
            """
            UPDATE tasks SET status='claimed', worker_id=?,
                   claim_token=lower(hex(randomblob(8))),
                   claimed_at=datetime('now'),
                   updated_at=datetime('now')
            WHERE id=? AND status='pending'
            RETURNING id, goal
            """,
            ("w1", tid),
        )
        row = cur.fetchone()
        assert row == (tid, "pinned task"), f"expected one claim, got {row}"
        # Second attempt on the same id must affect zero rows.
        cur2 = con.execute(
            """
            UPDATE tasks SET status='claimed', worker_id=?,
                   claim_token=lower(hex(randomblob(8))),
                   claimed_at=datetime('now')
            WHERE id=? AND status='pending'
            RETURNING id
            """,
            ("w2", tid),
        )
        assert cur2.fetchone() is None, "second claim should affect zero rows"
        # Ensure worker_id on the row is still the first claimant.
        owner = con.execute(
            "SELECT worker_id, status FROM tasks WHERE id=?", (tid,)
        ).fetchone()
        assert owner == ("w1", "claimed")
    finally:
        con.close()
def test_claim_by_id_does_not_steal_unrelated_rows(tmp_path: Path) -> None:
    """Claim-by-id must only touch the named row, not siblings."""
    db_path = ensure_progress_db(tmp_path / "c")
    task_ids = seed_tasks(db_path, [{"goal": "a"}, {"goal": "b"}, {"goal": "c"}])
    conn = sqlite3.connect(str(db_path), isolation_level=None)
    try:
        # Claim only the middle row.
        conn.execute(
            "UPDATE tasks SET status='claimed', worker_id='w1', "
            "claimed_at=datetime('now') WHERE id=? AND status='pending'",
            (task_ids[1],),
        )
        status_by_goal = dict(conn.execute("SELECT goal, status FROM tasks").fetchall())
        assert status_by_goal == {"a": "pending", "b": "claimed", "c": "pending"}
    finally:
        conn.close()
def test_seed_tasks_bulk_10k(tmp_path: Path) -> None:
    """10k rows in one transaction should finish under a second on local disk."""
    db_path = ensure_progress_db(tmp_path / "c")
    batch = [{"goal": f"task {i}", "seq": i} for i in range(10_000)]
    t0 = time.perf_counter()
    inserted_ids = seed_tasks(db_path, batch)
    elapsed = time.perf_counter() - t0
    assert len(inserted_ids) == 10_000
    # Generous ceiling — on CI with slow disk we've seen ~300ms.
    assert elapsed < 3.0, f"bulk seed too slow: {elapsed:.2f}s"
# ----------------------------------------------------------------------
# Atomic claim under concurrency
# ----------------------------------------------------------------------
_CLAIM_SQL = """
BEGIN IMMEDIATE;
UPDATE tasks
SET
status = 'claimed',
worker_id = ?,
claim_token = lower(hex(randomblob(8))),
claimed_at = datetime('now'),
updated_at = datetime('now')
WHERE id = (
SELECT id FROM tasks
WHERE status = 'pending'
ORDER BY priority DESC, seq, created_at
LIMIT 1
);
"""
def _claim_one(db_path: Path, worker_id: str) -> str | None:
"""Atomic single-shot claim using RETURNING (SQLite 3.35+).
The skill teaches agents the BEGIN IMMEDIATE + subquery UPDATE
pattern; for an in-process test helper we use RETURNING so the
claimed row id is returned from the same statement (no racing
follow-up SELECT). Functionally equivalent: both approaches rely
on the atomic subquery-UPDATE.
"""
con = sqlite3.connect(str(db_path), isolation_level=None, timeout=10.0)
con.execute("PRAGMA busy_timeout = 10000")
try:
cur = con.execute(
"""
UPDATE tasks
SET status = 'claimed',
worker_id = ?,
claim_token = lower(hex(randomblob(8))),
claimed_at = datetime('now'),
updated_at = datetime('now')
WHERE id = (
SELECT id FROM tasks
WHERE status = 'pending'
ORDER BY priority DESC, seq, created_at
LIMIT 1
)
RETURNING id
""",
(worker_id,),
)
row = cur.fetchone()
return row[0] if row else None
finally:
con.close()
def test_claim_atomicity_under_concurrency(tmp_path: Path) -> None:
    """20 threads racing to drain 100 tasks — each task claimed exactly once."""
    db = ensure_progress_db(tmp_path / "c")
    seed_tasks(db, [{"goal": f"task {i}", "seq": i} for i in range(100)])
    # (worker_id, task_id) pairs; guarded by claims_lock since many
    # threads append concurrently.
    claims: list[tuple[str, str]] = []
    claims_lock = threading.Lock()

    def worker(worker_id: str) -> None:
        # Drain loop: keep claiming until no pending rows remain.
        while True:
            tid = _claim_one(db, worker_id)
            if tid is None:
                return
            with claims_lock:
                claims.append((worker_id, tid))

    threads = [threading.Thread(target=worker, args=(f"w{i}",)) for i in range(20)]
    for t in threads:
        t.start()
    for t in threads:
        t.join(timeout=30)
    task_ids = [tid for _, tid in claims]
    # Exactly-once semantics: 100 claims total, all distinct ids.
    assert len(task_ids) == 100, f"expected 100 claims, got {len(task_ids)}"
    assert len(set(task_ids)) == 100, "duplicate claims detected"
    con = sqlite3.connect(str(db))
    try:
        remaining = con.execute(
            "SELECT count(*) FROM tasks WHERE status='pending'"
        ).fetchone()[0]
        assert remaining == 0
        claimed = con.execute(
            "SELECT count(*) FROM tasks WHERE status='claimed'"
        ).fetchone()[0]
        assert claimed == 100
    finally:
        con.close()
# ----------------------------------------------------------------------
# Stale-claim reclaimer
# ----------------------------------------------------------------------
def test_reclaim_stale_returns_to_pending(tmp_path: Path) -> None:
    """A claim older than the window returns to pending with retry_count bumped."""
    db_path = ensure_progress_db(tmp_path / "c")
    [task_id] = seed_tasks(db_path, [{"goal": "stuck"}])
    # Simulate a claim made 20 minutes ago.
    conn = sqlite3.connect(str(db_path), isolation_level=None)
    try:
        conn.execute(
            "UPDATE tasks SET status='claimed', worker_id='w1', "
            "claimed_at=datetime('now', '-20 minutes') WHERE id=?",
            (task_id,),
        )
    finally:
        conn.close()
    assert reclaim_stale(db_path, stale_after_minutes=15) == 1
    conn = sqlite3.connect(str(db_path))
    try:
        state = conn.execute(
            "SELECT status, worker_id, retry_count FROM tasks WHERE id=?", (task_id,)
        ).fetchone()
        assert state == ("pending", None, 1)
    finally:
        conn.close()
def test_reclaim_stale_fails_after_max_retries(tmp_path: Path) -> None:
    """A stale task already at max_retries is marked failed with an error note."""
    db_path = ensure_progress_db(tmp_path / "c")
    [task_id] = seed_tasks(db_path, [{"goal": "doomed", "max_retries": 2}])
    conn = sqlite3.connect(str(db_path), isolation_level=None)
    try:
        # Already at the retry ceiling, and the claim is long stale.
        conn.execute(
            "UPDATE tasks SET status='claimed', worker_id='w1', retry_count=2, "
            "claimed_at=datetime('now', '-20 minutes') WHERE id=?",
            (task_id,),
        )
    finally:
        conn.close()
    reclaim_stale(db_path, stale_after_minutes=15)
    conn = sqlite3.connect(str(db_path))
    try:
        status, last_error = conn.execute(
            "SELECT status, last_error FROM tasks WHERE id=?", (task_id,)
        ).fetchone()
        assert status == "failed"
        assert last_error is not None and "max_retries" in last_error
    finally:
        conn.close()
def test_reclaim_stale_ignores_fresh_claims(tmp_path: Path) -> None:
    """A claim made just now is inside the window and must not be reclaimed."""
    db_path = ensure_progress_db(tmp_path / "c")
    [task_id] = seed_tasks(db_path, [{"goal": "working"}])
    conn = sqlite3.connect(str(db_path), isolation_level=None)
    try:
        conn.execute(
            "UPDATE tasks SET status='claimed', worker_id='w1', "
            "claimed_at=datetime('now') WHERE id=?",
            (task_id,),
        )
    finally:
        conn.close()
    assert reclaim_stale(db_path, stale_after_minutes=15) == 0
# ----------------------------------------------------------------------
# Foreign key cascade
# ----------------------------------------------------------------------
# ----------------------------------------------------------------------
# Worker config patching for pre-existing colonies
# ----------------------------------------------------------------------
def _write_worker_cfg(path: Path, *, with_input_data: dict | None = None) -> None:
"""Write a minimal worker.json that matches the shape ensure_progress_db patches."""
import json as _json
cfg = {
"name": "worker",
"system_prompt": "You are a worker.",
"goal": {"description": "do stuff", "success_criteria": [], "constraints": []},
"tools": [],
}
if with_input_data is not None:
cfg["input_data"] = with_input_data
path.write_text(_json.dumps(cfg, indent=2))
def test_ensure_progress_db_patches_existing_worker_json(tmp_path: Path) -> None:
    """Pre-existing worker.json without input_data gets db_path injected."""
    import json as _json

    colony = tmp_path / "legacy_colony"
    colony.mkdir()
    cfg_path = colony / "worker.json"
    _write_worker_cfg(cfg_path)
    # Before: no input_data
    assert "input_data" not in _json.loads(cfg_path.read_text())
    db = ensure_progress_db(colony)
    patched = _json.loads(cfg_path.read_text())
    assert patched["input_data"]["db_path"] == str(db)
    assert patched["input_data"]["colony_id"] == "legacy_colony"
    # Other fields untouched
    assert patched["system_prompt"] == "You are a worker."
    assert patched["goal"]["description"] == "do stuff"
def test_ensure_progress_db_patch_is_idempotent(tmp_path: Path) -> None:
    """Second call must not rewrite the file (mtime unchanged)."""
    import time as _time

    colony = tmp_path / "idem"
    colony.mkdir()
    cfg_path = colony / "worker.json"
    _write_worker_cfg(cfg_path)
    ensure_progress_db(colony)
    first_mtime = cfg_path.stat().st_mtime
    _time.sleep(0.02)  # ensure any rewrite would bump mtime
    ensure_progress_db(colony)
    second_mtime = cfg_path.stat().st_mtime
    assert second_mtime == first_mtime, "second ensure_progress_db must not rewrite worker.json"
def test_ensure_progress_db_preserves_existing_input_data_keys(tmp_path: Path) -> None:
    """Pre-existing input_data keys (other than db_path/colony_id) are preserved."""
    import json as _json

    colony = tmp_path / "preserved"
    colony.mkdir()
    _write_worker_cfg(
        colony / "worker.json",
        with_input_data={"custom_key": "hello", "db_path": "/stale/path.db"},
    )
    db = ensure_progress_db(colony)
    patched = _json.loads((colony / "worker.json").read_text())
    # Custom key survives; the stale db_path is overwritten.
    assert patched["input_data"]["custom_key"] == "hello"
    assert patched["input_data"]["db_path"] == str(db)
    assert patched["input_data"]["colony_id"] == "preserved"
def test_ensure_progress_db_skips_metadata_and_triggers(tmp_path: Path) -> None:
    """metadata.json and triggers.json are not worker configs — must not be touched."""
    import json as _json

    colony = tmp_path / "guarded"
    colony.mkdir()
    (colony / "metadata.json").write_text(_json.dumps({"colony_name": "guarded"}))
    (colony / "triggers.json").write_text(_json.dumps([{"id": "t1"}]))
    _write_worker_cfg(colony / "worker.json")
    ensure_progress_db(colony)
    # Only worker.json gains input_data; the other JSON files stay verbatim.
    assert "input_data" not in _json.loads((colony / "metadata.json").read_text())
    assert _json.loads((colony / "triggers.json").read_text()) == [{"id": "t1"}]
    assert "input_data" in _json.loads((colony / "worker.json").read_text())
def test_task_delete_cascades_to_steps_and_sop(tmp_path: Path) -> None:
    """Deleting a task (with FKs enabled) removes its steps and SOP rows too."""
    db_path = ensure_progress_db(tmp_path / "c")
    [task_id] = seed_tasks(
        db_path,
        [
            {
                "goal": "cascade test",
                "steps": [{"title": "a"}, {"title": "b"}],
                "sop_items": [{"key": "k", "description": "d"}],
            }
        ],
    )
    conn = sqlite3.connect(str(db_path), isolation_level=None)
    try:
        # Cascades only fire when foreign keys are enabled on the connection.
        conn.execute("PRAGMA foreign_keys = ON")
        conn.execute("DELETE FROM tasks WHERE id=?", (task_id,))
        for child_table in ("steps", "sop_checklist"):
            orphan_count = conn.execute(
                f"SELECT count(*) FROM {child_table} WHERE task_id=?", (task_id,)
            ).fetchone()[0]
            assert orphan_count == 0
    finally:
        conn.close()
+1 -1
View File
@@ -141,7 +141,7 @@ class TestSkillDiscovery:
framework_skills = [s for s in skills if s.source_scope == "framework"]
names = {s.name for s in framework_skills}
assert "hive.note-taking" in names
assert "hive.batch-ledger" in names
assert "hive.colony-progress-tracker" in names
def test_max_depth_limit(self, tmp_path):
# Create a skill nested beyond max_depth
+42
View File
@@ -271,6 +271,48 @@ else
exit 1
fi
# Check for sqlite3 CLI (required for colony progress tracking)
# Missing sqlite3 is non-fatal: we try a best-effort auto-install and
# otherwise print manual instructions, then continue the setup.
echo -n "  Checking for sqlite3... "
if command -v sqlite3 &> /dev/null; then
    echo -e "${GREEN}ok${NC}"
else
    echo -e "${YELLOW}not found${NC}"
    # Attempt auto-install on common package managers
    # (first match wins: apt-get, brew, apk, dnf, pacman).
    SQLITE_INSTALLED=false
    if command -v apt-get &> /dev/null; then
        echo -n "  Installing sqlite3 via apt... "
        if sudo apt-get install -y sqlite3 > /dev/null 2>&1; then
            SQLITE_INSTALLED=true
        fi
    elif command -v brew &> /dev/null; then
        echo -n "  Installing sqlite3 via brew... "
        if brew install sqlite > /dev/null 2>&1; then
            SQLITE_INSTALLED=true
        fi
    elif command -v apk &> /dev/null; then
        echo -n "  Installing sqlite3 via apk... "
        # NOTE(review): apk typically runs as root in containers, so no sudo here.
        if apk add sqlite > /dev/null 2>&1; then
            SQLITE_INSTALLED=true
        fi
    elif command -v dnf &> /dev/null; then
        echo -n "  Installing sqlite3 via dnf... "
        if sudo dnf install -y sqlite > /dev/null 2>&1; then
            SQLITE_INSTALLED=true
        fi
    elif command -v pacman &> /dev/null; then
        echo -n "  Installing sqlite3 via pacman... "
        if sudo pacman -S --noconfirm sqlite > /dev/null 2>&1; then
            SQLITE_INSTALLED=true
        fi
    fi
    if [ "$SQLITE_INSTALLED" = true ]; then
        echo -e "${GREEN}ok${NC}"
    else
        # Soft warning only — the rest of setup proceeds without sqlite3.
        echo -e "${YELLOW} ⚠ Could not install sqlite3 automatically${NC}"
        echo -e "${DIM}   Install manually: apt install sqlite3 / brew install sqlite / apk add sqlite${NC}"
    fi
fi
# Check for Chrome/Edge (required for GCU browser tools)
echo -n " Checking for Chrome/Edge browser... "
# Check common browser locations
-132
View File
@@ -1,132 +0,0 @@
---
name: linkedin-connection-greeter
description: Automates accepting LinkedIn connections and sending a welcome message about the HoneyComb prediction market. Handles shadow DOM and Lexical editors.
---
# LinkedIn Connection Greeter
This skill outlines the exact flow to accept connection requests and send a specific welcome message without triggering spam filters.
## 1. Load Ledger
Before starting, read `data/linkedin_contacts.json`. If it doesn't exist, initialize with `{"contacts": []}`. You will use this to skip people you've already messaged.
## 2. Scan Pending Connections
Navigate to `https://www.linkedin.com/mynetwork/invitation-manager/received/`. Wait until load + sleep 4s.
Strip unload handlers:
`browser_evaluate("(function(){window.onbeforeunload=null;})()")`
Extract cards using this specific snippet (handles changing classes and follow invites):
```javascript
(function(){
const btns = Array.from(document.querySelectorAll('button')).filter(b => b.textContent.includes('Accept'));
let results = [];
for (let b of btns) {
let card = b.closest('[role="listitem"]');
if (!card) continue;
let text = card.textContent.toLowerCase();
if (text.includes('invited you to follow') || text.includes('invited you to subscribe')) continue;
let nameEls = Array.from(card.querySelectorAll('a[href*="/in/"]'));
let nameEl = nameEls.find(el => el.textContent.trim().length > 0);
let r = b.getBoundingClientRect();
results.push({
first_name: nameEl ? nameEl.textContent.trim().split(/\s+/)[0] : 'there',
profile_url: nameEl ? nameEl.href : '',
cx: r.x + r.width/2,
cy: r.y + r.height/2
});
}
return results;
})();
```
## 3. Process Each Card (Max 10 per run)
For each card, check if `profile_url` is already in the ledger. If not:
1. `browser_click_coordinate(cx, cy)` to click the specific Accept button.
2. `sleep(2)`
3. `browser_navigate(profile_url, wait_until="load")`
4. `sleep(4)`
5. `browser_evaluate("(function(){window.onbeforeunload=null; window.addEventListener('beforeunload', e => e.stopImmediatePropagation(), true);})()")`
## 4. Message the User
Click Message Button on their profile:
```javascript
(function(){
const links = Array.from(document.querySelectorAll('a[href*="/messaging/compose/"]'));
for (const a of links){
if (!a.href.includes('NON_SELF_PROFILE_VIEW') || a.href.includes('body=')) continue;
const r = a.getBoundingClientRect();
if (r.width === 0 || r.x > 700) continue;
return {cx: r.x + r.width / 2, cy: r.y + r.height / 2};
}
return null;
})();
```
Click that coordinate, then `sleep(2.5)`.
Find Textarea (it is hidden inside shadow DOM):
```javascript
(function(){
const vh = window.innerHeight, vw = window.innerWidth;
const candidates = [];
function walk(root){
const els = root.querySelectorAll ? root.querySelectorAll('div.msg-form__contenteditable') : [];
for (const el of els){
const r = el.getBoundingClientRect();
if (r.width > 0 && r.height > 0 && r.y >= 0 && r.y + r.height <= vh && r.x >= 0 && r.x + r.width <= vw) {
candidates.push({cx: r.x + r.width/2, cy: r.y + r.height/2, area: r.width * r.height});
}
}
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
for (const host of all){ if (host.shadowRoot) walk(host.shadowRoot); }
}
walk(document);
candidates.sort((a, b) => b.area - a.area);
return candidates.length ? candidates[0] : null;
})();
```
Click that coordinate, `sleep(1)`.
Type the message:
Construct the message: `Hey {first_name}, thanks for the connection invite! I'm currently building a prediction market for jobs: https://honeycomb.open-hive.com/. If you could check it out and share some feedback, I'd really appreciate it.`
Use `browser_type_focused` — it dispatches CDP `Input.insertText` to the already-focused composer (document.activeElement), which works through shadow DOM without JSON-escaping issues:
```
browser_type_focused(text=message_text)
sleep(1.0)
```
Find Send button (also inside shadow DOM):
```javascript
(function(){
const vh = window.innerHeight;
function walk(root){
const btns = root.querySelectorAll ? root.querySelectorAll('button') : [];
for (const b of btns){
const cls = (b.className || '').toString();
if (!cls.includes('send-button') && b.textContent.trim() !== 'Send') continue;
const r = b.getBoundingClientRect();
if (r.width <= 0 || r.y + r.height > vh) continue;
return { cx: r.x + r.width/2, cy: r.y + r.height/2, disabled: b.disabled || b.getAttribute('aria-disabled') === 'true' };
}
const all = root.querySelectorAll ? root.querySelectorAll('*') : [];
for (const host of all){ if (host.shadowRoot) { const got = walk(host.shadowRoot); if (got) return got; } }
return null;
}
return walk(document);
})();
```
Click send coordinate, `sleep(2)`.
## 5. Update Ledger
Append the user to `data/linkedin_contacts.json`.
```json
{
"profile_url": "...",
"name": "...",
"action": "connection_accepted+message_sent",
"timestamp": "2026-..."
}
```
`sleep(5)` before moving to the next card to mimic human pacing.
+121 -7
View File
@@ -82,10 +82,29 @@ def _find_project_root() -> str:
return os.path.dirname(os.path.abspath(__file__))
def _resolve_path(path: str) -> str:
"""Resolve path relative to PROJECT_ROOT. Raises ValueError if outside.
# When ``--write-root`` is passed on the CLI, ``WRITE_ROOT`` diverges
# from ``PROJECT_ROOT``: reads stay permissive (so the queen can
# reference framework skills, docs, and the hive repo), but writes
# are confined to the write root plus the ``~/.hive/`` escape hatch.
# Without this split, the coder-tools sandbox IS the hive git
# checkout — every queen-authored skill/ledger/script lands there as
# untracked debris, which was the 2026-04-15 incident
# (``~/aden/hive/x-rapid-reply/`` and siblings).
WRITE_ROOT: str = ""
Also allows access to ~/.hive/ directory for agent session data files.
def _resolve_read_path(path: str) -> str:
"""Resolve path for READ operations.
Allowlist (in order):
1. Paths under ``~/.hive/`` agent session data, colonies, skills.
2. Paths under ``PROJECT_ROOT`` hive repo, for reading framework
defaults, docs, examples, etc.
3. Relative paths joined against ``PROJECT_ROOT`` (read-side
default; writes use ``WRITE_ROOT`` instead).
Raises ``ValueError`` when the resolved path falls outside all
allowed roots.
"""
# Normalize slashes for cross-platform (e.g. exports/hi_agent from LLM)
path = path.replace("/", os.sep)
@@ -153,6 +172,88 @@ def _resolve_path(path: str) -> str:
return resolved
def _resolve_write_path(path: str) -> str:
"""Resolve path for WRITE operations.
Stricter than the read resolver: only allows writes under:
1. ``WRITE_ROOT`` the agent workspace (default: ``~/.hive/workspace/``
when ``--write-root`` is passed).
2. ``~/.hive/`` agent session data.
Writes to the hive repo (``PROJECT_ROOT``) are REJECTED to keep
the git checkout clean of queen-authored debris. Relative paths
resolve against ``WRITE_ROOT``, not ``PROJECT_ROOT``.
When ``WRITE_ROOT`` equals ``PROJECT_ROOT`` (no split configured),
this function is semantically identical to ``_resolve_read_path``.
"""
# Normalize slashes + expand ~
path = path.replace("/", os.sep)
if path.startswith("~"):
path = os.path.expanduser(path)
hive_dir = os.path.expanduser("~/.hive")
if os.path.isabs(path):
resolved = os.path.abspath(path)
# Always allow writes under ~/.hive/
try:
if os.path.commonpath([resolved, hive_dir]) == hive_dir:
return resolved
except ValueError:
pass
# Writes are ALSO allowed under WRITE_ROOT (the agent workspace).
try:
if os.path.commonpath([resolved, WRITE_ROOT]) == WRITE_ROOT:
return resolved
except ValueError:
pass
# If WRITE_ROOT == PROJECT_ROOT (legacy behavior: no split),
# fall through to the read-side resolver so existing callers
# keep working unchanged.
if WRITE_ROOT == PROJECT_ROOT:
return _resolve_read_path(path)
# Split configured AND the path isn't under WRITE_ROOT or
# ~/.hive/. Reject — this is the whole point of the split.
raise ValueError(
f"Access denied: writes must be under '{WRITE_ROOT}' or "
f"'{hive_dir}'. Path '{path}' is outside both "
"(use an absolute path under one of those roots, or a "
"relative path which will resolve under the write root)."
)
else:
# Relative path: resolve against WRITE_ROOT, not PROJECT_ROOT.
resolved = os.path.abspath(os.path.join(WRITE_ROOT, path))
# Double-check the resolved absolute path is inside WRITE_ROOT or
# ~/.hive/ (covers edge cases like "../../etc/passwd" that escape).
try:
wr_common = os.path.commonpath([resolved, WRITE_ROOT])
except ValueError:
wr_common = ""
try:
hv_common = os.path.commonpath([resolved, hive_dir])
except ValueError:
hv_common = ""
if wr_common != WRITE_ROOT and hv_common != hive_dir:
raise ValueError(
f"Access denied: resolved write path '{resolved}' escaped the "
f"allowed roots ('{WRITE_ROOT}', '{hive_dir}')."
)
return resolved
# Back-compat alias: existing call sites in this module call
# ``_resolve_path`` directly (e.g. for snapshot dirs, agent tool
# introspection). Those are all non-user-driven paths; route them
# through the read resolver.
_resolve_path = _resolve_read_path
# ── Git snapshot system (ported from opencode's shadow git) ───────────────
@@ -1637,32 +1738,45 @@ def validate_agent_package(agent_name: str) -> str:
def main() -> None:
global PROJECT_ROOT, SNAPSHOT_DIR
global PROJECT_ROOT, SNAPSHOT_DIR, WRITE_ROOT
from aden_tools.file_ops import register_file_tools
parser = argparse.ArgumentParser(description="Coder Tools MCP Server")
parser.add_argument("--project-root", default="")
# ``--write-root`` isolates file writes from the project root so
# queen-authored skills, ledgers, and scripts don't land in the
# hive git checkout. Reads remain permissive under PROJECT_ROOT
# so framework skills, docs, and examples stay accessible.
# Defaults to PROJECT_ROOT when empty (legacy behavior).
parser.add_argument("--write-root", default="")
parser.add_argument("--port", type=int, default=int(os.getenv("CODER_TOOLS_PORT", "4002")))
parser.add_argument("--host", default="0.0.0.0")
parser.add_argument("--stdio", action="store_true")
args = parser.parse_args()
PROJECT_ROOT = os.path.abspath(args.project_root) if args.project_root else _find_project_root()
if args.write_root:
WRITE_ROOT = os.path.abspath(os.path.expanduser(args.write_root))
os.makedirs(WRITE_ROOT, exist_ok=True)
else:
WRITE_ROOT = PROJECT_ROOT # legacy: no split
SNAPSHOT_DIR = os.path.join(
os.path.expanduser("~"),
".hive",
"snapshots",
os.path.basename(PROJECT_ROOT),
)
logger.info(f"Project root: {PROJECT_ROOT}")
logger.info(f"Project root (reads): {PROJECT_ROOT}")
logger.info(f"Write root (writes): {WRITE_ROOT}")
logger.info(f"Snapshot dir: {SNAPSHOT_DIR}")
register_file_tools(
mcp,
resolve_path=_resolve_path,
resolve_path=_resolve_read_path,
resolve_path_write=_resolve_write_path,
before_write=None, # Git snapshot causes stdio deadlock on Windows; undo_changes limited
project_root=PROJECT_ROOT,
project_root=WRITE_ROOT,
)
if args.stdio:
+12 -5
View File
@@ -328,6 +328,7 @@ def register_file_tools(
mcp: FastMCP,
*,
resolve_path: Callable[[str], str] | None = None,
resolve_path_write: Callable[[str], str] | None = None,
before_write: Callable[[], None] | None = None,
project_root: str | None = None,
) -> None:
@@ -335,12 +336,18 @@ def register_file_tools(
Args:
mcp: FastMCP instance to register tools on.
resolve_path: Path resolver. Default: resolve to absolute path.
Raise ValueError to reject paths (e.g. outside sandbox).
resolve_path: Path resolver for READ operations. Default:
resolve to absolute path. Raise ValueError to reject paths
(e.g. outside sandbox).
resolve_path_write: Path resolver for WRITE/EDIT operations.
Defaults to ``resolve_path`` when not provided. Split
resolvers let callers keep reads permissive (framework
skills, docs) while confining writes to an agent workspace.
before_write: Hook called before write/edit operations (e.g. git snapshot).
project_root: If set, search_files relativizes output paths to this root.
"""
_resolve = resolve_path or _default_resolve_path
_resolve_write = resolve_path_write or _resolve
@mcp.tool()
def read_file(path: str, offset: int = 1, limit: int = 0, hashline: bool = False) -> str:
@@ -440,7 +447,7 @@ def register_file_tools(
path: Absolute file path to write.
content: Complete file content to write.
"""
resolved = _resolve(path)
resolved = _resolve_write(path)
resolved_path = Path(resolved)
# Stale-edit guard: an existing file must have been read recently
@@ -509,7 +516,7 @@ def register_file_tools(
new_text: Replacement text.
replace_all: Replace all occurrences (default: first only).
"""
resolved = _resolve(path)
resolved = _resolve_write(path)
if not os.path.isfile(resolved):
return f"Error: File not found: {path}"
@@ -815,7 +822,7 @@ def register_file_tools(
return "Error: Too many edits in one call (max 100). Split into multiple calls."
# 2. Read file
resolved = _resolve(path)
resolved = _resolve_write(path)
if not os.path.isfile(resolved):
return f"Error: File not found: {path}"
+47 -3
View File
@@ -96,15 +96,59 @@ def register_advanced_tools(mcp: FastMCP) -> None:
profile: str | None = None,
) -> dict:
"""
Execute JavaScript in the browser context.
ESCAPE HATCH — execute raw JavaScript. USE ONLY as a last
resort. 99% of browser automation does NOT need this tool.
Before reaching for it, try a semantic tool first:
- browser_click / browser_click_coordinate for clicks
- browser_type(use_insert_text=True) for text input
- browser_screenshot + browser_get_rect for locating elements
- browser_shadow_query for shadow-DOM selectors
- browser_get_text / browser_get_attribute for reading state
ANTI-PATTERNS — stop and switch tools if you notice yourself:
1. Calling browser_evaluate 2+ times in a row to guess at
selectors. Each attempt costs ~30 tokens of JS + a full
LLM round-trip. After 2 empty results, the selector
strategy is wrong — pivot to browser_screenshot +
browser_click_coordinate. The screenshot + coord path
works on shadow DOM, iframes, and React-obfuscated
class names alike.
2. Writing a walk(root) recursive shadow-DOM traversal
function. Use browser_shadow_query — it does the
traversal in C++ via CDP's querySelector, not in JS.
3. Calling document.execCommand('insertText', ...) to type
into Lexical / contenteditable. Use
browser_type(use_insert_text=True, text='...') instead.
It handles the click-then-focus-then-insert sequence
with built-in retries.
4. Trying to read a nested iframe's contentDocument. That
usually fails (cross-origin or late hydration). Use
browser_screenshot to see it, then browser_click_coordinate.
LEGITIMATE uses (when nothing semantic fits):
- Reading a computed style, window size, or scroll position
that no tool exposes.
- Firing a one-shot site-specific API call (e.g. an analytics
beacon the test needs).
- Stripping an onbeforeunload handler that blocks navigation.
- Probing for shadow roots whose existence is conditional.
Args:
script: JavaScript code to execute
script: JavaScript code to execute. Keep it small. If you
need to traverse the DOM, prefer browser_shadow_query.
tab_id: Chrome tab ID (default: active tab)
profile: Browser profile name (default: "default")
Returns:
Dict with evaluation result
Dict with evaluation result. On a "find X" script that
returns [] or null: do NOT retry with a different
selector — take a screenshot and switch to coordinates.
"""
bridge = get_bridge()
if not bridge or not bridge.is_connected:
+15
View File
@@ -0,0 +1,15 @@
import json

# Retroactively label pre-A/B-test ledger entries: any messaged profile
# recorded before the 'variant' field existed is tagged 'Control' so later
# analysis can compare variants without discarding the earliest runs.
try:
    with open('data/linkedin_ledger.json', 'r') as f:
        data = json.load(f)
    profiles = data.get('messaged_profiles', [])
    for p in profiles:
        # Only fill in a missing label; never overwrite an explicit variant.
        p.setdefault('variant', 'Control')
    # Write the whole document back, preserving any other top-level keys in
    # the ledger (rebuilding the dict from scratch would silently drop them).
    data['messaged_profiles'] = profiles
    with open('data/linkedin_ledger.json', 'w') as f:
        json.dump(data, f, indent=2)
except Exception as e:
    # Best-effort maintenance script: report the failure and exit cleanly
    # instead of dumping a traceback.
    print(f"Error: {e}")
+16
View File
@@ -0,0 +1,16 @@
{
"replies": [
{
"original_preview": "NASA Ames@NASAAmes\u00b75hWe\u2019re just getting started\n\nDuring their historic journey around the Moon, Artemis II observed lunar targets to study color, text"
},
{
"original_preview": "NASA Marshall@NASA_Marshall\u00b74h Enjoy these views of the Artemis II launch from cameras affixed to the rocket! On April 1, 2026, the SLS (Space Launch "
},
{
"original_preview": "U.S. Navy@USNavy\u00b711hFirst contact. On April 10, U.S. Navy divers were the first on the scene as the Navy and NASA successfully recovered the Orion s"
},
{
"original_preview": "Alright, I give in. Here\u2019s my picture with the boss, courtesy of @johnkrausphotos. Oh, and hook \u2018em!"
}
]
}