fix: colony skill leakage

This commit is contained in:
Timothy
2026-04-18 15:34:31 -07:00
parent 08aeffd977
commit 0d13c805b1
4 changed files with 171 additions and 40 deletions
+12 -5
View File
@@ -767,8 +767,16 @@ async def fork_session_into_colony(
session.id,
)
# "is_new" keys off worker.json, not bare dir existence: the queen's
# create_colony tool now pre-creates colony_dir (so it can
# materialize the colony-scoped skill folder BEFORE the fork), which
# would wrongly flag every fresh colony as "already-exists" if we
# used ``not colony_dir.exists()``. A colony is "new" until its
# worker config has actually been written.
colony_dir = Path.home() / ".hive" / "colonies" / colony_name
is_new = not colony_dir.exists()
worker_name = "worker"
worker_config_path = colony_dir / f"{worker_name}.json"
is_new = not worker_config_path.exists()
colony_dir.mkdir(parents=True, exist_ok=True)
(colony_dir / "data").mkdir(exist_ok=True)
@@ -815,10 +823,9 @@ async def fork_session_into_colony(
exc,
)
# Fixed worker name -- sessions are the unit of parallelism, not workers
worker_name = "worker"
worker_config_path = colony_dir / f"{worker_name}.json"
# Fixed worker name and config path are already computed above so
# ``is_new`` can be derived from worker.json rather than the colony
# directory (see comment on the ``is_new`` block).
# ── 1. Gather queen state ─────────────────────────────────────
# Queen-lifecycle + agent-management tools are registered ONLY against
@@ -21,10 +21,32 @@ Each skill is a directory containing a `SKILL.md`. At startup, only the frontmat
### Choosing where to put a new skill
- **Project-scoped**: put under `<project>/.hive/skills/` when the skill is tied to that codebase's APIs, conventions, or infra.
- **User-scoped**: put under `~/.hive/skills/` when the skill is reusable across projects for this machine/user.
- **Colony-scoped (via `create_colony`)**: when the skill is the operational protocol a single colony needs — its API auth, DOM selectors, DB schema, task-queue conventions — do NOT place it under `~/.hive/skills/` or `<project>/.hive/skills/` yourself. Those roots are SHARED and every colony on the machine will see it. Instead, pass the skill content INLINE to the `create_colony` tool (`skill_name`, `skill_description`, `skill_body`, optional `skill_files`). The tool materializes the folder under `~/.hive/colonies/<colony_name>/.hive/skills/<skill-name>/` where it is discovered as **project scope** by only that colony's workers. See the subsection below.
- **Project-scoped**: put under `<project>/.hive/skills/` when the skill is tied to that codebase's APIs, conventions, or infra and multiple agents in the project should share it.
- **User-scoped**: put under `~/.hive/skills/` when the skill is reusable across projects for this machine/user and all agents should see it.
- **Framework default**: add under `core/framework/skills/_default_skills/` AND register in `framework/skills/defaults.py::SKILL_REGISTRY` only when the skill is a universal operational protocol shipped with Hive. Default skills use the `hive.<name>` naming convention and include `type: default-skill` in metadata.
### Colony-scoped skills via `create_colony`
A colony-scoped skill is one that belongs to exactly ONE colony — e.g. it encodes the HoneyComb staging API the `honeycomb_research` colony polls, or the LinkedIn outbound flow the `linkedin_outbound_campaign` colony runs. Writing such a skill at `~/.hive/skills/` or `<project>/.hive/skills/` leaks it to every other colony, which will then see it at selection time.
**Do not reach for `write_file` to create the folder.** The `create_colony` tool takes the skill content INLINE and places it for you:
```
create_colony(
colony_name="honeycomb_research",
task="Build a daily honeycomb market report…",
skill_name="honeycomb-api-protocol",
skill_description="How to query the HoneyComb staging API…",
skill_body="## Operational Protocol\n\nAuth: …",
skill_files=[{"path": "scripts/fetch_tickers.py", "content": "…"}], # optional
)
```
The tool writes `~/.hive/colonies/honeycomb_research/.hive/skills/honeycomb-api-protocol/SKILL.md` (plus any `skill_files`), which `SkillDiscovery` picks up as project scope when that colony's workers start — and ONLY that colony's workers. No cross-colony leakage.
Do not write colony-bound skill folders by hand under `~/.hive/skills/`. A skill placed there is user-scoped and becomes visible to every colony on the machine — defeating the isolation you wanted.
### Directory layout
```
@@ -124,8 +146,8 @@ For Python scripts in a Hive project, prefer `uv run scripts/foo.py ...`.
### Creating a new skill — workflow
1. Pick a `<skill-name>` (lowercase-hyphenated).
2. Decide scope: project (`<project>/.hive/skills/`), user (`~/.hive/skills/`), or framework default (`core/framework/skills/_default_skills/` + registry entry).
3. Create the directory and write `SKILL.md` with frontmatter + body.
2. Decide scope: **colony** (pass content INLINE to `create_colony` — STOP here, do not hand-author the folder), project (`<project>/.hive/skills/`), user (`~/.hive/skills/`), or framework default (`core/framework/skills/_default_skills/` + registry entry).
3. For the non-colony scopes: create the directory and write `SKILL.md` with frontmatter + body.
4. Add `scripts/`, `references/`, `assets/` only if needed.
5. Validate the frontmatter: name matches dir, description is specific, no forbidden characters.
6. Validate using the Hive CLI:
+58 -20
View File
@@ -1130,7 +1130,10 @@ def register_queen_lifecycle_tools(
# the skill content INLINE as tool arguments (skill_name,
# skill_description, skill_body, and optional skill_files for
# supporting scripts/references). The tool materializes the skill
# folder under ``~/.hive/skills/{name}/`` itself, then forks.
# folder under ``~/.hive/colonies/{colony_name}/.hive/skills/{name}/``
# itself — colony-scoped, discovered as project scope by the
# colony's worker and invisible to every other colony on the
# machine — then forks.
#
# Why inline instead of a pre-authored folder path: earlier versions
# required the queen to write SKILL.md with her own write_file tool
@@ -1140,8 +1143,17 @@ def register_queen_lifecycle_tools(
# "refusing to overwrite" error and didn't know how to recover. By
# inlining the content we make colony creation a single atomic
# operation with domain-level semantics: the queen owns her skill
# namespace, so calling create_colony with an existing name simply
# replaces the old skill (her latest content wins).
# namespace inside the colony, so calling create_colony with an
# existing name simply replaces the old skill (her latest content
# wins).
#
# Why colony-scoped instead of user-scoped: an earlier version
# materialized the folder at ``~/.hive/skills/{name}/``. That made
# every colony on the machine see every colony-specific skill via
# user-scope discovery — a worker in colony A could be offered
# colony B's hyper-specific skill during selection. Writing into
# the colony's own project dir kills that leak while still keeping
# re-runs idempotent.
import re as _re
import shutil as _shutil
@@ -1155,8 +1167,16 @@ def register_queen_lifecycle_tools(
skill_description: str,
skill_body: str,
skill_files: list[dict] | None,
colony_dir: Path,
) -> tuple[Path | None, str | None, bool]:
"""Write a skill folder at ``~/.hive/skills/{name}/`` from inline content.
"""Write a skill folder under ``{colony_dir}/.hive/skills/{name}/`` from inline content.
The skill is scoped to a single colony: ``SkillDiscovery`` scans
``{project_root}/.hive/skills/`` as project-scope, and the
colony's worker uses ``project_root = colony_dir`` — so only
that colony's workers see it, not every colony on the machine.
We deliberately avoid ``~/.hive/skills/`` here because that
directory is scanned as user scope and leaks into every agent.
Returns ``(installed_path, error, replaced)``. On success
``error`` is ``None`` and ``installed_path`` is the final
@@ -1228,7 +1248,7 @@ def register_queen_lifecycle_tools(
), False
normalized_files.append((rel_path, content))
target_root = Path.home() / ".hive" / "skills"
target_root = colony_dir / ".hive" / "skills"
target = target_root / name
try:
target_root.mkdir(parents=True, exist_ok=True)
@@ -1276,13 +1296,15 @@ def register_queen_lifecycle_tools(
The queen passes skill content inline: ``skill_name``,
``skill_description``, ``skill_body``, and optional
``skill_files`` (supporting scripts/references). The tool
writes ``~/.hive/skills/{skill_name}/SKILL.md`` and any extras,
then forks the queen session into a new colony directory and
stores the task in ``worker.json``. NOTHING RUNS after fork.
writes ``~/.hive/colonies/{colony_name}/.hive/skills/{skill_name}/``
(colony-scoped, only this colony's workers see it), then forks
the queen session into that colony directory and stores the
task in ``worker.json``. NOTHING RUNS after fork.
If a skill of the same name already exists, it is overwritten —
the queen owns her skill namespace, and calling create_colony
with an existing name means "my latest content wins."
If a skill of the same name already exists inside this colony,
it is overwritten — the queen owns her skill namespace inside
the colony, and calling create_colony with an existing name
means "my latest content wins."
When *tasks* is provided, each entry is seeded into the
colony's ``progress.db`` task queue in a single transaction.
@@ -1300,11 +1322,22 @@ def register_queen_lifecycle_tools(
{"error": ("colony_name must be lowercase alphanumeric with underscores (e.g. 'honeycomb_research').")}
)
# Pre-create the colony dir so the skill can be materialized
# INSIDE it (project scope, colony-local). fork_session_into_colony
# keys "is_new" off worker.json rather than the dir itself, so
# pre-creating here does not wrongly flag fresh colonies as "old".
colony_dir = Path.home() / ".hive" / "colonies" / cn
try:
colony_dir.mkdir(parents=True, exist_ok=True)
except OSError as e:
return json.dumps({"error": f"failed to create colony dir {colony_dir}: {e}"})
installed_skill, skill_err, skill_replaced = _materialize_skill_folder(
skill_name=skill_name,
skill_description=skill_description,
skill_body=skill_body,
skill_files=skill_files,
colony_dir=colony_dir,
)
if skill_err is not None:
return json.dumps(
@@ -1425,11 +1458,15 @@ def register_queen_lifecycle_tools(
"chat, use run_parallel_workers instead.\n\n"
"ATOMIC CALL: you pass the skill content INLINE as "
"arguments (skill_name, skill_description, skill_body, "
"optional skill_files). Do NOT write the skill folder "
"yourself beforehand — this tool materializes "
"~/.hive/skills/{skill_name}/ for you and then forks. If a "
"skill of the same name already exists it is replaced by "
"your latest content (you own your skill namespace).\n\n"
"optional skill_files). The tool writes the folder at "
"~/.hive/colonies/{colony_name}/.hive/skills/{skill_name}/ "
"— scoped to THIS colony only (project scope); no other "
"colony on the machine can see it. Do NOT write the folder "
"yourself with write_file; folders hand-authored at "
"~/.hive/skills/ are user-scoped and LEAK to every colony. "
"If a skill of the same name already exists under this "
"colony, it is replaced by your latest content (you own "
"your skill namespace inside the colony).\n\n"
"NOTHING RUNS AFTER FORK. This tool is file-system only: "
"it writes the skill folder, copies the queen session "
"into a new colony directory, and stores the task in "
@@ -1486,10 +1523,11 @@ def register_queen_lifecycle_tools(
"Identifier for the skill folder. Lowercase "
"[a-z0-9-], no leading/trailing/consecutive "
"hyphens, ≤64 chars. Becomes the directory "
"under ~/.hive/skills/ and the frontmatter "
"'name' field. Example: "
"'honeycomb-api-protocol'. Reusing an existing "
"name replaces that skill."
"under ~/.hive/colonies/<colony_name>/.hive/"
"skills/ and the frontmatter 'name' field. "
"Example: 'honeycomb-api-protocol'. Reusing "
"an existing name within this colony replaces "
"that skill."
),
},
"skill_description": {
+75 -11
View File
@@ -4,10 +4,14 @@ Contract (atomic inline-skill flow):
The queen calls ``create_colony(colony_name, task, skill_name,
skill_description, skill_body, skill_files?, tasks?)`` in a single
call. The tool materializes ``~/.hive/skills/{skill_name}/`` from the
call. The tool materializes
``~/.hive/colonies/{colony_name}/.hive/skills/{skill_name}/`` from the
inline content (writing SKILL.md and any supporting files), then forks
the queen session into a colony. Reusing an existing skill name simply
replaces the old skill — the queen owns her skill namespace.
the queen session into that colony. The skill is **colony-scoped** —
discovered as project scope by that colony's workers, invisible to
every other colony on the machine. Reusing an existing skill name
inside the colony simply replaces the old skill — the queen owns her
skill namespace inside the colony.
We monkeypatch ``fork_session_into_colony`` so the test doesn't need a
real queen / session directory. We also redirect ``$HOME`` so the test's
@@ -61,11 +65,16 @@ async def _call(executor, **inputs) -> dict:
@pytest.fixture
def patched_home(tmp_path, monkeypatch):
"""Redirect $HOME so ~/.hive/skills/ lands in tmp_path."""
"""Redirect $HOME so ~/.hive/colonies/ lands in tmp_path."""
monkeypatch.setenv("HOME", str(tmp_path))
return tmp_path
def _colony_skill_path(home: Path, colony_name: str, skill_name: str) -> Path:
"""Where the tool now materializes the skill (colony-scoped project dir)."""
return home / ".hive" / "colonies" / colony_name / ".hive" / "skills" / skill_name
@pytest.fixture
def patched_fork(monkeypatch):
"""Stub out fork_session_into_colony so we don't need a real queen."""
@@ -153,10 +162,10 @@ async def test_happy_path_emits_colony_created_event(
@pytest.mark.asyncio
async def test_happy_path_materializes_skill_under_home(
async def test_happy_path_materializes_skill_under_colony_dir(
patched_home: Path, patched_fork: list[dict]
) -> None:
"""Inline skill content is written to ~/.hive/skills/{name}/."""
"""Inline skill content is written to ~/.hive/colonies/{colony}/.hive/skills/{name}/."""
executor, session = _make_executor()
description = (
@@ -189,7 +198,10 @@ async def test_happy_path_materializes_skill_under_home(
assert payload["skill_name"] == "honeycomb-api-protocol"
assert payload["skill_replaced"] is False
installed = patched_home / ".hive" / "skills" / "honeycomb-api-protocol" / "SKILL.md"
installed = (
_colony_skill_path(patched_home, "honeycomb_research", "honeycomb-api-protocol")
/ "SKILL.md"
)
assert installed.exists()
text = installed.read_text(encoding="utf-8")
assert text.startswith("---\n")
@@ -197,6 +209,10 @@ async def test_happy_path_materializes_skill_under_home(
assert f"description: {description}" in text
assert "HoneyComb API Operational Protocol" in text
# Critically: the skill must NOT land in the shared user-scope dir —
# that was the leak we are fixing.
assert not (patched_home / ".hive" / "skills" / "honeycomb-api-protocol").exists()
# Fork was called with the right args
assert len(patched_fork) == 1
assert patched_fork[0]["colony_name"] == "honeycomb_research"
@@ -204,6 +220,52 @@ async def test_happy_path_materializes_skill_under_home(
assert patched_fork[0]["session"] is session
@pytest.mark.asyncio
async def test_two_colonies_do_not_share_skill_namespace(
patched_home: Path, patched_fork: list[dict]
) -> None:
"""A skill authored via create_colony is invisible to other colonies' worker dirs.
This is the core isolation guarantee: colony A's create_colony call
must NOT plant files under colony B's project root or under the
user-global skills dir.
"""
executor, _ = _make_executor()
payload_a = await _call(
executor,
colony_name="alpha",
task="t",
skill_name="alpha-only-skill",
skill_description="Only the alpha colony should see this skill.",
skill_body=_DEFAULT_BODY,
)
assert payload_a.get("status") == "created", payload_a
payload_b = await _call(
executor,
colony_name="bravo",
task="t",
skill_name="bravo-only-skill",
skill_description="Only the bravo colony should see this skill.",
skill_body=_DEFAULT_BODY,
)
assert payload_b.get("status") == "created", payload_b
alpha_dir = patched_home / ".hive" / "colonies" / "alpha" / ".hive" / "skills"
bravo_dir = patched_home / ".hive" / "colonies" / "bravo" / ".hive" / "skills"
user_skills = patched_home / ".hive" / "skills"
# Each colony only contains its own skill
assert (alpha_dir / "alpha-only-skill" / "SKILL.md").exists()
assert not (alpha_dir / "bravo-only-skill").exists()
assert (bravo_dir / "bravo-only-skill" / "SKILL.md").exists()
assert not (bravo_dir / "alpha-only-skill").exists()
# Nothing landed in the shared user-global dir.
assert not user_skills.exists() or not any(user_skills.iterdir())
@pytest.mark.asyncio
async def test_skill_files_are_written_alongside_skill_md(
patched_home: Path, patched_fork: list[dict]
@@ -225,7 +287,7 @@ async def test_skill_files_are_written_alongside_skill_md(
)
assert payload.get("status") == "created", payload
skill_dir = patched_home / ".hive" / "skills" / "fancy-skill"
skill_dir = _colony_skill_path(patched_home, "fancy_skill", "fancy-skill")
assert (skill_dir / "SKILL.md").exists()
assert (skill_dir / "scripts" / "run.sh").read_text() == "#!/bin/sh\necho hi\n"
assert (skill_dir / "references" / "shapes.md").read_text() == "# Shapes\nfoo\n"
@@ -235,10 +297,10 @@ async def test_skill_files_are_written_alongside_skill_md(
async def test_existing_skill_is_replaced(
patched_home: Path, patched_fork: list[dict]
) -> None:
"""Reusing a skill_name replaces the old skill with fresh content."""
"""Reusing a skill_name within the same colony replaces the old skill."""
executor, _ = _make_executor()
skill_root = patched_home / ".hive" / "skills" / "x-job-market-replier"
skill_root = _colony_skill_path(patched_home, "replier_colony", "x-job-market-replier")
skill_root.mkdir(parents=True)
(skill_root / "SKILL.md").write_text(
"---\nname: x-job-market-replier\ndescription: stale\n---\n\nold body\n",
@@ -460,6 +522,8 @@ async def test_fork_failure_keeps_materialized_skill(
assert "error" in payload
assert "fork failed" in payload["error"]
assert "skill_installed" in payload
installed = patched_home / ".hive" / "skills" / "durable-skill" / "SKILL.md"
installed = (
_colony_skill_path(patched_home, "will_fail", "durable-skill") / "SKILL.md"
)
assert installed.exists()
assert "hint" in payload