Files
hive/core/tests/test_create_colony_tool.py
T
2026-04-21 18:48:22 -07:00

652 lines
22 KiB
Python

"""Tests for the queen-side ``create_colony`` tool.

Contract (atomic inline-skill flow):

The queen calls ``create_colony(colony_name, task, skill_name,
skill_description, skill_body, skill_files?, tasks?, triggers?)`` in a
single call. The tool materializes
``~/.hive/colonies/{colony_name}/.hive/skills/{skill_name}/`` from the
inline content (writing SKILL.md and any supporting files), then forks
the queen session into that colony. The skill is **colony-scoped** —
discovered as project scope by that colony's workers, invisible to
every other colony on the machine. Reusing an existing skill name
inside the colony simply replaces the old skill — the queen owns her
skill namespace inside the colony.

We monkeypatch ``fork_session_into_colony`` so the tests don't need a
real queen / session directory. We also redirect ``$HOME`` so each test's
skill installation lands in a tmp tree, not the real user home.
"""
from __future__ import annotations
import asyncio
import json
from pathlib import Path
from typing import Any
import pytest
from framework.host.event_bus import EventBus
from framework.llm.provider import ToolUse
from framework.loader.tool_registry import ToolRegistry
from framework.tools.queen_lifecycle_tools import register_queen_lifecycle_tools
# ---------------------------------------------------------------------------
# Fixtures + helpers
# ---------------------------------------------------------------------------
class _FakeSession:
    """Bare-bones session object handed to ``register_queen_lifecycle_tools``.

    It only carries the attributes assigned in ``__init__``; nothing here
    talks to a real queen or session directory.
    """

    def __init__(self, sid: str = "session_test_create_colony") -> None:
        # Identity of the fake session.
        self.id = sid

        # No colony, runtime, or worker is attached at construction time.
        self.colony = None
        self.colony_runtime = None

        # A real bus, so tests can subscribe to events through the session.
        self.event_bus = EventBus()

        self.worker_path = None

        # Trigger bookkeeping starts out empty.
        self.available_triggers: dict = {}
        self.active_trigger_ids: set = set()
def _make_executor():
    """Register the queen lifecycle tools on a fresh registry.

    Returns:
        A ``(executor, session)`` pair: the registry's executor and the
        ``_FakeSession`` the tools were registered against.
    """
    tool_registry = ToolRegistry()
    fake_session = _FakeSession()
    register_queen_lifecycle_tools(
        tool_registry,
        session=fake_session,
        session_id=fake_session.id,
    )
    tool_executor = tool_registry.get_executor()
    return tool_executor, fake_session
async def _call(executor, **inputs) -> dict:
    """Run the ``create_colony`` tool through ``executor`` and decode its reply.

    Args:
        executor: Callable accepting a ``ToolUse``.
        **inputs: Keyword arguments forwarded verbatim as the tool input.

    Returns:
        The tool result's ``content`` parsed from JSON.
    """
    tool_use = ToolUse(id="tu_create_colony", name="create_colony", input=inputs)
    outcome = executor(tool_use)
    # The executor may return the result directly or a coroutine to await.
    if asyncio.iscoroutine(outcome):
        outcome = await outcome
    return json.loads(outcome.content)
@pytest.fixture
def patched_home(tmp_path, monkeypatch):
    """Redirect the user's home directory so ~/.hive/colonies/ lands in tmp_path.

    Both ``HOME`` and ``USERPROFILE`` are pointed at ``tmp_path``. On Windows,
    ``Path.home()`` / ``os.path.expanduser`` consult ``USERPROFILE`` rather
    than ``HOME`` (Python 3.8+), so setting only ``HOME`` would let the tool
    write into the real user home there. Setting ``USERPROFILE`` on other
    platforms is harmless.

    Returns:
        ``tmp_path``, which now acts as the home directory.
    """
    fake_home = str(tmp_path)
    monkeypatch.setenv("HOME", fake_home)
    monkeypatch.setenv("USERPROFILE", fake_home)
    return tmp_path
def _colony_skill_path(home: Path, colony_name: str, skill_name: str) -> Path:
    """Return the colony-scoped directory where ``skill_name`` is materialized.

    The layout is ``{home}/.hive/colonies/{colony_name}/.hive/skills/{skill_name}``.
    """
    colony_root = home / ".hive" / "colonies" / colony_name
    skills_root = colony_root / ".hive" / "skills"
    return skills_root / skill_name
@pytest.fixture
def patched_fork(monkeypatch):
    """Swap ``fork_session_into_colony`` for a recording stub.

    The stub never touches a real queen; it records its keyword arguments
    and returns a canned fork result.

    Returns:
        The list the stub appends to — one dict of arguments per fork call.
    """
    recorded_calls: list[dict] = []

    async def _stub_fork(
        *,
        session: Any,
        colony_name: str,
        task: str,
        tasks: list[dict] | None = None,
        concurrency_hint: int | None = None,
    ) -> dict:
        # Remember exactly what the tool passed so tests can inspect it.
        call_record = {
            "session": session,
            "colony_name": colony_name,
            "task": task,
            "tasks": tasks,
            "concurrency_hint": concurrency_hint,
        }
        recorded_calls.append(call_record)

        # Canned response describing a freshly created colony.
        fork_result = {
            "colony_path": f"/tmp/fake_colonies/{colony_name}",
            "colony_name": colony_name,
            "queen_session_id": "session_fake_fork_id",
            "is_new": True,
            "db_path": f"/tmp/fake_colonies/{colony_name}/data/progress.db",
            "task_ids": [],
        }
        return fork_result

    monkeypatch.setattr(
        "framework.server.routes_execution.fork_session_into_colony",
        _stub_fork,
    )
    return recorded_calls
# Shared skill body for tests that are not exercising body validation.
_DEFAULT_BODY = "".join(
    [
        "## Operational Protocol\n\n",
        "Auth: Bearer token from ~/.hive/credentials/honeycomb.json.\n",
        "Pagination: ?page=1&page_size=50 (max 50 per page).\n",
    ]
)
# ---------------------------------------------------------------------------
# Happy path
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_happy_path_emits_colony_created_event(patched_home: Path, patched_fork: list[dict]) -> None:
    """A successful create_colony call publishes exactly one COLONY_CREATED event."""
    from framework.host.event_bus import AgentEvent, EventType

    executor, session = _make_executor()
    captured: list[AgentEvent] = []

    async def _capture(event: AgentEvent) -> None:
        captured.append(event)

    # Listen on the session's own bus before invoking the tool.
    session.event_bus.subscribe(
        event_types=[EventType.COLONY_CREATED],
        handler=_capture,
    )

    result = await _call(
        executor,
        colony_name="event_check",
        task="t",
        skill_name="my-skill",
        skill_description="My test skill for event-check happy path.",
        skill_body=_DEFAULT_BODY,
    )
    assert result.get("status") == "created", result
    assert result["skill_replaced"] is False

    assert len(captured) == 1
    created_event = captured[0]
    assert created_event.type == EventType.COLONY_CREATED

    # The event payload mirrors the key facts of the tool result.
    assert created_event.data.get("colony_name") == "event_check"
    assert created_event.data.get("skill_name") == "my-skill"
    assert created_event.data.get("skill_replaced") is False
    assert created_event.data.get("is_new") is True
@pytest.mark.asyncio
async def test_happy_path_materializes_skill_under_colony_dir(patched_home: Path, patched_fork: list[dict]) -> None:
    """Inline skill content lands in ~/.hive/colonies/{colony}/.hive/skills/{name}/.

    Also verifies the override-ledger entry, that no copy is placed in the
    user-scope skills dir, and the arguments passed to the fork stub.
    """
    executor, session = _make_executor()

    skill_description = (
        "How to query the HoneyComb staging API for ticker, pool, "
        "and trade data. Covers auth, pagination, pool detail shape."
    )
    skill_body = (
        "## HoneyComb API Operational Protocol\n\n"
        "Auth: Bearer token from ~/.hive/credentials/honeycomb.json.\n"
        "Pagination: ?page=1&page_size=50 (max 50 per page).\n"
        "Endpoints:\n"
        "- /api/ticker — list tickers\n"
        "- /api/ticker/{id} — pool detail\n"
    )
    colony_task = (
        "Build a daily honeycomb market report covering top gainers, "
        "losers, volume leaders, and category breakdowns."
    )

    result = await _call(
        executor,
        colony_name="honeycomb_research",
        task=colony_task,
        skill_name="honeycomb-api-protocol",
        skill_description=skill_description,
        skill_body=skill_body,
    )

    # Tool-level response.
    assert result.get("status") == "created", f"Tool error: {result}"
    assert result["colony_name"] == "honeycomb_research"
    assert result["skill_name"] == "honeycomb-api-protocol"
    assert result["skill_replaced"] is False

    # SKILL.md exists with front matter built from the inline arguments.
    skill_dir = _colony_skill_path(patched_home, "honeycomb_research", "honeycomb-api-protocol")
    skill_md = skill_dir / "SKILL.md"
    assert skill_md.exists()
    contents = skill_md.read_text(encoding="utf-8")
    assert contents.startswith("---\n")
    assert "name: honeycomb-api-protocol" in contents
    assert f"description: {skill_description}" in contents
    assert "HoneyComb API Operational Protocol" in contents

    # The skill must also be recorded in the colony's override store with
    # ``queen_created`` provenance, so the UI can present it as
    # queen-authored and editable.
    from framework.skills.overrides import Provenance, SkillOverrideStore

    colony_root = patched_home / ".hive" / "colonies" / "honeycomb_research"
    ledger_path = colony_root / "skills_overrides.json"
    assert ledger_path.exists(), "create_colony should write a skills_overrides.json ledger"
    ledger = SkillOverrideStore.load(ledger_path)
    ledger_entry = ledger.get("honeycomb-api-protocol")
    assert ledger_entry is not None
    assert ledger_entry.provenance == Provenance.QUEEN_CREATED
    assert ledger_entry.enabled is True
    assert (ledger_entry.created_by or "").startswith("queen:")

    # Critically: nothing may appear in the shared user-scope dir — that
    # was the leak being fixed.
    user_scope_copy = patched_home / ".hive" / "skills" / "honeycomb-api-protocol"
    assert not user_scope_copy.exists()

    # The fork stub saw exactly one call with the expected arguments.
    assert len(patched_fork) == 1
    fork_call = patched_fork[0]
    assert fork_call["colony_name"] == "honeycomb_research"
    assert "honeycomb market report" in fork_call["task"]
    assert fork_call["session"] is session
@pytest.mark.asyncio
async def test_two_colonies_do_not_share_skill_namespace(patched_home: Path, patched_fork: list[dict]) -> None:
    """Skills created for one colony never show up in another colony's tree.

    Core isolation guarantee: a create_colony call for colony A must not
    write files under colony B's project root, nor under the user-global
    skills dir.
    """
    executor, _ = _make_executor()

    alpha_result = await _call(
        executor,
        colony_name="alpha",
        task="t",
        skill_name="alpha-only-skill",
        skill_description="Only the alpha colony should see this skill.",
        skill_body=_DEFAULT_BODY,
    )
    assert alpha_result.get("status") == "created", alpha_result

    bravo_result = await _call(
        executor,
        colony_name="bravo",
        task="t",
        skill_name="bravo-only-skill",
        skill_description="Only the bravo colony should see this skill.",
        skill_body=_DEFAULT_BODY,
    )
    assert bravo_result.get("status") == "created", bravo_result

    colonies_root = patched_home / ".hive" / "colonies"
    alpha_skills = colonies_root / "alpha" / ".hive" / "skills"
    bravo_skills = colonies_root / "bravo" / ".hive" / "skills"
    global_skills = patched_home / ".hive" / "skills"

    # Alpha holds its own skill and nothing of bravo's.
    assert (alpha_skills / "alpha-only-skill" / "SKILL.md").exists()
    assert not (alpha_skills / "bravo-only-skill").exists()

    # Bravo holds its own skill and nothing of alpha's.
    assert (bravo_skills / "bravo-only-skill" / "SKILL.md").exists()
    assert not (bravo_skills / "alpha-only-skill").exists()

    # The shared user-global dir is either absent or empty.
    assert not (global_skills.exists() and any(global_skills.iterdir()))
@pytest.mark.asyncio
async def test_skill_files_are_written_alongside_skill_md(patched_home: Path, patched_fork: list[dict]) -> None:
    """skill_files entries land at the right relative paths next to SKILL.md.

    Files are read back with an explicit UTF-8 encoding, matching every other
    read/write in this module, so the comparison does not depend on the
    platform's default locale encoding.
    """
    executor, _ = _make_executor()
    payload = await _call(
        executor,
        colony_name="fancy_skill",
        task="t",
        skill_name="fancy-skill",
        skill_description="Has supporting scripts and references.",
        skill_body=_DEFAULT_BODY,
        skill_files=[
            {"path": "scripts/run.sh", "content": "#!/bin/sh\necho hi\n"},
            {"path": "references/shapes.md", "content": "# Shapes\nfoo\n"},
        ],
    )
    assert payload.get("status") == "created", payload

    skill_dir = _colony_skill_path(patched_home, "fancy_skill", "fancy-skill")
    assert (skill_dir / "SKILL.md").exists()

    # Each supporting file keeps its sub-directory and exact content.
    run_script = skill_dir / "scripts" / "run.sh"
    shapes_doc = skill_dir / "references" / "shapes.md"
    assert run_script.read_text(encoding="utf-8") == "#!/bin/sh\necho hi\n"
    assert shapes_doc.read_text(encoding="utf-8") == "# Shapes\nfoo\n"
@pytest.mark.asyncio
async def test_existing_skill_is_replaced(patched_home: Path, patched_fork: list[dict]) -> None:
    """A second skill with the same name in the same colony overwrites the first."""
    executor, _ = _make_executor()

    # Seed a prior version of the skill, including an extra sidecar file.
    skill_root = _colony_skill_path(patched_home, "replier_colony", "x-job-market-replier")
    skill_root.mkdir(parents=True)
    skill_md = skill_root / "SKILL.md"
    sidecar = skill_root / "stale.txt"
    skill_md.write_text(
        "---\nname: x-job-market-replier\ndescription: stale\n---\n\nold body\n",
        encoding="utf-8",
    )
    sidecar.write_text("leftover from prior version", encoding="utf-8")

    result = await _call(
        executor,
        colony_name="replier_colony",
        task="t",
        skill_name="x-job-market-replier",
        skill_description="Reply to job-market posts on X.",
        skill_body="## New procedure\nUse this instead.\n",
    )
    assert result.get("status") == "created", result
    assert result["skill_replaced"] is True

    rewritten = skill_md.read_text(encoding="utf-8")
    assert "stale" not in rewritten
    assert "New procedure" in rewritten

    # Sidecar files left over from the previous version must be removed.
    assert not sidecar.exists()
# ---------------------------------------------------------------------------
# Validation failures
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_missing_skill_name_rejected(patched_home, patched_fork) -> None:
    """An empty ``skill_name`` yields an error naming the field; no fork occurs."""
    executor, _ = _make_executor()
    response = await _call(
        executor,
        colony_name="ok_name",
        task="t",
        skill_name="",
        skill_description="desc",
        skill_body=_DEFAULT_BODY,
    )

    assert "error" in response
    message = response["error"]
    assert "skill_name" in message
    assert not patched_fork
@pytest.mark.asyncio
async def test_invalid_skill_name_characters_rejected(patched_home, patched_fork) -> None:
    """A name with uppercase/underscore is refused, citing the allowed ``[a-z0-9-]`` set."""
    executor, _ = _make_executor()
    response = await _call(
        executor,
        colony_name="ok_name",
        task="t",
        skill_name="Bad_Name",
        skill_description="desc",
        skill_body=_DEFAULT_BODY,
    )

    assert "error" in response
    message = response["error"]
    assert "[a-z0-9-]" in message
    assert not patched_fork
@pytest.mark.asyncio
async def test_skill_name_with_double_hyphen_rejected(patched_home, patched_fork) -> None:
    """A skill name containing ``--`` is refused with an error mentioning hyphens."""
    executor, _ = _make_executor()
    response = await _call(
        executor,
        colony_name="ok_name",
        task="t",
        skill_name="bad--name",
        skill_description="desc",
        skill_body=_DEFAULT_BODY,
    )

    assert "error" in response
    message = response["error"]
    assert "hyphen" in message
    assert not patched_fork
@pytest.mark.asyncio
async def test_missing_skill_description_rejected(patched_home, patched_fork) -> None:
    """An empty ``skill_description`` yields an error naming the field; no fork occurs."""
    executor, _ = _make_executor()
    response = await _call(
        executor,
        colony_name="ok_name",
        task="t",
        skill_name="ok-skill",
        skill_description="",
        skill_body=_DEFAULT_BODY,
    )

    assert "error" in response
    message = response["error"]
    assert "skill_description" in message
    assert not patched_fork
@pytest.mark.asyncio
async def test_multiline_description_rejected(patched_home, patched_fork) -> None:
    """A description containing a newline is refused with a 'single line' error."""
    executor, _ = _make_executor()
    response = await _call(
        executor,
        colony_name="ok_name",
        task="t",
        skill_name="ok-skill",
        skill_description="line one\nline two",
        skill_body=_DEFAULT_BODY,
    )

    assert "error" in response
    message = response["error"]
    assert "single line" in message
    assert not patched_fork
@pytest.mark.asyncio
async def test_empty_skill_body_rejected(patched_home, patched_fork) -> None:
    """A whitespace-only ``skill_body`` yields an error naming the field; no fork occurs."""
    executor, _ = _make_executor()
    response = await _call(
        executor,
        colony_name="ok_name",
        task="t",
        skill_name="ok-skill",
        skill_description="desc",
        skill_body=" \n ",
    )

    assert "error" in response
    message = response["error"]
    assert "skill_body" in message
    assert not patched_fork
@pytest.mark.asyncio
async def test_invalid_colony_name_rejected(patched_home, patched_fork) -> None:
    """The colony name ``NotValid-Colony`` is refused with an error naming ``colony_name``."""
    executor, _ = _make_executor()
    response = await _call(
        executor,
        colony_name="NotValid-Colony",
        task="t",
        skill_name="valid-skill",
        skill_description="desc",
        skill_body=_DEFAULT_BODY,
    )

    assert "error" in response
    message = response["error"]
    assert "colony_name" in message
    assert not patched_fork
@pytest.mark.asyncio
async def test_skill_files_reject_absolute_path(patched_home, patched_fork) -> None:
    """A ``skill_files`` entry with an absolute path is refused; no fork occurs."""
    executor, _ = _make_executor()
    absolute_entry = {"path": "/etc/passwd", "content": "evil"}
    response = await _call(
        executor,
        colony_name="ok_name",
        task="t",
        skill_name="ok-skill",
        skill_description="desc",
        skill_body=_DEFAULT_BODY,
        skill_files=[absolute_entry],
    )

    assert "error" in response
    message = response["error"]
    assert "relative" in message
    assert not patched_fork
@pytest.mark.asyncio
async def test_skill_files_reject_parent_traversal(patched_home, patched_fork) -> None:
    """A ``skill_files`` path starting with ``../`` is refused; no fork occurs."""
    executor, _ = _make_executor()
    traversal_entry = {"path": "../escape.txt", "content": "evil"}
    response = await _call(
        executor,
        colony_name="ok_name",
        task="t",
        skill_name="ok-skill",
        skill_description="desc",
        skill_body=_DEFAULT_BODY,
        skill_files=[traversal_entry],
    )

    assert "error" in response
    message = response["error"]
    assert "relative" in message
    assert not patched_fork
@pytest.mark.asyncio
async def test_skill_files_reject_skill_md_override(patched_home, patched_fork) -> None:
    """``skill_files`` may not supply its own ``SKILL.md``; the error names that file."""
    executor, _ = _make_executor()
    override_entry = {"path": "SKILL.md", "content": "sneaky"}
    response = await _call(
        executor,
        colony_name="ok_name",
        task="t",
        skill_name="ok-skill",
        skill_description="desc",
        skill_body=_DEFAULT_BODY,
        skill_files=[override_entry],
    )

    assert "error" in response
    message = response["error"]
    assert "SKILL.md" in message
    assert not patched_fork
@pytest.mark.asyncio
async def test_fork_failure_keeps_materialized_skill(patched_home, monkeypatch) -> None:
    """If the fork raises, the skill stays in the colony-scoped skills dir.

    The skill is expected at
    ``~/.hive/colonies/{colony_name}/.hive/skills/{skill_name}/``; the error
    payload must still report ``skill_installed`` and carry a ``hint``.
    """

    async def _failing_fork(**kwargs):
        raise RuntimeError("simulated fork crash")

    # This test installs its own failing stub instead of using patched_fork.
    monkeypatch.setattr(
        "framework.server.routes_execution.fork_session_into_colony",
        _failing_fork,
    )
    executor, _ = _make_executor()
    payload = await _call(
        executor,
        colony_name="will_fail",
        task="t",
        skill_name="durable-skill",
        skill_description="desc",
        skill_body=_DEFAULT_BODY,
    )

    # The tool reports the fork failure...
    assert "error" in payload
    assert "fork failed" in payload["error"]
    assert "skill_installed" in payload

    # ...but the materialized skill is left on disk under the colony.
    installed = _colony_skill_path(patched_home, "will_fail", "durable-skill") / "SKILL.md"
    assert installed.exists()
    assert "hint" in payload
# ---------------------------------------------------------------------------
# triggers — inline schedule persisted to triggers.json
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_triggers_written_to_triggers_json(patched_home: Path, patched_fork: list[dict]) -> None:
    """Valid ``triggers`` are persisted to {colony_dir}/triggers.json."""
    executor, _ = _make_executor()

    # A timer trigger with no explicit name...
    timer_trigger = {
        "id": "daily-report",
        "trigger_type": "timer",
        "trigger_config": {"cron": "0 9 * * *"},
        "task": "Generate the daily report",
    }
    # ...and a webhook trigger that supplies one.
    webhook_trigger = {
        "id": "github-webhook",
        "trigger_type": "webhook",
        "trigger_config": {"path": "/hooks/github"},
        "task": "Process the github event",
        "name": "GitHub webhook",
    }

    result = await _call(
        executor,
        colony_name="scheduled",
        task="t",
        skill_name="scheduled-skill",
        skill_description="d",
        skill_body=_DEFAULT_BODY,
        triggers=[timer_trigger, webhook_trigger],
    )
    assert result.get("status") == "created", result

    colony_dir = patched_home / ".hive" / "colonies" / "scheduled"
    triggers_file = colony_dir / "triggers.json"
    assert triggers_file.exists()

    persisted = json.loads(triggers_file.read_text(encoding="utf-8"))
    assert len(persisted) == 2

    timer_entry = persisted[0]
    assert timer_entry["id"] == "daily-report"
    assert timer_entry["trigger_type"] == "timer"
    assert timer_entry["trigger_config"] == {"cron": "0 9 * * *"}
    assert timer_entry["task"] == "Generate the daily report"

    # A missing name falls back to the id; an explicit name is kept.
    assert timer_entry["name"] == "daily-report"
    assert persisted[1]["name"] == "GitHub webhook"
@pytest.mark.asyncio
async def test_triggers_omitted_does_not_write_triggers_json(patched_home: Path, patched_fork: list[dict]) -> None:
    """Without a ``triggers`` argument no triggers.json is created for the colony."""
    executor, _ = _make_executor()
    result = await _call(
        executor,
        colony_name="no_schedule",
        task="t",
        skill_name="plain-skill",
        skill_description="d",
        skill_body=_DEFAULT_BODY,
    )
    assert result.get("status") == "created", result

    colony_dir = patched_home / ".hive" / "colonies" / "no_schedule"
    triggers_file = colony_dir / "triggers.json"
    assert not triggers_file.exists()
@pytest.mark.asyncio
async def test_triggers_invalid_cron_fails_before_fork(patched_home: Path, patched_fork: list[dict]) -> None:
    """A bad cron fails fast: no skill written, no fork call.

    The skill location is resolved through ``_colony_skill_path`` so this
    test checks the same colony-scoped layout as the other tests in this
    module.
    """
    executor, _ = _make_executor()
    payload = await _call(
        executor,
        colony_name="bad_cron",
        task="t",
        skill_name="skill",
        skill_description="d",
        skill_body=_DEFAULT_BODY,
        triggers=[
            {
                "id": "broken",
                "trigger_type": "timer",
                "trigger_config": {"cron": "not a cron"},
                "task": "x",
            }
        ],
    )
    assert "error" in payload
    assert "cron" in payload["error"]

    # Fork was not called, skill not materialized.
    assert len(patched_fork) == 0
    assert not _colony_skill_path(patched_home, "bad_cron", "skill").exists()
@pytest.mark.asyncio
async def test_triggers_missing_task_fails(patched_home: Path, patched_fork: list[dict]) -> None:
    """A trigger lacking ``task`` produces an error mentioning it, and no fork call."""
    executor, _ = _make_executor()

    # Deliberately omits the "task" key.
    taskless_trigger = {
        "id": "notask",
        "trigger_type": "timer",
        "trigger_config": {"interval_minutes": 5},
    }
    response = await _call(
        executor,
        colony_name="no_task",
        task="t",
        skill_name="skill",
        skill_description="d",
        skill_body=_DEFAULT_BODY,
        triggers=[taskless_trigger],
    )

    assert "error" in response
    message = response["error"]
    assert "task" in message
    assert not patched_fork