fix: integration tests
@@ -413,7 +413,9 @@ class TestSessionCRUD:
            model=None,
            initial_prompt=None,
            queen_resume_from=None,
            queen_name=None,
            initial_phase=None,
            worker_name=None,
        )

    @pytest.mark.asyncio

@@ -17,23 +17,7 @@ def project_root():


class TestConfigurePaths:
-    """Test _configure_paths auto-discovers exports/ and core/."""
-
-    def test_adds_exports_to_sys_path(self, project_root):
-        exports_dir = project_root / "exports"
-        if not exports_dir.is_dir():
-            pytest.skip("exports/ directory does not exist in this environment")
-
-        exports_str = str(exports_dir)
-        # Remove if already present to test fresh addition
-        original_path = sys.path.copy()
-        sys.path = [p for p in sys.path if p != exports_str]
-
-        try:
-            _configure_paths()
-            assert exports_str in sys.path
-        finally:
-            sys.path = original_path
+    """Test _configure_paths auto-discovers core/."""

    def test_adds_core_to_sys_path(self, project_root):
        core_dir = project_root / "core"

@@ -49,21 +33,15 @@ class TestConfigurePaths:

    def test_does_not_duplicate_paths(self):
        _configure_paths()
        # Call twice — should not create duplicates
        before = sys.path.copy()
        _configure_paths()
        assert sys.path == before

    def test_handles_missing_exports_gracefully(self):
        """If exports/ doesn't exist, _configure_paths should not crash."""
        _configure_paths()


class TestFrameworkModule:
-    """Test ``python -m framework`` invocation (the underlying module)."""
+    """Test ``python -m framework`` invocation."""

    def test_module_help(self, project_root):
        """Verify ``python -m framework --help`` prints usage."""
        result = subprocess.run(
            [sys.executable, "-m", "framework", "--help"],
            capture_output=True,

@@ -72,28 +50,23 @@ class TestFrameworkModule:
            cwd=str(project_root / "core"),
        )
        assert result.returncode == 0
-        assert "hive" in result.stdout.lower() or "goal" in result.stdout.lower()
+        assert "hive" in result.stdout.lower()

-    def test_module_list_subcommand(self, project_root):
-        """Verify ``python -m framework list --help`` registers the subcommand."""
+    def test_module_serve_subcommand(self, project_root):
+        """Verify ``python -m framework serve --help`` prints usage."""
        result = subprocess.run(
-            [sys.executable, "-m", "framework", "list", "--help"],
+            [sys.executable, "-m", "framework", "serve", "--help"],
            capture_output=True,
            text=True,
            encoding="utf-8",
            cwd=str(project_root / "core"),
        )
        assert result.returncode == 0
-        assert "agents" in result.stdout.lower() or "directory" in result.stdout.lower()
+        assert "host" in result.stdout.lower() or "port" in result.stdout.lower()


class TestHiveEntryPoint:
-    """Test the ``hive`` console_scripts entry point.
-
-    These tests verify the actual ``hive`` command installed by
-    ``pip install -e core/``. If the entry point is not installed,
-    the tests are skipped gracefully.
-    """
+    """Test the ``hive`` console_scripts entry point."""

    @pytest.fixture(autouse=True)
    def _require_hive(self):

@@ -101,7 +74,7 @@ class TestHiveEntryPoint:
            pytest.skip("'hive' entry point not installed (run: pip install -e core/)")

    def test_hive_help(self):
-        """Verify ``hive --help`` exits 0 and prints usage."""
+        """Verify ``hive --help`` exits 0 and lists the new commands."""
        result = subprocess.run(
            ["hive", "--help"],
            capture_output=True,

@@ -109,23 +82,38 @@ class TestHiveEntryPoint:
            encoding="utf-8",
        )
        assert result.returncode == 0
-        assert "run" in result.stdout.lower()
-        assert "validate" in result.stdout.lower()
+        out = result.stdout.lower()
+        # New CLI surface (post-cleanup)
+        assert "serve" in out
+        assert "queen" in out
+        assert "colony" in out
+        assert "session" in out
+        assert "chat" in out

-    def test_hive_list_help(self):
-        """Verify ``hive list --help`` exits 0."""
+    def test_hive_queen_list_help(self):
+        """``hive queen list --help`` is one of the new core commands."""
        result = subprocess.run(
-            ["hive", "list", "--help"],
+            ["hive", "queen", "list", "--help"],
            capture_output=True,
            text=True,
            encoding="utf-8",
        )
        assert result.returncode == 0

-    def test_hive_run_missing_agent(self):
-        """Verify ``hive run`` with a non-existent agent prints an error."""
+    def test_hive_colony_list_help(self):
+        """``hive colony list --help`` is one of the new core commands."""
        result = subprocess.run(
-            ["hive", "run", "nonexistent_agent_xyz"],
+            ["hive", "colony", "list", "--help"],
            capture_output=True,
            text=True,
            encoding="utf-8",
        )
        assert result.returncode == 0

+    def test_hive_unknown_command_exits_nonzero(self):
+        """An unknown subcommand must error out."""
+        result = subprocess.run(
+            ["hive", "definitely-not-a-command"],
+            capture_output=True,
+            text=True,
+            encoding="utf-8",

@@ -0,0 +1,401 @@
"""Wiring smoke test for the queen → fork → colony flow.

Validates the on-disk artifacts produced by ``handle_colony_spawn`` and
that ``create_session_with_worker_colony`` resolves the colony's forked
session ID from ``metadata.json`` rather than spinning up a fresh ID.

These tests do NOT exercise the LLM or the queen identity hook -- they
construct a Session object with the minimum state ``handle_colony_spawn``
needs and run everything against a temp directory.
"""

from __future__ import annotations

import json
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock

import pytest
from aiohttp.test_utils import TestClient, TestServer

from framework.agent_loop.internals.types import LoopConfig
from framework.host.event_bus import EventBus
from framework.server.app import create_app
from framework.server.session_manager import Session, _queen_session_dir


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _make_fake_queen_session(
    home: Path,
    *,
    queen_name: str,
    session_id: str,
) -> Path:
    """Create a fake queen session directory with conversations and meta.json."""
    queen_dir = home / ".hive" / "agents" / "queens" / queen_name / "sessions" / session_id
    (queen_dir / "conversations" / "parts").mkdir(parents=True)
    (queen_dir / "data").mkdir()

    # Two fake conversation parts so we can verify they get copied
    parts = queen_dir / "conversations" / "parts"
    (parts / "0000000000.json").write_text(
        json.dumps({"seq": 0, "role": "user", "content": "trade honeycomb"}),
        encoding="utf-8",
    )
    (parts / "0000000001.json").write_text(
        json.dumps({"seq": 1, "role": "assistant", "content": "on it"}),
        encoding="utf-8",
    )

    # Conversation cursor + meta
    (queen_dir / "conversations" / "cursor.json").write_text("{}", encoding="utf-8")
    (queen_dir / "conversations" / "meta.json").write_text("{}", encoding="utf-8")

    # Session meta.json (this is the queen-session meta, distinct from convs/meta.json)
    (queen_dir / "meta.json").write_text(
        json.dumps({"created_at": 1, "queen_id": queen_name}),
        encoding="utf-8",
    )

    # Empty events log
    (queen_dir / "events.jsonl").write_text("", encoding="utf-8")

    return queen_dir


def _make_session_with_queen_state(
    *,
    session_id: str,
    queen_name: str,
    queen_dir: Path,
) -> Session:
    """Construct a Session pre-populated with the state colony-spawn reads."""
    bus = MagicMock()
    bus.publish = AsyncMock()

    # Fake queen_loop with the attributes the spawn handler reads
    fake_loop = SimpleNamespace(
        _last_ctx=SimpleNamespace(
            available_tools=[
                SimpleNamespace(name="read_file"),
                SimpleNamespace(name="search_files"),
            ],
            skills_catalog_prompt="<skills/>",
            protocols_prompt="<protocols/>",
            skill_dirs=["/fake/skills"],
        ),
        _config=LoopConfig(
            max_iterations=42,
            max_tool_calls_per_turn=7,
            max_context_tokens=99_000,
            max_tool_result_chars=2048,
        ),
        _conversation_store=None,
    )
    queen_executor = SimpleNamespace(node_registry={"queen": fake_loop})

    # Fake phase_state with the attributes the spawn handler reads
    phase_state = SimpleNamespace(
        phase="planning",
        queen_id=queen_name,
        queen_identity_prompt="You are Charlotte, head of finance.",
        _cached_global_recall_block="",
        get_current_prompt=lambda: "you are the queen",
    )

    session = Session(
        id=session_id,
        event_bus=bus,
        llm=MagicMock(),
        loaded_at=0.0,
        queen_executor=queen_executor,
        queen_dir=queen_dir,
        queen_name=queen_name,
        phase_state=phase_state,
    )
    return session


# ---------------------------------------------------------------------------
# 1. AgentLoader skips metadata.json when picking a worker config
# ---------------------------------------------------------------------------


def test_agent_loader_picks_worker_json_not_metadata_json(tmp_path):
    """AgentLoader.load must select worker.json from a colony, not metadata.json.

    Regression: ``metadata.json`` sorts before ``worker.json`` alphabetically;
    if it isn't excluded, the loader treats colony provenance as a worker spec
    and the worker spawns under the wrong storage path with no goal/tools.
    """
    from framework.loader.agent_loader import AgentLoader

    colony_dir = tmp_path / "colonies" / "honeycomb"
    colony_dir.mkdir(parents=True)
    (colony_dir / "data").mkdir()

    # Colony provenance (must NOT be picked)
    (colony_dir / "metadata.json").write_text(
        json.dumps(
            {
                "colony_name": "honeycomb",
                "queen_name": "queen_finance_fundraising",
                "queen_session_id": "session_xxx",
                "workers": {"worker": {"task": "trade"}},
            }
        ),
        encoding="utf-8",
    )

    # Real worker config
    (colony_dir / "worker.json").write_text(
        json.dumps(
            {
                "name": "worker",
                "version": "1.0.0",
                "description": "trader",
                "goal": {"description": "trade honeycomb", "success_criteria": [], "constraints": []},
                "system_prompt": "be a careful trader",
                "tools": ["read_file", "search_files"],
                "loop_config": {"max_iterations": 50},
            }
        ),
        encoding="utf-8",
    )

    runner = AgentLoader.load(
        colony_dir,
        interactive=False,
        skip_credential_validation=True,
    )

    # Picked the right config: name comes from worker.json
    assert runner.graph.nodes[0].id == "worker"
    assert runner.goal.description == "trade honeycomb"
    assert "read_file" in runner.graph.nodes[0].tools


# ---------------------------------------------------------------------------
# 2. handle_colony_spawn produces the correct on-disk artifacts
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_colony_spawn_creates_correct_artifacts(tmp_path, monkeypatch):
    """End-to-end POST /api/sessions/{id}/colony-spawn against an in-process app.

    Validates the full set of artifacts produced by the spawn handler,
    catching the bugs we hit yesterday:
    - queen_name in metadata.json must be the actual queen profile, not "default"
    - queen_session_id in metadata.json must point to the duplicated dir
    - duplicated session dir must live under the correct queen identity
    - duplicated session must be flagged colony_fork=true
    - worker.json must contain the queen state snapshot
    - worker storage must receive the queen conversations
    - source queen session meta must be linked back to the colony
    """
    monkeypatch.setattr(Path, "home", classmethod(lambda cls: tmp_path))

    queen_name = "queen_finance_fundraising"
    source_session_id = "session_20260410_120000_aaaaaaaa"

    # Pre-create a fake queen session on disk
    source_queen_dir = _make_fake_queen_session(
        tmp_path,
        queen_name=queen_name,
        session_id=source_session_id,
    )

    # Build the in-process aiohttp app and inject our fake session
    app = create_app()
    manager = app["manager"]
    session = _make_session_with_queen_state(
        session_id=source_session_id,
        queen_name=queen_name,
        queen_dir=source_queen_dir,
    )
    manager._sessions[session.id] = session

    async with TestClient(TestServer(app)) as client:
        resp = await client.post(
            f"/api/sessions/{source_session_id}/colony-spawn",
            json={"colony_name": "honeycomb", "task": "trade carefully"},
        )
        assert resp.status == 200, await resp.text()
        body = await resp.json()

    colony_session_id = body["queen_session_id"]
    assert body["colony_name"] == "honeycomb"
    assert body["is_new"] is True
    assert colony_session_id != source_session_id

    # ── colony_dir layout ──────────────────────────────────────────
    colony_dir = tmp_path / ".hive" / "colonies" / "honeycomb"
    assert colony_dir.is_dir()
    assert (colony_dir / "data").is_dir()
    assert (colony_dir / "worker.json").is_file()
    assert (colony_dir / "metadata.json").is_file()

    # ── metadata.json contents ─────────────────────────────────────
    metadata = json.loads((colony_dir / "metadata.json").read_text())
    assert metadata["colony_name"] == "honeycomb"
    assert metadata["queen_name"] == queen_name, (
        f"queen_name should be the actual queen profile, got {metadata['queen_name']!r}"
    )
    assert metadata["queen_session_id"] == colony_session_id
    assert metadata["source_session_id"] == source_session_id
    assert "worker" in metadata["workers"]
    assert metadata["workers"]["worker"]["task"] == "trade carefully"

    # ── worker.json contents ───────────────────────────────────────
    worker_meta = json.loads((colony_dir / "worker.json").read_text())
    assert worker_meta["name"] == "worker"
    assert worker_meta["queen_id"] == queen_name
    assert worker_meta["queen_phase"] == "planning"
    assert worker_meta["spawned_from"] == source_session_id
    assert worker_meta["goal"]["description"] == "trade carefully"
    assert worker_meta["system_prompt"] == "you are the queen"
    assert worker_meta["identity_prompt"] == "You are Charlotte, head of finance."
    assert worker_meta["tools"] == ["read_file", "search_files"]
    assert worker_meta["skills_catalog_prompt"] == "<skills/>"
    assert worker_meta["protocols_prompt"] == "<protocols/>"
    assert worker_meta["loop_config"]["max_iterations"] == 42
    assert worker_meta["loop_config"]["max_tool_calls_per_turn"] == 7

    # ── duplicated queen session dir ──────────────────────────────
    dest_queen_dir = _queen_session_dir(colony_session_id, queen_name)
    assert dest_queen_dir.is_dir(), (
        f"Forked session dir not under {queen_name}/, got {dest_queen_dir}"
    )

    # Conversations were copied
    assert (dest_queen_dir / "conversations" / "parts" / "0000000000.json").is_file()
    assert (dest_queen_dir / "conversations" / "parts" / "0000000001.json").is_file()

    # Forked meta.json carries the colony_fork flag and links to the colony
    dest_meta = json.loads((dest_queen_dir / "meta.json").read_text())
    assert dest_meta["colony_fork"] is True
    assert dest_meta["forked_from"] == source_session_id
    assert dest_meta["queen_id"] == queen_name
    assert dest_meta["agent_path"] == str(colony_dir)
    assert dest_meta["agent_name"] == "Honeycomb"

    # ── worker storage receives queen conversations ───────────────
    worker_storage_convs = (
        tmp_path / ".hive" / "agents" / "honeycomb" / "worker" / "conversations"
    )
    assert worker_storage_convs.is_dir()
    assert (worker_storage_convs / "parts" / "0000000000.json").is_file()

    # ── source queen session updated with agent_path ──────────────
    source_meta = json.loads((source_queen_dir / "meta.json").read_text())
    assert source_meta["agent_path"] == str(colony_dir)
    assert source_meta["agent_name"] == "Honeycomb"


# ---------------------------------------------------------------------------
# 3. create_session_with_worker_colony resolves the forked session ID from
#    metadata.json (not whatever the caller passed in)
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_create_session_with_worker_colony_uses_forked_session_id(
    tmp_path, monkeypatch
):
    """When a colony is loaded, its metadata.json's queen_session_id wins.

    Regression: returning to a colony was loading the SOURCE queen session
    instead of the forked one because the frontend's history scan found the
    source first. The backend now overrides ``queen_resume_from`` with the
    colony's designated session ID.
    """
    monkeypatch.setattr(Path, "home", classmethod(lambda cls: tmp_path))

    from framework.server.session_manager import SessionManager

    queen_name = "queen_finance_fundraising"
    source_id = "session_20260410_120000_aaaaaaaa"
    forked_id = "session_20260410_130000_bbbbbbbb"

    # Pre-create the forked queen session that the colony points at
    _make_fake_queen_session(tmp_path, queen_name=queen_name, session_id=forked_id)
    # Also create a source session (the one we don't want to be picked)
    _make_fake_queen_session(tmp_path, queen_name=queen_name, session_id=source_id)

    # Build the colony dir with metadata pointing at the forked session
    colony_dir = tmp_path / ".hive" / "colonies" / "honeycomb"
    colony_dir.mkdir(parents=True)
    (colony_dir / "data").mkdir()
    (colony_dir / "metadata.json").write_text(
        json.dumps(
            {
                "colony_name": "honeycomb",
                "queen_name": queen_name,
                "queen_session_id": forked_id,
                "source_session_id": source_id,
                "workers": {"worker": {"task": "trade"}},
            }
        ),
        encoding="utf-8",
    )
    (colony_dir / "worker.json").write_text(
        json.dumps(
            {
                "name": "worker",
                "version": "1.0.0",
                "description": "trader",
                "goal": {"description": "trade", "success_criteria": [], "constraints": []},
                "system_prompt": "be a trader",
                "tools": [],
                "loop_config": {},
            }
        ),
        encoding="utf-8",
    )

    manager = SessionManager(model="claude-haiku-4-5-20251001")

    # Stub out the heavy bits: we only care about session-id resolution.
    captured: dict = {}

    async def fake_load_worker_core(self, session, agent_path, *, colony_id=None, model=None):
        session.colony_id = colony_id or Path(agent_path).name
        session.worker_path = Path(agent_path)
        session.colony_runtime = MagicMock()
        session.worker_info = SimpleNamespace(name="worker")

    async def fake_start_queen(self, session, **kwargs):
        captured["session_id"] = session.id
        captured["queen_resume_from"] = session.queen_resume_from
        captured["queen_name"] = session.queen_name
        session.queen_executor = SimpleNamespace(node_registry={"queen": MagicMock()})

    async def fake_restore_active_triggers(self, session, session_id):
        return None

    monkeypatch.setattr(SessionManager, "_load_worker_core", fake_load_worker_core)
    monkeypatch.setattr(SessionManager, "_start_queen", fake_start_queen)
    monkeypatch.setattr(
        SessionManager, "_restore_active_triggers", fake_restore_active_triggers
    )

    # Caller passes the SOURCE session id (mimicking the frontend's history scan)
    session = await manager.create_session_with_worker_colony(
        agent_path=colony_dir,
        queen_resume_from=source_id,
    )

    # The colony's forked session ID should win, not the caller's source ID
    assert captured["queen_resume_from"] == forked_id, (
        f"Expected forked id {forked_id}, got {captured['queen_resume_from']}"
    )
    assert session.id == forked_id, (
        f"Live session ID should match forked session, got {session.id}"
    )
    assert captured["queen_name"] == queen_name

@@ -0,0 +1,293 @@
"""Live end-to-end test of the queen → fork → colony flow.

Hits the real LLM via the in-process aiohttp app. Validates that:
- The queen identity hook fires after queen startup
- ``handle_colony_spawn`` produces the right artifacts under the actual
  selected queen identity (not "default")
- The forked queen session dir lives under the correct queen profile
- The colony's metadata.json picks up the real queen_name

Skipped automatically if no LLM API key is configured.

Costs a few cents per run.
"""

from __future__ import annotations

import asyncio
import importlib
import json
import os
import shutil
import sys
import time
from pathlib import Path

import pytest
from aiohttp.test_utils import TestClient, TestServer


# ---------------------------------------------------------------------------
# Skip if no live LLM credentials are available
# ---------------------------------------------------------------------------


_LLM_KEY_ENV_VARS = (
    "ANTHROPIC_API_KEY",
    "OPENAI_API_KEY",
    "ZAI_API_KEY",
    "OPENROUTER_API_KEY",
    "CEREBRAS_API_KEY",
    "GROQ_API_KEY",
    "GOOGLE_AI_API_KEY",
    "MINIMAX_API_KEY",
)


def _has_any_llm_key() -> bool:
    return any(os.environ.get(k) for k in _LLM_KEY_ENV_VARS)


pytestmark = pytest.mark.skipif(
    not _has_any_llm_key(),
    reason="No LLM API key set; skipping live integration test",
)


# ---------------------------------------------------------------------------
# Fixture: isolated ~/.hive in a temp dir
# ---------------------------------------------------------------------------


# Modules that import HIVE_HOME / QUEENS_DIR / COLONIES_DIR / MEMORIES_DIR /
# HIVE_CONFIG_FILE at import time and need their bindings rewritten when we
# redirect ~/.hive to a temp directory.
_HIVE_PATH_CONSUMERS = (
    "framework.config",
    "framework.server.session_manager",
    "framework.server.queen_orchestrator",
    "framework.server.routes_queens",
    "framework.server.app",
    "framework.agents.discovery",
    "framework.agents.queen.queen_profiles",
    "framework.tools.queen_lifecycle_tools",
    "framework.storage.migrate_v2",
    "framework.loader.cli",
)

_HIVE_PATH_NAMES = (
    ("HIVE_HOME", lambda h: h),
    ("QUEENS_DIR", lambda h: h / "agents" / "queens"),
    ("COLONIES_DIR", lambda h: h / "colonies"),
    ("MEMORIES_DIR", lambda h: h / "memories"),
    ("HIVE_CONFIG_FILE", lambda h: h / "configuration.json"),
)


@pytest.fixture
def isolated_hive_home(tmp_path, monkeypatch):
    """Redirect ~/.hive to a temp directory.

    Patches Path.home() AND every module-level binding of HIVE_HOME and
    its derivatives, since those constants were captured at import time
    and won't follow Path.home() changes alone.

    Copies the user's real ~/.hive/configuration.json into the temp home
    so the LLM provider config (model, api_base) is preserved.
    """
    fake_home_root = tmp_path
    fake_hive = fake_home_root / ".hive"
    fake_hive.mkdir()

    # Copy LLM configuration so the framework picks up the user's model.
    # Done BEFORE we monkey-patch Path.home so the source resolves correctly.
    real_config = Path.home() / ".hive" / "configuration.json"
    if real_config.exists():
        shutil.copy(real_config, fake_hive / "configuration.json")

    # Patch Path.home -> tmp_path so any call-site computation goes there.
    monkeypatch.setattr(Path, "home", classmethod(lambda cls: fake_home_root))

    # Patch every module-level binding that captured a path constant.
    for mod_name in _HIVE_PATH_CONSUMERS:
        try:
            mod = importlib.import_module(mod_name)
        except ImportError:
            continue
        for attr_name, builder in _HIVE_PATH_NAMES:
            if hasattr(mod, attr_name):
                monkeypatch.setattr(mod, attr_name, builder(fake_hive))

    yield fake_hive


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


async def _wait_for_queen_identity(
    client: TestClient,
    session_id: str,
    *,
    timeout: float = 60.0,
    poll_interval: float = 0.5,
) -> str:
    """Poll /api/sessions/{id} until queen_id is set to a non-default value.

    Returns the resolved queen_id. Fails the test on timeout.
    """
    deadline = time.time() + timeout
    last_qid: str | None = None
    while time.time() < deadline:
        r = await client.get(f"/api/sessions/{session_id}")
        if r.status == 200:
            d = await r.json()
            qid = d.get("queen_id")
            if qid:
                last_qid = qid
                if qid != "default":
                    return qid
        await asyncio.sleep(poll_interval)
    pytest.fail(
        f"Queen identity not selected within {timeout}s "
        f"(last queen_id={last_qid!r}). The queen identity hook may not be firing."
    )


# ---------------------------------------------------------------------------
# The live test
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_live_queen_fork_to_colony(isolated_hive_home):
    """Spin up a real queen, let her select an identity, fork to a colony.

    Validates the full wiring against a live LLM:
    1. Queen-only session starts and runs the identity hook
    2. session.queen_dir gets relocated from default/ to the selected queen
    3. handle_colony_spawn produces metadata pointing at the real queen
    4. The forked queen session dir lives under the correct queen identity
    5. Conversations are copied through to worker storage
    """
    from framework.agents.queen.queen_profiles import ensure_default_queens
    from framework.server.app import create_app
    from framework.server.session_manager import _queen_session_dir

    # Pre-populate queen profiles in the temp ~/.hive so the identity
    # hook has something to choose from.
    ensure_default_queens()

    app = create_app()  # picks up model from copied configuration.json
    manager = app["manager"]

    async with TestClient(TestServer(app)) as client:
        # ── 1. Create a queen-only session ─────────────────────────
        # The initial_prompt steers the identity hook toward a finance queen.
        resp = await client.post(
            "/api/sessions",
            json={
                "initial_prompt": (
                    "I want to incubate a finance colony to help me trade "
                    "carefully on a small honeycomb market. Just briefly "
                    "acknowledge — one sentence is fine."
                ),
            },
        )
        assert resp.status == 201, await resp.text()
        body = await resp.json()
        session_id = body["session_id"]
        assert session_id.startswith("session_")

        # ── 2. Wait for queen identity hook to fire ────────────────
        queen_name = await _wait_for_queen_identity(client, session_id)
        assert queen_name != "default", (
            f"Identity hook didn't pick a real queen, got {queen_name!r}"
        )

        # ── 3. Fork to a colony ────────────────────────────────────
        colony_name = "live_test_honeycomb"
        resp = await client.post(
            f"/api/sessions/{session_id}/colony-spawn",
            json={"colony_name": colony_name, "task": "trade carefully"},
        )
        assert resp.status == 200, await resp.text()
        spawn_data = await resp.json()
        colony_session_id = spawn_data["queen_session_id"]
        assert spawn_data["colony_name"] == colony_name
        assert spawn_data["is_new"] is True
        assert colony_session_id != session_id

        # ── 4. Validate on-disk artifacts ──────────────────────────
        colony_dir = isolated_hive_home / "colonies" / colony_name
        assert colony_dir.is_dir()
        assert (colony_dir / "worker.json").is_file()
        assert (colony_dir / "metadata.json").is_file()

        metadata = json.loads((colony_dir / "metadata.json").read_text())
        assert metadata["colony_name"] == colony_name
        # The crucial assertion: the metadata's queen_name must be the
        # auto-selected queen, not "default". This is what failed
        # repeatedly yesterday before the queen-dir relocate fix.
        assert metadata["queen_name"] == queen_name, (
            f"metadata.queen_name should be {queen_name!r}, got "
            f"{metadata['queen_name']!r}. The session-dir relocation in "
            f"queen_orchestrator may not be firing."
        )
        assert metadata["queen_session_id"] == colony_session_id
        assert metadata["source_session_id"] == session_id

        worker_meta = json.loads((colony_dir / "worker.json").read_text())
        assert worker_meta["queen_id"] == queen_name
        assert worker_meta["spawned_from"] == session_id
        # The queen always has at least the framework-default tools
        assert len(worker_meta["tools"]) > 0
        # Goal carries the task we passed in
        assert worker_meta["goal"]["description"] == "trade carefully"

        # ── 5. Validate the forked queen session dir ──────────────
        # It must live under the SELECTED queen identity, not "default".
        dest_queen_dir = _queen_session_dir(colony_session_id, queen_name)
        assert dest_queen_dir.is_dir(), (
            f"Forked session dir not under {queen_name}/, expected "
            f"{dest_queen_dir}"
        )
        # Conversations from the original queen session were copied
        assert (dest_queen_dir / "conversations").is_dir()

        dest_meta = json.loads((dest_queen_dir / "meta.json").read_text())
        assert dest_meta["colony_fork"] is True
        assert dest_meta["queen_id"] == queen_name
        assert dest_meta["forked_from"] == session_id
        assert dest_meta["agent_path"] == str(colony_dir)

        # ── 6. The forked session must NOT show up in the queen DM history.
        from framework.server.session_manager import SessionManager

        cold = SessionManager.list_cold_sessions()
        forked_in_history = [s for s in cold if s.get("session_id") == colony_session_id]
        assert not forked_in_history, (
            f"Forked colony session leaked into queen DM history: {forked_in_history}"
        )

        # ── 7. Worker storage received the conversations ──────────
        worker_storage_convs = (
            isolated_hive_home / "agents" / colony_name / "worker" / "conversations"
        )
        assert worker_storage_convs.is_dir()
        # The queen has had at least one turn (the initial_prompt acknowledgment),
        # so there should be conversation parts.
        parts_dir = worker_storage_convs / "parts"
        if parts_dir.exists():
            assert any(parts_dir.iterdir()), (
                "worker storage has conversations dir but no parts"
            )

        # ── 8. Stop the live session cleanly ──────────────────────
        resp = await client.delete(f"/api/sessions/{session_id}")
        assert resp.status == 200

    # Drain background queen task so pytest doesn't warn about
    # never-awaited coroutines.
    await manager.shutdown_all()

@@ -67,11 +67,12 @@ class TestAgentEvent:
            execution_id="exec_1",
            data={"output": "result"},
            correlation_id="corr_1",
-            graph_id="graph_1",
+            colony_id="colony_1",
        )
        d = event.to_dict()
        assert d["type"] == "execution_completed"
        assert d["stream_id"] == "stream_1"
+        assert d["colony_id"] == "colony_1"

    def test_to_dict_includes_run_id(self):
        """run_id is included in to_dict() when set."""

@@ -350,28 +351,28 @@ class TestEventFiltering:
        assert len(received) == 1

    @pytest.mark.asyncio
-    async def test_filter_by_graph(self):
-        """filter_graph only receives events from that graph."""
+    async def test_filter_by_colony(self):
+        """filter_colony only receives events from that colony."""
        bus = EventBus()
        received = []

        async def handler(event: AgentEvent) -> None:
-            received.append(event.graph_id)
+            received.append(event.colony_id)

        bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=handler,
-            filter_graph="graph_a",
+            filter_colony="colony_a",
        )

        await bus.publish(
-            AgentEvent(type=EventType.EXECUTION_STARTED, stream_id="s", graph_id="graph_a")
+            AgentEvent(type=EventType.EXECUTION_STARTED, stream_id="s", colony_id="colony_a")
        )
        await bus.publish(
-            AgentEvent(type=EventType.EXECUTION_STARTED, stream_id="s", graph_id="graph_b")
+            AgentEvent(type=EventType.EXECUTION_STARTED, stream_id="s", colony_id="colony_b")
        )

-        assert received == ["graph_a"]
+        assert received == ["colony_a"]


# ---------------------------------------------------------------------------

@@ -915,13 +916,10 @@ class TestEventType:
        assert EventType.ESCALATION_REQUESTED
        assert EventType.LLM_TURN_COMPLETE
        assert EventType.NODE_ACTION_PLAN
-        assert EventType.WORKER_GRAPH_LOADED
+        assert EventType.WORKER_COLONY_LOADED
        assert EventType.CREDENTIALS_REQUIRED
        assert EventType.EXECUTION_RESURRECTED
        assert EventType.DRAFT_GRAPH_UPDATED
        assert EventType.FLOWCHART_MAP_UPDATED
        assert EventType.QUEEN_PHASE_CHANGED
        assert EventType.QUEEN_PERSONA_SELECTED
        assert EventType.SUBAGENT_REPORT
        assert EventType.TRIGGER_AVAILABLE
        assert EventType.TRIGGER_FIRED

@@ -85,12 +85,6 @@ class TestClassifyFlowchartNode:
        result = classify_flowchart_node(node, 2, 3, edges, {"n3"})
        assert result == "terminal"

-    def test_gcu_node_is_browser(self):
-        node = {"id": "n2", "node_type": "gcu", "tools": []}
-        edges = [{"source": "n1", "target": "n2"}]
-        result = classify_flowchart_node(node, 1, 3, edges, set())
-        assert result == "browser"

    def test_subprocess_node(self):
        node = {"id": "n2", "node_type": "event_loop", "tools": [], "sub_agents": ["sub1"]}
        edges = [{"source": "n1", "target": "n2"}, {"source": "n2", "target": "n3"}]

@@ -1236,17 +1236,16 @@ class TestExtractToolCallHistory:
                role="assistant",
                content="",
                tool_calls=[
                    _make_tool_call(
                        "c2", "save_data", {"filename": "output.txt", "content": "data"}
                    ),
                    _make_tool_call("c2", "read_file", {"path": "/tmp/output.txt"}),
                ],
            ),
-            Message(seq=3, role="tool", content="saved", tool_use_id="c2"),
+            Message(seq=3, role="tool", content="contents", tool_use_id="c2"),
        ]
        result = extract_tool_call_history(msgs)
        assert "web_search (1x)" in result
        assert "save_data (1x)" in result
        assert "FILES SAVED: output.txt" in result
        assert "read_file (1x)" in result
        # read_file paths are tracked under FILES SAVED in production
        assert "FILES SAVED: /tmp/output.txt" in result

    def test_errors_included(self):
        msgs = [

@@ -63,9 +63,16 @@ class TestSkillCatalog:
        assert catalog.to_prompt() == ""

    def test_to_prompt_framework_only(self):
-        """Framework-scope skills should NOT appear in the catalog prompt."""
+        """Framework-scope skills now appear in the catalog like any other scope.
+
+        The old design filtered framework skills out and surfaced them via
+        DefaultSkillManager only. The current design folds them into the
+        normal progressive-disclosure catalog.
+        """
        catalog = SkillCatalog([_make_skill(source_scope="framework")])
-        assert catalog.to_prompt() == ""
+        prompt = catalog.to_prompt()
+        assert "<available_skills>" in prompt
+        assert "<name>my-skill</name>" in prompt

    def test_to_prompt_xml_generation(self):
        skills = [

@@ -109,8 +116,8 @@ class TestSkillCatalog:
        assert "<special>" in prompt
        assert "&" in prompt

-    def test_to_prompt_excludes_framework_includes_others(self):
-        """Mixed scopes: only framework skills are excluded from catalog."""
+    def test_to_prompt_includes_all_scopes(self):
+        """Mixed scopes: project, user, AND framework skills all appear in the catalog."""
        skills = [
            _make_skill("proj", "Project skill", "project"),
            _make_skill("usr", "User skill", "user"),

@@ -121,7 +128,7 @@ class TestSkillCatalog:

        assert "<name>proj</name>" in prompt
        assert "<name>usr</name>" in prompt
-        assert "fw" not in prompt
+        assert "<name>fw</name>" in prompt

    def test_to_prompt_contains_behavioral_instruction(self):
        catalog = SkillCatalog([_make_skill(source_scope="project")])

@@ -42,11 +42,14 @@ class TestSkillResourceBaseDir:

        assert "<base_dir>/path/with <&> chars</base_dir>" in prompt

-    def test_base_dir_absent_for_framework_skills(self):
-        """Framework-scope skills are filtered from the catalog, so no base_dir either."""
+    def test_base_dir_present_for_framework_skills(self):
+        """Framework-scope skills now appear in the catalog like any other scope,
+        and their base_dir is included in the XML."""
        skill = _make_skill("fw", "/hive/_default_skills/fw", source_scope="framework")
        catalog = SkillCatalog([skill])
-        assert catalog.to_prompt() == ""
+        prompt = catalog.to_prompt()
+        assert "<name>fw</name>" in prompt
+        assert "<base_dir>/hive/_default_skills/fw</base_dir>" in prompt

    def test_allowlisted_dirs_matches_skills(self):
        """allowlisted_dirs returns all skill base_dirs including framework ones."""

@@ -44,9 +44,10 @@ class TestGetAllowedAgentRoots:
        roots = _get_allowed_agent_roots()
        assert isinstance(roots, tuple), f"Expected tuple, got {type(roots).__name__}"

-    def test_contains_three_roots(self):
+    def test_contains_four_roots(self):
+        # 4 roots: ~/.hive/colonies, repo/exports (compat), repo/examples, ~/.hive/agents
        roots = _get_allowed_agent_roots()
-        assert len(roots) == 3
+        assert len(roots) == 4

    def test_cached_on_repeated_calls(self):
        first = _get_allowed_agent_roots()

@@ -65,9 +66,9 @@ class TestGetAllowedAgentRoots:
        from framework.server.app import _REPO_ROOT

        roots = _get_allowed_agent_roots()
-        exports_root, examples_root = roots[0], roots[1]
-        assert exports_root == (_REPO_ROOT / "exports").resolve()
-        assert examples_root == (_REPO_ROOT / "examples").resolve()
+        # Order: ~/.hive/colonies, repo/exports, repo/examples, ~/.hive/agents
+        assert roots[1] == (_REPO_ROOT / "exports").resolve()
+        assert roots[2] == (_REPO_ROOT / "examples").resolve()


# ---------------------------------------------------------------------------

@@ -296,7 +297,7 @@ class TestHTTPEndpointsRejectMaliciousPaths:
        _reset_allowed_roots()

    @pytest.mark.asyncio
-    async def test_load_worker_rejects_outside_path(self, tmp_path):
+    async def test_load_colony_rejects_outside_path(self, tmp_path):
        import framework.server.app as app_module

        exports = tmp_path / "exports"

@@ -312,7 +313,7 @@ class TestHTTPEndpointsRejectMaliciousPaths:
        session_id = (await create_resp.json())["session_id"]

        resp = await client.post(
-            f"/api/sessions/{session_id}/graph",
+            f"/api/sessions/{session_id}/colony",
            json={"agent_path": "/tmp/evil"},
        )
        assert resp.status == 400