hive/core/tests/dummy_agents/conftest.py
"""Shared fixtures for dummy agent end-to-end tests.
These tests use real LLM providers — they are NOT part of regular CI.
Run via: cd core && uv run python tests/dummy_agents/run_all.py
"""
from __future__ import annotations
import asyncio
import json
import os
from pathlib import Path
import pytest
from framework.graph.executor import GraphExecutor, ParallelExecutionConfig
from framework.graph.goal import Goal
from framework.llm.litellm import LiteLLMProvider
from framework.runtime.core import Runtime
# ── module-level state set by run_all.py ─────────────────────────────
_selected_model: str | None = None
_selected_api_key: str | None = None
_selected_extra_headers: dict[str, str] | None = None
_selected_api_base: str | None = None
_EXECUTION_TIMEOUT_SECS = float(os.environ.get("DUMMY_AGENT_EXEC_TIMEOUT_SECS", "90"))
def set_llm_selection(
model: str,
api_key: str,
extra_headers: dict[str, str] | None = None,
api_base: str | None = None,
) -> None:
"""Called by run_all.py after user selects a provider."""
global _selected_model, _selected_api_key, _selected_extra_headers, _selected_api_base
_selected_model = model
_selected_api_key = api_key
_selected_extra_headers = extra_headers
_selected_api_base = api_base
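# Illustrative sketch only (not the actual run_all.py implementation): the
# runner is expected to call set_llm_selection() before invoking pytest
# programmatically, along these lines:
#
#     import os
#     import pytest
#     from tests.dummy_agents import conftest
#
#     conftest.set_llm_selection(
#         model="<litellm-model-id>",           # provider/model string for LiteLLM
#         api_key=os.environ["LLM_API_KEY"],    # placeholder env var name
#     )
#     pytest.main(["tests/dummy_agents", "-v"])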
# ── collection hook: skip entire directory when not configured ───────
def _try_auto_configure_from_hive_config() -> bool:
"""Try to load LLM provider from ~/.hive/configuration.json.
Returns True if successfully configured, False otherwise.
"""
try:
from framework.config import (
get_api_base,
get_api_key,
get_llm_extra_kwargs,
get_preferred_model,
)
model = get_preferred_model()
api_key = get_api_key()
if not model or not api_key:
return False
extra_kwargs = get_llm_extra_kwargs()
set_llm_selection(
model=model,
api_key=api_key,
api_base=get_api_base(),
extra_headers=extra_kwargs.get("extra_headers"),
)
return True
except Exception:
return False
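# The schema of ~/.hive/configuration.json is owned by framework.config; based
# on the getters used above, a working file is assumed to look roughly like
# this (key names are illustrative, not authoritative):
#
#     {
#       "model": "<litellm-model-id>",
#       "api_key": "<provider-api-key>",
#       "api_base": "https://...",                  # optional
#       "llm_extra_kwargs": {"extra_headers": {}}   # optional
#     }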
def pytest_collection_modifyitems(config, items):
"""Skip all dummy_agents tests when no LLM is configured.
Resolution order:
1. Already configured via run_all.py (set_llm_selection called)
2. Auto-configure from ~/.hive/configuration.json
3. Skip tests
"""
if _selected_model is not None:
return # LLM configured via run_all.py, run normally
# Try auto-configure from hive config
if _try_auto_configure_from_hive_config():
return # Config found, run tests
skip = pytest.mark.skip(
reason="Dummy agent tests require a real LLM. "
"Configure ~/.hive/configuration.json or "
"run via: cd core && uv run python tests/dummy_agents/run_all.py"
)
for item in items:
if "dummy_agents" in str(item.fspath):
item.add_marker(skip)
# ── fixtures ─────────────────────────────────────────────────────────
@pytest.fixture(scope="session")
def llm_provider():
"""Real LLM provider using the user-selected model."""
if _selected_model is None or _selected_api_key is None:
pytest.skip("No LLM selected — run via run_all.py")
kwargs = {"model": _selected_model, "api_key": _selected_api_key}
if _selected_extra_headers:
kwargs["extra_headers"] = _selected_extra_headers
if _selected_api_base:
kwargs["api_base"] = _selected_api_base
return LiteLLMProvider(**kwargs)
@pytest.fixture(scope="session")
def tool_registry():
"""Load hive-tools MCP server and return a ToolRegistry with real tools.
Session-scoped so the MCP server is started once and reused across tests.
"""
from framework.runner.tool_registry import ToolRegistry
registry = ToolRegistry()
# Resolve the tools directory relative to the repo root
repo_root = Path(__file__).resolve().parents[3] # core/tests/dummy_agents -> repo root
tools_dir = repo_root / "tools"
mcp_config = {
"name": "hive-tools",
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "mcp_server.py", "--stdio"],
"cwd": str(tools_dir),
"description": "Hive tools MCP server",
}
registry.register_mcp_server(mcp_config)
yield registry
registry.cleanup()
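# For debugging the MCP side in isolation, the same server can be started by
# hand with the command the fixture registers above:
#
#     cd tools && uv run python mcp_server.py --stdio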
@pytest.fixture
def runtime(tmp_path):
"""Real Runtime backed by a temp directory."""
return Runtime(storage_path=tmp_path / "runtime")
@pytest.fixture
def goal():
return Goal(id="dummy", name="Dummy Agent Test", description="Level 2 end-to-end testing")
def make_executor(
runtime: Runtime,
llm: LiteLLMProvider,
*,
enable_parallel: bool = True,
parallel_config: ParallelExecutionConfig | None = None,
loop_config: dict | None = None,
tool_registry=None,
storage_path: Path | None = None,
event_bus=None,
stream_id: str = "",
) -> GraphExecutor:
"""Factory that creates a GraphExecutor with a real LLM."""
tools = []
tool_executor = None
if tool_registry is not None:
tools = list(tool_registry.get_tools().values())
tool_executor = tool_registry.get_executor()
executor = GraphExecutor(
runtime=runtime,
llm=llm,
tools=tools,
tool_executor=tool_executor,
enable_parallel_execution=enable_parallel,
parallel_config=parallel_config,
loop_config=loop_config or {"max_iterations": 10},
storage_path=storage_path,
event_bus=event_bus,
stream_id=stream_id,
)
original_execute = executor.execute
async def execute_with_timeout(*args, **kwargs):
try:
return await asyncio.wait_for(
original_execute(*args, **kwargs),
timeout=_EXECUTION_TIMEOUT_SECS,
)
        except asyncio.TimeoutError as e:  # alias of builtin TimeoutError on Python 3.11+
raise TimeoutError(
"Dummy agent execution timed out after "
f"{_EXECUTION_TIMEOUT_SECS:.0f}s. "
"This usually means the current worker execution path "
"(GraphExecutor -> WorkerAgent -> EventLoopNode) is stuck "
"waiting on the provider or tool-calling behavior."
) from e
executor.execute = execute_with_timeout # type: ignore[method-assign]
return executor
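# Typical shape of a dummy-agent test built from the pieces above (a sketch;
# the graph construction and the exact executor.execute() arguments belong to
# the individual test modules, not this conftest):
#
#     async def test_simple_graph(runtime, llm_provider, goal, tool_registry, tmp_path, artifact):
#         storage = tmp_path / "session"
#         executor = make_executor(
#             runtime,
#             llm_provider,
#             tool_registry=tool_registry,
#             storage_path=storage,
#         )
#         result = await executor.execute(graph, goal)  # graph built by the test
#         artifact.record(result, expected="agent completes the task", storage_path=storage)
#         artifact.check("execution succeeded", result.success, actual=str(result.error))
#         assert result.success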
# ── Artifact capture: raw output written to disk for every test ──────
ARTIFACTS_DIR = Path("/tmp/hive_test_artifacts")
class TestArtifact:
"""Collects raw output + expected behavior for a single test.
Captures TWO kinds of data:
1. Checks: individual assertion results (expected vs actual)
2. Framework raw output: the real conversation, state, tool calls
written by the executor to storage_path — copied verbatim,
not curated.
Usage in tests:
        async def test_foo(artifact, ...):
result = await executor.execute(...)
artifact.record(result, expected="...", storage_path=tmp_path/"session")
"""
def __init__(self, test_id: str):
self.test_id = test_id
self._safe_name = test_id.replace("::", "__").replace("/", "_")
self._dir = ARTIFACTS_DIR / self._safe_name
self._data: dict = {"test_id": test_id, "raw_output": None, "expected": "", "checks": []}
def record(self, result, *, expected: str = "", storage_path=None):
"""Record an ExecutionResult and copy real framework files."""
self._data["expected"] = expected
if result is None:
self._data["raw_output"] = None
return
self._data["raw_output"] = {
"success": getattr(result, "success", None),
"output": _safe_serialize(getattr(result, "output", {})),
"error": getattr(result, "error", None),
"path": getattr(result, "path", []),
"steps_executed": getattr(result, "steps_executed", 0),
"total_tokens": getattr(result, "total_tokens", 0),
"total_latency_ms": getattr(result, "total_latency_ms", 0),
"execution_quality": getattr(result, "execution_quality", ""),
"total_retries": getattr(result, "total_retries", 0),
"node_visit_counts": getattr(result, "node_visit_counts", {}),
"nodes_with_failures": getattr(result, "nodes_with_failures", []),
"session_state_buffer": _safe_serialize(
(getattr(result, "session_state", {}) or {}).get("data_buffer", {})
),
}
# Copy real framework output files (conversations, state, runs)
if storage_path is not None:
self._copy_framework_files(Path(storage_path))
def _copy_framework_files(self, storage_path: Path):
"""Copy real framework output to persistent artifact directory."""
import shutil
raw_dir = self._dir / "raw"
raw_dir.mkdir(parents=True, exist_ok=True)
if storage_path.exists():
for src in storage_path.rglob("*"):
if src.is_file() and src.suffix in (".json", ".jsonl", ".txt"):
rel = src.relative_to(storage_path)
dst = raw_dir / rel
dst.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, dst)
def record_value(self, key: str, value, *, expected: str = ""):
"""Record an arbitrary key-value (for non-ExecutionResult tests)."""
self._data.setdefault("values", {})[key] = _safe_serialize(value)
if expected:
self._data["expected"] = expected
def check(self, description: str, passed: bool, actual: str = "", expected_val: str = ""):
"""Record an individual assertion check."""
self._data["checks"].append(
{
"description": description,
"passed": passed,
"actual": actual,
"expected": expected_val,
}
)
def save(self):
"""Write artifact to disk."""
self._dir.mkdir(parents=True, exist_ok=True)
path = self._dir / "artifact.json"
with open(path, "w") as f:
json.dump(self._data, f, indent=2, default=str)
def _safe_serialize(obj):
"""Convert to JSON-safe types."""
if obj is None:
return None
if isinstance(obj, (str, int, float, bool)):
return obj
if isinstance(obj, dict):
return {str(k): _safe_serialize(v) for k, v in obj.items()}
if isinstance(obj, (list, tuple)):
return [_safe_serialize(v) for v in obj]
return str(obj)[:500]
@pytest.fixture
def artifact(request, tmp_path):
"""Fixture that captures raw test output to disk.
    Tests that request this fixture get an artifact recorder. Call
    artifact.record(result) and artifact.check("description", passed, actual,
    expected) to capture data; the artifact is saved automatically on teardown.
On teardown, copies ALL framework output files (conversations, state,
tool logs) from tmp_path to the persistent artifact directory. This
captures the REAL raw output — not curated summaries.
"""
test_id = request.node.nodeid
art = TestArtifact(test_id)
yield art
# Copy all framework files from the test's tmp_path
art._copy_framework_files(tmp_path)
art.save()
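# Tests that never produce an ExecutionResult can still capture data through
# record_value/check, e.g. (illustrative):
#
#     def test_tool_discovery(tool_registry, artifact):
#         names = list(tool_registry.get_tools())
#         artifact.record_value("tool_names", names, expected="hive tools are discoverable")
#         artifact.check("at least one tool loaded", len(names) > 0, actual=str(len(names)))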
# Fallback pytest hooks: for tests that DON'T use the artifact fixture,
# create a minimal artifact from the pass/fail status.
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
outcome = yield
rep = outcome.get_result()
if rep.when == "call":
item._test_report = rep
def pytest_runtest_teardown(item, nextitem):
"""Auto-save a minimal artifact for tests that didn't use the fixture."""
report = getattr(item, "_test_report", None)
if report is None:
return
# Check if the test already used the artifact fixture
if "artifact" in item.fixturenames:
return # Already handled by fixture teardown
safe_name = item.nodeid.replace("::", "__").replace("/", "_")
out_dir = ARTIFACTS_DIR / safe_name
out_dir.mkdir(parents=True, exist_ok=True)
data = {
"test_id": item.nodeid,
"raw_output": None,
"expected": "",
"checks": [],
"auto_captured": True,
"status": "PASS" if report.passed else ("FAIL" if report.failed else "SKIP"),
}
if report.failed and report.longreprtext:
data["failure_text"] = report.longreprtext[:5000]
with open(out_dir / "artifact.json", "w") as f:
json.dump(data, f, indent=2, default=str)
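# Resulting on-disk layout per test under /tmp/hive_test_artifacts/ (derived
# from the code above):
#
#     <nodeid with "::" -> "__" and "/" -> "_">/
#         artifact.json   # checks, expected text, serialized ExecutionResult,
#                         # or the minimal auto-captured pass/fail record
#         raw/            # verbatim .json/.jsonl/.txt files copied from the
#                         # executor's storage_path / the test's tmp_path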