diff --git a/core/framework/host/colony_metadata.py b/core/framework/host/colony_metadata.py new file mode 100644 index 00000000..31b18630 --- /dev/null +++ b/core/framework/host/colony_metadata.py @@ -0,0 +1,101 @@ +"""Read/write helpers for per-colony metadata.json. + +A colony's metadata.json lives at ``{COLONIES_DIR}/{colony_name}/metadata.json`` +and holds both immutable provenance (the queen that created it, the forked +session id, timestamps) and a small number of mutable user-editable fields. + +Today the only mutable field we surface through the UI is: + +- ``enabled_mcp_tools: list[str] | null`` — the per-colony MCP tool + allowlist. ``None`` means "allow every MCP tool" (default), so + existing colonies without the key keep their current behavior. + +Keeping the read/write helpers in one place — instead of scattering +``json.loads(metadata_path.read_text())`` across the server — makes the +schema easy to evolve without chasing readers. +""" + +from __future__ import annotations + +import json +import logging +from pathlib import Path +from typing import Any + +from framework.config import COLONIES_DIR + +logger = logging.getLogger(__name__) + + +def colony_metadata_path(colony_name: str) -> Path: + """Return the on-disk path to a colony's metadata.json.""" + return COLONIES_DIR / colony_name / "metadata.json" + + +def load_colony_metadata(colony_name: str) -> dict[str, Any]: + """Load metadata.json for ``colony_name``. + + Returns an empty dict if the file is missing or malformed — callers + are expected to treat missing fields as defaults. + """ + path = colony_metadata_path(colony_name) + if not path.exists(): + return {} + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + logger.warning("Failed to read colony metadata at %s", path) + return {} + return data if isinstance(data, dict) else {} + + +def update_colony_metadata(colony_name: str, updates: dict[str, Any]) -> dict[str, Any]: + """Shallow-merge ``updates`` into metadata.json and persist. + + Returns the full updated dict. Raises ``FileNotFoundError`` if the + colony does not exist. Writes atomically via ``os.replace`` to + minimize the window where a reader could see a half-written file. + """ + import os + import tempfile + + path = colony_metadata_path(colony_name) + if not path.parent.exists(): + raise FileNotFoundError(f"Colony '{colony_name}' not found") + + data = load_colony_metadata(colony_name) if path.exists() else {} + for key, value in updates.items(): + data[key] = value + + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_path = tempfile.mkstemp( + prefix=".metadata.", + suffix=".json.tmp", + dir=str(path.parent), + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as fh: + json.dump(data, fh, indent=2) + fh.flush() + os.fsync(fh.fileno()) + os.replace(tmp_path, path) + except BaseException: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + return data + + +def list_colony_names() -> list[str]: + """Return the names of every colony that has a metadata.json on disk.""" + if not COLONIES_DIR.is_dir(): + return [] + names: list[str] = [] + for entry in sorted(COLONIES_DIR.iterdir()): + if not entry.is_dir(): + continue + if (entry / "metadata.json").exists(): + names.append(entry.name) + return names diff --git a/core/framework/host/colony_runtime.py b/core/framework/host/colony_runtime.py index bc639e3d..6185101a 100644 --- a/core/framework/host/colony_runtime.py +++ b/core/framework/host/colony_runtime.py @@ -242,6 +242,19 @@ class ColonyRuntime: self._tools = tools or [] self._tool_executor = tool_executor + # Per-colony MCP tool allowlist — applied when spawning workers. A + # value of ``None`` means "allow every MCP tool" (default), an empty + # list disables every MCP tool, and a list of names only enables + # those. Lifecycle / synthetic tools always pass through the filter + # because their names are absent from ``_mcp_tool_names_all``. The + # allowlist is re-read on every ``spawn`` so a PATCH that mutates + # this attribute via ``set_tool_allowlist`` takes effect on the + # NEXT worker spawn without a runtime restart. In-flight workers + # keep the tool list they booted with — workers have no dynamic + # tools provider today. + self._enabled_mcp_tools: list[str] | None = None + self._mcp_tool_names_all: set[str] = set() + # Worker management self._workers: dict[str, Worker] = {} # The persistent client-facing overseer (optional). Set by @@ -384,6 +397,45 @@ class ColonyRuntime: return PipelineRunner([]) return build_pipeline_from_config(stages_config) + # ── Per-colony tool allowlist ─────────────────────────────── + + def set_tool_allowlist( + self, + enabled_mcp_tools: list[str] | None, + mcp_tool_names_all: set[str] | None = None, + ) -> None: + """Configure the per-colony MCP tool allowlist. + + Called at construction time (from SessionManager) and again from + the ``/api/colony/{name}/tools`` PATCH handler when a user edits + the allowlist. The change applies to the NEXT worker spawn — we + never mutate the tool list of a worker that is already running + (workers have no dynamic tools provider, so hot-reloading their + tool set would diverge from the list the LLM was already using). + """ + self._enabled_mcp_tools = list(enabled_mcp_tools) if enabled_mcp_tools is not None else None + if mcp_tool_names_all is not None: + self._mcp_tool_names_all = set(mcp_tool_names_all) + + def _apply_tool_allowlist(self, tools: list) -> list: + """Filter ``tools`` against the colony's MCP allowlist. + + Lifecycle / synthetic tools (those whose names are NOT in + ``_mcp_tool_names_all``) are never gated. MCP tools are kept only + when ``_enabled_mcp_tools`` is None (default allow) or contains + their name. Input list order is preserved so downstream cache + keys and logs stay stable. + """ + if self._enabled_mcp_tools is None: + return tools + allowed = set(self._enabled_mcp_tools) + return [ + t + for t in tools + if getattr(t, "name", None) not in self._mcp_tool_names_all + or getattr(t, "name", None) in allowed + ] + # ── Lifecycle ─────────────────────────────────────────────── async def start(self) -> None: @@ -658,6 +710,14 @@ class ColonyRuntime: spawn_tools = tools if tools is not None else self._tools spawn_executor = tool_executor or self._tool_executor + # Apply the per-colony MCP tool allowlist (if any). Done HERE — + # after spawn_tools is resolved but before it's frozen into the + # worker's AgentContext — so the next spawn reflects any PATCH + # that happened since the last spawn. A value of ``None`` on + # ``_enabled_mcp_tools`` is a no-op so the default path is + # unchanged. + spawn_tools = self._apply_tool_allowlist(spawn_tools) + # Colony progress tracker: when the caller supplied a db_path # in input_data, this worker is part of a SQLite task queue # and must see the hive.colony-progress-tracker skill body in diff --git a/core/framework/server/app.py b/core/framework/server/app.py index 71237281..5d6fa1a9 100644 --- a/core/framework/server/app.py +++ b/core/framework/server/app.py @@ -333,6 +333,9 @@ def create_app(model: str | None = None) -> web.Application: from framework.server.routes_logs import register_routes as register_log_routes from framework.server.routes_messages import register_routes as register_message_routes from framework.server.routes_prompts import register_routes as register_prompt_routes + from framework.server.routes_colony_tools import register_routes as register_colony_tools_routes + from framework.server.routes_mcp import register_routes as register_mcp_routes + from framework.server.routes_queen_tools import register_routes as register_queen_tools_routes from framework.server.routes_queens import register_routes as register_queen_routes from framework.server.routes_sessions import register_routes as register_session_routes from framework.server.routes_workers import register_routes as register_worker_routes @@ -346,6 +349,9 @@ def create_app(model: str | None = None) -> web.Application: register_worker_routes(app) register_log_routes(app) register_queen_routes(app) + register_queen_tools_routes(app) + register_colony_tools_routes(app) + register_mcp_routes(app) register_colony_worker_routes(app) register_prompt_routes(app) diff --git a/core/framework/server/queen_orchestrator.py b/core/framework/server/queen_orchestrator.py index 80656dcb..9cb2e2f9 100644 --- a/core/framework/server/queen_orchestrator.py +++ b/core/framework/server/queen_orchestrator.py @@ -253,6 +253,93 @@ async def materialize_queen_identity( ) +def build_queen_tool_registry_bare() -> tuple[Any, dict[str, list[dict[str, Any]]]]: + """Build a Queen ``ToolRegistry`` and a (server_name → tools) catalog. + + Used by the Tool Library GET route to populate the MCP tool surface + without needing a live queen session. We DO NOT register queen + lifecycle tools here (they require a Session stub); the catalog only + covers MCP-origin tools, which is what the allowlist gates. + + Loading MCP servers spawns subprocesses, so call this once per + backend process and cache the result. + """ + from pathlib import Path + from framework.loader.mcp_registry import MCPRegistry + from framework.loader.tool_registry import ToolRegistry + import framework.agents.queen as _queen_pkg + + queen_registry = ToolRegistry() + queen_pkg_dir = Path(_queen_pkg.__file__).parent + + mcp_config = queen_pkg_dir / "mcp_servers.json" + if mcp_config.exists(): + try: + queen_registry.load_mcp_config(mcp_config) + except Exception: + logger.warning("build_queen_tool_registry_bare: MCP config failed", exc_info=True) + + try: + reg = MCPRegistry() + reg.initialize() + if (queen_pkg_dir / "mcp_registry.json").is_file(): + queen_registry.set_mcp_registry_agent_path(queen_pkg_dir) + registry_configs, selection_max_tools = reg.load_agent_selection(queen_pkg_dir) + + already = {cfg.get("name") for cfg in registry_configs if cfg.get("name")} + extra: list[str] = [] + try: + for entry in reg.list_installed(): + if entry.get("source") != "local": + continue + if not entry.get("enabled", True): + continue + name = entry.get("name") + if name and name not in already: + extra.append(name) + except Exception: + pass + if extra: + try: + extra_configs = reg.resolve_for_agent(include=extra) + registry_configs = list(registry_configs) + [ + reg._server_config_to_dict(c) for c in extra_configs + ] + except Exception: + logger.debug("build_queen_tool_registry_bare: resolve_for_agent(extra) failed", exc_info=True) + + if registry_configs: + queen_registry.load_registry_servers( + registry_configs, + preserve_existing_tools=True, + log_collisions=False, + max_tools=selection_max_tools, + ) + except Exception: + logger.warning("build_queen_tool_registry_bare: MCP registry load failed", exc_info=True) + + # Build the catalog. + tools_by_name = queen_registry.get_tools() + server_map = dict(getattr(queen_registry, "_mcp_server_tools", {}) or {}) + catalog: dict[str, list[dict[str, Any]]] = {} + for server_name in sorted(server_map): + entries: list[dict[str, Any]] = [] + for tool_name in sorted(server_map[server_name]): + tool = tools_by_name.get(tool_name) + if tool is None: + continue + entries.append( + { + "name": tool.name, + "description": tool.description, + "input_schema": tool.parameters, + } + ) + catalog[server_name] = entries + + return queen_registry, catalog + + async def create_queen( session: Session, session_manager: Any, @@ -326,6 +413,45 @@ async def create_queen( if (queen_pkg_dir / "mcp_registry.json").is_file(): queen_registry.set_mcp_registry_agent_path(queen_pkg_dir) registry_configs, selection_max_tools = registry.load_agent_selection(queen_pkg_dir) + + # Auto-include every user-added local MCP server that the repo + # selection hasn't already loaded. Users register servers via + # the `/api/mcp/servers` route (or `hive mcp add`); they live in + # ~/.hive/mcp_registry/installed.json with source == "local". + # New servers take effect on the next queen session start; the + # prompt cache and ToolRegistry are still loaded once per boot. + already_loaded_names = {cfg.get("name") for cfg in registry_configs if cfg.get("name")} + extra_names: list[str] = [] + try: + for entry in registry.list_installed(): + if entry.get("source") != "local": + continue + if not entry.get("enabled", True): + continue + name = entry.get("name") + if not name or name in already_loaded_names: + continue + extra_names.append(name) + except Exception: + logger.debug("Queen: list_installed() failed while auto-including user servers", exc_info=True) + + if extra_names: + try: + extra_configs = registry.resolve_for_agent(include=extra_names) + extra_dicts = [registry._server_config_to_dict(c) for c in extra_configs] + registry_configs = list(registry_configs) + extra_dicts + logger.info( + "Queen: auto-including %d user-added MCP server(s): %s", + len(extra_dicts), + [c.get("name") for c in extra_dicts], + ) + except Exception: + logger.warning( + "Queen: failed to resolve user-added MCP servers %s", + extra_names, + exc_info=True, + ) + if registry_configs: results = queen_registry.load_registry_servers( registry_configs, @@ -417,6 +543,57 @@ async def create_queen( sorted(t.name for t in phase_state.incubating_tools), ) + # ---- Per-queen MCP tool allowlist -------------------------------- + # Capture the set of MCP-origin tool names so the allowlist in + # ``QueenPhaseState`` only gates MCP tools (lifecycle and synthetic + # tools always pass through). Then apply the queen profile's stored + # allowlist (if any) and memoize the filtered independent tool list. + mcp_server_tools_map: dict[str, set[str]] = dict( + getattr(queen_registry, "_mcp_server_tools", {}) + ) + phase_state.mcp_tool_names_all = set().union(*mcp_server_tools_map.values()) if mcp_server_tools_map else set() + phase_state.enabled_mcp_tools = queen_profile.get("enabled_mcp_tools") + phase_state.rebuild_independent_filter() + if phase_state.enabled_mcp_tools is not None: + total_mcp = len(phase_state.mcp_tool_names_all) + allowed_mcp = len(set(phase_state.enabled_mcp_tools) & phase_state.mcp_tool_names_all) + logger.info( + "Queen: per-queen MCP allowlist active — %d of %d MCP tools enabled", + allowed_mcp, + total_mcp, + ) + + # ---- MCP tool catalog for the frontend --------------------------- + # Snapshot per-server tool metadata so the Queen Tools API can render + # the tool surface without spawning MCP subprocesses. Keyed by server + # name so the UI can group tools by origin. Updated every time a + # queen boots, so installing a new server and starting a new queen + # session refreshes the catalog. + mcp_tool_catalog: dict[str, list[dict[str, Any]]] = {} + tools_by_name = {t.name: t for t in queen_tools} + for server_name, tool_names in mcp_server_tools_map.items(): + server_entries: list[dict[str, Any]] = [] + for tool_name in sorted(tool_names): + tool = tools_by_name.get(tool_name) + if tool is None: + continue + server_entries.append( + { + "name": tool.name, + "description": tool.description, + "input_schema": tool.parameters, + } + ) + mcp_tool_catalog[server_name] = server_entries + # All queens share one MCP registry, so the catalog is a manager-level + # fact; stash it on the SessionManager so the Queen Tools route can + # render the tool list even when no queen session is currently live. + if session_manager is not None: + try: + session_manager._mcp_tool_catalog = mcp_tool_catalog # type: ignore[attr-defined] + except Exception: + logger.debug("Queen: could not attach mcp_tool_catalog to manager", exc_info=True) + # ---- Global + queen-scoped memory ---------------------------------- global_dir, queen_mem_dir = initialize_memory_scopes(session, phase_state) diff --git a/core/framework/server/routes_colony_tools.py b/core/framework/server/routes_colony_tools.py new file mode 100644 index 00000000..b6c993b5 --- /dev/null +++ b/core/framework/server/routes_colony_tools.py @@ -0,0 +1,329 @@ +"""Per-colony MCP tool allowlist routes. + +- GET /api/colony/{colony_name}/tools -- enumerate colony tool surface +- PATCH /api/colony/{colony_name}/tools -- set or clear the allowlist + +A colony's tool set is inherited from the queen that forked it, so the +tool surface mirrors the queen's MCP servers. Lifecycle/synthetic tools +are included for display only. MCP tools are grouped by origin server +with per-tool ``enabled`` flags. + +Semantics: + +- ``enabled_mcp_tools: null`` → allow every MCP tool (default). +- ``enabled_mcp_tools: []`` → allow no MCP tools (only lifecycle / + synthetic pass through). +- ``enabled_mcp_tools: [...]`` → only listed names pass. + +The allowlist is persisted in ``~/.hive/colonies/{colony_name}/metadata.json`` +and takes effect on the *next* worker spawn. In-flight workers keep the +tool list they booted with because workers have no dynamic tools +provider today — mutating their tool set mid-turn would diverge from +the list the LLM is already using. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from aiohttp import web + +from framework.host.colony_metadata import ( + colony_metadata_path, + load_colony_metadata, + update_colony_metadata, +) + +logger = logging.getLogger(__name__) + + +_SYNTHETIC_NAMES = {"ask_user"} + + +def _synthetic_entries() -> list[dict[str, Any]]: + try: + from framework.agent_loop.internals.synthetic_tools import build_ask_user_tool + + tool = build_ask_user_tool() + return [ + { + "name": tool.name, + "description": tool.description, + "editable": False, + } + ] + except Exception: + return [ + { + "name": "ask_user", + "description": "Pause and ask the user a structured question.", + "editable": False, + } + ] + + +def _colony_runtimes_for_name(manager: Any, colony_name: str) -> list[Any]: + """Return every live ColonyRuntime whose session is attached to ``colony_name``.""" + sessions = getattr(manager, "_sessions", None) or {} + runtimes: list[Any] = [] + for session in sessions.values(): + if getattr(session, "colony_name", None) != colony_name: + continue + # Both ``session.colony`` (queen-side unified runtime) and + # ``session.colony_runtime`` (legacy worker runtime) may carry + # tools that need the allowlist applied. We update both. + for attr in ("colony", "colony_runtime"): + rt = getattr(session, attr, None) + if rt is not None and rt not in runtimes: + runtimes.append(rt) + return runtimes + + +async def _render_catalog(manager: Any, colony_name: str) -> dict[str, list[dict[str, Any]]]: + """Build a per-server tool catalog for this colony. + + All colonies inherit the queen's MCP surface, so we reuse the + manager-level ``_mcp_tool_catalog`` populated during queen boot. + """ + # If a live runtime exists and carries its own registry, prefer it — + # it's authoritative (reflects any post-queen-boot MCP additions). + for rt in _colony_runtimes_for_name(manager, colony_name): + tools = getattr(rt, "_tools", None) + if not tools: + continue + mcp_names = set(getattr(rt, "_mcp_tool_names_all", set()) or set()) + if not mcp_names: + continue + catalog: dict[str, list[dict[str, Any]]] = {"(mcp)": []} + for tool in tools: + name = getattr(tool, "name", None) + if name in mcp_names: + catalog["(mcp)"].append( + { + "name": name, + "description": getattr(tool, "description", ""), + "input_schema": getattr(tool, "parameters", {}), + } + ) + return catalog + + # Otherwise fall back to the queen-level snapshot. Build it on demand + # (off the event loop) when empty so the Tool Library works before + # any queen has been started in this process. + cached = getattr(manager, "_mcp_tool_catalog", None) + if isinstance(cached, dict) and cached: + return cached + try: + import asyncio + + from framework.server.queen_orchestrator import build_queen_tool_registry_bare + + registry, built = await asyncio.to_thread(build_queen_tool_registry_bare) + if manager is not None: + manager._mcp_tool_catalog = built # type: ignore[attr-defined] + manager._bootstrap_tool_registry = registry # type: ignore[attr-defined] + return built + except Exception: + logger.warning("Colony tools: catalog bootstrap failed", exc_info=True) + return {} + + +def _lifecycle_entries_from_runtime(manager: Any, colony_name: str) -> list[dict[str, Any]]: + """Non-MCP tools currently registered on the colony runtime (if any). + + When no live runtime is available we fall back to the bootstrap + registry stashed on the manager by ``routes_queen_tools`` — it + already has queen lifecycle tools registered, which are also the + lifecycle tools colonies inherit at spawn time. + """ + out: list[dict[str, Any]] = [] + seen: set[str] = set() + + def _push(name: str, description: str) -> None: + if not name or name in seen: + return + if name in _SYNTHETIC_NAMES: + return + seen.add(name) + out.append({"name": name, "description": description, "editable": False}) + + runtimes = _colony_runtimes_for_name(manager, colony_name) + if runtimes: + for rt in runtimes: + mcp_names = set(getattr(rt, "_mcp_tool_names_all", set()) or set()) + for tool in getattr(rt, "_tools", []) or []: + name = getattr(tool, "name", None) + if name in mcp_names: + continue + _push(name, getattr(tool, "description", "")) + else: + # No live runtime — derive from the bootstrap registry. + from framework.server.routes_queen_tools import _lifecycle_entries_without_session + + catalog = getattr(manager, "_mcp_tool_catalog", {}) or {} + mcp_names: set[str] = set() + for entries in catalog.values(): + for entry in entries: + if entry.get("name"): + mcp_names.add(entry["name"]) + out.extend(_lifecycle_entries_without_session(manager, mcp_names)) + return out + return sorted(out, key=lambda e: e["name"]) + + +def _render_servers( + catalog: dict[str, list[dict[str, Any]]], + enabled_mcp_tools: list[str] | None, +) -> list[dict[str, Any]]: + allowed: set[str] | None = None if enabled_mcp_tools is None else set(enabled_mcp_tools) + servers: list[dict[str, Any]] = [] + for name in sorted(catalog): + tools = [] + for entry in catalog[name]: + tool_name = entry.get("name") + tools.append( + { + "name": tool_name, + "description": entry.get("description", ""), + "input_schema": entry.get("input_schema", {}), + "enabled": True if allowed is None else tool_name in allowed, + } + ) + servers.append({"name": name, "tools": tools}) + return servers + + +async def handle_get_tools(request: web.Request) -> web.Response: + """GET /api/colony/{colony_name}/tools.""" + colony_name = request.match_info["colony_name"] + if not colony_metadata_path(colony_name).exists(): + return web.json_response({"error": f"Colony '{colony_name}' not found"}, status=404) + + manager = request.app.get("manager") + meta = load_colony_metadata(colony_name) + enabled = meta.get("enabled_mcp_tools") + if enabled is not None and not isinstance(enabled, list): + enabled = None + + catalog = await _render_catalog(manager, colony_name) + stale = not catalog + + return web.json_response( + { + "colony_name": colony_name, + "enabled_mcp_tools": enabled, + "stale": stale, + "lifecycle": _lifecycle_entries_from_runtime(manager, colony_name), + "synthetic": _synthetic_entries(), + "mcp_servers": _render_servers(catalog, enabled), + } + ) + + +async def handle_patch_tools(request: web.Request) -> web.Response: + """PATCH /api/colony/{colony_name}/tools.""" + colony_name = request.match_info["colony_name"] + if not colony_metadata_path(colony_name).exists(): + return web.json_response({"error": f"Colony '{colony_name}' not found"}, status=404) + + try: + body = await request.json() + except Exception: + return web.json_response({"error": "Invalid JSON body"}, status=400) + if not isinstance(body, dict) or "enabled_mcp_tools" not in body: + return web.json_response( + {"error": "Body must be an object with an 'enabled_mcp_tools' field"}, + status=400, + ) + + enabled = body["enabled_mcp_tools"] + if enabled is not None: + if not isinstance(enabled, list) or not all(isinstance(x, str) for x in enabled): + return web.json_response( + {"error": "'enabled_mcp_tools' must be null or a list of strings"}, + status=400, + ) + + manager = request.app.get("manager") + + # Validate names against the known MCP catalog — lifts the same + # typo-catching guarantee we already offer on queen tools. + catalog = await _render_catalog(manager, colony_name) + known: set[str] = {e.get("name") for entries in catalog.values() for e in entries if e.get("name")} + if enabled is not None and known: + unknown = sorted(set(enabled) - known) + if unknown: + return web.json_response( + {"error": "Unknown MCP tool name(s)", "unknown": unknown}, + status=400, + ) + + # Persist — missing metadata.json already guarded by 404 above. + try: + update_colony_metadata(colony_name, {"enabled_mcp_tools": enabled}) + except FileNotFoundError: + return web.json_response({"error": f"Colony '{colony_name}' not found"}, status=404) + + # Update any live runtimes so the NEXT worker spawn reflects the change. + # We do NOT rebuild in-flight workers' tool lists (see module docstring). + refreshed = 0 + for rt in _colony_runtimes_for_name(manager, colony_name): + setter = getattr(rt, "set_tool_allowlist", None) + if callable(setter): + try: + setter(enabled) + refreshed += 1 + except Exception: + logger.debug( + "Colony tools: set_tool_allowlist failed on runtime for %s", + colony_name, + exc_info=True, + ) + + logger.info( + "Colony tools: colony=%s allowlist=%s refreshed_runtimes=%d", + colony_name, + "null" if enabled is None else f"{len(enabled)} tool(s)", + refreshed, + ) + return web.json_response( + { + "colony_name": colony_name, + "enabled_mcp_tools": enabled, + "refreshed_runtimes": refreshed, + "note": "Changes apply to the next worker spawn. Running workers keep their booted tool list.", + } + ) + + +async def handle_list_colonies(request: web.Request) -> web.Response: + """GET /api/colonies — list colonies with their tool allowlist status. + + Powers the Tool Library page's colony picker. + """ + from framework.host.colony_metadata import list_colony_names + + colonies: list[dict[str, Any]] = [] + for name in list_colony_names(): + meta = load_colony_metadata(name) + allowlist = meta.get("enabled_mcp_tools") + if allowlist is not None and not isinstance(allowlist, list): + allowlist = None + colonies.append( + { + "name": name, + "queen_name": meta.get("queen_name"), + "created_at": meta.get("created_at"), + "has_allowlist": allowlist is not None, + "enabled_count": len(allowlist) if isinstance(allowlist, list) else None, + } + ) + return web.json_response({"colonies": colonies}) + + +def register_routes(app: web.Application) -> None: + """Register per-colony tool routes.""" + app.router.add_get("/api/colonies/tools-index", handle_list_colonies) + app.router.add_get("/api/colony/{colony_name}/tools", handle_get_tools) + app.router.add_patch("/api/colony/{colony_name}/tools", handle_patch_tools) diff --git a/core/framework/server/routes_mcp.py b/core/framework/server/routes_mcp.py new file mode 100644 index 00000000..b7dbf627 --- /dev/null +++ b/core/framework/server/routes_mcp.py @@ -0,0 +1,291 @@ +"""MCP server registration routes. + +Thin HTTP wrapper around ``MCPRegistry`` so the frontend can add, remove, +enable, and health-check user-registered MCP servers. The CLI path +(``hive mcp add`` / ``hive mcp remove`` / etc.) is unchanged. + +- GET /api/mcp/servers -- list installed servers +- POST /api/mcp/servers -- register a local server +- DELETE /api/mcp/servers/{name} -- remove a local server +- POST /api/mcp/servers/{name}/enable -- enable a server +- POST /api/mcp/servers/{name}/disable -- disable a server +- POST /api/mcp/servers/{name}/health -- probe server health + +New servers take effect on the *next* queen session start. Existing live +queen sessions keep the tool list they booted with to avoid mid-turn +cache invalidation. The ``add`` response hints at this explicitly. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from aiohttp import web + +from framework.loader.mcp_errors import MCPError +from framework.loader.mcp_registry import MCPRegistry + +logger = logging.getLogger(__name__) + + +_VALID_TRANSPORTS = {"stdio", "http", "sse", "unix"} + + +def _registry() -> MCPRegistry: + # MCPRegistry is a thin wrapper around ~/.hive/mcp_registry/installed.json + # so instantiation is cheap — no need to cache on app["..."]. + reg = MCPRegistry() + reg.initialize() + return reg + + +def _package_builtin_servers() -> list[dict[str, Any]]: + """Return the package-baked queen MCP servers from ``queen/mcp_servers.json``. + + Those servers are loaded directly by ``ToolRegistry.load_mcp_config`` + at queen boot and never go through ``MCPRegistry.list_installed``, + so the raw registry view shows them as missing. Surface them here so + the Tool Library reflects what the queen actually talks to. + + Entries carry ``source: "built-in"`` and are NOT removable / toggleable + — editing them requires changing the repo file. + """ + import json + from pathlib import Path + + import framework.agents.queen as _queen_pkg + + path = Path(_queen_pkg.__file__).parent / "mcp_servers.json" + if not path.exists(): + return [] + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return [] + + out: list[dict[str, Any]] = [] + for name, cfg in data.items(): + if not isinstance(cfg, dict): + continue + out.append( + { + "name": name, + "source": "built-in", + "transport": cfg.get("transport", "stdio"), + "description": cfg.get("description", "") or "", + "enabled": True, + "last_health_status": None, + "last_error": None, + "last_health_check_at": None, + "tool_count": None, + "removable": False, + } + ) + return out + + +def _server_to_summary(entry: dict[str, Any]) -> dict[str, Any]: + """Shape an installed.json entry for API responses. + + Strips the full manifest body (which can be large) but keeps the tool + list if the manifest already embeds one (happens for registry-installed + servers). Users with ``source: "local"`` only get a tool list after + running a health check. + """ + manifest = entry.get("manifest") or {} + tools = manifest.get("tools") if isinstance(manifest, dict) else None + if not isinstance(tools, list): + tools = None + return { + "name": entry.get("name"), + "source": entry.get("source"), + "transport": entry.get("transport"), + "description": (manifest.get("description") if isinstance(manifest, dict) else None) or "", + "enabled": entry.get("enabled", True), + "last_health_status": entry.get("last_health_status"), + "last_error": entry.get("last_error"), + "last_health_check_at": entry.get("last_health_check_at"), + "tool_count": (len(tools) if tools is not None else None), + } + + +def _mcp_error_response(exc: MCPError, *, default_status: int = 400) -> web.Response: + return web.json_response( + { + "error": exc.what, + "code": exc.code.value, + "what": exc.what, + "why": exc.why, + "fix": exc.fix, + }, + status=default_status, + ) + + +async def handle_list_servers(request: web.Request) -> web.Response: + """GET /api/mcp/servers — list every server the queen actually uses. + + Merges two sources: + + - ``MCPRegistry.list_installed()`` — servers registered via + ``hive mcp add`` / the ``/api/mcp/servers`` POST route, stored in + ``~/.hive/mcp_registry/installed.json``. These carry + ``source: "local"`` (user-added) or ``source: "registry"`` + (installed from the remote registry). + - Repo-baked queen servers from + ``core/framework/agents/queen/mcp_servers.json``. These are loaded + directly by the queen's ``ToolRegistry`` at boot and never touch + ``MCPRegistry``; we surface them here so the UI reflects what the + queen really talks to. They are not removable from the UI because + editing them requires changing the repo. + + If a name collides between the two sources, the registry entry wins + because that's the one the user has customized. + """ + reg = _registry() + registry_entries = [_server_to_summary(e) for e in reg.list_installed()] + seen_names = {e.get("name") for e in registry_entries} + + package_entries = [e for e in _package_builtin_servers() if e.get("name") not in seen_names] + + servers = [*package_entries, *registry_entries] + return web.json_response({"servers": servers}) + + +async def handle_add_server(request: web.Request) -> web.Response: + """POST /api/mcp/servers — register a local MCP server. + + Body mirrors ``MCPRegistry.add_local`` args: + + :: + + { + "name": "my-tool", + "transport": "stdio" | "http" | "sse" | "unix", + "command": "...", "args": [...], "env": {...}, "cwd": "...", + "url": "...", "headers": {...}, + "socket_path": "...", + "description": "..." + } + """ + try: + body = await request.json() + except Exception: + return web.json_response({"error": "Invalid JSON body"}, status=400) + if not isinstance(body, dict): + return web.json_response({"error": "Body must be a JSON object"}, status=400) + + name = body.get("name") + transport = body.get("transport") + if not isinstance(name, str) or not name.strip(): + return web.json_response({"error": "'name' is required"}, status=400) + if transport not in _VALID_TRANSPORTS: + return web.json_response( + {"error": f"'transport' must be one of {sorted(_VALID_TRANSPORTS)}"}, + status=400, + ) + + reg = _registry() + try: + entry = reg.add_local( + name=name.strip(), + transport=transport, + command=body.get("command"), + args=body.get("args"), + env=body.get("env"), + cwd=body.get("cwd"), + url=body.get("url"), + headers=body.get("headers"), + socket_path=body.get("socket_path"), + description=body.get("description", ""), + ) + except MCPError as exc: + status = 409 if "already exists" in exc.what else 400 + return _mcp_error_response(exc, default_status=status) + except Exception as exc: + logger.exception("MCP add_local failed for %r", name) + return web.json_response({"error": str(exc)}, status=500) + + summary = _server_to_summary({"name": name, **entry}) + return web.json_response( + { + "server": summary, + "hint": "Start a new queen session to use this server's tools.", + }, + status=201, + ) + + +async def handle_remove_server(request: web.Request) -> web.Response: + """DELETE /api/mcp/servers/{name} — remove a local server.""" + name = request.match_info["name"] + reg = _registry() + + existing = reg.get_server(name) + if existing is None: + return web.json_response({"error": f"Server '{name}' not installed"}, status=404) + if existing.get("source") != "local": + return web.json_response( + { + "error": f"Server '{name}' is a built-in; it cannot be removed from the UI.", + }, + status=400, + ) + + try: + reg.remove(name) + except MCPError as exc: + return _mcp_error_response(exc, default_status=404) + return web.json_response({"removed": name}) + + +async def handle_set_enabled(request: web.Request, *, enabled: bool) -> web.Response: + name = request.match_info["name"] + reg = _registry() + try: + if enabled: + reg.enable(name) + else: + reg.disable(name) + except MCPError as exc: + return _mcp_error_response(exc, default_status=404) + return web.json_response({"name": name, "enabled": enabled}) + + +async def handle_enable(request: web.Request) -> web.Response: + """POST /api/mcp/servers/{name}/enable.""" + return await handle_set_enabled(request, enabled=True) + + +async def handle_disable(request: web.Request) -> web.Response: + """POST /api/mcp/servers/{name}/disable.""" + return await handle_set_enabled(request, enabled=False) + + +async def handle_health(request: web.Request) -> web.Response: + """POST /api/mcp/servers/{name}/health — probe one server.""" + name = request.match_info["name"] + reg = _registry() + try: + # MCPRegistry.health_check blocks on subprocess IO — run it off + # the event loop so the HTTP worker stays responsive. + import asyncio + + result = await asyncio.to_thread(reg.health_check, name) + except MCPError as exc: + return _mcp_error_response(exc, default_status=404) + except Exception as exc: + logger.exception("MCP health_check failed for %r", name) + return web.json_response({"error": str(exc)}, status=500) + return web.json_response(result) + + +def register_routes(app: web.Application) -> None: + """Register MCP server CRUD routes.""" + app.router.add_get("/api/mcp/servers", handle_list_servers) + app.router.add_post("/api/mcp/servers", handle_add_server) + app.router.add_delete("/api/mcp/servers/{name}", handle_remove_server) + app.router.add_post("/api/mcp/servers/{name}/enable", handle_enable) + app.router.add_post("/api/mcp/servers/{name}/disable", handle_disable) + app.router.add_post("/api/mcp/servers/{name}/health", handle_health) diff --git a/core/framework/server/routes_queen_tools.py b/core/framework/server/routes_queen_tools.py new file mode 100644 index 00000000..fdaa2a0e --- /dev/null +++ b/core/framework/server/routes_queen_tools.py @@ -0,0 +1,424 @@ +"""Per-queen MCP tool allowlist routes. + +- GET /api/queen/{queen_id}/tools -- enumerate the queen's tool surface +- PATCH /api/queen/{queen_id}/tools -- set or clear the MCP tool allowlist + +Lifecycle and synthetic tools (``ask_user``) are always part of the queen's +surface in INDEPENDENT mode and are returned with ``editable: false``. MCP +tools are grouped by origin server and carry per-tool ``enabled`` flags. + +The allowlist is a persisted queen-profile field, ``enabled_mcp_tools``: + +- ``null`` / missing -> "allow every MCP tool" (default, backward-compat) +- ``[]`` -> explicitly disable every MCP tool +- ``["foo", "bar"]`` -> only these MCP tools pass through to the LLM + +Filtering happens in ``QueenPhaseState.rebuild_independent_filter`` so the +LLM prompt cache stays warm between saves. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from aiohttp import web + +from framework.agents.queen.queen_profiles import ( + ensure_default_queens, + load_queen_profile, + update_queen_profile, +) + +logger = logging.getLogger(__name__) + + +_SYNTHETIC_NAMES = {"ask_user"} + + +async def _ensure_manager_catalog(manager: Any) -> dict[str, list[dict[str, Any]]]: + """Return the cached MCP tool catalog, building it on first call. + + ``queen_orchestrator.create_queen`` populates ``_mcp_tool_catalog`` on + every queen boot. On a fresh backend process the user may open the + Tool Library before any queen session has started, so the catalog is + empty. In that case we build one from the shared MCP config; the + first call pays an MCP-subprocess-spawn cost, subsequent calls are + cache hits. The build runs off the event loop via asyncio.to_thread + so the HTTP worker stays responsive while MCP servers initialize. + """ + if manager is None: + return {} + catalog = getattr(manager, "_mcp_tool_catalog", None) + if isinstance(catalog, dict) and catalog: + return catalog + try: + import asyncio + + from framework.server.queen_orchestrator import build_queen_tool_registry_bare + + registry, built = await asyncio.to_thread(build_queen_tool_registry_bare) + manager._mcp_tool_catalog = built # type: ignore[attr-defined] + manager._bootstrap_tool_registry = registry # type: ignore[attr-defined] + return built + except Exception: + logger.warning("Tool catalog bootstrap failed", exc_info=True) + return {} + + +def _lifecycle_entries_without_session( + manager: Any, + mcp_names: set[str], +) -> list[dict[str, Any]]: + """Derive lifecycle tool names from the registry even without a session. + + We register queen lifecycle tools against a temporary registry using a + minimal stub, then subtract the MCP-origin set and the synthetic set. + The result matches what the queen sees at runtime (minus context- + specific variants). + """ + registry = getattr(manager, "_bootstrap_tool_registry", None) + # If the bootstrap registry exists but doesn't carry lifecycle tools + # yet, register them now. + if registry is not None and not getattr(registry, "_lifecycle_bootstrap_done", False): + try: + from types import SimpleNamespace + + from framework.tools.queen_lifecycle_tools import register_queen_lifecycle_tools + + stub_session = SimpleNamespace( + id="tool-library-bootstrap", + colony_runtime=None, + event_bus=None, + worker_path=None, + phase_state=None, + llm=None, + ) + register_queen_lifecycle_tools( + registry, + session=stub_session, + session_id=stub_session.id, + session_manager=None, + manager_session_id=stub_session.id, + phase_state=None, + ) + registry._lifecycle_bootstrap_done = True # type: ignore[attr-defined] + except Exception: + logger.debug("lifecycle bootstrap failed", exc_info=True) + + if registry is None: + return [] + + out: list[dict[str, Any]] = [] + for name, tool in sorted(registry.get_tools().items()): + if name in mcp_names or name in _SYNTHETIC_NAMES: + continue + out.append( + { + "name": tool.name, + "description": tool.description, + "editable": False, + } + ) + return out + + +def _synthetic_entries() -> list[dict[str, Any]]: + """Return display metadata for synthetic tools injected by the agent loop. + + Kept behind a lazy import so test harnesses that don't wire the agent + loop can still hit this route without blowing up. + """ + try: + from framework.agent_loop.internals.synthetic_tools import build_ask_user_tool + + tool = build_ask_user_tool() + return [ + { + "name": tool.name, + "description": tool.description, + "editable": False, + } + ] + except Exception: + return [ + { + "name": "ask_user", + "description": "Pause and ask the user a structured question.", + "editable": False, + } + ] + + +def _live_queen_session(manager: Any, queen_id: str) -> Any: + """Return any live DM session owned by this queen, or ``None``.""" + sessions = getattr(manager, "_sessions", None) or {} + for session in sessions.values(): + if getattr(session, "queen_name", None) != queen_id: + continue + # Prefer DM (non-colony) sessions + if getattr(session, "colony_runtime", None) is None: + return session + return None + + +def _render_mcp_servers( + *, + mcp_tool_names_by_server: dict[str, list[dict[str, Any]]], + enabled_mcp_tools: list[str] | None, +) -> list[dict[str, Any]]: + """Shape the mcp_tool_catalog entries for the API response.""" + allowed: set[str] | None = None if enabled_mcp_tools is None else set(enabled_mcp_tools) + servers: list[dict[str, Any]] = [] + for server_name in sorted(mcp_tool_names_by_server): + entries = mcp_tool_names_by_server[server_name] + tools = [] + for entry in entries: + name = entry.get("name") + enabled = True if allowed is None else name in allowed + tools.append( + { + "name": name, + "description": entry.get("description", ""), + "input_schema": entry.get("input_schema", {}), + "enabled": enabled, + } + ) + servers.append({"name": server_name, "tools": tools}) + return servers + + +def _catalog_from_live_session(session: Any) -> dict[str, list[dict[str, Any]]]: + """Rebuild a per-server tool catalog from a live queen session. + + The session's registry is authoritative — this reflects any hot-added + MCP servers since the manager-level snapshot was cached. + """ + registry = getattr(session, "_queen_tool_registry", None) + if registry is None: + # session._queen_tools_by_name is a stash from create_queen; we + # only have registry via the tools list, so reconstruct from the + # phase state instead. + phase_state = getattr(session, "phase_state", None) + if phase_state is None: + return {} + mcp_names = getattr(phase_state, "mcp_tool_names_all", set()) or set() + independent_tools = getattr(phase_state, "independent_tools", []) or [] + result: dict[str, list[dict[str, Any]]] = {"(unknown)": []} + for tool in independent_tools: + if tool.name not in mcp_names: + continue + result["(unknown)"].append( + { + "name": tool.name, + "description": tool.description, + "input_schema": tool.parameters, + } + ) + return result if result["(unknown)"] else {} + + server_map = getattr(registry, "_mcp_server_tools", {}) or {} + tools_by_name = {t.name: t for t in registry.get_tools().values()} + catalog: dict[str, list[dict[str, Any]]] = {} + for server_name, tool_names in server_map.items(): + entries: list[dict[str, Any]] = [] + for name in sorted(tool_names): + tool = tools_by_name.get(name) + if tool is None: + continue + entries.append( + { + "name": tool.name, + "description": tool.description, + "input_schema": tool.parameters, + } + ) + catalog[server_name] = entries + return catalog + + +def _lifecycle_entries( + *, + session: Any, + mcp_tool_names_all: set[str], +) -> list[dict[str, Any]]: + """Lifecycle tools = independent_tools minus MCP-origin minus synthetic. + + We compute this from a live session when available so the list exactly + matches what the queen actually sees on her next turn. + """ + if session is None: + return [] + phase_state = getattr(session, "phase_state", None) + if phase_state is None: + return [] + result: list[dict[str, Any]] = [] + for tool in getattr(phase_state, "independent_tools", []) or []: + if tool.name in mcp_tool_names_all: + continue + if tool.name in _SYNTHETIC_NAMES: + continue + result.append( + { + "name": tool.name, + "description": tool.description, + "editable": False, + } + ) + return sorted(result, key=lambda x: x["name"]) + + +async def handle_get_tools(request: web.Request) -> web.Response: + """GET /api/queen/{queen_id}/tools — enumerate tool surface for the UI.""" + queen_id = request.match_info["queen_id"] + ensure_default_queens() + try: + profile = load_queen_profile(queen_id) + except FileNotFoundError: + return web.json_response({"error": f"Queen '{queen_id}' not found"}, status=404) + + manager = request.app.get("manager") + session = _live_queen_session(manager, queen_id) if manager is not None else None + + # Prefer a live session's registry for freshness. Otherwise use (or + # build on demand) the manager-level catalog so the Tool Library works + # even before any queen has been started in this process. + if session is not None: + catalog = _catalog_from_live_session(session) + else: + catalog = await _ensure_manager_catalog(manager) + stale = not catalog + + mcp_tool_names_all: set[str] = set() + for entries in catalog.values(): + for entry in entries: + if entry.get("name"): + mcp_tool_names_all.add(entry["name"]) + + if session is not None: + lifecycle = _lifecycle_entries( + session=session, + mcp_tool_names_all=mcp_tool_names_all, + ) + else: + lifecycle = _lifecycle_entries_without_session(manager, mcp_tool_names_all) + + enabled_mcp_tools = profile.get("enabled_mcp_tools") + + response = { + "queen_id": queen_id, + "enabled_mcp_tools": enabled_mcp_tools, + "stale": stale, + "lifecycle": lifecycle, + "synthetic": _synthetic_entries(), + "mcp_servers": _render_mcp_servers( + mcp_tool_names_by_server=catalog, + enabled_mcp_tools=enabled_mcp_tools, + ), + } + return web.json_response(response) + + +async def handle_patch_tools(request: web.Request) -> web.Response: + """PATCH /api/queen/{queen_id}/tools — persist the MCP tool allowlist. + + Body: ``{"enabled_mcp_tools": null | string[]}``. + + - ``null`` resets to "allow every MCP tool" (default). + - A list is validated against the known MCP catalog; unknown names + are rejected with 400 so the frontend catches typos. + """ + queen_id = request.match_info["queen_id"] + try: + body = await request.json() + except Exception: + return web.json_response({"error": "Invalid JSON body"}, status=400) + if not isinstance(body, dict) or "enabled_mcp_tools" not in body: + return web.json_response( + {"error": "Body must be an object with an 'enabled_mcp_tools' field"}, + status=400, + ) + + enabled = body["enabled_mcp_tools"] + if enabled is not None: + if not isinstance(enabled, list) or not all(isinstance(x, str) for x in enabled): + return web.json_response( + {"error": "'enabled_mcp_tools' must be null or a list of strings"}, + status=400, + ) + + ensure_default_queens() + try: + load_queen_profile(queen_id) + except FileNotFoundError: + return web.json_response({"error": f"Queen '{queen_id}' not found"}, status=404) + + # Validate names against the known MCP tool catalog. We prefer a live + # session's registry for the most up-to-date set, then fall back to + # the manager-level snapshot (building it on demand if absent). + manager = request.app.get("manager") + session = _live_queen_session(manager, queen_id) if manager is not None else None + if session is not None: + catalog = _catalog_from_live_session(session) + else: + catalog = await _ensure_manager_catalog(manager) + known_names: set[str] = set() + for entries in catalog.values(): + for entry in entries: + if entry.get("name"): + known_names.add(entry["name"]) + + if enabled is not None and known_names: + unknown = sorted(set(enabled) - known_names) + if unknown: + return web.json_response( + {"error": "Unknown MCP tool name(s)", "unknown": unknown}, + status=400, + ) + + # Persist — we pass the raw value (``None`` → stored as YAML null). + updated = update_queen_profile(queen_id, {"enabled_mcp_tools": enabled}) + + # Hot-reload every live DM session for this queen. The filter memo is + # rebuilt so the very next turn sees the new allowlist without a + # session restart, and the prompt cache is invalidated exactly once. + refreshed = 0 + sessions = getattr(manager, "_sessions", None) or {} + for sess in sessions.values(): + if getattr(sess, "queen_name", None) != queen_id: + continue + phase_state = getattr(sess, "phase_state", None) + if phase_state is None: + continue + phase_state.enabled_mcp_tools = enabled + rebuild = getattr(phase_state, "rebuild_independent_filter", None) + if callable(rebuild): + try: + rebuild() + refreshed += 1 + except Exception: + logger.debug( + "Queen tools: rebuild_independent_filter failed for session %s", + getattr(sess, "id", "?"), + exc_info=True, + ) + + logger.info( + "Queen tools: queen_id=%s allowlist=%s refreshed_sessions=%d", + queen_id, + "null" if enabled is None else f"{len(enabled)} tool(s)", + refreshed, + ) + return web.json_response( + { + "queen_id": queen_id, + "enabled_mcp_tools": updated.get("enabled_mcp_tools"), + "refreshed_sessions": refreshed, + } + ) + + +def register_routes(app: web.Application) -> None: + """Register queen-tools routes.""" + app.router.add_get("/api/queen/{queen_id}/tools", handle_get_tools) + app.router.add_patch("/api/queen/{queen_id}/tools", handle_patch_tools) diff --git a/core/framework/server/session_manager.py b/core/framework/server/session_manager.py index d65fd533..b971be78 100644 --- a/core/framework/server/session_manager.py +++ b/core/framework/server/session_manager.py @@ -1518,6 +1518,38 @@ class SessionManager: colony_id=session.id, pipeline_stages=[], # queen pipeline runs in queen_orchestrator, not here ) + + # Per-colony tool allowlist, loaded from the colony's metadata.json + # when this session is attached to a real forked colony. For pure + # queen DM sessions (session.colony_name is None) we only capture + # the MCP-origin set — the allowlist stays ``None`` so every MCP + # tool passes through by default. + try: + mcp_tool_names_all: set[str] = set() + mgr_catalog = getattr(self, "_mcp_tool_catalog", None) + if isinstance(mgr_catalog, dict): + for entries in mgr_catalog.values(): + for entry in entries: + name = entry.get("name") if isinstance(entry, dict) else None + if name: + mcp_tool_names_all.add(name) + enabled_mcp_tools: list[str] | None = None + colony_name = getattr(session, "colony_name", None) + if colony_name: + from framework.host.colony_metadata import load_colony_metadata + + colony_meta = load_colony_metadata(colony_name) + raw = colony_meta.get("enabled_mcp_tools") + if raw is None or isinstance(raw, list): + enabled_mcp_tools = raw + colony.set_tool_allowlist(enabled_mcp_tools, mcp_tool_names_all) + except Exception: + logger.debug( + "Colony allowlist bootstrap failed for session %s", + session.id, + exc_info=True, + ) + await colony.start() session.colony = colony diff --git a/core/framework/server/tests/test_colony_tools.py b/core/framework/server/tests/test_colony_tools.py new file mode 100644 index 00000000..8595f05f --- /dev/null +++ b/core/framework/server/tests/test_colony_tools.py @@ -0,0 +1,244 @@ +"""Tests for the per-colony MCP tool allowlist filter + routes. + +Covers: +1. ``ColonyRuntime`` filter semantics (default-allow, allowlist, empty, + lifecycle passes through). +2. routes_colony_tools round trip (GET/PATCH, validation, 404). +3. Colony index route for the Tool Library picker. + +Routes never touch the real ``~/.hive/colonies`` tree — we redirect +``COLONIES_DIR`` into ``tmp_path`` via monkeypatch. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from typing import Any + +import pytest +from aiohttp import web +from aiohttp.test_utils import TestClient, TestServer + +from framework.host.colony_runtime import ColonyRuntime +from framework.llm.provider import Tool +from framework.server import routes_colony_tools + + +def _tool(name: str) -> Tool: + return Tool(name=name, description=f"desc of {name}", parameters={"type": "object"}) + + +# --------------------------------------------------------------------------- +# ColonyRuntime filter unit tests +# --------------------------------------------------------------------------- + + +def _bare_runtime() -> ColonyRuntime: + rt = ColonyRuntime.__new__(ColonyRuntime) + rt._enabled_mcp_tools = None + rt._mcp_tool_names_all = set() + return rt + + +class TestColonyFilter: + def test_default_is_noop(self): + rt = _bare_runtime() + tools = [_tool("mcp_a"), _tool("lc_b")] + assert rt._apply_tool_allowlist(tools) == tools + + def test_allowlist_gates_mcp_only(self): + rt = _bare_runtime() + rt._mcp_tool_names_all = {"mcp_a", "mcp_b"} + rt._enabled_mcp_tools = ["mcp_a"] + tools = [_tool("mcp_a"), _tool("mcp_b"), _tool("lc_c")] + names = [t.name for t in rt._apply_tool_allowlist(tools)] + assert names == ["mcp_a", "lc_c"] + + def test_empty_allowlist_keeps_lifecycle(self): + rt = _bare_runtime() + rt._mcp_tool_names_all = {"mcp_a", "mcp_b"} + rt._enabled_mcp_tools = [] + tools = [_tool("mcp_a"), _tool("mcp_b"), _tool("lc_c")] + names = [t.name for t in rt._apply_tool_allowlist(tools)] + assert names == ["lc_c"] + + def test_setter_mutates_live_state(self): + rt = _bare_runtime() + rt.set_tool_allowlist(["x"], {"x", "y"}) + assert rt._enabled_mcp_tools == ["x"] + assert rt._mcp_tool_names_all == {"x", "y"} + + # Passing None on allowlist clears gating; mcp_tool_names_all + # defaults to "keep current" so a subsequent caller doesn't need + # to repeat the set. + rt.set_tool_allowlist(None) + assert rt._enabled_mcp_tools is None + assert rt._mcp_tool_names_all == {"x", "y"} + + +# --------------------------------------------------------------------------- +# Route round-trip tests +# --------------------------------------------------------------------------- + + +@dataclass +class _FakeSession: + colony_name: str + colony: Any = None + colony_runtime: Any = None + id: str = "sess-1" + + +@dataclass +class _FakeManager: + _sessions: dict = field(default_factory=dict) + _mcp_tool_catalog: dict = field(default_factory=dict) + + +@pytest.fixture +def colony_dir(tmp_path, monkeypatch): + """Point COLONIES_DIR into a tmp tree and seed a colony.""" + colonies = tmp_path / "colonies" + colonies.mkdir() + monkeypatch.setattr("framework.host.colony_metadata.COLONIES_DIR", colonies) + + name = "my_colony" + cdir = colonies / name + cdir.mkdir() + (cdir / "metadata.json").write_text( + json.dumps( + { + "colony_name": name, + "queen_name": "queen_technology", + "created_at": "2026-04-20T00:00:00+00:00", + } + ) + ) + return colonies, name + + +async def _app(manager: _FakeManager) -> web.Application: + app = web.Application() + app["manager"] = manager + routes_colony_tools.register_routes(app) + return app + + +@pytest.mark.asyncio +async def test_get_tools_default_allow(colony_dir): + _, name = colony_dir + manager = _FakeManager( + _mcp_tool_catalog={ + "coder-tools": [ + {"name": "read_file", "description": "read", "input_schema": {}}, + {"name": "write_file", "description": "write", "input_schema": {}}, + ], + } + ) + app = await _app(manager) + async with TestClient(TestServer(app)) as client: + resp = await client.get(f"/api/colony/{name}/tools") + assert resp.status == 200 + body = await resp.json() + assert body["enabled_mcp_tools"] is None + assert body["stale"] is False + tools = {t["name"]: t for t in body["mcp_servers"][0]["tools"]} + assert all(t["enabled"] for t in tools.values()) + + +@pytest.mark.asyncio +async def test_patch_persists_and_validates(colony_dir): + colonies_dir, name = colony_dir + manager = _FakeManager( + _mcp_tool_catalog={ + "coder-tools": [ + {"name": "read_file", "description": "", "input_schema": {}}, + {"name": "write_file", "description": "", "input_schema": {}}, + ] + } + ) + app = await _app(manager) + async with TestClient(TestServer(app)) as client: + resp = await client.patch( + f"/api/colony/{name}/tools", json={"enabled_mcp_tools": ["read_file"]} + ) + assert resp.status == 200 + body = await resp.json() + assert body["enabled_mcp_tools"] == ["read_file"] + + # Persisted to metadata.json + raw = json.loads((colonies_dir / name / "metadata.json").read_text()) + assert raw["enabled_mcp_tools"] == ["read_file"] + + # GET reflects the allowlist + resp = await client.get(f"/api/colony/{name}/tools") + body = await resp.json() + tools = {t["name"]: t for t in body["mcp_servers"][0]["tools"]} + assert tools["read_file"]["enabled"] is True + assert tools["write_file"]["enabled"] is False + + # Unknown → 400 + resp = await client.patch( + f"/api/colony/{name}/tools", json={"enabled_mcp_tools": ["ghost"]} + ) + assert resp.status == 400 + assert "ghost" in (await resp.json()).get("unknown", []) + + +@pytest.mark.asyncio +async def test_patch_refreshes_live_runtime(colony_dir): + _, name = colony_dir + + rt = _bare_runtime() + rt._mcp_tool_names_all = {"read_file", "write_file"} + rt.set_tool_allowlist(None) + + session = _FakeSession(colony_name=name, colony=rt) + manager = _FakeManager( + _sessions={session.id: session}, + _mcp_tool_catalog={ + "coder-tools": [ + {"name": "read_file", "description": "", "input_schema": {}}, + {"name": "write_file", "description": "", "input_schema": {}}, + ] + }, + ) + + app = await _app(manager) + async with TestClient(TestServer(app)) as client: + resp = await client.patch( + f"/api/colony/{name}/tools", json={"enabled_mcp_tools": ["read_file"]} + ) + assert resp.status == 200 + body = await resp.json() + assert body["refreshed_runtimes"] == 1 + assert rt._enabled_mcp_tools == ["read_file"] + + +@pytest.mark.asyncio +async def test_404_for_unknown_colony(colony_dir): + manager = _FakeManager() + app = await _app(manager) + async with TestClient(TestServer(app)) as client: + resp = await client.get("/api/colony/unknown/tools") + assert resp.status == 404 + resp = await client.patch( + "/api/colony/unknown/tools", json={"enabled_mcp_tools": None} + ) + assert resp.status == 404 + + +@pytest.mark.asyncio +async def test_tools_index_lists_colonies(colony_dir): + _, name = colony_dir + manager = _FakeManager() + app = await _app(manager) + async with TestClient(TestServer(app)) as client: + resp = await client.get("/api/colonies/tools-index") + assert resp.status == 200 + body = await resp.json() + entries = {c["name"]: c for c in body["colonies"]} + assert name in entries + assert entries[name]["queen_name"] == "queen_technology" + assert entries[name]["has_allowlist"] is False diff --git a/core/framework/server/tests/test_mcp_routes.py b/core/framework/server/tests/test_mcp_routes.py new file mode 100644 index 00000000..497ef8dd --- /dev/null +++ b/core/framework/server/tests/test_mcp_routes.py @@ -0,0 +1,240 @@ +"""Tests for the MCP server CRUD HTTP routes. + +Monkey-patches ``MCPRegistry`` inside ``routes_mcp`` so the HTTP layer is +exercised without reading or writing ``~/.hive/mcp_registry/installed.json`` +or spawning actual subprocesses. +""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import MagicMock + +import pytest +from aiohttp import web +from aiohttp.test_utils import TestClient, TestServer + +from framework.loader.mcp_errors import MCPError, MCPErrorCode +from framework.server import routes_mcp + + +class _FakeRegistry: + """Stand-in for MCPRegistry — just enough surface for the routes.""" + + def __init__(self) -> None: + self._servers: dict[str, dict[str, Any]] = { + "built-in-seed": { + "source": "registry", + "transport": "stdio", + "enabled": True, + "manifest": {"description": "Factory-seeded server", "tools": []}, + "last_health_status": "healthy", + "last_error": None, + "last_health_check_at": None, + } + } + + def initialize(self) -> None: # noqa: D401 — registry idempotent init + return + + def list_installed(self) -> list[dict[str, Any]]: + return [{"name": name, **entry} for name, entry in self._servers.items()] + + def get_server(self, name: str) -> dict | None: + if name not in self._servers: + return None + return {"name": name, **self._servers[name]} + + def add_local(self, *, name: str, transport: str, **kwargs: Any) -> dict: + if name in self._servers: + raise MCPError( + code=MCPErrorCode.MCP_INSTALL_FAILED, + what=f"Server '{name}' already exists", + why="A server with this name is already registered locally.", + fix=f"Run: hive mcp remove {name}", + ) + entry = { + "source": "local", + "transport": transport, + "enabled": True, + "manifest": {"description": kwargs.get("description") or ""}, + "last_health_status": None, + "last_error": None, + "last_health_check_at": None, + } + self._servers[name] = entry + return entry + + def remove(self, name: str) -> None: + if name not in self._servers: + raise MCPError( + code=MCPErrorCode.MCP_INSTALL_FAILED, + what=f"Cannot remove server '{name}'", + why="Server is not installed.", + fix="Run: hive mcp list", + ) + del self._servers[name] + + def enable(self, name: str) -> None: + if name not in self._servers: + raise MCPError( + code=MCPErrorCode.MCP_INSTALL_FAILED, + what="not found", + why="not found", + fix="x", + ) + self._servers[name]["enabled"] = True + + def disable(self, name: str) -> None: + if name not in self._servers: + raise MCPError( + code=MCPErrorCode.MCP_INSTALL_FAILED, + what="not found", + why="not found", + fix="x", + ) + self._servers[name]["enabled"] = False + + def health_check(self, name: str) -> dict[str, Any]: + if name not in self._servers: + raise MCPError( + code=MCPErrorCode.MCP_HEALTH_FAILED, + what="not found", + why="not found", + fix="x", + ) + return {"name": name, "status": "healthy", "tools": 3, "error": None} + + +@pytest.fixture +def registry(monkeypatch): + reg = _FakeRegistry() + monkeypatch.setattr(routes_mcp, "_registry", lambda: reg) + return reg + + +async def _make_app() -> web.Application: + app = web.Application() + routes_mcp.register_routes(app) + return app + + +@pytest.mark.asyncio +async def test_list_servers_returns_built_in(registry): + app = await _make_app() + async with TestClient(TestServer(app)) as client: + resp = await client.get("/api/mcp/servers") + assert resp.status == 200 + body = await resp.json() + names = {s["name"] for s in body["servers"]} + # The registry fake carries one entry; the list also merges package- + # baked entries from core/framework/agents/queen/mcp_servers.json so + # the UI matches what the queen actually loads. Both should appear. + assert "built-in-seed" in names + sources = {s["name"]: s["source"] for s in body["servers"]} + assert sources.get("built-in-seed") == "registry" + # The package-baked servers (coder-tools/gcu-tools/hive_tools) carry + # source=="built-in" and are non-removable. + pkg_entries = [s for s in body["servers"] if s["source"] == "built-in"] + assert pkg_entries, "expected at least one package-baked MCP server" + assert all(s.get("removable") is False for s in pkg_entries) + + +@pytest.mark.asyncio +async def test_add_local_server(registry): + app = await _make_app() + async with TestClient(TestServer(app)) as client: + resp = await client.post( + "/api/mcp/servers", + json={ + "name": "my-tool", + "transport": "stdio", + "command": "echo", + "args": ["hi"], + "description": "says hi", + }, + ) + assert resp.status == 201 + body = await resp.json() + assert body["server"]["name"] == "my-tool" + assert body["server"]["source"] == "local" + + resp = await client.get("/api/mcp/servers") + names = [s["name"] for s in (await resp.json())["servers"]] + assert "my-tool" in names + + +@pytest.mark.asyncio +async def test_add_rejects_duplicate(registry): + app = await _make_app() + async with TestClient(TestServer(app)) as client: + for _ in range(2): + resp = await client.post( + "/api/mcp/servers", + json={"name": "dup", "transport": "stdio", "command": "x"}, + ) + assert resp.status == 409 + body = await resp.json() + assert "already exists" in body["error"].lower() + assert body["fix"] + + +@pytest.mark.asyncio +async def test_add_rejects_invalid_transport(registry): + app = await _make_app() + async with TestClient(TestServer(app)) as client: + resp = await client.post( + "/api/mcp/servers", + json={"name": "x", "transport": "nope"}, + ) + assert resp.status == 400 + + +@pytest.mark.asyncio +async def test_enable_disable_cycle(registry): + app = await _make_app() + # Seed a local server + registry.add_local(name="local-one", transport="stdio", command="x") + + async with TestClient(TestServer(app)) as client: + resp = await client.post("/api/mcp/servers/local-one/disable") + assert resp.status == 200 + assert (await resp.json())["enabled"] is False + assert registry._servers["local-one"]["enabled"] is False + + resp = await client.post("/api/mcp/servers/local-one/enable") + assert resp.status == 200 + assert (await resp.json())["enabled"] is True + + +@pytest.mark.asyncio +async def test_remove_local_only(registry): + app = await _make_app() + registry.add_local(name="local-two", transport="stdio", command="x") + + async with TestClient(TestServer(app)) as client: + # Built-ins are protected + resp = await client.delete("/api/mcp/servers/built-in-seed") + assert resp.status == 400 + + # Missing + resp = await client.delete("/api/mcp/servers/ghost") + assert resp.status == 404 + + # Happy path + resp = await client.delete("/api/mcp/servers/local-two") + assert resp.status == 200 + assert "local-two" not in registry._servers + + +@pytest.mark.asyncio +async def test_health_check(registry, monkeypatch): + app = await _make_app() + registry.add_local(name="pingable", transport="stdio", command="x") + + async with TestClient(TestServer(app)) as client: + resp = await client.post("/api/mcp/servers/pingable/health") + assert resp.status == 200 + body = await resp.json() + assert body["status"] == "healthy" + assert body["tools"] == 3 diff --git a/core/framework/server/tests/test_queen_tools.py b/core/framework/server/tests/test_queen_tools.py new file mode 100644 index 00000000..432bd12e --- /dev/null +++ b/core/framework/server/tests/test_queen_tools.py @@ -0,0 +1,297 @@ +"""Tests for the per-queen MCP tool allowlist filter + routes. + +Covers: +1. QueenPhaseState filter semantics (default-allow, allowlist, empty, phase- + isolation, memo identity for LLM prompt-cache stability). +2. routes_queen_tools round trip (GET, PATCH, validation, live-session + hot-reload). + +Route tests monkey-patch a tiny queen profile + manager catalog; they never +spawn an MCP subprocess. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock + +import pytest +import yaml +from aiohttp import web +from aiohttp.test_utils import TestClient, TestServer + +from framework.llm.provider import Tool +from framework.server import routes_queen_tools +from framework.tools.queen_lifecycle_tools import QueenPhaseState + + +# --------------------------------------------------------------------------- +# QueenPhaseState filter — pure unit tests +# --------------------------------------------------------------------------- + + +def _tool(name: str) -> Tool: + return Tool(name=name, description=f"desc of {name}", parameters={"type": "object"}) + + +class TestPhaseStateFilter: + def test_default_allow_returns_every_tool(self): + ps = QueenPhaseState(phase="independent") + ps.independent_tools = [_tool("mcp_a"), _tool("mcp_b"), _tool("lc_c")] + ps.mcp_tool_names_all = {"mcp_a", "mcp_b"} + ps.enabled_mcp_tools = None + ps.rebuild_independent_filter() + + names = [t.name for t in ps.get_current_tools()] + assert names == ["mcp_a", "mcp_b", "lc_c"] + + def test_allowlist_keeps_listed_mcp_plus_all_lifecycle(self): + ps = QueenPhaseState(phase="independent") + ps.independent_tools = [_tool("mcp_a"), _tool("mcp_b"), _tool("lc_c")] + ps.mcp_tool_names_all = {"mcp_a", "mcp_b"} + ps.enabled_mcp_tools = ["mcp_a"] + ps.rebuild_independent_filter() + + names = [t.name for t in ps.get_current_tools()] + assert names == ["mcp_a", "lc_c"] + + def test_empty_allowlist_keeps_only_lifecycle(self): + ps = QueenPhaseState(phase="independent") + ps.independent_tools = [_tool("mcp_a"), _tool("mcp_b"), _tool("lc_c")] + ps.mcp_tool_names_all = {"mcp_a", "mcp_b"} + ps.enabled_mcp_tools = [] + ps.rebuild_independent_filter() + + names = [t.name for t in ps.get_current_tools()] + assert names == ["lc_c"] + + def test_filter_isolated_to_independent_phase(self): + ps = QueenPhaseState(phase="independent") + ps.independent_tools = [_tool("mcp_a"), _tool("lc_c")] + ps.working_tools = [_tool("mcp_a"), _tool("lc_c")] + ps.mcp_tool_names_all = {"mcp_a"} + ps.enabled_mcp_tools = [] + ps.rebuild_independent_filter() + + # Independent → filtered + assert [t.name for t in ps.get_current_tools()] == ["lc_c"] + + # Other phases → unaffected + ps.phase = "working" + assert [t.name for t in ps.get_current_tools()] == ["mcp_a", "lc_c"] + + def test_memo_returns_stable_identity_for_prompt_cache(self): + """Same Python list object across turns → LLM prompt cache stays warm.""" + ps = QueenPhaseState(phase="independent") + ps.independent_tools = [_tool("mcp_a"), _tool("lc_c")] + ps.mcp_tool_names_all = {"mcp_a"} + ps.enabled_mcp_tools = None + ps.rebuild_independent_filter() + + first = ps.get_current_tools() + second = ps.get_current_tools() + assert first is second, "memoized list must be the same object across turns" + + # A rebuild should produce a different object so downstream caches + # correctly invalidate. + ps.enabled_mcp_tools = ["mcp_a"] + ps.rebuild_independent_filter() + third = ps.get_current_tools() + assert third is not first + assert [t.name for t in third] == ["mcp_a", "lc_c"] + + +# --------------------------------------------------------------------------- +# Route round-trip tests +# --------------------------------------------------------------------------- + + +@dataclass +class _FakeSession: + queen_name: str + phase_state: QueenPhaseState + colony_runtime: Any = None + id: str = "sess-1" + _queen_tool_registry: Any = None + + +@dataclass +class _FakeManager: + _sessions: dict = field(default_factory=dict) + _mcp_tool_catalog: dict = field(default_factory=dict) + + +@pytest.fixture +def queen_dir(tmp_path, monkeypatch): + """Redirect queen profile storage into a tmp dir.""" + queens_dir = tmp_path / "queens" + queens_dir.mkdir() + monkeypatch.setattr("framework.agents.queen.queen_profiles.QUEENS_DIR", queens_dir) + + queen_id = "queen_technology" + (queens_dir / queen_id).mkdir() + (queens_dir / queen_id / "profile.yaml").write_text( + yaml.safe_dump({"name": "Alexandra", "title": "Head of Technology"}) + ) + return queens_dir, queen_id + + +async def _make_app(*, manager: _FakeManager) -> web.Application: + app = web.Application() + app["manager"] = manager + routes_queen_tools.register_routes(app) + return app + + +@pytest.mark.asyncio +async def test_get_tools_default_allows_everything(queen_dir, monkeypatch): + # Skip ensure_default_queens; our tmp profile is enough. + monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None) + + _, queen_id = queen_dir + + manager = _FakeManager() + manager._mcp_tool_catalog = { + "coder-tools": [ + {"name": "read_file", "description": "read", "input_schema": {}}, + {"name": "write_file", "description": "write", "input_schema": {}}, + ], + } + + app = await _make_app(manager=manager) + async with TestClient(TestServer(app)) as client: + resp = await client.get(f"/api/queen/{queen_id}/tools") + assert resp.status == 200 + body = await resp.json() + + assert body["enabled_mcp_tools"] is None + assert body["stale"] is False + servers = {s["name"]: s for s in body["mcp_servers"]} + assert set(servers) == {"coder-tools"} + # Default-allow → every tool reports enabled=True + for tool in servers["coder-tools"]["tools"]: + assert tool["enabled"] is True + + +@pytest.mark.asyncio +async def test_patch_persists_and_validates(queen_dir, monkeypatch): + monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None) + queens_dir, queen_id = queen_dir + + manager = _FakeManager() + manager._mcp_tool_catalog = { + "coder-tools": [ + {"name": "read_file", "description": "", "input_schema": {}}, + {"name": "write_file", "description": "", "input_schema": {}}, + ] + } + + app = await _make_app(manager=manager) + async with TestClient(TestServer(app)) as client: + # Happy path + resp = await client.patch( + f"/api/queen/{queen_id}/tools", + json={"enabled_mcp_tools": ["read_file"]}, + ) + assert resp.status == 200 + body = await resp.json() + assert body["enabled_mcp_tools"] == ["read_file"] + + # Profile persisted + raw = yaml.safe_load((queens_dir / queen_id / "profile.yaml").read_text()) + assert raw["enabled_mcp_tools"] == ["read_file"] + + # GET reflects the new state + resp = await client.get(f"/api/queen/{queen_id}/tools") + body = await resp.json() + servers = {t["name"]: t for t in body["mcp_servers"][0]["tools"]} + assert servers["read_file"]["enabled"] is True + assert servers["write_file"]["enabled"] is False + + # Null resets + resp = await client.patch( + f"/api/queen/{queen_id}/tools", json={"enabled_mcp_tools": None} + ) + assert resp.status == 200 + body = await resp.json() + assert body["enabled_mcp_tools"] is None + + # Unknown tool name → 400; profile unchanged + resp = await client.patch( + f"/api/queen/{queen_id}/tools", + json={"enabled_mcp_tools": ["nope_not_a_tool"]}, + ) + assert resp.status == 400 + detail = await resp.json() + assert "nope_not_a_tool" in detail.get("unknown", []) + raw = yaml.safe_load((queens_dir / queen_id / "profile.yaml").read_text()) + # Still cleared from the previous successful null-reset + assert raw["enabled_mcp_tools"] is None + + +@pytest.mark.asyncio +async def test_patch_hot_reloads_live_session(queen_dir, monkeypatch): + monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None) + _, queen_id = queen_dir + + # Build a fake live session whose phase state carries a tool list the + # filter can gate. We also need a fake registry so + # _catalog_from_live_session can enumerate tools. + class _FakeRegistry: + def __init__(self, server_map, tools_by_name): + self._mcp_server_tools = server_map + self._tools_by_name = tools_by_name + + def get_tools(self): + return {n: MagicMock(name=n) for n in self._tools_by_name} + + tools_by_name = {"read_file": _tool("read_file"), "write_file": _tool("write_file")} + registry = _FakeRegistry( + server_map={"coder-tools": {"read_file", "write_file"}}, + tools_by_name=tools_by_name, + ) + # Patch get_tools to return real Tool objects for name/description plumbing. + registry.get_tools = lambda: tools_by_name # type: ignore[method-assign] + + phase_state = QueenPhaseState(phase="independent") + phase_state.independent_tools = [tools_by_name["read_file"], tools_by_name["write_file"]] + phase_state.mcp_tool_names_all = {"read_file", "write_file"} + phase_state.enabled_mcp_tools = None + phase_state.rebuild_independent_filter() + + session = _FakeSession(queen_name=queen_id, phase_state=phase_state) + session._queen_tool_registry = registry + manager = _FakeManager(_sessions={"sess-1": session}) + + app = await _make_app(manager=manager) + async with TestClient(TestServer(app)) as client: + resp = await client.patch( + f"/api/queen/{queen_id}/tools", + json={"enabled_mcp_tools": ["read_file"]}, + ) + assert resp.status == 200 + body = await resp.json() + assert body["refreshed_sessions"] == 1 + + # Session's phase state reflects the new allowlist without a restart + current = phase_state.get_current_tools() + assert [t.name for t in current] == ["read_file"] + + +@pytest.mark.asyncio +async def test_missing_queen_returns_404(queen_dir, monkeypatch): + monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None) + manager = _FakeManager() + + app = await _make_app(manager=manager) + async with TestClient(TestServer(app)) as client: + resp = await client.get("/api/queen/queen_nonexistent/tools") + assert resp.status == 404 + + resp = await client.patch( + "/api/queen/queen_nonexistent/tools", + json={"enabled_mcp_tools": None}, + ) + assert resp.status == 404 diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py index 8c7228f8..86e2cf9d 100644 --- a/core/framework/tools/queen_lifecycle_tools.py +++ b/core/framework/tools/queen_lifecycle_tools.py @@ -186,6 +186,22 @@ class QueenPhaseState: global_memory_dir: Path | None = None queen_memory_dir: Path | None = None + # Per-queen MCP tool allowlist for the INDEPENDENT phase. ``None`` means + # "allow every MCP tool" (default, backward-compatible). An explicit list + # is authoritative: only tools whose name appears here pass through. + # Lifecycle / synthetic tools bypass this gate regardless. + enabled_mcp_tools: list[str] | None = None + # Union of every MCP-origin tool name currently registered — the set the + # allowlist can gate. Populated once at queen boot from + # ``ToolRegistry._mcp_server_tools``. Names outside this set (lifecycle, + # ``ask_user``) always pass through the filter. + mcp_tool_names_all: set = field(default_factory=set) + # Memoized output of the filter applied to ``independent_tools``. + # Recomputed only when ``enabled_mcp_tools`` or ``independent_tools`` + # changes, so ``get_current_tools()`` in the independent phase returns + # a byte-stable list between saves and the LLM prompt cache stays warm. + _filtered_independent_tools: list = field(default_factory=list) + async def switch_to_working(self, source: str = "tool") -> None: """Switch to working phase — colony workers are running. @@ -204,6 +220,27 @@ class QueenPhaseState: "Colony workers are running. Available tools: " + ", ".join(tool_names) + "." ) + def rebuild_independent_filter(self) -> None: + """Recompute the memoized independent-phase tool list. + + Called once at queen boot (after ``independent_tools``, + ``mcp_tool_names_all`` and ``enabled_mcp_tools`` are all populated) + and again from the tools-PATCH handler whenever the allowlist + changes. Keeping the result memoized means the independent-phase + branch of ``get_current_tools()`` returns the same Python list + object across turns, so the LLM prompt cache stays warm until + the user explicitly edits their allowlist. + """ + if self.enabled_mcp_tools is None: + self._filtered_independent_tools = list(self.independent_tools) + return + allowed = set(self.enabled_mcp_tools) + self._filtered_independent_tools = [ + t + for t in self.independent_tools + if t.name not in self.mcp_tool_names_all or t.name in allowed + ] + def get_current_tools(self) -> list: """Return tools for the current phase.""" if self.phase == "working": @@ -212,8 +249,14 @@ class QueenPhaseState: return list(self.reviewing_tools) if self.phase == "incubating": return list(self.incubating_tools) - # Default / "independent" — DM mode with full MCP tools. - return list(self.independent_tools) + # Default / "independent" — DM mode with full MCP tools, gated by + # the per-queen allowlist. Return the memoized list directly so the + # JSON sent to the LLM is byte-identical turn-to-turn. + if not self._filtered_independent_tools and self.independent_tools: + # Safety net: first-call in tests or code paths that skipped + # the explicit boot-time rebuild. + self.rebuild_independent_filter() + return self._filtered_independent_tools def get_current_prompt(self) -> str: """Return the system prompt for the current phase.""" diff --git a/core/frontend/src/App.tsx b/core/frontend/src/App.tsx index f85a2ea8..1f04e716 100644 --- a/core/frontend/src/App.tsx +++ b/core/frontend/src/App.tsx @@ -5,6 +5,7 @@ import ColonyChat from "./pages/colony-chat"; import QueenDM from "./pages/queen-dm"; import OrgChart from "./pages/org-chart"; import PromptLibrary from "./pages/prompt-library"; +import ToolLibrary from "./pages/tool-library"; import CredentialsPage from "./pages/credentials"; import NotFound from "./pages/not-found"; @@ -17,6 +18,7 @@ function App() { } /> } /> } /> + } /> } /> } /> diff --git a/core/frontend/src/api/colonies.ts b/core/frontend/src/api/colonies.ts new file mode 100644 index 00000000..ddff1d4e --- /dev/null +++ b/core/frontend/src/api/colonies.ts @@ -0,0 +1,50 @@ +import { api } from "./client"; +import type { ToolMeta, McpServerTools } from "./queens"; + +export interface ColonySummary { + name: string; + queen_name: string | null; + created_at: string | null; + has_allowlist: boolean; + enabled_count: number | null; +} + +export interface ColonyToolsResponse { + colony_name: string; + enabled_mcp_tools: string[] | null; + stale: boolean; + lifecycle: ToolMeta[]; + synthetic: ToolMeta[]; + mcp_servers: McpServerTools[]; +} + +export interface ColonyToolsUpdateResult { + colony_name: string; + enabled_mcp_tools: string[] | null; + refreshed_runtimes: number; + note?: string; +} + +export const coloniesApi = { + /** List every colony on disk with a summary of its tool allowlist. */ + list: () => + api.get<{ colonies: ColonySummary[] }>(`/colonies/tools-index`), + + /** Enumerate a colony's tool surface (lifecycle + synthetic + MCP). */ + getTools: (colonyName: string) => + api.get( + `/colony/${encodeURIComponent(colonyName)}/tools`, + ), + + /** Persist a colony's MCP tool allowlist. + * + * ``null`` resets to "allow every MCP tool". A list of names enables + * only those MCP tools. Changes take effect on the next worker spawn; + * in-flight workers keep their booted tool list. + */ + updateTools: (colonyName: string, enabled: string[] | null) => + api.patch( + `/colony/${encodeURIComponent(colonyName)}/tools`, + { enabled_mcp_tools: enabled }, + ), +}; diff --git a/core/frontend/src/api/mcp.ts b/core/frontend/src/api/mcp.ts new file mode 100644 index 00000000..57d6d6e1 --- /dev/null +++ b/core/frontend/src/api/mcp.ts @@ -0,0 +1,66 @@ +import { api } from "./client"; + +export type McpTransport = "stdio" | "http" | "sse" | "unix"; + +export interface McpServer { + name: string; + /** "local": added via UI/CLI (user-editable). "registry": installed from + * the remote MCP registry. "built-in": baked into the queen package — + * visible but not removable from the UI. */ + source: "local" | "registry" | "built-in"; + transport: McpTransport | string; + description: string; + enabled: boolean; + last_health_status: "healthy" | "unhealthy" | null; + last_error: string | null; + last_health_check_at: string | null; + tool_count: number | null; + /** Servers flagged removable:false cannot be deleted from the UI. */ + removable?: boolean; +} + +export interface AddMcpServerBody { + name: string; + transport: McpTransport; + /** stdio */ + command?: string; + args?: string[]; + env?: Record; + cwd?: string; + /** http / sse */ + url?: string; + headers?: Record; + /** unix */ + socket_path?: string; + description?: string; +} + +export interface McpHealthResult { + name: string; + status: "healthy" | "unhealthy" | "unknown"; + tools: number; + error: string | null; +} + +/** Backend MCPError shape when an operation fails. */ +export interface McpErrorBody { + error: string; + code?: string; + what?: string; + why?: string; + fix?: string; +} + +export const mcpApi = { + listServers: () => api.get<{ servers: McpServer[] }>("/mcp/servers"), + addServer: (body: AddMcpServerBody) => + api.post<{ server: McpServer; hint: string }>("/mcp/servers", body), + removeServer: (name: string) => + api.delete<{ removed: string }>(`/mcp/servers/${encodeURIComponent(name)}`), + setEnabled: (name: string, enabled: boolean) => + api.post<{ name: string; enabled: boolean }>( + `/mcp/servers/${encodeURIComponent(name)}/${enabled ? "enable" : "disable"}`, + ), + checkHealth: (name: string) => + api.post(`/mcp/servers/${encodeURIComponent(name)}/health`), +}; diff --git a/core/frontend/src/api/queens.ts b/core/frontend/src/api/queens.ts index 35e57dea..4bd345dd 100644 --- a/core/frontend/src/api/queens.ts +++ b/core/frontend/src/api/queens.ts @@ -16,6 +16,33 @@ export interface QueenSessionResult { status: "live" | "resumed" | "created"; } +export interface ToolMeta { + name: string; + description: string; + input_schema?: Record; + editable?: boolean; +} + +export interface McpServerTools { + name: string; + tools: Array; +} + +export interface QueenToolsResponse { + queen_id: string; + enabled_mcp_tools: string[] | null; + stale: boolean; + lifecycle: ToolMeta[]; + synthetic: ToolMeta[]; + mcp_servers: McpServerTools[]; +} + +export interface QueenToolsUpdateResult { + queen_id: string; + enabled_mcp_tools: string[] | null; + refreshed_sessions: number; +} + export const queensApi = { /** List all queen profiles (id, name, title). */ list: () => @@ -57,4 +84,19 @@ export const queensApi = { initial_prompt: initialPrompt, initial_phase: initialPhase || undefined, }), + + /** Enumerate the queen's tool surface (lifecycle + synthetic + MCP). */ + getTools: (queenId: string) => + api.get(`/queen/${queenId}/tools`), + + /** Persist the MCP tool allowlist for a queen. + * + * Pass ``null`` to reset to the default ("allow every MCP tool") or an + * explicit list to restrict the queen's tool surface. Lifecycle and + * synthetic tools are always enabled and cannot be listed here. + */ + updateTools: (queenId: string, enabled: string[] | null) => + api.patch(`/queen/${queenId}/tools`, { + enabled_mcp_tools: enabled, + }), }; diff --git a/core/frontend/src/components/ColonyToolsSection.tsx b/core/frontend/src/components/ColonyToolsSection.tsx new file mode 100644 index 00000000..a897c92e --- /dev/null +++ b/core/frontend/src/components/ColonyToolsSection.tsx @@ -0,0 +1,27 @@ +import { useCallback } from "react"; +import { coloniesApi } from "@/api/colonies"; +import ToolsEditor from "./ToolsEditor"; + +export default function ColonyToolsSection({ + colonyName, +}: { + colonyName: string; +}) { + const fetchSnapshot = useCallback( + () => coloniesApi.getTools(colonyName), + [colonyName], + ); + const saveAllowlist = useCallback( + (enabled: string[] | null) => coloniesApi.updateTools(colonyName, enabled), + [colonyName], + ); + return ( + + ); +} diff --git a/core/frontend/src/components/McpServersPanel.tsx b/core/frontend/src/components/McpServersPanel.tsx new file mode 100644 index 00000000..3a4eaf0f --- /dev/null +++ b/core/frontend/src/components/McpServersPanel.tsx @@ -0,0 +1,651 @@ +import { useEffect, useState } from "react"; +import { + Plus, + Trash2, + RefreshCw, + Loader2, + AlertCircle, + Check, + X, + Server, + CircleCheck, + CircleAlert, + CircleDashed, +} from "lucide-react"; +import { + mcpApi, + type McpServer, + type McpTransport, + type AddMcpServerBody, +} from "@/api/mcp"; + +type TransportKey = McpTransport; + +const TRANSPORT_OPTIONS: TransportKey[] = ["stdio", "http", "sse", "unix"]; + +function healthBadge(server: McpServer) { + if (!server.enabled) { + return ( + + Disabled + + ); + } + if (server.last_health_status === "healthy") { + return ( + + Healthy + + ); + } + if (server.last_health_status === "unhealthy") { + return ( + + Unhealthy + + ); + } + return ( + + Unknown + + ); +} + +interface AddFormState { + name: string; + transport: TransportKey; + command: string; + args: string; + env: string; + cwd: string; + url: string; + headers: string; + socketPath: string; + description: string; +} + +const EMPTY_FORM: AddFormState = { + name: "", + transport: "stdio", + command: "", + args: "", + env: "", + cwd: "", + url: "", + headers: "", + socketPath: "", + description: "", +}; + +function parseKeyValueLines(text: string): Record { + const out: Record = {}; + text + .split("\n") + .map((l) => l.trim()) + .filter(Boolean) + .forEach((line) => { + const eq = line.indexOf("="); + if (eq < 0) return; + const k = line.slice(0, eq).trim(); + const v = line.slice(eq + 1).trim(); + if (k) out[k] = v; + }); + return out; +} + +function buildAddBody(form: AddFormState): AddMcpServerBody { + const body: AddMcpServerBody = { + name: form.name.trim(), + transport: form.transport, + description: form.description.trim() || undefined, + }; + if (form.transport === "stdio") { + body.command = form.command.trim(); + const args = form.args + .split("\n") + .map((s) => s.trim()) + .filter(Boolean); + if (args.length) body.args = args; + const env = parseKeyValueLines(form.env); + if (Object.keys(env).length) body.env = env; + if (form.cwd.trim()) body.cwd = form.cwd.trim(); + } else if (form.transport === "http" || form.transport === "sse") { + body.url = form.url.trim(); + const headers = parseKeyValueLines(form.headers); + if (Object.keys(headers).length) body.headers = headers; + } else if (form.transport === "unix") { + body.socket_path = form.socketPath.trim(); + } + return body; +} + +export default function McpServersPanel() { + const [servers, setServers] = useState(null); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + + const [adding, setAdding] = useState(false); + const [form, setForm] = useState(EMPTY_FORM); + const [submitting, setSubmitting] = useState(false); + const [submitError, setSubmitError] = useState(null); + + const [busyByName, setBusyByName] = useState>({}); + + const refresh = async () => { + setLoading(true); + setError(null); + try { + const { servers } = await mcpApi.listServers(); + setServers(servers); + } catch (e: unknown) { + setError((e as Error)?.message || "Failed to load MCP servers"); + } finally { + setLoading(false); + } + }; + + useEffect(() => { + refresh(); + }, []); + + const setBusy = (name: string, v: boolean) => + setBusyByName((p) => ({ ...p, [name]: v })); + + const handleToggle = async (server: McpServer) => { + setBusy(server.name, true); + try { + await mcpApi.setEnabled(server.name, !server.enabled); + await refresh(); + } catch (e: unknown) { + setError((e as Error)?.message || "Toggle failed"); + } finally { + setBusy(server.name, false); + } + }; + + const handleRemove = async (server: McpServer) => { + if (!confirm(`Remove MCP server "${server.name}"?`)) return; + setBusy(server.name, true); + try { + await mcpApi.removeServer(server.name); + await refresh(); + } catch (e: unknown) { + const body = (e as { body?: { error?: string } }).body; + setError(body?.error || (e as Error)?.message || "Remove failed"); + } finally { + setBusy(server.name, false); + } + }; + + const handleHealth = async (server: McpServer) => { + setBusy(server.name, true); + try { + await mcpApi.checkHealth(server.name); + await refresh(); + } catch (e: unknown) { + setError((e as Error)?.message || "Health check failed"); + } finally { + setBusy(server.name, false); + } + }; + + const canSubmit = (() => { + if (!form.name.trim()) return false; + if (form.transport === "stdio") return !!form.command.trim(); + if (form.transport === "http" || form.transport === "sse") + return !!form.url.trim(); + if (form.transport === "unix") return !!form.socketPath.trim(); + return false; + })(); + + const handleSubmit = async () => { + if (!canSubmit) return; + setSubmitting(true); + setSubmitError(null); + try { + const body = buildAddBody(form); + const { server } = await mcpApi.addServer(body); + // Best-effort: auto-run health check so the UI shows tool count. + try { + await mcpApi.checkHealth(server.name); + } catch { + /* health check is informational; don't block the add flow */ + } + setAdding(false); + setForm(EMPTY_FORM); + await refresh(); + } catch (e: unknown) { + const body = (e as { body?: { error?: string; fix?: string } }).body; + setSubmitError( + [body?.error, body?.fix].filter(Boolean).join(" — ") || + (e as Error)?.message || + "Add failed", + ); + } finally { + setSubmitting(false); + } + }; + + // Group by origin. "local" = user-registered via the UI or CLI. Everything + // else (built-in package entries, registry-installed entries) sits under + // "Built-in" since the user can't remove them from the UI. + const builtIns = (servers || []).filter((s) => s.source !== "local"); + const custom = (servers || []).filter((s) => s.source === "local"); + + return ( +
+
+
+

MCP Servers

+

+ Register your own MCP servers so queens can use their tools. New + servers take effect in the next queen session you start. +

+
+
+ + +
+
+ + {error && ( +
+ + {error} + +
+ )} + + {loading && !servers && ( +
+ Loading MCP servers… +
+ )} + + {servers && ( + <> + {custom.length > 0 && ( +
+ {custom.map((s) => ( + handleToggle(s)} + onRemove={() => handleRemove(s)} + onHealth={() => handleHealth(s)} + isLocal + /> + ))} +
+ )} +
+ {builtIns.length === 0 ? ( +

+ No built-in servers registered. +

+ ) : ( + builtIns.map((s) => ( + handleToggle(s)} + onRemove={() => handleRemove(s)} + onHealth={() => handleHealth(s)} + /> + )) + )} +
+ + )} + + {/* Add MCP modal */} + {adding && ( +
+
!submitting && setAdding(false)} + /> +
+
+

+ Add MCP Server +

+ +
+ + + + setForm((f) => ({ + ...f, + name: e.target.value.toLowerCase().replace(/[^a-z0-9_-]/g, ""), + })) + } + placeholder="my-search-tool" + className={inputCls} + /> + + + +
+ {TRANSPORT_OPTIONS.map((t) => ( + + ))} +
+
+ + {form.transport === "stdio" && ( + <> + + + setForm((f) => ({ ...f, command: e.target.value })) + } + placeholder="uv" + className={inputCls} + /> + + +