feat: tool library

This commit is contained in:
Richard Tang
2026-04-21 17:20:54 -07:00
parent 80cd77ac30
commit 8a0ec070b8
24 changed files with 3873 additions and 4 deletions
+101
View File
@@ -0,0 +1,101 @@
"""Read/write helpers for per-colony metadata.json.
A colony's metadata.json lives at ``{COLONIES_DIR}/{colony_name}/metadata.json``
and holds both immutable provenance (the queen that created it, the forked
session id, timestamps) and a small number of mutable user-editable fields.
Today the only mutable field we surface through the UI is:
- ``enabled_mcp_tools: list[str] | null`` — the per-colony MCP tool
allowlist. ``None`` means "allow every MCP tool" (default), so
existing colonies without the key keep their current behavior.
Keeping the read/write helpers in one place instead of scattering
``json.loads(metadata_path.read_text())`` across the server makes the
schema easy to evolve without chasing readers.
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Any
from framework.config import COLONIES_DIR
logger = logging.getLogger(__name__)
def colony_metadata_path(colony_name: str) -> Path:
    """Build the filesystem location of ``metadata.json`` for ``colony_name``.

    The result is ``COLONIES_DIR / colony_name / "metadata.json"``. Nothing
    is checked or created on disk.
    """
    colony_dir = COLONIES_DIR / colony_name
    return colony_dir / "metadata.json"
def load_colony_metadata(colony_name: str) -> dict[str, Any]:
    """Load ``metadata.json`` for ``colony_name``.

    Returns:
        The parsed JSON object. An empty dict is returned when the file is
        missing, cannot be read, is not valid UTF-8 or JSON, or when its
        top-level value is not an object. Callers are expected to treat
        missing fields as defaults.
    """
    path = colony_metadata_path(colony_name)
    if not path.exists():
        return {}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError as well as the
        # UnicodeDecodeError raised when the file is not valid UTF-8, so
        # every flavour of "malformed" yields the documented empty dict.
        logger.warning("Failed to read colony metadata at %s", path)
        return {}
    return data if isinstance(data, dict) else {}
def update_colony_metadata(colony_name: str, updates: dict[str, Any]) -> dict[str, Any]:
    """Apply ``updates`` on top of the stored metadata and write it back.

    The merge is shallow: every top-level key in ``updates`` replaces the
    stored value. The merged document is written to a temporary file in the
    colony directory, flushed and fsynced, and then moved over
    ``metadata.json`` with ``os.replace`` so readers are unlikely to observe
    a half-written file.

    Returns:
        The complete metadata dict after the merge.

    Raises:
        FileNotFoundError: If the colony directory does not exist.
    """
    import os
    import tempfile

    path = colony_metadata_path(colony_name)
    colony_dir = path.parent
    if not colony_dir.exists():
        raise FileNotFoundError(f"Colony '{colony_name}' not found")

    merged = load_colony_metadata(colony_name) if path.exists() else {}
    merged.update(updates)

    colony_dir.mkdir(parents=True, exist_ok=True)
    handle, scratch_path = tempfile.mkstemp(
        prefix=".metadata.",
        suffix=".json.tmp",
        dir=str(colony_dir),
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as stream:
            json.dump(merged, stream, indent=2)
            stream.flush()
            os.fsync(stream.fileno())
        os.replace(scratch_path, path)
    except BaseException:
        # Best-effort removal of the temp file; the original error is what
        # the caller needs to see, so cleanup failures are ignored.
        try:
            os.unlink(scratch_path)
        except OSError:
            pass
        raise
    return merged
def list_colony_names() -> list[str]:
    """List the colony directories that contain a ``metadata.json`` file.

    Entries of ``COLONIES_DIR`` are visited in sorted order. An empty list is
    returned when ``COLONIES_DIR`` is not a directory.
    """
    if not COLONIES_DIR.is_dir():
        return []
    return [
        child.name
        for child in sorted(COLONIES_DIR.iterdir())
        if child.is_dir() and (child / "metadata.json").exists()
    ]
+60
View File
@@ -242,6 +242,19 @@ class ColonyRuntime:
self._tools = tools or []
self._tool_executor = tool_executor
# Per-colony MCP tool allowlist — applied when spawning workers. A
# value of ``None`` means "allow every MCP tool" (default), an empty
# list disables every MCP tool, and a list of names only enables
# those. Lifecycle / synthetic tools always pass through the filter
# because their names are absent from ``_mcp_tool_names_all``. The
# allowlist is re-read on every ``spawn`` so a PATCH that mutates
# this attribute via ``set_tool_allowlist`` takes effect on the
# NEXT worker spawn without a runtime restart. In-flight workers
# keep the tool list they booted with — workers have no dynamic
# tools provider today.
self._enabled_mcp_tools: list[str] | None = None
self._mcp_tool_names_all: set[str] = set()
# Worker management
self._workers: dict[str, Worker] = {}
# The persistent client-facing overseer (optional). Set by
@@ -384,6 +397,45 @@ class ColonyRuntime:
return PipelineRunner([])
return build_pipeline_from_config(stages_config)
# ── Per-colony tool allowlist ───────────────────────────────
def set_tool_allowlist(
    self,
    enabled_mcp_tools: list[str] | None,
    mcp_tool_names_all: set[str] | None = None,
) -> None:
    """Store the per-colony MCP tool allowlist on this runtime.

    Called at construction time (from SessionManager) and again by the
    ``/api/colony/{name}/tools`` PATCH handler when a user edits the
    allowlist. The new value is picked up by the NEXT worker spawn; the
    tool list of a worker that is already running is never touched,
    because workers have no dynamic tools provider and hot-reloading
    would diverge from the list the LLM is already using.

    Args:
        enabled_mcp_tools: ``None`` to allow every MCP tool, otherwise the
            names to allow. The list is copied.
        mcp_tool_names_all: When given, replaces the set of names that are
            treated as MCP tools. When omitted, the current set is kept.
    """
    if enabled_mcp_tools is None:
        self._enabled_mcp_tools = None
    else:
        self._enabled_mcp_tools = list(enabled_mcp_tools)

    if mcp_tool_names_all is not None:
        self._mcp_tool_names_all = set(mcp_tool_names_all)
def _apply_tool_allowlist(self, tools: list) -> list:
    """Return the subset of ``tools`` permitted by the colony allowlist.

    A tool whose name is not in ``_mcp_tool_names_all`` (lifecycle and
    synthetic tools) always passes. An MCP tool passes only when its name
    is in ``_enabled_mcp_tools``. With no allowlist configured
    (``_enabled_mcp_tools`` is ``None``) the input list itself is returned
    unchanged. Input order is preserved so downstream cache keys and logs
    stay stable.
    """
    enabled = self._enabled_mcp_tools
    if enabled is None:
        return tools

    permitted = set(enabled)
    kept = []
    for tool in tools:
        tool_name = getattr(tool, "name", None)
        if tool_name not in self._mcp_tool_names_all or tool_name in permitted:
            kept.append(tool)
    return kept
# ── Lifecycle ───────────────────────────────────────────────
async def start(self) -> None:
@@ -658,6 +710,14 @@ class ColonyRuntime:
spawn_tools = tools if tools is not None else self._tools
spawn_executor = tool_executor or self._tool_executor
# Apply the per-colony MCP tool allowlist (if any). Done HERE —
# after spawn_tools is resolved but before it's frozen into the
# worker's AgentContext — so the next spawn reflects any PATCH
# that happened since the last spawn. A value of ``None`` on
# ``_enabled_mcp_tools`` is a no-op so the default path is
# unchanged.
spawn_tools = self._apply_tool_allowlist(spawn_tools)
# Colony progress tracker: when the caller supplied a db_path
# in input_data, this worker is part of a SQLite task queue
# and must see the hive.colony-progress-tracker skill body in
+6
View File
@@ -333,6 +333,9 @@ def create_app(model: str | None = None) -> web.Application:
from framework.server.routes_logs import register_routes as register_log_routes
from framework.server.routes_messages import register_routes as register_message_routes
from framework.server.routes_prompts import register_routes as register_prompt_routes
from framework.server.routes_colony_tools import register_routes as register_colony_tools_routes
from framework.server.routes_mcp import register_routes as register_mcp_routes
from framework.server.routes_queen_tools import register_routes as register_queen_tools_routes
from framework.server.routes_queens import register_routes as register_queen_routes
from framework.server.routes_sessions import register_routes as register_session_routes
from framework.server.routes_workers import register_routes as register_worker_routes
@@ -346,6 +349,9 @@ def create_app(model: str | None = None) -> web.Application:
register_worker_routes(app)
register_log_routes(app)
register_queen_routes(app)
register_queen_tools_routes(app)
register_colony_tools_routes(app)
register_mcp_routes(app)
register_colony_worker_routes(app)
register_prompt_routes(app)
+177
View File
@@ -253,6 +253,93 @@ async def materialize_queen_identity(
)
def build_queen_tool_registry_bare() -> tuple[Any, dict[str, list[dict[str, Any]]]]:
    """Build a Queen ``ToolRegistry`` and a (server_name → tools) catalog.

    Used by the Tool Library GET route to populate the MCP tool surface
    without needing a live queen session. Queen lifecycle tools are NOT
    registered here (they require a Session stub); the catalog only covers
    MCP-origin tools, which is what the allowlist gates.

    Loading MCP servers spawns subprocesses, so call this once per backend
    process and cache the result.

    Returns:
        ``(registry, catalog)`` where ``catalog`` maps each server name to a
        name-sorted list of ``{"name", "description", "input_schema"}``
        dicts. Failures while loading servers are logged and leave the
        corresponding servers out rather than raising.
    """
    from pathlib import Path

    from framework.loader.mcp_registry import MCPRegistry
    from framework.loader.tool_registry import ToolRegistry
    import framework.agents.queen as _queen_pkg

    queen_registry = ToolRegistry()
    queen_pkg_dir = Path(_queen_pkg.__file__).parent

    # Package-baked servers shipped next to the queen agent.
    mcp_config = queen_pkg_dir / "mcp_servers.json"
    if mcp_config.exists():
        try:
            queen_registry.load_mcp_config(mcp_config)
        except Exception:
            logger.warning("build_queen_tool_registry_bare: MCP config failed", exc_info=True)

    try:
        reg = MCPRegistry()
        reg.initialize()
        if (queen_pkg_dir / "mcp_registry.json").is_file():
            queen_registry.set_mcp_registry_agent_path(queen_pkg_dir)
        # NOTE(review): the selection is loaded regardless of whether
        # mcp_registry.json exists — confirm against the upstream layout.
        registry_configs, selection_max_tools = reg.load_agent_selection(queen_pkg_dir)

        # Also pick up enabled, user-added ("local") servers that the repo
        # selection has not already loaded.
        already = {cfg.get("name") for cfg in registry_configs if cfg.get("name")}
        extra: list[str] = []
        try:
            for entry in reg.list_installed():
                if entry.get("source") != "local":
                    continue
                if not entry.get("enabled", True):
                    continue
                name = entry.get("name")
                if name and name not in already:
                    extra.append(name)
        except Exception:
            # Still best-effort, but leave a trace instead of swallowing the
            # error silently — matches the handling in create_queen.
            logger.debug(
                "build_queen_tool_registry_bare: list_installed() failed while auto-including user servers",
                exc_info=True,
            )

        if extra:
            try:
                extra_configs = reg.resolve_for_agent(include=extra)
                registry_configs = list(registry_configs) + [
                    reg._server_config_to_dict(c) for c in extra_configs
                ]
            except Exception:
                logger.debug("build_queen_tool_registry_bare: resolve_for_agent(extra) failed", exc_info=True)

        if registry_configs:
            queen_registry.load_registry_servers(
                registry_configs,
                preserve_existing_tools=True,
                log_collisions=False,
                max_tools=selection_max_tools,
            )
    except Exception:
        logger.warning("build_queen_tool_registry_bare: MCP registry load failed", exc_info=True)

    # Build the catalog.
    tools_by_name = queen_registry.get_tools()
    server_map = dict(getattr(queen_registry, "_mcp_server_tools", {}) or {})
    catalog: dict[str, list[dict[str, Any]]] = {}
    for server_name in sorted(server_map):
        entries: list[dict[str, Any]] = []
        for tool_name in sorted(server_map[server_name]):
            tool = tools_by_name.get(tool_name)
            if tool is None:
                # The server advertised a tool the registry did not keep.
                continue
            entries.append(
                {
                    "name": tool.name,
                    "description": tool.description,
                    "input_schema": tool.parameters,
                }
            )
        catalog[server_name] = entries
    return queen_registry, catalog
async def create_queen(
session: Session,
session_manager: Any,
@@ -326,6 +413,45 @@ async def create_queen(
if (queen_pkg_dir / "mcp_registry.json").is_file():
queen_registry.set_mcp_registry_agent_path(queen_pkg_dir)
registry_configs, selection_max_tools = registry.load_agent_selection(queen_pkg_dir)
# Auto-include every user-added local MCP server that the repo
# selection hasn't already loaded. Users register servers via
# the `/api/mcp/servers` route (or `hive mcp add`); they live in
# ~/.hive/mcp_registry/installed.json with source == "local".
# New servers take effect on the next queen session start; the
# prompt cache and ToolRegistry are still loaded once per boot.
already_loaded_names = {cfg.get("name") for cfg in registry_configs if cfg.get("name")}
extra_names: list[str] = []
try:
for entry in registry.list_installed():
if entry.get("source") != "local":
continue
if not entry.get("enabled", True):
continue
name = entry.get("name")
if not name or name in already_loaded_names:
continue
extra_names.append(name)
except Exception:
logger.debug("Queen: list_installed() failed while auto-including user servers", exc_info=True)
if extra_names:
try:
extra_configs = registry.resolve_for_agent(include=extra_names)
extra_dicts = [registry._server_config_to_dict(c) for c in extra_configs]
registry_configs = list(registry_configs) + extra_dicts
logger.info(
"Queen: auto-including %d user-added MCP server(s): %s",
len(extra_dicts),
[c.get("name") for c in extra_dicts],
)
except Exception:
logger.warning(
"Queen: failed to resolve user-added MCP servers %s",
extra_names,
exc_info=True,
)
if registry_configs:
results = queen_registry.load_registry_servers(
registry_configs,
@@ -417,6 +543,57 @@ async def create_queen(
sorted(t.name for t in phase_state.incubating_tools),
)
# ---- Per-queen MCP tool allowlist --------------------------------
# Capture the set of MCP-origin tool names so the allowlist in
# ``QueenPhaseState`` only gates MCP tools (lifecycle and synthetic
# tools always pass through). Then apply the queen profile's stored
# allowlist (if any) and memoize the filtered independent tool list.
mcp_server_tools_map: dict[str, set[str]] = dict(
getattr(queen_registry, "_mcp_server_tools", {})
)
phase_state.mcp_tool_names_all = set().union(*mcp_server_tools_map.values()) if mcp_server_tools_map else set()
phase_state.enabled_mcp_tools = queen_profile.get("enabled_mcp_tools")
phase_state.rebuild_independent_filter()
if phase_state.enabled_mcp_tools is not None:
total_mcp = len(phase_state.mcp_tool_names_all)
allowed_mcp = len(set(phase_state.enabled_mcp_tools) & phase_state.mcp_tool_names_all)
logger.info(
"Queen: per-queen MCP allowlist active — %d of %d MCP tools enabled",
allowed_mcp,
total_mcp,
)
# ---- MCP tool catalog for the frontend ---------------------------
# Snapshot per-server tool metadata so the Queen Tools API can render
# the tool surface without spawning MCP subprocesses. Keyed by server
# name so the UI can group tools by origin. Updated every time a
# queen boots, so installing a new server and starting a new queen
# session refreshes the catalog.
mcp_tool_catalog: dict[str, list[dict[str, Any]]] = {}
tools_by_name = {t.name: t for t in queen_tools}
for server_name, tool_names in mcp_server_tools_map.items():
server_entries: list[dict[str, Any]] = []
for tool_name in sorted(tool_names):
tool = tools_by_name.get(tool_name)
if tool is None:
continue
server_entries.append(
{
"name": tool.name,
"description": tool.description,
"input_schema": tool.parameters,
}
)
mcp_tool_catalog[server_name] = server_entries
# All queens share one MCP registry, so the catalog is a manager-level
# fact; stash it on the SessionManager so the Queen Tools route can
# render the tool list even when no queen session is currently live.
if session_manager is not None:
try:
session_manager._mcp_tool_catalog = mcp_tool_catalog # type: ignore[attr-defined]
except Exception:
logger.debug("Queen: could not attach mcp_tool_catalog to manager", exc_info=True)
# ---- Global + queen-scoped memory ----------------------------------
global_dir, queen_mem_dir = initialize_memory_scopes(session, phase_state)
@@ -0,0 +1,329 @@
"""Per-colony MCP tool allowlist routes.
- GET /api/colony/{colony_name}/tools -- enumerate colony tool surface
- PATCH /api/colony/{colony_name}/tools -- set or clear the allowlist
A colony's tool set is inherited from the queen that forked it, so the
tool surface mirrors the queen's MCP servers. Lifecycle/synthetic tools
are included for display only. MCP tools are grouped by origin server
with per-tool ``enabled`` flags.
Semantics:
- ``enabled_mcp_tools: null`` — allow every MCP tool (default).
- ``enabled_mcp_tools: []`` — allow no MCP tools (only lifecycle /
synthetic pass through).
- ``enabled_mcp_tools: [...]`` — only listed names pass.
The allowlist is persisted in ``~/.hive/colonies/{colony_name}/metadata.json``
and takes effect on the *next* worker spawn. In-flight workers keep the
tool list they booted with because workers have no dynamic tools
provider today — mutating their tool set mid-turn would diverge from
the list the LLM is already using.
"""
from __future__ import annotations
import logging
from typing import Any
from aiohttp import web
from framework.host.colony_metadata import (
colony_metadata_path,
load_colony_metadata,
update_colony_metadata,
)
logger = logging.getLogger(__name__)
_SYNTHETIC_NAMES = {"ask_user"}
def _synthetic_entries() -> list[dict[str, Any]]:
    """Describe the synthetic ``ask_user`` tool as a read-only display entry.

    The real tool definition is used when it can be imported and built. On
    any failure a hard-coded name and description are returned instead, so
    the route keeps working.
    """
    try:
        from framework.agent_loop.internals.synthetic_tools import build_ask_user_tool

        built = build_ask_user_tool()
        entry = {
            "name": built.name,
            "description": built.description,
            "editable": False,
        }
    except Exception:
        # Deliberate best-effort: fall back to a static description.
        entry = {
            "name": "ask_user",
            "description": "Pause and ask the user a structured question.",
            "editable": False,
        }
    return [entry]
def _colony_runtimes_for_name(manager: Any, colony_name: str) -> list[Any]:
    """Collect the distinct live runtimes of sessions attached to ``colony_name``.

    Sessions are read from ``manager._sessions`` (a missing or empty value
    is treated as no sessions). For each matching session both
    ``session.colony`` (queen-side unified runtime) and
    ``session.colony_runtime`` (legacy worker runtime) are considered,
    because either may carry tools that need the allowlist applied.
    """
    found: list[Any] = []
    all_sessions = getattr(manager, "_sessions", None) or {}
    matching = (
        sess
        for sess in all_sessions.values()
        if getattr(sess, "colony_name", None) == colony_name
    )
    for sess in matching:
        for attr_name in ("colony", "colony_runtime"):
            runtime = getattr(sess, attr_name, None)
            if runtime is None or runtime in found:
                continue
            found.append(runtime)
    return found
async def _render_catalog(manager: Any, colony_name: str) -> dict[str, list[dict[str, Any]]]:
    """Build a per-server MCP tool catalog for this colony.

    Resolution order:

    1. The first live runtime for the colony that has both tools and a
       non-empty set of MCP tool names. Its MCP tools are returned under
       the single pseudo-server ``"(mcp)"``.
    2. The non-empty ``_mcp_tool_catalog`` snapshot stored on ``manager``.
    3. A catalog built on demand in a worker thread, which is then stored
       on ``manager`` when one is present.

    Returns an empty dict when the on-demand build fails.
    """
    # A live runtime is authoritative: it reflects any MCP additions made
    # after the queen booted.
    for runtime in _colony_runtimes_for_name(manager, colony_name):
        runtime_tools = getattr(runtime, "_tools", None)
        mcp_names = set(getattr(runtime, "_mcp_tool_names_all", set()) or set())
        if not runtime_tools or not mcp_names:
            continue
        mcp_entries = [
            {
                "name": getattr(tool, "name", None),
                "description": getattr(tool, "description", ""),
                "input_schema": getattr(tool, "parameters", {}),
            }
            for tool in runtime_tools
            if getattr(tool, "name", None) in mcp_names
        ]
        return {"(mcp)": mcp_entries}

    # No usable runtime: use the queen-level snapshot when it has content.
    snapshot = getattr(manager, "_mcp_tool_catalog", None)
    if isinstance(snapshot, dict) and snapshot:
        return snapshot

    # Nothing cached yet. Build off the event loop so the Tool Library
    # works before any queen has been started in this process.
    try:
        import asyncio

        from framework.server.queen_orchestrator import build_queen_tool_registry_bare

        registry, built = await asyncio.to_thread(build_queen_tool_registry_bare)
        if manager is not None:
            manager._mcp_tool_catalog = built  # type: ignore[attr-defined]
            manager._bootstrap_tool_registry = registry  # type: ignore[attr-defined]
        return built
    except Exception:
        logger.warning("Colony tools: catalog bootstrap failed", exc_info=True)
        return {}
def _lifecycle_entries_from_runtime(manager: Any, colony_name: str) -> list[dict[str, Any]]:
    """List the non-MCP tools for a colony as read-only display entries.

    With at least one live runtime, the result is the de-duplicated,
    name-sorted set of runtime tools that are neither MCP tools nor
    synthetic tools.

    With no live runtime, the entries come from
    ``routes_queen_tools._lifecycle_entries_without_session``, called with
    the MCP names found in ``manager._mcp_tool_catalog``. That result is
    returned in the order the helper produced it.
    """
    live_runtimes = _colony_runtimes_for_name(manager, colony_name)

    if not live_runtimes:
        # No live runtime — derive from the bootstrap registry.
        from framework.server.routes_queen_tools import _lifecycle_entries_without_session

        snapshot = getattr(manager, "_mcp_tool_catalog", {}) or {}
        known_mcp: set[str] = {
            item["name"]
            for items in snapshot.values()
            for item in items
            if item.get("name")
        }
        return list(_lifecycle_entries_without_session(manager, known_mcp))

    by_name: dict[str, dict[str, Any]] = {}
    for runtime in live_runtimes:
        runtime_mcp = set(getattr(runtime, "_mcp_tool_names_all", set()) or set())
        for tool in getattr(runtime, "_tools", []) or []:
            tool_name = getattr(tool, "name", None)
            if tool_name in runtime_mcp:
                continue
            description = getattr(tool, "description", "")
            # Skip nameless tools, repeats, and synthetic tools (the latter
            # are reported separately by the route).
            if not tool_name or tool_name in by_name or tool_name in _SYNTHETIC_NAMES:
                continue
            by_name[tool_name] = {
                "name": tool_name,
                "description": description,
                "editable": False,
            }
    return sorted(by_name.values(), key=lambda item: item["name"])
def _render_servers(
    catalog: dict[str, list[dict[str, Any]]],
    enabled_mcp_tools: list[str] | None,
) -> list[dict[str, Any]]:
    """Shape the catalog into the ``mcp_servers`` list of the API response.

    Servers are emitted in name order and each tool keeps its catalog
    order. Every tool gains an ``enabled`` flag: always ``True`` when
    ``enabled_mcp_tools`` is ``None``, otherwise whether its name is in
    the allowlist.
    """
    allow_all = enabled_mcp_tools is None
    permitted: set[str] = set() if allow_all else set(enabled_mcp_tools)

    def _describe(entry: dict[str, Any]) -> dict[str, Any]:
        tool_name = entry.get("name")
        return {
            "name": tool_name,
            "description": entry.get("description", ""),
            "input_schema": entry.get("input_schema", {}),
            "enabled": allow_all or tool_name in permitted,
        }

    return [
        {"name": server_name, "tools": [_describe(entry) for entry in catalog[server_name]]}
        for server_name in sorted(catalog)
    ]
async def handle_get_tools(request: web.Request) -> web.Response:
    """GET /api/colony/{colony_name}/tools.

    Responds 404 when the colony has no ``metadata.json``. Otherwise
    returns the stored allowlist together with the lifecycle, synthetic
    and MCP tool listings. ``stale`` is true when no MCP catalog could be
    obtained. A stored allowlist that is not a list is reported as
    ``null``.
    """
    colony_name = request.match_info["colony_name"]
    if not colony_metadata_path(colony_name).exists():
        return web.json_response({"error": f"Colony '{colony_name}' not found"}, status=404)

    manager = request.app.get("manager")
    stored = load_colony_metadata(colony_name).get("enabled_mcp_tools")
    allowlist = stored if isinstance(stored, list) else None

    catalog = await _render_catalog(manager, colony_name)
    payload = {
        "colony_name": colony_name,
        "enabled_mcp_tools": allowlist,
        "stale": not catalog,
        "lifecycle": _lifecycle_entries_from_runtime(manager, colony_name),
        "synthetic": _synthetic_entries(),
        "mcp_servers": _render_servers(catalog, allowlist),
    }
    return web.json_response(payload)
async def handle_patch_tools(request: web.Request) -> web.Response:
    """PATCH /api/colony/{colony_name}/tools.

    The body must be a JSON object with an ``enabled_mcp_tools`` field that
    is either ``null`` or a list of strings. When an MCP catalog is
    available, names missing from it are rejected. The accepted value is
    written to the colony's ``metadata.json`` and pushed to every live
    runtime, so the NEXT worker spawn reflects it. Workers already running
    keep their tool list.

    Responses: 404 for an unknown colony, 400 for an invalid body or
    unknown tool names, 200 with the stored allowlist on success.
    """
    colony_name = request.match_info["colony_name"]
    not_found = {"error": f"Colony '{colony_name}' not found"}
    if not colony_metadata_path(colony_name).exists():
        return web.json_response(not_found, status=404)

    try:
        body = await request.json()
    except Exception:
        return web.json_response({"error": "Invalid JSON body"}, status=400)

    if not (isinstance(body, dict) and "enabled_mcp_tools" in body):
        return web.json_response(
            {"error": "Body must be an object with an 'enabled_mcp_tools' field"},
            status=400,
        )

    enabled = body["enabled_mcp_tools"]
    is_name_list = isinstance(enabled, list) and all(isinstance(item, str) for item in enabled)
    if enabled is not None and not is_name_list:
        return web.json_response(
            {"error": "'enabled_mcp_tools' must be null or a list of strings"},
            status=400,
        )

    manager = request.app.get("manager")

    # Validate names against the known MCP catalog to catch typos. With an
    # empty catalog there is nothing to validate against, so the check is
    # skipped.
    catalog = await _render_catalog(manager, colony_name)
    known: set[str] = set()
    for entries in catalog.values():
        for entry in entries:
            if entry.get("name"):
                known.add(entry.get("name"))
    if enabled is not None and known:
        unknown = sorted(set(enabled) - known)
        if unknown:
            return web.json_response(
                {"error": "Unknown MCP tool name(s)", "unknown": unknown},
                status=400,
            )

    # Persist. The colony directory may have vanished since the 404 check.
    try:
        update_colony_metadata(colony_name, {"enabled_mcp_tools": enabled})
    except FileNotFoundError:
        return web.json_response(not_found, status=404)

    # Push the new allowlist to live runtimes; a failure on one runtime
    # must not stop the others from being updated.
    refreshed = 0
    for runtime in _colony_runtimes_for_name(manager, colony_name):
        apply_allowlist = getattr(runtime, "set_tool_allowlist", None)
        if not callable(apply_allowlist):
            continue
        try:
            apply_allowlist(enabled)
        except Exception:
            logger.debug(
                "Colony tools: set_tool_allowlist failed on runtime for %s",
                colony_name,
                exc_info=True,
            )
        else:
            refreshed += 1

    logger.info(
        "Colony tools: colony=%s allowlist=%s refreshed_runtimes=%d",
        colony_name,
        "null" if enabled is None else f"{len(enabled)} tool(s)",
        refreshed,
    )
    return web.json_response(
        {
            "colony_name": colony_name,
            "enabled_mcp_tools": enabled,
            "refreshed_runtimes": refreshed,
            "note": "Changes apply to the next worker spawn. Running workers keep their booted tool list.",
        }
    )
async def handle_list_colonies(request: web.Request) -> web.Response:
    """GET /api/colonies/tools-index — list colonies with allowlist status.

    Powers the Tool Library page's colony picker. For every colony that
    has a ``metadata.json`` the response carries its name, ``queen_name``,
    ``created_at``, whether an allowlist is set, and how many tools it
    enables (``null`` when no allowlist is set). A stored allowlist that is
    not a list is treated as unset.
    """
    from framework.host.colony_metadata import list_colony_names

    def _summarize(name: str) -> dict[str, Any]:
        meta = load_colony_metadata(name)
        stored = meta.get("enabled_mcp_tools")
        allowlist = stored if isinstance(stored, list) else None
        return {
            "name": name,
            "queen_name": meta.get("queen_name"),
            "created_at": meta.get("created_at"),
            "has_allowlist": allowlist is not None,
            "enabled_count": None if allowlist is None else len(allowlist),
        }

    colonies = [_summarize(name) for name in list_colony_names()]
    return web.json_response({"colonies": colonies})
def register_routes(app: web.Application) -> None:
    """Attach the colony index and per-colony tool routes to ``app``."""
    router = app.router
    router.add_get("/api/colonies/tools-index", handle_list_colonies)
    router.add_get("/api/colony/{colony_name}/tools", handle_get_tools)
    router.add_patch("/api/colony/{colony_name}/tools", handle_patch_tools)
+291
View File
@@ -0,0 +1,291 @@
"""MCP server registration routes.
Thin HTTP wrapper around ``MCPRegistry`` so the frontend can add, remove,
enable, and health-check user-registered MCP servers. The CLI path
(``hive mcp add`` / ``hive mcp remove`` / etc.) is unchanged.
- GET /api/mcp/servers -- list installed servers
- POST /api/mcp/servers -- register a local server
- DELETE /api/mcp/servers/{name} -- remove a local server
- POST /api/mcp/servers/{name}/enable -- enable a server
- POST /api/mcp/servers/{name}/disable -- disable a server
- POST /api/mcp/servers/{name}/health -- probe server health
New servers take effect on the *next* queen session start. Existing live
queen sessions keep the tool list they booted with to avoid mid-turn
cache invalidation. The ``add`` response hints at this explicitly.
"""
from __future__ import annotations
import logging
from typing import Any
from aiohttp import web
from framework.loader.mcp_errors import MCPError
from framework.loader.mcp_registry import MCPRegistry
logger = logging.getLogger(__name__)
_VALID_TRANSPORTS = {"stdio", "http", "sse", "unix"}
def _registry() -> MCPRegistry:
    """Create and initialize a fresh ``MCPRegistry`` for one request."""
    # MCPRegistry is a thin wrapper around ~/.hive/mcp_registry/installed.json,
    # so building one per call is cheap — nothing is cached on the app.
    registry = MCPRegistry()
    registry.initialize()
    return registry
def _package_builtin_servers() -> list[dict[str, Any]]:
    """Return the package-baked queen MCP servers from ``queen/mcp_servers.json``.

    Those servers are loaded directly by ``ToolRegistry.load_mcp_config``
    at queen boot and never go through ``MCPRegistry.list_installed``,
    so the raw registry view shows them as missing. They are surfaced here
    so the Tool Library reflects what the queen actually talks to.

    Entries carry ``source: "built-in"`` and are NOT removable or
    toggleable — editing them requires changing the repo file.

    Returns:
        One summary dict per server entry whose config is a JSON object.
        An empty list is returned when the file is missing, unreadable,
        not valid UTF-8 or JSON, or when its top-level value is not an
        object.
    """
    import json
    from pathlib import Path

    import framework.agents.queen as _queen_pkg

    path = Path(_queen_pkg.__file__).parent / "mcp_servers.json"
    if not path.exists():
        return []
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return []
    if not isinstance(data, dict):
        # A top-level list or scalar has no ``.items()``; treat it like a
        # malformed file instead of failing the whole listing request.
        return []

    return [
        {
            "name": name,
            "source": "built-in",
            "transport": cfg.get("transport", "stdio"),
            "description": cfg.get("description", "") or "",
            "enabled": True,
            "last_health_status": None,
            "last_error": None,
            "last_health_check_at": None,
            "tool_count": None,
            "removable": False,
        }
        for name, cfg in data.items()
        if isinstance(cfg, dict)
    ]
def _server_to_summary(entry: dict[str, Any]) -> dict[str, Any]:
    """Reduce an installed-server entry to the fields the API exposes.

    The full manifest body (which can be large) is dropped. Only its
    ``description`` and the length of its ``tools`` list are kept.
    ``tool_count`` is ``None`` when the manifest carries no tool list, and
    ``description`` falls back to ``""``. A manifest that is not a dict is
    ignored.
    """
    raw_manifest = entry.get("manifest") or {}
    manifest = raw_manifest if isinstance(raw_manifest, dict) else None

    description = None
    tools = None
    if manifest is not None:
        description = manifest.get("description")
        tools = manifest.get("tools")
    tool_count = len(tools) if isinstance(tools, list) else None

    return {
        "name": entry.get("name"),
        "source": entry.get("source"),
        "transport": entry.get("transport"),
        "description": description or "",
        "enabled": entry.get("enabled", True),
        "last_health_status": entry.get("last_health_status"),
        "last_error": entry.get("last_error"),
        "last_health_check_at": entry.get("last_health_check_at"),
        "tool_count": tool_count,
    }
def _mcp_error_response(exc: MCPError, *, default_status: int = 400) -> web.Response:
    """Turn an ``MCPError`` into a JSON response with status ``default_status``.

    The payload exposes the error's ``code``, ``what``, ``why`` and ``fix``
    fields, and repeats ``what`` under ``error``.
    """
    payload = {
        "error": exc.what,
        "code": exc.code.value,
        "what": exc.what,
        "why": exc.why,
        "fix": exc.fix,
    }
    return web.json_response(payload, status=default_status)
async def handle_list_servers(request: web.Request) -> web.Response:
    """GET /api/mcp/servers — list every server the queen actually uses.

    Two sources are merged:

    - ``MCPRegistry.list_installed()`` — servers registered via
      ``hive mcp add`` or the ``/api/mcp/servers`` POST route, stored in
      ``~/.hive/mcp_registry/installed.json``. These carry
      ``source: "local"`` (user-added) or ``source: "registry"``
      (installed from the remote registry).
    - Repo-baked queen servers from
      ``core/framework/agents/queen/mcp_servers.json``. These are loaded
      directly by the queen's ``ToolRegistry`` at boot and never touch
      ``MCPRegistry``. They are not removable from the UI because editing
      them requires changing the repo.

    When a name appears in both sources, the registry entry wins because
    that is the one the user has customized. Built-in entries are listed
    first.
    """
    installed = [_server_to_summary(item) for item in _registry().list_installed()]
    taken_names = {summary.get("name") for summary in installed}
    builtin = [srv for srv in _package_builtin_servers() if srv.get("name") not in taken_names]
    return web.json_response({"servers": builtin + installed})
async def handle_add_server(request: web.Request) -> web.Response:
    """POST /api/mcp/servers — register a local MCP server.

    Body mirrors ``MCPRegistry.add_local`` args:
    ::
        {
            "name": "my-tool",
            "transport": "stdio" | "http" | "sse" | "unix",
            "command": "...", "args": [...], "env": {...}, "cwd": "...",
            "url": "...", "headers": {...},
            "socket_path": "...",
            "description": "..."
        }

    The name is stripped of surrounding whitespace once, and that
    normalized value is used for registration, logging and the response.

    Responses: 201 with the server summary on success, 400 for an invalid
    body or a registry validation error, 409 when the registry reports
    that the server already exists, 500 for unexpected failures.
    """
    try:
        body = await request.json()
    except Exception:
        return web.json_response({"error": "Invalid JSON body"}, status=400)
    if not isinstance(body, dict):
        return web.json_response({"error": "Body must be a JSON object"}, status=400)

    raw_name = body.get("name")
    transport = body.get("transport")
    if not isinstance(raw_name, str) or not raw_name.strip():
        return web.json_response({"error": "'name' is required"}, status=400)
    # Normalize once so the registry entry and the response always agree.
    name = raw_name.strip()
    if transport not in _VALID_TRANSPORTS:
        return web.json_response(
            {"error": f"'transport' must be one of {sorted(_VALID_TRANSPORTS)}"},
            status=400,
        )

    reg = _registry()
    try:
        entry = reg.add_local(
            name=name,
            transport=transport,
            command=body.get("command"),
            args=body.get("args"),
            env=body.get("env"),
            cwd=body.get("cwd"),
            url=body.get("url"),
            headers=body.get("headers"),
            socket_path=body.get("socket_path"),
            description=body.get("description", ""),
        )
    except MCPError as exc:
        # The registry signals duplicates through its message text.
        status = 409 if "already exists" in exc.what else 400
        return _mcp_error_response(exc, default_status=status)
    except Exception as exc:
        logger.exception("MCP add_local failed for %r", name)
        return web.json_response({"error": str(exc)}, status=500)

    summary = _server_to_summary({"name": name, **entry})
    return web.json_response(
        {
            "server": summary,
            "hint": "Start a new queen session to use this server's tools.",
        },
        status=201,
    )
async def handle_remove_server(request: web.Request) -> web.Response:
    """DELETE /api/mcp/servers/{name} — remove a local server.

    Responds 404 when the server is not installed (or removal raises an
    ``MCPError``), and 400 when its ``source`` is anything other than
    ``"local"``.
    """
    name = request.match_info["name"]
    reg = _registry()

    installed = reg.get_server(name)
    if installed is None:
        return web.json_response({"error": f"Server '{name}' not installed"}, status=404)

    is_local = installed.get("source") == "local"
    if not is_local:
        refusal = {"error": f"Server '{name}' is a built-in; it cannot be removed from the UI."}
        return web.json_response(refusal, status=400)

    try:
        reg.remove(name)
    except MCPError as exc:
        return _mcp_error_response(exc, default_status=404)
    return web.json_response({"removed": name})
async def handle_set_enabled(request: web.Request, *, enabled: bool) -> web.Response:
    """Shared implementation behind the enable and disable endpoints.

    Calls ``enable`` or ``disable`` on the registry for the server named
    in the URL, depending on ``enabled``. An ``MCPError`` is rendered via
    ``_mcp_error_response`` (default status 404); otherwise the response
    echoes the server name and the requested state.
    """
    server_name = request.match_info["name"]
    registry = _registry()
    try:
        # Pick the registry operation once, then invoke it.
        toggle = registry.enable if enabled else registry.disable
        toggle(server_name)
    except MCPError as err:
        return _mcp_error_response(err, default_status=404)
    return web.json_response({"name": server_name, "enabled": enabled})
async def handle_enable(request: web.Request) -> web.Response:
    """POST /api/mcp/servers/{name}/enable — switch the named server on."""
    response = await handle_set_enabled(request, enabled=True)
    return response
async def handle_disable(request: web.Request) -> web.Response:
    """POST /api/mcp/servers/{name}/disable — switch the named server off."""
    response = await handle_set_enabled(request, enabled=False)
    return response
async def handle_health(request: web.Request) -> web.Response:
    """POST /api/mcp/servers/{name}/health — probe a single server.

    The registry's ``health_check`` result is returned as the JSON body.
    An ``MCPError`` goes through ``_mcp_error_response`` (default 404);
    any other exception is logged with its traceback and reported as a
    500 whose ``error`` field is the exception text.
    """
    import asyncio

    server_name = request.match_info["name"]
    registry = _registry()
    try:
        # health_check blocks on subprocess IO, so it runs in a worker
        # thread to keep the event loop free for other requests.
        outcome = await asyncio.to_thread(registry.health_check, server_name)
    except MCPError as err:
        return _mcp_error_response(err, default_status=404)
    except Exception as err:
        logger.exception("MCP health_check failed for %r", server_name)
        return web.json_response({"error": str(err)}, status=500)
    return web.json_response(outcome)
def register_routes(app: web.Application) -> None:
    """Attach the MCP server management endpoints to ``app``'s router."""
    router = app.router

    # Collection-level endpoints.
    router.add_get("/api/mcp/servers", handle_list_servers)
    router.add_post("/api/mcp/servers", handle_add_server)

    # Per-server endpoints, addressed by the ``{name}`` path segment.
    router.add_delete("/api/mcp/servers/{name}", handle_remove_server)
    router.add_post("/api/mcp/servers/{name}/enable", handle_enable)
    router.add_post("/api/mcp/servers/{name}/disable", handle_disable)
    router.add_post("/api/mcp/servers/{name}/health", handle_health)
+424
View File
@@ -0,0 +1,424 @@
"""Per-queen MCP tool allowlist routes.
- GET /api/queen/{queen_id}/tools -- enumerate the queen's tool surface
- PATCH /api/queen/{queen_id}/tools -- set or clear the MCP tool allowlist
Lifecycle and synthetic tools (``ask_user``) are always part of the queen's
surface in INDEPENDENT mode and are returned with ``editable: false``. MCP
tools are grouped by origin server and carry per-tool ``enabled`` flags.
The allowlist is a persisted queen-profile field, ``enabled_mcp_tools``:
- ``null`` / missing -> "allow every MCP tool" (default, backward-compat)
- ``[]`` -> explicitly disable every MCP tool
- ``["foo", "bar"]`` -> only these MCP tools pass through to the LLM
Filtering happens in ``QueenPhaseState.rebuild_independent_filter`` so the
LLM prompt cache stays warm between saves.
"""
from __future__ import annotations
import logging
from typing import Any
from aiohttp import web
from framework.agents.queen.queen_profiles import (
ensure_default_queens,
load_queen_profile,
update_queen_profile,
)
logger = logging.getLogger(__name__)
_SYNTHETIC_NAMES = {"ask_user"}
async def _ensure_manager_catalog(manager: Any) -> dict[str, list[dict[str, Any]]]:
"""Return the cached MCP tool catalog, building it on first call.
``queen_orchestrator.create_queen`` populates ``_mcp_tool_catalog`` on
every queen boot. On a fresh backend process the user may open the
Tool Library before any queen session has started, so the catalog is
empty. In that case we build one from the shared MCP config; the
first call pays an MCP-subprocess-spawn cost, subsequent calls are
cache hits. The build runs off the event loop via asyncio.to_thread
so the HTTP worker stays responsive while MCP servers initialize.
"""
if manager is None:
return {}
catalog = getattr(manager, "_mcp_tool_catalog", None)
if isinstance(catalog, dict) and catalog:
return catalog
try:
import asyncio
from framework.server.queen_orchestrator import build_queen_tool_registry_bare
registry, built = await asyncio.to_thread(build_queen_tool_registry_bare)
manager._mcp_tool_catalog = built # type: ignore[attr-defined]
manager._bootstrap_tool_registry = registry # type: ignore[attr-defined]
return built
except Exception:
logger.warning("Tool catalog bootstrap failed", exc_info=True)
return {}
def _lifecycle_entries_without_session(
manager: Any,
mcp_names: set[str],
) -> list[dict[str, Any]]:
"""Derive lifecycle tool names from the registry even without a session.
We register queen lifecycle tools against a temporary registry using a
minimal stub, then subtract the MCP-origin set and the synthetic set.
The result matches what the queen sees at runtime (minus context-
specific variants).
"""
registry = getattr(manager, "_bootstrap_tool_registry", None)
# If the bootstrap registry exists but doesn't carry lifecycle tools
# yet, register them now.
if registry is not None and not getattr(registry, "_lifecycle_bootstrap_done", False):
try:
from types import SimpleNamespace
from framework.tools.queen_lifecycle_tools import register_queen_lifecycle_tools
stub_session = SimpleNamespace(
id="tool-library-bootstrap",
colony_runtime=None,
event_bus=None,
worker_path=None,
phase_state=None,
llm=None,
)
register_queen_lifecycle_tools(
registry,
session=stub_session,
session_id=stub_session.id,
session_manager=None,
manager_session_id=stub_session.id,
phase_state=None,
)
registry._lifecycle_bootstrap_done = True # type: ignore[attr-defined]
except Exception:
logger.debug("lifecycle bootstrap failed", exc_info=True)
if registry is None:
return []
out: list[dict[str, Any]] = []
for name, tool in sorted(registry.get_tools().items()):
if name in mcp_names or name in _SYNTHETIC_NAMES:
continue
out.append(
{
"name": tool.name,
"description": tool.description,
"editable": False,
}
)
return out
def _synthetic_entries() -> list[dict[str, Any]]:
"""Return display metadata for synthetic tools injected by the agent loop.
Kept behind a lazy import so test harnesses that don't wire the agent
loop can still hit this route without blowing up.
"""
try:
from framework.agent_loop.internals.synthetic_tools import build_ask_user_tool
tool = build_ask_user_tool()
return [
{
"name": tool.name,
"description": tool.description,
"editable": False,
}
]
except Exception:
return [
{
"name": "ask_user",
"description": "Pause and ask the user a structured question.",
"editable": False,
}
]
def _live_queen_session(manager: Any, queen_id: str) -> Any:
"""Return any live DM session owned by this queen, or ``None``."""
sessions = getattr(manager, "_sessions", None) or {}
for session in sessions.values():
if getattr(session, "queen_name", None) != queen_id:
continue
# Prefer DM (non-colony) sessions
if getattr(session, "colony_runtime", None) is None:
return session
return None
def _render_mcp_servers(
*,
mcp_tool_names_by_server: dict[str, list[dict[str, Any]]],
enabled_mcp_tools: list[str] | None,
) -> list[dict[str, Any]]:
"""Shape the mcp_tool_catalog entries for the API response."""
allowed: set[str] | None = None if enabled_mcp_tools is None else set(enabled_mcp_tools)
servers: list[dict[str, Any]] = []
for server_name in sorted(mcp_tool_names_by_server):
entries = mcp_tool_names_by_server[server_name]
tools = []
for entry in entries:
name = entry.get("name")
enabled = True if allowed is None else name in allowed
tools.append(
{
"name": name,
"description": entry.get("description", ""),
"input_schema": entry.get("input_schema", {}),
"enabled": enabled,
}
)
servers.append({"name": server_name, "tools": tools})
return servers
def _catalog_from_live_session(session: Any) -> dict[str, list[dict[str, Any]]]:
"""Rebuild a per-server tool catalog from a live queen session.
The session's registry is authoritative — this reflects any hot-added
MCP servers since the manager-level snapshot was cached.
"""
registry = getattr(session, "_queen_tool_registry", None)
if registry is None:
# session._queen_tools_by_name is a stash from create_queen; we
# only have registry via the tools list, so reconstruct from the
# phase state instead.
phase_state = getattr(session, "phase_state", None)
if phase_state is None:
return {}
mcp_names = getattr(phase_state, "mcp_tool_names_all", set()) or set()
independent_tools = getattr(phase_state, "independent_tools", []) or []
result: dict[str, list[dict[str, Any]]] = {"(unknown)": []}
for tool in independent_tools:
if tool.name not in mcp_names:
continue
result["(unknown)"].append(
{
"name": tool.name,
"description": tool.description,
"input_schema": tool.parameters,
}
)
return result if result["(unknown)"] else {}
server_map = getattr(registry, "_mcp_server_tools", {}) or {}
tools_by_name = {t.name: t for t in registry.get_tools().values()}
catalog: dict[str, list[dict[str, Any]]] = {}
for server_name, tool_names in server_map.items():
entries: list[dict[str, Any]] = []
for name in sorted(tool_names):
tool = tools_by_name.get(name)
if tool is None:
continue
entries.append(
{
"name": tool.name,
"description": tool.description,
"input_schema": tool.parameters,
}
)
catalog[server_name] = entries
return catalog
def _lifecycle_entries(
*,
session: Any,
mcp_tool_names_all: set[str],
) -> list[dict[str, Any]]:
"""Lifecycle tools = independent_tools minus MCP-origin minus synthetic.
We compute this from a live session when available so the list exactly
matches what the queen actually sees on her next turn.
"""
if session is None:
return []
phase_state = getattr(session, "phase_state", None)
if phase_state is None:
return []
result: list[dict[str, Any]] = []
for tool in getattr(phase_state, "independent_tools", []) or []:
if tool.name in mcp_tool_names_all:
continue
if tool.name in _SYNTHETIC_NAMES:
continue
result.append(
{
"name": tool.name,
"description": tool.description,
"editable": False,
}
)
return sorted(result, key=lambda x: x["name"])
async def handle_get_tools(request: web.Request) -> web.Response:
    """GET /api/queen/{queen_id}/tools — enumerate tool surface for the UI.

    Responds 404 when the queen profile cannot be loaded. Otherwise the
    body reports the persisted ``enabled_mcp_tools`` allowlist, the
    lifecycle and synthetic tools, and the MCP tools grouped by server
    with per-tool ``enabled`` flags. ``stale`` is true when no MCP catalog
    could be obtained.
    """
    queen_id = request.match_info["queen_id"]
    ensure_default_queens()
    try:
        profile = load_queen_profile(queen_id)
    except FileNotFoundError:
        return web.json_response({"error": f"Queen '{queen_id}' not found"}, status=404)

    manager = request.app.get("manager")
    session = None if manager is None else _live_queen_session(manager, queen_id)

    # A live session's registry is the freshest source. Without one, use
    # the manager-level catalog, building it on demand so the Tool Library
    # works before any queen has been started in this process.
    if session is None:
        catalog = await _ensure_manager_catalog(manager)
    else:
        catalog = _catalog_from_live_session(session)

    mcp_tool_names_all: set[str] = {
        entry["name"]
        for entries in catalog.values()
        for entry in entries
        if entry.get("name")
    }

    if session is None:
        lifecycle = _lifecycle_entries_without_session(manager, mcp_tool_names_all)
    else:
        lifecycle = _lifecycle_entries(
            session=session,
            mcp_tool_names_all=mcp_tool_names_all,
        )

    allowlist = profile.get("enabled_mcp_tools")
    return web.json_response(
        {
            "queen_id": queen_id,
            "enabled_mcp_tools": allowlist,
            "stale": not catalog,
            "lifecycle": lifecycle,
            "synthetic": _synthetic_entries(),
            "mcp_servers": _render_mcp_servers(
                mcp_tool_names_by_server=catalog,
                enabled_mcp_tools=allowlist,
            ),
        }
    )
async def handle_patch_tools(request: web.Request) -> web.Response:
    """PATCH /api/queen/{queen_id}/tools — persist the MCP tool allowlist.

    Body: ``{"enabled_mcp_tools": null | string[]}``.

    - ``null`` resets to "allow every MCP tool" (default).
    - A list is checked against the known MCP catalog; unknown names are
      rejected with 400 so the frontend catches typos. The check is
      skipped when no catalog names are available.

    Responds 400 for a malformed body and 404 for an unknown queen. On
    success the allowlist is written to the queen profile and pushed into
    every live session of this queen that has a phase state; the response
    reports how many sessions rebuilt their filter.
    """
    queen_id = request.match_info["queen_id"]

    try:
        payload = await request.json()
    except Exception:
        return web.json_response({"error": "Invalid JSON body"}, status=400)
    if not (isinstance(payload, dict) and "enabled_mcp_tools" in payload):
        return web.json_response(
            {"error": "Body must be an object with an 'enabled_mcp_tools' field"},
            status=400,
        )

    allowlist = payload["enabled_mcp_tools"]
    well_formed = allowlist is None or (
        isinstance(allowlist, list) and all(isinstance(item, str) for item in allowlist)
    )
    if not well_formed:
        return web.json_response(
            {"error": "'enabled_mcp_tools' must be null or a list of strings"},
            status=400,
        )

    ensure_default_queens()
    try:
        load_queen_profile(queen_id)
    except FileNotFoundError:
        return web.json_response({"error": f"Queen '{queen_id}' not found"}, status=404)

    # Names are validated against the freshest catalog: a live session's
    # registry when there is one, else the manager-level snapshot (built
    # on demand if absent).
    manager = request.app.get("manager")
    session = None if manager is None else _live_queen_session(manager, queen_id)
    if session is None:
        catalog = await _ensure_manager_catalog(manager)
    else:
        catalog = _catalog_from_live_session(session)

    known_names: set[str] = {
        entry["name"]
        for entries in catalog.values()
        for entry in entries
        if entry.get("name")
    }
    if allowlist is not None and known_names:
        unknown = sorted(set(allowlist) - known_names)
        if unknown:
            return web.json_response(
                {"error": "Unknown MCP tool name(s)", "unknown": unknown},
                status=400,
            )

    # Persist the raw value (``None`` is stored as YAML null).
    updated = update_queen_profile(queen_id, {"enabled_mcp_tools": allowlist})

    # Push the new allowlist into this queen's live sessions and rebuild
    # each filter memo so the next turn sees it without a restart.
    refreshed = 0
    live_sessions = getattr(manager, "_sessions", None) or {}
    for live in live_sessions.values():
        if getattr(live, "queen_name", None) != queen_id:
            continue
        phase_state = getattr(live, "phase_state", None)
        if phase_state is None:
            continue
        phase_state.enabled_mcp_tools = allowlist
        rebuild = getattr(phase_state, "rebuild_independent_filter", None)
        if not callable(rebuild):
            continue
        try:
            rebuild()
        except Exception:
            logger.debug(
                "Queen tools: rebuild_independent_filter failed for session %s",
                getattr(live, "id", "?"),
                exc_info=True,
            )
        else:
            refreshed += 1

    logger.info(
        "Queen tools: queen_id=%s allowlist=%s refreshed_sessions=%d",
        queen_id,
        "null" if allowlist is None else f"{len(allowlist)} tool(s)",
        refreshed,
    )
    return web.json_response(
        {
            "queen_id": queen_id,
            "enabled_mcp_tools": updated.get("enabled_mcp_tools"),
            "refreshed_sessions": refreshed,
        }
    )
def register_routes(app: web.Application) -> None:
    """Attach the per-queen tool allowlist endpoints to ``app``'s router."""
    router = app.router
    tools_path = "/api/queen/{queen_id}/tools"
    router.add_get(tools_path, handle_get_tools)
    router.add_patch(tools_path, handle_patch_tools)
+32
View File
@@ -1518,6 +1518,38 @@ class SessionManager:
colony_id=session.id,
pipeline_stages=[], # queen pipeline runs in queen_orchestrator, not here
)
# Per-colony tool allowlist, loaded from the colony's metadata.json
# when this session is attached to a real forked colony. For pure
# queen DM sessions (session.colony_name is None) we only capture
# the MCP-origin set — the allowlist stays ``None`` so every MCP
# tool passes through by default.
try:
mcp_tool_names_all: set[str] = set()
mgr_catalog = getattr(self, "_mcp_tool_catalog", None)
if isinstance(mgr_catalog, dict):
for entries in mgr_catalog.values():
for entry in entries:
name = entry.get("name") if isinstance(entry, dict) else None
if name:
mcp_tool_names_all.add(name)
enabled_mcp_tools: list[str] | None = None
colony_name = getattr(session, "colony_name", None)
if colony_name:
from framework.host.colony_metadata import load_colony_metadata
colony_meta = load_colony_metadata(colony_name)
raw = colony_meta.get("enabled_mcp_tools")
if raw is None or isinstance(raw, list):
enabled_mcp_tools = raw
colony.set_tool_allowlist(enabled_mcp_tools, mcp_tool_names_all)
except Exception:
logger.debug(
"Colony allowlist bootstrap failed for session %s",
session.id,
exc_info=True,
)
await colony.start()
session.colony = colony
@@ -0,0 +1,244 @@
"""Tests for the per-colony MCP tool allowlist filter + routes.
Covers:
1. ``ColonyRuntime`` filter semantics (default-allow, allowlist, empty,
lifecycle passes through).
2. routes_colony_tools round trip (GET/PATCH, validation, 404).
3. Colony index route for the Tool Library picker.
Routes never touch the real ``~/.hive/colonies`` tree we redirect
``COLONIES_DIR`` into ``tmp_path`` via monkeypatch.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any
import pytest
from aiohttp import web
from aiohttp.test_utils import TestClient, TestServer
from framework.host.colony_runtime import ColonyRuntime
from framework.llm.provider import Tool
from framework.server import routes_colony_tools
def _tool(name: str) -> Tool:
    """Build a minimal ``Tool`` called ``name`` for the filter tests."""
    description = f"desc of {name}"
    schema = {"type": "object"}
    return Tool(name=name, description=description, parameters=schema)
# ---------------------------------------------------------------------------
# ColonyRuntime filter unit tests
# ---------------------------------------------------------------------------
def _bare_runtime() -> ColonyRuntime:
    """Create a ``ColonyRuntime`` without running ``__init__``.

    Only the two attributes the allowlist filter tests touch are set:
    no allowlist (``None``) and an empty MCP-origin name set.
    """
    runtime = ColonyRuntime.__new__(ColonyRuntime)
    runtime._enabled_mcp_tools = None
    runtime._mcp_tool_names_all = set()
    return runtime
class TestColonyFilter:
    """Unit tests for ``ColonyRuntime``'s MCP tool allowlist filter."""

    def test_default_is_noop(self):
        """With no allowlist configured the tool list comes back unchanged."""
        runtime = _bare_runtime()
        offered = [_tool("mcp_a"), _tool("lc_b")]
        assert runtime._apply_tool_allowlist(offered) == offered

    def test_allowlist_gates_mcp_only(self):
        """Unlisted MCP tools are dropped; non-MCP tools always pass."""
        runtime = _bare_runtime()
        runtime._mcp_tool_names_all = {"mcp_a", "mcp_b"}
        runtime._enabled_mcp_tools = ["mcp_a"]
        offered = [_tool("mcp_a"), _tool("mcp_b"), _tool("lc_c")]
        kept = runtime._apply_tool_allowlist(offered)
        assert [tool.name for tool in kept] == ["mcp_a", "lc_c"]

    def test_empty_allowlist_keeps_lifecycle(self):
        """An empty allowlist removes every MCP tool but nothing else."""
        runtime = _bare_runtime()
        runtime._mcp_tool_names_all = {"mcp_a", "mcp_b"}
        runtime._enabled_mcp_tools = []
        offered = [_tool("mcp_a"), _tool("mcp_b"), _tool("lc_c")]
        kept = runtime._apply_tool_allowlist(offered)
        assert [tool.name for tool in kept] == ["lc_c"]

    def test_setter_mutates_live_state(self):
        """``set_tool_allowlist`` updates both fields, then only the allowlist."""
        runtime = _bare_runtime()
        runtime.set_tool_allowlist(["x"], {"x", "y"})
        assert runtime._enabled_mcp_tools == ["x"]
        assert runtime._mcp_tool_names_all == {"x", "y"}

        # Clearing the allowlist without passing the MCP name set must
        # leave that set as it was, so callers need not repeat it.
        runtime.set_tool_allowlist(None)
        assert runtime._enabled_mcp_tools is None
        assert runtime._mcp_tool_names_all == {"x", "y"}
# ---------------------------------------------------------------------------
# Route round-trip tests
# ---------------------------------------------------------------------------
@dataclass
class _FakeSession:
    """Minimal stand-in for a live session in the colony tools route tests.

    NOTE(review): the route module under test is not visible here; the
    field set presumably mirrors what it reads from a session — confirm.
    """

    # Name of the colony this session belongs to.
    colony_name: str
    # Runtime object attached by the live-refresh test (a bare ColonyRuntime).
    colony: Any = None
    colony_runtime: Any = None
    # Used as the key when the session is placed in the manager's ``_sessions``.
    id: str = "sess-1"
@dataclass
class _FakeManager:
    """Minimal stand-in for the session manager stored under ``app["manager"]``."""

    # Live sessions keyed by session id.
    _sessions: dict = field(default_factory=dict)
    # MCP tool catalog: server name -> list of tool description dicts.
    _mcp_tool_catalog: dict = field(default_factory=dict)
@pytest.fixture
def colony_dir(tmp_path, monkeypatch):
    """Redirect ``COLONIES_DIR`` to a temp tree holding one seeded colony.

    Returns ``(colonies_root, colony_name)``. The seeded ``metadata.json``
    has no ``enabled_mcp_tools`` key.
    """
    colonies_root = tmp_path / "colonies"
    colonies_root.mkdir()
    monkeypatch.setattr("framework.host.colony_metadata.COLONIES_DIR", colonies_root)

    colony_name = "my_colony"
    seed_metadata = {
        "colony_name": colony_name,
        "queen_name": "queen_technology",
        "created_at": "2026-04-20T00:00:00+00:00",
    }
    colony_path = colonies_root / colony_name
    colony_path.mkdir()
    metadata_file = colony_path / "metadata.json"
    metadata_file.write_text(json.dumps(seed_metadata))
    return colonies_root, colony_name
async def _app(manager: _FakeManager) -> web.Application:
    """Build an aiohttp app exposing the colony tools routes over ``manager``."""
    application = web.Application()
    application["manager"] = manager
    routes_colony_tools.register_routes(application)
    return application
@pytest.mark.asyncio
async def test_get_tools_default_allow(colony_dir):
    """GET on a colony with no allowlist reports every MCP tool as enabled."""
    _, colony = colony_dir
    catalog = {
        "coder-tools": [
            {"name": "read_file", "description": "read", "input_schema": {}},
            {"name": "write_file", "description": "write", "input_schema": {}},
        ],
    }
    app = await _app(_FakeManager(_mcp_tool_catalog=catalog))

    async with TestClient(TestServer(app)) as client:
        response = await client.get(f"/api/colony/{colony}/tools")
        assert response.status == 200
        payload = await response.json()
        assert payload["enabled_mcp_tools"] is None
        assert payload["stale"] is False
        by_name = {tool["name"]: tool for tool in payload["mcp_servers"][0]["tools"]}
        assert all(tool["enabled"] for tool in by_name.values())
@pytest.mark.asyncio
async def test_patch_persists_and_validates(colony_dir):
    """PATCH stores the allowlist, GET reflects it, unknown names get 400."""
    colonies_root, colony = colony_dir
    catalog = {
        "coder-tools": [
            {"name": "read_file", "description": "", "input_schema": {}},
            {"name": "write_file", "description": "", "input_schema": {}},
        ]
    }
    app = await _app(_FakeManager(_mcp_tool_catalog=catalog))
    tools_url = f"/api/colony/{colony}/tools"

    async with TestClient(TestServer(app)) as client:
        patched = await client.patch(tools_url, json={"enabled_mcp_tools": ["read_file"]})
        assert patched.status == 200
        patched_body = await patched.json()
        assert patched_body["enabled_mcp_tools"] == ["read_file"]

        # The allowlist must have been written to metadata.json on disk.
        on_disk = json.loads((colonies_root / colony / "metadata.json").read_text())
        assert on_disk["enabled_mcp_tools"] == ["read_file"]

        # A follow-up GET shows per-tool flags derived from the allowlist.
        fetched = await client.get(tools_url)
        fetched_body = await fetched.json()
        by_name = {tool["name"]: tool for tool in fetched_body["mcp_servers"][0]["tools"]}
        assert by_name["read_file"]["enabled"] is True
        assert by_name["write_file"]["enabled"] is False

        # A name missing from the catalog is rejected and reported back.
        rejected = await client.patch(tools_url, json={"enabled_mcp_tools": ["ghost"]})
        assert rejected.status == 400
        rejected_body = await rejected.json()
        assert "ghost" in rejected_body.get("unknown", [])
@pytest.mark.asyncio
async def test_patch_refreshes_live_runtime(colony_dir):
    """PATCH pushes the new allowlist into a live session's runtime."""
    _, colony = colony_dir

    runtime = _bare_runtime()
    runtime._mcp_tool_names_all = {"read_file", "write_file"}
    runtime.set_tool_allowlist(None)

    live_session = _FakeSession(colony_name=colony, colony=runtime)
    catalog = {
        "coder-tools": [
            {"name": "read_file", "description": "", "input_schema": {}},
            {"name": "write_file", "description": "", "input_schema": {}},
        ]
    }
    manager = _FakeManager(
        _sessions={live_session.id: live_session},
        _mcp_tool_catalog=catalog,
    )
    app = await _app(manager)

    async with TestClient(TestServer(app)) as client:
        response = await client.patch(
            f"/api/colony/{colony}/tools",
            json={"enabled_mcp_tools": ["read_file"]},
        )
        assert response.status == 200
        payload = await response.json()
        assert payload["refreshed_runtimes"] == 1
        assert runtime._enabled_mcp_tools == ["read_file"]
@pytest.mark.asyncio
async def test_404_for_unknown_colony(colony_dir):
    """GET and PATCH both answer 404 for a colony that does not exist."""
    # ``colony_dir`` is requested only for its COLONIES_DIR redirection.
    app = await _app(_FakeManager())
    missing_url = "/api/colony/unknown/tools"

    async with TestClient(TestServer(app)) as client:
        got = await client.get(missing_url)
        assert got.status == 404

        patched = await client.patch(missing_url, json={"enabled_mcp_tools": None})
        assert patched.status == 404
@pytest.mark.asyncio
async def test_tools_index_lists_colonies(colony_dir):
    """The tools index lists the seeded colony with its queen and no allowlist."""
    _, colony = colony_dir
    app = await _app(_FakeManager())

    async with TestClient(TestServer(app)) as client:
        response = await client.get("/api/colonies/tools-index")
        assert response.status == 200
        payload = await response.json()
        by_name = {item["name"]: item for item in payload["colonies"]}
        assert colony in by_name
        seeded = by_name[colony]
        assert seeded["queen_name"] == "queen_technology"
        assert seeded["has_allowlist"] is False
@@ -0,0 +1,240 @@
"""Tests for the MCP server CRUD HTTP routes.
Monkey-patches ``MCPRegistry`` inside ``routes_mcp`` so the HTTP layer is
exercised without reading or writing ``~/.hive/mcp_registry/installed.json``
or spawning actual subprocesses.
"""
from __future__ import annotations
from typing import Any
from unittest.mock import MagicMock
import pytest
from aiohttp import web
from aiohttp.test_utils import TestClient, TestServer
from framework.loader.mcp_errors import MCPError, MCPErrorCode
from framework.server import routes_mcp
class _FakeRegistry:
"""Stand-in for MCPRegistry — just enough surface for the routes."""
def __init__(self) -> None:
self._servers: dict[str, dict[str, Any]] = {
"built-in-seed": {
"source": "registry",
"transport": "stdio",
"enabled": True,
"manifest": {"description": "Factory-seeded server", "tools": []},
"last_health_status": "healthy",
"last_error": None,
"last_health_check_at": None,
}
}
def initialize(self) -> None: # noqa: D401 — registry idempotent init
return
def list_installed(self) -> list[dict[str, Any]]:
return [{"name": name, **entry} for name, entry in self._servers.items()]
def get_server(self, name: str) -> dict | None:
if name not in self._servers:
return None
return {"name": name, **self._servers[name]}
def add_local(self, *, name: str, transport: str, **kwargs: Any) -> dict:
if name in self._servers:
raise MCPError(
code=MCPErrorCode.MCP_INSTALL_FAILED,
what=f"Server '{name}' already exists",
why="A server with this name is already registered locally.",
fix=f"Run: hive mcp remove {name}",
)
entry = {
"source": "local",
"transport": transport,
"enabled": True,
"manifest": {"description": kwargs.get("description") or ""},
"last_health_status": None,
"last_error": None,
"last_health_check_at": None,
}
self._servers[name] = entry
return entry
def remove(self, name: str) -> None:
if name not in self._servers:
raise MCPError(
code=MCPErrorCode.MCP_INSTALL_FAILED,
what=f"Cannot remove server '{name}'",
why="Server is not installed.",
fix="Run: hive mcp list",
)
del self._servers[name]
def enable(self, name: str) -> None:
if name not in self._servers:
raise MCPError(
code=MCPErrorCode.MCP_INSTALL_FAILED,
what="not found",
why="not found",
fix="x",
)
self._servers[name]["enabled"] = True
def disable(self, name: str) -> None:
if name not in self._servers:
raise MCPError(
code=MCPErrorCode.MCP_INSTALL_FAILED,
what="not found",
why="not found",
fix="x",
)
self._servers[name]["enabled"] = False
def health_check(self, name: str) -> dict[str, Any]:
if name not in self._servers:
raise MCPError(
code=MCPErrorCode.MCP_HEALTH_FAILED,
what="not found",
why="not found",
fix="x",
)
return {"name": name, "status": "healthy", "tools": 3, "error": None}
@pytest.fixture
def registry(monkeypatch):
    """Swap ``routes_mcp._registry`` for a factory returning one shared fake."""
    fake = _FakeRegistry()

    def provide_fake():
        return fake

    monkeypatch.setattr(routes_mcp, "_registry", provide_fake)
    return fake
async def _make_app() -> web.Application:
    """Build an aiohttp app with only the MCP server routes registered."""
    application = web.Application()
    routes_mcp.register_routes(application)
    return application
@pytest.mark.asyncio
async def test_list_servers_returns_built_in(registry):
    """The listing contains the registry seed and the package-baked servers."""
    app = await _make_app()
    async with TestClient(TestServer(app)) as client:
        response = await client.get("/api/mcp/servers")
        assert response.status == 200
        payload = await response.json()
        servers = payload["servers"]

        # The fake registry carries one entry; the listing also merges
        # package-baked entries from
        # core/framework/agents/queen/mcp_servers.json so the UI matches
        # what the queen actually loads. Both kinds should appear.
        listed_names = {server["name"] for server in servers}
        assert "built-in-seed" in listed_names
        source_by_name = {server["name"]: server["source"] for server in servers}
        assert source_by_name.get("built-in-seed") == "registry"

        # Package-baked servers (coder-tools/gcu-tools/hive_tools) carry
        # source == "built-in" and are non-removable.
        package_baked = [server for server in servers if server["source"] == "built-in"]
        assert package_baked, "expected at least one package-baked MCP server"
        assert all(server.get("removable") is False for server in package_baked)
@pytest.mark.asyncio
async def test_add_local_server(registry):
    """POST creates a local server that then shows up in the listing."""
    app = await _make_app()
    new_server = {
        "name": "my-tool",
        "transport": "stdio",
        "command": "echo",
        "args": ["hi"],
        "description": "says hi",
    }
    async with TestClient(TestServer(app)) as client:
        created = await client.post("/api/mcp/servers", json=new_server)
        assert created.status == 201
        created_body = await created.json()
        assert created_body["server"]["name"] == "my-tool"
        assert created_body["server"]["source"] == "local"

        listing = await client.get("/api/mcp/servers")
        listing_body = await listing.json()
        listed_names = [server["name"] for server in listing_body["servers"]]
        assert "my-tool" in listed_names
@pytest.mark.asyncio
async def test_add_rejects_duplicate(registry):
    """Adding the same name twice: the first POST succeeds, the second conflicts.

    The conflict response must be a 409 whose ``error`` mentions
    "already exists" and which carries a non-empty ``fix`` hint.
    """
    app = await _make_app()
    payload = {"name": "dup", "transport": "stdio", "command": "x"}
    async with TestClient(TestServer(app)) as client:
        first = await client.post("/api/mcp/servers", json=payload)
        # Pin the create step explicitly. Without this check a broken
        # first request would go unnoticed and the 409 assertion below
        # would no longer prove that a *duplicate* was rejected.
        assert first.status == 201

        second = await client.post("/api/mcp/servers", json=payload)
        assert second.status == 409
        body = await second.json()
        assert "already exists" in body["error"].lower()
        assert body["fix"]
@pytest.mark.asyncio
async def test_add_rejects_invalid_transport(registry):
    """A transport outside the accepted set is answered with 400."""
    app = await _make_app()
    bad_request = {"name": "x", "transport": "nope"}
    async with TestClient(TestServer(app)) as client:
        response = await client.post("/api/mcp/servers", json=bad_request)
        assert response.status == 400
@pytest.mark.asyncio
async def test_enable_disable_cycle(registry):
    """Disable then enable flips the flag in the response and in the registry."""
    app = await _make_app()
    # Seed a local server directly on the fake registry.
    registry.add_local(name="local-one", transport="stdio", command="x")

    async with TestClient(TestServer(app)) as client:
        disabled = await client.post("/api/mcp/servers/local-one/disable")
        assert disabled.status == 200
        disabled_body = await disabled.json()
        assert disabled_body["enabled"] is False
        assert registry._servers["local-one"]["enabled"] is False

        enabled = await client.post("/api/mcp/servers/local-one/enable")
        assert enabled.status == 200
        enabled_body = await enabled.json()
        assert enabled_body["enabled"] is True
@pytest.mark.asyncio
async def test_remove_local_only(registry):
    """DELETE refuses non-local servers, 404s on unknown ones, removes local ones."""
    app = await _make_app()
    registry.add_local(name="local-two", transport="stdio", command="x")

    async with TestClient(TestServer(app)) as client:
        # Non-local entries are protected from removal.
        protected = await client.delete("/api/mcp/servers/built-in-seed")
        assert protected.status == 400

        # A name the registry does not know.
        missing = await client.delete("/api/mcp/servers/ghost")
        assert missing.status == 404

        # A locally added server can be removed.
        removed = await client.delete("/api/mcp/servers/local-two")
        assert removed.status == 200
        assert "local-two" not in registry._servers
@pytest.mark.asyncio
async def test_health_check(registry, monkeypatch):
    """The health endpoint relays the registry's health_check result."""
    app = await _make_app()
    registry.add_local(name="pingable", transport="stdio", command="x")

    async with TestClient(TestServer(app)) as client:
        response = await client.post("/api/mcp/servers/pingable/health")
        assert response.status == 200
        report = await response.json()
        assert report["status"] == "healthy"
        assert report["tools"] == 3
@@ -0,0 +1,297 @@
"""Tests for the per-queen MCP tool allowlist filter + routes.
Covers:
1. QueenPhaseState filter semantics (default-allow, allowlist, empty, phase-
isolation, memo identity for LLM prompt-cache stability).
2. routes_queen_tools round trip (GET, PATCH, validation, live-session
hot-reload).
Route tests monkey-patch a tiny queen profile + manager catalog; they never
spawn an MCP subprocess.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock
import pytest
import yaml
from aiohttp import web
from aiohttp.test_utils import TestClient, TestServer
from framework.llm.provider import Tool
from framework.server import routes_queen_tools
from framework.tools.queen_lifecycle_tools import QueenPhaseState
# ---------------------------------------------------------------------------
# QueenPhaseState filter — pure unit tests
# ---------------------------------------------------------------------------
def _tool(name: str) -> Tool:
    """Build a minimal ``Tool`` called ``name`` for the phase-state tests."""
    description = f"desc of {name}"
    schema = {"type": "object"}
    return Tool(name=name, description=description, parameters=schema)
class TestPhaseStateFilter:
    """Unit tests for ``QueenPhaseState``'s independent-phase MCP allowlist."""

    def test_default_allow_returns_every_tool(self):
        """``enabled_mcp_tools = None`` leaves the tool list untouched."""
        state = QueenPhaseState(phase="independent")
        state.independent_tools = [_tool("mcp_a"), _tool("mcp_b"), _tool("lc_c")]
        state.mcp_tool_names_all = {"mcp_a", "mcp_b"}
        state.enabled_mcp_tools = None
        state.rebuild_independent_filter()
        visible = [tool.name for tool in state.get_current_tools()]
        assert visible == ["mcp_a", "mcp_b", "lc_c"]

    def test_allowlist_keeps_listed_mcp_plus_all_lifecycle(self):
        """Only listed MCP tools survive; non-MCP tools always do."""
        state = QueenPhaseState(phase="independent")
        state.independent_tools = [_tool("mcp_a"), _tool("mcp_b"), _tool("lc_c")]
        state.mcp_tool_names_all = {"mcp_a", "mcp_b"}
        state.enabled_mcp_tools = ["mcp_a"]
        state.rebuild_independent_filter()
        visible = [tool.name for tool in state.get_current_tools()]
        assert visible == ["mcp_a", "lc_c"]

    def test_empty_allowlist_keeps_only_lifecycle(self):
        """An empty allowlist strips every MCP tool."""
        state = QueenPhaseState(phase="independent")
        state.independent_tools = [_tool("mcp_a"), _tool("mcp_b"), _tool("lc_c")]
        state.mcp_tool_names_all = {"mcp_a", "mcp_b"}
        state.enabled_mcp_tools = []
        state.rebuild_independent_filter()
        visible = [tool.name for tool in state.get_current_tools()]
        assert visible == ["lc_c"]

    def test_filter_isolated_to_independent_phase(self):
        """The allowlist applies in the independent phase only."""
        state = QueenPhaseState(phase="independent")
        state.independent_tools = [_tool("mcp_a"), _tool("lc_c")]
        state.working_tools = [_tool("mcp_a"), _tool("lc_c")]
        state.mcp_tool_names_all = {"mcp_a"}
        state.enabled_mcp_tools = []
        state.rebuild_independent_filter()

        # Independent phase: the MCP tool is filtered out.
        assert [tool.name for tool in state.get_current_tools()] == ["lc_c"]

        # Working phase: the same allowlist has no effect.
        state.phase = "working"
        assert [tool.name for tool in state.get_current_tools()] == ["mcp_a", "lc_c"]

    def test_memo_returns_stable_identity_for_prompt_cache(self):
        """Same Python list object across turns → LLM prompt cache stays warm."""
        state = QueenPhaseState(phase="independent")
        state.independent_tools = [_tool("mcp_a"), _tool("lc_c")]
        state.mcp_tool_names_all = {"mcp_a"}
        state.enabled_mcp_tools = None
        state.rebuild_independent_filter()

        before = state.get_current_tools()
        again = state.get_current_tools()
        assert before is again, "memoized list must be the same object across turns"

        # After a rebuild the list must be a different object so that
        # downstream caches correctly invalidate.
        state.enabled_mcp_tools = ["mcp_a"]
        state.rebuild_independent_filter()
        rebuilt = state.get_current_tools()
        assert rebuilt is not before
        assert [tool.name for tool in rebuilt] == ["mcp_a", "lc_c"]
# ---------------------------------------------------------------------------
# Route round-trip tests
# ---------------------------------------------------------------------------
@dataclass
class _FakeSession:
    """Minimal stand-in for a live queen session used by the route tests."""

    # Queen id the session belongs to; the tests set it to the fixture's queen id.
    queen_name: str
    # Phase state whose tool list the PATCH handler is expected to refresh.
    phase_state: QueenPhaseState
    colony_runtime: Any = None
    id: str = "sess-1"
    # Fake tool registry; assigned by tests that need a live tool catalog.
    _queen_tool_registry: Any = None
@dataclass
class _FakeManager:
    """Stand-in for the object the routes read from ``app["manager"]``."""

    # Live sessions keyed by session id.
    _sessions: dict = field(default_factory=dict)
    # MCP server name -> list of tool dicts (name / description / input_schema).
    _mcp_tool_catalog: dict = field(default_factory=dict)
@pytest.fixture
def queen_dir(tmp_path, monkeypatch):
    """Point queen profile storage at a tmp dir seeded with one profile.

    Returns a ``(queens_dir, queen_id)`` tuple.
    """
    queen_id = "queen_technology"
    queens_dir = tmp_path / "queens"
    profile_dir = queens_dir / queen_id
    profile_dir.mkdir(parents=True)
    monkeypatch.setattr("framework.agents.queen.queen_profiles.QUEENS_DIR", queens_dir)
    profile = {"name": "Alexandra", "title": "Head of Technology"}
    (profile_dir / "profile.yaml").write_text(yaml.safe_dump(profile))
    return queens_dir, queen_id
async def _make_app(*, manager: _FakeManager) -> web.Application:
    """Create an aiohttp app with the queen-tools routes wired to ``manager``."""
    application = web.Application()
    application["manager"] = manager
    routes_queen_tools.register_routes(application)
    return application
@pytest.mark.asyncio
async def test_get_tools_default_allows_everything(queen_dir, monkeypatch):
    """GET without a saved allowlist reports every catalog tool as enabled."""
    # The tmp profile from the fixture is enough; skip default-queen seeding.
    monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None)
    queen_id = queen_dir[1]
    catalog = {
        "coder-tools": [
            {"name": "read_file", "description": "read", "input_schema": {}},
            {"name": "write_file", "description": "write", "input_schema": {}},
        ],
    }
    manager = _FakeManager()
    manager._mcp_tool_catalog = catalog
    app = await _make_app(manager=manager)
    async with TestClient(TestServer(app)) as client:
        resp = await client.get(f"/api/queen/{queen_id}/tools")
        assert resp.status == 200
        payload = await resp.json()
        assert payload["enabled_mcp_tools"] is None
        assert payload["stale"] is False
        by_server = {}
        for entry in payload["mcp_servers"]:
            by_server[entry["name"]] = entry
        assert set(by_server) == {"coder-tools"}
        # Default-allow means each tool must come back flagged enabled.
        for tool in by_server["coder-tools"]["tools"]:
            assert tool["enabled"] is True
@pytest.mark.asyncio
async def test_patch_persists_and_validates(queen_dir, monkeypatch):
    """PATCH saves the allowlist, supports a null reset, and rejects unknown names."""
    monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None)
    queens_dir, queen_id = queen_dir
    profile_path = queens_dir / queen_id / "profile.yaml"
    tools_url = f"/api/queen/{queen_id}/tools"

    def stored_profile():
        # Re-read from disk each time so we observe what the handler persisted.
        return yaml.safe_load(profile_path.read_text())

    manager = _FakeManager()
    manager._mcp_tool_catalog = {
        "coder-tools": [
            {"name": "read_file", "description": "", "input_schema": {}},
            {"name": "write_file", "description": "", "input_schema": {}},
        ]
    }
    app = await _make_app(manager=manager)
    async with TestClient(TestServer(app)) as client:
        # Happy path: a valid allowlist is accepted and echoed back.
        resp = await client.patch(tools_url, json={"enabled_mcp_tools": ["read_file"]})
        assert resp.status == 200
        payload = await resp.json()
        assert payload["enabled_mcp_tools"] == ["read_file"]

        # The profile on disk carries the new allowlist.
        assert stored_profile()["enabled_mcp_tools"] == ["read_file"]

        # A follow-up GET reflects the per-tool enabled flags.
        resp = await client.get(tools_url)
        payload = await resp.json()
        tools_by_name = {}
        for tool in payload["mcp_servers"][0]["tools"]:
            tools_by_name[tool["name"]] = tool
        assert tools_by_name["read_file"]["enabled"] is True
        assert tools_by_name["write_file"]["enabled"] is False

        # Null resets the allowlist to default-allow.
        resp = await client.patch(tools_url, json={"enabled_mcp_tools": None})
        assert resp.status == 200
        payload = await resp.json()
        assert payload["enabled_mcp_tools"] is None

        # Unknown tool name → 400, and the profile is left untouched.
        resp = await client.patch(
            tools_url, json={"enabled_mcp_tools": ["nope_not_a_tool"]}
        )
        assert resp.status == 400
        detail = await resp.json()
        assert "nope_not_a_tool" in detail.get("unknown", [])
        # Still cleared from the earlier successful null reset.
        assert stored_profile()["enabled_mcp_tools"] is None
@pytest.mark.asyncio
async def test_patch_hot_reloads_live_session(queen_dir, monkeypatch):
    """PATCH refreshes the tool filter of an already-running queen session."""
    monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None)
    _, queen_id = queen_dir

    # Build a fake live session whose phase state carries a tool list the
    # filter can gate. We also need a fake registry so
    # _catalog_from_live_session can enumerate tools.
    class _FakeRegistry:
        def __init__(self, server_map, tools_by_name):
            self._mcp_server_tools = server_map
            self._tools_by_name = tools_by_name

        def get_tools(self):
            # Hand back the real Tool objects. ``MagicMock(name=n)`` would only
            # label the mock's repr; it does not give the mock a ``.name``
            # attribute, so it cannot stand in for a Tool here.
            return self._tools_by_name

    tools_by_name = {"read_file": _tool("read_file"), "write_file": _tool("write_file")}
    registry = _FakeRegistry(
        server_map={"coder-tools": {"read_file", "write_file"}},
        tools_by_name=tools_by_name,
    )

    phase_state = QueenPhaseState(phase="independent")
    phase_state.independent_tools = [tools_by_name["read_file"], tools_by_name["write_file"]]
    phase_state.mcp_tool_names_all = {"read_file", "write_file"}
    phase_state.enabled_mcp_tools = None
    phase_state.rebuild_independent_filter()

    session = _FakeSession(queen_name=queen_id, phase_state=phase_state)
    session._queen_tool_registry = registry
    manager = _FakeManager(_sessions={"sess-1": session})

    app = await _make_app(manager=manager)
    async with TestClient(TestServer(app)) as client:
        resp = await client.patch(
            f"/api/queen/{queen_id}/tools",
            json={"enabled_mcp_tools": ["read_file"]},
        )
        assert resp.status == 200
        body = await resp.json()
        assert body["refreshed_sessions"] == 1

        # Session's phase state reflects the new allowlist without a restart
        current = phase_state.get_current_tools()
        assert [t.name for t in current] == ["read_file"]
@pytest.mark.asyncio
async def test_missing_queen_returns_404(queen_dir, monkeypatch):
    """GET and PATCH on an unknown queen id both answer 404."""
    monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None)
    missing_url = "/api/queen/queen_nonexistent/tools"
    app = await _make_app(manager=_FakeManager())
    async with TestClient(TestServer(app)) as client:
        get_resp = await client.get(missing_url)
        assert get_resp.status == 404

        patch_resp = await client.patch(missing_url, json={"enabled_mcp_tools": None})
        assert patch_resp.status == 404
+45 -2
View File
@@ -186,6 +186,22 @@ class QueenPhaseState:
global_memory_dir: Path | None = None
queen_memory_dir: Path | None = None
# Per-queen MCP tool allowlist for the INDEPENDENT phase. ``None`` means
# "allow every MCP tool" (default, backward-compatible). An explicit list
# is authoritative: only tools whose name appears here pass through.
# Lifecycle / synthetic tools bypass this gate regardless.
enabled_mcp_tools: list[str] | None = None
# Union of every MCP-origin tool name currently registered — the set the
# allowlist can gate. Populated once at queen boot from
# ``ToolRegistry._mcp_server_tools``. Names outside this set (lifecycle,
# ``ask_user``) always pass through the filter.
mcp_tool_names_all: set = field(default_factory=set)
# Memoized output of the filter applied to ``independent_tools``.
# Recomputed only when ``enabled_mcp_tools`` or ``independent_tools``
# changes, so ``get_current_tools()`` in the independent phase returns
# a byte-stable list between saves and the LLM prompt cache stays warm.
_filtered_independent_tools: list = field(default_factory=list)
async def switch_to_working(self, source: str = "tool") -> None:
"""Switch to working phase — colony workers are running.
@@ -204,6 +220,27 @@ class QueenPhaseState:
"Colony workers are running. Available tools: " + ", ".join(tool_names) + "."
)
def rebuild_independent_filter(self) -> None:
    """Refresh the cached, allowlist-filtered copy of ``independent_tools``.

    With ``enabled_mcp_tools`` set to ``None`` every tool is kept.
    Otherwise a tool is dropped only when its name is in
    ``mcp_tool_names_all`` and absent from the allowlist; names outside
    ``mcp_tool_names_all`` always pass. The result is stored in
    ``_filtered_independent_tools`` as a new list object, preserving the
    order of ``independent_tools``.
    """
    allowlist = self.enabled_mcp_tools
    if allowlist is None:
        kept = list(self.independent_tools)
    else:
        permitted = set(allowlist)
        kept = []
        for tool in self.independent_tools:
            gated = tool.name in self.mcp_tool_names_all
            if gated and tool.name not in permitted:
                continue
            kept.append(tool)
    self._filtered_independent_tools = kept
def get_current_tools(self) -> list:
"""Return tools for the current phase."""
if self.phase == "working":
@@ -212,8 +249,14 @@ class QueenPhaseState:
return list(self.reviewing_tools)
if self.phase == "incubating":
return list(self.incubating_tools)
# Default / "independent" — DM mode with full MCP tools.
return list(self.independent_tools)
# Default / "independent" — DM mode with full MCP tools, gated by
# the per-queen allowlist. Return the memoized list directly so the
# JSON sent to the LLM is byte-identical turn-to-turn.
if not self._filtered_independent_tools and self.independent_tools:
# Safety net: first-call in tests or code paths that skipped
# the explicit boot-time rebuild.
self.rebuild_independent_filter()
return self._filtered_independent_tools
def get_current_prompt(self) -> str:
"""Return the system prompt for the current phase."""
+2
View File
@@ -5,6 +5,7 @@ import ColonyChat from "./pages/colony-chat";
import QueenDM from "./pages/queen-dm";
import OrgChart from "./pages/org-chart";
import PromptLibrary from "./pages/prompt-library";
import ToolLibrary from "./pages/tool-library";
import CredentialsPage from "./pages/credentials";
import NotFound from "./pages/not-found";
@@ -17,6 +18,7 @@ function App() {
<Route path="/queen/:queenId" element={<QueenDM />} />
<Route path="/org-chart" element={<OrgChart />} />
<Route path="/prompt-library" element={<PromptLibrary />} />
<Route path="/tool-library" element={<ToolLibrary />} />
<Route path="/credentials" element={<CredentialsPage />} />
<Route path="*" element={<NotFound />} />
</Route>
+50
View File
@@ -0,0 +1,50 @@
import { api } from "./client";
import type { ToolMeta, McpServerTools } from "./queens";
/** One entry of the `colonies` array returned by `coloniesApi.list`. */
export interface ColonySummary {
  name: string;
  queen_name: string | null;
  created_at: string | null;
  // NOTE(review): presumably true when an explicit allowlist is saved for
  // the colony — confirm against the backend.
  has_allowlist: boolean;
  // NOTE(review): presumably null when no allowlist is set — confirm.
  enabled_count: number | null;
}
/** Response body of `coloniesApi.getTools`: a colony's full tool surface. */
export interface ColonyToolsResponse {
  colony_name: string;
  // `null` means "allow every MCP tool"; a list enables only those names.
  enabled_mcp_tools: string[] | null;
  stale: boolean;
  lifecycle: ToolMeta[];
  synthetic: ToolMeta[];
  // MCP tools grouped per server, each carrying its own `enabled` flag.
  mcp_servers: McpServerTools[];
}
/** Response body of `coloniesApi.updateTools`. */
export interface ColonyToolsUpdateResult {
  colony_name: string;
  // The allowlist as persisted; `null` means "allow every MCP tool".
  enabled_mcp_tools: string[] | null;
  refreshed_runtimes: number;
  note?: string;
}
/** Path of a colony's tools endpoint, with the colony name URL-escaped. */
const colonyToolsPath = (colonyName: string): string =>
  `/colony/${encodeURIComponent(colonyName)}/tools`;

export const coloniesApi = {
  /** Fetch a summary of the tool allowlist for every colony on disk. */
  list() {
    return api.get<{ colonies: ColonySummary[] }>(`/colonies/tools-index`);
  },

  /** Fetch a colony's tool surface (lifecycle + synthetic + MCP). */
  getTools(colonyName: string) {
    return api.get<ColonyToolsResponse>(colonyToolsPath(colonyName));
  },

  /** Save a colony's MCP tool allowlist.
   *
   * Passing ``null`` restores "allow every MCP tool"; passing a list
   * enables only the named MCP tools. The change applies to the next
   * worker spawn; workers already running keep their booted tool list.
   */
  updateTools(colonyName: string, enabled: string[] | null) {
    const payload = { enabled_mcp_tools: enabled };
    return api.patch<ColonyToolsUpdateResult>(
      colonyToolsPath(colonyName),
      payload,
    );
  },
};
+66
View File
@@ -0,0 +1,66 @@
import { api } from "./client";
/** Transport kinds selectable when registering an MCP server. */
export type McpTransport = "stdio" | "http" | "sse" | "unix";
/** One MCP server entry as returned by `mcpApi.listServers` / `addServer`. */
export interface McpServer {
  name: string;
  /** "local": added via UI/CLI (user-editable). "registry": installed from
   * the remote MCP registry. "built-in": baked into the queen package —
   * visible but not removable from the UI. */
  source: "local" | "registry" | "built-in";
  // Widened with `string`, so values outside McpTransport are also accepted.
  transport: McpTransport | string;
  description: string;
  enabled: boolean;
  // `null` is rendered as "Unknown" by the servers panel.
  last_health_status: "healthy" | "unhealthy" | null;
  last_error: string | null;
  last_health_check_at: string | null;
  tool_count: number | null;
  /** Servers flagged removable:false cannot be deleted from the UI. */
  removable?: boolean;
}
/** Request body for `mcpApi.addServer`; which optional fields apply depends
 * on `transport`. */
export interface AddMcpServerBody {
  name: string;
  transport: McpTransport;
  /** stdio */
  command?: string;
  args?: string[];
  env?: Record<string, string>;
  cwd?: string;
  /** http / sse */
  url?: string;
  headers?: Record<string, string>;
  /** unix */
  socket_path?: string;
  description?: string;
}
/** Response body of `mcpApi.checkHealth`. */
export interface McpHealthResult {
  name: string;
  status: "healthy" | "unhealthy" | "unknown";
  // NOTE(review): presumably the number of tools the server reported — confirm.
  tools: number;
  error: string | null;
}
/** Backend MCPError shape when an operation fails.
 *
 * Only `error` is required; the remaining fields are optional detail.
 */
export interface McpErrorBody {
  error: string;
  code?: string;
  what?: string;
  why?: string;
  fix?: string;
}
/** Path of one MCP server resource, optionally followed by an action segment. */
const mcpServerPath = (name: string, action?: string): string => {
  const base = `/mcp/servers/${encodeURIComponent(name)}`;
  return action ? `${base}/${action}` : base;
};

/** Client for the MCP server management endpoints. */
export const mcpApi = {
  /** List every known MCP server. */
  listServers() {
    return api.get<{ servers: McpServer[] }>("/mcp/servers");
  },

  /** Register a new MCP server. */
  addServer(body: AddMcpServerBody) {
    return api.post<{ server: McpServer; hint: string }>("/mcp/servers", body);
  },

  /** Delete the server called `name`. */
  removeServer(name: string) {
    return api.delete<{ removed: string }>(mcpServerPath(name));
  },

  /** Enable or disable the server called `name`. */
  setEnabled(name: string, enabled: boolean) {
    const action = enabled ? "enable" : "disable";
    return api.post<{ name: string; enabled: boolean }>(
      mcpServerPath(name, action),
    );
  },

  /** Run a health check against the server called `name`. */
  checkHealth(name: string) {
    return api.post<McpHealthResult>(mcpServerPath(name, "health"));
  },
};
+42
View File
@@ -16,6 +16,33 @@ export interface QueenSessionResult {
status: "live" | "resumed" | "created";
}
/** Descriptive metadata for a single tool in a tools listing. */
export interface ToolMeta {
  name: string;
  description: string;
  input_schema?: Record<string, unknown>;
  // NOTE(review): presumably false for tools the allowlist cannot toggle — confirm.
  editable?: boolean;
}
/** The tools of one MCP server, each annotated with its `enabled` flag. */
export interface McpServerTools {
  name: string;
  tools: Array<ToolMeta & { enabled: boolean }>;
}
/** Response body of `queensApi.getTools`: a queen's full tool surface. */
export interface QueenToolsResponse {
  queen_id: string;
  // `null` is the default ("allow every MCP tool"); a list restricts the surface.
  enabled_mcp_tools: string[] | null;
  stale: boolean;
  lifecycle: ToolMeta[];
  synthetic: ToolMeta[];
  mcp_servers: McpServerTools[];
}
/** Response body of `queensApi.updateTools`. */
export interface QueenToolsUpdateResult {
  queen_id: string;
  // The allowlist as persisted; `null` means "allow every MCP tool".
  enabled_mcp_tools: string[] | null;
  // NOTE(review): presumably the count of live sessions whose filter was
  // rebuilt — confirm against the backend.
  refreshed_sessions: number;
}
export const queensApi = {
/** List all queen profiles (id, name, title). */
list: () =>
@@ -57,4 +84,19 @@ export const queensApi = {
initial_prompt: initialPrompt,
initial_phase: initialPhase || undefined,
}),
/** Enumerate the queen's tool surface (lifecycle + synthetic + MCP).
 *
 * The queen id is URL-escaped so it always stays a single path segment.
 */
getTools: (queenId: string) =>
  api.get<QueenToolsResponse>(`/queen/${encodeURIComponent(queenId)}/tools`),

/** Persist the MCP tool allowlist for a queen.
 *
 * Pass ``null`` to reset to the default ("allow every MCP tool") or an
 * explicit list to restrict the queen's tool surface. Lifecycle and
 * synthetic tools are always enabled and cannot be listed here.
 * The queen id is URL-escaped so it always stays a single path segment.
 */
updateTools: (queenId: string, enabled: string[] | null) =>
  api.patch<QueenToolsUpdateResult>(
    `/queen/${encodeURIComponent(queenId)}/tools`,
    { enabled_mcp_tools: enabled },
  ),
};
@@ -0,0 +1,27 @@
import { useCallback } from "react";
import { coloniesApi } from "@/api/colonies";
import ToolsEditor from "./ToolsEditor";
type ColonyToolsSectionProps = { colonyName: string };

/** Tools editor bound to one colony's MCP allowlist endpoints. */
export default function ColonyToolsSection(props: ColonyToolsSectionProps) {
  const { colonyName } = props;

  // Both callbacks change identity only when the colony changes.
  const fetchSnapshot = useCallback(
    function loadColonyTools() {
      return coloniesApi.getTools(colonyName);
    },
    [colonyName],
  );
  const saveAllowlist = useCallback(
    function saveColonyTools(enabled: string[] | null) {
      return coloniesApi.updateTools(colonyName, enabled);
    },
    [colonyName],
  );

  const subjectKey = `colony:${colonyName}`;
  return (
    <ToolsEditor
      subjectKey={subjectKey}
      title="Tools"
      caveat="Changes apply to the next worker spawn. Running workers keep the tool list they booted with."
      fetchSnapshot={fetchSnapshot}
      saveAllowlist={saveAllowlist}
    />
  );
}
@@ -0,0 +1,651 @@
import { useEffect, useState } from "react";
import {
Plus,
Trash2,
RefreshCw,
Loader2,
AlertCircle,
Check,
X,
Server,
CircleCheck,
CircleAlert,
CircleDashed,
} from "lucide-react";
import {
mcpApi,
type McpServer,
type McpTransport,
type AddMcpServerBody,
} from "@/api/mcp";
// Local alias for the transport union exported by the MCP API module.
type TransportKey = McpTransport;
// Transports offered in the add-server form, in display order.
const TRANSPORT_OPTIONS: TransportKey[] = ["stdio", "http", "sse", "unix"];
/** Render the status badge for a server: Disabled, Healthy, Unhealthy or Unknown. */
function healthBadge(server: McpServer) {
  // A disabled server short-circuits: its last health status is not shown.
  if (!server.enabled) {
    return (
      <span className="flex items-center gap-1 text-[11px] text-muted-foreground">
        <CircleDashed className="w-3 h-3" /> Disabled
      </span>
    );
  }
  switch (server.last_health_status) {
    case "healthy":
      return (
        <span className="flex items-center gap-1 text-[11px] text-green-500">
          <CircleCheck className="w-3 h-3" /> Healthy
        </span>
      );
    case "unhealthy": {
      // The tooltip carries the last error when the backend reported one.
      const tooltip = server.last_error || "Unhealthy";
      return (
        <span
          className="flex items-center gap-1 text-[11px] text-red-400"
          title={tooltip}
        >
          <CircleAlert className="w-3 h-3" /> Unhealthy
        </span>
      );
    }
    default:
      return (
        <span className="flex items-center gap-1 text-[11px] text-muted-foreground">
          <CircleDashed className="w-3 h-3" /> Unknown
        </span>
      );
  }
}
// Raw text state of the add-server form. Every field is kept as a string;
// buildAddBody converts it into the request body.
interface AddFormState {
  name: string;
  transport: TransportKey;
  command: string;
  // One argument per line.
  args: string;
  // KEY=VALUE pairs, one per line.
  env: string;
  cwd: string;
  url: string;
  // KEY=VALUE pairs, one per line.
  headers: string;
  socketPath: string;
  description: string;
}
// Blank form used when the add-server modal opens and after a successful add.
const EMPTY_FORM: AddFormState = {
  name: "",
  transport: "stdio",
  command: "",
  args: "",
  env: "",
  cwd: "",
  url: "",
  headers: "",
  socketPath: "",
  description: "",
};
/**
 * Parse newline-separated `KEY=VALUE` text into an object.
 *
 * Lines are trimmed; blank lines, lines without `=`, and lines with an empty
 * key are ignored. Only the first `=` splits, so values may contain `=`.
 * A later duplicate key overwrites an earlier one.
 */
function parseKeyValueLines(text: string): Record<string, string> {
  const pairs: Record<string, string> = {};
  for (const rawLine of text.split("\n")) {
    const line = rawLine.trim();
    if (!line) continue;
    const sep = line.indexOf("=");
    if (sep < 0) continue;
    const key = line.slice(0, sep).trim();
    if (!key) continue;
    pairs[key] = line.slice(sep + 1).trim();
  }
  return pairs;
}
/**
 * Convert the raw form state into the add-server request body.
 *
 * Only the fields relevant to the chosen transport are included, and
 * optional fields are omitted when they are empty after trimming.
 */
function buildAddBody(form: AddFormState): AddMcpServerBody {
  const { transport } = form;
  const body: AddMcpServerBody = {
    name: form.name.trim(),
    transport,
    description: form.description.trim() || undefined,
  };

  switch (transport) {
    case "stdio": {
      body.command = form.command.trim();
      const argv = form.args
        .split("\n")
        .map((entry) => entry.trim())
        .filter((entry) => entry.length > 0);
      if (argv.length > 0) body.args = argv;
      const env = parseKeyValueLines(form.env);
      if (Object.keys(env).length > 0) body.env = env;
      const cwd = form.cwd.trim();
      if (cwd) body.cwd = cwd;
      break;
    }
    case "http":
    case "sse": {
      body.url = form.url.trim();
      const headers = parseKeyValueLines(form.headers);
      if (Object.keys(headers).length > 0) body.headers = headers;
      break;
    }
    case "unix":
      body.socket_path = form.socketPath.trim();
      break;
  }
  return body;
}
/**
 * Pick the most specific message from a failed API call: the structured
 * `body.error` if present, then the Error message, then `fallback`.
 */
function describeError(e: unknown, fallback: string): string {
  const body = (e as { body?: { error?: string } } | null | undefined)?.body;
  return body?.error || (e as Error)?.message || fallback;
}

/**
 * Settings panel that lists MCP servers and lets the user add, enable or
 * disable, health-check and remove them.
 */
export default function McpServersPanel() {
  const [servers, setServers] = useState<McpServer[] | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  const [adding, setAdding] = useState(false);
  const [form, setForm] = useState<AddFormState>(EMPTY_FORM);
  const [submitting, setSubmitting] = useState(false);
  const [submitError, setSubmitError] = useState<string | null>(null);

  // Per-server "operation in flight" flags, keyed by server name.
  const [busyByName, setBusyByName] = useState<Record<string, boolean>>({});

  // Reload the server list. Failures land in the panel-level error banner.
  const refresh = async () => {
    setLoading(true);
    setError(null);
    try {
      const { servers } = await mcpApi.listServers();
      setServers(servers);
    } catch (e: unknown) {
      setError(describeError(e, "Failed to load MCP servers"));
    } finally {
      setLoading(false);
    }
  };

  useEffect(() => {
    refresh();
  }, []);

  const setBusy = (name: string, v: boolean) =>
    setBusyByName((p) => ({ ...p, [name]: v }));

  // Shared wrapper for row actions: mark the row busy, run the action,
  // reload the list, and report any failure the same way for every action.
  const runServerAction = async (
    name: string,
    action: () => Promise<unknown>,
    fallback: string,
  ) => {
    setBusy(name, true);
    try {
      await action();
      await refresh();
    } catch (e: unknown) {
      setError(describeError(e, fallback));
    } finally {
      setBusy(name, false);
    }
  };

  const handleToggle = (server: McpServer) =>
    runServerAction(
      server.name,
      () => mcpApi.setEnabled(server.name, !server.enabled),
      "Toggle failed",
    );

  const handleRemove = async (server: McpServer) => {
    if (!confirm(`Remove MCP server "${server.name}"?`)) return;
    await runServerAction(
      server.name,
      () => mcpApi.removeServer(server.name),
      "Remove failed",
    );
  };

  const handleHealth = (server: McpServer) =>
    runServerAction(
      server.name,
      () => mcpApi.checkHealth(server.name),
      "Health check failed",
    );

  // A name plus the one field the selected transport requires.
  const canSubmit = (() => {
    if (!form.name.trim()) return false;
    if (form.transport === "stdio") return !!form.command.trim();
    if (form.transport === "http" || form.transport === "sse")
      return !!form.url.trim();
    if (form.transport === "unix") return !!form.socketPath.trim();
    return false;
  })();

  const handleSubmit = async () => {
    if (!canSubmit) return;
    setSubmitting(true);
    setSubmitError(null);
    try {
      const body = buildAddBody(form);
      const { server } = await mcpApi.addServer(body);
      // Best-effort: auto-run health check so the UI shows tool count.
      try {
        await mcpApi.checkHealth(server.name);
      } catch {
        /* health check is informational; don't block the add flow */
      }
      setAdding(false);
      setForm(EMPTY_FORM);
      await refresh();
    } catch (e: unknown) {
      // The add flow also surfaces the backend's suggested fix, when given.
      const body = (
        e as { body?: { error?: string; fix?: string } } | null | undefined
      )?.body;
      setSubmitError(
        [body?.error, body?.fix].filter(Boolean).join(" — ") ||
          (e as Error)?.message ||
          "Add failed",
      );
    } finally {
      setSubmitting(false);
    }
  };

  // Group by origin. "local" = user-registered via the UI or CLI. Everything
  // else (built-in package entries, registry-installed entries) sits under
  // "Built-in" since the user can't remove them from the UI.
  const builtIns = (servers || []).filter((s) => s.source !== "local");
  const custom = (servers || []).filter((s) => s.source === "local");

  return (
    <div className="flex flex-col gap-5">
      <div className="flex items-start justify-between gap-3">
        <div>
          <h3 className="text-lg font-semibold text-foreground">MCP Servers</h3>
          <p className="text-sm text-muted-foreground mt-1">
            Register your own MCP servers so queens can use their tools. New
            servers take effect in the next queen session you start.
          </p>
        </div>
        <div className="flex items-center gap-2">
          <button
            onClick={refresh}
            disabled={loading}
            className="flex items-center gap-1.5 px-3 py-1.5 rounded-md border border-border/60 text-xs text-muted-foreground hover:text-foreground hover:bg-muted/30 disabled:opacity-50"
            title="Refresh"
          >
            <RefreshCw className={`w-3 h-3 ${loading ? "animate-spin" : ""}`} />
          </button>
          <button
            onClick={() => {
              setAdding(true);
              setForm(EMPTY_FORM);
              setSubmitError(null);
            }}
            className="flex items-center gap-1.5 px-3 py-1.5 rounded-md bg-primary text-primary-foreground text-xs font-semibold hover:bg-primary/90"
          >
            <Plus className="w-3 h-3" />
            Add MCP Server
          </button>
        </div>
      </div>

      {error && (
        <div className="flex items-start gap-2 text-xs text-destructive p-2.5 rounded-md bg-destructive/10 border border-destructive/30">
          <AlertCircle className="w-3.5 h-3.5 mt-0.5 flex-shrink-0" />
          <span className="flex-1">{error}</span>
          <button
            onClick={() => setError(null)}
            className="text-destructive/70 hover:text-destructive"
          >
            <X className="w-3 h-3" />
          </button>
        </div>
      )}

      {loading && !servers && (
        <div className="flex items-center gap-2 text-xs text-muted-foreground">
          <Loader2 className="w-3 h-3 animate-spin" /> Loading MCP servers
        </div>
      )}

      {servers && (
        <>
          {custom.length > 0 && (
            <Section title="My Custom">
              {custom.map((s) => (
                <ServerRow
                  key={s.name}
                  server={s}
                  busy={!!busyByName[s.name]}
                  onToggle={() => handleToggle(s)}
                  onRemove={() => handleRemove(s)}
                  onHealth={() => handleHealth(s)}
                  isLocal
                />
              ))}
            </Section>
          )}

          <Section title="Built-in">
            {builtIns.length === 0 ? (
              <p className="text-xs text-muted-foreground px-2 py-2">
                No built-in servers registered.
              </p>
            ) : (
              builtIns.map((s) => (
                <ServerRow
                  key={s.name}
                  server={s}
                  busy={!!busyByName[s.name]}
                  onToggle={() => handleToggle(s)}
                  onRemove={() => handleRemove(s)}
                  onHealth={() => handleHealth(s)}
                />
              ))
            )}
          </Section>
        </>
      )}

      {/* Add MCP modal */}
      {adding && (
        <div className="fixed inset-0 z-[60] flex items-center justify-center">
          <div
            className="absolute inset-0 bg-black/50"
            onClick={() => !submitting && setAdding(false)}
          />
          <div className="relative bg-card border border-border/60 rounded-xl shadow-2xl w-full max-w-lg p-5 space-y-4 max-h-[85vh] overflow-y-auto">
            <div className="flex items-center justify-between">
              <h3 className="text-sm font-semibold text-foreground">
                Add MCP Server
              </h3>
              <button
                onClick={() => !submitting && setAdding(false)}
                className="p-1 rounded text-muted-foreground hover:text-foreground"
              >
                <X className="w-4 h-4" />
              </button>
            </div>

            <FieldRow label="Name *" hint="Unique identifier, e.g. my-search-tool">
              <input
                autoFocus
                value={form.name}
                onChange={(e) =>
                  setForm((f) => ({
                    ...f,
                    name: e.target.value.toLowerCase().replace(/[^a-z0-9_-]/g, ""),
                  }))
                }
                placeholder="my-search-tool"
                className={inputCls}
              />
            </FieldRow>

            <FieldRow label="Transport *">
              <div className="flex gap-1">
                {TRANSPORT_OPTIONS.map((t) => (
                  <button
                    key={t}
                    onClick={() => setForm((f) => ({ ...f, transport: t }))}
                    className={`flex-1 px-3 py-1.5 rounded-md text-xs font-medium border ${
                      form.transport === t
                        ? "bg-primary/15 text-primary border-primary/40"
                        : "text-muted-foreground hover:text-foreground border-border/60 hover:bg-muted/30"
                    }`}
                  >
                    {t}
                  </button>
                ))}
              </div>
            </FieldRow>

            {form.transport === "stdio" && (
              <>
                <FieldRow
                  label="Command *"
                  hint="Executable that speaks MCP over stdin/stdout"
                >
                  <input
                    value={form.command}
                    onChange={(e) =>
                      setForm((f) => ({ ...f, command: e.target.value }))
                    }
                    placeholder="uv"
                    className={inputCls}
                  />
                </FieldRow>
                <FieldRow label="Args (one per line)">
                  <textarea
                    value={form.args}
                    onChange={(e) =>
                      setForm((f) => ({ ...f, args: e.target.value }))
                    }
                    rows={3}
                    placeholder={"run\npython\nmy_server.py\n--stdio"}
                    className={textareaCls}
                  />
                </FieldRow>
                <FieldRow label="Env (KEY=VALUE, one per line)">
                  <textarea
                    value={form.env}
                    onChange={(e) =>
                      setForm((f) => ({ ...f, env: e.target.value }))
                    }
                    rows={2}
                    placeholder="API_KEY=abc123"
                    className={textareaCls}
                  />
                </FieldRow>
                <FieldRow label="Working directory">
                  <input
                    value={form.cwd}
                    onChange={(e) =>
                      setForm((f) => ({ ...f, cwd: e.target.value }))
                    }
                    placeholder="/path/to/repo"
                    className={inputCls}
                  />
                </FieldRow>
              </>
            )}

            {(form.transport === "http" || form.transport === "sse") && (
              <>
                <FieldRow label="URL *">
                  <input
                    value={form.url}
                    onChange={(e) =>
                      setForm((f) => ({ ...f, url: e.target.value }))
                    }
                    placeholder="https://example.com/mcp"
                    className={inputCls}
                  />
                </FieldRow>
                <FieldRow label="Headers (KEY=VALUE, one per line)">
                  <textarea
                    value={form.headers}
                    onChange={(e) =>
                      setForm((f) => ({ ...f, headers: e.target.value }))
                    }
                    rows={2}
                    placeholder="Authorization=Bearer ..."
                    className={textareaCls}
                  />
                </FieldRow>
              </>
            )}

            {form.transport === "unix" && (
              <FieldRow label="Socket path *">
                <input
                  value={form.socketPath}
                  onChange={(e) =>
                    setForm((f) => ({ ...f, socketPath: e.target.value }))
                  }
                  placeholder="/tmp/mcp.sock"
                  className={inputCls}
                />
              </FieldRow>
            )}

            <FieldRow label="Description">
              <input
                value={form.description}
                onChange={(e) =>
                  setForm((f) => ({ ...f, description: e.target.value }))
                }
                placeholder="What this server does"
                className={inputCls}
              />
            </FieldRow>

            {submitError && (
              <div className="flex items-start gap-2 text-xs text-destructive p-2 rounded-md bg-destructive/10 border border-destructive/30">
                <AlertCircle className="w-3.5 h-3.5 mt-0.5 flex-shrink-0" />
                <span>{submitError}</span>
              </div>
            )}

            <div className="flex justify-end gap-2 pt-1">
              <button
                onClick={() => setAdding(false)}
                disabled={submitting}
                className="px-3 py-1.5 rounded-md text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/30"
              >
                Cancel
              </button>
              <button
                onClick={handleSubmit}
                disabled={!canSubmit || submitting}
                className="flex items-center gap-1.5 px-3 py-1.5 rounded-md bg-primary text-primary-foreground text-xs font-semibold hover:bg-primary/90 disabled:opacity-50 disabled:cursor-not-allowed"
              >
                {submitting ? (
                  <Loader2 className="w-3 h-3 animate-spin" />
                ) : (
                  <Check className="w-3 h-3" />
                )}
                {submitting ? "Adding…" : "Add"}
              </button>
            </div>
          </div>
        </div>
      )}
    </div>
  );
}
// Shared Tailwind classes for the add-server form's single-line inputs.
const inputCls =
  "w-full bg-muted/30 border border-border/50 rounded-lg px-3 py-2 text-sm text-foreground focus:outline-none focus:ring-1 focus:ring-primary/40";
// Textareas reuse the input classes, adding no-resize, monospace and smaller text.
const textareaCls = `${inputCls} resize-none font-mono text-xs`;
interface FieldRowProps {
  label: string;
  hint?: string;
  children: React.ReactNode;
}

/** A labelled form field with an optional hint line under the control. */
function FieldRow(props: FieldRowProps) {
  const { label, hint, children } = props;
  const hintLine = hint && (
    <p className="text-[11px] text-muted-foreground/70 mt-1">{hint}</p>
  );
  return (
    <div>
      <label className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-1.5 block">
        {label}
      </label>
      {children}
      {hintLine}
    </div>
  );
}
interface SectionProps {
  title: string;
  children: React.ReactNode;
}

/** A titled group that stacks its children vertically. */
function Section(props: SectionProps) {
  const { title, children } = props;
  return (
    <div>
      <p className="text-[11px] font-semibold text-muted-foreground/60 uppercase tracking-wider mb-2">
        {title}
      </p>
      <div className="flex flex-col gap-1">{children}</div>
    </div>
  );
}
interface ServerRowProps {
  server: McpServer;
  busy: boolean;
  onToggle: () => void;
  onRemove: () => void;
  onHealth: () => void;
  isLocal?: boolean;
}

/**
 * One row of the server list: name, transport, tool count, health badge and
 * description, plus action buttons for servers that are not built-in.
 */
function ServerRow(props: ServerRowProps) {
  const { server, busy, onToggle, onRemove, onHealth, isLocal } = props;

  // Package-baked servers live in the repo and aren't managed by
  // MCPRegistry, so toggling / removing / health-checking them would
  // fail against the backend. Show them as read-only.
  const isBuiltIn = server.source === "built-in";
  const canRemove = isLocal && !isBuiltIn;
  const hasToolCount = server.tool_count != null;
  const descriptionText = isBuiltIn
    ? server.description
    : `· ${server.description}`;
  // Enabled servers get the muted "Disable" look; disabled ones the primary "Enable" look.
  const toggleTone = server.enabled
    ? "text-muted-foreground border-border/60 hover:bg-muted/30"
    : "bg-primary/15 text-primary border-primary/40 hover:bg-primary/25";

  return (
    <div className="flex items-center gap-3 py-2.5 px-2 rounded-lg hover:bg-muted/20">
      <div className="w-9 h-9 rounded-full bg-primary/10 flex items-center justify-center flex-shrink-0">
        <Server className="w-4 h-4 text-primary" />
      </div>
      <div className="flex-1 min-w-0">
        <div className="flex items-center gap-2">
          <p className="text-sm font-medium text-foreground truncate">
            {server.name}
          </p>
          <span className="text-[10px] uppercase tracking-wider text-muted-foreground/60">
            {server.transport}
          </span>
          {isBuiltIn && (
            <span className="text-[10px] uppercase tracking-wider text-muted-foreground/80 bg-muted/40 px-1.5 py-0.5 rounded">
              Built-in
            </span>
          )}
          {hasToolCount && (
            <span className="text-[11px] text-muted-foreground">
              {server.tool_count} tools
            </span>
          )}
        </div>
        <div className="flex items-center gap-2">
          {!isBuiltIn && healthBadge(server)}
          {server.description && (
            <span className="text-xs text-muted-foreground truncate">
              {descriptionText}
            </span>
          )}
        </div>
      </div>
      {!isBuiltIn && (
        <>
          <button
            onClick={onHealth}
            disabled={busy}
            className="p-1.5 rounded-md text-muted-foreground hover:text-foreground hover:bg-muted/40 disabled:opacity-50"
            title="Health check"
          >
            {busy ? (
              <Loader2 className="w-3.5 h-3.5 animate-spin" />
            ) : (
              <RefreshCw className="w-3.5 h-3.5" />
            )}
          </button>
          <button
            onClick={onToggle}
            disabled={busy}
            className={`px-3 py-1 rounded-md text-[11px] font-semibold border disabled:opacity-50 ${toggleTone}`}
          >
            {server.enabled ? "Disable" : "Enable"}
          </button>
        </>
      )}
      {canRemove && (
        <button
          onClick={onRemove}
          disabled={busy}
          className="p-1.5 rounded-md text-muted-foreground hover:text-red-400 hover:bg-red-500/10 disabled:opacity-50"
          title="Remove"
        >
          <Trash2 className="w-3.5 h-3.5" />
        </button>
      )}
    </div>
  );
}
@@ -7,6 +7,7 @@ import { executionApi } from "@/api/execution";
import { compressImage } from "@/lib/image-utils";
import type { Colony } from "@/types/colony";
import { slugToColonyId } from "@/lib/colony-registry";
import QueenToolsSection from "./QueenToolsSection";
interface QueenProfilePanelProps {
queenId: string;
@@ -354,6 +355,10 @@ export default function QueenProfilePanel({ queenId, colonies, onClose }: QueenP
</div>
)}
<div className="mb-6">
<QueenToolsSection queenId={queenId} />
</div>
{colonies.length > 0 && (
<div>
<h4 className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider mb-2">Assigned Colonies</h4>
@@ -0,0 +1,22 @@
import { useCallback } from "react";
import { queensApi } from "@/api/queens";
import ToolsEditor from "./ToolsEditor";
/**
 * Tools editor bound to a single queen.
 *
 * Wires the queen-scoped API calls into the shared `ToolsEditor`. Both
 * callbacks are memoized on `queenId`, so their identity only changes when
 * a different queen is shown.
 */
export default function QueenToolsSection(props: { queenId: string }) {
  const { queenId } = props;

  // Loads the current tool snapshot for this queen.
  const loadTools = useCallback(() => queensApi.getTools(queenId), [queenId]);

  // Persists the allowlist for this queen (`null` is forwarded untouched).
  const persistTools = useCallback(
    (allowlist: string[] | null) => queensApi.updateTools(queenId, allowlist),
    [queenId],
  );

  return (
    <ToolsEditor
      subjectKey={`queen:${queenId}`}
      title="Tools"
      fetchSnapshot={loadTools}
      saveAllowlist={persistTools}
    />
  );
}
+11 -2
View File
@@ -6,11 +6,12 @@ import { useModel, LLM_PROVIDERS } from "@/context/ModelContext";
import { credentialsApi } from "@/api/credentials";
import { configApi, type ModelOption } from "@/api/config";
import { compressImage } from "@/lib/image-utils";
import McpServersPanel from "./McpServersPanel";
interface SettingsModalProps {
open: boolean;
onClose: () => void;
initialSection?: "profile" | "byok";
initialSection?: "profile" | "byok" | "mcp";
}
function ValidationBadge({ state }: { state: "validating" | { valid: boolean | null; message: string } | undefined }) {
@@ -37,7 +38,7 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
const [displayName, setDisplayName] = useState(userProfile.displayName);
const [about, setAbout] = useState(userProfile.about);
const [activeSection, setActiveSection] = useState<"profile" | "byok">(initialSection || "profile");
const [activeSection, setActiveSection] = useState<"profile" | "byok" | "mcp">(initialSection || "profile");
const [editingProvider, setEditingProvider] = useState<string | null>(null);
const [keyInput, setKeyInput] = useState("");
const [showKey, setShowKey] = useState(false);
@@ -187,6 +188,12 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
>
BYOK
</button>
<button
onClick={() => setActiveSection("mcp")}
className={`text-left text-sm px-3 py-1.5 rounded-md ${activeSection === "mcp" ? "bg-primary/15 text-primary font-medium" : "text-muted-foreground hover:text-foreground hover:bg-muted/30"}`}
>
MCP Servers
</button>
</div>
</div>
@@ -267,6 +274,8 @@ export default function SettingsModal({ open, onClose, initialSection }: Setting
</>
)}
{activeSection === "mcp" && <McpServersPanel />}
{activeSection === "byok" && (
<>
<div>
+8
View File
@@ -12,6 +12,7 @@ import {
X,
Crown,
Loader2,
Wrench,
} from "lucide-react";
import SidebarColonyItem from "./SidebarColonyItem";
import SidebarQueenItem from "./SidebarQueenItem";
@@ -172,6 +173,13 @@ export default function Sidebar() {
<Sparkles className="w-4 h-4" />
<span>Prompt Library</span>
</button>
<button
onClick={() => navigate("/tool-library")}
className="flex items-center gap-2.5 px-3 py-1.5 rounded-md text-sm text-foreground/70 hover:bg-sidebar-item-hover hover:text-foreground transition-colors"
>
<Wrench className="w-4 h-4" />
<span>Tool Library</span>
</button>
<button
onClick={() => navigate("/credentials")}
className="flex items-center gap-2.5 px-3 py-1.5 rounded-md text-sm text-foreground/70 hover:bg-sidebar-item-hover hover:text-foreground transition-colors"
@@ -0,0 +1,461 @@
import { useEffect, useMemo, useRef, useState } from "react";
import {
ChevronDown,
ChevronRight,
Check,
Loader2,
Lock,
Wrench,
AlertCircle,
} from "lucide-react";
import type { ToolMeta, McpServerTools } from "@/api/queens";
/**
 * Shape every Tools section (Queen / Colony) shares.
 *
 * This is the value `fetchSnapshot` resolves to and the data `ToolsEditor`
 * renders from.
 */
export interface ToolsSnapshot {
/** Saved MCP tool allowlist. `null` is treated by the editor as "every MCP tool is allowed". */
enabled_mcp_tools: string[] | null;
/** When true the editor shows a "Catalog is unavailable" notice. */
stale: boolean;
/** Listed read-only under "System tools (always enabled)", after `synthetic`. */
lifecycle: ToolMeta[];
/** Listed read-only under "System tools (always enabled)", before `lifecycle`. */
synthetic: ToolMeta[];
/** One collapsible group per entry; each entry's `tools` become editable checkboxes. */
mcp_servers: McpServerTools[];
}
/** Props accepted by `ToolsEditor`. */
export interface ToolsEditorProps {
/** Stable identifier — the editor refetches its snapshot when it changes. */
subjectKey: string;
/** Title shown above the controls. Defaults to "Tools" when omitted. */
title?: string;
/** One-line caveat rendered under the header (e.g. "Changes apply …"). */
caveat?: string;
/**
 * Load the current snapshot.
 *
 * The editor also refetches whenever this function's identity changes, so
 * callers should pass a referentially stable function (e.g. `useCallback`).
 */
fetchSnapshot: () => Promise<ToolsSnapshot>;
/**
 * Persist an allowlist. ``null`` resets to "allow all".
 *
 * The resolved `enabled_mcp_tools` is adopted by the editor as the new
 * saved state.
 */
saveAllowlist: (
enabled: string[] | null,
) => Promise<{ enabled_mcp_tools: string[] | null }>;
}
type TriState = "checked" | "unchecked" | "indeterminate";

/**
 * Derive the header checkbox state for one server from its tools.
 *
 * @param toolNames Names of the tools the server exposes.
 * @param allowed   Current allowlist; `null` means "allow all".
 * @returns "checked" when there is no allowlist or every tool is allowed,
 *          "unchecked" when the server has no tools or none are allowed,
 *          and "indeterminate" when only some are allowed.
 */
function triStateForServer(
  toolNames: string[],
  allowed: Set<string> | null,
): TriState {
  // No allowlist at all: everything counts as enabled, even an empty server.
  if (allowed === null) return "checked";

  const total = toolNames.length;
  if (total === 0) return "unchecked";

  let hits = 0;
  for (const toolName of toolNames) {
    if (allowed.has(toolName)) hits += 1;
  }

  if (hits === total) return "checked";
  return hits === 0 ? "unchecked" : "indeterminate";
}
/**
 * Checkbox that can also render the "indeterminate" (partially selected)
 * state.
 *
 * `onChange` receives the checkbox's new `checked` value. Clicks are stopped
 * from propagating so that a clickable ancestor does not also react to them.
 */
function TriStateCheckbox({
  state,
  onChange,
  disabled,
}: {
  state: TriState;
  onChange: (next: boolean) => void;
  disabled?: boolean;
}) {
  const inputRef = useRef<HTMLInputElement>(null);

  // `indeterminate` is a DOM property rather than a React prop, so it is
  // synced onto the element whenever the state changes.
  useEffect(() => {
    const node = inputRef.current;
    if (!node) return;
    node.indeterminate = state === "indeterminate";
  }, [state]);

  const isChecked = state === "checked";

  return (
    <input
      ref={inputRef}
      type="checkbox"
      checked={isChecked}
      disabled={disabled}
      onChange={(event) => onChange(event.target.checked)}
      onClick={(event) => event.stopPropagation()}
      className="h-3.5 w-3.5 rounded border-border/70 text-primary focus:ring-primary/40"
    />
  );
}
/**
 * One tool entry: a checkbox (or a lock icon when read-only), the tool's
 * name, and an optional description clamped to two lines.
 *
 * @param name        Tool name, shown in monospace and used as the
 *                    checkbox's accessible name.
 * @param description Optional description; nothing is rendered when empty.
 * @param enabled     Whether the checkbox is ticked (editable rows only).
 * @param editable    `true` renders a checkbox, `false` a lock icon.
 * @param onToggle    Called with the new checked value when the user toggles
 *                    the checkbox; ignored for read-only rows.
 */
function ToolRow({
  name,
  description,
  enabled,
  editable,
  onToggle,
}: {
  name: string;
  description: string;
  enabled: boolean;
  editable: boolean;
  onToggle?: (next: boolean) => void;
}) {
  // The visible name is a plain <div>, not a <label>, so the checkbox is
  // given an explicit aria-label; otherwise assistive technology would
  // announce it as an unnamed checkbox.
  return (
    <div className="flex items-start gap-2 py-1.5 px-2 rounded hover:bg-muted/30">
      {editable ? (
        <input
          type="checkbox"
          checked={enabled}
          aria-label={name}
          onChange={(e) => onToggle?.(e.target.checked)}
          className="mt-0.5 h-3.5 w-3.5 rounded border-border/70 text-primary focus:ring-primary/40"
        />
      ) : (
        <Lock className="mt-0.5 h-3 w-3 text-muted-foreground/60 flex-shrink-0" />
      )}
      <div className="min-w-0 flex-1">
        <div className="text-xs font-medium text-foreground font-mono">
          {name}
        </div>
        {description && (
          <div className="text-[11px] text-muted-foreground leading-relaxed line-clamp-2">
            {description}
          </div>
        )}
      </div>
    </div>
  );
}
/**
 * Bordered group with a clickable header that shows or hides its children.
 *
 * @param title    Header text.
 * @param count    Number shown at the right of the header when `badge` is
 *                 not provided.
 * @param badge    Optional text that replaces `count` in the header.
 * @param expanded Whether the children are currently rendered.
 * @param onToggle Called when the header is clicked.
 * @param leading  Optional node rendered between the chevron and the title.
 * @param children Group content, rendered only while `expanded` is true.
 */
function CollapsibleGroup({
  title,
  count,
  badge,
  expanded,
  onToggle,
  leading,
  children,
}: {
  title: string;
  count: number;
  badge?: string;
  expanded: boolean;
  onToggle: () => void;
  leading?: React.ReactNode;
  children: React.ReactNode;
}) {
  // type="button": a <button> defaults to type="submit", which would submit
  // any enclosing <form> when the header is clicked.
  // aria-expanded: exposes the open/closed state, which is otherwise only
  // conveyed by the chevron icon.
  // NOTE(review): `leading` is rendered inside the <button>; when it is an
  // interactive control (e.g. a checkbox) that is nested interactive content,
  // which HTML disallows. Consider moving it outside the button.
  return (
    <div className="mb-2 rounded-lg border border-border/40 bg-muted/10 overflow-hidden">
      <button
        type="button"
        aria-expanded={expanded}
        onClick={onToggle}
        className="w-full flex items-center gap-2 px-2.5 py-1.5 text-left hover:bg-muted/30"
      >
        {expanded ? (
          <ChevronDown className="w-3.5 h-3.5 text-muted-foreground" />
        ) : (
          <ChevronRight className="w-3.5 h-3.5 text-muted-foreground" />
        )}
        {leading}
        <span className="text-xs font-medium text-foreground flex-1 truncate">
          {title}
        </span>
        <span className="text-[11px] text-muted-foreground">
          {badge ?? count}
        </span>
      </button>
      {expanded && (
        <div className="border-t border-border/30 px-1 py-1">{children}</div>
      )}
    </div>
  );
}
/** Copy an allowlist into a fresh Set; `null` ("allow all") stays `null`. */
function cloneAllowlist(names: Iterable<string> | null): Set<string> | null {
  return names === null ? null : new Set<string>(names);
}

/** True when two allowlists hold the same names; `null` only equals `null`. */
function sameAllowlist(
  a: Set<string> | null,
  b: Set<string> | null,
): boolean {
  if (a === null || b === null) return a === b;
  if (a.size !== b.size) return false;
  for (const name of a) if (!b.has(name)) return false;
  return true;
}

/**
 * Editor for an MCP tool allowlist, shared by the Queen and Colony sections.
 *
 * Loads a snapshot through `fetchSnapshot` (again whenever `subjectKey` or
 * `fetchSnapshot` changes), lets the user tick individual tools or whole
 * servers, and persists the result through `saveAllowlist`. A `null`
 * allowlist means "allow all".
 *
 * The saved allowlist (`baseline`) and the in-progress edit (`draftAllowed`)
 * are both React state, so the "dirty" flag is recomputed whenever either
 * one changes.
 */
export default function ToolsEditor({
  subjectKey,
  title = "Tools",
  caveat,
  fetchSnapshot,
  saveAllowlist,
}: ToolsEditorProps) {
  const [data, setData] = useState<ToolsSnapshot | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [draftAllowed, setDraftAllowed] = useState<Set<string> | null>(null);
  // Last allowlist known to be persisted. Kept in state (not a ref) so that
  // `dirty` below is re-evaluated when it changes even if the draft does not.
  const [baseline, setBaseline] = useState<Set<string> | null>(null);
  const [saving, setSaving] = useState(false);
  const [saveError, setSaveError] = useState<string | null>(null);
  const [savedRecently, setSavedRecently] = useState(false);
  const [expanded, setExpanded] = useState<Record<string, boolean>>({});
  // Bumped whenever the subject changes or the editor unmounts, so an
  // in-flight save can tell that its result belongs to a previous subject.
  const loadGenerationRef = useRef(0);
  // Pending timer that hides the "Saved" badge.
  const savedTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  useEffect(() => {
    let cancelled = false;
    setLoading(true);
    setError(null);
    // Save feedback from the previous subject must not leak into this one.
    setSaveError(null);
    setSavedRecently(false);
    fetchSnapshot()
      .then((snapshot) => {
        if (cancelled) return;
        setData(snapshot);
        // Separate copies so the draft and baseline never share a Set.
        setBaseline(cloneAllowlist(snapshot.enabled_mcp_tools));
        setDraftAllowed(cloneAllowlist(snapshot.enabled_mcp_tools));
      })
      .catch((e) => {
        if (cancelled) return;
        setError((e as Error)?.message || "Failed to load tools");
      })
      .finally(() => {
        if (!cancelled) setLoading(false);
      });
    return () => {
      cancelled = true;
      loadGenerationRef.current += 1;
    };
  }, [subjectKey, fetchSnapshot]);

  // Drop the pending "Saved" timer when the editor unmounts.
  useEffect(
    () => () => {
      if (savedTimerRef.current !== null) {
        clearTimeout(savedTimerRef.current);
        savedTimerRef.current = null;
      }
    },
    [],
  );

  const dirty = useMemo(
    () => !sameAllowlist(draftAllowed, baseline),
    [draftAllowed, baseline],
  );

  const allMcpNames = useMemo(() => {
    const s = new Set<string>();
    data?.mcp_servers.forEach((srv) => srv.tools.forEach((t) => s.add(t.name)));
    return s;
  }, [data]);

  const toggleOne = (name: string, next: boolean) => {
    setDraftAllowed((prev) => {
      // Leaving "allow all" materializes the full list before editing it.
      const base =
        prev === null ? new Set<string>(allMcpNames) : new Set<string>(prev);
      if (next) base.add(name);
      else base.delete(name);
      return base;
    });
  };

  const toggleServer = (serverNames: string[], next: boolean) => {
    setDraftAllowed((prev) => {
      const base =
        prev === null ? new Set<string>(allMcpNames) : new Set<string>(prev);
      if (next) serverNames.forEach((n) => base.add(n));
      else serverNames.forEach((n) => base.delete(n));
      return base;
    });
  };

  const handleResetToDefault = () => setDraftAllowed(null);

  const handleCancel = () => {
    setDraftAllowed(cloneAllowlist(baseline));
    setSaveError(null);
  };

  const handleSave = async () => {
    const generation = loadGenerationRef.current;
    setSaving(true);
    setSaveError(null);
    try {
      const payload =
        draftAllowed === null ? null : Array.from(draftAllowed).sort();
      const result = await saveAllowlist(payload);
      // The subject changed (or the editor unmounted) while the request was
      // in flight: the response describes something no longer on screen.
      if (generation !== loadGenerationRef.current) return;
      const updated = result.enabled_mcp_tools;
      const updatedSet = cloneAllowlist(updated);
      setBaseline(updatedSet);
      setDraftAllowed(cloneAllowlist(updated));
      // Functional update so the latest snapshot is patched rather than the
      // one captured when the save started.
      setData((prev) =>
        prev === null
          ? prev
          : {
              ...prev,
              enabled_mcp_tools: updated,
              mcp_servers: prev.mcp_servers.map((srv) => ({
                ...srv,
                tools: srv.tools.map((t) => ({
                  ...t,
                  enabled: updatedSet === null ? true : updatedSet.has(t.name),
                })),
              })),
            },
      );
      setSavedRecently(true);
      if (savedTimerRef.current !== null) clearTimeout(savedTimerRef.current);
      savedTimerRef.current = setTimeout(() => {
        savedTimerRef.current = null;
        setSavedRecently(false);
      }, 2500);
    } catch (e: unknown) {
      if (generation !== loadGenerationRef.current) return;
      // The rejection value is not guaranteed to be an object with `body`.
      const err = e as
        | { body?: { error?: string; unknown?: string[] } }
        | null
        | undefined;
      const unknownNames = err?.body?.unknown;
      const extra =
        Array.isArray(unknownNames) && unknownNames.length > 0
          ? ` (${unknownNames.join(", ")})`
          : "";
      setSaveError((err?.body?.error || "Save failed") + extra);
    } finally {
      setSaving(false);
    }
  };

  if (loading) {
    return (
      <div className="flex items-center gap-2 text-xs text-muted-foreground py-3">
        <Loader2 className="w-3 h-3 animate-spin" />
        Loading tools
      </div>
    );
  }

  if (error || !data) {
    return (
      <div className="flex items-start gap-2 text-xs text-destructive py-3">
        <AlertCircle className="w-3.5 h-3.5 mt-0.5 flex-shrink-0" />
        <span>{error || "Could not load tools"}</span>
      </div>
    );
  }

  const draftEnabledCount =
    draftAllowed === null ? allMcpNames.size : draftAllowed.size;
  const totalMcpCount = allMcpNames.size;

  return (
    <div>
      <div className="flex items-center justify-between mb-1.5">
        <h4 className="text-[11px] font-semibold text-muted-foreground uppercase tracking-wider flex items-center gap-1.5">
          <Wrench className="w-3 h-3" /> {title}
        </h4>
        <span className="text-[11px] text-muted-foreground">
          {draftEnabledCount}/{totalMcpCount} MCP enabled
        </span>
      </div>
      {caveat && (
        <div className="flex items-start gap-1.5 text-[11px] text-muted-foreground mb-2 px-2 py-1.5 rounded bg-muted/20 border border-border/40">
          <AlertCircle className="w-3 h-3 mt-0.5 flex-shrink-0" />
          <span>{caveat}</span>
        </div>
      )}
      {data.stale && (
        <div className="flex items-start gap-1.5 text-[11px] text-muted-foreground mb-3 px-2 py-1.5 rounded bg-muted/30">
          <AlertCircle className="w-3 h-3 mt-0.5 flex-shrink-0" />
          <span>
            Catalog is unavailable. Start a session once to populate the tool list.
          </span>
        </div>
      )}
      {(data.lifecycle.length > 0 || data.synthetic.length > 0) && (
        <CollapsibleGroup
          title="System tools (always enabled)"
          count={data.lifecycle.length + data.synthetic.length}
          expanded={!!expanded["__system"]}
          onToggle={() =>
            setExpanded((p) => ({ ...p, __system: !p["__system"] }))
          }
        >
          <div className="flex flex-col">
            {data.synthetic.map((t) => (
              <ToolRow
                key={`syn-${t.name}`}
                name={t.name}
                description={t.description}
                enabled={true}
                editable={false}
              />
            ))}
            {data.lifecycle.map((t) => (
              <ToolRow
                key={`lc-${t.name}`}
                name={t.name}
                description={t.description}
                enabled={true}
                editable={false}
              />
            ))}
          </div>
        </CollapsibleGroup>
      )}
      {data.mcp_servers.map((srv) => {
        const toolNames = srv.tools.map((t) => t.name);
        const state = triStateForServer(toolNames, draftAllowed);
        const enabledInServer =
          draftAllowed === null
            ? toolNames.length
            : toolNames.reduce(
                (n, name) => n + (draftAllowed.has(name) ? 1 : 0),
                0,
              );
        return (
          <CollapsibleGroup
            key={srv.name}
            title={srv.name}
            count={srv.tools.length}
            badge={`${enabledInServer}/${srv.tools.length}`}
            expanded={!!expanded[srv.name]}
            onToggle={() =>
              setExpanded((p) => ({ ...p, [srv.name]: !p[srv.name] }))
            }
            leading={
              <TriStateCheckbox
                state={state}
                onChange={(next) => toggleServer(toolNames, next)}
              />
            }
          >
            <div className="flex flex-col">
              {srv.tools.map((t) => {
                const enabled =
                  draftAllowed === null ? true : draftAllowed.has(t.name);
                return (
                  <ToolRow
                    key={`${srv.name}-${t.name}`}
                    name={t.name}
                    description={t.description}
                    enabled={enabled}
                    editable={true}
                    onToggle={(next) => toggleOne(t.name, next)}
                  />
                );
              })}
            </div>
          </CollapsibleGroup>
        );
      })}
      {/* type="button" keeps these from submitting an enclosing form. */}
      <div className="flex items-center gap-2 pt-3">
        <button
          type="button"
          onClick={handleSave}
          disabled={!dirty || saving}
          className="flex items-center gap-1.5 px-3 py-1.5 rounded-md bg-primary text-primary-foreground text-xs font-medium hover:bg-primary/90 disabled:opacity-50 disabled:cursor-not-allowed"
        >
          {saving ? (
            <Loader2 className="w-3 h-3 animate-spin" />
          ) : (
            <Check className="w-3 h-3" />
          )}
          {saving ? "Saving…" : "Save"}
        </button>
        <button
          type="button"
          onClick={handleCancel}
          disabled={!dirty || saving}
          className="px-3 py-1.5 rounded-md text-xs font-medium text-muted-foreground hover:text-foreground hover:bg-muted/30 disabled:opacity-50"
        >
          Cancel
        </button>
        {savedRecently && !dirty && (
          <span className="text-[11px] text-green-500 flex items-center gap-1">
            <Check className="w-3 h-3" /> Saved
          </span>
        )}
        {draftAllowed !== null && (
          <button
            type="button"
            onClick={handleResetToDefault}
            disabled={saving}
            className="ml-auto text-[11px] text-muted-foreground hover:text-foreground underline underline-offset-2 disabled:opacity-50"
          >
            Reset to default (allow all)
          </button>
        )}
      </div>
      {saveError && (
        <div className="flex items-start gap-1.5 mt-2 text-[11px] text-destructive">
          <AlertCircle className="w-3 h-3 mt-0.5 flex-shrink-0" />
          <span>{saveError}</span>
        </div>
      )}
    </div>
  );
}
+282
View File
@@ -0,0 +1,282 @@
import { useEffect, useMemo, useState } from "react";
import { Wrench, Crown, Network, Server, Loader2, AlertCircle } from "lucide-react";
import { queensApi } from "@/api/queens";
import { coloniesApi, type ColonySummary } from "@/api/colonies";
import { slugToDisplayName } from "@/lib/colony-registry";
import QueenToolsSection from "@/components/QueenToolsSection";
import ColonyToolsSection from "@/components/ColonyToolsSection";
import McpServersPanel from "@/components/McpServersPanel";
type Tab = "queens" | "colonies" | "mcp";

// Tab strip entries, in display order.
const TAB_DEFS: Array<{ id: Tab; label: string; icon: React.ReactNode }> = [
  { id: "queens", label: "Queens", icon: <Crown className="w-3.5 h-3.5" /> },
  { id: "colonies", label: "Colonies", icon: <Network className="w-3.5 h-3.5" /> },
  { id: "mcp", label: "MCP Servers", icon: <Server className="w-3.5 h-3.5" /> },
];

/**
 * Tool Library page: a header with a tab strip, and below it the body of the
 * selected tab (queens, colonies, or MCP servers). Starts on "queens".
 */
export default function ToolLibrary() {
  const [tab, setTab] = useState<Tab>("queens");

  return (
    <div className="flex-1 flex flex-col min-w-0 overflow-hidden">
      {/* Header */}
      <div className="px-6 py-4 border-b border-border/60">
        <div className="flex items-baseline gap-3 mb-3">
          <h2 className="text-lg font-semibold text-foreground flex items-center gap-2">
            <Wrench className="w-5 h-5 text-primary" />
            Tool Library
          </h2>
          <span className="text-xs text-muted-foreground">
            Curate which tools each queen and colony can call, and register your own MCP servers.
          </span>
        </div>
        <div className="flex items-center gap-1">
          {TAB_DEFS.map((def) => (
            <TabButton
              key={def.id}
              active={tab === def.id}
              onClick={() => setTab(def.id)}
              icon={def.icon}
            >
              {def.label}
            </TabButton>
          ))}
        </div>
      </div>
      <div className="flex-1 overflow-y-auto">
        {tab === "queens" ? <QueensTab /> : null}
        {tab === "colonies" ? <ColoniesTab /> : null}
        {tab === "mcp" ? (
          <div className="px-6 py-6 max-w-4xl">
            <McpServersPanel />
          </div>
        ) : null}
      </div>
    </div>
  );
}
/**
 * Single entry in the page's tab strip: an icon followed by its label,
 * highlighted while `active` is true.
 */
function TabButton(props: {
  active: boolean;
  onClick: () => void;
  icon: React.ReactNode;
  children: React.ReactNode;
}) {
  const { active, onClick, icon, children } = props;

  // Highlighted palette for the current tab, muted-with-hover otherwise.
  const palette = active
    ? "bg-primary/15 text-primary"
    : "text-muted-foreground hover:text-foreground hover:bg-muted/30";

  return (
    <button
      onClick={onClick}
      className={`flex items-center gap-1.5 px-3 py-1.5 rounded-md text-sm font-medium ${palette}`}
    >
      {icon}
      {children}
    </button>
  );
}
// ----- Queens tab ---------------------------------------------------------
/**
 * "Queens" tab: a side list of queens and, for the selected one, a header
 * plus its tools editor.
 *
 * The queen list is fetched once on mount; the first queen is selected
 * automatically unless a selection already exists.
 */
function QueensTab() {
  const [queens, setQueens] = useState<Array<{ id: string; name: string; title: string }> | null>(null);
  const [selected, setSelected] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Ignore the response if the tab was closed while it was in flight.
    let cancelled = false;
    queensApi
      .list()
      .then((r) => {
        if (cancelled) return;
        setQueens(r.queens);
        if (r.queens.length > 0) setSelected((prev) => prev ?? r.queens[0].id);
      })
      .catch((e: unknown) => {
        if (cancelled) return;
        // The rejection value is not guaranteed to be an Error.
        setError((e as Error)?.message || "Failed to load queens");
      });
    return () => {
      cancelled = true;
    };
  }, []);

  if (error) return <ErrorBlock message={error} />;
  if (queens === null) return <LoadingBlock label="Loading queens…" />;
  if (queens.length === 0)
    return <EmptyBlock label="No queens yet. Create one to curate its tools." />;

  // Header details for the current selection, if it is still in the list.
  const activeQueen = selected
    ? queens.find((q) => q.id === selected)
    : undefined;

  return (
    <div className="flex h-full">
      <SidePicker>
        {queens.map((q) => (
          <PickerItem
            key={q.id}
            active={selected === q.id}
            onClick={() => setSelected(q.id)}
            primary={q.name}
            secondary={q.title}
          />
        ))}
      </SidePicker>
      <div className="flex-1 overflow-y-auto px-6 py-5 min-w-0">
        {selected ? (
          <>
            {activeQueen ? (
              <div className="mb-4 pb-3 border-b border-border/40">
                <h3 className="text-base font-semibold text-foreground">
                  {activeQueen.name}
                </h3>
                <p className="text-xs text-muted-foreground mt-0.5">
                  {activeQueen.title}
                </p>
              </div>
            ) : null}
            <QueenToolsSection queenId={selected} />
          </>
        ) : (
          <EmptyBlock label="Pick a queen to edit her tool allowlist." />
        )}
      </div>
    </div>
  );
}
// ----- Colonies tab -------------------------------------------------------
/**
 * "Colonies" tab: a side list of colonies sorted by name and, for the
 * selected one, a header plus its tools editor.
 *
 * The colony list is fetched once on mount; the first colony returned is
 * selected automatically unless a selection already exists.
 */
function ColoniesTab() {
  const [colonies, setColonies] = useState<ColonySummary[] | null>(null);
  const [selected, setSelected] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Ignore the response if the tab was closed while it was in flight.
    let cancelled = false;
    coloniesApi
      .list()
      .then((r) => {
        if (cancelled) return;
        setColonies(r.colonies);
        if (r.colonies.length > 0)
          setSelected((prev) => prev ?? r.colonies[0].name);
      })
      .catch((e: unknown) => {
        if (cancelled) return;
        // The rejection value is not guaranteed to be an Error.
        setError((e as Error)?.message || "Failed to load colonies");
      });
    return () => {
      cancelled = true;
    };
  }, []);

  const sorted = useMemo(() => {
    if (!colonies) return null;
    return [...colonies].sort((a, b) => a.name.localeCompare(b.name));
  }, [colonies]);

  if (error) return <ErrorBlock message={error} />;
  if (sorted === null) return <LoadingBlock label="Loading colonies…" />;
  if (sorted.length === 0)
    return (
      <EmptyBlock label="No colonies yet. Ask a queen to incubate one and its tools will show up here." />
    );

  // Secondary line for a picker entry. The queen name is appended only when
  // present, so a colony without one does not end in a dangling " · ".
  const describeColony = (c: ColonySummary): string => {
    const allowance = c.has_allowlist
      ? `${c.enabled_count ?? 0} tools allowed`
      : "all tools";
    return c.queen_name ? `${allowance} · ${c.queen_name}` : allowance;
  };

  return (
    <div className="flex h-full">
      <SidePicker>
        {sorted.map((c) => (
          <PickerItem
            key={c.name}
            active={selected === c.name}
            onClick={() => setSelected(c.name)}
            primary={slugToDisplayName(c.name)}
            secondary={describeColony(c)}
            tertiary={c.name}
          />
        ))}
      </SidePicker>
      <div className="flex-1 overflow-y-auto px-6 py-5 min-w-0">
        {selected ? (
          <>
            <div className="mb-4 pb-3 border-b border-border/40">
              <h3 className="text-base font-semibold text-foreground">
                {slugToDisplayName(selected)}
              </h3>
              <p className="text-[11px] text-muted-foreground font-mono mt-0.5">
                {selected}
              </p>
            </div>
            <ColonyToolsSection colonyName={selected} />
          </>
        ) : (
          <EmptyBlock label="Pick a colony to edit its tool allowlist." />
        )}
      </div>
    </div>
  );
}
// ----- Shared primitives --------------------------------------------------
/** Fixed-width, scrollable left column that stacks the picker entries. */
function SidePicker(props: { children: React.ReactNode }) {
  const { children } = props;
  return (
    <div className="w-[260px] flex-shrink-0 border-r border-border/60 overflow-y-auto py-3 px-2 flex flex-col gap-1">
      {children}
    </div>
  );
}
/**
 * Selectable entry in the side picker: a primary line plus optional
 * secondary and tertiary lines, highlighted while `active` is true.
 */
function PickerItem(props: {
  active: boolean;
  onClick: () => void;
  primary: string;
  secondary?: string;
  tertiary?: string;
}) {
  const { active, onClick, primary, secondary, tertiary } = props;

  // Highlighted palette for the selected entry, plain-with-hover otherwise.
  const surface = active
    ? "bg-primary/15 text-primary"
    : "text-foreground hover:bg-muted/30";

  return (
    <button
      onClick={onClick}
      className={`text-left px-3 py-2 rounded-md text-sm ${surface}`}
    >
      <div className="font-medium truncate">{primary}</div>
      {secondary && (
        <div className="text-[11px] text-muted-foreground truncate">
          {secondary}
        </div>
      )}
      {tertiary && (
        <div className="text-[10px] text-muted-foreground/60 font-mono truncate">
          {tertiary}
        </div>
      )}
    </button>
  );
}
/** Inline spinner followed by a short label, shown while a list loads. */
function LoadingBlock(props: { label: string }) {
  const { label } = props;
  return (
    <div className="flex items-center gap-2 text-xs text-muted-foreground px-6 py-6">
      <Loader2 className="w-3 h-3 animate-spin" />
      {label}
    </div>
  );
}
/** Muted notice with an icon, shown when there is nothing to list or pick. */
function EmptyBlock(props: { label: string }) {
  const { label } = props;
  return (
    <div className="flex items-start gap-2 text-xs text-muted-foreground px-6 py-6">
      <AlertCircle className="w-3.5 h-3.5 mt-0.5" />
      <span>{label}</span>
    </div>
  );
}
/** Error notice with an icon, rendered in the destructive text color. */
function ErrorBlock(props: { message: string }) {
  const { message } = props;
  return (
    <div className="flex items-start gap-2 text-xs text-destructive px-6 py-6">
      <AlertCircle className="w-3.5 h-3.5 mt-0.5 flex-shrink-0" />
      <span>{message}</span>
    </div>
  );
}