feat: configurable default tools and skills

This commit is contained in:
Richard Tang
2026-04-21 19:15:40 -07:00
parent c658a7c50b
commit 0fd96d410e
18 changed files with 805 additions and 73 deletions
@@ -119,13 +119,48 @@ def _migrate_from_profile_if_needed(queen_id: str) -> list[str] | None:
return enabled
def load_queen_tools_config(queen_id: str) -> list[str] | None:
def tools_config_exists(queen_id: str) -> bool:
    """Report whether a ``tools.json`` sidecar is persisted for this queen.

    ``load_queen_tools_config`` can hand back the same value for an
    explicit user save and for a fallthrough to the role-based default;
    callers use this check to tell those two situations apart.
    """
    sidecar = tools_config_path(queen_id)
    return sidecar.exists()
def delete_queen_tools_config(queen_id: str) -> bool:
    """Delete the queen's ``tools.json`` sidecar if present.

    Returns ``True`` if a file was removed, ``False`` if none existed or
    the removal failed. After a successful removal the next
    ``load_queen_tools_config`` call falls through to the role-based
    default (or allow-all for unknown queens).
    """
    path = tools_config_path(queen_id)
    # EAFP: attempt the unlink directly instead of checking ``exists()``
    # first. A separate existence check is racy; a file that disappears
    # between the check and the unlink would be reported as a failure
    # (with a traceback) even though the desired end state was reached.
    try:
        path.unlink()
    except (FileNotFoundError, NotADirectoryError):
        # Nothing to delete: the sidecar (or its directory) is absent.
        return False
    except OSError:
        logger.warning("Failed to delete %s", path, exc_info=True)
        return False
    return True
def load_queen_tools_config(
queen_id: str,
mcp_catalog: dict[str, list[dict]] | None = None,
) -> list[str] | None:
"""Return the queen's MCP tool allowlist, or ``None`` for default-allow.
Order of resolution:
1. ``tools.json`` sidecar (authoritative).
1. ``tools.json`` sidecar (authoritative; user has saved).
2. Legacy ``profile.yaml`` field (migrated and deleted on first read).
3. ``None`` default "allow every MCP tool".
3. Role-based default from ``queen_tools_defaults`` when the queen
is in the known persona table. ``mcp_catalog`` lets the helper
expand ``@server:NAME`` shorthands; without it, shorthand entries
are dropped.
4. ``None`` default "allow every MCP tool".
"""
path = tools_config_path(queen_id)
if path.exists():
@@ -144,7 +179,19 @@ def load_queen_tools_config(queen_id: str) -> list[str] | None:
logger.warning("Unexpected enabled_mcp_tools shape in %s; ignoring", path)
return None
return _migrate_from_profile_if_needed(queen_id)
migrated = _migrate_from_profile_if_needed(queen_id)
if migrated is not None:
return migrated
# If migration just hoisted an explicit ``null`` out of profile.yaml,
# a sidecar with allow-all semantics now exists on disk. Honor that
# over the role default so an explicit user choice wins.
if tools_config_path(queen_id).exists():
return None
# No sidecar, nothing to migrate — fall back to role-based default.
from framework.agents.queen.queen_tools_defaults import resolve_queen_default_tools
return resolve_queen_default_tools(queen_id, mcp_catalog)
def update_queen_tools_config(
@@ -0,0 +1,272 @@
"""Role-based default tool allowlists for queens.
Every queen inherits the same MCP surface (all servers loaded for the
queen agent), but exposing 94+ tools to every persona clutters the LLM
tool catalog and wastes prompt tokens. This module defines a sensible
default allowlist per queen persona so, e.g., Head of Legal doesn't
see port scanners and Head of Finance doesn't see ``apply_patch``.
Defaults apply only when the queen has no ``tools.json`` sidecar. The
moment the user saves an allowlist through the Tool Library, the
sidecar becomes authoritative. A DELETE on the tools endpoint removes
the sidecar and brings the queen back to her role default.
Category entries support a ``@server:NAME`` shorthand that expands to
every tool name registered against that MCP server in the current
catalog. This keeps the category table short and drift-free when new
tools are added (e.g. browser_* auto-joins the ``browser`` category).
"""
from __future__ import annotations
import logging
from typing import Any
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Categories — reusable bundles of MCP tool names.
# ---------------------------------------------------------------------------
#
# Each category is a flat list of either concrete tool names or the
# ``@server:NAME`` shorthand. The shorthand expands to every tool the
# given MCP server currently exposes (requires a live catalog; when one
# is not available the shorthand is silently dropped so we fall back to
# the named entries only).
_TOOL_CATEGORIES: dict[str, list[str]] = {
    # Non-mutating file access: the baseline every knowledge queen gets.
    "file_read": [
        "read_file",
        "list_directory",
        "list_dir",
        "list_files",
        "search_files",
        "grep_search",
        "pdf_read",
    ],
    # Tools that create or modify files, for personas that author artifacts.
    "file_write": [
        "write_file",
        "edit_file",
        "apply_diff",
        "apply_patch",
        "replace_file_content",
        "hashline_edit",
        "undo_changes",
    ],
    # Shell execution and process control (engineering personas only).
    "shell": ["run_command", "execute_command_tool", "bash_kill", "bash_output"],
    # Tabular data: CSV / Excel read + write, plus SQL over them.
    "data": [
        "csv_read",
        "csv_info",
        "csv_write",
        "csv_append",
        "csv_sql",
        "excel_read",
        "excel_info",
        "excel_write",
        "excel_append",
        "excel_search",
        "excel_sheet_list",
        "excel_sql",
    ],
    # Browser automation: expands to every tool of the gcu-tools MCP server.
    "browser": ["@server:gcu-tools"],
    # External research and information gathering.
    "research": ["search_papers", "download_paper", "search_wikipedia", "web_scrape"],
    # Security scanners, reserved for engineering/security roles.
    "security": [
        "dns_security_scan",
        "http_headers_scan",
        "port_scan",
        "ssl_tls_scan",
        "subdomain_enumerate",
        "tech_stack_detect",
        "risk_score",
    ],
    # Small context helpers that are a sensible default for every queen.
    "time_context": ["get_current_time", "get_account_info"],
    # Runtime log inspection (debugging / observability for builders).
    "runtime_inspection": [
        "query_runtime_logs",
        "query_runtime_log_details",
        "query_runtime_log_raw",
    ],
    # Agent management: building, validating and checking agents.
    "agent_mgmt": [
        "list_agents",
        "list_agent_tools",
        "list_agent_sessions",
        "get_agent_checkpoint",
        "list_agent_checkpoints",
        "run_agent_tests",
        "save_agent_draft",
        "confirm_and_build",
        "validate_agent_package",
        "validate_agent_tools",
        "enqueue_task",
    ],
}
# ---------------------------------------------------------------------------
# Per-queen mapping.
# ---------------------------------------------------------------------------
#
# Built from the queen personas in ``queen_profiles.DEFAULT_QUEENS``. The
# goal is "just enough" — a queen should see tools she'd plausibly call
# for her stated role, nothing more. Users curate further via the Tool
# Library if they want.
#
# A queen whose ID is NOT in this map falls through to "allow every MCP
# tool" (the original behavior), which keeps the system compatible with
# user-added custom queen IDs that we don't know about.
QUEEN_DEFAULT_CATEGORIES: dict[str, list[str]] = {
    # Head of Technology: builds and operates systems, so gets every category.
    "queen_technology": [
        "file_read",
        "file_write",
        "shell",
        "data",
        "browser",
        "research",
        "security",
        "time_context",
        "runtime_inspection",
        "agent_mgmt",
    ],
    # Head of Growth: data, experiments, competitor research. No shell/security.
    "queen_growth": ["file_read", "file_write", "data", "browser", "research", "time_context"],
    # Head of Product Strategy: user research and roadmaps. No shell/security.
    "queen_product_strategy": [
        "file_read", "file_write", "data", "browser", "research", "time_context",
    ],
    # Head of Finance: CSV/Excel-heavy financial models plus market research.
    "queen_finance_fundraising": [
        "file_read", "file_write", "data", "browser", "research", "time_context",
    ],
    # Head of Legal: contracts/PDFs and research. No shell/data/security.
    "queen_legal": ["file_read", "file_write", "browser", "research", "time_context"],
    # Head of Brand & Design: visual references and style guides.
    # No shell/data/security.
    "queen_brand_design": ["file_read", "file_write", "browser", "research", "time_context"],
    # Head of Talent: candidate pipelines and resumes; data + browser heavy.
    "queen_talent": ["file_read", "file_write", "data", "browser", "research", "time_context"],
    # Head of Operations: processes, automation and observability.
    "queen_operations": [
        "file_read",
        "file_write",
        "data",
        "browser",
        "research",
        "time_context",
        "runtime_inspection",
        "agent_mgmt",
    ],
}
def has_role_default(queen_id: str) -> bool:
    """Tell whether ``queen_id`` has an entry in ``QUEEN_DEFAULT_CATEGORIES``."""
    is_known = queen_id in QUEEN_DEFAULT_CATEGORIES
    return is_known
def resolve_queen_default_tools(
    queen_id: str,
    mcp_catalog: dict[str, list[dict[str, Any]]] | None = None,
) -> list[str] | None:
    """Compute the role-based default tool allowlist for ``queen_id``.

    Arguments:
        queen_id: Profile ID (e.g. ``"queen_technology"``).
        mcp_catalog: Optional ``{server_name: [{"name": ...}, ...]}`` mapping
            used to expand ``@server:NAME`` category entries. When it is
            ``None`` those entries are skipped, so only explicitly named
            tools end up in the result.

    Returns:
        Tool names in first-seen order without duplicates, or ``None`` when
        the queen has no (or an empty) role entry; the caller should treat
        that as "allow every MCP tool".
    """
    role_categories = QUEEN_DEFAULT_CATEGORIES.get(queen_id)
    if not role_categories:
        return None

    shorthand = "@server:"
    # Dict keys provide insertion-ordered de-duplication in one structure.
    collected: dict[str, None] = {}

    for category in role_categories:
        for entry in _TOOL_CATEGORIES.get(category, []):
            if not entry.startswith(shorthand):
                # Concrete tool name; empty strings are ignored.
                if entry:
                    collected.setdefault(entry, None)
                continue

            if mcp_catalog is None:
                logger.debug(
                    "resolve_queen_default_tools: catalog missing; cannot expand %s",
                    entry,
                )
                continue

            # A server missing from the catalog (or mapped to a falsy
            # value) simply contributes no tools.
            server_tools = mcp_catalog.get(entry.removeprefix(shorthand), []) or []
            for tool in server_tools:
                if not isinstance(tool, dict):
                    continue
                tool_name = tool.get("name")
                if tool_name:
                    collected.setdefault(tool_name, None)

    return list(collected)
+2 -2
View File
@@ -9,8 +9,8 @@ Nodes that need browser access declare ``tools: {policy: "all"}`` in their
agent.json config.
Note: the canonical source of truth for browser automation guidance is
the ``browser-automation`` default skill at
``core/framework/skills/_default_skills/browser-automation/SKILL.md``.
the ``browser-automation`` preset skill at
``core/framework/skills/_preset_skills/browser-automation/SKILL.md``.
Activate that skill for the full decision tree. This module holds a
compact subset suitable for direct inlining into a node's system prompt
when a skill activation is not desired.
+10 -1
View File
@@ -558,7 +558,16 @@ async def create_queen(
# on first read, so existing installs upgrade silently.
from framework.agents.queen.queen_tools_config import load_queen_tools_config
phase_state.enabled_mcp_tools = load_queen_tools_config(queen_dir.name)
# Build a minimal catalog for default-tool resolution. The full
# ``session_manager._mcp_tool_catalog`` snapshot is written further
# down the flow; a queen booted for the first time needs the catalog
# now so ``@server:NAME`` shorthands in the role-default table can
# expand against the just-loaded MCP servers.
_boot_catalog: dict[str, list[dict]] = {
srv: [{"name": name} for name in sorted(names)]
for srv, names in mcp_server_tools_map.items()
}
phase_state.enabled_mcp_tools = load_queen_tools_config(queen_dir.name, _boot_catalog)
phase_state.rebuild_independent_filter()
if phase_state.enabled_mcp_tools is not None:
total_mcp = len(phase_state.mcp_tool_names_all)
+75 -2
View File
@@ -30,7 +30,9 @@ from framework.agents.queen.queen_profiles import (
load_queen_profile,
)
from framework.agents.queen.queen_tools_config import (
delete_queen_tools_config,
load_queen_tools_config,
tools_config_exists,
update_queen_tools_config,
)
@@ -308,12 +310,15 @@ async def handle_get_tools(request: web.Request) -> web.Response:
lifecycle = _lifecycle_entries_without_session(manager, mcp_tool_names_all)
# Allowlist lives in the dedicated tools.json sidecar; helper
# migrates legacy profile.yaml field on first read.
enabled_mcp_tools = load_queen_tools_config(queen_id)
# migrates legacy profile.yaml field on first read, and falls back
# to the role-based default when no sidecar exists.
enabled_mcp_tools = load_queen_tools_config(queen_id, mcp_catalog=catalog)
is_role_default = not tools_config_exists(queen_id)
response = {
"queen_id": queen_id,
"enabled_mcp_tools": enabled_mcp_tools,
"is_role_default": is_role_default,
"stale": stale,
"lifecycle": lifecycle,
"synthetic": _synthetic_entries(),
@@ -427,7 +432,75 @@ async def handle_patch_tools(request: web.Request) -> web.Response:
)
async def handle_delete_tools(request: web.Request) -> web.Response:
    """DELETE /api/queen/{queen_id}/tools — drop the sidecar, fall back to role defaults.

    Users click "Reset to role default" in the Tool Library. That
    removes ``tools.json`` so the queen's effective allowlist becomes
    the role-based default (or allow-all if the queen has no role
    entry). Live sessions are refreshed so the next turn reflects the
    change without a restart.

    Responds with 404 when the queen profile cannot be loaded; otherwise
    with a JSON body carrying ``queen_id``, ``removed``,
    ``enabled_mcp_tools``, ``is_role_default`` and ``refreshed_sessions``.
    """
    queen_id = request.match_info["queen_id"]
    ensure_default_queens()
    try:
        load_queen_profile(queen_id)
    except FileNotFoundError:
        return web.json_response({"error": f"Queen '{queen_id}' not found"}, status=404)

    removed = delete_queen_tools_config(queen_id)

    # Recompute the queen's effective allowlist from the role defaults
    # so we can hot-reload live sessions in one pass (same shape as
    # PATCH).
    manager = request.app.get("manager")
    session = _live_queen_session(manager, queen_id) if manager is not None else None
    if session is not None:
        catalog = _catalog_from_live_session(session)
    else:
        catalog = await _ensure_manager_catalog(manager)
    new_enabled = load_queen_tools_config(queen_id, mcp_catalog=catalog)

    # Report the real state instead of assuming the reset worked: when the
    # delete fails (``removed`` is False but the file is still on disk) the
    # sidecar remains authoritative and ``new_enabled`` is NOT the role
    # default. Computed the same way as in the GET handler.
    is_role_default = not tools_config_exists(queen_id)

    refreshed = 0
    sessions = getattr(manager, "_sessions", None) or {}
    for sess in sessions.values():
        if getattr(sess, "queen_name", None) != queen_id:
            continue
        phase_state = getattr(sess, "phase_state", None)
        if phase_state is None:
            continue
        phase_state.enabled_mcp_tools = new_enabled
        rebuild = getattr(phase_state, "rebuild_independent_filter", None)
        if callable(rebuild):
            try:
                rebuild()
                refreshed += 1
            except Exception:
                # Best effort: one broken session must not fail the reset.
                logger.debug(
                    "Queen tools: rebuild_independent_filter failed for session %s",
                    getattr(sess, "id", "?"),
                    exc_info=True,
                )

    logger.info(
        "Queen tools: queen_id=%s reset-to-default removed=%s refreshed_sessions=%d",
        queen_id,
        removed,
        refreshed,
    )
    return web.json_response(
        {
            "queen_id": queen_id,
            "removed": removed,
            "enabled_mcp_tools": new_enabled,
            "is_role_default": is_role_default,
            "refreshed_sessions": refreshed,
        }
    )
def register_routes(app: web.Application) -> None:
    """Attach the GET, PATCH and DELETE handlers of the queen-tools endpoint."""
    tools_endpoint = "/api/queen/{queen_id}/tools"
    router = app.router
    router.add_get(tools_endpoint, handle_get_tools)
    router.add_patch(tools_endpoint, handle_patch_tools)
    router.add_delete(tools_endpoint, handle_delete_tools)
+49 -24
View File
@@ -95,6 +95,28 @@ def _ensure_queens_known() -> None:
logger.debug("ensure_default_queens failed (non-fatal)", exc_info=True)
class _ManagerReloadAdapter:
    """Runtime-shaped wrapper around a bare ``SkillsManager``.

    ``_reload_scope`` awaits ``rt.reload_skills()`` on each entry of
    ``affected_runtimes``. Live queen DM sessions only expose a manager
    (``phase_state.skills_manager``) and no runtime wrapper, so this shim
    gives them the same call surface and lets PATCHes reach them.
    """

    def __init__(self, skills_manager: SkillsManager) -> None:
        self._mgr = skills_manager

    @property
    def skills_manager(self) -> SkillsManager:
        """The wrapped manager instance."""
        return self._mgr

    async def reload_skills(self) -> dict[str, Any]:
        """Reload the wrapped manager under its mutation lock.

        Returns a dict whose ``catalog_chars`` entry is the length of the
        manager's ``skills_catalog_prompt`` after the reload.
        """
        wrapped = self._mgr
        async with wrapped.mutation_lock:
            wrapped.reload()
            prompt_length = len(wrapped.skills_catalog_prompt)
            return {"catalog_chars": prompt_length}
def _queen_scope(manager: Any, queen_id: str) -> SkillScope | None:
_ensure_queens_known()
queen_home = QUEENS_DIR / queen_id
@@ -106,27 +128,30 @@ def _queen_scope(manager: Any, queen_id: str) -> SkillScope | None:
store = SkillOverrideStore.load(overrides_path, scope_label=f"queen:{queen_id}")
write_dir = queen_home / "skills"
# Prefer a live manager so reload after mutation reaches running
# sessions. Any queen-session manager is equivalent since queen-scope
# skills cascade identically to every session.
# Always build a fresh admin manager for GET so enumeration reflects
# the current disk state (including newly-installed preset skills).
# The live queen-session manager caches ``_all_skills`` at load time
# and only refreshes on explicit reload or file-watch event — reusing
# it here means newly-bundled skills stay invisible until a restart.
admin_manager = _build_admin_manager(queen_id=queen_id)
runtimes: list = []
live_manager: SkillsManager | None = None
try:
for session in manager.iter_queen_sessions(queen_id): # type: ignore[union-attr]
phase_state = getattr(session, "phase_state", None)
if phase_state is not None:
skills_mgr = getattr(phase_state, "skills_manager", None)
if isinstance(skills_mgr, SkillsManager) and live_manager is None:
live_manager = skills_mgr
# Colonies owned by this queen also need reload when queen-scope toggles.
for colony in manager.iter_colony_runtimes(queen_id=queen_id): # type: ignore[union-attr]
runtimes.append(colony)
# Also collect live DM-session managers as reload targets so a
# PATCH reaches running queens, even though we enumerate from
# the admin manager.
for session in manager.iter_queen_sessions(queen_id): # type: ignore[union-attr]
phase_state = getattr(session, "phase_state", None)
if phase_state is None:
continue
skills_mgr = getattr(phase_state, "skills_manager", None)
if isinstance(skills_mgr, SkillsManager):
runtimes.append(_ManagerReloadAdapter(skills_mgr))
except Exception:
logger.debug("queen scope: live manager lookup failed", exc_info=True)
if live_manager is None:
live_manager = _build_admin_manager(queen_id=queen_id)
return SkillScope(
kind="queen",
target_id=queen_id,
@@ -134,7 +159,7 @@ def _queen_scope(manager: Any, queen_id: str) -> SkillScope | None:
overrides_path=overrides_path,
store=store,
affected_runtimes=runtimes,
manager=live_manager,
manager=admin_manager,
)
@@ -157,20 +182,15 @@ def _colony_scope(manager: Any, colony_name: str) -> SkillScope | None:
store = SkillOverrideStore.load(overrides_path, scope_label=f"colony:{colony_name}")
write_dir = colony_home / ".hive" / "skills"
admin_manager = _build_admin_manager(queen_id=queen_id, colony_name=colony_name)
runtimes: list = []
live_manager: SkillsManager | None = None
try:
for colony in manager.iter_colony_runtimes(colony_name=colony_name): # type: ignore[union-attr]
runtimes.append(colony)
skills_mgr = getattr(colony, "skills_manager", None)
if isinstance(skills_mgr, SkillsManager) and live_manager is None:
live_manager = skills_mgr
except Exception:
logger.debug("colony scope: live manager lookup failed", exc_info=True)
if live_manager is None:
live_manager = _build_admin_manager(queen_id=queen_id, colony_name=colony_name)
return SkillScope(
kind="colony",
target_id=colony_name,
@@ -178,7 +198,7 @@ def _colony_scope(manager: Any, colony_name: str) -> SkillScope | None:
overrides_path=overrides_path,
store=store,
affected_runtimes=runtimes,
manager=live_manager,
manager=admin_manager,
)
@@ -260,7 +280,10 @@ def _resolve_provenance(
if entry is not None:
store_entry = entry
stamped = entry.provenance
if stamped == Provenance.FRAMEWORK and skill.source_scope != "framework":
# Heal a FRAMEWORK stamp that doesn't match the actual scope —
# preset/user/colony skills got stamped FRAMEWORK by the old
# PATCH default. Leave a legit framework-scoped skill alone.
if stamped == Provenance.FRAMEWORK and skill.source_scope not in {"framework"}:
stamped = Provenance.OTHER
if stamped != Provenance.OTHER:
return stamped, entry
@@ -270,6 +293,8 @@ def _resolve_provenance(
# always stamps USER_UI_CREATED.
if skill.source_scope == "framework":
return Provenance.FRAMEWORK, store_entry
if skill.source_scope == "preset":
return Provenance.PRESET, store_entry
if skill.source_scope == "user":
return Provenance.USER_DROPPED, store_entry
if skill.source_scope == "queen_ui":
+120 -5
View File
@@ -148,11 +148,18 @@ async def _make_app(*, manager: _FakeManager) -> web.Application:
@pytest.mark.asyncio
async def test_get_tools_default_allows_everything(queen_dir, monkeypatch):
# Skip ensure_default_queens; our tmp profile is enough.
async def test_get_tools_default_allows_everything_for_unknown_queen(queen_dir, monkeypatch):
"""Queens NOT in the role-default table fall back to allow-all."""
monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None)
_, queen_id = queen_dir
queens_dir, _ = queen_dir
# Use a queen id that isn't in QUEEN_DEFAULT_CATEGORIES so we exercise
# the fallback-to-allow-all path.
custom_id = "queen_custom_unknown"
(queens_dir / custom_id).mkdir()
(queens_dir / custom_id / "profile.yaml").write_text(
yaml.safe_dump({"name": "Custom", "title": "Custom Role"})
)
manager = _FakeManager()
manager._mcp_tool_catalog = {
@@ -164,19 +171,77 @@ async def test_get_tools_default_allows_everything(queen_dir, monkeypatch):
app = await _make_app(manager=manager)
async with TestClient(TestServer(app)) as client:
resp = await client.get(f"/api/queen/{queen_id}/tools")
resp = await client.get(f"/api/queen/{custom_id}/tools")
assert resp.status == 200
body = await resp.json()
assert body["enabled_mcp_tools"] is None
assert body["is_role_default"] is True # no sidecar → default-allow
assert body["stale"] is False
servers = {s["name"]: s for s in body["mcp_servers"]}
assert set(servers) == {"coder-tools"}
# Default-allow → every tool reports enabled=True
for tool in servers["coder-tools"]["tools"]:
assert tool["enabled"] is True
@pytest.mark.asyncio
async def test_get_tools_applies_role_default(queen_dir, monkeypatch):
    """A queen with a known persona is served her role-based default allowlist."""
    monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None)
    _, queen_id = queen_dir  # queen_technology — has a role default

    # The catalog covers tools the role default references (plus one it
    # does not) so the response mirrors what the queen would see on boot.
    catalog_names = (
        "read_file",
        "port_scan",  # security
        "excel_read",  # data
        "fluffy_unknown_tool",
    )
    manager = _FakeManager()
    manager._mcp_tool_catalog = {
        "coder-tools": [
            {"name": tool_name, "description": "", "input_schema": {}}
            for tool_name in catalog_names
        ],
    }

    app = await _make_app(manager=manager)
    async with TestClient(TestServer(app)) as client:
        resp = await client.get(f"/api/queen/{queen_id}/tools")
        assert resp.status == 200
        body = await resp.json()

    # queen_technology's role default includes file_read, data, security, etc.
    assert body["is_role_default"] is True
    enabled = set(body["enabled_mcp_tools"] or [])
    for expected in ("read_file", "port_scan", "excel_read"):
        assert expected in enabled
    # A tool that is in no category (and in no @server: expansion target
    # the role references) must stay out of the default.
    assert "fluffy_unknown_tool" not in enabled
def test_resolve_queen_default_tools_expands_server_shorthand():
    """The ``@server:NAME`` shorthand is expanded using the supplied catalog."""
    from framework.agents.queen.queen_tools_defaults import resolve_queen_default_tools

    browser_tools = ("browser_navigate", "browser_click")
    catalog = {"gcu-tools": [{"name": tool_name} for tool_name in browser_tools]}

    # queen_brand_design has the "browser" category, which is defined as
    # @server:gcu-tools.
    result = resolve_queen_default_tools("queen_brand_design", catalog)

    assert result is not None
    for tool_name in browser_tools:
        assert tool_name in result
def test_resolve_queen_default_tools_unknown_queen_returns_none():
    """A queen id without a role entry resolves to ``None``."""
    from framework.agents.queen.queen_tools_defaults import resolve_queen_default_tools

    resolved = resolve_queen_default_tools("queen_made_up", {})
    assert resolved is None
@pytest.mark.asyncio
async def test_patch_persists_and_validates(queen_dir, monkeypatch):
monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None)
@@ -214,6 +279,7 @@ async def test_patch_persists_and_validates(queen_dir, monkeypatch):
# GET reflects the new state
resp = await client.get(f"/api/queen/{queen_id}/tools")
body = await resp.json()
assert body["is_role_default"] is False # user has explicitly saved
servers = {t["name"]: t for t in body["mcp_servers"][0]["tools"]}
assert servers["read_file"]["enabled"] is True
assert servers["write_file"]["enabled"] is False
@@ -306,6 +372,55 @@ async def test_missing_queen_returns_404(queen_dir, monkeypatch):
assert resp.status == 404
@pytest.mark.asyncio
async def test_delete_restores_role_default(queen_dir, monkeypatch):
    """DELETE drops tools.json, after which the role default applies again."""
    monkeypatch.setattr(routes_queen_tools, "ensure_default_queens", lambda: None)
    queens_dir, queen_id = queen_dir
    tools_path = queens_dir / queen_id / "tools.json"
    tools_url = f"/api/queen/{queen_id}/tools"

    manager = _FakeManager()
    manager._mcp_tool_catalog = {
        "coder-tools": [
            {"name": tool_name, "description": "", "input_schema": {}}
            for tool_name in ("read_file", "port_scan")
        ],
    }

    app = await _make_app(manager=manager)
    async with TestClient(TestServer(app)) as client:
        # Save a custom allowlist first so a sidecar exists to be deleted.
        resp = await client.patch(tools_url, json={"enabled_mcp_tools": ["read_file"]})
        assert resp.status == 200
        assert tools_path.exists()

        resp = await client.delete(tools_url)
        assert resp.status == 200
        body = await resp.json()
        assert body["removed"] is True
        assert body["is_role_default"] is True
        assert not tools_path.exists()

        # The effective list is now queen_technology's role default, which
        # covers read_file (file_read) as well as port_scan (security).
        enabled = set(body["enabled_mcp_tools"] or [])
        assert "read_file" in enabled
        assert "port_scan" in enabled

        # A follow-up GET agrees.
        resp = await client.get(tools_url)
        body = await resp.json()
        assert body["is_role_default"] is True

        # A second DELETE finds nothing to remove.
        resp = await client.delete(tools_url)
        assert resp.status == 200
        second_body = await resp.json()
        assert second_body["removed"] is False
def test_legacy_profile_field_migrates_to_sidecar(queen_dir):
"""A legacy enabled_mcp_tools field in profile.yaml is hoisted to tools.json."""
queens_dir, queen_id = queen_dir
+18 -5
View File
@@ -30,12 +30,16 @@ _SKIP_DIRS = frozenset(
)
# Scope priority (higher = takes precedence).
# ``preset`` sits between framework and user: bundled alongside the
# framework distribution, but off by default — capability packs the user
# opts into per queen/colony rather than globally-enabled infra.
# Each scope appears exactly once, listed in ascending priority; a repeated
# key in a dict literal silently overrides the earlier value.
_SCOPE_PRIORITY = {
    "framework": 0,
    "preset": 1,
    "user": 2,
    "queen_ui": 3,
    "colony_ui": 4,
    "project": 5,
}
# Within the same scope, Hive-specific paths override cross-client paths.
@@ -106,13 +110,22 @@ class SkillDiscovery:
all_skills: list[ParsedSkill] = []
self._scanned_dirs = []
# Framework scope (lowest precedence)
# Framework scope (lowest precedence) — always-on infra skills.
if not self._config.skip_framework_scope:
framework_dir = Path(__file__).parent / "_default_skills"
if framework_dir.is_dir():
self._scanned_dirs.append(framework_dir)
all_skills.extend(self._scan_scope(framework_dir, "framework"))
# Preset scope — bundled capability packs that ship with the
# framework but default to OFF. User opts in per queen/colony
# via the Skills Library. ``skip_framework_scope`` covers both
# bundled directories since they live side-by-side on disk.
preset_dir = Path(__file__).parent / "_preset_skills"
if preset_dir.is_dir():
self._scanned_dirs.append(preset_dir)
all_skills.extend(self._scan_scope(preset_dir, "preset"))
# User scope
if not self._config.skip_user_scope:
home = Path.home()
+11 -5
View File
@@ -246,18 +246,18 @@ class SkillsManager:
) -> list:
"""Filter ``discovered`` per the queen + colony override stores.
Resolution rule (mirrors the plan's schema):
Resolution rule:
1. Tombstoned names (``deleted_ui_skills``) drop out.
2. An explicit ``enabled=False`` override drops the skill.
3. An explicit ``enabled=True`` override keeps it (wins over
``all_defaults_disabled`` for framework defaults).
4. Otherwise the skill inherits :meth:`SkillsConfig.is_default_enabled`.
``all_defaults_disabled`` for framework defaults AND over the
preset-scope default-off rule).
4. Otherwise: preset-scope skills are off by default; everything
else inherits :meth:`SkillsConfig.is_default_enabled`.
"""
from framework.skills.overrides import SkillOverrideStore
stores: list[SkillOverrideStore] = [s for s in (queen_store, colony_store) if s is not None]
if not stores:
return discovered
tombstones: set[str] = set()
for store in stores:
@@ -282,6 +282,12 @@ class SkillsManager:
if explicit is True:
out.append(skill)
continue
# Preset-scope capability packs are bundled but ship OFF; the
# user must explicitly enable them per queen or colony. This
# runs even when no store is present so bare agents don't
# silently load x-automation etc.
if skill.source_scope == "preset":
continue
# No explicit entry — master switch takes effect against framework defaults.
default_enabled = skills_config.is_default_enabled(skill.name)
if master_disabled and default_enabled and skill.source_scope == "framework":
+1
View File
@@ -40,6 +40,7 @@ class Provenance(StrEnum):
"""
FRAMEWORK = "framework"
PRESET = "preset"
USER_DROPPED = "user_dropped"
USER_UI_CREATED = "user_ui_created"
QUEEN_CREATED = "queen_created"
+23 -4
View File
@@ -20,9 +20,17 @@ from pathlib import Path
logger = logging.getLogger(__name__)
_DEFAULT_SKILLS_DIR = Path(__file__).parent / "_default_skills"
# Skills shipped with the framework are split across two sibling
# directories: ``_default_skills`` (always-on infra) and ``_preset_skills``
# (capability packs that are bundled but off by default). Tool-gated
# pre-activation searches both, so ``browser_*`` tools still pull in the
# browser-automation preset even though the catalog does not enable it
# by default.
_BUNDLED_DIRS: tuple[Path, ...] = tuple(
    Path(__file__).parent / bundled_name
    for bundled_name in ("_default_skills", "_preset_skills")
)
# Each entry is (tool-name prefix, skill directory name, display name).
# The directory name is the skill's folder under one of the bundled skill
# directories.
_TOOL_GATED_SKILLS: list[tuple[str, str, str]] = [
("browser_", "browser-automation", "hive.browser-automation"),
]
@@ -31,12 +39,23 @@ _BODY_CACHE: dict[str, str] = {}
def _load_body(dir_name: str) -> str:
"""Load the markdown body of a framework default skill, cached."""
"""Load the markdown body of a bundled skill, cached. Searches every
bundled directory (default + preset) so the mapping table doesn't
need to know which dir a skill lives in.
"""
if dir_name in _BODY_CACHE:
return _BODY_CACHE[dir_name]
path = _DEFAULT_SKILLS_DIR / dir_name / "SKILL.md"
path: Path | None = None
for parent in _BUNDLED_DIRS:
candidate = parent / dir_name / "SKILL.md"
if candidate.exists():
path = candidate
break
body = ""
if path is None:
_BODY_CACHE[dir_name] = body
return body
try:
raw = path.read_text(encoding="utf-8")
# Strip YAML frontmatter (between the first two '---' fences)
+3 -2
View File
@@ -327,8 +327,9 @@ class TrustGate:
import os
# UI-authored scopes bypass the trust gate — they're implicitly
# trusted because the user authored them through the UI.
_bypass_scopes = {"framework", "user", "queen_ui", "colony_ui"}
# trusted because the user authored them through the UI. ``preset``
# ships with the framework distribution, so it's trusted too.
_bypass_scopes = {"framework", "preset", "user", "queen_ui", "colony_ui"}
always_trusted = [s for s in skills if s.source_scope in _bypass_scopes]
project_skills = [s for s in skills if s.source_scope == "project"]
+20 -3
View File
@@ -31,6 +31,10 @@ export interface McpServerTools {
export interface QueenToolsResponse {
queen_id: string;
enabled_mcp_tools: string[] | null;
/** True when the effective allowlist comes from the role-based default
* (no tools.json sidecar saved for this queen). False means the user
* has explicitly saved an allowlist. */
is_role_default: boolean;
stale: boolean;
lifecycle: ToolMeta[];
synthetic: ToolMeta[];
@@ -43,6 +47,14 @@ export interface QueenToolsUpdateResult {
refreshed_sessions: number;
}
/** Response body of ``queensApi.resetTools`` (DELETE ``/queen/{id}/tools``). */
export interface QueenToolsResetResult {
/** Identifier of the queen whose allowlist was reset. */
queen_id: string;
/** Presumably true when a saved tools.json sidecar actually existed and was
 * deleted, false when there was nothing to remove — confirm against the
 * backend handler. */
removed: boolean;
/** Allowlist reported after the reset; ``null`` means every MCP tool is
 * allowed. */
enabled_mcp_tools: string[] | null;
/** Typed as the literal ``true``: a reset response always reports the
 * role-default state. */
is_role_default: true;
/** NOTE(review): looks like the count of live sessions that were refreshed
 * with the new allowlist — confirm against the backend handler. */
refreshed_sessions: number;
}
export const queensApi = {
/** List all queen profiles (id, name, title). */
list: () =>
@@ -91,12 +103,17 @@ export const queensApi = {
/** Persist the MCP tool allowlist for a queen.
*
* Pass ``null`` to reset to the default ("allow every MCP tool") or an
* explicit list to restrict the queen's tool surface. Lifecycle and
* synthetic tools are always enabled and cannot be listed here.
* Pass ``null`` to explicitly allow every MCP tool, or a list to
* restrict the queen's tool surface. Lifecycle and synthetic tools
* are always enabled and cannot be listed here.
*/
updateTools: (queenId: string, enabled: string[] | null) =>
api.patch<QueenToolsUpdateResult>(`/queen/${queenId}/tools`, {
enabled_mcp_tools: enabled,
}),
/** Drop the queen's tools.json sidecar so she falls back to the
* role-based default (or allow-all for queens without a role entry). */
resetTools: (queenId: string) =>
api.delete<QueenToolsResetResult>(`/queen/${queenId}/tools`),
};
+1
View File
@@ -4,6 +4,7 @@ export type SkillScopeKind = "queen" | "colony" | "user";
export type SkillProvenance =
| "framework"
| "preset"
| "user_dropped"
| "user_ui_created"
| "queen_created"
@@ -11,12 +11,17 @@ export default function QueenToolsSection({ queenId }: { queenId: string }) {
(enabled: string[] | null) => queensApi.updateTools(queenId, enabled),
[queenId],
);
const resetToRoleDefault = useCallback(
() => queensApi.resetTools(queenId),
[queenId],
);
return (
<ToolsEditor
subjectKey={`queen:${queenId}`}
title="Tools"
fetchSnapshot={fetchSnapshot}
saveAllowlist={saveAllowlist}
resetToRoleDefault={resetToRoleDefault}
/>
);
}
+82 -11
View File
@@ -17,6 +17,9 @@ export interface ToolsSnapshot {
lifecycle: ToolMeta[];
synthetic: ToolMeta[];
mcp_servers: McpServerTools[];
/** Optional: when true, the allowlist came from the role-based
* default (no explicit save). Only queens surface this today. */
is_role_default?: boolean;
}
export interface ToolsEditorProps {
@@ -28,10 +31,14 @@ export interface ToolsEditorProps {
caveat?: string;
/** Load the current snapshot. */
fetchSnapshot: () => Promise<ToolsSnapshot>;
/** Persist an allowlist. ``null`` resets to "allow all". */
/** Persist an allowlist. ``null`` is an explicit "allow all" save. */
saveAllowlist: (
enabled: string[] | null,
) => Promise<{ enabled_mcp_tools: string[] | null }>;
/** Optional: drop any saved allowlist so the subject falls back to
* its role-based default. Shows a "Reset to role default" button
* when provided. */
resetToRoleDefault?: () => Promise<{ enabled_mcp_tools: string[] | null }>;
}
type TriState = "checked" | "unchecked" | "indeterminate";
@@ -165,6 +172,7 @@ export default function ToolsEditor({
caveat,
fetchSnapshot,
saveAllowlist,
resetToRoleDefault,
}: ToolsEditorProps) {
const [data, setData] = useState<ToolsSnapshot | null>(null);
const [loading, setLoading] = useState(true);
@@ -241,7 +249,41 @@ export default function ToolsEditor({
});
};
const handleResetToDefault = () => setDraftAllowed(null);
const handleDraftAllowAll = () => setDraftAllowed(null);
/**
 * Ask the owner to drop the saved allowlist, then sync local state with
 * whatever allowlist the reset call reports back.
 *
 * On success the baseline ref, the draft, and the cached snapshot are all
 * replaced with the returned allowlist, the snapshot is flagged as role
 * default, and the "Saved" indicator is shown for 2.5 seconds. On failure the
 * error message from the response body (or "Reset failed") is surfaced.
 */
const handleResetToRoleDefault = async () => {
  // Subjects that do not supply a reset callback have nothing to do here.
  if (!resetToRoleDefault) return;

  // A fresh Set per consumer: the baseline ref and the draft state do not
  // share an instance. ``null`` (allow all) passes through unchanged.
  const asSet = (names: string[] | null) =>
    names === null ? null : new Set(names);

  setSaving(true);
  setSaveError(null);
  try {
    const { enabled_mcp_tools: allowlist } = await resetToRoleDefault();
    baselineRef.current = asSet(allowlist);
    setDraftAllowed(asSet(allowlist));

    if (data) {
      const lookup = asSet(allowlist);
      // Recompute every tool's ``enabled`` flag from the new allowlist so the
      // snapshot matches what was just reported.
      const servers = data.mcp_servers.map((server) => ({
        ...server,
        tools: server.tools.map((tool) => ({
          ...tool,
          enabled: lookup === null || lookup.has(tool.name),
        })),
      }));
      setData({
        ...data,
        enabled_mcp_tools: allowlist,
        is_role_default: true,
        mcp_servers: servers,
      });
    }

    setSavedRecently(true);
    setTimeout(() => setSavedRecently(false), 2500);
  } catch (e: unknown) {
    const failure = e as { body?: { error?: string } };
    setSaveError(failure.body?.error || "Reset failed");
  } finally {
    setSaving(false);
  }
};
const handleCancel = () => {
const baseline = baselineRef.current;
@@ -264,6 +306,7 @@ export default function ToolsEditor({
setData({
...data,
enabled_mcp_tools: updated,
is_role_default: false,
mcp_servers: data.mcp_servers.map((srv) => ({
...srv,
tools: srv.tools.map((t) => ({
@@ -439,15 +482,43 @@ export default function ToolsEditor({
<Check className="w-3 h-3" /> Saved
</span>
)}
{draftAllowed !== null && (
<button
onClick={handleResetToDefault}
disabled={saving}
className="ml-auto text-[11px] text-muted-foreground hover:text-foreground underline underline-offset-2 disabled:opacity-50"
>
Reset to default (allow all)
</button>
)}
<div className="ml-auto flex items-center gap-3">
{data.is_role_default !== undefined && (
<span
className={`text-[10px] uppercase tracking-wider px-1.5 py-0.5 rounded ${
data.is_role_default
? "bg-muted/40 text-muted-foreground"
: "bg-primary/15 text-primary"
}`}
title={
data.is_role_default
? "Using the default allowlist for this role."
: "Custom allowlist saved by you."
}
>
{data.is_role_default ? "Role default" : "Custom"}
</span>
)}
{resetToRoleDefault && !data.is_role_default && (
<button
onClick={handleResetToRoleDefault}
disabled={saving}
className="text-[11px] text-muted-foreground hover:text-foreground underline underline-offset-2 disabled:opacity-50"
>
Reset to role default
</button>
)}
{draftAllowed !== null && (
<button
onClick={handleDraftAllowAll}
disabled={saving}
className="text-[11px] text-muted-foreground hover:text-foreground underline underline-offset-2 disabled:opacity-50"
title="Draft 'allow all' — click Save to persist."
>
Allow all
</button>
)}
</div>
</div>
{saveError && (
+8 -5
View File
@@ -32,6 +32,7 @@ type Tab = "queens" | "colonies" | "catalog";
const PROVENANCE_LABEL: Record<SkillProvenance, string> = {
framework: "Framework",
preset: "Preset",
user_dropped: "User",
user_ui_created: "User (UI)",
queen_created: "Queen",
@@ -44,11 +45,13 @@ function ProvenanceBadge({ provenance }: { provenance: SkillProvenance }) {
const tone =
provenance === "framework"
? "bg-slate-400/10 text-slate-400"
: provenance === "queen_created"
? "bg-amber-500/10 text-amber-500"
: provenance === "learned_runtime"
? "bg-purple-500/10 text-purple-500"
: "bg-primary/10 text-primary";
: provenance === "preset"
? "bg-teal-500/10 text-teal-500"
: provenance === "queen_created"
? "bg-amber-500/10 text-amber-500"
: provenance === "learned_runtime"
? "bg-purple-500/10 text-purple-500"
: "bg-primary/10 text-primary";
return (
<span className={`px-1.5 py-0.5 rounded text-[10px] font-medium ${tone}`}>
{PROVENANCE_LABEL[provenance]}
+54
View File
@@ -199,6 +199,60 @@ class TestSkillsManagerOverrides:
enabled = {s.name for s in mgr._catalog._skills.values()} # type: ignore[attr-defined]
assert "shared-skill" not in enabled
def test_preset_scope_is_off_by_default(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Skills discovered under the ``preset`` scope are not loaded into the
    catalog by default, yet enumeration still lists them."""
    # Point Path.home() at a scratch directory under tmp_path.
    monkeypatch.setattr(Path, "home", lambda: tmp_path / "home")

    presets_dir = tmp_path / "fake_presets"
    _write_skill_file(presets_dir, "hive.x-automation", "X capability pack")
    _write_skill_file(presets_dir, "hive.browser-automation", "Browser pack")
    preset_names = ("hive.x-automation", "hive.browser-automation")

    config = SkillsManagerConfig(
        extra_scope_dirs=[ExtraScope(directory=presets_dir, label="preset", priority=1)],
        project_root=None,
        skip_community_discovery=True,
        interactive=False,
    )
    manager = SkillsManager(config)
    manager.load()

    # With no override saved, neither preset skill is in the loaded catalog.
    catalog_names = {skill.name for skill in manager._catalog._skills.values()}  # type: ignore[attr-defined]
    for preset_name in preset_names:
        assert preset_name not in catalog_names

    # Enumeration still surfaces them so the UI can offer a toggle.
    listed_names = {skill.name for skill in manager.enumerate_skills_with_source()}
    for preset_name in preset_names:
        assert preset_name in listed_names
def test_preset_skill_enabled_via_explicit_override(
    self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A preset-scope skill lands in the catalog once the queen's override
    store carries an explicit ``enabled=True`` entry for it."""
    # Point Path.home() at a scratch directory under tmp_path.
    monkeypatch.setattr(Path, "home", lambda: tmp_path / "home")

    presets_dir = tmp_path / "fake_presets"
    _write_skill_file(presets_dir, "hive.x-automation")

    # Persist the opt-in before the manager is built so load() reads it
    # from the overrides file.
    overrides_file = tmp_path / "queen_overrides.json"
    override_store = SkillOverrideStore.load(overrides_file, scope_label="queen:q")
    opt_in = OverrideEntry(enabled=True, provenance=Provenance.PRESET)
    override_store.upsert("hive.x-automation", opt_in)
    override_store.save()

    config = SkillsManagerConfig(
        queen_id="q",
        queen_overrides_path=overrides_file,
        extra_scope_dirs=[ExtraScope(directory=presets_dir, label="preset", priority=1)],
        project_root=None,
        skip_community_discovery=True,
        interactive=False,
    )
    manager = SkillsManager(config)
    manager.load()

    catalog_names = {skill.name for skill in manager._catalog._skills.values()}  # type: ignore[attr-defined]
    assert "hive.x-automation" in catalog_names
def test_reload_picks_up_store_change(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setattr(Path, "home", lambda: tmp_path / "home")
fw = tmp_path / "fw"