Merge pull request #6624 from aden-hive/feature/agent-skills
Release / Create Release (push) Waiting to run
Release / Create Release (push) Waiting to run
feat: agent skills system and observability improvements
This commit is contained in:
@@ -89,6 +89,16 @@ def main():
|
||||
|
||||
register_testing_commands(subparsers)
|
||||
|
||||
# Register skill commands (skill list, skill trust, ...)
|
||||
from framework.skills.cli import register_skill_commands
|
||||
|
||||
register_skill_commands(subparsers)
|
||||
|
||||
# Register debugger commands (debugger)
|
||||
from framework.debugger.cli import register_debugger_commands
|
||||
|
||||
register_debugger_commands(subparsers)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if hasattr(args, "func"):
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
"""CLI command for the LLM debug log viewer."""
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
def _script_path() -> Path:
    """Locate the visualizer script shipped with the framework.

    Resolved lazily (at invocation time, not import time) so that an
    unexpectedly shallow install location cannot raise ``IndexError``
    while this module is merely being imported.
    """
    # <repo_root>/scripts/llm_debug_log_visualizer.py, three parents up
    # from framework/debugger/cli.py.
    return Path(__file__).resolve().parents[3] / "scripts" / "llm_debug_log_visualizer.py"


def register_debugger_commands(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``hive debugger`` command.

    Args:
        subparsers: Subparser action of the top-level ``hive`` CLI parser.
    """
    parser = subparsers.add_parser(
        "debugger",
        help="Open the LLM debug log viewer",
        description=(
            "Start a local server that lets you browse LLM debug sessions "
            "recorded in ~/.hive/llm_logs. Sessions are loaded on demand so "
            "the browser stays responsive."
        ),
    )
    parser.add_argument(
        "--session",
        help="Execution ID to select initially.",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=0,
        help="Port for the local server (0 = auto-pick a free port).",
    )
    parser.add_argument(
        "--logs-dir",
        help="Directory containing JSONL log files (default: ~/.hive/llm_logs).",
    )
    parser.add_argument(
        "--limit-files",
        type=int,
        default=None,
        help="Maximum number of newest log files to scan (default: 200).",
    )
    parser.add_argument(
        "--output",
        help="Write a static HTML file instead of starting a server.",
    )
    parser.add_argument(
        "--no-open",
        action="store_true",
        help="Start the server but do not open a browser.",
    )
    parser.add_argument(
        "--include-tests",
        action="store_true",
        help="Show test/mock sessions (hidden by default).",
    )
    parser.set_defaults(func=cmd_debugger)


def cmd_debugger(args: argparse.Namespace) -> int:
    """Launch the LLM debug log visualizer in a subprocess.

    Forwards the parsed flags to the standalone visualizer script and
    returns its exit code.
    """
    cmd: list[str] = [sys.executable, str(_script_path())]
    if args.session:
        cmd += ["--session", args.session]
    if args.port:
        # Port 0 (the default) means "auto-pick" and is simply not forwarded.
        cmd += ["--port", str(args.port)]
    if args.logs_dir:
        cmd += ["--logs-dir", args.logs_dir]
    if args.limit_files is not None:
        cmd += ["--limit-files", str(args.limit_files)]
    if args.output:
        cmd += ["--output", args.output]
    if args.no_open:
        cmd.append("--no-open")
    if args.include_tests:
        cmd.append("--include-tests")
    return subprocess.call(cmd)
|
||||
@@ -33,6 +33,8 @@ class Message:
|
||||
is_transition_marker: bool = False
|
||||
# True when this message is real human input (from /chat), not a system prompt
|
||||
is_client_input: bool = False
|
||||
# True when message contains an activated skill body (AS-10: never prune)
|
||||
is_skill_content: bool = False
|
||||
|
||||
def to_llm_dict(self) -> dict[str, Any]:
|
||||
"""Convert to OpenAI-format message dict."""
|
||||
@@ -409,6 +411,7 @@ class NodeConversation:
|
||||
tool_use_id: str,
|
||||
content: str,
|
||||
is_error: bool = False,
|
||||
is_skill_content: bool = False,
|
||||
) -> Message:
|
||||
msg = Message(
|
||||
seq=self._next_seq,
|
||||
@@ -417,6 +420,7 @@ class NodeConversation:
|
||||
tool_use_id=tool_use_id,
|
||||
is_error=is_error,
|
||||
phase_id=self._current_phase,
|
||||
is_skill_content=is_skill_content,
|
||||
)
|
||||
self._messages.append(msg)
|
||||
self._next_seq += 1
|
||||
@@ -610,6 +614,8 @@ class NodeConversation:
|
||||
continue
|
||||
if msg.is_error:
|
||||
continue # never prune errors
|
||||
if msg.is_skill_content:
|
||||
continue # never prune activated skill instructions (AS-10)
|
||||
if msg.content.startswith("[Pruned tool result"):
|
||||
continue # already pruned
|
||||
# Tiny results (set_output acks, confirmations) — pruning
|
||||
|
||||
@@ -467,6 +467,8 @@ class EventLoopNode(NodeProtocol):
|
||||
stream_id = ctx.stream_id or ctx.node_id
|
||||
node_id = ctx.node_id
|
||||
execution_id = ctx.execution_id or ""
|
||||
# Store skill dirs for AS-9 file-read interception in _execute_tool
|
||||
self._skill_dirs: list[str] = ctx.skill_dirs
|
||||
|
||||
# Verdict counters for runtime logging
|
||||
_accept_count = _retry_count = _escalate_count = _continue_count = 0
|
||||
@@ -806,6 +808,13 @@ class EventLoopNode(NodeProtocol):
|
||||
execution_id,
|
||||
extra_data=_iter_meta,
|
||||
)
|
||||
# Sync max_context_tokens from live config so mid-session model
|
||||
# switches are reflected in compaction decisions and the UI bar.
|
||||
from framework.config import get_max_context_tokens as _live_mct
|
||||
|
||||
conversation._max_context_tokens = _live_mct()
|
||||
|
||||
await self._publish_context_usage(ctx, conversation, "iteration_start")
|
||||
|
||||
# 6d. Pre-turn compaction check (tiered)
|
||||
_compacted_this_iter = False
|
||||
@@ -2726,6 +2735,7 @@ class EventLoopNode(NodeProtocol):
|
||||
tool_use_id=tc.tool_use_id,
|
||||
content=result.content,
|
||||
is_error=result.is_error,
|
||||
is_skill_content=result.is_skill_content,
|
||||
)
|
||||
if (
|
||||
tc.tool_name in ("ask_user", "ask_user_multiple")
|
||||
@@ -2834,6 +2844,8 @@ class EventLoopNode(NodeProtocol):
|
||||
conversation.usage_ratio() * 100,
|
||||
)
|
||||
|
||||
await self._publish_context_usage(ctx, conversation, "post_tool_results")
|
||||
|
||||
# If the turn requested external input (ask_user or queen handoff),
|
||||
# return immediately so the outer loop can block before judge eval.
|
||||
if user_input_requested or queen_input_requested:
|
||||
@@ -3549,6 +3561,33 @@ class EventLoopNode(NodeProtocol):
|
||||
content=f"No tool executor configured for '{tc.tool_name}'",
|
||||
is_error=True,
|
||||
)
|
||||
|
||||
# AS-9: Intercept file-read tools for skill directories — bypass session sandbox
|
||||
_SKILL_READ_TOOLS = {"view_file", "load_data", "read_file"}
|
||||
skill_dirs = getattr(self, "_skill_dirs", [])
|
||||
if tc.tool_name in _SKILL_READ_TOOLS and skill_dirs:
|
||||
_path = tc.tool_input.get("path", "")
|
||||
if _path:
|
||||
import os
|
||||
from pathlib import Path as _Path
|
||||
|
||||
_resolved = os.path.realpath(os.path.abspath(_path))
|
||||
if any(_resolved.startswith(os.path.realpath(d)) for d in skill_dirs):
|
||||
try:
|
||||
_content = _Path(_resolved).read_text(encoding="utf-8")
|
||||
_is_skill_md = _resolved.endswith("SKILL.md")
|
||||
return ToolResult(
|
||||
tool_use_id=tc.tool_use_id,
|
||||
content=_content,
|
||||
is_skill_content=_is_skill_md, # AS-10: protect SKILL.md reads
|
||||
)
|
||||
except Exception as _exc:
|
||||
return ToolResult(
|
||||
tool_use_id=tc.tool_use_id,
|
||||
content=f"Could not read skill resource '{_path}': {_exc}",
|
||||
is_error=True,
|
||||
)
|
||||
|
||||
tool_use = ToolUse(id=tc.tool_use_id, name=tc.tool_name, input=tc.tool_input)
|
||||
timeout = self._config.tool_call_timeout_seconds
|
||||
|
||||
@@ -3980,6 +4019,12 @@ class EventLoopNode(NodeProtocol):
|
||||
ratio_before = conversation.usage_ratio()
|
||||
phase_grad = getattr(ctx, "continuous_mode", False)
|
||||
|
||||
# Capture pre-compaction message inventory when over budget,
|
||||
# since compaction mutates the conversation in place.
|
||||
pre_inventory: list[dict[str, Any]] | None = None
|
||||
if ratio_before >= 1.0:
|
||||
pre_inventory = self._build_message_inventory(conversation)
|
||||
|
||||
# --- Step 1: Prune old tool results (free, no LLM) ---
|
||||
protect = max(2000, self._config.max_context_tokens // 12)
|
||||
pruned = await conversation.prune_old_tool_results(
|
||||
@@ -3994,7 +4039,7 @@ class EventLoopNode(NodeProtocol):
|
||||
conversation.usage_ratio() * 100,
|
||||
)
|
||||
if not conversation.needs_compaction():
|
||||
await self._log_compaction(ctx, conversation, ratio_before)
|
||||
await self._log_compaction(ctx, conversation, ratio_before, pre_inventory)
|
||||
return
|
||||
|
||||
# --- Step 2: Standard structure-preserving compaction (free, no LLM) ---
|
||||
@@ -4007,7 +4052,7 @@ class EventLoopNode(NodeProtocol):
|
||||
phase_graduated=phase_grad,
|
||||
)
|
||||
if not conversation.needs_compaction():
|
||||
await self._log_compaction(ctx, conversation, ratio_before)
|
||||
await self._log_compaction(ctx, conversation, ratio_before, pre_inventory)
|
||||
return
|
||||
|
||||
# --- Step 3: LLM summary compaction ---
|
||||
@@ -4034,7 +4079,7 @@ class EventLoopNode(NodeProtocol):
|
||||
logger.warning("LLM compaction failed: %s", e)
|
||||
|
||||
if not conversation.needs_compaction():
|
||||
await self._log_compaction(ctx, conversation, ratio_before)
|
||||
await self._log_compaction(ctx, conversation, ratio_before, pre_inventory)
|
||||
return
|
||||
|
||||
# --- Step 4: Emergency deterministic summary (LLM failed/unavailable) ---
|
||||
@@ -4048,7 +4093,7 @@ class EventLoopNode(NodeProtocol):
|
||||
keep_recent=1,
|
||||
phase_graduated=phase_grad,
|
||||
)
|
||||
await self._log_compaction(ctx, conversation, ratio_before)
|
||||
await self._log_compaction(ctx, conversation, ratio_before, pre_inventory)
|
||||
|
||||
# --- LLM compaction with binary-search splitting ----------------------
|
||||
|
||||
@@ -4210,13 +4255,59 @@ class EventLoopNode(NodeProtocol):
|
||||
"re-doing work.\n"
|
||||
)
|
||||
|
||||
@staticmethod
def _build_message_inventory(
    conversation: "NodeConversation",
) -> list[dict[str, Any]]:
    """Build a per-message size inventory for debug logging.

    Produces one entry per message with its ``seq``, ``role`` and
    character counts. Assistant messages carrying tool calls are
    annotated with the called tool names and the serialized size of
    their arguments; tool-result messages are annotated with the name
    of the originating call, found by matching ``tool_use_id`` against
    earlier assistant tool calls.
    """
    inventory: list[dict[str, Any]] = []
    for m in conversation.messages:
        content_chars = len(m.content)
        tc_chars = 0
        tool_name = None
        if m.tool_calls:
            # Arguments may be a pre-serialized JSON string or a
            # structured object — measure the serialized form either way.
            for tc in m.tool_calls:
                args = tc.get("function", {}).get("arguments", "")
                tc_chars += len(args) if isinstance(args, str) else len(json.dumps(args))
            names = [tc.get("function", {}).get("name", "?") for tc in m.tool_calls]
            tool_name = ", ".join(names)
        elif m.role == "tool" and m.tool_use_id:
            # Resolve which tool this result answers.
            for prev in conversation.messages:
                if prev.tool_calls:
                    for tc in prev.tool_calls:
                        if tc.get("id") == m.tool_use_id:
                            tool_name = tc.get("function", {}).get("name", "?")
                            break
                    if tool_name:
                        break
        entry: dict[str, Any] = {
            "seq": m.seq,
            "role": m.role,
            "content_chars": content_chars,
        }
        if tc_chars:
            entry["tool_call_args_chars"] = tc_chars
        if tool_name:
            entry["tool"] = tool_name
        if m.is_error:
            entry["is_error"] = True
        if m.phase_id:
            entry["phase"] = m.phase_id
        if content_chars > 2000:
            # Short preview so oversized payloads can be identified later.
            entry["preview"] = m.content[:200] + "…"
        inventory.append(entry)
    return inventory
|
||||
|
||||
async def _log_compaction(
|
||||
self,
|
||||
ctx: NodeContext,
|
||||
conversation: NodeConversation,
|
||||
ratio_before: float,
|
||||
pre_inventory: list[dict[str, Any]] | None = None,
|
||||
) -> None:
|
||||
"""Log compaction result to runtime logger and event bus."""
|
||||
"""Log compaction result to runtime logger, event bus, and debug file."""
|
||||
import os as _os
|
||||
|
||||
ratio_after = conversation.usage_ratio()
|
||||
before_pct = round(ratio_before * 100)
|
||||
after_pct = round(ratio_after * 100)
|
||||
@@ -4249,19 +4340,103 @@ class EventLoopNode(NodeProtocol):
|
||||
if self._event_bus:
|
||||
from framework.runtime.event_bus import AgentEvent, EventType
|
||||
|
||||
event_data: dict[str, Any] = {
|
||||
"level": level,
|
||||
"usage_before": before_pct,
|
||||
"usage_after": after_pct,
|
||||
}
|
||||
if pre_inventory is not None:
|
||||
event_data["message_inventory"] = pre_inventory
|
||||
await self._event_bus.publish(
|
||||
AgentEvent(
|
||||
type=EventType.CONTEXT_COMPACTED,
|
||||
stream_id=ctx.stream_id or ctx.node_id,
|
||||
node_id=ctx.node_id,
|
||||
data={
|
||||
"level": level,
|
||||
"usage_before": before_pct,
|
||||
"usage_after": after_pct,
|
||||
},
|
||||
data=event_data,
|
||||
)
|
||||
)
|
||||
|
||||
# Emit post-compaction usage update
|
||||
await self._publish_context_usage(ctx, conversation, "post_compaction")
|
||||
|
||||
# Write detailed debug log to ~/.hive/compaction_log/ when enabled
|
||||
if _os.environ.get("HIVE_COMPACTION_DEBUG"):
|
||||
self._write_compaction_debug_log(ctx, before_pct, after_pct, level, pre_inventory)
|
||||
|
||||
@staticmethod
def _write_compaction_debug_log(
    ctx: "NodeContext",
    before_pct: int,
    after_pct: int,
    level: str,
    inventory: list[dict[str, Any]] | None,
) -> None:
    """Write detailed compaction analysis to ``~/.hive/compaction_log/``.

    Emits a Markdown report with the usage delta, the compaction level,
    and (when provided) the pre-compaction message inventory ranked by
    size. Write failures are logged at debug level, never raised.
    """
    log_dir = Path.home() / ".hive" / "compaction_log"
    log_dir.mkdir(parents=True, exist_ok=True)

    # Microsecond-resolution timestamp keeps filenames unique per event.
    ts = datetime.now(UTC).strftime("%Y%m%dT%H%M%S_%f")
    node_label = ctx.node_id.replace("/", "_")
    log_path = log_dir / f"{ts}_{node_label}.md"

    lines: list[str] = [
        f"# Compaction Debug — {ctx.node_id}",
        f"**Time:** {datetime.now(UTC).isoformat()}",
        f"**Node:** {ctx.node_spec.name} (`{ctx.node_id}`)",
    ]
    if ctx.stream_id:
        lines.append(f"**Stream:** {ctx.stream_id}")
    lines.append(f"**Level:** {level}")
    lines.append(f"**Usage:** {before_pct}% → {after_pct}%")
    lines.append("")

    if inventory:
        total_chars = sum(
            e.get("content_chars", 0) + e.get("tool_call_args_chars", 0) for e in inventory
        )
        lines.append(
            f"## Pre-Compaction Message Inventory "
            f"({len(inventory)} messages, {total_chars:,} total chars)"
        )
        lines.append("")
        # Largest messages first — the usual suspects for context blowups.
        ranked = sorted(
            inventory,
            key=lambda e: e.get("content_chars", 0) + e.get("tool_call_args_chars", 0),
            reverse=True,
        )
        lines.append("| # | seq | role | tool | chars | % of total | flags |")
        lines.append("|---|-----|------|------|------:|------------|-------|")
        for i, entry in enumerate(ranked, 1):
            chars = entry.get("content_chars", 0) + entry.get("tool_call_args_chars", 0)
            pct = (chars / total_chars * 100) if total_chars else 0
            tool = entry.get("tool", "")
            flags = []
            if entry.get("is_error"):
                flags.append("error")
            if entry.get("phase"):
                flags.append(f"phase={entry['phase']}")
            lines.append(
                f"| {i} | {entry['seq']} | {entry['role']} | {tool} "
                f"| {chars:,} | {pct:.1f}% | {', '.join(flags)} |"
            )

        large = [e for e in ranked if e.get("preview")]
        if large:
            lines.append("")
            lines.append("### Large message previews")
            for entry in large:
                lines.append(
                    f"\n**seq={entry['seq']}** ({entry['role']}, {entry.get('tool', '')}):"
                )
                lines.append(f"```\n{entry['preview']}\n```")
        lines.append("")

    try:
        log_path.write_text("\n".join(lines), encoding="utf-8")
        logger.debug("Compaction debug log written to %s", log_path)
    except OSError:
        # Best-effort diagnostics — a failed write must never break the run.
        logger.debug("Failed to write compaction debug log to %s", log_path)
|
||||
|
||||
def _build_emergency_summary(
|
||||
self,
|
||||
ctx: NodeContext,
|
||||
@@ -4666,6 +4841,36 @@ class EventLoopNode(NodeProtocol):
|
||||
if result.inject:
|
||||
await conversation.add_user_message(result.inject)
|
||||
|
||||
async def _publish_context_usage(
    self,
    ctx: "NodeContext",
    conversation: "NodeConversation",
    trigger: str,
) -> None:
    """Emit a CONTEXT_USAGE_UPDATED event with current context window state.

    Args:
        ctx: Node context supplying stream/node identity for the event.
        conversation: Conversation whose token usage is being reported.
        trigger: Label for what caused this update (e.g. "iteration_start").
    """
    if not self._event_bus:
        return  # No bus attached — nothing to publish.
    from framework.runtime.event_bus import AgentEvent, EventType

    estimated = conversation.estimate_tokens()
    # NOTE(review): reads a private attribute of NodeConversation —
    # consider exposing a public accessor.
    max_tokens = conversation._max_context_tokens
    ratio = estimated / max_tokens if max_tokens > 0 else 0.0
    await self._event_bus.publish(
        AgentEvent(
            type=EventType.CONTEXT_USAGE_UPDATED,
            stream_id=ctx.stream_id or ctx.node_id,
            node_id=ctx.node_id,
            data={
                "usage_ratio": round(ratio, 4),
                "usage_pct": round(ratio * 100),
                "message_count": conversation.message_count,
                "estimated_tokens": estimated,
                "max_context_tokens": max_tokens,
                "trigger": trigger,
            },
        )
    )
|
||||
|
||||
async def _publish_iteration(
|
||||
self,
|
||||
stream_id: str,
|
||||
|
||||
@@ -154,6 +154,7 @@ class GraphExecutor:
|
||||
iteration_metadata_provider: Callable | None = None,
|
||||
skills_catalog_prompt: str = "",
|
||||
protocols_prompt: str = "",
|
||||
skill_dirs: list[str] | None = None,
|
||||
):
|
||||
"""
|
||||
Initialize the executor.
|
||||
@@ -181,6 +182,7 @@ class GraphExecutor:
|
||||
system prompt (for phase switching)
|
||||
skills_catalog_prompt: Available skills catalog for system prompt
|
||||
protocols_prompt: Default skill operational protocols for system prompt
|
||||
skill_dirs: Skill base directories for Tier 3 resource access
|
||||
"""
|
||||
self.runtime = runtime
|
||||
self.llm = llm
|
||||
@@ -204,6 +206,7 @@ class GraphExecutor:
|
||||
self.iteration_metadata_provider = iteration_metadata_provider
|
||||
self.skills_catalog_prompt = skills_catalog_prompt
|
||||
self.protocols_prompt = protocols_prompt
|
||||
self.skill_dirs: list[str] = skill_dirs or []
|
||||
|
||||
if protocols_prompt:
|
||||
self.logger.info(
|
||||
@@ -1845,6 +1848,9 @@ class GraphExecutor:
|
||||
|
||||
existing_underscore = [k for k in memory._data if k.startswith("_")]
|
||||
extra_keys = set(_skill_keys) | set(existing_underscore)
|
||||
# Only inject into read_keys when it was already non-empty — an empty
|
||||
# read_keys means "allow all reads" and injecting skill keys would
|
||||
# inadvertently restrict reads to skill keys only.
|
||||
for k in extra_keys:
|
||||
if read_keys and k not in read_keys:
|
||||
read_keys.append(k)
|
||||
@@ -1899,6 +1905,7 @@ class GraphExecutor:
|
||||
iteration_metadata_provider=self.iteration_metadata_provider,
|
||||
skills_catalog_prompt=self.skills_catalog_prompt,
|
||||
protocols_prompt=self.protocols_prompt,
|
||||
skill_dirs=self.skill_dirs,
|
||||
)
|
||||
|
||||
VALID_NODE_TYPES = {
|
||||
|
||||
@@ -568,6 +568,7 @@ class NodeContext:
|
||||
# Skill system prompts — injected by the skill discovery pipeline
|
||||
skills_catalog_prompt: str = "" # Available skills XML catalog
|
||||
protocols_prompt: str = "" # Default skill operational protocols
|
||||
skill_dirs: list[str] = field(default_factory=list) # Skill base dirs for resource access
|
||||
|
||||
# Per-iteration metadata provider — when set, EventLoopNode merges
|
||||
# the returned dict into node_loop_iteration event data. Used by
|
||||
|
||||
@@ -45,6 +45,7 @@ class ToolResult:
|
||||
tool_use_id: str
|
||||
content: str
|
||||
is_error: bool = False
|
||||
is_skill_content: bool = False # AS-10: marks activated skill body, protected from pruning
|
||||
|
||||
|
||||
class LLMProvider(ABC):
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Shared MCP client connection management."""
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Any
|
||||
|
||||
@@ -7,6 +8,8 @@ import httpx
|
||||
|
||||
from framework.runner.mcp_client import MCPClient, MCPServerConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MCPConnectionManager:
|
||||
"""Process-wide MCP client pool keyed by server name."""
|
||||
@@ -46,8 +49,14 @@ class MCPConnectionManager:
|
||||
with self._pool_lock:
|
||||
client = self._pool.get(server_name)
|
||||
if self._is_connected(client) and server_name not in self._transitions:
|
||||
self._refcounts[server_name] = self._refcounts.get(server_name, 0) + 1
|
||||
new_refcount = self._refcounts.get(server_name, 0) + 1
|
||||
self._refcounts[server_name] = new_refcount
|
||||
self._configs[server_name] = config
|
||||
logger.debug(
|
||||
"Reusing pooled connection for MCP server '%s' (refcount=%d)",
|
||||
server_name,
|
||||
new_refcount,
|
||||
)
|
||||
return client
|
||||
|
||||
transition_event = self._transitions.get(server_name)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Pre-load validation for agent graphs.
|
||||
|
||||
Runs structural and credential checks before MCP servers are spawned.
|
||||
Runs structural, credential, and skill-trust checks before MCP servers are spawned.
|
||||
Fails fast with actionable error messages.
|
||||
"""
|
||||
|
||||
@@ -169,6 +169,9 @@ def run_preload_validation(
|
||||
1. Graph structure (includes GCU subagent-only checks) — non-recoverable
|
||||
2. Credentials — potentially recoverable via interactive setup
|
||||
|
||||
Skill discovery and trust gating (AS-13) happen later in runner._setup()
|
||||
so they have access to agent-level skill configuration.
|
||||
|
||||
Raises PreloadValidationError for structural issues.
|
||||
Raises CredentialError for credential issues.
|
||||
"""
|
||||
|
||||
@@ -1343,7 +1343,7 @@ class AgentRunner:
|
||||
except Exception:
|
||||
pass # Best-effort — agent works without account info
|
||||
|
||||
# Skill configuration — the runtime handles discovery, loading, and
|
||||
# Skill configuration — the runtime handles discovery, loading, trust-gating and
|
||||
# prompt rasterization. The runner just builds the config.
|
||||
from framework.skills.config import SkillsConfig
|
||||
from framework.skills.manager import SkillsManagerConfig
|
||||
@@ -1354,6 +1354,7 @@ class AgentRunner:
|
||||
skills=getattr(self, "_agent_skills", None),
|
||||
),
|
||||
project_root=self.agent_path,
|
||||
interactive=self._interactive,
|
||||
)
|
||||
|
||||
self._setup_agent_runtime(
|
||||
@@ -1465,6 +1466,9 @@ class AgentRunner:
|
||||
accounts_data: list[dict] | None = None,
|
||||
tool_provider_map: dict[str, str] | None = None,
|
||||
event_bus=None,
|
||||
skills_catalog_prompt: str = "",
|
||||
protocols_prompt: str = "",
|
||||
skill_dirs: list[str] | None = None,
|
||||
skills_manager_config=None,
|
||||
) -> None:
|
||||
"""Set up multi-entry-point execution using AgentRuntime."""
|
||||
|
||||
@@ -482,7 +482,7 @@ class ToolRegistry:
|
||||
def register_mcp_server(
|
||||
self,
|
||||
server_config: dict[str, Any],
|
||||
use_connection_manager: bool = False,
|
||||
use_connection_manager: bool = True,
|
||||
) -> int:
|
||||
"""
|
||||
Register an MCP server and discover its tools.
|
||||
|
||||
@@ -137,6 +137,7 @@ class AgentRuntime:
|
||||
# Deprecated — pass skills_manager_config instead.
|
||||
skills_catalog_prompt: str = "",
|
||||
protocols_prompt: str = "",
|
||||
skill_dirs: list[str] | None = None,
|
||||
):
|
||||
"""
|
||||
Initialize agent runtime.
|
||||
@@ -158,6 +159,9 @@ class AgentRuntime:
|
||||
event_bus: Optional external EventBus. If provided, the runtime shares
|
||||
this bus instead of creating its own. Used by SessionManager to
|
||||
share a single bus between queen, worker, and judge.
|
||||
skills_catalog_prompt: Available skills catalog for system prompt
|
||||
protocols_prompt: Default skill operational protocols for system prompt
|
||||
skill_dirs: Skill base directories for Tier 3 resource access
|
||||
skills_manager_config: Skill configuration — the runtime owns
|
||||
discovery, loading, and prompt rendering internally.
|
||||
skills_catalog_prompt: Deprecated. Pre-rendered skills catalog.
|
||||
@@ -195,6 +199,8 @@ class AgentRuntime:
|
||||
self._skills_manager = SkillsManager()
|
||||
self._skills_manager.load()
|
||||
|
||||
self.skill_dirs: list[str] = self._skills_manager.allowlisted_dirs
|
||||
|
||||
# Primary graph identity
|
||||
self._graph_id: str = graph_id or "primary"
|
||||
|
||||
@@ -341,6 +347,7 @@ class AgentRuntime:
|
||||
tool_provider_map=self._tool_provider_map,
|
||||
skills_catalog_prompt=self.skills_catalog_prompt,
|
||||
protocols_prompt=self.protocols_prompt,
|
||||
skill_dirs=self.skill_dirs,
|
||||
)
|
||||
await stream.start()
|
||||
self._streams[ep_id] = stream
|
||||
@@ -977,6 +984,7 @@ class AgentRuntime:
|
||||
tool_provider_map=self._tool_provider_map,
|
||||
skills_catalog_prompt=self.skills_catalog_prompt,
|
||||
protocols_prompt=self.protocols_prompt,
|
||||
skill_dirs=self.skill_dirs,
|
||||
)
|
||||
if self._running:
|
||||
await stream.start()
|
||||
@@ -1760,6 +1768,7 @@ def create_agent_runtime(
|
||||
# Deprecated — pass skills_manager_config instead.
|
||||
skills_catalog_prompt: str = "",
|
||||
protocols_prompt: str = "",
|
||||
skill_dirs: list[str] | None = None,
|
||||
) -> AgentRuntime:
|
||||
"""
|
||||
Create and configure an AgentRuntime with entry points.
|
||||
@@ -1786,6 +1795,9 @@ def create_agent_runtime(
|
||||
accounts_data: Raw account data for per-node prompt generation.
|
||||
tool_provider_map: Tool name to provider name mapping for account routing.
|
||||
event_bus: Optional external EventBus to share with other components.
|
||||
skills_catalog_prompt: Available skills catalog for system prompt.
|
||||
protocols_prompt: Default skill operational protocols for system prompt.
|
||||
skill_dirs: Skill base directories for Tier 3 resource access.
|
||||
skills_manager_config: Skill configuration — the runtime owns
|
||||
discovery, loading, and prompt rendering internally.
|
||||
skills_catalog_prompt: Deprecated. Pre-rendered skills catalog.
|
||||
@@ -1819,6 +1831,7 @@ def create_agent_runtime(
|
||||
skills_manager_config=skills_manager_config,
|
||||
skills_catalog_prompt=skills_catalog_prompt,
|
||||
protocols_prompt=protocols_prompt,
|
||||
skill_dirs=skill_dirs,
|
||||
)
|
||||
|
||||
for spec in entry_points:
|
||||
|
||||
@@ -117,6 +117,7 @@ class EventType(StrEnum):
|
||||
|
||||
# Context management
|
||||
CONTEXT_COMPACTED = "context_compacted"
|
||||
CONTEXT_USAGE_UPDATED = "context_usage_updated"
|
||||
|
||||
# External triggers
|
||||
WEBHOOK_RECEIVED = "webhook_received"
|
||||
|
||||
@@ -188,6 +188,7 @@ class ExecutionStream:
|
||||
tool_provider_map: dict[str, str] | None = None,
|
||||
skills_catalog_prompt: str = "",
|
||||
protocols_prompt: str = "",
|
||||
skill_dirs: list[str] | None = None,
|
||||
):
|
||||
"""
|
||||
Initialize execution stream.
|
||||
@@ -213,6 +214,7 @@ class ExecutionStream:
|
||||
tool_provider_map: Tool name to provider name mapping for account routing
|
||||
skills_catalog_prompt: Available skills catalog for system prompt
|
||||
protocols_prompt: Default skill operational protocols for system prompt
|
||||
skill_dirs: Skill base directories for Tier 3 resource access
|
||||
"""
|
||||
self.stream_id = stream_id
|
||||
self.entry_spec = entry_spec
|
||||
@@ -236,6 +238,7 @@ class ExecutionStream:
|
||||
self._tool_provider_map = tool_provider_map
|
||||
self._skills_catalog_prompt = skills_catalog_prompt
|
||||
self._protocols_prompt = protocols_prompt
|
||||
self._skill_dirs: list[str] = skill_dirs or []
|
||||
|
||||
_es_logger = logging.getLogger(__name__)
|
||||
if protocols_prompt:
|
||||
@@ -696,6 +699,7 @@ class ExecutionStream:
|
||||
tool_provider_map=self._tool_provider_map,
|
||||
skills_catalog_prompt=self._skills_catalog_prompt,
|
||||
protocols_prompt=self._protocols_prompt,
|
||||
skill_dirs=self._skill_dirs,
|
||||
)
|
||||
# Track executor so inject_input() can reach EventLoopNode instances
|
||||
self._active_executors[execution_id] = executor
|
||||
|
||||
@@ -8,6 +8,7 @@ write. Errors are silently swallowed — this must never break the agent.
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import IO, Any
|
||||
@@ -47,6 +48,9 @@ def log_llm_turn(
|
||||
Never raises.
|
||||
"""
|
||||
try:
|
||||
# Skip logging during test runs to avoid polluting real logs.
|
||||
if os.environ.get("PYTEST_CURRENT_TEST") or os.environ.get("HIVE_DISABLE_LLM_LOGS"):
|
||||
return
|
||||
global _log_file, _log_ready # noqa: PLW0603
|
||||
if not _log_ready:
|
||||
_log_file = _open_log()
|
||||
|
||||
@@ -37,6 +37,7 @@ DEFAULT_EVENT_TYPES = [
|
||||
EventType.NODE_RETRY,
|
||||
EventType.NODE_TOOL_DOOM_LOOP,
|
||||
EventType.CONTEXT_COMPACTED,
|
||||
EventType.CONTEXT_USAGE_UPDATED,
|
||||
EventType.WORKER_LOADED,
|
||||
EventType.CREDENTIALS_REQUIRED,
|
||||
EventType.SUBAGENT_REPORT,
|
||||
|
||||
@@ -819,10 +819,11 @@ class SessionManager:
|
||||
exec_id = event.execution_id
|
||||
|
||||
if event.type == _ET.EXECUTION_STARTED:
|
||||
# New run on this execution_id — reset cooldown so the first
|
||||
# iteration always produces a mid-run snapshot.
|
||||
# New run on this execution_id — start the cooldown timer so
|
||||
# mid-run snapshots don't fire immediately at session start.
|
||||
# The first snapshot will happen after _DIGEST_COOLDOWN seconds.
|
||||
if exec_id:
|
||||
_last_digest.pop(exec_id, None)
|
||||
_last_digest[exec_id] = _time.monotonic()
|
||||
|
||||
elif event.type in (
|
||||
_ET.EXECUTION_COMPLETED,
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""Hive Agent Skills — discovery, parsing, and injection of SKILL.md packages.
|
||||
"""Hive Agent Skills — discovery, parsing, trust gating, and injection of SKILL.md packages.
|
||||
|
||||
Implements the open Agent Skills standard (agentskills.io) for portable
|
||||
skill discovery and activation, plus built-in default skills for runtime
|
||||
operational discipline.
|
||||
operational discipline, and AS-13 trust gating for project-scope skills.
|
||||
"""
|
||||
|
||||
from framework.skills.catalog import SkillCatalog
|
||||
@@ -10,7 +10,9 @@ from framework.skills.config import DefaultSkillConfig, SkillsConfig
|
||||
from framework.skills.defaults import DefaultSkillManager
|
||||
from framework.skills.discovery import DiscoveryConfig, SkillDiscovery
|
||||
from framework.skills.manager import SkillsManager, SkillsManagerConfig
|
||||
from framework.skills.models import TrustStatus
|
||||
from framework.skills.parser import ParsedSkill, parse_skill_md
|
||||
from framework.skills.trust import TrustedRepoStore, TrustGate
|
||||
|
||||
__all__ = [
|
||||
"DefaultSkillConfig",
|
||||
@@ -22,5 +24,8 @@ __all__ = [
|
||||
"SkillsConfig",
|
||||
"SkillsManager",
|
||||
"SkillsManagerConfig",
|
||||
"TrustGate",
|
||||
"TrustedRepoStore",
|
||||
"TrustStatus",
|
||||
"parse_skill_md",
|
||||
]
|
||||
|
||||
@@ -76,6 +76,7 @@ class SkillCatalog:
|
||||
lines.append(f" <name>{escape(skill.name)}</name>")
|
||||
lines.append(f" <description>{escape(skill.description)}</description>")
|
||||
lines.append(f" <location>{escape(skill.location)}</location>")
|
||||
lines.append(f" <base_dir>{escape(skill.base_dir)}</base_dir>")
|
||||
lines.append(" </skill>")
|
||||
lines.append("</available_skills>")
|
||||
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
"""CLI commands for the Hive skill system.
|
||||
|
||||
Phase 1 commands (AS-13):
|
||||
hive skill list — list discovered skills across all scopes
|
||||
hive skill trust <path> — permanently trust a project repo's skills
|
||||
|
||||
Full CLI suite (CLI-1 through CLI-13) is Phase 2.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def register_skill_commands(subparsers) -> None:
    """Register the ``hive skill`` subcommand group.

    Adds two subcommands under ``skill``:
      * ``list``  — show discovered skills across all scopes
      * ``trust`` — permanently trust a project repository
    """
    group = subparsers.add_parser("skill", help="Manage skills")
    commands = group.add_subparsers(dest="skill_command", required=True)

    # hive skill list
    cmd_list = commands.add_parser("list", help="List discovered skills across all scopes")
    cmd_list.add_argument(
        "--project-dir",
        metavar="PATH",
        default=None,
        help="Project directory to scan (default: current directory)",
    )
    cmd_list.set_defaults(func=cmd_skill_list)

    # hive skill trust
    cmd_trust = commands.add_parser(
        "trust",
        help="Permanently trust a project repository so its skills load without prompting",
    )
    cmd_trust.add_argument(
        "project_path",
        help="Path to the project directory (must contain a .git with a remote origin)",
    )
    cmd_trust.set_defaults(func=cmd_skill_trust)
|
||||
|
||||
|
||||
def cmd_skill_list(args) -> int:
    """List all discovered skills grouped by scope.

    Scans the project directory (``--project-dir`` or CWD), then prints one
    section per scope in fixed order: project, user, framework. Always
    returns 0.
    """
    from framework.skills.discovery import DiscoveryConfig, SkillDiscovery

    root = Path(args.project_dir).resolve() if args.project_dir else Path.cwd()
    discovered = SkillDiscovery(DiscoveryConfig(project_root=root)).discover()

    if not discovered:
        print("No skills discovered.")
        return 0

    banner_by_scope = {
        "project": "PROJECT SKILLS",
        "user": "USER SKILLS",
        "framework": "FRAMEWORK SKILLS",
    }

    # Fixed scope order; scopes with no skills are omitted entirely.
    for scope in ("project", "user", "framework"):
        in_scope = [entry for entry in discovered if entry.source_scope == scope]
        if not in_scope:
            continue
        print(f"\n{banner_by_scope[scope]}")
        print("─" * 40)
        for entry in in_scope:
            print(f" • {entry.name}")
            print(f" {entry.description}")
            print(f" {entry.location}")

    return 0
|
||||
|
||||
|
||||
def cmd_skill_trust(args) -> int:
    """Permanently trust a project repository's skills.

    Resolves the given path, verifies it is a git checkout with a remote
    ``origin``, normalizes the remote URL into a repo key, and records it in
    the persistent trust store. Returns 0 on success, 1 on any failure
    (errors are printed to stderr).
    """
    from framework.skills.trust import TrustedRepoStore, _normalize_remote_url

    target = Path(args.project_path).resolve()

    # Guard: the path must exist and look like a git repository.
    if not target.exists():
        print(f"Error: path does not exist: {target}", file=sys.stderr)
        return 1

    if not (target / ".git").exists():
        print(
            f"Error: {target} is not a git repository (no .git directory).",
            file=sys.stderr,
        )
        return 1

    # Resolve the 'origin' remote; short timeout keeps the CLI responsive.
    try:
        proc = subprocess.run(
            ["git", "-C", str(target), "remote", "get-url", "origin"],
            capture_output=True,
            text=True,
            timeout=3,
        )
    except subprocess.TimeoutExpired:
        print("Error: git remote lookup timed out.", file=sys.stderr)
        return 1
    except (FileNotFoundError, OSError) as e:
        print(f"Error reading git remote: {e}", file=sys.stderr)
        return 1

    if proc.returncode != 0:
        print(
            "Error: no remote 'origin' configured in this repository.",
            file=sys.stderr,
        )
        return 1

    repo_key = _normalize_remote_url(proc.stdout.strip())
    TrustedRepoStore().trust(repo_key, project_path=str(target))

    print(f"✓ Trusted: {repo_key}")
    print(" Stored in ~/.hive/trusted_repos.json")
    print(" Skills from this repository will load without prompting in future runs.")
    return 0
|
||||
@@ -42,11 +42,14 @@ class SkillsManagerConfig:
|
||||
When ``None``, community discovery is skipped.
|
||||
skip_community_discovery: Explicitly skip community scanning
|
||||
even when ``project_root`` is set.
|
||||
interactive: Whether trust gating can prompt the user interactively.
|
||||
When ``False``, untrusted project skills are silently skipped.
|
||||
"""
|
||||
|
||||
skills_config: SkillsConfig = field(default_factory=SkillsConfig)
|
||||
project_root: Path | None = None
|
||||
skip_community_discovery: bool = False
|
||||
interactive: bool = True
|
||||
|
||||
|
||||
class SkillsManager:
|
||||
@@ -63,6 +66,7 @@ class SkillsManager:
|
||||
self._loaded = False
|
||||
self._catalog_prompt: str = ""
|
||||
self._protocols_prompt: str = ""
|
||||
self._allowlisted_dirs: list[str] = []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Factory for backwards-compat bridge
|
||||
@@ -85,6 +89,7 @@ class SkillsManager:
|
||||
mgr._loaded = True # skip load()
|
||||
mgr._catalog_prompt = skills_catalog_prompt
|
||||
mgr._protocols_prompt = protocols_prompt
|
||||
mgr._allowlisted_dirs = []
|
||||
return mgr
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -113,9 +118,18 @@ class SkillsManager:
|
||||
# 1. Community skill discovery (when project_root is available)
|
||||
catalog_prompt = ""
|
||||
if self._config.project_root is not None and not self._config.skip_community_discovery:
|
||||
from framework.skills.trust import TrustGate
|
||||
|
||||
discovery = SkillDiscovery(DiscoveryConfig(project_root=self._config.project_root))
|
||||
discovered = discovery.discover()
|
||||
|
||||
# Trust-gate project-scope skills (AS-13)
|
||||
discovered = TrustGate(interactive=self._config.interactive).filter_and_gate(
|
||||
discovered, project_dir=self._config.project_root
|
||||
)
|
||||
|
||||
catalog = SkillCatalog(discovered)
|
||||
self._allowlisted_dirs = catalog.allowlisted_dirs
|
||||
catalog_prompt = catalog.to_prompt()
|
||||
|
||||
# Pre-activated community skills
|
||||
@@ -160,6 +174,11 @@ class SkillsManager:
|
||||
"""Default skill operational protocols for system prompt injection."""
|
||||
return self._protocols_prompt
|
||||
|
||||
    @property
    def allowlisted_dirs(self) -> list[str]:
        """Skill base directories for Tier 3 resource access (AS-6).

        Empty until ``load()`` has populated the catalog; the bridge factory
        also leaves it empty.
        """
        return self._allowlisted_dirs
|
||||
|
||||
    @property
    def is_loaded(self) -> bool:
        """Whether skill state has been populated (by ``load()`` or the factory)."""
        return self._loaded
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
"""Data models for the Hive skill system (Agent Skills standard)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import StrEnum
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class SkillScope(StrEnum):
|
||||
"""Where a skill was discovered."""
|
||||
|
||||
PROJECT = "project"
|
||||
USER = "user"
|
||||
FRAMEWORK = "framework"
|
||||
|
||||
|
||||
class TrustStatus(StrEnum):
|
||||
"""Trust state of a skill entry."""
|
||||
|
||||
TRUSTED = "trusted"
|
||||
PENDING_CONSENT = "pending_consent"
|
||||
DENIED = "denied"
|
||||
|
||||
|
||||
@dataclass
class SkillEntry:
    """In-memory record for a discovered skill (PRD §4.2).

    Required fields come from SKILL.md frontmatter and the discovery
    location; optional frontmatter fields default to empty values.
    """

    # Required fields from SKILL.md frontmatter / discovery
    name: str  # skill name from SKILL.md frontmatter
    description: str  # skill description from SKILL.md frontmatter
    location: Path  # absolute path to SKILL.md
    base_dir: Path  # parent directory of SKILL.md (skill root)
    source_scope: SkillScope  # which scope this skill was found in

    # Trust state; project-scope skills start as PENDING_CONSENT before gating.
    trust_status: TrustStatus = TrustStatus.TRUSTED

    # Optional frontmatter fields
    license: str | None = None
    compatibility: list[str] = field(default_factory=list)
    allowed_tools: list[str] = field(default_factory=list)
    metadata: dict = field(default_factory=dict)
|
||||
@@ -0,0 +1,477 @@
|
||||
"""Trust gating for project-level skills (PRD AS-13).
|
||||
|
||||
Project-level skills from untrusted repositories require explicit user consent
|
||||
before their instructions are loaded into the agent's system prompt.
|
||||
Framework and user-scope skills are always trusted.
|
||||
|
||||
Trusted repos are persisted at ~/.hive/trusted_repos.json.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
import sys
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from enum import StrEnum
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from framework.skills.parser import ParsedSkill
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Env var to bypass trust gating in CI/headless pipelines (opt-in).
|
||||
_ENV_TRUST_ALL = "HIVE_TRUST_PROJECT_SKILLS"
|
||||
|
||||
# Env var for comma-separated own-remote glob patterns (e.g. "github.com/myorg/*").
|
||||
_ENV_OWN_REMOTES = "HIVE_OWN_REMOTES"
|
||||
|
||||
_TRUSTED_REPOS_PATH = Path.home() / ".hive" / "trusted_repos.json"
|
||||
_NOTICE_SENTINEL_PATH = Path.home() / ".hive" / ".skill_trust_notice_shown"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Trusted repo store
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class TrustedRepoEntry:
    """One permanently-trusted repository record.

    Serialized to/from ~/.hive/trusted_repos.json by TrustedRepoStore.
    """

    repo_key: str  # normalized host/org/repo key
    added_at: datetime  # when trust was granted
    project_path: str = ""  # local checkout path recorded at trust time
|
||||
|
||||
|
||||
class TrustedRepoStore:
    """Persists permanently-trusted repo keys to ~/.hive/trusted_repos.json.

    The backing file is loaded lazily on first access, so constructing a
    store never touches the filesystem. Writes are atomic (tmp + rename).
    """

    def __init__(self, path: Path | None = None) -> None:
        # path override is used by tests; default is the shared user-level file.
        self._path = path if path is not None else _TRUSTED_REPOS_PATH
        self._entries: dict[str, TrustedRepoEntry] = {}
        self._loaded = False

    def is_trusted(self, repo_key: str) -> bool:
        """Return True if *repo_key* has been permanently trusted."""
        self._ensure_loaded()
        return repo_key in self._entries

    def trust(self, repo_key: str, project_path: str = "") -> None:
        """Record *repo_key* as trusted (overwriting any prior entry) and persist."""
        self._ensure_loaded()
        entry = TrustedRepoEntry(
            repo_key=repo_key,
            added_at=datetime.now(tz=UTC),
            project_path=project_path,
        )
        self._entries[repo_key] = entry
        self._save()
        logger.info("skill_trust_store: trusted repo_key=%s", repo_key)

    def revoke(self, repo_key: str) -> bool:
        """Remove *repo_key* from the store. Returns True if it was present."""
        self._ensure_loaded()
        if repo_key not in self._entries:
            return False
        del self._entries[repo_key]
        self._save()
        logger.info("skill_trust_store: revoked repo_key=%s", repo_key)
        return True

    def list_entries(self) -> list[TrustedRepoEntry]:
        """Return all trusted entries (insertion order)."""
        self._ensure_loaded()
        return list(self._entries.values())

    def _ensure_loaded(self) -> None:
        # One-shot lazy load guard.
        if self._loaded:
            return
        self._load()
        self._loaded = True

    def _load(self) -> None:
        """Populate ``self._entries`` from disk; any failure yields an empty store."""
        try:
            payload = json.loads(self._path.read_text(encoding="utf-8"))
            for raw in payload.get("entries", []):
                key = raw.get("repo_key", "")
                if not key:
                    continue  # malformed record: no key to index by
                try:
                    when = datetime.fromisoformat(raw["added_at"])
                except (KeyError, ValueError):
                    when = datetime.now(tz=UTC)  # missing/bad timestamp: use now
                self._entries[key] = TrustedRepoEntry(
                    repo_key=key,
                    added_at=when,
                    project_path=raw.get("project_path", ""),
                )
        except FileNotFoundError:
            pass  # no file yet = nothing trusted
        except Exception as e:
            # Corrupt store must not break startup; treat as empty but log it.
            logger.warning(
                "skill_trust_store: could not read %s (%s); treating as empty",
                self._path,
                e,
            )

    def _save(self) -> None:
        """Write all entries to disk atomically."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "version": 1,
            "entries": [
                {
                    "repo_key": entry.repo_key,
                    "added_at": entry.added_at.isoformat(),
                    "project_path": entry.project_path,
                }
                for entry in self._entries.values()
            ],
        }
        # Atomic write: write to .tmp then rename
        tmp = self._path.with_suffix(".tmp")
        tmp.write_text(json.dumps(payload, indent=2), encoding="utf-8")
        tmp.replace(self._path)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Trust classification
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ProjectTrustClassification(StrEnum):
|
||||
ALWAYS_TRUSTED = "always_trusted"
|
||||
TRUSTED_BY_USER = "trusted_by_user"
|
||||
UNTRUSTED = "untrusted"
|
||||
|
||||
|
||||
class ProjectTrustDetector:
    """Classifies a project directory as trusted or untrusted.

    Algorithm (PRD §4.1 trust note):
    1. No project_dir → ALWAYS_TRUSTED
    2. No .git directory → ALWAYS_TRUSTED (not a git repo)
    3. No remote 'origin' → ALWAYS_TRUSTED (local-only repo)
    4. Remote URL → repo_key; in TrustedRepoStore → TRUSTED_BY_USER
    5. Localhost remote → ALWAYS_TRUSTED
    6. ~/.hive/own_remotes match → ALWAYS_TRUSTED
    7. HIVE_OWN_REMOTES env match → ALWAYS_TRUSTED
    8. None of the above → UNTRUSTED
    """

    def __init__(self, store: TrustedRepoStore | None = None) -> None:
        # Share the store with callers (e.g. TrustGate) so both see the same
        # user-granted trust decisions.
        self._store = store or TrustedRepoStore()

    def classify(self, project_dir: Path | None) -> tuple[ProjectTrustClassification, str]:
        """Return (classification, repo_key).

        repo_key is empty string for ALWAYS_TRUSTED cases without a remote.
        """
        # Steps 1-2: nothing to gate without an existing git checkout.
        if project_dir is None or not project_dir.exists():
            return ProjectTrustClassification.ALWAYS_TRUSTED, ""

        if not (project_dir / ".git").exists():
            return ProjectTrustClassification.ALWAYS_TRUSTED, ""

        # Step 3: local-only repos (no 'origin', or git failure) are trusted.
        remote_url = self._get_remote_origin(project_dir)
        if not remote_url:
            return ProjectTrustClassification.ALWAYS_TRUSTED, ""

        repo_key = _normalize_remote_url(remote_url)

        # Explicitly trusted by user
        if self._store.is_trusted(repo_key):
            return ProjectTrustClassification.TRUSTED_BY_USER, repo_key

        # Localhost remotes are always trusted
        if _is_localhost_remote(remote_url):
            return ProjectTrustClassification.ALWAYS_TRUSTED, repo_key

        # User-configured own-remote patterns
        if self._matches_own_remotes(repo_key):
            return ProjectTrustClassification.ALWAYS_TRUSTED, repo_key

        return ProjectTrustClassification.UNTRUSTED, repo_key

    def _get_remote_origin(self, project_dir: Path) -> str:
        """Run git remote get-url origin. Returns empty string on any failure."""
        try:
            result = subprocess.run(
                ["git", "-C", str(project_dir), "remote", "get-url", "origin"],
                capture_output=True,
                text=True,
                timeout=3,
            )
            if result.returncode == 0:
                return result.stdout.strip()
        except subprocess.TimeoutExpired:
            # Empty return here feeds into classify() step 3 → ALWAYS_TRUSTED.
            logger.warning(
                "skill_trust: git remote lookup timed out for %s; treating as trusted",
                project_dir,
            )
        except (FileNotFoundError, OSError):
            pass  # git not found or other OS error
        return ""

    def _matches_own_remotes(self, repo_key: str) -> bool:
        """Check repo_key against user-configured own-remote glob patterns."""
        import fnmatch

        patterns: list[str] = []

        # From env var
        env_patterns = _ENV_OWN_REMOTES
        import os

        # Comma-separated glob patterns, e.g. "github.com/myorg/*".
        raw = os.environ.get(env_patterns, "")
        if raw:
            patterns.extend(p.strip() for p in raw.split(",") if p.strip())

        # From ~/.hive/own_remotes file
        own_remotes_file = Path.home() / ".hive" / "own_remotes"
        if own_remotes_file.is_file():
            try:
                # One pattern per line; blanks and '#'-prefixed lines ignored.
                for line in own_remotes_file.read_text(encoding="utf-8").splitlines():
                    line = line.strip()
                    if line and not line.startswith("#"):
                        patterns.append(line)
            except OSError:
                pass

        return any(fnmatch.fnmatch(repo_key, p) for p in patterns)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# URL helpers (public so CLI can reuse)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _normalize_remote_url(url: str) -> str:
|
||||
"""Normalize a git remote URL to a canonical ``host/org/repo`` key.
|
||||
|
||||
Examples:
|
||||
git@github.com:org/repo.git → github.com/org/repo
|
||||
https://github.com/org/repo → github.com/org/repo
|
||||
ssh://git@github.com/org/repo.git → github.com/org/repo
|
||||
"""
|
||||
url = url.strip()
|
||||
|
||||
# SCP-style SSH: git@github.com:org/repo.git
|
||||
if url.startswith("git@") and ":" in url and "://" not in url:
|
||||
url = url[4:] # strip git@
|
||||
url = url.replace(":", "/", 1)
|
||||
elif "://" in url:
|
||||
parsed = urlparse(url)
|
||||
host = parsed.hostname or ""
|
||||
path = parsed.path.lstrip("/")
|
||||
url = f"{host}/{path}"
|
||||
|
||||
# Strip .git suffix
|
||||
if url.endswith(".git"):
|
||||
url = url[:-4]
|
||||
|
||||
return url.lower().strip("/")
|
||||
|
||||
|
||||
def _is_localhost_remote(remote_url: str) -> bool:
|
||||
"""Return True if the remote points to a local host."""
|
||||
local_hosts = {"localhost", "127.0.0.1", "::1"}
|
||||
try:
|
||||
if "://" in remote_url:
|
||||
parsed = urlparse(remote_url)
|
||||
return (parsed.hostname or "").lower() in local_hosts
|
||||
# SCP-style: git@localhost:org/repo
|
||||
if "@" in remote_url:
|
||||
host_part = remote_url.split("@", 1)[1].split(":")[0]
|
||||
return host_part.lower() in local_hosts
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Trust gate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TrustGate:
    """Filters skill list, running consent flow for untrusted project-scope skills.

    Framework and user-scope skills are always allowed through.
    Project-scope skills from untrusted repos require consent.
    """

    def __init__(
        self,
        store: TrustedRepoStore | None = None,
        detector: ProjectTrustDetector | None = None,
        interactive: bool = True,
        print_fn: Callable[[str], None] | None = None,
        input_fn: Callable[[str], str] | None = None,
    ) -> None:
        # print_fn/input_fn are injectable for tests; default to builtins.
        self._store = store or TrustedRepoStore()
        self._detector = detector or ProjectTrustDetector(self._store)
        self._interactive = interactive
        self._print = print_fn or print
        self._input = input_fn or input

    def filter_and_gate(
        self,
        skills: list[ParsedSkill],
        project_dir: Path | None,
    ) -> list[ParsedSkill]:
        """Return the subset of skills that are trusted for loading.

        - Framework and user-scope skills: always included.
        - Project-scope skills: classified; consent prompt shown if untrusted.
        """
        import os

        # Separate project skills from always-trusted scopes
        # (scope is compared by string value).
        always_trusted = [s for s in skills if s.source_scope != "project"]
        project_skills = [s for s in skills if s.source_scope == "project"]

        if not project_skills:
            return always_trusted

        # Env-var CI override: trust all project skills for this invocation
        if os.environ.get(_ENV_TRUST_ALL, "").strip() == "1":
            logger.info(
                "skill_trust: %s=1 set; trusting %d project skill(s) without consent",
                _ENV_TRUST_ALL,
                len(project_skills),
            )
            return always_trusted + project_skills

        classification, repo_key = self._detector.classify(project_dir)

        if classification in (
            ProjectTrustClassification.ALWAYS_TRUSTED,
            ProjectTrustClassification.TRUSTED_BY_USER,
        ):
            logger.info(
                "skill_trust: project skills trusted classification=%s repo=%s count=%d",
                classification,
                repo_key or "(no remote)",
                len(project_skills),
            )
            return always_trusted + project_skills

        # UNTRUSTED — need consent. Prompting requires both the interactive
        # flag and a real TTY on stdin; otherwise skip project skills.
        if not self._interactive or not sys.stdin.isatty():
            logger.warning(
                "skill_trust: skipping %d project-scope skill(s) from untrusted repo "
                "'%s' (non-interactive mode). "
                "To trust permanently run: hive skill trust %s",
                len(project_skills),
                repo_key,
                project_dir or ".",
            )
            logger.info(
                "skill_trust_decision repo=%s skills=%d decision=denied mode=headless",
                repo_key,
                len(project_skills),
            )
            return always_trusted

        # Interactive consent flow
        decision = self._run_consent_flow(project_skills, project_dir, repo_key)

        logger.info(
            "skill_trust_decision repo=%s skills=%d decision=%s mode=interactive",
            repo_key,
            len(project_skills),
            decision,
        )

        if decision == "session":
            return always_trusted + project_skills

        if decision == "permanent":
            # Persist so future runs skip the prompt for this repo.
            self._store.trust(repo_key, project_path=str(project_dir or ""))
            return always_trusted + project_skills

        # denied
        return always_trusted

    def _run_consent_flow(
        self,
        project_skills: list[ParsedSkill],
        project_dir: Path | None,
        repo_key: str,
    ) -> str:
        """Show the security notice (once) and consent prompt.
        Return 'session' | 'permanent' | 'denied'."""
        from framework.credentials.setup import Colors

        # stdout may be piped even when stdin is a TTY; drop ANSI codes then.
        if not sys.stdout.isatty():
            Colors.disable()

        self._maybe_show_security_notice(Colors)
        self._print_consent_prompt(project_skills, project_dir, repo_key, Colors)
        return self._prompt_consent(Colors)

    def _maybe_show_security_notice(self, Colors) -> None:  # noqa: N803
        """Show the one-time security notice if not already shown (NFR-5)."""
        if _NOTICE_SENTINEL_PATH.exists():
            return
        self._print("")
        self._print(
            f"{Colors.YELLOW}Security notice:{Colors.NC} Skills inject instructions "
            "into the agent's system prompt."
        )
        self._print(
            " Only load skills from sources you trust. "
            "Registry skills at tier 'verified' or 'official' have been audited."
        )
        self._print("")
        # Sentinel write is best-effort: a read-only home just re-shows it.
        try:
            _NOTICE_SENTINEL_PATH.parent.mkdir(parents=True, exist_ok=True)
            _NOTICE_SENTINEL_PATH.touch()
        except OSError:
            pass

    def _print_consent_prompt(
        self,
        project_skills: list[ParsedSkill],
        project_dir: Path | None,
        repo_key: str,
        Colors,  # noqa: N803
    ) -> None:
        """Print the banner, skill list, and the three consent options."""
        p = self._print
        p("")
        p(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}")
        p(f"{Colors.BOLD} SKILL TRUST REQUIRED{Colors.NC}")
        p(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}")
        p("")
        proj_label = str(project_dir) if project_dir else "this project"
        p(
            f" The project at {Colors.CYAN}{proj_label}{Colors.NC} wants to load "
            f"{len(project_skills)} skill(s)"
        )
        p(" that will inject instructions into the agent's system prompt.")
        if repo_key:
            p(f" Source: {Colors.BOLD}{repo_key}{Colors.NC}")
        p("")
        p(" Skills requesting access:")
        for skill in project_skills:
            p(f" {Colors.CYAN}•{Colors.NC} {Colors.BOLD}{skill.name}{Colors.NC}")
            p(f' "{skill.description}"')
            p(f" {Colors.DIM}{skill.location}{Colors.NC}")
        p("")
        p(" Options:")
        p(f" {Colors.CYAN}1){Colors.NC} Trust this session only")
        p(f" {Colors.CYAN}2){Colors.NC} Trust permanently — remember for future runs")
        p(
            f" {Colors.DIM}3) Deny"
            f" — skip all project-scope skills from this repo{Colors.NC}"
        )
        p(f"{Colors.YELLOW}{'─' * 60}{Colors.NC}")

    def _prompt_consent(self, Colors) -> str:  # noqa: N803
        """Prompt until a valid choice is entered. Returns 'session'|'permanent'|'denied'."""
        mapping = {"1": "session", "2": "permanent", "3": "denied"}
        while True:
            try:
                choice = self._input("Select option (1-3): ").strip()
                if choice in mapping:
                    return mapping[choice]
            except (KeyboardInterrupt, EOFError):
                # Ctrl-C / EOF counts as an explicit denial.
                return "denied"
            self._print(f"{Colors.RED}Invalid choice. Enter 1, 2, or 3.{Colors.NC}")
|
||||
@@ -324,6 +324,7 @@ export type EventTypeName =
|
||||
| "node_retry"
|
||||
| "edge_traversed"
|
||||
| "context_compacted"
|
||||
| "context_usage_updated"
|
||||
| "webhook_received"
|
||||
| "custom"
|
||||
| "escalation_requested"
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
import { memo, useState, useRef, useEffect } from "react";
|
||||
import { Send, Square, Crown, Cpu, Check, Loader2 } from "lucide-react";
|
||||
|
||||
/** Context-window usage snapshot for one agent (queen or worker). */
export interface ContextUsageEntry {
  /** Utilization percentage; consumers clamp display at 100. */
  usagePct: number;
  /** Number of messages currently in the context. */
  messageCount: number;
  /** Estimated token count of the current context. */
  estimatedTokens: number;
  /** Context-window capacity in tokens. */
  maxTokens: number;
}
|
||||
import MarkdownContent from "@/components/MarkdownContent";
|
||||
import QuestionWidget from "@/components/QuestionWidget";
|
||||
import MultiQuestionWidget from "@/components/MultiQuestionWidget";
|
||||
@@ -47,6 +54,8 @@ interface ChatPanelProps {
|
||||
onQuestionDismiss?: () => void;
|
||||
/** Queen operating phase — shown as a tag on queen messages */
|
||||
queenPhase?: "planning" | "building" | "staging" | "running";
|
||||
/** Context window usage for queen and workers */
|
||||
contextUsage?: Record<string, ContextUsageEntry>;
|
||||
}
|
||||
|
||||
const queenColor = "hsl(45,95%,58%)";
|
||||
@@ -241,7 +250,7 @@ const MessageBubble = memo(function MessageBubble({ msg, queenPhase }: { msg: Ch
|
||||
);
|
||||
}, (prev, next) => prev.msg.id === next.msg.id && prev.msg.content === next.msg.content && prev.msg.phase === next.msg.phase && prev.queenPhase === next.queenPhase);
|
||||
|
||||
export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting, isBusy, activeThread, disabled, onCancel, pendingQuestion, pendingOptions, pendingQuestions, onQuestionSubmit, onMultiQuestionSubmit, onQuestionDismiss, queenPhase }: ChatPanelProps) {
|
||||
export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting, isBusy, activeThread, disabled, onCancel, pendingQuestion, pendingOptions, pendingQuestions, onQuestionSubmit, onMultiQuestionSubmit, onQuestionDismiss, queenPhase, contextUsage }: ChatPanelProps) {
|
||||
const [input, setInput] = useState("");
|
||||
const [readMap, setReadMap] = useState<Record<string, number>>({});
|
||||
const bottomRef = useRef<HTMLDivElement>(null);
|
||||
@@ -356,6 +365,57 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
|
||||
<div ref={bottomRef} />
|
||||
</div>
|
||||
|
||||
{/* Context window usage bar — sits between messages and input */}
|
||||
{(() => {
|
||||
if (!contextUsage) return null;
|
||||
const queenUsage = contextUsage["__queen__"];
|
||||
const workerEntries = Object.entries(contextUsage).filter(([k]) => k !== "__queen__");
|
||||
const workerUsage = workerEntries.length > 0
|
||||
? workerEntries.reduce((best, [, v]) => (v.usagePct > best.usagePct ? v : best), workerEntries[0][1])
|
||||
: undefined;
|
||||
if (!queenUsage && !workerUsage) return null;
|
||||
return (
|
||||
<div className="flex items-center gap-3 mx-4 px-3 py-1 rounded-lg bg-muted/30 border border-border/20 group/ctx flex-shrink-0">
|
||||
{queenUsage && (
|
||||
<div className="flex items-center gap-2 flex-1 min-w-0" title={`Queen: ${(queenUsage.estimatedTokens / 1000).toFixed(1)}k / ${(queenUsage.maxTokens / 1000).toFixed(0)}k tokens \u00b7 ${queenUsage.messageCount} messages`}>
|
||||
<Crown className="w-3 h-3 flex-shrink-0" style={{ color: "hsl(45,95%,58%)" }} />
|
||||
<div className="flex-1 h-1.5 rounded-full bg-muted/50 overflow-hidden min-w-[60px]">
|
||||
<div
|
||||
className="h-full rounded-full transition-all duration-500 ease-out"
|
||||
style={{
|
||||
width: `${Math.min(queenUsage.usagePct, 100)}%`,
|
||||
backgroundColor: queenUsage.usagePct >= 90 ? "hsl(0,65%,55%)" : queenUsage.usagePct >= 70 ? "hsl(35,90%,55%)" : "hsl(45,95%,58%)",
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<span className="text-[10px] text-muted-foreground/70 flex-shrink-0 tabular-nums">
|
||||
<span className="group-hover/ctx:hidden">{queenUsage.usagePct}%</span>
|
||||
<span className="hidden group-hover/ctx:inline">{(queenUsage.estimatedTokens / 1000).toFixed(1)}k / {(queenUsage.maxTokens / 1000).toFixed(0)}k</span>
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
{workerUsage && (
|
||||
<div className="flex items-center gap-2 flex-1 min-w-0" title={`Worker: ${(workerUsage.estimatedTokens / 1000).toFixed(1)}k / ${(workerUsage.maxTokens / 1000).toFixed(0)}k tokens \u00b7 ${workerUsage.messageCount} messages`}>
|
||||
<Cpu className="w-3 h-3 flex-shrink-0" style={{ color: "hsl(220,60%,55%)" }} />
|
||||
<div className="flex-1 h-1.5 rounded-full bg-muted/50 overflow-hidden min-w-[60px]">
|
||||
<div
|
||||
className="h-full rounded-full transition-all duration-500 ease-out"
|
||||
style={{
|
||||
width: `${Math.min(workerUsage.usagePct, 100)}%`,
|
||||
backgroundColor: workerUsage.usagePct >= 90 ? "hsl(0,65%,55%)" : workerUsage.usagePct >= 70 ? "hsl(35,90%,55%)" : "hsl(220,60%,55%)",
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<span className="text-[10px] text-muted-foreground/70 flex-shrink-0 tabular-nums">
|
||||
<span className="group-hover/ctx:hidden">{workerUsage.usagePct}%</span>
|
||||
<span className="hidden group-hover/ctx:inline">{(workerUsage.estimatedTokens / 1000).toFixed(1)}k / {(workerUsage.maxTokens / 1000).toFixed(0)}k</span>
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})()}
|
||||
|
||||
{/* Input area — question widget replaces textarea when a question is pending */}
|
||||
{pendingQuestions && pendingQuestions.length >= 2 && onMultiQuestionSubmit ? (
|
||||
<MultiQuestionWidget
|
||||
|
||||
@@ -28,6 +28,13 @@ export interface SubagentReport {
|
||||
status?: "running" | "complete" | "error";
|
||||
}
|
||||
|
||||
/** Context-window usage for the node shown in the detail panel. */
interface ContextUsage {
  /** Utilization percentage; consumers clamp display at 100. */
  usagePct: number;
  /** Number of messages currently in the context. */
  messageCount: number;
  /** Estimated token count of the current context. */
  estimatedTokens: number;
  /** Context-window capacity in tokens. */
  maxTokens: number;
}
|
||||
|
||||
interface NodeDetailPanelProps {
|
||||
node: GraphNode | null;
|
||||
nodeSpec?: NodeSpec | null;
|
||||
@@ -38,6 +45,7 @@ interface NodeDetailPanelProps {
|
||||
workerSessionId?: string | null;
|
||||
nodeLogs?: string[];
|
||||
actionPlan?: string;
|
||||
contextUsage?: ContextUsage;
|
||||
onClose: () => void;
|
||||
}
|
||||
|
||||
@@ -309,7 +317,7 @@ const tabs: { id: Tab; label: string; Icon: React.FC<{ className?: string }> }[]
|
||||
{ id: "subagents", label: "Subagents", Icon: ({ className }) => <Bot className={className} /> },
|
||||
];
|
||||
|
||||
export default function NodeDetailPanel({ node, nodeSpec, allNodeSpecs, subagentReports, sessionId, graphId, workerSessionId, nodeLogs, actionPlan, onClose }: NodeDetailPanelProps) {
|
||||
export default function NodeDetailPanel({ node, nodeSpec, allNodeSpecs, subagentReports, sessionId, graphId, workerSessionId, nodeLogs, actionPlan, contextUsage, onClose }: NodeDetailPanelProps) {
|
||||
const [activeTab, setActiveTab] = useState<Tab>("overview");
|
||||
const [realTools, setRealTools] = useState<ToolInfo[] | null>(null);
|
||||
const [realCriteria, setRealCriteria] = useState<NodeCriteria | null>(null);
|
||||
@@ -389,6 +397,43 @@ export default function NodeDetailPanel({ node, nodeSpec, allNodeSpecs, subagent
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Context window usage */}
|
||||
{contextUsage && (
|
||||
<div className="px-4 py-2 border-b border-border/20 flex-shrink-0">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<span className="text-[10px] text-muted-foreground font-medium">Context</span>
|
||||
<span className="text-[10px] text-muted-foreground/70 ml-auto">
|
||||
{(contextUsage.estimatedTokens / 1000).toFixed(1)}k / {(contextUsage.maxTokens / 1000).toFixed(0)}k tokens
|
||||
</span>
|
||||
</div>
|
||||
<div className="w-full h-1.5 rounded-full bg-muted/50 overflow-hidden">
|
||||
<div
|
||||
className="h-full rounded-full transition-all duration-500 ease-out"
|
||||
style={{
|
||||
width: `${Math.min(contextUsage.usagePct, 100)}%`,
|
||||
backgroundColor: contextUsage.usagePct >= 90
|
||||
? "hsl(0,65%,55%)"
|
||||
: contextUsage.usagePct >= 70
|
||||
? "hsl(35,90%,55%)"
|
||||
: "hsl(45,95%,58%)",
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 mt-1">
|
||||
<span className="text-[10px] text-muted-foreground/60">{contextUsage.messageCount} messages</span>
|
||||
<span className="text-[10px] font-medium ml-auto" style={{
|
||||
color: contextUsage.usagePct >= 90
|
||||
? "hsl(0,65%,55%)"
|
||||
: contextUsage.usagePct >= 70
|
||||
? "hsl(35,90%,55%)"
|
||||
: "hsl(45,95%,58%)",
|
||||
}}>
|
||||
{contextUsage.usagePct}%
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Tab bar */}
|
||||
<div className="flex border-b border-border/30 flex-shrink-0 px-2 pt-1 overflow-x-auto scrollbar-hide">
|
||||
{tabs.filter(t => t.id !== "subagents" || (nodeSpec?.sub_agents && nodeSpec.sub_agents.length > 0)).map(tab => (
|
||||
|
||||
@@ -352,6 +352,8 @@ interface AgentBackendState {
|
||||
pendingQuestions: { id: string; prompt: string; options?: string[] }[] | null;
|
||||
/** Whether the pending question came from queen or worker */
|
||||
pendingQuestionSource: "queen" | "worker" | null;
|
||||
/** Per-node context window usage (from context_usage_updated events) */
|
||||
contextUsage: Record<string, { usagePct: number; messageCount: number; estimatedTokens: number; maxTokens: number }>;
|
||||
}
|
||||
|
||||
function defaultAgentState(): AgentBackendState {
|
||||
@@ -389,6 +391,7 @@ function defaultAgentState(): AgentBackendState {
|
||||
pendingOptions: null,
|
||||
pendingQuestions: null,
|
||||
pendingQuestionSource: null,
|
||||
contextUsage: {},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -630,6 +633,10 @@ export default function Workspace() {
|
||||
// it was created in (avoids stale-closure when phase change and message
|
||||
// events arrive in the same React batch).
|
||||
const queenPhaseRef = useRef<Record<string, string>>({});
|
||||
// Accumulated queen text across inner_turns within the same iteration.
|
||||
// Key: `${agentType}:${execution_id}:${iteration}`, value: { [inner_turn]: snapshot }.
|
||||
// This lets us merge all inner_turn text into one chat bubble per iteration.
|
||||
const queenIterTextRef = useRef<Record<string, Record<number, string>>>({});
|
||||
// Timestamp when designingDraft was set — used to enforce minimum spinner duration.
|
||||
const designingDraftSinceRef = useRef<Record<string, number>>({});
|
||||
const designingDraftTimerRef = useRef<Record<string, ReturnType<typeof setTimeout>>>({});
|
||||
@@ -1707,14 +1714,29 @@ export default function Workspace() {
|
||||
if (isQueen) console.log('[QUEEN] chatMsg:', chatMsg?.id, chatMsg?.content?.slice(0, 50), 'turn:', currentTurn);
|
||||
if (chatMsg && !suppressQueenMessages) {
|
||||
// Queen emits multiple client_output_delta / llm_text_delta snapshots
|
||||
// across iterations and inner tool-loop turns. Build a stable ID that
|
||||
// groups streaming deltas for the *same* output (same execution +
|
||||
// iteration + inner_turn) into one bubble, while keeping distinct
|
||||
// outputs as separate bubbles so earlier text isn't overwritten.
|
||||
// across iterations and inner tool-loop turns. Merge all inner_turns
|
||||
// within the same iteration into ONE bubble so the queen's multi-step
|
||||
// tool loop (text → tool → text → tool → text) appears as one cohesive
|
||||
// message rather than many small fragments.
|
||||
if (isQueen && (event.type === "client_output_delta" || event.type === "llm_text_delta") && event.execution_id) {
|
||||
const iter = event.data?.iteration ?? 0;
|
||||
const inner = event.data?.inner_turn ?? 0;
|
||||
chatMsg.id = `queen-stream-${event.execution_id}-${iter}-${inner}`;
|
||||
const inner = (event.data?.inner_turn as number) ?? 0;
|
||||
const iterKey = `${agentType}:${event.execution_id}:${iter}`;
|
||||
|
||||
// Store the latest snapshot for this inner_turn
|
||||
if (!queenIterTextRef.current[iterKey]) {
|
||||
queenIterTextRef.current[iterKey] = {};
|
||||
}
|
||||
const snapshot = (event.data?.snapshot as string) || (event.data?.content as string) || "";
|
||||
queenIterTextRef.current[iterKey][inner] = snapshot;
|
||||
|
||||
// Concatenate all inner_turn snapshots in order
|
||||
const parts = queenIterTextRef.current[iterKey];
|
||||
const sortedInners = Object.keys(parts).map(Number).sort((a, b) => a - b);
|
||||
chatMsg.content = sortedInners.map(k => parts[k]).join("\n");
|
||||
|
||||
// Single ID per iteration — no inner_turn in the ID
|
||||
chatMsg.id = `queen-stream-${event.execution_id}-${iter}`;
|
||||
}
|
||||
if (isQueen) {
|
||||
chatMsg.role = role;
|
||||
@@ -2136,6 +2158,29 @@ export default function Workspace() {
|
||||
}
|
||||
break;
|
||||
|
||||
case "context_usage_updated": {
|
||||
const streamKey = isQueen ? "__queen__" : (event.node_id || streamId);
|
||||
const usagePct = (event.data?.usage_pct as number) ?? 0;
|
||||
const messageCount = (event.data?.message_count as number) ?? 0;
|
||||
const estimatedTokens = (event.data?.estimated_tokens as number) ?? 0;
|
||||
const maxTokens = (event.data?.max_context_tokens as number) ?? 0;
|
||||
setAgentStates(prev => {
|
||||
const state = prev[agentType];
|
||||
if (!state) return prev;
|
||||
return {
|
||||
...prev,
|
||||
[agentType]: {
|
||||
...state,
|
||||
contextUsage: {
|
||||
...state.contextUsage,
|
||||
[streamKey]: { usagePct, messageCount, estimatedTokens, maxTokens },
|
||||
},
|
||||
},
|
||||
};
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
case "node_action_plan":
|
||||
if (!isQueen && event.node_id) {
|
||||
const plan = (event.data?.plan as string) || "";
|
||||
@@ -3174,6 +3219,7 @@ export default function Workspace() {
|
||||
}
|
||||
onMultiQuestionSubmit={handleMultiQuestionAnswer}
|
||||
onQuestionDismiss={handleQuestionDismiss}
|
||||
contextUsage={activeAgentState?.contextUsage}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
@@ -3377,6 +3423,7 @@ export default function Workspace() {
|
||||
workerSessionId={null}
|
||||
nodeLogs={activeAgentState?.nodeLogs[resolvedSelectedNode.id] || []}
|
||||
actionPlan={activeAgentState?.nodeActionPlans[resolvedSelectedNode.id]}
|
||||
contextUsage={activeAgentState?.contextUsage[resolvedSelectedNode.id]}
|
||||
onClose={() => setSelectedNode(null)}
|
||||
/>
|
||||
)}
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
"""Tests for AS-9: Skill directory allowlisting in file-read tool interception."""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.llm.provider import ToolResult
|
||||
|
||||
|
||||
def _make_tool_call_event(tool_name: str, path: str):
|
||||
"""Build a minimal ToolCallEvent-like object."""
|
||||
tc = MagicMock()
|
||||
tc.tool_use_id = "tc-1"
|
||||
tc.tool_name = tool_name
|
||||
tc.tool_input = {"path": path}
|
||||
return tc
|
||||
|
||||
|
||||
def _make_node(skill_dirs: list[str]):
|
||||
"""Build a minimal EventLoopNode with skill_dirs set."""
|
||||
from framework.graph.event_loop_node import EventLoopNode
|
||||
|
||||
mock_result = ToolResult(tool_use_id="tc-1", content="from-executor")
|
||||
node = EventLoopNode(tool_executor=MagicMock(return_value=mock_result))
|
||||
node._skill_dirs = skill_dirs
|
||||
return node
|
||||
|
||||
|
||||
class TestSkillFileReadInterception:
|
||||
@pytest.mark.asyncio
|
||||
async def test_reads_file_in_skill_dir(self, tmp_path):
|
||||
"""File under a skill dir is read directly, bypassing the executor."""
|
||||
skill_dir = tmp_path / "my-skill"
|
||||
skill_dir.mkdir()
|
||||
script = skill_dir / "scripts" / "run.py"
|
||||
script.parent.mkdir()
|
||||
script.write_text("print('hello')")
|
||||
|
||||
node = _make_node([str(skill_dir)])
|
||||
tc = _make_tool_call_event("view_file", str(script))
|
||||
|
||||
result = await node._execute_tool(tc)
|
||||
|
||||
assert result.content == "print('hello')"
|
||||
assert not result.is_error
|
||||
node._tool_executor.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_skill_md_read_marked_as_skill_content(self, tmp_path):
|
||||
"""Reading SKILL.md sets is_skill_content=True for AS-10 protection."""
|
||||
skill_dir = tmp_path / "my-skill"
|
||||
skill_dir.mkdir()
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text("---\nname: my-skill\ndescription: Test\n---\nInstructions.")
|
||||
|
||||
node = _make_node([str(skill_dir)])
|
||||
tc = _make_tool_call_event("view_file", str(skill_md))
|
||||
|
||||
result = await node._execute_tool(tc)
|
||||
|
||||
assert result.is_skill_content is True
|
||||
assert not result.is_error
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_skill_md_resource_not_marked(self, tmp_path):
|
||||
"""Bundled resource (not SKILL.md) is NOT marked as skill_content."""
|
||||
skill_dir = tmp_path / "my-skill"
|
||||
skill_dir.mkdir()
|
||||
ref = skill_dir / "references" / "api.md"
|
||||
ref.parent.mkdir()
|
||||
ref.write_text("# API Reference")
|
||||
|
||||
node = _make_node([str(skill_dir)])
|
||||
tc = _make_tool_call_event("load_data", str(ref))
|
||||
|
||||
result = await node._execute_tool(tc)
|
||||
|
||||
assert result.is_skill_content is False
|
||||
assert not result.is_error
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_path_outside_skill_dir_goes_to_executor(self, tmp_path):
|
||||
"""Path outside skill dirs is passed through to the executor unchanged."""
|
||||
skill_dir = tmp_path / "my-skill"
|
||||
skill_dir.mkdir()
|
||||
other_file = tmp_path / "other" / "file.txt"
|
||||
other_file.parent.mkdir()
|
||||
other_file.write_text("other content")
|
||||
|
||||
node = _make_node([str(skill_dir)])
|
||||
tc = _make_tool_call_event("view_file", str(other_file))
|
||||
|
||||
result = await node._execute_tool(tc)
|
||||
|
||||
assert result.content == "from-executor"
|
||||
node._tool_executor.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_skill_dirs_goes_to_executor(self, tmp_path):
|
||||
"""When skill_dirs is empty, all tool calls go to executor."""
|
||||
skill_dir = tmp_path / "my-skill"
|
||||
skill_dir.mkdir()
|
||||
script = skill_dir / "scripts" / "run.py"
|
||||
script.parent.mkdir()
|
||||
script.write_text("print('hello')")
|
||||
|
||||
node = _make_node([])
|
||||
tc = _make_tool_call_event("view_file", str(script))
|
||||
|
||||
result = await node._execute_tool(tc)
|
||||
|
||||
assert result.content == "from-executor"
|
||||
node._tool_executor.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_missing_file_returns_error(self, tmp_path):
|
||||
"""Non-existent file under skill dir returns is_error=True."""
|
||||
skill_dir = tmp_path / "my-skill"
|
||||
skill_dir.mkdir()
|
||||
missing = skill_dir / "scripts" / "missing.py"
|
||||
|
||||
node = _make_node([str(skill_dir)])
|
||||
tc = _make_tool_call_event("view_file", str(missing))
|
||||
|
||||
result = await node._execute_tool(tc)
|
||||
|
||||
assert result.is_error is True
|
||||
assert "Could not read skill resource" in result.content
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_file_read_tool_goes_to_executor(self, tmp_path):
|
||||
"""Non file-read tools (e.g. web_search) bypass the interceptor."""
|
||||
skill_dir = tmp_path / "my-skill"
|
||||
skill_dir.mkdir()
|
||||
|
||||
node = _make_node([str(skill_dir)])
|
||||
tc = _make_tool_call_event("web_search", str(skill_dir / "SKILL.md"))
|
||||
|
||||
result = await node._execute_tool(tc)
|
||||
|
||||
assert result.content == "from-executor"
|
||||
node._tool_executor.assert_called_once()
|
||||
@@ -69,7 +69,13 @@ class TestSkillCatalog:
|
||||
|
||||
def test_to_prompt_xml_generation(self):
|
||||
skills = [
|
||||
_make_skill("alpha", "Alpha skill", "project", location="/p/alpha/SKILL.md"),
|
||||
_make_skill(
|
||||
"alpha",
|
||||
"Alpha skill",
|
||||
"project",
|
||||
location="/p/alpha/SKILL.md",
|
||||
base_dir="/p/alpha",
|
||||
),
|
||||
_make_skill("beta", "Beta skill", "user", location="/u/beta/SKILL.md"),
|
||||
]
|
||||
catalog = SkillCatalog(skills)
|
||||
@@ -81,6 +87,7 @@ class TestSkillCatalog:
|
||||
assert "<name>beta</name>" in prompt
|
||||
assert "<description>Alpha skill</description>" in prompt
|
||||
assert "<location>/p/alpha/SKILL.md</location>" in prompt
|
||||
assert "<base_dir>/p/alpha</base_dir>" in prompt
|
||||
|
||||
def test_to_prompt_sorted_by_name(self):
|
||||
skills = [
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
"""Tests for AS-10: Activated skill content protected from context pruning."""
|
||||
|
||||
import pytest
|
||||
|
||||
from framework.graph.conversation import Message, NodeConversation
|
||||
|
||||
|
||||
def _make_conversation() -> NodeConversation:
|
||||
conv = NodeConversation.__new__(NodeConversation)
|
||||
conv._messages = []
|
||||
conv._next_seq = 0
|
||||
conv._current_phase = None
|
||||
conv._store = None
|
||||
return conv
|
||||
|
||||
|
||||
async def _add_tool_msg(conv: NodeConversation, content: str, **kwargs) -> Message:
|
||||
return await conv.add_tool_result(
|
||||
tool_use_id=f"tc-{conv._next_seq}",
|
||||
content=content,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
class TestSkillContentProtection:
|
||||
@pytest.mark.asyncio
|
||||
async def test_is_skill_content_flag_persists(self):
|
||||
"""Message created with is_skill_content=True retains the flag."""
|
||||
conv = _make_conversation()
|
||||
msg = await _add_tool_msg(conv, "skill instructions", is_skill_content=True)
|
||||
assert msg.is_skill_content is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_regular_message_not_marked(self):
|
||||
"""Normal tool result messages are not marked as skill content."""
|
||||
conv = _make_conversation()
|
||||
msg = await _add_tool_msg(conv, "some tool output")
|
||||
assert msg.is_skill_content is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_skill_content_survives_prune(self):
|
||||
"""Skill content messages are skipped by prune_old_tool_results."""
|
||||
conv = _make_conversation()
|
||||
|
||||
# Add many regular tool results to push over prune threshold
|
||||
for _ in range(30):
|
||||
await _add_tool_msg(conv, "x" * 500) # ~125 tokens each
|
||||
|
||||
# Add a skill content message
|
||||
skill_msg = await _add_tool_msg(
|
||||
conv,
|
||||
"## Deep Research\n" + "instructions " * 200,
|
||||
is_skill_content=True,
|
||||
)
|
||||
|
||||
pruned = await conv.prune_old_tool_results(protect_tokens=500, min_prune_tokens=100)
|
||||
|
||||
assert pruned > 0, "Expected some messages to be pruned"
|
||||
# Find the skill message — it must not be pruned
|
||||
matching = [m for m in conv._messages if m.seq == skill_msg.seq]
|
||||
assert matching, "Skill content message was removed"
|
||||
assert not matching[0].content.startswith("[Pruned tool result")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_regular_content_can_be_pruned(self):
|
||||
"""Regular tool results are still pruned when over threshold."""
|
||||
conv = _make_conversation()
|
||||
|
||||
for _ in range(20):
|
||||
await _add_tool_msg(conv, "regular tool output " * 50)
|
||||
|
||||
pruned = await conv.prune_old_tool_results(protect_tokens=500, min_prune_tokens=100)
|
||||
|
||||
assert pruned > 0, "Expected regular messages to be pruned"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_error_messages_also_protected(self):
|
||||
"""Existing is_error protection still works alongside is_skill_content."""
|
||||
conv = _make_conversation()
|
||||
|
||||
for _ in range(20):
|
||||
await _add_tool_msg(conv, "output " * 100)
|
||||
|
||||
err_msg = await _add_tool_msg(conv, "tool failed", is_error=True)
|
||||
|
||||
await conv.prune_old_tool_results(protect_tokens=200, min_prune_tokens=50)
|
||||
|
||||
matching = [m for m in conv._messages if m.seq == err_msg.seq]
|
||||
assert matching
|
||||
assert not matching[0].content.startswith("[Pruned tool result")
|
||||
@@ -0,0 +1,92 @@
|
||||
"""Tests for AS-6 skill resource loading support.
|
||||
|
||||
Covers:
|
||||
- <base_dir> element in catalog XML
|
||||
- allowlisted_dirs property reflects trusted skill base directories
|
||||
- skill_dirs propagation to NodeContext
|
||||
"""
|
||||
|
||||
from framework.skills.catalog import SkillCatalog
|
||||
from framework.skills.parser import ParsedSkill
|
||||
|
||||
|
||||
def _make_skill(
|
||||
name: str,
|
||||
base_dir: str,
|
||||
source_scope: str = "project",
|
||||
) -> ParsedSkill:
|
||||
return ParsedSkill(
|
||||
name=name,
|
||||
description=f"Skill {name}",
|
||||
location=f"{base_dir}/SKILL.md",
|
||||
base_dir=base_dir,
|
||||
source_scope=source_scope,
|
||||
body="Instructions.",
|
||||
)
|
||||
|
||||
|
||||
class TestSkillResourceBaseDir:
|
||||
def test_base_dir_in_xml(self):
|
||||
"""Each community skill entry should expose its base_dir in the catalog XML."""
|
||||
skill = _make_skill("deploy", "/project/.hive/skills/deploy")
|
||||
catalog = SkillCatalog([skill])
|
||||
prompt = catalog.to_prompt()
|
||||
|
||||
assert "<base_dir>/project/.hive/skills/deploy</base_dir>" in prompt
|
||||
|
||||
def test_base_dir_xml_escaped(self):
|
||||
"""base_dir with XML-special chars should be escaped."""
|
||||
skill = _make_skill("s", "/path/with <&> chars")
|
||||
catalog = SkillCatalog([skill])
|
||||
prompt = catalog.to_prompt()
|
||||
|
||||
assert "<base_dir>/path/with <&> chars</base_dir>" in prompt
|
||||
|
||||
def test_base_dir_absent_for_framework_skills(self):
|
||||
"""Framework-scope skills are filtered from the catalog, so no base_dir either."""
|
||||
skill = _make_skill("fw", "/hive/_default_skills/fw", source_scope="framework")
|
||||
catalog = SkillCatalog([skill])
|
||||
assert catalog.to_prompt() == ""
|
||||
|
||||
def test_allowlisted_dirs_matches_skills(self):
|
||||
"""allowlisted_dirs returns all skill base_dirs including framework ones."""
|
||||
skills = [
|
||||
_make_skill("a", "/skills/a", "project"),
|
||||
_make_skill("b", "/skills/b", "user"),
|
||||
_make_skill("c", "/skills/c", "framework"),
|
||||
]
|
||||
catalog = SkillCatalog(skills)
|
||||
dirs = catalog.allowlisted_dirs
|
||||
|
||||
assert "/skills/a" in dirs
|
||||
assert "/skills/b" in dirs
|
||||
assert "/skills/c" in dirs
|
||||
|
||||
def test_allowlisted_dirs_empty_catalog(self):
|
||||
assert SkillCatalog().allowlisted_dirs == []
|
||||
|
||||
|
||||
class TestSkillDirsPropagation:
|
||||
def _make_ctx(self, **kwargs):
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from framework.graph.node import NodeContext
|
||||
|
||||
return NodeContext(
|
||||
runtime=MagicMock(),
|
||||
node_id="n",
|
||||
node_spec=MagicMock(),
|
||||
memory={},
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def test_node_context_skill_dirs_default(self):
|
||||
"""NodeContext.skill_dirs defaults to empty list."""
|
||||
ctx = self._make_ctx()
|
||||
assert ctx.skill_dirs == []
|
||||
|
||||
def test_node_context_skill_dirs_set(self):
|
||||
"""NodeContext.skill_dirs can be populated."""
|
||||
dirs = ["/skills/a", "/skills/b"]
|
||||
ctx = self._make_ctx(skill_dirs=dirs)
|
||||
assert ctx.skill_dirs == dirs
|
||||
@@ -0,0 +1,471 @@
|
||||
"""Tests for skill trust gating (AS-13)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from framework.skills.parser import ParsedSkill
|
||||
from framework.skills.trust import (
|
||||
ProjectTrustClassification,
|
||||
ProjectTrustDetector,
|
||||
TrustedRepoStore,
|
||||
TrustGate,
|
||||
_is_localhost_remote,
|
||||
_normalize_remote_url,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def make_skill(name: str = "test-skill", scope: str = "project") -> ParsedSkill:
|
||||
return ParsedSkill(
|
||||
name=name,
|
||||
description="Test skill",
|
||||
location=f"/fake/{name}/SKILL.md",
|
||||
base_dir=f"/fake/{name}",
|
||||
source_scope=scope,
|
||||
body="Test skill instructions.",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _normalize_remote_url
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNormalizeRemoteUrl:
|
||||
def test_ssh_scp_format(self):
|
||||
assert _normalize_remote_url("git@github.com:org/repo.git") == "github.com/org/repo"
|
||||
|
||||
def test_https_format(self):
|
||||
assert _normalize_remote_url("https://github.com/org/repo.git") == "github.com/org/repo"
|
||||
|
||||
def test_https_no_dot_git(self):
|
||||
assert _normalize_remote_url("https://github.com/org/repo") == "github.com/org/repo"
|
||||
|
||||
def test_ssh_url_format(self):
|
||||
assert _normalize_remote_url("ssh://git@github.com/org/repo.git") == "github.com/org/repo"
|
||||
|
||||
def test_lowercased(self):
|
||||
assert _normalize_remote_url("git@GitHub.COM:Org/Repo.git") == "github.com/org/repo"
|
||||
|
||||
def test_trailing_slash_stripped(self):
|
||||
assert _normalize_remote_url("https://github.com/org/repo/") == "github.com/org/repo"
|
||||
|
||||
def test_gitlab(self):
|
||||
assert _normalize_remote_url("git@gitlab.com:team/project.git") == "gitlab.com/team/project"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _is_localhost_remote
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIsLocalhostRemote:
|
||||
def test_localhost_https(self):
|
||||
assert _is_localhost_remote("http://localhost/org/repo")
|
||||
|
||||
def test_127_0_0_1(self):
|
||||
assert _is_localhost_remote("https://127.0.0.1/repo")
|
||||
|
||||
def test_github_not_local(self):
|
||||
assert not _is_localhost_remote("https://github.com/org/repo")
|
||||
|
||||
def test_scp_localhost(self):
|
||||
assert _is_localhost_remote("git@localhost:org/repo")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TrustedRepoStore
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTrustedRepoStore:
|
||||
def test_empty_store_is_not_trusted(self, tmp_path):
|
||||
store = TrustedRepoStore(tmp_path / "trusted.json")
|
||||
assert not store.is_trusted("github.com/org/repo")
|
||||
|
||||
def test_trust_and_lookup(self, tmp_path):
|
||||
store = TrustedRepoStore(tmp_path / "trusted.json")
|
||||
store.trust("github.com/org/repo", project_path="/some/path")
|
||||
assert store.is_trusted("github.com/org/repo")
|
||||
|
||||
def test_revoke(self, tmp_path):
|
||||
store = TrustedRepoStore(tmp_path / "trusted.json")
|
||||
store.trust("github.com/org/repo")
|
||||
assert store.revoke("github.com/org/repo")
|
||||
assert not store.is_trusted("github.com/org/repo")
|
||||
|
||||
def test_revoke_nonexistent_returns_false(self, tmp_path):
|
||||
store = TrustedRepoStore(tmp_path / "trusted.json")
|
||||
assert not store.revoke("github.com/nobody/nowhere")
|
||||
|
||||
def test_persists_across_instances(self, tmp_path):
|
||||
path = tmp_path / "trusted.json"
|
||||
store1 = TrustedRepoStore(path)
|
||||
store1.trust("github.com/org/repo")
|
||||
|
||||
store2 = TrustedRepoStore(path)
|
||||
assert store2.is_trusted("github.com/org/repo")
|
||||
|
||||
def test_atomic_write(self, tmp_path):
|
||||
"""Save must not leave a .tmp file behind."""
|
||||
path = tmp_path / "trusted.json"
|
||||
store = TrustedRepoStore(path)
|
||||
store.trust("github.com/org/repo")
|
||||
assert not (tmp_path / "trusted.tmp").exists()
|
||||
assert path.exists()
|
||||
|
||||
def test_corrupted_json_recovers_gracefully(self, tmp_path):
|
||||
path = tmp_path / "trusted.json"
|
||||
path.write_text("{not valid json{{", encoding="utf-8")
|
||||
store = TrustedRepoStore(path)
|
||||
assert not store.is_trusted("github.com/any/repo") # no crash
|
||||
|
||||
def test_json_schema(self, tmp_path):
|
||||
path = tmp_path / "trusted.json"
|
||||
store = TrustedRepoStore(path)
|
||||
store.trust("github.com/org/repo", project_path="/work/repo")
|
||||
data = json.loads(path.read_text())
|
||||
assert data["version"] == 1
|
||||
assert data["entries"][0]["repo_key"] == "github.com/org/repo"
|
||||
assert "added_at" in data["entries"][0]
|
||||
|
||||
def test_list_entries(self, tmp_path):
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
store.trust("github.com/a/b")
|
||||
store.trust("github.com/c/d")
|
||||
entries = store.list_entries()
|
||||
assert len(entries) == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ProjectTrustDetector
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestProjectTrustDetector:
|
||||
def test_none_project_dir_always_trusted(self, tmp_path):
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
det = ProjectTrustDetector(store)
|
||||
cls, _ = det.classify(None)
|
||||
assert cls == ProjectTrustClassification.ALWAYS_TRUSTED
|
||||
|
||||
def test_nonexistent_dir_always_trusted(self, tmp_path):
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
det = ProjectTrustDetector(store)
|
||||
cls, _ = det.classify(tmp_path / "nonexistent")
|
||||
assert cls == ProjectTrustClassification.ALWAYS_TRUSTED
|
||||
|
||||
def test_no_git_dir_always_trusted(self, tmp_path):
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
det = ProjectTrustDetector(store)
|
||||
cls, _ = det.classify(tmp_path)
|
||||
assert cls == ProjectTrustClassification.ALWAYS_TRUSTED
|
||||
|
||||
def test_no_remote_always_trusted(self, tmp_path):
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
det = ProjectTrustDetector(store)
|
||||
# git command returns non-zero (no remote)
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(returncode=1, stdout="")
|
||||
cls, _ = det.classify(tmp_path)
|
||||
assert cls == ProjectTrustClassification.ALWAYS_TRUSTED
|
||||
|
||||
def test_localhost_remote_always_trusted(self, tmp_path):
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
det = ProjectTrustDetector(store)
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0, stdout="http://localhost/org/repo.git\n"
|
||||
)
|
||||
cls, _ = det.classify(tmp_path)
|
||||
assert cls == ProjectTrustClassification.ALWAYS_TRUSTED
|
||||
|
||||
def test_trusted_by_store(self, tmp_path):
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
store.trust("github.com/trusted/repo")
|
||||
det = ProjectTrustDetector(store)
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0, stdout="git@github.com:trusted/repo.git\n"
|
||||
)
|
||||
cls, key = det.classify(tmp_path)
|
||||
assert cls == ProjectTrustClassification.TRUSTED_BY_USER
|
||||
assert key == "github.com/trusted/repo"
|
||||
|
||||
def test_unknown_remote_untrusted(self, tmp_path):
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
det = ProjectTrustDetector(store)
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0, stdout="https://github.com/stranger/repo.git\n"
|
||||
)
|
||||
cls, key = det.classify(tmp_path)
|
||||
assert cls == ProjectTrustClassification.UNTRUSTED
|
||||
assert key == "github.com/stranger/repo"
|
||||
|
||||
def test_own_remotes_env_var(self, tmp_path, monkeypatch):
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
monkeypatch.setenv("HIVE_OWN_REMOTES", "github.com/myorg/*")
|
||||
det = ProjectTrustDetector(store)
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.return_value = MagicMock(
|
||||
returncode=0, stdout="git@github.com:myorg/myrepo.git\n"
|
||||
)
|
||||
cls, _ = det.classify(tmp_path)
|
||||
assert cls == ProjectTrustClassification.ALWAYS_TRUSTED
|
||||
|
||||
def test_git_timeout_treated_as_trusted(self, tmp_path):
|
||||
import subprocess
|
||||
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
det = ProjectTrustDetector(store)
|
||||
with patch("subprocess.run", side_effect=subprocess.TimeoutExpired("git", 3)):
|
||||
cls, _ = det.classify(tmp_path)
|
||||
assert cls == ProjectTrustClassification.ALWAYS_TRUSTED
|
||||
|
||||
def test_git_not_found_treated_as_trusted(self, tmp_path):
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
det = ProjectTrustDetector(store)
|
||||
with patch("subprocess.run", side_effect=FileNotFoundError("git not found")):
|
||||
cls, _ = det.classify(tmp_path)
|
||||
assert cls == ProjectTrustClassification.ALWAYS_TRUSTED
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TrustGate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTrustGate:
|
||||
def test_framework_scope_always_passes(self, tmp_path):
|
||||
skill = make_skill("fw-skill", "framework")
|
||||
gate = TrustGate(store=TrustedRepoStore(tmp_path / "t.json"), interactive=False)
|
||||
result = gate.filter_and_gate([skill], project_dir=None)
|
||||
assert any(s.name == "fw-skill" for s in result)
|
||||
|
||||
def test_user_scope_always_passes(self, tmp_path):
|
||||
skill = make_skill("user-skill", "user")
|
||||
gate = TrustGate(store=TrustedRepoStore(tmp_path / "t.json"), interactive=False)
|
||||
result = gate.filter_and_gate([skill], project_dir=None)
|
||||
assert any(s.name == "user-skill" for s in result)
|
||||
|
||||
def test_no_project_skills_returns_early(self, tmp_path):
|
||||
"""When there are no project-scope skills, trust detection is skipped."""
|
||||
fw = make_skill("fw", "framework")
|
||||
gate = TrustGate(store=TrustedRepoStore(tmp_path / "t.json"), interactive=False)
|
||||
result = gate.filter_and_gate([fw], project_dir=tmp_path)
|
||||
assert result == [fw]
|
||||
|
||||
def test_trusted_project_skills_pass(self, tmp_path):
|
||||
"""Project skills from a trusted repo pass through."""
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
store.trust("github.com/trusted/repo")
|
||||
skill = make_skill("proj-skill", "project")
|
||||
gate = TrustGate(store=store, interactive=False)
|
||||
with patch("subprocess.run") as m:
|
||||
m.return_value = MagicMock(returncode=0, stdout="git@github.com:trusted/repo.git\n")
|
||||
result = gate.filter_and_gate([skill], project_dir=tmp_path)
|
||||
assert any(s.name == "proj-skill" for s in result)
|
||||
|
||||
def test_untrusted_headless_skips_and_logs(self, tmp_path, caplog):
|
||||
"""In non-interactive mode, untrusted project skills are skipped."""
|
||||
import logging
|
||||
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
skill = make_skill("evil-skill", "project")
|
||||
gate = TrustGate(store=store, interactive=False)
|
||||
with patch("subprocess.run") as m:
|
||||
m.return_value = MagicMock(
|
||||
returncode=0, stdout="https://github.com/stranger/evil.git\n"
|
||||
)
|
||||
with caplog.at_level(logging.WARNING):
|
||||
result = gate.filter_and_gate([skill], project_dir=tmp_path)
|
||||
assert not any(s.name == "evil-skill" for s in result)
|
||||
assert "untrusted" in caplog.text.lower() or "skipping" in caplog.text.lower()
|
||||
|
||||
def test_interactive_consent_session_only(self, tmp_path):
|
||||
"""Option 1 (session only) includes skills without writing to store."""
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
skill = make_skill("session-skill", "project")
|
||||
outputs = []
|
||||
gate = TrustGate(
|
||||
store=store,
|
||||
interactive=True,
|
||||
print_fn=outputs.append,
|
||||
input_fn=lambda _: "1", # trust this session
|
||||
)
|
||||
with (
|
||||
patch("sys.stdin.isatty", return_value=True),
|
||||
patch("sys.stdout.isatty", return_value=True),
|
||||
patch("subprocess.run") as m,
|
||||
):
|
||||
m.return_value = MagicMock(
|
||||
returncode=0, stdout="https://github.com/stranger/repo.git\n"
|
||||
)
|
||||
result = gate.filter_and_gate([skill], project_dir=tmp_path)
|
||||
assert any(s.name == "session-skill" for s in result)
|
||||
# Must NOT persist to trusted store
|
||||
assert not store.is_trusted("github.com/stranger/repo")
|
||||
|
||||
def test_interactive_consent_permanent(self, tmp_path):
|
||||
"""Option 2 (permanent) includes skills and persists to trusted store."""
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
skill = make_skill("perm-skill", "project")
|
||||
gate = TrustGate(
|
||||
store=store,
|
||||
interactive=True,
|
||||
print_fn=lambda _: None,
|
||||
input_fn=lambda _: "2", # trust permanently
|
||||
)
|
||||
with (
|
||||
patch("sys.stdin.isatty", return_value=True),
|
||||
patch("sys.stdout.isatty", return_value=True),
|
||||
patch("subprocess.run") as m,
|
||||
):
|
||||
m.return_value = MagicMock(
|
||||
returncode=0, stdout="https://github.com/stranger/repo.git\n"
|
||||
)
|
||||
result = gate.filter_and_gate([skill], project_dir=tmp_path)
|
||||
assert any(s.name == "perm-skill" for s in result)
|
||||
assert store.is_trusted("github.com/stranger/repo")
|
||||
|
||||
def test_interactive_consent_deny(self, tmp_path):
|
||||
"""Option 3 (deny) excludes project skills."""
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
skill = make_skill("bad-skill", "project")
|
||||
gate = TrustGate(
|
||||
store=store,
|
||||
interactive=True,
|
||||
print_fn=lambda _: None,
|
||||
input_fn=lambda _: "3", # deny
|
||||
)
|
||||
with (
|
||||
patch("sys.stdin.isatty", return_value=True),
|
||||
patch("sys.stdout.isatty", return_value=True),
|
||||
patch("subprocess.run") as m,
|
||||
):
|
||||
m.return_value = MagicMock(
|
||||
returncode=0, stdout="https://github.com/stranger/repo.git\n"
|
||||
)
|
||||
result = gate.filter_and_gate([skill], project_dir=tmp_path)
|
||||
assert not any(s.name == "bad-skill" for s in result)
|
||||
|
||||
def test_env_var_override_trusts_all(self, tmp_path, monkeypatch):
|
||||
"""HIVE_TRUST_PROJECT_SKILLS=1 bypasses gating entirely."""
|
||||
monkeypatch.setenv("HIVE_TRUST_PROJECT_SKILLS", "1")
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
skill = make_skill("env-skill", "project")
|
||||
gate = TrustGate(store=store, interactive=False)
|
||||
result = gate.filter_and_gate([skill], project_dir=tmp_path)
|
||||
assert any(s.name == "env-skill" for s in result)
|
||||
|
||||
def test_keyboard_interrupt_treated_as_deny(self, tmp_path):
|
||||
"""Ctrl-C during consent prompt should deny cleanly."""
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
skill = make_skill("interrupted-skill", "project")
|
||||
gate = TrustGate(
|
||||
store=store,
|
||||
interactive=True,
|
||||
print_fn=lambda _: None,
|
||||
input_fn=lambda _: (_ for _ in ()).throw(KeyboardInterrupt()),
|
||||
)
|
||||
with (
|
||||
patch("sys.stdin.isatty", return_value=True),
|
||||
patch("sys.stdout.isatty", return_value=True),
|
||||
patch("subprocess.run") as m,
|
||||
):
|
||||
m.return_value = MagicMock(
|
||||
returncode=0, stdout="https://github.com/stranger/repo.git\n"
|
||||
)
|
||||
result = gate.filter_and_gate([skill], project_dir=tmp_path)
|
||||
assert not any(s.name == "interrupted-skill" for s in result)
|
||||
|
||||
def test_security_notice_shown_once(self, tmp_path, monkeypatch):
|
||||
"""Security notice (NFR-5) should be shown the first time only."""
|
||||
# Use a temp sentinel path
|
||||
sentinel = tmp_path / ".skill_trust_notice_shown"
|
||||
monkeypatch.setattr("framework.skills.trust._NOTICE_SENTINEL_PATH", sentinel)
|
||||
assert not sentinel.exists()
|
||||
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
skill = make_skill("notice-skill", "project")
|
||||
output_lines: list[str] = []
|
||||
gate = TrustGate(
|
||||
store=store,
|
||||
interactive=True,
|
||||
print_fn=output_lines.append,
|
||||
input_fn=lambda _: "3",
|
||||
)
|
||||
with (
|
||||
patch("sys.stdin.isatty", return_value=True),
|
||||
patch("sys.stdout.isatty", return_value=True),
|
||||
patch("subprocess.run") as m,
|
||||
):
|
||||
m.return_value = MagicMock(
|
||||
returncode=0, stdout="https://github.com/stranger/repo.git\n"
|
||||
)
|
||||
gate.filter_and_gate([skill], project_dir=tmp_path)
|
||||
|
||||
assert sentinel.exists()
|
||||
assert any("Security notice" in line for line in output_lines)
|
||||
|
||||
# Second run should NOT show the notice again
|
||||
output_lines.clear()
|
||||
skill2 = make_skill("notice-skill-2", "project")
|
||||
with (
|
||||
patch("sys.stdin.isatty", return_value=True),
|
||||
patch("sys.stdout.isatty", return_value=True),
|
||||
patch("subprocess.run") as m,
|
||||
):
|
||||
m.return_value = MagicMock(
|
||||
returncode=0, stdout="https://github.com/stranger/repo.git\n"
|
||||
)
|
||||
gate.filter_and_gate([skill2], project_dir=tmp_path)
|
||||
|
||||
assert not any("Security notice" in line for line in output_lines)
|
||||
|
||||
def test_mixed_scopes_only_project_gated(self, tmp_path, monkeypatch):
|
||||
"""Framework and user skills should pass through even if project skills are denied."""
|
||||
(tmp_path / ".git").mkdir()
|
||||
store = TrustedRepoStore(tmp_path / "t.json")
|
||||
fw_skill = make_skill("fw", "framework")
|
||||
user_skill = make_skill("usr", "user")
|
||||
proj_skill = make_skill("proj", "project")
|
||||
gate = TrustGate(
|
||||
store=store,
|
||||
interactive=True,
|
||||
print_fn=lambda _: None,
|
||||
input_fn=lambda _: "3", # deny project skills
|
||||
)
|
||||
with (
|
||||
patch("sys.stdin.isatty", return_value=True),
|
||||
patch("sys.stdout.isatty", return_value=True),
|
||||
patch("subprocess.run") as m,
|
||||
):
|
||||
m.return_value = MagicMock(
|
||||
returncode=0, stdout="https://github.com/stranger/repo.git\n"
|
||||
)
|
||||
result = gate.filter_and_gate([fw_skill, user_skill, proj_skill], project_dir=tmp_path)
|
||||
names = {s.name for s in result}
|
||||
assert "fw" in names
|
||||
assert "usr" in names
|
||||
assert "proj" not in names
|
||||
@@ -152,7 +152,8 @@ def test_register_mcp_server_uses_connection_manager_when_enabled(monkeypatch):
|
||||
assert client.disconnect_calls == 0
|
||||
|
||||
|
||||
def test_register_mcp_server_defaults_to_direct_client_behavior(monkeypatch):
|
||||
def test_register_mcp_server_defaults_to_connection_manager(monkeypatch):
|
||||
"""Default behavior uses the connection manager (reuse enabled by default)."""
|
||||
registry = ToolRegistry()
|
||||
created_clients: list[_RegistryFakeClient] = []
|
||||
|
||||
@@ -161,13 +162,16 @@ def test_register_mcp_server_defaults_to_direct_client_behavior(monkeypatch):
|
||||
created_clients.append(client)
|
||||
return client
|
||||
|
||||
def fail_if_manager_used():
|
||||
raise AssertionError("connection manager should not be used by default")
|
||||
class FakeManager:
|
||||
def acquire(self, config):
|
||||
return fake_client_factory(config)
|
||||
|
||||
def release(self, server_name):
|
||||
pass
|
||||
|
||||
monkeypatch.setattr("framework.runner.mcp_client.MCPClient", fake_client_factory)
|
||||
monkeypatch.setattr(
|
||||
"framework.runner.mcp_connection_manager.MCPConnectionManager.get_instance",
|
||||
fail_if_manager_used,
|
||||
lambda: FakeManager(),
|
||||
)
|
||||
|
||||
count = registry.register_mcp_server(
|
||||
@@ -176,6 +180,27 @@ def test_register_mcp_server_defaults_to_direct_client_behavior(monkeypatch):
|
||||
|
||||
assert count == 1
|
||||
assert len(created_clients) == 1
|
||||
|
||||
|
||||
def test_register_mcp_server_direct_client_when_manager_disabled(monkeypatch):
|
||||
"""When use_connection_manager=False, a direct MCPClient is created."""
|
||||
registry = ToolRegistry()
|
||||
created_clients: list[_RegistryFakeClient] = []
|
||||
|
||||
def fake_client_factory(config):
|
||||
client = _RegistryFakeClient(config)
|
||||
created_clients.append(client)
|
||||
return client
|
||||
|
||||
monkeypatch.setattr("framework.runner.mcp_client.MCPClient", fake_client_factory)
|
||||
|
||||
count = registry.register_mcp_server(
|
||||
{"name": "direct", "transport": "stdio", "command": "echo"},
|
||||
use_connection_manager=False,
|
||||
)
|
||||
|
||||
assert count == 1
|
||||
assert len(created_clients) == 1
|
||||
assert created_clients[0].connect_calls == 1
|
||||
|
||||
registry.cleanup()
|
||||
|
||||
@@ -0,0 +1,290 @@
|
||||
# Agent Skills User Guide
|
||||
|
||||
This guide covers how to use, create, and manage Agent Skills in the Hive framework. Agent Skills follow the open [Agent Skills standard](https://agentskills.io) — skills written for Claude Code, Cursor, or other compatible agents work in Hive unchanged.
|
||||
|
||||
## What are skills?
|
||||
|
||||
Skills are folders containing a `SKILL.md` file that teaches an agent how to perform a specific task. They can also bundle scripts, templates, and reference materials. Skills are loaded on demand — the agent sees a lightweight catalog at startup and pulls in full instructions only when relevant.
|
||||
|
||||
## Quick start
|
||||
|
||||
### Install a skill
|
||||
|
||||
Drop a skill folder into one of the discovery directories:
|
||||
|
||||
```bash
|
||||
# Project-level (shared with the repo)
|
||||
mkdir -p .hive/skills/my-skill
|
||||
cat > .hive/skills/my-skill/SKILL.md << 'EOF'
|
||||
---
|
||||
name: my-skill
|
||||
description: Does X when the user asks about Y.
|
||||
---
|
||||
|
||||
# My Skill
|
||||
|
||||
Step-by-step instructions for the agent...
|
||||
EOF
|
||||
```
|
||||
|
||||
The agent will discover it automatically on the next session.
|
||||
|
||||
### List discovered skills
|
||||
|
||||
```bash
|
||||
hive skill list
|
||||
```
|
||||
|
||||
Output groups skills by scope:
|
||||
|
||||
```
|
||||
PROJECT SKILLS
|
||||
────────────────────────────────────
|
||||
• my-skill
|
||||
Does X when the user asks about Y.
|
||||
/home/user/project/.hive/skills/my-skill/SKILL.md
|
||||
|
||||
USER SKILLS
|
||||
────────────────────────────────────
|
||||
• deep-research
|
||||
Multi-step web research with source verification.
|
||||
/home/user/.hive/skills/deep-research/SKILL.md
|
||||
```
|
||||
|
||||
## Where to put skills
|
||||
|
||||
Hive scans five directories at startup, in this precedence order:
|
||||
|
||||
| Scope | Path | Use case |
|
||||
|-------|------|----------|
|
||||
| Project (Hive) | `<project>/.hive/skills/` | Skills specific to this repo |
|
||||
| Project (cross-client) | `<project>/.agents/skills/` | Skills shared across Claude Code, Cursor, etc. |
|
||||
| User (Hive) | `~/.hive/skills/` | Personal skills available in all projects |
|
||||
| User (cross-client) | `~/.agents/skills/` | Personal cross-client skills |
|
||||
| Framework | *(built-in)* | Default operational skills shipped with Hive |
|
||||
|
||||
**Precedence**: If two skills share the same name, the higher-precedence location wins. A project-level `code-review` skill overrides a user-level one with the same name.
|
||||
|
||||
**Cross-client paths**: The `.agents/skills/` directories are a convention shared across compatible agents. A skill installed at `~/.agents/skills/pdf-processing/` is visible to Hive, Claude Code, Cursor, and other compatible tools simultaneously.
|
||||
|
||||
## Creating a skill
|
||||
|
||||
### Directory structure
|
||||
|
||||
```
|
||||
my-skill/
|
||||
├── SKILL.md # Required — metadata + instructions
|
||||
├── scripts/ # Optional — executable code
|
||||
│ └── run.py
|
||||
├── references/ # Optional — supplementary docs
|
||||
│ └── api-reference.md
|
||||
└── assets/ # Optional — templates, data files
|
||||
└── template.json
|
||||
```
|
||||
|
||||
### SKILL.md format
|
||||
|
||||
Every skill needs a `SKILL.md` with YAML frontmatter and a markdown body:
|
||||
|
||||
```markdown
|
||||
---
|
||||
name: my-skill
|
||||
description: Extract and summarize PDF documents. Use when the user mentions PDFs or document extraction.
|
||||
---
|
||||
|
||||
# PDF Processing
|
||||
|
||||
## When to use
|
||||
Use this skill when the user needs to extract text from PDFs or merge documents.
|
||||
|
||||
## Steps
|
||||
1. Check if pdfplumber is available...
|
||||
2. Extract text using...
|
||||
|
||||
## Edge cases
|
||||
- Scanned PDFs need OCR first...
|
||||
```
|
||||
|
||||
### Frontmatter fields
|
||||
|
||||
| Field | Required | Description |
|
||||
|-------|----------|-------------|
|
||||
| `name` | Yes | Lowercase letters, numbers, hyphens. Must match the parent directory name. Max 64 chars. |
|
||||
| `description` | Yes | What the skill does and when to use it. Max 1024 chars. Include keywords that help the agent match tasks. |
|
||||
| `license` | No | License name or reference to a bundled LICENSE file. |
|
||||
| `compatibility` | No | Environment requirements (e.g., "Requires git, docker"). |
|
||||
| `metadata` | No | Arbitrary key-value pairs (author, version, etc.). |
|
||||
| `allowed-tools` | No | Space-delimited list of pre-approved tools. |
|
||||
|
||||
### Writing good descriptions
|
||||
|
||||
The description is critical — it's what the agent uses to decide whether to activate a skill. Be specific:
|
||||
|
||||
```yaml
|
||||
# Good — tells the agent what and when
|
||||
description: Extract text and tables from PDF files, fill PDF forms, and merge multiple PDFs. Use when working with PDF documents or when the user mentions PDFs, forms, or document extraction.
|
||||
|
||||
# Bad — too vague for the agent to match
|
||||
description: Helps with PDFs.
|
||||
```
|
||||
|
||||
### Writing good instructions
|
||||
|
||||
The markdown body is loaded into the agent's context when the skill is activated. Tips:
|
||||
|
||||
- **Be procedural**: Step-by-step instructions work better than abstract descriptions.
|
||||
- **Keep it focused**: Stay under 500 lines / 5000 tokens. Move detailed reference material to `references/`.
|
||||
- **Use relative paths**: Reference bundled files with relative paths (`scripts/run.py`, `references/guide.md`).
|
||||
- **Include examples**: Show sample inputs and expected outputs.
|
||||
- **Cover edge cases**: Tell the agent what to do when things go wrong.
|
||||
|
||||
## How skills are activated
|
||||
|
||||
Skills use **progressive disclosure** — three tiers that keep context usage efficient:
|
||||
|
||||
### Tier 1: Catalog (always loaded)
|
||||
|
||||
At session start, the agent sees a compact catalog of all available skills (name + description only, ~50-100 tokens each). This is how it knows what skills exist.
|
||||
|
||||
### Tier 2: Instructions (on demand)
|
||||
|
||||
When the agent determines a skill is relevant to the current task, it reads the full `SKILL.md` body into context. This happens automatically — the agent matches the task against skill descriptions and activates the best fit.
|
||||
|
||||
### Tier 3: Resources (on demand)
|
||||
|
||||
When skill instructions reference supporting files (`scripts/extract.py`, `references/api-docs.md`), the agent reads those individually as needed.
|
||||
|
||||
### Pre-activated skills
|
||||
|
||||
Some agents are configured to load specific skills at session start (skipping the catalog phase). This is set in the agent's configuration:
|
||||
|
||||
```python
|
||||
# In agent definition
|
||||
skills = ["code-review", "deep-research"]
|
||||
```
|
||||
|
||||
Pre-activated skills have their full instructions loaded from the start, without waiting for the agent to decide they're relevant.
|
||||
|
||||
## Trust and security
|
||||
|
||||
### Why trust gating exists
|
||||
|
||||
Project-level skills come from the repository being worked on. If you clone an untrusted repo that contains a `.hive/skills/` directory, those skills could inject instructions into the agent's system prompt. Trust gating prevents this.
|
||||
|
||||
**User-level and framework skills are always trusted.** Only project-scope skills go through trust gating.
|
||||
|
||||
### What happens with untrusted project skills
|
||||
|
||||
When Hive encounters project-level skills from a repo you haven't trusted before, it shows a consent prompt:
|
||||
|
||||
```
|
||||
============================================================
|
||||
SKILL TRUST REQUIRED
|
||||
============================================================
|
||||
|
||||
The project at /home/user/new-project wants to load 2 skill(s)
|
||||
that will inject instructions into the agent's system prompt.
|
||||
Source: github.com/org/new-project
|
||||
|
||||
Skills requesting access:
|
||||
• deploy-pipeline
|
||||
"Automated deployment workflow for this project."
|
||||
/home/user/new-project/.hive/skills/deploy-pipeline/SKILL.md
|
||||
• code-standards
|
||||
"Project-specific coding standards and review checklist."
|
||||
/home/user/new-project/.hive/skills/code-standards/SKILL.md
|
||||
|
||||
Options:
|
||||
1) Trust this session only
|
||||
2) Trust permanently — remember for future runs
|
||||
3) Deny — skip all project-scope skills from this repo
|
||||
────────────────────────────────────────────────────────────
|
||||
Select option (1-3):
|
||||
```
|
||||
|
||||
### Trust a repo via CLI
|
||||
|
||||
To trust a repo permanently without the interactive prompt:
|
||||
|
||||
```bash
|
||||
hive skill trust /path/to/project
|
||||
```
|
||||
|
||||
This stores the trust decision in `~/.hive/trusted_repos.json`, keyed by the normalized git remote URL (e.g., `github.com/org/repo`).
|
||||
|
||||
### Automatic trust
|
||||
|
||||
Some repos are trusted automatically:
|
||||
|
||||
- **No git repo**: Directories without `.git/` are always trusted.
|
||||
- **No remote**: Local-only git repos (no `origin` remote) are always trusted.
|
||||
- **Localhost remotes**: Repos with `localhost`/`127.0.0.1` remotes are always trusted.
|
||||
- **Own-remote patterns**: Repos matching patterns in `~/.hive/own_remotes` or the `HIVE_OWN_REMOTES` env var are always trusted.
|
||||
|
||||
### Configure own-remote patterns
|
||||
|
||||
If you trust all repos from your organization:
|
||||
|
||||
```bash
|
||||
# Via file (one pattern per line)
|
||||
echo "github.com/my-org/*" >> ~/.hive/own_remotes
|
||||
echo "gitlab.com/my-team/*" >> ~/.hive/own_remotes
|
||||
|
||||
# Via environment variable (comma-separated)
|
||||
export HIVE_OWN_REMOTES="github.com/my-org/*,github.com/my-corp/*"
|
||||
```
|
||||
|
||||
### CI / headless environments
|
||||
|
||||
In non-interactive environments, untrusted project skills are silently skipped. To trust them explicitly:
|
||||
|
||||
```bash
|
||||
export HIVE_TRUST_PROJECT_SKILLS=1
|
||||
hive run my-agent
|
||||
```
|
||||
|
||||
## Default skills
|
||||
|
||||
Hive ships with six built-in operational skills that provide runtime resilience. These are always loaded (unless disabled) and appear as "Operational Protocols" in the agent's system prompt.
|
||||
|
||||
| Skill | Purpose |
|
||||
|-------|---------|
|
||||
| `hive.note-taking` | Structured working notes in shared memory |
|
||||
| `hive.batch-ledger` | Track per-item status in batch operations |
|
||||
| `hive.context-preservation` | Save context before context window pruning |
|
||||
| `hive.quality-monitor` | Self-assess output quality periodically |
|
||||
| `hive.error-recovery` | Structured error classification and recovery |
|
||||
| `hive.task-decomposition` | Break complex tasks into subtasks |
|
||||
|
||||
### Disable default skills
|
||||
|
||||
In your agent configuration:
|
||||
|
||||
```python
|
||||
# Disable a specific default skill
|
||||
default_skills = {
|
||||
"hive.quality-monitor": {"enabled": False},
|
||||
}
|
||||
|
||||
# Disable all default skills
|
||||
default_skills = {
|
||||
"_all": {"enabled": False},
|
||||
}
|
||||
```
|
||||
|
||||
## Environment variables
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `HIVE_TRUST_PROJECT_SKILLS=1` | Bypass trust gating for all project-level skills (CI override) |
|
||||
| `HIVE_OWN_REMOTES` | Comma-separated glob patterns for auto-trusted remotes (e.g., `github.com/myorg/*`) |
|
||||
|
||||
## Compatibility with other agents
|
||||
|
||||
Skills written for any Agent Skills-compatible agent work in Hive:
|
||||
|
||||
- Place them in `.agents/skills/` (cross-client) or `.hive/skills/` (Hive-specific).
|
||||
- The `SKILL.md` format is identical across Claude Code, Cursor, Gemini CLI, and others.
|
||||
- Skills installed at `~/.agents/skills/` are visible to all compatible agents on your machine.
|
||||
|
||||
See the [Agent Skills specification](https://agentskills.io/specification) for the full format reference.
|
||||
+11
-60
@@ -1908,69 +1908,20 @@ if ($CodexAvailable) {
|
||||
Write-Host ""
|
||||
}
|
||||
|
||||
# Setup-only mode: show manual instructions
|
||||
# Final instructions and auto-launch
|
||||
Write-Host "API keys saved as User environment variables. New terminals pick them up automatically." -ForegroundColor DarkGray
|
||||
Write-Host "Launch anytime with " -NoNewline -ForegroundColor DarkGray
|
||||
Write-Color -Text "hive open" -Color Cyan -NoNewline
|
||||
Write-Host ". Run .\quickstart.ps1 again to reconfigure." -ForegroundColor DarkGray
|
||||
Write-Host ""
|
||||
|
||||
if ($FrontendBuilt) {
|
||||
Write-Color -Text "â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•" -Color Yellow
|
||||
Write-Host ""
|
||||
Write-Color -Text " IMPORTANT: Restart your terminal now!" -Color Yellow
|
||||
Write-Host ""
|
||||
Write-Color -Text "â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•â•" -Color Yellow
|
||||
Write-Host ""
|
||||
Write-Host 'Environment variables (uv, API keys) are now configured, but you need to'
|
||||
Write-Host 'restart your terminal for them to take effect in new sessions.'
|
||||
Write-Host ""
|
||||
|
||||
Write-Color -Text "Run an Agent:" -Color White
|
||||
Write-Host ""
|
||||
Write-Host " Quickstart only sets things up. Launch the dashboard when you're ready:"
|
||||
Write-Color -Text " hive open" -Color Cyan
|
||||
Write-Host ""
|
||||
|
||||
if ($SelectedProviderId -or $credKey) {
|
||||
Write-Color -Text "Note:" -Color White
|
||||
Write-Host "- uv has been added to your User PATH"
|
||||
if ($SelectedProviderId -and $SelectedEnvVar) {
|
||||
Write-Host "- $SelectedEnvVar is set for LLM access"
|
||||
}
|
||||
if ($credKey) {
|
||||
Write-Host "- HIVE_CREDENTIAL_KEY is set for credential encryption"
|
||||
}
|
||||
Write-Host "- All variables will persist across reboots"
|
||||
Write-Host ""
|
||||
}
|
||||
|
||||
Write-Color -Text 'Run .\quickstart.ps1 again to reconfigure.' -Color DarkGray
|
||||
Write-Color -Text "Launching dashboard..." -Color White
|
||||
Write-Host ""
|
||||
& hive open
|
||||
} else {
|
||||
Write-Color -Text "═══════════════════════════════════════════════════════" -Color Yellow
|
||||
Write-Host ""
|
||||
Write-Color -Text " IMPORTANT: Restart your terminal now!" -Color Yellow
|
||||
Write-Host ""
|
||||
Write-Color -Text "═══════════════════════════════════════════════════════" -Color Yellow
|
||||
Write-Host ""
|
||||
Write-Host 'Environment variables (uv, API keys) are now configured, but you need to'
|
||||
Write-Host 'restart your terminal for them to take effect in new sessions.'
|
||||
Write-Host ""
|
||||
|
||||
Write-Color -Text "Run an Agent:" -Color White
|
||||
Write-Host ""
|
||||
Write-Host " Frontend build was skipped or failed. Once the dashboard is available, launch it with:"
|
||||
Write-Color -Text "Frontend build was skipped or failed." -Color Yellow -NoNewline
|
||||
Write-Host " Launch manually when ready:"
|
||||
Write-Color -Text " hive open" -Color Cyan
|
||||
Write-Host ""
|
||||
|
||||
if ($SelectedProviderId -or $credKey) {
|
||||
Write-Color -Text "Note:" -Color White
|
||||
Write-Host "- uv has been added to your User PATH"
|
||||
if ($SelectedProviderId -and $SelectedEnvVar) {
|
||||
Write-Host "- $SelectedEnvVar is set for LLM access"
|
||||
}
|
||||
if ($credKey) {
|
||||
Write-Host "- HIVE_CREDENTIAL_KEY is set for credential encryption"
|
||||
}
|
||||
Write-Host "- All variables will persist across reboots"
|
||||
Write-Host ""
|
||||
}
|
||||
|
||||
Write-Color -Text 'Run .\quickstart.ps1 again to reconfigure.' -Color DarkGray
|
||||
Write-Host ""
|
||||
}
|
||||
|
||||
+8
-21
@@ -1810,29 +1810,16 @@ if [ "$CODEX_AVAILABLE" = true ]; then
|
||||
echo ""
|
||||
fi
|
||||
|
||||
echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
echo -e "${BOLD}IMPORTANT: Load your new configuration${NC}"
|
||||
echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
echo ""
|
||||
echo -e " Your API keys have been saved to ${CYAN}$SHELL_RC_FILE${NC}"
|
||||
echo -e " To use them, either:"
|
||||
echo ""
|
||||
echo -e " ${GREEN}Option 1:${NC} Source your shell config now:"
|
||||
echo -e " ${CYAN}source $SHELL_RC_FILE${NC}"
|
||||
echo ""
|
||||
echo -e " ${GREEN}Option 2:${NC} Open a new terminal window"
|
||||
echo ""
|
||||
echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
echo -e "${DIM}API keys saved to ${CYAN}$SHELL_RC_FILE${NC}${DIM}. New terminals pick them up automatically.${NC}"
|
||||
echo -e "${DIM}Launch anytime with ${CYAN}hive open${NC}${DIM}. Run ./quickstart.sh again to reconfigure.${NC}"
|
||||
echo ""
|
||||
|
||||
echo -e "${BOLD}Run an Agent:${NC}"
|
||||
echo ""
|
||||
if [ "$FRONTEND_BUILT" = true ]; then
|
||||
echo -e " Quickstart only sets things up. Launch the dashboard when you're ready:"
|
||||
echo -e "${BOLD}Launching dashboard...${NC}"
|
||||
echo ""
|
||||
hive open
|
||||
else
|
||||
echo -e " Frontend build was skipped or failed. Once the dashboard is available, launch it with:"
|
||||
echo -e "${YELLOW}Frontend build was skipped or failed.${NC} Launch manually when ready:"
|
||||
echo -e " ${CYAN}hive open${NC}"
|
||||
echo ""
|
||||
fi
|
||||
echo -e " ${CYAN}hive open${NC}"
|
||||
echo ""
|
||||
echo -e "${DIM}Run ./quickstart.sh again to reconfigure.${NC}"
|
||||
echo ""
|
||||
|
||||
@@ -1,17 +1,21 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Open a browser-based viewer for Hive LLM debug JSONL sessions.
|
||||
|
||||
Starts a local HTTP server and loads session data on demand (one at a time).
|
||||
|
||||
Usage:
|
||||
uv run --no-project scripts/llm_debug_log_visualizer.py
|
||||
uv run --no-project scripts/llm_debug_log_visualizer.py --no-open
|
||||
uv run --no-project scripts/llm_debug_log_visualizer.py --session <execution_id>
|
||||
uv run --no-project scripts/llm_debug_log_visualizer.py --port 8080
|
||||
uv run --no-project scripts/llm_debug_log_visualizer.py --output debug.html
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import http.server
|
||||
import json
|
||||
import tempfile
|
||||
import urllib.parse
|
||||
import webbrowser
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
@@ -55,10 +59,21 @@ def _parse_args() -> argparse.Namespace:
|
||||
default=200,
|
||||
help="Maximum number of newest log files to scan.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Port for the local server (0 = auto-pick a free port).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-open",
|
||||
action="store_true",
|
||||
help="Generate the HTML but do not open a browser.",
|
||||
help="Start the server but do not open a browser.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--include-tests",
|
||||
action="store_true",
|
||||
help="Show test/mock sessions (hidden by default).",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
@@ -117,8 +132,29 @@ def _format_timestamp(raw: str) -> str:
|
||||
return raw
|
||||
|
||||
|
||||
def _is_test_session(execution_id: str, records: list[dict[str, Any]]) -> bool:
|
||||
"""Return True for sessions that look like test artifacts."""
|
||||
if execution_id.startswith("<MagicMock"):
|
||||
return True
|
||||
models = {
|
||||
str(r.get("token_counts", {}).get("model", ""))
|
||||
for r in records
|
||||
if isinstance(r.get("token_counts"), dict)
|
||||
}
|
||||
models.discard("")
|
||||
# Sessions that only used the mock LLM provider.
|
||||
if models and models <= {"mock"}:
|
||||
return True
|
||||
# Sessions with no real model at all (empty string or missing).
|
||||
if not models:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _group_sessions(
|
||||
records: list[dict[str, Any]],
|
||||
*,
|
||||
include_tests: bool = False,
|
||||
) -> tuple[list[SessionSummary], dict[str, list[dict[str, Any]]]]:
|
||||
by_session: dict[str, list[dict[str, Any]]] = defaultdict(list)
|
||||
for record in records:
|
||||
@@ -126,6 +162,13 @@ def _group_sessions(
|
||||
if execution_id:
|
||||
by_session[execution_id].append(record)
|
||||
|
||||
if not include_tests:
|
||||
by_session = {
|
||||
eid: recs
|
||||
for eid, recs in by_session.items()
|
||||
if not _is_test_session(eid, recs)
|
||||
}
|
||||
|
||||
summaries: list[SessionSummary] = []
|
||||
for execution_id, session_records in by_session.items():
|
||||
session_records.sort(
|
||||
@@ -174,7 +217,6 @@ def _group_sessions(
|
||||
|
||||
def _render_html(
|
||||
summaries: list[SessionSummary],
|
||||
sessions: dict[str, list[dict[str, Any]]],
|
||||
initial_session_id: str,
|
||||
) -> str:
|
||||
summaries_data = [
|
||||
@@ -193,16 +235,6 @@ def _render_html(
|
||||
for summary in summaries
|
||||
]
|
||||
|
||||
sessions_data = {
|
||||
execution_id: sorted(
|
||||
records,
|
||||
key=lambda record: (
|
||||
str(record.get("timestamp", "")),
|
||||
record.get("iteration", 0),
|
||||
),
|
||||
)
|
||||
for execution_id, records in sessions.items()
|
||||
}
|
||||
initial = initial_session_id or (summaries[0].execution_id if summaries else "")
|
||||
return f"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
@@ -579,10 +611,9 @@ def _render_html(
|
||||
</div>
|
||||
|
||||
<script id="session-summaries" type="application/json">{json.dumps(summaries_data, ensure_ascii=False)}</script>
|
||||
<script id="session-records" type="application/json">{json.dumps(sessions_data, ensure_ascii=False)}</script>
|
||||
<script>
|
||||
const summaries = JSON.parse(document.getElementById("session-summaries").textContent);
|
||||
const recordsBySession = JSON.parse(document.getElementById("session-records").textContent);
|
||||
const recordCache = {{}};
|
||||
const initialSessionId = {json.dumps(initial, ensure_ascii=False)};
|
||||
|
||||
const sessionSearch = document.getElementById("sessionSearch");
|
||||
@@ -746,10 +777,18 @@ def _render_html(
|
||||
`;
|
||||
}}
|
||||
|
||||
function renderSession(sessionId) {{
|
||||
async function fetchSession(sessionId) {{
|
||||
if (recordCache[sessionId]) return recordCache[sessionId];
|
||||
const resp = await fetch(`/api/session/${{encodeURIComponent(sessionId)}}`);
|
||||
if (!resp.ok) return [];
|
||||
const data = await resp.json();
|
||||
recordCache[sessionId] = data;
|
||||
return data;
|
||||
}}
|
||||
|
||||
async function renderSession(sessionId) {{
|
||||
activeSessionId = sessionId;
|
||||
const summary = summaries.find((entry) => entry.execution_id === sessionId);
|
||||
const records = recordsBySession[sessionId] || [];
|
||||
|
||||
renderSessionChooser();
|
||||
|
||||
@@ -773,6 +812,9 @@ def _render_html(
|
||||
renderMetaCard("Source file", summary.log_file),
|
||||
].join("");
|
||||
|
||||
turnsEl.innerHTML = '<div class="empty">Loading session\u2026</div>';
|
||||
const records = await fetchSession(sessionId);
|
||||
if (activeSessionId !== sessionId) return;
|
||||
turnsEl.innerHTML = records.length
|
||||
? records.map((record) => renderTurn(record)).join("")
|
||||
: '<div class="empty">This session has no turn records.</div>';
|
||||
@@ -804,7 +846,8 @@ def _render_html(
|
||||
}});
|
||||
|
||||
const hashSession = decodeURIComponent(window.location.hash.replace(/^#/, ""));
|
||||
const bootSession = recordsBySession[hashSession] ? hashSession : activeSessionId;
|
||||
const knownIds = new Set(summaries.map((s) => s.execution_id));
|
||||
const bootSession = knownIds.has(hashSession) ? hashSession : activeSessionId;
|
||||
renderSessionChooser();
|
||||
renderSession(bootSession);
|
||||
</script>
|
||||
@@ -813,28 +856,70 @@ def _render_html(
|
||||
"""
|
||||
|
||||
|
||||
def _write_report(html_report: str, output: Path | None) -> Path:
|
||||
if output is not None:
|
||||
output.parent.mkdir(parents=True, exist_ok=True)
|
||||
output.write_text(html_report, encoding="utf-8")
|
||||
return output
|
||||
def _sort_records(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
return sorted(
|
||||
records,
|
||||
key=lambda r: (str(r.get("timestamp", "")), r.get("iteration", 0)),
|
||||
)
|
||||
|
||||
with tempfile.NamedTemporaryFile(
|
||||
mode="w",
|
||||
encoding="utf-8",
|
||||
prefix="hive_llm_debug_",
|
||||
suffix=".html",
|
||||
delete=False,
|
||||
dir="/tmp",
|
||||
) as handle:
|
||||
handle.write(html_report)
|
||||
return Path(handle.name)
|
||||
|
||||
def _run_server(
|
||||
html: str,
|
||||
sessions: dict[str, list[dict[str, Any]]],
|
||||
port: int,
|
||||
no_open: bool,
|
||||
) -> None:
|
||||
html_bytes = html.encode("utf-8")
|
||||
|
||||
class Handler(http.server.BaseHTTPRequestHandler):
|
||||
def do_GET(self) -> None:
|
||||
if self.path == "/":
|
||||
self._respond(200, "text/html; charset=utf-8", html_bytes)
|
||||
elif self.path.startswith("/api/session/"):
|
||||
sid = urllib.parse.unquote(self.path[len("/api/session/"):])
|
||||
records = sessions.get(sid)
|
||||
if records is None:
|
||||
self._respond(404, "application/json", b"[]")
|
||||
else:
|
||||
body = json.dumps(
|
||||
_sort_records(records), ensure_ascii=False
|
||||
).encode("utf-8")
|
||||
self._respond(200, "application/json", body)
|
||||
else:
|
||||
self.send_error(404)
|
||||
|
||||
def _respond(self, code: int, content_type: str, body: bytes) -> None:
|
||||
self.send_response(code)
|
||||
self.send_header("Content-Type", content_type)
|
||||
self.send_header("Content-Length", str(len(body)))
|
||||
self.end_headers()
|
||||
self.wfile.write(body)
|
||||
|
||||
def log_message(self, format: str, *args: object) -> None:
|
||||
pass # silence per-request logs
|
||||
|
||||
server = http.server.HTTPServer(("127.0.0.1", port), Handler)
|
||||
actual_port = server.server_address[1]
|
||||
url = f"http://127.0.0.1:{actual_port}"
|
||||
print(f"Serving at {url} (Ctrl+C to stop)")
|
||||
|
||||
if not no_open:
|
||||
webbrowser.open(url)
|
||||
|
||||
try:
|
||||
server.serve_forever()
|
||||
except KeyboardInterrupt:
|
||||
print("\nStopped.")
|
||||
finally:
|
||||
server.server_close()
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = _parse_args()
|
||||
records = _discover_records(args.logs_dir.expanduser(), args.limit_files)
|
||||
summaries, sessions = _group_sessions(records)
|
||||
summaries, sessions = _group_sessions(
|
||||
records, include_tests=args.include_tests
|
||||
)
|
||||
|
||||
initial_session_id = args.session or (
|
||||
summaries[0].execution_id if summaries else ""
|
||||
@@ -843,13 +928,15 @@ def main() -> int:
|
||||
print(f"session not found: {initial_session_id}")
|
||||
return 1
|
||||
|
||||
html_report = _render_html(summaries, sessions, initial_session_id)
|
||||
output_path = _write_report(html_report, args.output)
|
||||
print(output_path)
|
||||
html_report = _render_html(summaries, initial_session_id)
|
||||
|
||||
if not args.no_open:
|
||||
webbrowser.open(output_path.resolve().as_uri())
|
||||
if args.output:
|
||||
args.output.parent.mkdir(parents=True, exist_ok=True)
|
||||
args.output.write_text(html_report, encoding="utf-8")
|
||||
print(args.output)
|
||||
return 0
|
||||
|
||||
_run_server(html_report, sessions, args.port, args.no_open)
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user