Merge pull request #6624 from aden-hive/feature/agent-skills
Release / Create Release (push) Waiting to run

feat: agent skills system and observability improvements
This commit is contained in:
Timothy @aden
2026-03-18 20:28:34 -07:00
committed by GitHub
38 changed files with 2469 additions and 154 deletions
+10
View File
@@ -89,6 +89,16 @@ def main():
register_testing_commands(subparsers)
# Register skill commands (skill list, skill trust, ...)
from framework.skills.cli import register_skill_commands
register_skill_commands(subparsers)
# Register debugger commands (debugger)
from framework.debugger.cli import register_debugger_commands
register_debugger_commands(subparsers)
args = parser.parse_args()
if hasattr(args, "func"):
View File
+76
View File
@@ -0,0 +1,76 @@
"""CLI command for the LLM debug log viewer."""
import argparse
import subprocess
import sys
from pathlib import Path
_SCRIPT = Path(__file__).resolve().parents[3] / "scripts" / "llm_debug_log_visualizer.py"
def register_debugger_commands(subparsers: argparse._SubParsersAction) -> None:
"""Register the ``hive debugger`` command."""
parser = subparsers.add_parser(
"debugger",
help="Open the LLM debug log viewer",
description=(
"Start a local server that lets you browse LLM debug sessions "
"recorded in ~/.hive/llm_logs. Sessions are loaded on demand so "
"the browser stays responsive."
),
)
parser.add_argument(
"--session",
help="Execution ID to select initially.",
)
parser.add_argument(
"--port",
type=int,
default=0,
help="Port for the local server (0 = auto-pick a free port).",
)
parser.add_argument(
"--logs-dir",
help="Directory containing JSONL log files (default: ~/.hive/llm_logs).",
)
parser.add_argument(
"--limit-files",
type=int,
default=None,
help="Maximum number of newest log files to scan (default: 200).",
)
parser.add_argument(
"--output",
help="Write a static HTML file instead of starting a server.",
)
parser.add_argument(
"--no-open",
action="store_true",
help="Start the server but do not open a browser.",
)
parser.add_argument(
"--include-tests",
action="store_true",
help="Show test/mock sessions (hidden by default).",
)
parser.set_defaults(func=cmd_debugger)
def cmd_debugger(args: argparse.Namespace) -> int:
"""Launch the LLM debug log visualizer."""
cmd: list[str] = [sys.executable, str(_SCRIPT)]
if args.session:
cmd += ["--session", args.session]
if args.port:
cmd += ["--port", str(args.port)]
if args.logs_dir:
cmd += ["--logs-dir", args.logs_dir]
if args.limit_files is not None:
cmd += ["--limit-files", str(args.limit_files)]
if args.output:
cmd += ["--output", args.output]
if args.no_open:
cmd.append("--no-open")
if args.include_tests:
cmd.append("--include-tests")
return subprocess.call(cmd)
+6
View File
@@ -33,6 +33,8 @@ class Message:
is_transition_marker: bool = False
# True when this message is real human input (from /chat), not a system prompt
is_client_input: bool = False
# True when message contains an activated skill body (AS-10: never prune)
is_skill_content: bool = False
def to_llm_dict(self) -> dict[str, Any]:
"""Convert to OpenAI-format message dict."""
@@ -409,6 +411,7 @@ class NodeConversation:
tool_use_id: str,
content: str,
is_error: bool = False,
is_skill_content: bool = False,
) -> Message:
msg = Message(
seq=self._next_seq,
@@ -417,6 +420,7 @@ class NodeConversation:
tool_use_id=tool_use_id,
is_error=is_error,
phase_id=self._current_phase,
is_skill_content=is_skill_content,
)
self._messages.append(msg)
self._next_seq += 1
@@ -610,6 +614,8 @@ class NodeConversation:
continue
if msg.is_error:
continue # never prune errors
if msg.is_skill_content:
continue # never prune activated skill instructions (AS-10)
if msg.content.startswith("[Pruned tool result"):
continue # already pruned
# Tiny results (set_output acks, confirmations) — pruning
+215 -10
View File
@@ -467,6 +467,8 @@ class EventLoopNode(NodeProtocol):
stream_id = ctx.stream_id or ctx.node_id
node_id = ctx.node_id
execution_id = ctx.execution_id or ""
# Store skill dirs for AS-9 file-read interception in _execute_tool
self._skill_dirs: list[str] = ctx.skill_dirs
# Verdict counters for runtime logging
_accept_count = _retry_count = _escalate_count = _continue_count = 0
@@ -806,6 +808,13 @@ class EventLoopNode(NodeProtocol):
execution_id,
extra_data=_iter_meta,
)
# Sync max_context_tokens from live config so mid-session model
# switches are reflected in compaction decisions and the UI bar.
from framework.config import get_max_context_tokens as _live_mct
conversation._max_context_tokens = _live_mct()
await self._publish_context_usage(ctx, conversation, "iteration_start")
# 6d. Pre-turn compaction check (tiered)
_compacted_this_iter = False
@@ -2726,6 +2735,7 @@ class EventLoopNode(NodeProtocol):
tool_use_id=tc.tool_use_id,
content=result.content,
is_error=result.is_error,
is_skill_content=result.is_skill_content,
)
if (
tc.tool_name in ("ask_user", "ask_user_multiple")
@@ -2834,6 +2844,8 @@ class EventLoopNode(NodeProtocol):
conversation.usage_ratio() * 100,
)
await self._publish_context_usage(ctx, conversation, "post_tool_results")
# If the turn requested external input (ask_user or queen handoff),
# return immediately so the outer loop can block before judge eval.
if user_input_requested or queen_input_requested:
@@ -3549,6 +3561,33 @@ class EventLoopNode(NodeProtocol):
content=f"No tool executor configured for '{tc.tool_name}'",
is_error=True,
)
# AS-9: Intercept file-read tools for skill directories — bypass session sandbox
_SKILL_READ_TOOLS = {"view_file", "load_data", "read_file"}
skill_dirs = getattr(self, "_skill_dirs", [])
if tc.tool_name in _SKILL_READ_TOOLS and skill_dirs:
_path = tc.tool_input.get("path", "")
if _path:
import os
from pathlib import Path as _Path
_resolved = os.path.realpath(os.path.abspath(_path))
if any(_resolved.startswith(os.path.realpath(d)) for d in skill_dirs):
try:
_content = _Path(_resolved).read_text(encoding="utf-8")
_is_skill_md = _resolved.endswith("SKILL.md")
return ToolResult(
tool_use_id=tc.tool_use_id,
content=_content,
is_skill_content=_is_skill_md, # AS-10: protect SKILL.md reads
)
except Exception as _exc:
return ToolResult(
tool_use_id=tc.tool_use_id,
content=f"Could not read skill resource '{_path}': {_exc}",
is_error=True,
)
tool_use = ToolUse(id=tc.tool_use_id, name=tc.tool_name, input=tc.tool_input)
timeout = self._config.tool_call_timeout_seconds
@@ -3980,6 +4019,12 @@ class EventLoopNode(NodeProtocol):
ratio_before = conversation.usage_ratio()
phase_grad = getattr(ctx, "continuous_mode", False)
# Capture pre-compaction message inventory when over budget,
# since compaction mutates the conversation in place.
pre_inventory: list[dict[str, Any]] | None = None
if ratio_before >= 1.0:
pre_inventory = self._build_message_inventory(conversation)
# --- Step 1: Prune old tool results (free, no LLM) ---
protect = max(2000, self._config.max_context_tokens // 12)
pruned = await conversation.prune_old_tool_results(
@@ -3994,7 +4039,7 @@ class EventLoopNode(NodeProtocol):
conversation.usage_ratio() * 100,
)
if not conversation.needs_compaction():
await self._log_compaction(ctx, conversation, ratio_before)
await self._log_compaction(ctx, conversation, ratio_before, pre_inventory)
return
# --- Step 2: Standard structure-preserving compaction (free, no LLM) ---
@@ -4007,7 +4052,7 @@ class EventLoopNode(NodeProtocol):
phase_graduated=phase_grad,
)
if not conversation.needs_compaction():
await self._log_compaction(ctx, conversation, ratio_before)
await self._log_compaction(ctx, conversation, ratio_before, pre_inventory)
return
# --- Step 3: LLM summary compaction ---
@@ -4034,7 +4079,7 @@ class EventLoopNode(NodeProtocol):
logger.warning("LLM compaction failed: %s", e)
if not conversation.needs_compaction():
await self._log_compaction(ctx, conversation, ratio_before)
await self._log_compaction(ctx, conversation, ratio_before, pre_inventory)
return
# --- Step 4: Emergency deterministic summary (LLM failed/unavailable) ---
@@ -4048,7 +4093,7 @@ class EventLoopNode(NodeProtocol):
keep_recent=1,
phase_graduated=phase_grad,
)
await self._log_compaction(ctx, conversation, ratio_before)
await self._log_compaction(ctx, conversation, ratio_before, pre_inventory)
# --- LLM compaction with binary-search splitting ----------------------
@@ -4210,13 +4255,59 @@ class EventLoopNode(NodeProtocol):
"re-doing work.\n"
)
@staticmethod
def _build_message_inventory(
    conversation: NodeConversation,
) -> list[dict[str, Any]]:
    """Build a per-message size inventory for debug logging.

    Returns one dict per message with ``seq``, ``role``, ``content_chars``
    and, where applicable, ``tool_call_args_chars``, ``tool``, ``is_error``,
    ``phase`` and a 200-char ``preview`` for messages over 2000 chars.
    """
    # Precompute tool_use_id -> tool name once instead of rescanning the
    # whole message list for every tool-role message (was O(n^2)).
    id_to_tool: dict[str, str] = {}
    for m in conversation.messages:
        if m.tool_calls:
            for tc in m.tool_calls:
                tc_id = tc.get("id")
                if tc_id and tc_id not in id_to_tool:
                    id_to_tool[tc_id] = tc.get("function", {}).get("name", "?")

    inventory: list[dict[str, Any]] = []
    for m in conversation.messages:
        content_chars = len(m.content)
        tc_chars = 0
        tool_name = None
        if m.tool_calls:
            # Assistant message issuing tool calls: count argument sizes and
            # record the called tool names.
            for tc in m.tool_calls:
                args = tc.get("function", {}).get("arguments", "")
                tc_chars += len(args) if isinstance(args, str) else len(json.dumps(args))
            names = [tc.get("function", {}).get("name", "?") for tc in m.tool_calls]
            tool_name = ", ".join(names)
        elif m.role == "tool" and m.tool_use_id:
            # Tool result: resolve the originating tool name via the map.
            tool_name = id_to_tool.get(m.tool_use_id)
        entry: dict[str, Any] = {
            "seq": m.seq,
            "role": m.role,
            "content_chars": content_chars,
        }
        if tc_chars:
            entry["tool_call_args_chars"] = tc_chars
        if tool_name:
            entry["tool"] = tool_name
        if m.is_error:
            entry["is_error"] = True
        if m.phase_id:
            entry["phase"] = m.phase_id
        if content_chars > 2000:
            # Truncation marker: the previous `+ ""` was a no-op, leaving
            # previews indistinguishable from full content.
            entry["preview"] = m.content[:200] + "…"
        inventory.append(entry)
    return inventory
async def _log_compaction(
self,
ctx: NodeContext,
conversation: NodeConversation,
ratio_before: float,
pre_inventory: list[dict[str, Any]] | None = None,
) -> None:
"""Log compaction result to runtime logger and event bus."""
"""Log compaction result to runtime logger, event bus, and debug file."""
import os as _os
ratio_after = conversation.usage_ratio()
before_pct = round(ratio_before * 100)
after_pct = round(ratio_after * 100)
@@ -4249,19 +4340,103 @@ class EventLoopNode(NodeProtocol):
if self._event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
event_data: dict[str, Any] = {
"level": level,
"usage_before": before_pct,
"usage_after": after_pct,
}
if pre_inventory is not None:
event_data["message_inventory"] = pre_inventory
await self._event_bus.publish(
AgentEvent(
type=EventType.CONTEXT_COMPACTED,
stream_id=ctx.stream_id or ctx.node_id,
node_id=ctx.node_id,
data={
"level": level,
"usage_before": before_pct,
"usage_after": after_pct,
},
data=event_data,
)
)
# Emit post-compaction usage update
await self._publish_context_usage(ctx, conversation, "post_compaction")
# Write detailed debug log to ~/.hive/compaction_log/ when enabled
if _os.environ.get("HIVE_COMPACTION_DEBUG"):
self._write_compaction_debug_log(ctx, before_pct, after_pct, level, pre_inventory)
@staticmethod
def _write_compaction_debug_log(
    ctx: NodeContext,
    before_pct: int,
    after_pct: int,
    level: str,
    inventory: list[dict[str, Any]] | None,
) -> None:
    """Write detailed compaction analysis to ~/.hive/compaction_log/.

    Best-effort debug aid (gated on HIVE_COMPACTION_DEBUG upstream): any
    filesystem failure is logged at debug level and swallowed so this
    helper can never interrupt compaction itself.
    """
    log_dir = Path.home() / ".hive" / "compaction_log"
    ts = datetime.now(UTC).strftime("%Y%m%dT%H%M%S_%f")
    node_label = ctx.node_id.replace("/", "_")
    log_path = log_dir / f"{ts}_{node_label}.md"

    lines: list[str] = [
        f"# Compaction Debug — {ctx.node_id}",
        f"**Time:** {datetime.now(UTC).isoformat()}",
        f"**Node:** {ctx.node_spec.name} (`{ctx.node_id}`)",
    ]
    if ctx.stream_id:
        lines.append(f"**Stream:** {ctx.stream_id}")
    lines.append(f"**Level:** {level}")
    lines.append(f"**Usage:** {before_pct}% → {after_pct}%")
    lines.append("")
    if inventory:
        total_chars = sum(
            e.get("content_chars", 0) + e.get("tool_call_args_chars", 0) for e in inventory
        )
        lines.append(
            f"## Pre-Compaction Message Inventory "
            f"({len(inventory)} messages, {total_chars:,} total chars)"
        )
        lines.append("")
        # Rank largest-first so the dominant context consumers lead the table.
        ranked = sorted(
            inventory,
            key=lambda e: e.get("content_chars", 0) + e.get("tool_call_args_chars", 0),
            reverse=True,
        )
        lines.append("| # | seq | role | tool | chars | % of total | flags |")
        lines.append("|---|-----|------|------|------:|------------|-------|")
        for i, entry in enumerate(ranked, 1):
            chars = entry.get("content_chars", 0) + entry.get("tool_call_args_chars", 0)
            pct = (chars / total_chars * 100) if total_chars else 0
            tool = entry.get("tool", "")
            flags = []
            if entry.get("is_error"):
                flags.append("error")
            if entry.get("phase"):
                flags.append(f"phase={entry['phase']}")
            lines.append(
                f"| {i} | {entry['seq']} | {entry['role']} | {tool} "
                f"| {chars:,} | {pct:.1f}% | {', '.join(flags)} |"
            )
        # Entries carrying a preview were flagged as large when the
        # inventory was built.
        large = [e for e in ranked if e.get("preview")]
        if large:
            lines.append("")
            lines.append("### Large message previews")
            for entry in large:
                lines.append(
                    f"\n**seq={entry['seq']}** ({entry['role']}, {entry.get('tool', '')}):"
                )
                lines.append(f"```\n{entry['preview']}\n```")
    lines.append("")
    try:
        # mkdir moved inside the try: previously an unwritable/misconfigured
        # HOME raised OSError out of this debug-only helper.
        log_dir.mkdir(parents=True, exist_ok=True)
        log_path.write_text("\n".join(lines), encoding="utf-8")
        logger.debug("Compaction debug log written to %s", log_path)
    except OSError:
        logger.debug("Failed to write compaction debug log to %s", log_path)
def _build_emergency_summary(
self,
ctx: NodeContext,
@@ -4666,6 +4841,36 @@ class EventLoopNode(NodeProtocol):
if result.inject:
await conversation.add_user_message(result.inject)
async def _publish_context_usage(
    self,
    ctx: NodeContext,
    conversation: NodeConversation,
    trigger: str,
) -> None:
    """Emit a CONTEXT_USAGE_UPDATED event with current context window state."""
    bus = self._event_bus
    if not bus:
        # No event bus wired up — nothing to publish.
        return
    from framework.runtime.event_bus import AgentEvent, EventType

    token_estimate = conversation.estimate_tokens()
    window = conversation._max_context_tokens
    if window > 0:
        fill_ratio = token_estimate / window
    else:
        fill_ratio = 0.0
    payload = {
        "usage_ratio": round(fill_ratio, 4),
        "usage_pct": round(fill_ratio * 100),
        "message_count": conversation.message_count,
        "estimated_tokens": token_estimate,
        "max_context_tokens": window,
        "trigger": trigger,
    }
    event = AgentEvent(
        type=EventType.CONTEXT_USAGE_UPDATED,
        stream_id=ctx.stream_id or ctx.node_id,
        node_id=ctx.node_id,
        data=payload,
    )
    await bus.publish(event)
async def _publish_iteration(
self,
stream_id: str,
+7
View File
@@ -154,6 +154,7 @@ class GraphExecutor:
iteration_metadata_provider: Callable | None = None,
skills_catalog_prompt: str = "",
protocols_prompt: str = "",
skill_dirs: list[str] | None = None,
):
"""
Initialize the executor.
@@ -181,6 +182,7 @@ class GraphExecutor:
system prompt (for phase switching)
skills_catalog_prompt: Available skills catalog for system prompt
protocols_prompt: Default skill operational protocols for system prompt
skill_dirs: Skill base directories for Tier 3 resource access
"""
self.runtime = runtime
self.llm = llm
@@ -204,6 +206,7 @@ class GraphExecutor:
self.iteration_metadata_provider = iteration_metadata_provider
self.skills_catalog_prompt = skills_catalog_prompt
self.protocols_prompt = protocols_prompt
self.skill_dirs: list[str] = skill_dirs or []
if protocols_prompt:
self.logger.info(
@@ -1845,6 +1848,9 @@ class GraphExecutor:
existing_underscore = [k for k in memory._data if k.startswith("_")]
extra_keys = set(_skill_keys) | set(existing_underscore)
# Only inject into read_keys when it was already non-empty — an empty
# read_keys means "allow all reads" and injecting skill keys would
# inadvertently restrict reads to skill keys only.
for k in extra_keys:
if read_keys and k not in read_keys:
read_keys.append(k)
@@ -1899,6 +1905,7 @@ class GraphExecutor:
iteration_metadata_provider=self.iteration_metadata_provider,
skills_catalog_prompt=self.skills_catalog_prompt,
protocols_prompt=self.protocols_prompt,
skill_dirs=self.skill_dirs,
)
VALID_NODE_TYPES = {
+1
View File
@@ -568,6 +568,7 @@ class NodeContext:
# Skill system prompts — injected by the skill discovery pipeline
skills_catalog_prompt: str = "" # Available skills XML catalog
protocols_prompt: str = "" # Default skill operational protocols
skill_dirs: list[str] = field(default_factory=list) # Skill base dirs for resource access
# Per-iteration metadata provider — when set, EventLoopNode merges
# the returned dict into node_loop_iteration event data. Used by
+1
View File
@@ -45,6 +45,7 @@ class ToolResult:
tool_use_id: str
content: str
is_error: bool = False
is_skill_content: bool = False # AS-10: marks activated skill body, protected from pruning
class LLMProvider(ABC):
@@ -1,5 +1,6 @@
"""Shared MCP client connection management."""
import logging
import threading
from typing import Any
@@ -7,6 +8,8 @@ import httpx
from framework.runner.mcp_client import MCPClient, MCPServerConfig
logger = logging.getLogger(__name__)
class MCPConnectionManager:
"""Process-wide MCP client pool keyed by server name."""
@@ -46,8 +49,14 @@ class MCPConnectionManager:
with self._pool_lock:
client = self._pool.get(server_name)
if self._is_connected(client) and server_name not in self._transitions:
self._refcounts[server_name] = self._refcounts.get(server_name, 0) + 1
new_refcount = self._refcounts.get(server_name, 0) + 1
self._refcounts[server_name] = new_refcount
self._configs[server_name] = config
logger.debug(
"Reusing pooled connection for MCP server '%s' (refcount=%d)",
server_name,
new_refcount,
)
return client
transition_event = self._transitions.get(server_name)
+4 -1
View File
@@ -1,6 +1,6 @@
"""Pre-load validation for agent graphs.
Runs structural and credential checks before MCP servers are spawned.
Runs structural, credential, and skill-trust checks before MCP servers are spawned.
Fails fast with actionable error messages.
"""
@@ -169,6 +169,9 @@ def run_preload_validation(
1. Graph structure (includes GCU subagent-only checks) non-recoverable
2. Credentials potentially recoverable via interactive setup
Skill discovery and trust gating (AS-13) happen later in runner._setup()
so they have access to agent-level skill configuration.
Raises PreloadValidationError for structural issues.
Raises CredentialError for credential issues.
"""
+5 -1
View File
@@ -1343,7 +1343,7 @@ class AgentRunner:
except Exception:
pass # Best-effort — agent works without account info
# Skill configuration — the runtime handles discovery, loading, and
# Skill configuration — the runtime handles discovery, loading, trust-gating and
# prompt rasterization. The runner just builds the config.
from framework.skills.config import SkillsConfig
from framework.skills.manager import SkillsManagerConfig
@@ -1354,6 +1354,7 @@ class AgentRunner:
skills=getattr(self, "_agent_skills", None),
),
project_root=self.agent_path,
interactive=self._interactive,
)
self._setup_agent_runtime(
@@ -1465,6 +1466,9 @@ class AgentRunner:
accounts_data: list[dict] | None = None,
tool_provider_map: dict[str, str] | None = None,
event_bus=None,
skills_catalog_prompt: str = "",
protocols_prompt: str = "",
skill_dirs: list[str] | None = None,
skills_manager_config=None,
) -> None:
"""Set up multi-entry-point execution using AgentRuntime."""
+1 -1
View File
@@ -482,7 +482,7 @@ class ToolRegistry:
def register_mcp_server(
self,
server_config: dict[str, Any],
use_connection_manager: bool = False,
use_connection_manager: bool = True,
) -> int:
"""
Register an MCP server and discover its tools.
+13
View File
@@ -137,6 +137,7 @@ class AgentRuntime:
# Deprecated — pass skills_manager_config instead.
skills_catalog_prompt: str = "",
protocols_prompt: str = "",
skill_dirs: list[str] | None = None,
):
"""
Initialize agent runtime.
@@ -158,6 +159,9 @@ class AgentRuntime:
event_bus: Optional external EventBus. If provided, the runtime shares
this bus instead of creating its own. Used by SessionManager to
share a single bus between queen, worker, and judge.
skills_catalog_prompt: Available skills catalog for system prompt
protocols_prompt: Default skill operational protocols for system prompt
skill_dirs: Skill base directories for Tier 3 resource access
skills_manager_config: Skill configuration the runtime owns
discovery, loading, and prompt renderation internally.
skills_catalog_prompt: Deprecated. Pre-rendered skills catalog.
@@ -195,6 +199,8 @@ class AgentRuntime:
self._skills_manager = SkillsManager()
self._skills_manager.load()
self.skill_dirs: list[str] = self._skills_manager.allowlisted_dirs
# Primary graph identity
self._graph_id: str = graph_id or "primary"
@@ -341,6 +347,7 @@ class AgentRuntime:
tool_provider_map=self._tool_provider_map,
skills_catalog_prompt=self.skills_catalog_prompt,
protocols_prompt=self.protocols_prompt,
skill_dirs=self.skill_dirs,
)
await stream.start()
self._streams[ep_id] = stream
@@ -977,6 +984,7 @@ class AgentRuntime:
tool_provider_map=self._tool_provider_map,
skills_catalog_prompt=self.skills_catalog_prompt,
protocols_prompt=self.protocols_prompt,
skill_dirs=self.skill_dirs,
)
if self._running:
await stream.start()
@@ -1760,6 +1768,7 @@ def create_agent_runtime(
# Deprecated — pass skills_manager_config instead.
skills_catalog_prompt: str = "",
protocols_prompt: str = "",
skill_dirs: list[str] | None = None,
) -> AgentRuntime:
"""
Create and configure an AgentRuntime with entry points.
@@ -1786,6 +1795,9 @@ def create_agent_runtime(
accounts_data: Raw account data for per-node prompt generation.
tool_provider_map: Tool name to provider name mapping for account routing.
event_bus: Optional external EventBus to share with other components.
skills_catalog_prompt: Available skills catalog for system prompt.
protocols_prompt: Default skill operational protocols for system prompt.
skill_dirs: Skill base directories for Tier 3 resource access.
skills_manager_config: Skill configuration the runtime owns
discovery, loading, and prompt renderation internally.
skills_catalog_prompt: Deprecated. Pre-rendered skills catalog.
@@ -1819,6 +1831,7 @@ def create_agent_runtime(
skills_manager_config=skills_manager_config,
skills_catalog_prompt=skills_catalog_prompt,
protocols_prompt=protocols_prompt,
skill_dirs=skill_dirs,
)
for spec in entry_points:
+1
View File
@@ -117,6 +117,7 @@ class EventType(StrEnum):
# Context management
CONTEXT_COMPACTED = "context_compacted"
CONTEXT_USAGE_UPDATED = "context_usage_updated"
# External triggers
WEBHOOK_RECEIVED = "webhook_received"
@@ -188,6 +188,7 @@ class ExecutionStream:
tool_provider_map: dict[str, str] | None = None,
skills_catalog_prompt: str = "",
protocols_prompt: str = "",
skill_dirs: list[str] | None = None,
):
"""
Initialize execution stream.
@@ -213,6 +214,7 @@ class ExecutionStream:
tool_provider_map: Tool name to provider name mapping for account routing
skills_catalog_prompt: Available skills catalog for system prompt
protocols_prompt: Default skill operational protocols for system prompt
skill_dirs: Skill base directories for Tier 3 resource access
"""
self.stream_id = stream_id
self.entry_spec = entry_spec
@@ -236,6 +238,7 @@ class ExecutionStream:
self._tool_provider_map = tool_provider_map
self._skills_catalog_prompt = skills_catalog_prompt
self._protocols_prompt = protocols_prompt
self._skill_dirs: list[str] = skill_dirs or []
_es_logger = logging.getLogger(__name__)
if protocols_prompt:
@@ -696,6 +699,7 @@ class ExecutionStream:
tool_provider_map=self._tool_provider_map,
skills_catalog_prompt=self._skills_catalog_prompt,
protocols_prompt=self._protocols_prompt,
skill_dirs=self._skill_dirs,
)
# Track executor so inject_input() can reach EventLoopNode instances
self._active_executors[execution_id] = executor
@@ -8,6 +8,7 @@ write. Errors are silently swallowed — this must never break the agent.
import json
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import IO, Any
@@ -47,6 +48,9 @@ def log_llm_turn(
Never raises.
"""
try:
# Skip logging during test runs to avoid polluting real logs.
if os.environ.get("PYTEST_CURRENT_TEST") or os.environ.get("HIVE_DISABLE_LLM_LOGS"):
return
global _log_file, _log_ready # noqa: PLW0603
if not _log_ready:
_log_file = _open_log()
+1
View File
@@ -37,6 +37,7 @@ DEFAULT_EVENT_TYPES = [
EventType.NODE_RETRY,
EventType.NODE_TOOL_DOOM_LOOP,
EventType.CONTEXT_COMPACTED,
EventType.CONTEXT_USAGE_UPDATED,
EventType.WORKER_LOADED,
EventType.CREDENTIALS_REQUIRED,
EventType.SUBAGENT_REPORT,
+4 -3
View File
@@ -819,10 +819,11 @@ class SessionManager:
exec_id = event.execution_id
if event.type == _ET.EXECUTION_STARTED:
# New run on this execution_id — reset cooldown so the first
# iteration always produces a mid-run snapshot.
# New run on this execution_id — start the cooldown timer so
# mid-run snapshots don't fire immediately at session start.
# The first snapshot will happen after _DIGEST_COOLDOWN seconds.
if exec_id:
_last_digest.pop(exec_id, None)
_last_digest[exec_id] = _time.monotonic()
elif event.type in (
_ET.EXECUTION_COMPLETED,
+7 -2
View File
@@ -1,8 +1,8 @@
"""Hive Agent Skills — discovery, parsing, and injection of SKILL.md packages.
"""Hive Agent Skills — discovery, parsing, trust gating, and injection of SKILL.md packages.
Implements the open Agent Skills standard (agentskills.io) for portable
skill discovery and activation, plus built-in default skills for runtime
operational discipline.
operational discipline, and AS-13 trust gating for project-scope skills.
"""
from framework.skills.catalog import SkillCatalog
@@ -10,7 +10,9 @@ from framework.skills.config import DefaultSkillConfig, SkillsConfig
from framework.skills.defaults import DefaultSkillManager
from framework.skills.discovery import DiscoveryConfig, SkillDiscovery
from framework.skills.manager import SkillsManager, SkillsManagerConfig
from framework.skills.models import TrustStatus
from framework.skills.parser import ParsedSkill, parse_skill_md
from framework.skills.trust import TrustedRepoStore, TrustGate
__all__ = [
"DefaultSkillConfig",
@@ -22,5 +24,8 @@ __all__ = [
"SkillsConfig",
"SkillsManager",
"SkillsManagerConfig",
"TrustGate",
"TrustedRepoStore",
"TrustStatus",
"parse_skill_md",
]
+1
View File
@@ -76,6 +76,7 @@ class SkillCatalog:
lines.append(f" <name>{escape(skill.name)}</name>")
lines.append(f" <description>{escape(skill.description)}</description>")
lines.append(f" <location>{escape(skill.location)}</location>")
lines.append(f" <base_dir>{escape(skill.base_dir)}</base_dir>")
lines.append(" </skill>")
lines.append("</available_skills>")
+120
View File
@@ -0,0 +1,120 @@
"""CLI commands for the Hive skill system.
Phase 1 commands (AS-13):
hive skill list list discovered skills across all scopes
hive skill trust <path> permanently trust a project repo's skills
Full CLI suite (CLI-1 through CLI-13) is Phase 2.
"""
from __future__ import annotations
import subprocess
import sys
from pathlib import Path
def register_skill_commands(subparsers) -> None:
"""Register the ``hive skill`` subcommand group."""
skill_parser = subparsers.add_parser("skill", help="Manage skills")
skill_sub = skill_parser.add_subparsers(dest="skill_command", required=True)
# hive skill list
list_parser = skill_sub.add_parser("list", help="List discovered skills across all scopes")
list_parser.add_argument(
"--project-dir",
default=None,
metavar="PATH",
help="Project directory to scan (default: current directory)",
)
list_parser.set_defaults(func=cmd_skill_list)
# hive skill trust
trust_parser = skill_sub.add_parser(
"trust",
help="Permanently trust a project repository so its skills load without prompting",
)
trust_parser.add_argument(
"project_path",
help="Path to the project directory (must contain a .git with a remote origin)",
)
trust_parser.set_defaults(func=cmd_skill_trust)
def cmd_skill_list(args) -> int:
"""List all discovered skills grouped by scope."""
from framework.skills.discovery import DiscoveryConfig, SkillDiscovery
project_dir = Path(args.project_dir).resolve() if args.project_dir else Path.cwd()
skills = SkillDiscovery(DiscoveryConfig(project_root=project_dir)).discover()
if not skills:
print("No skills discovered.")
return 0
scope_headers = {
"project": "PROJECT SKILLS",
"user": "USER SKILLS",
"framework": "FRAMEWORK SKILLS",
}
for scope in ("project", "user", "framework"):
scope_skills = [s for s in skills if s.source_scope == scope]
if not scope_skills:
continue
print(f"\n{scope_headers[scope]}")
print("" * 40)
for skill in scope_skills:
print(f"{skill.name}")
print(f" {skill.description}")
print(f" {skill.location}")
return 0
def cmd_skill_trust(args) -> int:
"""Permanently trust a project repository's skills."""
from framework.skills.trust import TrustedRepoStore, _normalize_remote_url
project_path = Path(args.project_path).resolve()
if not project_path.exists():
print(f"Error: path does not exist: {project_path}", file=sys.stderr)
return 1
if not (project_path / ".git").exists():
print(
f"Error: {project_path} is not a git repository (no .git directory).",
file=sys.stderr,
)
return 1
try:
result = subprocess.run(
["git", "-C", str(project_path), "remote", "get-url", "origin"],
capture_output=True,
text=True,
timeout=3,
)
if result.returncode != 0:
print(
"Error: no remote 'origin' configured in this repository.",
file=sys.stderr,
)
return 1
remote_url = result.stdout.strip()
except subprocess.TimeoutExpired:
print("Error: git remote lookup timed out.", file=sys.stderr)
return 1
except (FileNotFoundError, OSError) as e:
print(f"Error reading git remote: {e}", file=sys.stderr)
return 1
repo_key = _normalize_remote_url(remote_url)
store = TrustedRepoStore()
store.trust(repo_key, project_path=str(project_path))
print(f"✓ Trusted: {repo_key}")
print(" Stored in ~/.hive/trusted_repos.json")
print(" Skills from this repository will load without prompting in future runs.")
return 0
+19
View File
@@ -42,11 +42,14 @@ class SkillsManagerConfig:
When ``None``, community discovery is skipped.
skip_community_discovery: Explicitly skip community scanning
even when ``project_root`` is set.
interactive: Whether trust gating can prompt the user interactively.
When ``False``, untrusted project skills are silently skipped.
"""
skills_config: SkillsConfig = field(default_factory=SkillsConfig)
project_root: Path | None = None
skip_community_discovery: bool = False
interactive: bool = True
class SkillsManager:
@@ -63,6 +66,7 @@ class SkillsManager:
self._loaded = False
self._catalog_prompt: str = ""
self._protocols_prompt: str = ""
self._allowlisted_dirs: list[str] = []
# ------------------------------------------------------------------
# Factory for backwards-compat bridge
@@ -85,6 +89,7 @@ class SkillsManager:
mgr._loaded = True # skip load()
mgr._catalog_prompt = skills_catalog_prompt
mgr._protocols_prompt = protocols_prompt
mgr._allowlisted_dirs = []
return mgr
# ------------------------------------------------------------------
@@ -113,9 +118,18 @@ class SkillsManager:
# 1. Community skill discovery (when project_root is available)
catalog_prompt = ""
if self._config.project_root is not None and not self._config.skip_community_discovery:
from framework.skills.trust import TrustGate
discovery = SkillDiscovery(DiscoveryConfig(project_root=self._config.project_root))
discovered = discovery.discover()
# Trust-gate project-scope skills (AS-13)
discovered = TrustGate(interactive=self._config.interactive).filter_and_gate(
discovered, project_dir=self._config.project_root
)
catalog = SkillCatalog(discovered)
self._allowlisted_dirs = catalog.allowlisted_dirs
catalog_prompt = catalog.to_prompt()
# Pre-activated community skills
@@ -160,6 +174,11 @@ class SkillsManager:
"""Default skill operational protocols for system prompt injection."""
return self._protocols_prompt
@property
def allowlisted_dirs(self) -> list[str]:
"""Skill base directories for Tier 3 resource access (AS-6)."""
return self._allowlisted_dirs
@property
def is_loaded(self) -> bool:
return self._loaded
+52
View File
@@ -0,0 +1,52 @@
"""Data models for the Hive skill system (Agent Skills standard)."""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import StrEnum
from pathlib import Path
class SkillScope(StrEnum):
"""Where a skill was discovered."""
PROJECT = "project"
USER = "user"
FRAMEWORK = "framework"
class TrustStatus(StrEnum):
"""Trust state of a skill entry."""
TRUSTED = "trusted"
PENDING_CONSENT = "pending_consent"
DENIED = "denied"
@dataclass
class SkillEntry:
    """In-memory record for a discovered skill (PRD §4.2).

    Built from SKILL.md frontmatter during discovery; ``trust_status`` is
    later adjusted by the trust gate for project-scope entries.
    """

    name: str
    """Skill name from SKILL.md frontmatter."""
    description: str
    """Skill description from SKILL.md frontmatter."""
    location: Path
    """Absolute path to SKILL.md."""
    base_dir: Path
    """Parent directory of SKILL.md (skill root)."""
    source_scope: SkillScope
    """Which scope this skill was found in."""
    trust_status: TrustStatus = TrustStatus.TRUSTED
    """Trust state; project-scope skills start as PENDING_CONSENT before gating."""

    # Optional frontmatter fields
    license: str | None = None
    compatibility: list[str] = field(default_factory=list)
    allowed_tools: list[str] = field(default_factory=list)
    metadata: dict = field(default_factory=dict)
+477
View File
@@ -0,0 +1,477 @@
"""Trust gating for project-level skills (PRD AS-13).
Project-level skills from untrusted repositories require explicit user consent
before their instructions are loaded into the agent's system prompt.
Framework and user-scope skills are always trusted.
Trusted repos are persisted at ~/.hive/trusted_repos.json.
"""
from __future__ import annotations
import json
import logging
import subprocess
import sys
from collections.abc import Callable
from dataclasses import dataclass
from datetime import UTC, datetime
from enum import StrEnum
from pathlib import Path
from urllib.parse import urlparse
from framework.skills.parser import ParsedSkill
logger = logging.getLogger(__name__)
# Env var to bypass trust gating in CI/headless pipelines (opt-in).
_ENV_TRUST_ALL = "HIVE_TRUST_PROJECT_SKILLS"
# Env var for comma-separated own-remote glob patterns (e.g. "github.com/myorg/*").
_ENV_OWN_REMOTES = "HIVE_OWN_REMOTES"
_TRUSTED_REPOS_PATH = Path.home() / ".hive" / "trusted_repos.json"
_NOTICE_SENTINEL_PATH = Path.home() / ".hive" / ".skill_trust_notice_shown"
# ---------------------------------------------------------------------------
# Trusted repo store
# ---------------------------------------------------------------------------
@dataclass
class TrustedRepoEntry:
    """One permanently-trusted repository record, as persisted to disk."""

    repo_key: str           # canonical "host/org/repo" key (see _normalize_remote_url)
    added_at: datetime      # when the user granted permanent trust (UTC)
    project_path: str = ""  # local checkout path recorded when trust was granted
class TrustedRepoStore:
    """Persists permanently-trusted repo keys to ~/.hive/trusted_repos.json.

    Entries are loaded lazily on first access; every mutation is written back
    atomically (tmp file + rename).
    """

    def __init__(self, path: Path | None = None) -> None:
        self._path = path or _TRUSTED_REPOS_PATH
        self._entries: dict[str, TrustedRepoEntry] = {}
        self._loaded = False

    def is_trusted(self, repo_key: str) -> bool:
        """Return True when *repo_key* has been permanently trusted."""
        self._ensure_loaded()
        return repo_key in self._entries

    def trust(self, repo_key: str, project_path: str = "") -> None:
        """Record *repo_key* as permanently trusted and persist immediately."""
        self._ensure_loaded()
        entry = TrustedRepoEntry(
            repo_key=repo_key,
            added_at=datetime.now(tz=UTC),
            project_path=project_path,
        )
        self._entries[repo_key] = entry
        self._save()
        logger.info("skill_trust_store: trusted repo_key=%s", repo_key)

    def revoke(self, repo_key: str) -> bool:
        """Remove *repo_key* from the store. Returns True if it was present."""
        self._ensure_loaded()
        if repo_key not in self._entries:
            return False
        del self._entries[repo_key]
        self._save()
        logger.info("skill_trust_store: revoked repo_key=%s", repo_key)
        return True

    def list_entries(self) -> list[TrustedRepoEntry]:
        """Return all trusted entries."""
        self._ensure_loaded()
        return list(self._entries.values())

    def _ensure_loaded(self) -> None:
        # Lazy one-shot load: constructing the store never touches disk.
        if self._loaded:
            return
        self._load()
        self._loaded = True

    def _load(self) -> None:
        try:
            document = json.loads(self._path.read_text(encoding="utf-8"))
            for raw in document.get("entries", []):
                key = raw.get("repo_key", "")
                if not key:
                    continue  # skip malformed rows rather than fail the load
                try:
                    when = datetime.fromisoformat(raw["added_at"])
                except (KeyError, ValueError):
                    # Missing/garbled timestamp: fall back to "now" rather than drop.
                    when = datetime.now(tz=UTC)
                self._entries[key] = TrustedRepoEntry(
                    repo_key=key,
                    added_at=when,
                    project_path=raw.get("project_path", ""),
                )
        except FileNotFoundError:
            pass  # first run: nothing persisted yet
        except Exception as e:
            logger.warning(
                "skill_trust_store: could not read %s (%s); treating as empty",
                self._path,
                e,
            )

    def _save(self) -> None:
        self._path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "version": 1,
            "entries": [
                {
                    "repo_key": e.repo_key,
                    "added_at": e.added_at.isoformat(),
                    "project_path": e.project_path,
                }
                for e in self._entries.values()
            ],
        }
        # Atomic write: write to .tmp then rename
        scratch = self._path.with_suffix(".tmp")
        scratch.write_text(json.dumps(payload, indent=2), encoding="utf-8")
        scratch.replace(self._path)
# ---------------------------------------------------------------------------
# Trust classification
# ---------------------------------------------------------------------------
class ProjectTrustClassification(StrEnum):
    """Outcome of ProjectTrustDetector.classify for a project directory."""

    ALWAYS_TRUSTED = "always_trusted"    # intrinsically trusted (no remote, localhost, own remote)
    TRUSTED_BY_USER = "trusted_by_user"  # repo_key found in TrustedRepoStore
    UNTRUSTED = "untrusted"              # unknown remote; consent required
class ProjectTrustDetector:
    """Classifies a project directory as trusted or untrusted.

    Algorithm (PRD §4.1 trust note):
      1. No project_dir             -> ALWAYS_TRUSTED
      2. No .git directory          -> ALWAYS_TRUSTED (not a git repo)
      3. No remote 'origin'         -> ALWAYS_TRUSTED (local-only repo)
      4. Remote URL -> repo_key; in TrustedRepoStore -> TRUSTED_BY_USER
      5. Localhost remote           -> ALWAYS_TRUSTED
      6. ~/.hive/own_remotes match  -> ALWAYS_TRUSTED
      7. HIVE_OWN_REMOTES env match -> ALWAYS_TRUSTED
      8. None of the above          -> UNTRUSTED
    """

    def __init__(self, store: TrustedRepoStore | None = None) -> None:
        self._store = store or TrustedRepoStore()

    def classify(self, project_dir: Path | None) -> tuple[ProjectTrustClassification, str]:
        """Return (classification, repo_key).

        repo_key is empty string for ALWAYS_TRUSTED cases without a remote.
        """
        if project_dir is None or not project_dir.exists():
            return ProjectTrustClassification.ALWAYS_TRUSTED, ""
        if not (project_dir / ".git").exists():
            return ProjectTrustClassification.ALWAYS_TRUSTED, ""
        remote_url = self._get_remote_origin(project_dir)
        if not remote_url:
            # Covers both "no origin configured" and git-lookup failures,
            # which _get_remote_origin reports as an empty string.
            return ProjectTrustClassification.ALWAYS_TRUSTED, ""
        repo_key = _normalize_remote_url(remote_url)
        # Explicitly trusted by user
        if self._store.is_trusted(repo_key):
            return ProjectTrustClassification.TRUSTED_BY_USER, repo_key
        # Localhost remotes are always trusted
        if _is_localhost_remote(remote_url):
            return ProjectTrustClassification.ALWAYS_TRUSTED, repo_key
        # User-configured own-remote patterns
        if self._matches_own_remotes(repo_key):
            return ProjectTrustClassification.ALWAYS_TRUSTED, repo_key
        return ProjectTrustClassification.UNTRUSTED, repo_key

    def _get_remote_origin(self, project_dir: Path) -> str:
        """Run ``git remote get-url origin``. Returns empty string on any failure."""
        try:
            result = subprocess.run(
                ["git", "-C", str(project_dir), "remote", "get-url", "origin"],
                capture_output=True,
                text=True,
                timeout=3,  # bounded so a hung git cannot block skill loading
            )
            if result.returncode == 0:
                return result.stdout.strip()
        except subprocess.TimeoutExpired:
            logger.warning(
                "skill_trust: git remote lookup timed out for %s; treating as trusted",
                project_dir,
            )
        except (FileNotFoundError, OSError):
            pass  # git not found or other OS error
        return ""

    def _matches_own_remotes(self, repo_key: str) -> bool:
        """Check repo_key against user-configured own-remote glob patterns."""
        import fnmatch

        patterns: list[str] = []
        # From env var
        env_patterns = _ENV_OWN_REMOTES
        import os

        raw = os.environ.get(env_patterns, "")
        if raw:
            patterns.extend(p.strip() for p in raw.split(",") if p.strip())
        # From ~/.hive/own_remotes file (one glob per line, '#' lines are comments)
        own_remotes_file = Path.home() / ".hive" / "own_remotes"
        if own_remotes_file.is_file():
            try:
                for line in own_remotes_file.read_text(encoding="utf-8").splitlines():
                    line = line.strip()
                    if line and not line.startswith("#"):
                        patterns.append(line)
            except OSError:
                pass
        return any(fnmatch.fnmatch(repo_key, p) for p in patterns)
# ---------------------------------------------------------------------------
# URL helpers (public so CLI can reuse)
# ---------------------------------------------------------------------------
def _normalize_remote_url(url: str) -> str:
"""Normalize a git remote URL to a canonical ``host/org/repo`` key.
Examples:
git@github.com:org/repo.git github.com/org/repo
https://github.com/org/repo github.com/org/repo
ssh://git@github.com/org/repo.git github.com/org/repo
"""
url = url.strip()
# SCP-style SSH: git@github.com:org/repo.git
if url.startswith("git@") and ":" in url and "://" not in url:
url = url[4:] # strip git@
url = url.replace(":", "/", 1)
elif "://" in url:
parsed = urlparse(url)
host = parsed.hostname or ""
path = parsed.path.lstrip("/")
url = f"{host}/{path}"
# Strip .git suffix
if url.endswith(".git"):
url = url[:-4]
return url.lower().strip("/")
def _is_localhost_remote(remote_url: str) -> bool:
"""Return True if the remote points to a local host."""
local_hosts = {"localhost", "127.0.0.1", "::1"}
try:
if "://" in remote_url:
parsed = urlparse(remote_url)
return (parsed.hostname or "").lower() in local_hosts
# SCP-style: git@localhost:org/repo
if "@" in remote_url:
host_part = remote_url.split("@", 1)[1].split(":")[0]
return host_part.lower() in local_hosts
except Exception:
pass
return False
# ---------------------------------------------------------------------------
# Trust gate
# ---------------------------------------------------------------------------
class TrustGate:
    """Filters skill list, running consent flow for untrusted project-scope skills.

    Framework and user-scope skills are always allowed through.
    Project-scope skills from untrusted repos require consent.

    All terminal I/O is injectable (``print_fn``/``input_fn``) so the consent
    flow can be exercised in tests without a TTY.
    """

    def __init__(
        self,
        store: TrustedRepoStore | None = None,
        detector: ProjectTrustDetector | None = None,
        interactive: bool = True,
        print_fn: Callable[[str], None] | None = None,
        input_fn: Callable[[str], str] | None = None,
    ) -> None:
        self._store = store or TrustedRepoStore()
        self._detector = detector or ProjectTrustDetector(self._store)
        self._interactive = interactive
        self._print = print_fn or print
        self._input = input_fn or input

    def filter_and_gate(
        self,
        skills: list[ParsedSkill],
        project_dir: Path | None,
    ) -> list[ParsedSkill]:
        """Return the subset of skills that are trusted for loading.

        - Framework and user-scope skills: always included.
        - Project-scope skills: classified; consent prompt shown if untrusted.
        """
        import os

        # Separate project skills from always-trusted scopes
        always_trusted = [s for s in skills if s.source_scope != "project"]
        project_skills = [s for s in skills if s.source_scope == "project"]
        if not project_skills:
            return always_trusted

        # Env-var CI override: trust all project skills for this invocation
        if os.environ.get(_ENV_TRUST_ALL, "").strip() == "1":
            logger.info(
                "skill_trust: %s=1 set; trusting %d project skill(s) without consent",
                _ENV_TRUST_ALL,
                len(project_skills),
            )
            return always_trusted + project_skills

        classification, repo_key = self._detector.classify(project_dir)
        if classification in (
            ProjectTrustClassification.ALWAYS_TRUSTED,
            ProjectTrustClassification.TRUSTED_BY_USER,
        ):
            logger.info(
                "skill_trust: project skills trusted classification=%s repo=%s count=%d",
                classification,
                repo_key or "(no remote)",
                len(project_skills),
            )
            return always_trusted + project_skills

        # UNTRUSTED — need consent. In headless mode we deny by default and
        # tell the user how to trust the repo permanently.
        if not self._interactive or not sys.stdin.isatty():
            logger.warning(
                "skill_trust: skipping %d project-scope skill(s) from untrusted repo "
                "'%s' (non-interactive mode). "
                "To trust permanently run: hive skill trust %s",
                len(project_skills),
                repo_key,
                project_dir or ".",
            )
            logger.info(
                "skill_trust_decision repo=%s skills=%d decision=denied mode=headless",
                repo_key,
                len(project_skills),
            )
            return always_trusted

        # Interactive consent flow
        decision = self._run_consent_flow(project_skills, project_dir, repo_key)
        logger.info(
            "skill_trust_decision repo=%s skills=%d decision=%s mode=interactive",
            repo_key,
            len(project_skills),
            decision,
        )
        if decision == "session":
            return always_trusted + project_skills
        if decision == "permanent":
            # Persist so future runs skip the prompt for this repo.
            self._store.trust(repo_key, project_path=str(project_dir or ""))
            return always_trusted + project_skills
        # denied
        return always_trusted

    def _run_consent_flow(
        self,
        project_skills: list[ParsedSkill],
        project_dir: Path | None,
        repo_key: str,
    ) -> str:
        """Show the security notice (once) and consent prompt.

        Return 'session' | 'permanent' | 'denied'.
        """
        from framework.credentials.setup import Colors

        if not sys.stdout.isatty():
            Colors.disable()
        self._maybe_show_security_notice(Colors)
        self._print_consent_prompt(project_skills, project_dir, repo_key, Colors)
        return self._prompt_consent(Colors)

    def _maybe_show_security_notice(self, Colors) -> None:  # noqa: N803
        """Show the one-time security notice if not already shown (NFR-5)."""
        if _NOTICE_SENTINEL_PATH.exists():
            return
        self._print("")
        self._print(
            f"{Colors.YELLOW}Security notice:{Colors.NC} Skills inject instructions "
            "into the agent's system prompt."
        )
        self._print(
            "  Only load skills from sources you trust. "
            "Registry skills at tier 'verified' or 'official' have been audited."
        )
        self._print("")
        try:
            # Sentinel file marks the notice as shown; failure to write it is
            # non-fatal (the notice simply repeats next run).
            _NOTICE_SENTINEL_PATH.parent.mkdir(parents=True, exist_ok=True)
            _NOTICE_SENTINEL_PATH.touch()
        except OSError:
            pass

    def _print_consent_prompt(
        self,
        project_skills: list[ParsedSkill],
        project_dir: Path | None,
        repo_key: str,
        Colors,  # noqa: N803
    ) -> None:
        """Render the consent banner listing the skills and the three options."""
        p = self._print
        p("")
        p(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}")
        p(f"{Colors.BOLD}  SKILL TRUST REQUIRED{Colors.NC}")
        p(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}")
        p("")
        proj_label = str(project_dir) if project_dir else "this project"
        p(
            f"  The project at {Colors.CYAN}{proj_label}{Colors.NC} wants to load "
            f"{len(project_skills)} skill(s)"
        )
        p("  that will inject instructions into the agent's system prompt.")
        if repo_key:
            p(f"  Source: {Colors.BOLD}{repo_key}{Colors.NC}")
        p("")
        p("  Skills requesting access:")
        for skill in project_skills:
            # '•' bullet restored — the glyph was lost to encoding damage,
            # leaving an empty colored span before each skill name.
            p(f"  {Colors.CYAN}•{Colors.NC} {Colors.BOLD}{skill.name}{Colors.NC}")
            p(f'      "{skill.description}"')
            p(f"      {Colors.DIM}{skill.location}{Colors.NC}")
        p("")
        p("  Options:")
        p(f"  {Colors.CYAN}1){Colors.NC} Trust this session only")
        p(f"  {Colors.CYAN}2){Colors.NC} Trust permanently — remember for future runs")
        p(
            f"  {Colors.DIM}3) Deny"
            f" — skip all project-scope skills from this repo{Colors.NC}"
        )
        # Closing rule: '=' restored (was an empty string repeated 60 times,
        # printing nothing), matching the opening banner above.
        p(f"{Colors.YELLOW}{'=' * 60}{Colors.NC}")

    def _prompt_consent(self, Colors) -> str:  # noqa: N803
        """Prompt until a valid choice is entered. Returns 'session'|'permanent'|'denied'."""
        mapping = {"1": "session", "2": "permanent", "3": "denied"}
        while True:
            try:
                choice = self._input("Select option (1-3): ").strip()
                if choice in mapping:
                    return mapping[choice]
            except (KeyboardInterrupt, EOFError):
                # Ctrl-C / EOF is treated as an explicit denial.
                return "denied"
            self._print(f"{Colors.RED}Invalid choice. Enter 1, 2, or 3.{Colors.NC}")
+1
View File
@@ -324,6 +324,7 @@ export type EventTypeName =
| "node_retry"
| "edge_traversed"
| "context_compacted"
| "context_usage_updated"
| "webhook_received"
| "custom"
| "escalation_requested"
+61 -1
View File
@@ -1,5 +1,12 @@
import { memo, useState, useRef, useEffect } from "react";
import { Send, Square, Crown, Cpu, Check, Loader2 } from "lucide-react";
/** Context-window usage snapshot for one agent (queen or a worker). */
export interface ContextUsageEntry {
  /** Percent of the context window in use (clamped to 100 when rendered). */
  usagePct: number;
  /** Number of messages currently in the conversation. */
  messageCount: number;
  /** Estimated tokens currently consumed. */
  estimatedTokens: number;
  /** Maximum context tokens available. */
  maxTokens: number;
}
import MarkdownContent from "@/components/MarkdownContent";
import QuestionWidget from "@/components/QuestionWidget";
import MultiQuestionWidget from "@/components/MultiQuestionWidget";
@@ -47,6 +54,8 @@ interface ChatPanelProps {
onQuestionDismiss?: () => void;
/** Queen operating phase — shown as a tag on queen messages */
queenPhase?: "planning" | "building" | "staging" | "running";
/** Context window usage for queen and workers */
contextUsage?: Record<string, ContextUsageEntry>;
}
const queenColor = "hsl(45,95%,58%)";
@@ -241,7 +250,7 @@ const MessageBubble = memo(function MessageBubble({ msg, queenPhase }: { msg: Ch
);
}, (prev, next) => prev.msg.id === next.msg.id && prev.msg.content === next.msg.content && prev.msg.phase === next.msg.phase && prev.queenPhase === next.queenPhase);
export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting, isBusy, activeThread, disabled, onCancel, pendingQuestion, pendingOptions, pendingQuestions, onQuestionSubmit, onMultiQuestionSubmit, onQuestionDismiss, queenPhase }: ChatPanelProps) {
export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting, isBusy, activeThread, disabled, onCancel, pendingQuestion, pendingOptions, pendingQuestions, onQuestionSubmit, onMultiQuestionSubmit, onQuestionDismiss, queenPhase, contextUsage }: ChatPanelProps) {
const [input, setInput] = useState("");
const [readMap, setReadMap] = useState<Record<string, number>>({});
const bottomRef = useRef<HTMLDivElement>(null);
@@ -356,6 +365,57 @@ export default function ChatPanel({ messages, onSend, isWaiting, isWorkerWaiting
<div ref={bottomRef} />
</div>
{/* Context window usage bar — sits between messages and input */}
{(() => {
if (!contextUsage) return null;
const queenUsage = contextUsage["__queen__"];
const workerEntries = Object.entries(contextUsage).filter(([k]) => k !== "__queen__");
const workerUsage = workerEntries.length > 0
? workerEntries.reduce((best, [, v]) => (v.usagePct > best.usagePct ? v : best), workerEntries[0][1])
: undefined;
if (!queenUsage && !workerUsage) return null;
return (
<div className="flex items-center gap-3 mx-4 px-3 py-1 rounded-lg bg-muted/30 border border-border/20 group/ctx flex-shrink-0">
{queenUsage && (
<div className="flex items-center gap-2 flex-1 min-w-0" title={`Queen: ${(queenUsage.estimatedTokens / 1000).toFixed(1)}k / ${(queenUsage.maxTokens / 1000).toFixed(0)}k tokens \u00b7 ${queenUsage.messageCount} messages`}>
<Crown className="w-3 h-3 flex-shrink-0" style={{ color: "hsl(45,95%,58%)" }} />
<div className="flex-1 h-1.5 rounded-full bg-muted/50 overflow-hidden min-w-[60px]">
<div
className="h-full rounded-full transition-all duration-500 ease-out"
style={{
width: `${Math.min(queenUsage.usagePct, 100)}%`,
backgroundColor: queenUsage.usagePct >= 90 ? "hsl(0,65%,55%)" : queenUsage.usagePct >= 70 ? "hsl(35,90%,55%)" : "hsl(45,95%,58%)",
}}
/>
</div>
<span className="text-[10px] text-muted-foreground/70 flex-shrink-0 tabular-nums">
<span className="group-hover/ctx:hidden">{queenUsage.usagePct}%</span>
<span className="hidden group-hover/ctx:inline">{(queenUsage.estimatedTokens / 1000).toFixed(1)}k / {(queenUsage.maxTokens / 1000).toFixed(0)}k</span>
</span>
</div>
)}
{workerUsage && (
<div className="flex items-center gap-2 flex-1 min-w-0" title={`Worker: ${(workerUsage.estimatedTokens / 1000).toFixed(1)}k / ${(workerUsage.maxTokens / 1000).toFixed(0)}k tokens \u00b7 ${workerUsage.messageCount} messages`}>
<Cpu className="w-3 h-3 flex-shrink-0" style={{ color: "hsl(220,60%,55%)" }} />
<div className="flex-1 h-1.5 rounded-full bg-muted/50 overflow-hidden min-w-[60px]">
<div
className="h-full rounded-full transition-all duration-500 ease-out"
style={{
width: `${Math.min(workerUsage.usagePct, 100)}%`,
backgroundColor: workerUsage.usagePct >= 90 ? "hsl(0,65%,55%)" : workerUsage.usagePct >= 70 ? "hsl(35,90%,55%)" : "hsl(220,60%,55%)",
}}
/>
</div>
<span className="text-[10px] text-muted-foreground/70 flex-shrink-0 tabular-nums">
<span className="group-hover/ctx:hidden">{workerUsage.usagePct}%</span>
<span className="hidden group-hover/ctx:inline">{(workerUsage.estimatedTokens / 1000).toFixed(1)}k / {(workerUsage.maxTokens / 1000).toFixed(0)}k</span>
</span>
</div>
)}
</div>
);
})()}
{/* Input area — question widget replaces textarea when a question is pending */}
{pendingQuestions && pendingQuestions.length >= 2 && onMultiQuestionSubmit ? (
<MultiQuestionWidget
@@ -28,6 +28,13 @@ export interface SubagentReport {
status?: "running" | "complete" | "error";
}
/** Context-window usage for the node shown in this panel. */
interface ContextUsage {
  /** Percent of the context window in use (clamped to 100 when rendered). */
  usagePct: number;
  /** Number of messages currently in the conversation. */
  messageCount: number;
  /** Estimated tokens currently consumed. */
  estimatedTokens: number;
  /** Maximum context tokens available. */
  maxTokens: number;
}
interface NodeDetailPanelProps {
node: GraphNode | null;
nodeSpec?: NodeSpec | null;
@@ -38,6 +45,7 @@ interface NodeDetailPanelProps {
workerSessionId?: string | null;
nodeLogs?: string[];
actionPlan?: string;
contextUsage?: ContextUsage;
onClose: () => void;
}
@@ -309,7 +317,7 @@ const tabs: { id: Tab; label: string; Icon: React.FC<{ className?: string }> }[]
{ id: "subagents", label: "Subagents", Icon: ({ className }) => <Bot className={className} /> },
];
export default function NodeDetailPanel({ node, nodeSpec, allNodeSpecs, subagentReports, sessionId, graphId, workerSessionId, nodeLogs, actionPlan, onClose }: NodeDetailPanelProps) {
export default function NodeDetailPanel({ node, nodeSpec, allNodeSpecs, subagentReports, sessionId, graphId, workerSessionId, nodeLogs, actionPlan, contextUsage, onClose }: NodeDetailPanelProps) {
const [activeTab, setActiveTab] = useState<Tab>("overview");
const [realTools, setRealTools] = useState<ToolInfo[] | null>(null);
const [realCriteria, setRealCriteria] = useState<NodeCriteria | null>(null);
@@ -389,6 +397,43 @@ export default function NodeDetailPanel({ node, nodeSpec, allNodeSpecs, subagent
</div>
)}
{/* Context window usage */}
{contextUsage && (
<div className="px-4 py-2 border-b border-border/20 flex-shrink-0">
<div className="flex items-center gap-2 mb-1">
<span className="text-[10px] text-muted-foreground font-medium">Context</span>
<span className="text-[10px] text-muted-foreground/70 ml-auto">
{(contextUsage.estimatedTokens / 1000).toFixed(1)}k / {(contextUsage.maxTokens / 1000).toFixed(0)}k tokens
</span>
</div>
<div className="w-full h-1.5 rounded-full bg-muted/50 overflow-hidden">
<div
className="h-full rounded-full transition-all duration-500 ease-out"
style={{
width: `${Math.min(contextUsage.usagePct, 100)}%`,
backgroundColor: contextUsage.usagePct >= 90
? "hsl(0,65%,55%)"
: contextUsage.usagePct >= 70
? "hsl(35,90%,55%)"
: "hsl(45,95%,58%)",
}}
/>
</div>
<div className="flex items-center gap-2 mt-1">
<span className="text-[10px] text-muted-foreground/60">{contextUsage.messageCount} messages</span>
<span className="text-[10px] font-medium ml-auto" style={{
color: contextUsage.usagePct >= 90
? "hsl(0,65%,55%)"
: contextUsage.usagePct >= 70
? "hsl(35,90%,55%)"
: "hsl(45,95%,58%)",
}}>
{contextUsage.usagePct}%
</span>
</div>
</div>
)}
{/* Tab bar */}
<div className="flex border-b border-border/30 flex-shrink-0 px-2 pt-1 overflow-x-auto scrollbar-hide">
{tabs.filter(t => t.id !== "subagents" || (nodeSpec?.sub_agents && nodeSpec.sub_agents.length > 0)).map(tab => (
+53 -6
View File
@@ -352,6 +352,8 @@ interface AgentBackendState {
pendingQuestions: { id: string; prompt: string; options?: string[] }[] | null;
/** Whether the pending question came from queen or worker */
pendingQuestionSource: "queen" | "worker" | null;
/** Per-node context window usage (from context_usage_updated events) */
contextUsage: Record<string, { usagePct: number; messageCount: number; estimatedTokens: number; maxTokens: number }>;
}
function defaultAgentState(): AgentBackendState {
@@ -389,6 +391,7 @@ function defaultAgentState(): AgentBackendState {
pendingOptions: null,
pendingQuestions: null,
pendingQuestionSource: null,
contextUsage: {},
};
}
@@ -630,6 +633,10 @@ export default function Workspace() {
// it was created in (avoids stale-closure when phase change and message
// events arrive in the same React batch).
const queenPhaseRef = useRef<Record<string, string>>({});
// Accumulated queen text across inner_turns within the same iteration.
// Key: `${agentType}:${execution_id}:${iteration}`, value: { [inner_turn]: snapshot }.
// This lets us merge all inner_turn text into one chat bubble per iteration.
const queenIterTextRef = useRef<Record<string, Record<number, string>>>({});
// Timestamp when designingDraft was set — used to enforce minimum spinner duration.
const designingDraftSinceRef = useRef<Record<string, number>>({});
const designingDraftTimerRef = useRef<Record<string, ReturnType<typeof setTimeout>>>({});
@@ -1707,14 +1714,29 @@ export default function Workspace() {
if (isQueen) console.log('[QUEEN] chatMsg:', chatMsg?.id, chatMsg?.content?.slice(0, 50), 'turn:', currentTurn);
if (chatMsg && !suppressQueenMessages) {
// Queen emits multiple client_output_delta / llm_text_delta snapshots
// across iterations and inner tool-loop turns. Build a stable ID that
// groups streaming deltas for the *same* output (same execution +
// iteration + inner_turn) into one bubble, while keeping distinct
// outputs as separate bubbles so earlier text isn't overwritten.
// across iterations and inner tool-loop turns. Merge all inner_turns
// within the same iteration into ONE bubble so the queen's multi-step
// tool loop (text → tool → text → tool → text) appears as one cohesive
// message rather than many small fragments.
if (isQueen && (event.type === "client_output_delta" || event.type === "llm_text_delta") && event.execution_id) {
const iter = event.data?.iteration ?? 0;
const inner = event.data?.inner_turn ?? 0;
chatMsg.id = `queen-stream-${event.execution_id}-${iter}-${inner}`;
const inner = (event.data?.inner_turn as number) ?? 0;
const iterKey = `${agentType}:${event.execution_id}:${iter}`;
// Store the latest snapshot for this inner_turn
if (!queenIterTextRef.current[iterKey]) {
queenIterTextRef.current[iterKey] = {};
}
const snapshot = (event.data?.snapshot as string) || (event.data?.content as string) || "";
queenIterTextRef.current[iterKey][inner] = snapshot;
// Concatenate all inner_turn snapshots in order
const parts = queenIterTextRef.current[iterKey];
const sortedInners = Object.keys(parts).map(Number).sort((a, b) => a - b);
chatMsg.content = sortedInners.map(k => parts[k]).join("\n");
// Single ID per iteration — no inner_turn in the ID
chatMsg.id = `queen-stream-${event.execution_id}-${iter}`;
}
if (isQueen) {
chatMsg.role = role;
@@ -2136,6 +2158,29 @@ export default function Workspace() {
}
break;
case "context_usage_updated": {
const streamKey = isQueen ? "__queen__" : (event.node_id || streamId);
const usagePct = (event.data?.usage_pct as number) ?? 0;
const messageCount = (event.data?.message_count as number) ?? 0;
const estimatedTokens = (event.data?.estimated_tokens as number) ?? 0;
const maxTokens = (event.data?.max_context_tokens as number) ?? 0;
setAgentStates(prev => {
const state = prev[agentType];
if (!state) return prev;
return {
...prev,
[agentType]: {
...state,
contextUsage: {
...state.contextUsage,
[streamKey]: { usagePct, messageCount, estimatedTokens, maxTokens },
},
},
};
});
}
break;
case "node_action_plan":
if (!isQueen && event.node_id) {
const plan = (event.data?.plan as string) || "";
@@ -3174,6 +3219,7 @@ export default function Workspace() {
}
onMultiQuestionSubmit={handleMultiQuestionAnswer}
onQuestionDismiss={handleQuestionDismiss}
contextUsage={activeAgentState?.contextUsage}
/>
)}
</div>
@@ -3377,6 +3423,7 @@ export default function Workspace() {
workerSessionId={null}
nodeLogs={activeAgentState?.nodeLogs[resolvedSelectedNode.id] || []}
actionPlan={activeAgentState?.nodeActionPlans[resolvedSelectedNode.id]}
contextUsage={activeAgentState?.contextUsage[resolvedSelectedNode.id]}
onClose={() => setSelectedNode(null)}
/>
)}
+142
View File
@@ -0,0 +1,142 @@
"""Tests for AS-9: Skill directory allowlisting in file-read tool interception."""
from unittest.mock import MagicMock
import pytest
from framework.llm.provider import ToolResult
def _make_tool_call_event(tool_name: str, path: str):
"""Build a minimal ToolCallEvent-like object."""
tc = MagicMock()
tc.tool_use_id = "tc-1"
tc.tool_name = tool_name
tc.tool_input = {"path": path}
return tc
def _make_node(skill_dirs: list[str]):
    """Build a minimal EventLoopNode with skill_dirs set."""
    from framework.graph.event_loop_node import EventLoopNode

    # The executor returns a sentinel so tests can tell when a call was NOT
    # intercepted by the skill-dir file-read shortcut.
    mock_result = ToolResult(tool_use_id="tc-1", content="from-executor")
    node = EventLoopNode(tool_executor=MagicMock(return_value=mock_result))
    node._skill_dirs = skill_dirs
    return node
class TestSkillFileReadInterception:
    # Each test builds a real file tree under pytest's tmp_path, then checks
    # whether _execute_tool serves the read directly (path under a skill dir)
    # or delegates to the injected tool executor (anything else).

    @pytest.mark.asyncio
    async def test_reads_file_in_skill_dir(self, tmp_path):
        """File under a skill dir is read directly, bypassing the executor."""
        skill_dir = tmp_path / "my-skill"
        skill_dir.mkdir()
        script = skill_dir / "scripts" / "run.py"
        script.parent.mkdir()
        script.write_text("print('hello')")
        node = _make_node([str(skill_dir)])
        tc = _make_tool_call_event("view_file", str(script))
        result = await node._execute_tool(tc)
        assert result.content == "print('hello')"
        assert not result.is_error
        node._tool_executor.assert_not_called()

    @pytest.mark.asyncio
    async def test_skill_md_read_marked_as_skill_content(self, tmp_path):
        """Reading SKILL.md sets is_skill_content=True for AS-10 protection."""
        skill_dir = tmp_path / "my-skill"
        skill_dir.mkdir()
        skill_md = skill_dir / "SKILL.md"
        skill_md.write_text("---\nname: my-skill\ndescription: Test\n---\nInstructions.")
        node = _make_node([str(skill_dir)])
        tc = _make_tool_call_event("view_file", str(skill_md))
        result = await node._execute_tool(tc)
        assert result.is_skill_content is True
        assert not result.is_error

    @pytest.mark.asyncio
    async def test_non_skill_md_resource_not_marked(self, tmp_path):
        """Bundled resource (not SKILL.md) is NOT marked as skill_content."""
        skill_dir = tmp_path / "my-skill"
        skill_dir.mkdir()
        ref = skill_dir / "references" / "api.md"
        ref.parent.mkdir()
        ref.write_text("# API Reference")
        node = _make_node([str(skill_dir)])
        tc = _make_tool_call_event("load_data", str(ref))
        result = await node._execute_tool(tc)
        assert result.is_skill_content is False
        assert not result.is_error

    @pytest.mark.asyncio
    async def test_path_outside_skill_dir_goes_to_executor(self, tmp_path):
        """Path outside skill dirs is passed through to the executor unchanged."""
        skill_dir = tmp_path / "my-skill"
        skill_dir.mkdir()
        other_file = tmp_path / "other" / "file.txt"
        other_file.parent.mkdir()
        other_file.write_text("other content")
        node = _make_node([str(skill_dir)])
        tc = _make_tool_call_event("view_file", str(other_file))
        result = await node._execute_tool(tc)
        assert result.content == "from-executor"
        node._tool_executor.assert_called_once()

    @pytest.mark.asyncio
    async def test_no_skill_dirs_goes_to_executor(self, tmp_path):
        """When skill_dirs is empty, all tool calls go to executor."""
        skill_dir = tmp_path / "my-skill"
        skill_dir.mkdir()
        script = skill_dir / "scripts" / "run.py"
        script.parent.mkdir()
        script.write_text("print('hello')")
        node = _make_node([])
        tc = _make_tool_call_event("view_file", str(script))
        result = await node._execute_tool(tc)
        assert result.content == "from-executor"
        node._tool_executor.assert_called_once()

    @pytest.mark.asyncio
    async def test_missing_file_returns_error(self, tmp_path):
        """Non-existent file under skill dir returns is_error=True."""
        skill_dir = tmp_path / "my-skill"
        skill_dir.mkdir()
        missing = skill_dir / "scripts" / "missing.py"
        node = _make_node([str(skill_dir)])
        tc = _make_tool_call_event("view_file", str(missing))
        result = await node._execute_tool(tc)
        assert result.is_error is True
        assert "Could not read skill resource" in result.content

    @pytest.mark.asyncio
    async def test_non_file_read_tool_goes_to_executor(self, tmp_path):
        """Non file-read tools (e.g. web_search) bypass the interceptor."""
        skill_dir = tmp_path / "my-skill"
        skill_dir.mkdir()
        node = _make_node([str(skill_dir)])
        tc = _make_tool_call_event("web_search", str(skill_dir / "SKILL.md"))
        result = await node._execute_tool(tc)
        assert result.content == "from-executor"
        node._tool_executor.assert_called_once()
+8 -1
View File
@@ -69,7 +69,13 @@ class TestSkillCatalog:
def test_to_prompt_xml_generation(self):
skills = [
_make_skill("alpha", "Alpha skill", "project", location="/p/alpha/SKILL.md"),
_make_skill(
"alpha",
"Alpha skill",
"project",
location="/p/alpha/SKILL.md",
base_dir="/p/alpha",
),
_make_skill("beta", "Beta skill", "user", location="/u/beta/SKILL.md"),
]
catalog = SkillCatalog(skills)
@@ -81,6 +87,7 @@ class TestSkillCatalog:
assert "<name>beta</name>" in prompt
assert "<description>Alpha skill</description>" in prompt
assert "<location>/p/alpha/SKILL.md</location>" in prompt
assert "<base_dir>/p/alpha</base_dir>" in prompt
def test_to_prompt_sorted_by_name(self):
skills = [
@@ -0,0 +1,90 @@
"""Tests for AS-10: Activated skill content protected from context pruning."""
import pytest
from framework.graph.conversation import Message, NodeConversation
def _make_conversation() -> NodeConversation:
    """Build a bare NodeConversation via __new__, bypassing __init__.

    Only the attributes the tests touch are initialized; no store/IO is wired.
    """
    conv = NodeConversation.__new__(NodeConversation)
    conv._messages = []
    conv._next_seq = 0
    conv._current_phase = None
    conv._store = None
    return conv
async def _add_tool_msg(conv: NodeConversation, content: str, **kwargs) -> Message:
    """Append a tool-result message, deriving the tool_use_id from the next seq."""
    generated_id = f"tc-{conv._next_seq}"
    return await conv.add_tool_result(tool_use_id=generated_id, content=content, **kwargs)
class TestSkillContentProtection:
    """AS-10: messages flagged ``is_skill_content`` must survive context pruning."""

    @pytest.mark.asyncio
    async def test_is_skill_content_flag_persists(self):
        """Message created with is_skill_content=True retains the flag."""
        conv = _make_conversation()
        msg = await _add_tool_msg(conv, "skill instructions", is_skill_content=True)
        assert msg.is_skill_content is True

    @pytest.mark.asyncio
    async def test_regular_message_not_marked(self):
        """Normal tool result messages are not marked as skill content."""
        conv = _make_conversation()
        msg = await _add_tool_msg(conv, "some tool output")
        assert msg.is_skill_content is False

    @pytest.mark.asyncio
    async def test_skill_content_survives_prune(self):
        """Skill content messages are skipped by prune_old_tool_results."""
        conv = _make_conversation()
        # Add many regular tool results to push over prune threshold
        for _ in range(30):
            await _add_tool_msg(conv, "x" * 500)  # ~125 tokens each
        # Add a skill content message
        skill_msg = await _add_tool_msg(
            conv,
            "## Deep Research\n" + "instructions " * 200,
            is_skill_content=True,
        )
        pruned = await conv.prune_old_tool_results(protect_tokens=500, min_prune_tokens=100)
        assert pruned > 0, "Expected some messages to be pruned"
        # Find the skill message — it must not be pruned
        matching = [m for m in conv._messages if m.seq == skill_msg.seq]
        assert matching, "Skill content message was removed"
        # Pruning rewrites a message's content to a "[Pruned tool result"
        # placeholder; the skill message must keep its original text.
        assert not matching[0].content.startswith("[Pruned tool result")

    @pytest.mark.asyncio
    async def test_regular_content_can_be_pruned(self):
        """Regular tool results are still pruned when over threshold."""
        conv = _make_conversation()
        for _ in range(20):
            await _add_tool_msg(conv, "regular tool output " * 50)
        pruned = await conv.prune_old_tool_results(protect_tokens=500, min_prune_tokens=100)
        assert pruned > 0, "Expected regular messages to be pruned"

    @pytest.mark.asyncio
    async def test_error_messages_also_protected(self):
        """Existing is_error protection still works alongside is_skill_content."""
        conv = _make_conversation()
        for _ in range(20):
            await _add_tool_msg(conv, "output " * 100)
        err_msg = await _add_tool_msg(conv, "tool failed", is_error=True)
        await conv.prune_old_tool_results(protect_tokens=200, min_prune_tokens=50)
        # The error message must survive with its original content intact.
        matching = [m for m in conv._messages if m.seq == err_msg.seq]
        assert matching
        assert not matching[0].content.startswith("[Pruned tool result")
+92
View File
@@ -0,0 +1,92 @@
"""Tests for AS-6 skill resource loading support.
Covers:
- <base_dir> element in catalog XML
- allowlisted_dirs property reflects trusted skill base directories
- skill_dirs propagation to NodeContext
"""
from framework.skills.catalog import SkillCatalog
from framework.skills.parser import ParsedSkill
def _make_skill(
    name: str,
    base_dir: str,
    source_scope: str = "project",
) -> ParsedSkill:
    """Build a minimal ParsedSkill whose SKILL.md lives directly under *base_dir*."""
    manifest_path = f"{base_dir}/SKILL.md"
    return ParsedSkill(
        name=name,
        description=f"Skill {name}",
        location=manifest_path,
        base_dir=base_dir,
        source_scope=source_scope,
        body="Instructions.",
    )
class TestSkillResourceBaseDir:
    """AS-6: catalog XML exposes base_dir; allowlisted_dirs tracks skill roots."""

    def test_base_dir_in_xml(self):
        """Each community skill entry should expose its base_dir in the catalog XML."""
        catalog = SkillCatalog([_make_skill("deploy", "/project/.hive/skills/deploy")])
        assert "<base_dir>/project/.hive/skills/deploy</base_dir>" in catalog.to_prompt()

    def test_base_dir_xml_escaped(self):
        """base_dir with XML-special chars should be escaped."""
        catalog = SkillCatalog([_make_skill("s", "/path/with <&> chars")])
        assert "<base_dir>/path/with &lt;&amp;&gt; chars</base_dir>" in catalog.to_prompt()

    def test_base_dir_absent_for_framework_skills(self):
        """Framework-scope skills are filtered from the catalog, so no base_dir either."""
        framework_skill = _make_skill(
            "fw", "/hive/_default_skills/fw", source_scope="framework"
        )
        assert SkillCatalog([framework_skill]).to_prompt() == ""

    def test_allowlisted_dirs_matches_skills(self):
        """allowlisted_dirs returns all skill base_dirs including framework ones."""
        catalog = SkillCatalog(
            [
                _make_skill("a", "/skills/a", "project"),
                _make_skill("b", "/skills/b", "user"),
                _make_skill("c", "/skills/c", "framework"),
            ]
        )
        allowlisted = catalog.allowlisted_dirs
        for expected in ("/skills/a", "/skills/b", "/skills/c"):
            assert expected in allowlisted

    def test_allowlisted_dirs_empty_catalog(self):
        """An empty catalog exposes no allowlisted directories."""
        assert SkillCatalog().allowlisted_dirs == []
class TestSkillDirsPropagation:
    """NodeContext should default skill_dirs to [] and accept an explicit list."""

    def _make_ctx(self, **kwargs):
        """Construct a NodeContext with mocked runtime/spec plus any overrides."""
        from unittest.mock import MagicMock

        from framework.graph.node import NodeContext

        defaults = dict(
            runtime=MagicMock(),
            node_id="n",
            node_spec=MagicMock(),
            memory={},
        )
        defaults.update(kwargs)
        return NodeContext(**defaults)

    def test_node_context_skill_dirs_default(self):
        """NodeContext.skill_dirs defaults to empty list."""
        assert self._make_ctx().skill_dirs == []

    def test_node_context_skill_dirs_set(self):
        """NodeContext.skill_dirs can be populated."""
        expected = ["/skills/a", "/skills/b"]
        assert self._make_ctx(skill_dirs=expected).skill_dirs == expected
+471
View File
@@ -0,0 +1,471 @@
"""Tests for skill trust gating (AS-13)."""
from __future__ import annotations
import json
from unittest.mock import MagicMock, patch
from framework.skills.parser import ParsedSkill
from framework.skills.trust import (
ProjectTrustClassification,
ProjectTrustDetector,
TrustedRepoStore,
TrustGate,
_is_localhost_remote,
_normalize_remote_url,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def make_skill(name: str = "test-skill", scope: str = "project") -> ParsedSkill:
    """Build a ParsedSkill rooted at /fake/<name> with the requested source scope."""
    root = f"/fake/{name}"
    return ParsedSkill(
        name=name,
        description="Test skill",
        location=f"{root}/SKILL.md",
        base_dir=root,
        source_scope=scope,
        body="Test skill instructions.",
    )
# ---------------------------------------------------------------------------
# _normalize_remote_url
# ---------------------------------------------------------------------------
class TestNormalizeRemoteUrl:
    """Every common git remote URL format normalizes to ``host/org/repo``."""

    def test_ssh_scp_format(self):
        normalized = _normalize_remote_url("git@github.com:org/repo.git")
        assert normalized == "github.com/org/repo"

    def test_https_format(self):
        normalized = _normalize_remote_url("https://github.com/org/repo.git")
        assert normalized == "github.com/org/repo"

    def test_https_no_dot_git(self):
        normalized = _normalize_remote_url("https://github.com/org/repo")
        assert normalized == "github.com/org/repo"

    def test_ssh_url_format(self):
        normalized = _normalize_remote_url("ssh://git@github.com/org/repo.git")
        assert normalized == "github.com/org/repo"

    def test_lowercased(self):
        normalized = _normalize_remote_url("git@GitHub.COM:Org/Repo.git")
        assert normalized == "github.com/org/repo"

    def test_trailing_slash_stripped(self):
        normalized = _normalize_remote_url("https://github.com/org/repo/")
        assert normalized == "github.com/org/repo"

    def test_gitlab(self):
        normalized = _normalize_remote_url("git@gitlab.com:team/project.git")
        assert normalized == "gitlab.com/team/project"
# ---------------------------------------------------------------------------
# _is_localhost_remote
# ---------------------------------------------------------------------------
class TestIsLocalhostRemote:
    """Localhost-style remotes are detected; public hosts are not."""

    def test_localhost_https(self):
        is_local = _is_localhost_remote("http://localhost/org/repo")
        assert is_local

    def test_127_0_0_1(self):
        is_local = _is_localhost_remote("https://127.0.0.1/repo")
        assert is_local

    def test_github_not_local(self):
        is_local = _is_localhost_remote("https://github.com/org/repo")
        assert not is_local

    def test_scp_localhost(self):
        is_local = _is_localhost_remote("git@localhost:org/repo")
        assert is_local
# ---------------------------------------------------------------------------
# TrustedRepoStore
# ---------------------------------------------------------------------------
class TestTrustedRepoStore:
    """Round-trip, persistence, and recovery behavior of the on-disk store."""

    def test_empty_store_is_not_trusted(self, tmp_path):
        fresh = TrustedRepoStore(tmp_path / "trusted.json")
        assert not fresh.is_trusted("github.com/org/repo")

    def test_trust_and_lookup(self, tmp_path):
        repo_store = TrustedRepoStore(tmp_path / "trusted.json")
        repo_store.trust("github.com/org/repo", project_path="/some/path")
        assert repo_store.is_trusted("github.com/org/repo")

    def test_revoke(self, tmp_path):
        repo_store = TrustedRepoStore(tmp_path / "trusted.json")
        repo_store.trust("github.com/org/repo")
        assert repo_store.revoke("github.com/org/repo")
        assert not repo_store.is_trusted("github.com/org/repo")

    def test_revoke_nonexistent_returns_false(self, tmp_path):
        repo_store = TrustedRepoStore(tmp_path / "trusted.json")
        assert not repo_store.revoke("github.com/nobody/nowhere")

    def test_persists_across_instances(self, tmp_path):
        json_path = tmp_path / "trusted.json"
        TrustedRepoStore(json_path).trust("github.com/org/repo")
        reloaded = TrustedRepoStore(json_path)
        assert reloaded.is_trusted("github.com/org/repo")

    def test_atomic_write(self, tmp_path):
        """Save must not leave a .tmp file behind."""
        json_path = tmp_path / "trusted.json"
        TrustedRepoStore(json_path).trust("github.com/org/repo")
        assert not (tmp_path / "trusted.tmp").exists()
        assert json_path.exists()

    def test_corrupted_json_recovers_gracefully(self, tmp_path):
        json_path = tmp_path / "trusted.json"
        json_path.write_text("{not valid json{{", encoding="utf-8")
        recovered = TrustedRepoStore(json_path)
        assert not recovered.is_trusted("github.com/any/repo")  # no crash

    def test_json_schema(self, tmp_path):
        json_path = tmp_path / "trusted.json"
        writer = TrustedRepoStore(json_path)
        writer.trust("github.com/org/repo", project_path="/work/repo")
        data = json.loads(json_path.read_text())
        assert data["version"] == 1
        assert data["entries"][0]["repo_key"] == "github.com/org/repo"
        assert "added_at" in data["entries"][0]

    def test_list_entries(self, tmp_path):
        repo_store = TrustedRepoStore(tmp_path / "t.json")
        repo_store.trust("github.com/a/b")
        repo_store.trust("github.com/c/d")
        assert len(repo_store.list_entries()) == 2
# ---------------------------------------------------------------------------
# ProjectTrustDetector
# ---------------------------------------------------------------------------
class TestProjectTrustDetector:
    """Trust classification of a project directory.

    ``subprocess.run`` is patched throughout, so no real ``git`` invocation
    happens; the mock's return value stands in for the remote-URL lookup.
    """

    def test_none_project_dir_always_trusted(self, tmp_path):
        # No project directory supplied at all -> nothing to gate.
        store = TrustedRepoStore(tmp_path / "t.json")
        det = ProjectTrustDetector(store)
        cls, _ = det.classify(None)
        assert cls == ProjectTrustClassification.ALWAYS_TRUSTED

    def test_nonexistent_dir_always_trusted(self, tmp_path):
        store = TrustedRepoStore(tmp_path / "t.json")
        det = ProjectTrustDetector(store)
        cls, _ = det.classify(tmp_path / "nonexistent")
        assert cls == ProjectTrustClassification.ALWAYS_TRUSTED

    def test_no_git_dir_always_trusted(self, tmp_path):
        # A plain directory without .git/ is not a repo.
        store = TrustedRepoStore(tmp_path / "t.json")
        det = ProjectTrustDetector(store)
        cls, _ = det.classify(tmp_path)
        assert cls == ProjectTrustClassification.ALWAYS_TRUSTED

    def test_no_remote_always_trusted(self, tmp_path):
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        det = ProjectTrustDetector(store)
        # git command returns non-zero (no remote)
        with patch("subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(returncode=1, stdout="")
            cls, _ = det.classify(tmp_path)
        assert cls == ProjectTrustClassification.ALWAYS_TRUSTED

    def test_localhost_remote_always_trusted(self, tmp_path):
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        det = ProjectTrustDetector(store)
        with patch("subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(
                returncode=0, stdout="http://localhost/org/repo.git\n"
            )
            cls, _ = det.classify(tmp_path)
        assert cls == ProjectTrustClassification.ALWAYS_TRUSTED

    def test_trusted_by_store(self, tmp_path):
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        store.trust("github.com/trusted/repo")
        det = ProjectTrustDetector(store)
        with patch("subprocess.run") as mock_run:
            # SCP-style URL must be normalized before the store lookup.
            mock_run.return_value = MagicMock(
                returncode=0, stdout="git@github.com:trusted/repo.git\n"
            )
            cls, key = det.classify(tmp_path)
        assert cls == ProjectTrustClassification.TRUSTED_BY_USER
        assert key == "github.com/trusted/repo"

    def test_unknown_remote_untrusted(self, tmp_path):
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        det = ProjectTrustDetector(store)
        with patch("subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(
                returncode=0, stdout="https://github.com/stranger/repo.git\n"
            )
            cls, key = det.classify(tmp_path)
        assert cls == ProjectTrustClassification.UNTRUSTED
        assert key == "github.com/stranger/repo"

    def test_own_remotes_env_var(self, tmp_path, monkeypatch):
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        # A glob pattern in HIVE_OWN_REMOTES auto-trusts matching remotes.
        monkeypatch.setenv("HIVE_OWN_REMOTES", "github.com/myorg/*")
        det = ProjectTrustDetector(store)
        with patch("subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(
                returncode=0, stdout="git@github.com:myorg/myrepo.git\n"
            )
            cls, _ = det.classify(tmp_path)
        assert cls == ProjectTrustClassification.ALWAYS_TRUSTED

    def test_git_timeout_treated_as_trusted(self, tmp_path):
        import subprocess

        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        det = ProjectTrustDetector(store)
        # A hung git process must neither block nor distrust the project.
        with patch("subprocess.run", side_effect=subprocess.TimeoutExpired("git", 3)):
            cls, _ = det.classify(tmp_path)
        assert cls == ProjectTrustClassification.ALWAYS_TRUSTED

    def test_git_not_found_treated_as_trusted(self, tmp_path):
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        det = ProjectTrustDetector(store)
        # Machines without git installed fall back to trusting the project.
        with patch("subprocess.run", side_effect=FileNotFoundError("git not found")):
            cls, _ = det.classify(tmp_path)
        assert cls == ProjectTrustClassification.ALWAYS_TRUSTED
# ---------------------------------------------------------------------------
# TrustGate
# ---------------------------------------------------------------------------
class TestTrustGate:
    """End-to-end gating of project-scope skills behind repo trust.

    ``subprocess.run`` is patched to fake the git remote; stdin/stdout
    ``isatty`` are patched True where the interactive consent path is
    exercised, with ``input_fn``/``print_fn`` injected to script the prompt.
    """

    def test_framework_scope_always_passes(self, tmp_path):
        """Framework-scope skills bypass trust gating entirely."""
        skill = make_skill("fw-skill", "framework")
        gate = TrustGate(store=TrustedRepoStore(tmp_path / "t.json"), interactive=False)
        result = gate.filter_and_gate([skill], project_dir=None)
        assert any(s.name == "fw-skill" for s in result)

    def test_user_scope_always_passes(self, tmp_path):
        """User-scope skills bypass trust gating entirely."""
        skill = make_skill("user-skill", "user")
        gate = TrustGate(store=TrustedRepoStore(tmp_path / "t.json"), interactive=False)
        result = gate.filter_and_gate([skill], project_dir=None)
        assert any(s.name == "user-skill" for s in result)

    def test_no_project_skills_returns_early(self, tmp_path):
        """When there are no project-scope skills, trust detection is skipped."""
        fw = make_skill("fw", "framework")
        gate = TrustGate(store=TrustedRepoStore(tmp_path / "t.json"), interactive=False)
        result = gate.filter_and_gate([fw], project_dir=tmp_path)
        assert result == [fw]

    def test_trusted_project_skills_pass(self, tmp_path):
        """Project skills from a trusted repo pass through."""
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        store.trust("github.com/trusted/repo")
        skill = make_skill("proj-skill", "project")
        gate = TrustGate(store=store, interactive=False)
        with patch("subprocess.run") as m:
            m.return_value = MagicMock(returncode=0, stdout="git@github.com:trusted/repo.git\n")
            result = gate.filter_and_gate([skill], project_dir=tmp_path)
        assert any(s.name == "proj-skill" for s in result)

    def test_untrusted_headless_skips_and_logs(self, tmp_path, caplog):
        """In non-interactive mode, untrusted project skills are skipped."""
        import logging

        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        skill = make_skill("evil-skill", "project")
        gate = TrustGate(store=store, interactive=False)
        with patch("subprocess.run") as m:
            m.return_value = MagicMock(
                returncode=0, stdout="https://github.com/stranger/evil.git\n"
            )
            with caplog.at_level(logging.WARNING):
                result = gate.filter_and_gate([skill], project_dir=tmp_path)
        assert not any(s.name == "evil-skill" for s in result)
        # The skip must be observable in logs, not silent.
        assert "untrusted" in caplog.text.lower() or "skipping" in caplog.text.lower()

    def test_interactive_consent_session_only(self, tmp_path):
        """Option 1 (session only) includes skills without writing to store."""
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        skill = make_skill("session-skill", "project")
        outputs = []
        gate = TrustGate(
            store=store,
            interactive=True,
            print_fn=outputs.append,
            input_fn=lambda _: "1",  # trust this session
        )
        with (
            patch("sys.stdin.isatty", return_value=True),
            patch("sys.stdout.isatty", return_value=True),
            patch("subprocess.run") as m,
        ):
            m.return_value = MagicMock(
                returncode=0, stdout="https://github.com/stranger/repo.git\n"
            )
            result = gate.filter_and_gate([skill], project_dir=tmp_path)
        assert any(s.name == "session-skill" for s in result)
        # Must NOT persist to trusted store
        assert not store.is_trusted("github.com/stranger/repo")

    def test_interactive_consent_permanent(self, tmp_path):
        """Option 2 (permanent) includes skills and persists to trusted store."""
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        skill = make_skill("perm-skill", "project")
        gate = TrustGate(
            store=store,
            interactive=True,
            print_fn=lambda _: None,
            input_fn=lambda _: "2",  # trust permanently
        )
        with (
            patch("sys.stdin.isatty", return_value=True),
            patch("sys.stdout.isatty", return_value=True),
            patch("subprocess.run") as m,
        ):
            m.return_value = MagicMock(
                returncode=0, stdout="https://github.com/stranger/repo.git\n"
            )
            result = gate.filter_and_gate([skill], project_dir=tmp_path)
        assert any(s.name == "perm-skill" for s in result)
        # Unlike option 1, the decision is written through to the store.
        assert store.is_trusted("github.com/stranger/repo")

    def test_interactive_consent_deny(self, tmp_path):
        """Option 3 (deny) excludes project skills."""
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        skill = make_skill("bad-skill", "project")
        gate = TrustGate(
            store=store,
            interactive=True,
            print_fn=lambda _: None,
            input_fn=lambda _: "3",  # deny
        )
        with (
            patch("sys.stdin.isatty", return_value=True),
            patch("sys.stdout.isatty", return_value=True),
            patch("subprocess.run") as m,
        ):
            m.return_value = MagicMock(
                returncode=0, stdout="https://github.com/stranger/repo.git\n"
            )
            result = gate.filter_and_gate([skill], project_dir=tmp_path)
        assert not any(s.name == "bad-skill" for s in result)

    def test_env_var_override_trusts_all(self, tmp_path, monkeypatch):
        """HIVE_TRUST_PROJECT_SKILLS=1 bypasses gating entirely."""
        monkeypatch.setenv("HIVE_TRUST_PROJECT_SKILLS", "1")
        store = TrustedRepoStore(tmp_path / "t.json")
        skill = make_skill("env-skill", "project")
        gate = TrustGate(store=store, interactive=False)
        # No subprocess patch needed: the env override short-circuits detection.
        result = gate.filter_and_gate([skill], project_dir=tmp_path)
        assert any(s.name == "env-skill" for s in result)

    def test_keyboard_interrupt_treated_as_deny(self, tmp_path):
        """Ctrl-C during consent prompt should deny cleanly."""
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        skill = make_skill("interrupted-skill", "project")
        gate = TrustGate(
            store=store,
            interactive=True,
            print_fn=lambda _: None,
            # Generator trick: raises KeyboardInterrupt when input is requested.
            input_fn=lambda _: (_ for _ in ()).throw(KeyboardInterrupt()),
        )
        with (
            patch("sys.stdin.isatty", return_value=True),
            patch("sys.stdout.isatty", return_value=True),
            patch("subprocess.run") as m,
        ):
            m.return_value = MagicMock(
                returncode=0, stdout="https://github.com/stranger/repo.git\n"
            )
            result = gate.filter_and_gate([skill], project_dir=tmp_path)
        assert not any(s.name == "interrupted-skill" for s in result)

    def test_security_notice_shown_once(self, tmp_path, monkeypatch):
        """Security notice (NFR-5) should be shown the first time only."""
        # Use a temp sentinel path
        sentinel = tmp_path / ".skill_trust_notice_shown"
        monkeypatch.setattr("framework.skills.trust._NOTICE_SENTINEL_PATH", sentinel)
        assert not sentinel.exists()
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        skill = make_skill("notice-skill", "project")
        output_lines: list[str] = []
        gate = TrustGate(
            store=store,
            interactive=True,
            print_fn=output_lines.append,
            input_fn=lambda _: "3",
        )
        with (
            patch("sys.stdin.isatty", return_value=True),
            patch("sys.stdout.isatty", return_value=True),
            patch("subprocess.run") as m,
        ):
            m.return_value = MagicMock(
                returncode=0, stdout="https://github.com/stranger/repo.git\n"
            )
            gate.filter_and_gate([skill], project_dir=tmp_path)
        # First run creates the sentinel file and prints the notice.
        assert sentinel.exists()
        assert any("Security notice" in line for line in output_lines)
        # Second run should NOT show the notice again
        output_lines.clear()
        skill2 = make_skill("notice-skill-2", "project")
        with (
            patch("sys.stdin.isatty", return_value=True),
            patch("sys.stdout.isatty", return_value=True),
            patch("subprocess.run") as m,
        ):
            m.return_value = MagicMock(
                returncode=0, stdout="https://github.com/stranger/repo.git\n"
            )
            gate.filter_and_gate([skill2], project_dir=tmp_path)
        assert not any("Security notice" in line for line in output_lines)

    def test_mixed_scopes_only_project_gated(self, tmp_path, monkeypatch):
        """Framework and user skills should pass through even if project skills are denied."""
        (tmp_path / ".git").mkdir()
        store = TrustedRepoStore(tmp_path / "t.json")
        fw_skill = make_skill("fw", "framework")
        user_skill = make_skill("usr", "user")
        proj_skill = make_skill("proj", "project")
        gate = TrustGate(
            store=store,
            interactive=True,
            print_fn=lambda _: None,
            input_fn=lambda _: "3",  # deny project skills
        )
        with (
            patch("sys.stdin.isatty", return_value=True),
            patch("sys.stdout.isatty", return_value=True),
            patch("subprocess.run") as m,
        ):
            m.return_value = MagicMock(
                returncode=0, stdout="https://github.com/stranger/repo.git\n"
            )
            result = gate.filter_and_gate([fw_skill, user_skill, proj_skill], project_dir=tmp_path)
        names = {s.name for s in result}
        assert "fw" in names
        assert "usr" in names
        assert "proj" not in names
+30 -5
View File
@@ -152,7 +152,8 @@ def test_register_mcp_server_uses_connection_manager_when_enabled(monkeypatch):
assert client.disconnect_calls == 0
def test_register_mcp_server_defaults_to_direct_client_behavior(monkeypatch):
def test_register_mcp_server_defaults_to_connection_manager(monkeypatch):
"""Default behavior uses the connection manager (reuse enabled by default)."""
registry = ToolRegistry()
created_clients: list[_RegistryFakeClient] = []
@@ -161,13 +162,16 @@ def test_register_mcp_server_defaults_to_direct_client_behavior(monkeypatch):
created_clients.append(client)
return client
def fail_if_manager_used():
raise AssertionError("connection manager should not be used by default")
class FakeManager:
def acquire(self, config):
return fake_client_factory(config)
def release(self, server_name):
pass
monkeypatch.setattr("framework.runner.mcp_client.MCPClient", fake_client_factory)
monkeypatch.setattr(
"framework.runner.mcp_connection_manager.MCPConnectionManager.get_instance",
fail_if_manager_used,
lambda: FakeManager(),
)
count = registry.register_mcp_server(
@@ -176,6 +180,27 @@ def test_register_mcp_server_defaults_to_direct_client_behavior(monkeypatch):
assert count == 1
assert len(created_clients) == 1
def test_register_mcp_server_direct_client_when_manager_disabled(monkeypatch):
    """When use_connection_manager=False, a direct MCPClient is created."""
    registry = ToolRegistry()
    created_clients: list[_RegistryFakeClient] = []

    # Capture every client the registry constructs so it can be inspected.
    def fake_client_factory(config):
        client = _RegistryFakeClient(config)
        created_clients.append(client)
        return client

    monkeypatch.setattr("framework.runner.mcp_client.MCPClient", fake_client_factory)

    count = registry.register_mcp_server(
        {"name": "direct", "transport": "stdio", "command": "echo"},
        use_connection_manager=False,
    )

    assert count == 1
    assert len(created_clients) == 1
    # The direct client is connected exactly once during registration.
    assert created_clients[0].connect_calls == 1
    registry.cleanup()
+290
View File
@@ -0,0 +1,290 @@
# Agent Skills User Guide
This guide covers how to use, create, and manage Agent Skills in the Hive framework. Agent Skills follow the open [Agent Skills standard](https://agentskills.io) — skills written for Claude Code, Cursor, or other compatible agents work in Hive unchanged.
## What are skills?
Skills are folders containing a `SKILL.md` file that teaches an agent how to perform a specific task. They can also bundle scripts, templates, and reference materials. Skills are loaded on demand — the agent sees a lightweight catalog at startup and pulls in full instructions only when relevant.
## Quick start
### Install a skill
Drop a skill folder into one of the discovery directories:
```bash
# Project-level (shared with the repo)
mkdir -p .hive/skills/my-skill
cat > .hive/skills/my-skill/SKILL.md << 'EOF'
---
name: my-skill
description: Does X when the user asks about Y.
---
# My Skill
Step-by-step instructions for the agent...
EOF
```
The agent will discover it automatically on the next session.
### List discovered skills
```bash
hive skill list
```
Output groups skills by scope:
```
PROJECT SKILLS
────────────────────────────────────
• my-skill
Does X when the user asks about Y.
/home/user/project/.hive/skills/my-skill/SKILL.md
USER SKILLS
────────────────────────────────────
• deep-research
Multi-step web research with source verification.
/home/user/.hive/skills/deep-research/SKILL.md
```
## Where to put skills
Hive scans five directories at startup, in this precedence order:
| Scope | Path | Use case |
|-------|------|----------|
| Project (Hive) | `<project>/.hive/skills/` | Skills specific to this repo |
| Project (cross-client) | `<project>/.agents/skills/` | Skills shared across Claude Code, Cursor, etc. |
| User (Hive) | `~/.hive/skills/` | Personal skills available in all projects |
| User (cross-client) | `~/.agents/skills/` | Personal cross-client skills |
| Framework | *(built-in)* | Default operational skills shipped with Hive |
**Precedence**: If two skills share the same name, the higher-precedence location wins. A project-level `code-review` skill overrides a user-level one with the same name.
**Cross-client paths**: The `.agents/skills/` directories are a convention shared across compatible agents. A skill installed at `~/.agents/skills/pdf-processing/` is visible to Hive, Claude Code, Cursor, and other compatible tools simultaneously.
## Creating a skill
### Directory structure
```
my-skill/
├── SKILL.md # Required — metadata + instructions
├── scripts/ # Optional — executable code
│ └── run.py
├── references/ # Optional — supplementary docs
│ └── api-reference.md
└── assets/ # Optional — templates, data files
└── template.json
```
### SKILL.md format
Every skill needs a `SKILL.md` with YAML frontmatter and a markdown body:
```markdown
---
name: my-skill
description: Extract and summarize PDF documents. Use when the user mentions PDFs or document extraction.
---
# PDF Processing
## When to use
Use this skill when the user needs to extract text from PDFs or merge documents.
## Steps
1. Check if pdfplumber is available...
2. Extract text using...
## Edge cases
- Scanned PDFs need OCR first...
```
### Frontmatter fields
| Field | Required | Description |
|-------|----------|-------------|
| `name` | Yes | Lowercase letters, numbers, hyphens. Must match the parent directory name. Max 64 chars. |
| `description` | Yes | What the skill does and when to use it. Max 1024 chars. Include keywords that help the agent match tasks. |
| `license` | No | License name or reference to a bundled LICENSE file. |
| `compatibility` | No | Environment requirements (e.g., "Requires git, docker"). |
| `metadata` | No | Arbitrary key-value pairs (author, version, etc.). |
| `allowed-tools` | No | Space-delimited list of pre-approved tools. |
### Writing good descriptions
The description is critical — it's what the agent uses to decide whether to activate a skill. Be specific:
```yaml
# Good — tells the agent what and when
description: Extract text and tables from PDF files, fill PDF forms, and merge multiple PDFs. Use when working with PDF documents or when the user mentions PDFs, forms, or document extraction.
# Bad — too vague for the agent to match
description: Helps with PDFs.
```
### Writing good instructions
The markdown body is loaded into the agent's context when the skill is activated. Tips:
- **Be procedural**: Step-by-step instructions work better than abstract descriptions.
- **Keep it focused**: Stay under 500 lines / 5000 tokens. Move detailed reference material to `references/`.
- **Use relative paths**: Reference bundled files with relative paths (`scripts/run.py`, `references/guide.md`).
- **Include examples**: Show sample inputs and expected outputs.
- **Cover edge cases**: Tell the agent what to do when things go wrong.
## How skills are activated
Skills use **progressive disclosure** — three tiers that keep context usage efficient:
### Tier 1: Catalog (always loaded)
At session start, the agent sees a compact catalog of all available skills (name + description only, ~50-100 tokens each). This is how it knows what skills exist.
### Tier 2: Instructions (on demand)
When the agent determines a skill is relevant to the current task, it reads the full `SKILL.md` body into context. This happens automatically — the agent matches the task against skill descriptions and activates the best fit.
### Tier 3: Resources (on demand)
When skill instructions reference supporting files (`scripts/extract.py`, `references/api-docs.md`), the agent reads those individually as needed.
### Pre-activated skills
Some agents are configured to load specific skills at session start (skipping the catalog phase). This is set in the agent's configuration:
```python
# In agent definition
skills = ["code-review", "deep-research"]
```
Pre-activated skills have their full instructions loaded from the start, without waiting for the agent to decide they're relevant.
## Trust and security
### Why trust gating exists
Project-level skills come from the repository being worked on. If you clone an untrusted repo that contains a `.hive/skills/` directory, those skills could inject instructions into the agent's system prompt. Trust gating prevents this.
**User-level and framework skills are always trusted.** Only project-scope skills go through trust gating.
### What happens with untrusted project skills
When Hive encounters project-level skills from a repo you haven't trusted before, it shows a consent prompt:
```
============================================================
SKILL TRUST REQUIRED
============================================================
The project at /home/user/new-project wants to load 2 skill(s)
that will inject instructions into the agent's system prompt.
Source: github.com/org/new-project
Skills requesting access:
• deploy-pipeline
"Automated deployment workflow for this project."
/home/user/new-project/.hive/skills/deploy-pipeline/SKILL.md
• code-standards
"Project-specific coding standards and review checklist."
/home/user/new-project/.hive/skills/code-standards/SKILL.md
Options:
1) Trust this session only
2) Trust permanently — remember for future runs
3) Deny — skip all project-scope skills from this repo
────────────────────────────────────────────────────────────
Select option (1-3):
```
### Trust a repo via CLI
To trust a repo permanently without the interactive prompt:
```bash
hive skill trust /path/to/project
```
This stores the trust decision in `~/.hive/trusted_repos.json`, keyed by the normalized git remote URL (e.g., `github.com/org/repo`).
### Automatic trust
Some repos are trusted automatically:
- **No git repo**: Directories without `.git/` are always trusted.
- **No remote**: Local-only git repos (no `origin` remote) are always trusted.
- **Localhost remotes**: Repos with `localhost`/`127.0.0.1` remotes are always trusted.
- **Own-remote patterns**: Repos matching patterns in `~/.hive/own_remotes` or the `HIVE_OWN_REMOTES` env var are always trusted.
### Configure own-remote patterns
If you trust all repos from your organization:
```bash
# Via file (one pattern per line)
echo "github.com/my-org/*" >> ~/.hive/own_remotes
echo "gitlab.com/my-team/*" >> ~/.hive/own_remotes
# Via environment variable (comma-separated)
export HIVE_OWN_REMOTES="github.com/my-org/*,github.com/my-corp/*"
```
### CI / headless environments
In non-interactive environments, untrusted project skills are skipped and a warning is logged. To trust them explicitly:
```bash
export HIVE_TRUST_PROJECT_SKILLS=1
hive run my-agent
```
## Default skills
Hive ships with six built-in operational skills that provide runtime resilience. These are always loaded (unless disabled) and appear as "Operational Protocols" in the agent's system prompt.
| Skill | Purpose |
|-------|---------|
| `hive.note-taking` | Structured working notes in shared memory |
| `hive.batch-ledger` | Track per-item status in batch operations |
| `hive.context-preservation` | Save context before context window pruning |
| `hive.quality-monitor` | Self-assess output quality periodically |
| `hive.error-recovery` | Structured error classification and recovery |
| `hive.task-decomposition` | Break complex tasks into subtasks |
### Disable default skills
In your agent configuration:
```python
# Disable a specific default skill
default_skills = {
"hive.quality-monitor": {"enabled": False},
}
# Disable all default skills
default_skills = {
"_all": {"enabled": False},
}
```
## Environment variables
| Variable | Description |
|----------|-------------|
| `HIVE_TRUST_PROJECT_SKILLS=1` | Bypass trust gating for all project-level skills (CI override) |
| `HIVE_OWN_REMOTES` | Comma-separated glob patterns for auto-trusted remotes (e.g., `github.com/myorg/*`) |
## Compatibility with other agents
Skills written for any Agent Skills-compatible agent work in Hive:
- Place them in `.agents/skills/` (cross-client) or `.hive/skills/` (Hive-specific).
- The `SKILL.md` format is identical across Claude Code, Cursor, Gemini CLI, and others.
- Skills installed at `~/.agents/skills/` are visible to all compatible agents on your machine.
See the [Agent Skills specification](https://agentskills.io/specification) for the full format reference.
+11 -60
View File
@@ -1908,69 +1908,20 @@ if ($CodexAvailable) {
Write-Host ""
}
# Setup-only mode: show manual instructions
# Final instructions and auto-launch
Write-Host "API keys saved as User environment variables. New terminals pick them up automatically." -ForegroundColor DarkGray
Write-Host "Launch anytime with " -NoNewline -ForegroundColor DarkGray
Write-Color -Text "hive open" -Color Cyan -NoNewline
Write-Host ". Run .\quickstart.ps1 again to reconfigure." -ForegroundColor DarkGray
Write-Host ""
if ($FrontendBuilt) {
Write-Color -Text "═══════════════════════════════════════════════════════" -Color Yellow
Write-Host ""
Write-Color -Text " IMPORTANT: Restart your terminal now!" -Color Yellow
Write-Host ""
Write-Color -Text "═══════════════════════════════════════════════════════" -Color Yellow
Write-Host ""
Write-Host 'Environment variables (uv, API keys) are now configured, but you need to'
Write-Host 'restart your terminal for them to take effect in new sessions.'
Write-Host ""
Write-Color -Text "Run an Agent:" -Color White
Write-Host ""
Write-Host " Quickstart only sets things up. Launch the dashboard when you're ready:"
Write-Color -Text " hive open" -Color Cyan
Write-Host ""
if ($SelectedProviderId -or $credKey) {
Write-Color -Text "Note:" -Color White
Write-Host "- uv has been added to your User PATH"
if ($SelectedProviderId -and $SelectedEnvVar) {
Write-Host "- $SelectedEnvVar is set for LLM access"
}
if ($credKey) {
Write-Host "- HIVE_CREDENTIAL_KEY is set for credential encryption"
}
Write-Host "- All variables will persist across reboots"
Write-Host ""
}
Write-Color -Text 'Run .\quickstart.ps1 again to reconfigure.' -Color DarkGray
Write-Color -Text "Launching dashboard..." -Color White
Write-Host ""
& hive open
} else {
Write-Color -Text "═══════════════════════════════════════════════════════" -Color Yellow
Write-Host ""
Write-Color -Text " IMPORTANT: Restart your terminal now!" -Color Yellow
Write-Host ""
Write-Color -Text "═══════════════════════════════════════════════════════" -Color Yellow
Write-Host ""
Write-Host 'Environment variables (uv, API keys) are now configured, but you need to'
Write-Host 'restart your terminal for them to take effect in new sessions.'
Write-Host ""
Write-Color -Text "Run an Agent:" -Color White
Write-Host ""
Write-Host " Frontend build was skipped or failed. Once the dashboard is available, launch it with:"
Write-Color -Text "Frontend build was skipped or failed." -Color Yellow -NoNewline
Write-Host " Launch manually when ready:"
Write-Color -Text " hive open" -Color Cyan
Write-Host ""
if ($SelectedProviderId -or $credKey) {
Write-Color -Text "Note:" -Color White
Write-Host "- uv has been added to your User PATH"
if ($SelectedProviderId -and $SelectedEnvVar) {
Write-Host "- $SelectedEnvVar is set for LLM access"
}
if ($credKey) {
Write-Host "- HIVE_CREDENTIAL_KEY is set for credential encryption"
}
Write-Host "- All variables will persist across reboots"
Write-Host ""
}
Write-Color -Text 'Run .\quickstart.ps1 again to reconfigure.' -Color DarkGray
Write-Host ""
}
+8 -21
View File
@@ -1810,29 +1810,16 @@ if [ "$CODEX_AVAILABLE" = true ]; then
echo ""
fi
echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BOLD}IMPORTANT: Load your new configuration${NC}"
echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""
echo -e " Your API keys have been saved to ${CYAN}$SHELL_RC_FILE${NC}"
echo -e " To use them, either:"
echo ""
echo -e " ${GREEN}Option 1:${NC} Source your shell config now:"
echo -e " ${CYAN}source $SHELL_RC_FILE${NC}"
echo ""
echo -e " ${GREEN}Option 2:${NC} Open a new terminal window"
echo ""
echo -e "${YELLOW}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${DIM}API keys saved to ${CYAN}$SHELL_RC_FILE${NC}${DIM}. New terminals pick them up automatically.${NC}"
echo -e "${DIM}Launch anytime with ${CYAN}hive open${NC}${DIM}. Run ./quickstart.sh again to reconfigure.${NC}"
echo ""
echo -e "${BOLD}Run an Agent:${NC}"
echo ""
if [ "$FRONTEND_BUILT" = true ]; then
echo -e " Quickstart only sets things up. Launch the dashboard when you're ready:"
echo -e "${BOLD}Launching dashboard...${NC}"
echo ""
hive open
else
echo -e " Frontend build was skipped or failed. Once the dashboard is available, launch it with:"
echo -e "${YELLOW}Frontend build was skipped or failed.${NC} Launch manually when ready:"
echo -e " ${CYAN}hive open${NC}"
echo ""
fi
echo -e " ${CYAN}hive open${NC}"
echo ""
echo -e "${DIM}Run ./quickstart.sh again to reconfigure.${NC}"
echo ""
+127 -40
View File
@@ -1,17 +1,21 @@
#!/usr/bin/env python3
"""Open a browser-based viewer for Hive LLM debug JSONL sessions.
Starts a local HTTP server and loads session data on demand (one at a time).
Usage:
uv run --no-project scripts/llm_debug_log_visualizer.py
uv run --no-project scripts/llm_debug_log_visualizer.py --no-open
uv run --no-project scripts/llm_debug_log_visualizer.py --session <execution_id>
uv run --no-project scripts/llm_debug_log_visualizer.py --port 8080
uv run --no-project scripts/llm_debug_log_visualizer.py --output debug.html
"""
from __future__ import annotations
import argparse
import http.server
import json
import tempfile
import urllib.parse
import webbrowser
from collections import defaultdict
from dataclasses import dataclass
@@ -55,10 +59,21 @@ def _parse_args() -> argparse.Namespace:
default=200,
help="Maximum number of newest log files to scan.",
)
parser.add_argument(
"--port",
type=int,
default=0,
help="Port for the local server (0 = auto-pick a free port).",
)
parser.add_argument(
"--no-open",
action="store_true",
help="Generate the HTML but do not open a browser.",
help="Start the server but do not open a browser.",
)
parser.add_argument(
"--include-tests",
action="store_true",
help="Show test/mock sessions (hidden by default).",
)
return parser.parse_args()
@@ -117,8 +132,29 @@ def _format_timestamp(raw: str) -> str:
return raw
def _is_test_session(execution_id: str, records: list[dict[str, Any]]) -> bool:
"""Return True for sessions that look like test artifacts."""
if execution_id.startswith("<MagicMock"):
return True
models = {
str(r.get("token_counts", {}).get("model", ""))
for r in records
if isinstance(r.get("token_counts"), dict)
}
models.discard("")
# Sessions that only used the mock LLM provider.
if models and models <= {"mock"}:
return True
# Sessions with no real model at all (empty string or missing).
if not models:
return True
return False
def _group_sessions(
records: list[dict[str, Any]],
*,
include_tests: bool = False,
) -> tuple[list[SessionSummary], dict[str, list[dict[str, Any]]]]:
by_session: dict[str, list[dict[str, Any]]] = defaultdict(list)
for record in records:
@@ -126,6 +162,13 @@ def _group_sessions(
if execution_id:
by_session[execution_id].append(record)
if not include_tests:
by_session = {
eid: recs
for eid, recs in by_session.items()
if not _is_test_session(eid, recs)
}
summaries: list[SessionSummary] = []
for execution_id, session_records in by_session.items():
session_records.sort(
@@ -174,7 +217,6 @@ def _group_sessions(
def _render_html(
summaries: list[SessionSummary],
sessions: dict[str, list[dict[str, Any]]],
initial_session_id: str,
) -> str:
summaries_data = [
@@ -193,16 +235,6 @@ def _render_html(
for summary in summaries
]
sessions_data = {
execution_id: sorted(
records,
key=lambda record: (
str(record.get("timestamp", "")),
record.get("iteration", 0),
),
)
for execution_id, records in sessions.items()
}
initial = initial_session_id or (summaries[0].execution_id if summaries else "")
return f"""<!DOCTYPE html>
<html lang="en">
@@ -579,10 +611,9 @@ def _render_html(
</div>
<script id="session-summaries" type="application/json">{json.dumps(summaries_data, ensure_ascii=False)}</script>
<script id="session-records" type="application/json">{json.dumps(sessions_data, ensure_ascii=False)}</script>
<script>
const summaries = JSON.parse(document.getElementById("session-summaries").textContent);
const recordsBySession = JSON.parse(document.getElementById("session-records").textContent);
const recordCache = {{}};
const initialSessionId = {json.dumps(initial, ensure_ascii=False)};
const sessionSearch = document.getElementById("sessionSearch");
@@ -746,10 +777,18 @@ def _render_html(
`;
}}
function renderSession(sessionId) {{
async function fetchSession(sessionId) {{
if (recordCache[sessionId]) return recordCache[sessionId];
const resp = await fetch(`/api/session/${{encodeURIComponent(sessionId)}}`);
if (!resp.ok) return [];
const data = await resp.json();
recordCache[sessionId] = data;
return data;
}}
async function renderSession(sessionId) {{
activeSessionId = sessionId;
const summary = summaries.find((entry) => entry.execution_id === sessionId);
const records = recordsBySession[sessionId] || [];
renderSessionChooser();
@@ -773,6 +812,9 @@ def _render_html(
renderMetaCard("Source file", summary.log_file),
].join("");
turnsEl.innerHTML = '<div class="empty">Loading session\u2026</div>';
const records = await fetchSession(sessionId);
if (activeSessionId !== sessionId) return;
turnsEl.innerHTML = records.length
? records.map((record) => renderTurn(record)).join("")
: '<div class="empty">This session has no turn records.</div>';
@@ -804,7 +846,8 @@ def _render_html(
}});
const hashSession = decodeURIComponent(window.location.hash.replace(/^#/, ""));
const bootSession = recordsBySession[hashSession] ? hashSession : activeSessionId;
const knownIds = new Set(summaries.map((s) => s.execution_id));
const bootSession = knownIds.has(hashSession) ? hashSession : activeSessionId;
renderSessionChooser();
renderSession(bootSession);
</script>
@@ -813,28 +856,70 @@ def _render_html(
"""
def _write_report(html_report: str, output: Path | None) -> Path:
if output is not None:
output.parent.mkdir(parents=True, exist_ok=True)
output.write_text(html_report, encoding="utf-8")
return output
def _sort_records(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
return sorted(
records,
key=lambda r: (str(r.get("timestamp", "")), r.get("iteration", 0)),
)
with tempfile.NamedTemporaryFile(
mode="w",
encoding="utf-8",
prefix="hive_llm_debug_",
suffix=".html",
delete=False,
dir="/tmp",
) as handle:
handle.write(html_report)
return Path(handle.name)
def _run_server(
    html: str,
    sessions: dict[str, list[dict[str, Any]]],
    port: int,
    no_open: bool,
) -> None:
    """Serve the viewer page and a per-session JSON API on localhost.

    ``port=0`` lets the OS pick a free port. Blocks until interrupted
    with Ctrl+C, then shuts the server down cleanly.
    """
    page_bytes = html.encode("utf-8")
    api_prefix = "/api/session/"

    class _ViewerHandler(http.server.BaseHTTPRequestHandler):
        def do_GET(self) -> None:
            if self.path == "/":
                self._send(200, "text/html; charset=utf-8", page_bytes)
                return
            if not self.path.startswith(api_prefix):
                self.send_error(404)
                return
            session_id = urllib.parse.unquote(self.path[len(api_prefix):])
            session_records = sessions.get(session_id)
            if session_records is None:
                # Unknown session: empty JSON array keeps the client simple.
                self._send(404, "application/json", b"[]")
            else:
                payload = json.dumps(
                    _sort_records(session_records), ensure_ascii=False
                ).encode("utf-8")
                self._send(200, "application/json", payload)

        def _send(self, code: int, content_type: str, body: bytes) -> None:
            self.send_response(code)
            self.send_header("Content-Type", content_type)
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, format: str, *args: object) -> None:
            # Suppress the default per-request stderr logging.
            pass

    server = http.server.HTTPServer(("127.0.0.1", port), _ViewerHandler)
    url = f"http://127.0.0.1:{server.server_address[1]}"
    print(f"Serving at {url} (Ctrl+C to stop)")
    if not no_open:
        webbrowser.open(url)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nStopped.")
    finally:
        server.server_close()
def main() -> int:
args = _parse_args()
records = _discover_records(args.logs_dir.expanduser(), args.limit_files)
summaries, sessions = _group_sessions(records)
summaries, sessions = _group_sessions(
records, include_tests=args.include_tests
)
initial_session_id = args.session or (
summaries[0].execution_id if summaries else ""
@@ -843,13 +928,15 @@ def main() -> int:
print(f"session not found: {initial_session_id}")
return 1
html_report = _render_html(summaries, sessions, initial_session_id)
output_path = _write_report(html_report, args.output)
print(output_path)
html_report = _render_html(summaries, initial_session_id)
if not args.no_open:
webbrowser.open(output_path.resolve().as_uri())
if args.output:
args.output.parent.mkdir(parents=True, exist_ok=True)
args.output.write_text(html_report, encoding="utf-8")
print(args.output)
return 0
_run_server(html_report, sessions, args.port, args.no_open)
return 0