Merge branch 'feature/hive-experimental-comp-pipeline' into feat/open-hive-colony

This commit is contained in:
Bryan
2026-04-08 11:50:39 -07:00
167 changed files with 4504 additions and 7221 deletions
+2
View File
@@ -70,6 +70,8 @@ tmp/
temp/
exports/*
exports.old*
artifacts/*
.claude/settings.local.json
+17 -65
View File
@@ -1,71 +1,23 @@
"""
Aden Hive Framework: A goal-driven agent runtime optimized for Builder observability.
"""Hive Agent Framework.
The runtime is designed around DECISIONS, not just actions. Every significant
choice the agent makes is captured with:
- What it was trying to do (intent)
- What options it considered
- What it chose and why
- What happened as a result
- Whether that was good or bad (evaluated post-hoc)
This gives the Builder LLM the information it needs to improve agent behavior.
## Testing Framework
The framework includes a Goal-Based Testing system (Goal Agent Eval):
- Generate tests from Goal success_criteria and constraints
- Mandatory user approval before tests are stored
- Parallel test execution with error categorization
- Debug tools with fix suggestions
See `framework.testing` for details.
Core classes:
AgentHost -- hosts agents, manages entry points and pipeline
Orchestrator -- routes between nodes in a graph
AgentLoop -- the LLM + tool execution loop (one per node)
AgentLoader -- loads agent.json from disk, builds pipeline
DecisionTracker -- records decisions for post-hoc analysis
"""
from framework.llm import LLMProvider
try:
from framework.llm import AnthropicProvider # noqa: F401
except ImportError:
pass
from framework.runner import AgentRunner
from framework.runtime.core import Runtime
from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome
from framework.schemas.run import Problem, Run, RunSummary
# Testing framework
from framework.testing import (
ApprovalStatus,
DebugTool,
ErrorCategory,
Test,
TestResult,
TestStorage,
TestSuiteResult,
)
from framework.agent_loop import AgentLoop
from framework.host import AgentHost
from framework.loader import AgentLoader
from framework.orchestrator import Orchestrator
from framework.tracker import DecisionTracker
__all__ = [
# Schemas
"Decision",
"Option",
"Outcome",
"DecisionEvaluation",
"Run",
"RunSummary",
"Problem",
# Runtime
"Runtime",
# LLM
"LLMProvider",
"AnthropicProvider",
# Runner
"AgentRunner",
# Testing
"Test",
"TestResult",
"TestSuiteResult",
"TestStorage",
"ApprovalStatus",
"ErrorCategory",
"DebugTool",
"AgentHost",
"AgentLoader",
"AgentLoop",
"DecisionTracker",
"Orchestrator",
]
+32
View File
@@ -0,0 +1,32 @@
"""Agent loop -- the core agent execution primitive."""
from framework.agent_loop.conversation import ( # noqa: F401
ConversationStore,
Message,
NodeConversation,
)
# Lazy import to avoid circular dependency with graph/event_loop/
# (graph/event_loop/* imports framework.graph.conversation which is a shim
# pointing here, which would trigger agent_loop.py loading, which imports
# graph/event_loop/* again)
def __getattr__(name: str):
if name in ("AgentLoop", "JudgeProtocol", "JudgeVerdict", "LoopConfig", "OutputAccumulator"):
from framework.agent_loop.agent_loop import (
AgentLoop,
JudgeProtocol,
JudgeVerdict,
LoopConfig,
OutputAccumulator,
)
_exports = {
"AgentLoop": AgentLoop,
"JudgeProtocol": JudgeProtocol,
"JudgeVerdict": JudgeVerdict,
"LoopConfig": LoopConfig,
"OutputAccumulator": OutputAccumulator,
}
return _exports[name]
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -21,16 +21,16 @@ from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from typing import Any
from framework.graph.conversation import ConversationStore, NodeConversation
from framework.graph.event_loop import types as event_loop_types
from framework.graph.event_loop.compaction import (
from framework.agent_loop.conversation import ConversationStore, NodeConversation
from framework.agent_loop.internals import types as event_loop_types
from framework.agent_loop.internals.compaction import (
build_emergency_summary,
build_llm_compaction_prompt,
compact,
format_messages_for_summary,
llm_compact,
)
from framework.graph.event_loop.cursor_persistence import (
from framework.agent_loop.internals.cursor_persistence import (
RestoredState,
check_pause,
drain_injection_queue,
@@ -38,7 +38,7 @@ from framework.graph.event_loop.cursor_persistence import (
restore,
write_cursor,
)
from framework.graph.event_loop.event_publishing import (
from framework.agent_loop.internals.event_publishing import (
generate_action_plan,
log_skip_judge,
publish_context_usage,
@@ -54,27 +54,24 @@ from framework.graph.event_loop.event_publishing import (
publish_tool_started,
run_hooks,
)
from framework.graph.event_loop.judge_pipeline import (
from framework.agent_loop.internals.judge_pipeline import (
SubagentJudge as SharedSubagentJudge,
judge_turn,
)
from framework.graph.event_loop.stall_detector import (
from framework.agent_loop.internals.stall_detector import (
fingerprint_tool_calls,
is_stalled,
is_tool_doom_loop,
ngram_similarity,
)
from framework.graph.event_loop.subagent_executor import execute_subagent
from framework.graph.event_loop.synthetic_tools import (
from framework.agent_loop.internals.synthetic_tools import (
build_ask_user_multiple_tool,
build_ask_user_tool,
build_delegate_tool,
build_escalate_tool,
build_report_to_parent_tool,
build_set_output_tool,
handle_set_output,
)
from framework.graph.event_loop.tool_result_handler import (
from framework.agent_loop.internals.tool_result_handler import (
build_json_preview,
execute_tool,
extract_json_metadata,
@@ -82,12 +79,12 @@ from framework.graph.event_loop.tool_result_handler import (
restore_spill_counter,
truncate_tool_result,
)
from framework.graph.event_loop.types import (
from framework.agent_loop.internals.types import (
JudgeProtocol,
JudgeVerdict,
TriggerEvent,
)
from framework.graph.node import NodeContext, NodeProtocol, NodeResult
from framework.orchestrator.node import NodeContext, NodeProtocol, NodeResult
from framework.llm.capabilities import supports_image_tool_results
from framework.llm.provider import Tool, ToolResult, ToolUse
from framework.llm.stream_events import (
@@ -96,8 +93,8 @@ from framework.llm.stream_events import (
TextDeltaEvent,
ToolCallEvent,
)
from framework.runtime.event_bus import EventBus
from framework.runtime.llm_debug_logger import log_llm_turn
from framework.host.event_bus import EventBus
from framework.tracker.llm_debug_logger import log_llm_turn
logger = logging.getLogger(__name__)
@@ -163,43 +160,9 @@ def _is_context_too_large_error(exc: BaseException) -> bool:
# ---------------------------------------------------------------------------
# Escalation receiver (temporary routing target for subagent → user input)
# ---------------------------------------------------------------------------
class _EscalationReceiver:
"""Temporary receiver registered in node_registry for subagent escalation routing.
When a subagent calls ``report_to_parent(wait_for_response=True)``, the callback
creates one of these, registers it under a unique escalation ID in the executor's
``node_registry``, and awaits ``wait()``. The TUI / runner calls
``inject_input(escalation_id, content)`` which the ``ExecutionStream`` routes here
via ``inject_event()`` matching the same ``hasattr(node, "inject_event")`` check
used for regular ``EventLoopNode`` instances.
"""
def __init__(self) -> None:
self._event = asyncio.Event()
self._response: str | None = None
self._awaiting_input = True # So inject_message() can prefer us
async def inject_event(
self,
content: str,
*,
is_client_input: bool = False,
image_content: list[dict] | None = None,
) -> None:
"""Called by ExecutionStream.inject_input() when the user responds."""
self._response = content
self._event.set()
async def wait(self) -> str | None:
"""Block until inject_event() delivers the user's response."""
await self._event.wait()
return self._response
# ---------------------------------------------------------------------------
# Judge protocol (simple 3-action interface for event loop evaluation)
# ---------------------------------------------------------------------------
@@ -224,7 +187,7 @@ OutputAccumulator = event_loop_types.OutputAccumulator
# ---------------------------------------------------------------------------
class EventLoopNode(NodeProtocol):
class AgentLoop(NodeProtocol):
"""Multi-turn LLM streaming loop with tool execution and judge evaluation.
Lifecycle:
@@ -284,9 +247,6 @@ class EventLoopNode(NodeProtocol):
# Monotonic counter for spillover file naming (web_search_1.txt, etc.)
self._spill_counter: int = 0
# Subagent mark_complete: when True, _evaluate returns ACCEPT immediately
self._mark_complete_flag = False
# Counter for subagent instances (1, 2, 3, ...)
self._subagent_instance_counter: dict[str, int] = {}
def validate_input(self, ctx: NodeContext) -> list[str]:
"""Validate hard requirements only.
@@ -307,7 +267,7 @@ class EventLoopNode(NodeProtocol):
async def execute(self, ctx: NodeContext) -> NodeResult:
"""Run the event loop."""
logger.debug(
"[EventLoopNode.execute] Starting execution for node=%s, stream=%s",
"[AgentLoop.execute] Starting execution for node=%s, stream=%s",
ctx.node_id,
ctx.stream_id,
)
@@ -320,7 +280,7 @@ class EventLoopNode(NodeProtocol):
# Store skill dirs for AS-9 file-read interception in _execute_tool
self._skill_dirs: list[str] = ctx.skill_dirs
logger.debug(
"[EventLoopNode.execute] node_id=%s, execution_id=%s, max_iterations=%d",
"[AgentLoop.execute] node_id=%s, execution_id=%s, max_iterations=%d",
node_id,
execution_id,
self._config.max_iterations,
@@ -402,7 +362,7 @@ class EventLoopNode(NodeProtocol):
# execution preamble and node-type preamble. The stored
# prompt may be stale after code changes or when runtime-
# injected context (e.g. worker identity) has changed.
from framework.graph.prompting import build_system_prompt_for_node_context
from framework.orchestrator.prompting import build_system_prompt_for_node_context
_current_prompt = build_system_prompt_for_node_context(ctx)
if conversation.system_prompt != _current_prompt:
@@ -425,7 +385,7 @@ class EventLoopNode(NodeProtocol):
await self._conversation_store.clear()
# Fresh conversation: either isolated mode or first node in continuous mode.
from framework.graph.prompting import build_system_prompt_for_node_context
from framework.orchestrator.prompting import build_system_prompt_for_node_context
system_prompt = build_system_prompt_for_node_context(ctx)
@@ -484,7 +444,7 @@ class EventLoopNode(NodeProtocol):
# 2a. Guard: ensure at least one non-system message exists.
# A restored conversation may have 0 messages if phase_id filtering
# removes them all, or if a prior run stored metadata without messages
# (e.g. subagent that failed before the first LLM call).
# (e.g. node that failed before the first LLM call).
if conversation.message_count == 0:
initial_message = self._build_initial_message(ctx)
if initial_message:
@@ -502,37 +462,10 @@ class EventLoopNode(NodeProtocol):
tools.append(self._build_ask_user_tool())
if stream_id == "queen":
tools.append(self._build_ask_user_multiple_tool())
# Workers/subagents can escalate blockers to the queen.
# Workers can escalate blockers to the queen.
if stream_id not in ("queen", "judge"):
tools.append(self._build_escalate_tool())
# Add delegate_to_sub_agent tool if:
# - Node has sub_agents defined
# - We are NOT in subagent mode (prevents nested delegation)
if not ctx.is_subagent_mode:
sub_agents = getattr(ctx.node_spec, "sub_agents", None) or []
if sub_agents:
delegate_tool = self._build_delegate_tool(sub_agents, ctx.node_registry)
if delegate_tool:
tools.append(delegate_tool)
logger.info(
"[%s] delegate_to_sub_agent injected (sub_agents=%s)",
node_id,
sub_agents,
)
else:
logger.error(
"[%s] _build_delegate_tool returned None for sub_agents=%s",
node_id,
sub_agents,
)
else:
logger.debug("[%s] Skipped delegate tool (is_subagent_mode=True)", node_id)
# Add report_to_parent tool for sub-agents with a report callback
if ctx.is_subagent_mode and ctx.report_callback is not None:
tools.append(self._build_report_to_parent_tool())
logger.info(
"[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s",
node_id,
@@ -565,11 +498,11 @@ class EventLoopNode(NodeProtocol):
# 6. Main loop
logger.debug(
"[EventLoopNode.execute] Entering main loop, start_iteration=%d", start_iteration
"[AgentLoop.execute] Entering main loop, start_iteration=%d", start_iteration
)
for iteration in range(start_iteration, self._config.max_iterations):
iter_start = time.time()
logger.debug("[EventLoopNode.execute] iteration=%d starting", iteration)
logger.debug("[AgentLoop.execute] iteration=%d starting", iteration)
# 6a. Check pause (no current-iteration data yet — only log_node_complete needed)
if await self._check_pause(ctx, conversation, iteration):
@@ -601,18 +534,18 @@ class EventLoopNode(NodeProtocol):
# 6b. Drain injection queue
logger.debug(
"[EventLoopNode.execute] iteration=%d: draining injection queue...", iteration
"[AgentLoop.execute] iteration=%d: draining injection queue...", iteration
)
drained_injections = await self._drain_injection_queue(conversation, ctx)
logger.debug(
"[EventLoopNode.execute] iteration=%d: drained %d injections",
"[AgentLoop.execute] iteration=%d: drained %d injections",
iteration,
drained_injections,
)
# 6b1. Drain trigger queue (framework-level signals)
drained_triggers = await self._drain_trigger_queue(conversation)
logger.debug(
"[EventLoopNode.execute] iteration=%d: drained %d triggers",
"[AgentLoop.execute] iteration=%d: drained %d triggers",
iteration,
drained_triggers,
)
@@ -685,8 +618,6 @@ class EventLoopNode(NodeProtocol):
"ask_user",
"ask_user_multiple",
"escalate",
"delegate_to_sub_agent",
"report_to_parent",
}
synthetic = [t for t in tools if t.name in _synthetic_names]
tools.clear()
@@ -696,11 +627,11 @@ class EventLoopNode(NodeProtocol):
# 6b3. Dynamic prompt refresh (phase switching / memory refresh)
if ctx.dynamic_prompt_provider is not None or ctx.dynamic_memory_provider is not None:
if ctx.dynamic_prompt_provider is not None:
from framework.graph.prompting import stamp_prompt_datetime
from framework.orchestrator.prompting import stamp_prompt_datetime
_new_prompt = stamp_prompt_datetime(ctx.dynamic_prompt_provider())
else:
from framework.graph.prompting import build_system_prompt_for_node_context
from framework.orchestrator.prompting import build_system_prompt_for_node_context
_new_prompt = build_system_prompt_for_node_context(ctx)
if _new_prompt != conversation.system_prompt:
@@ -743,7 +674,7 @@ class EventLoopNode(NodeProtocol):
len(conversation.messages),
)
logger.debug(
"[EventLoopNode.execute] iteration=%d: entering _run_single_turn loop", iteration
"[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration
)
_stream_retry_count = 0
_turn_cancelled = False
@@ -752,7 +683,7 @@ class EventLoopNode(NodeProtocol):
while True:
try:
logger.debug(
"[EventLoopNode.execute] iteration=%d: calling _run_single_turn (retry=%d)",
"[AgentLoop.execute] iteration=%d: calling _run_single_turn (retry=%d)",
iteration,
_stream_retry_count,
)
@@ -768,12 +699,12 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
request_system_prompt,
request_messages,
reported_to_parent,
_,
) = await self._run_single_turn(
ctx, conversation, tools, iteration, accumulator
)
logger.debug(
"[EventLoopNode.execute] iteration=%d:"
"[AgentLoop.execute] iteration=%d:"
" _run_single_turn completed successfully",
iteration,
)
@@ -842,13 +773,13 @@ class EventLoopNode(NodeProtocol):
break # success — exit retry loop
except TurnCancelled:
logger.debug("[EventLoopNode.execute] iteration=%d: TurnCancelled", iteration)
logger.debug("[AgentLoop.execute] iteration=%d: TurnCancelled", iteration)
_turn_cancelled = True
break
except Exception as e:
logger.debug(
"[EventLoopNode.execute] iteration=%d:"
"[AgentLoop.execute] iteration=%d:"
" Exception in _run_single_turn: %s (%s)",
iteration,
type(e).__name__,
@@ -1024,7 +955,7 @@ class EventLoopNode(NodeProtocol):
and not outputs_set
and not user_input_requested
and not queen_input_requested
and not reported_to_parent
)
if truly_empty and accumulator is not None:
missing = self._get_missing_output_keys(
@@ -1276,14 +1207,14 @@ class EventLoopNode(NodeProtocol):
# blocking and resumption.
_is_worker = (
stream_id not in ("queen", "judge")
and not ctx.is_subagent_mode
and not False
and not ctx.supports_direct_user_io
and self._event_bus is not None
)
_worker_no_tool_turn = (
not real_tool_results
and not outputs_set
and not reported_to_parent
and not queen_input_requested
and not user_input_requested
)
@@ -1733,7 +1664,7 @@ class EventLoopNode(NodeProtocol):
# 6i. Judge evaluation
should_judge = (
ctx.is_subagent_mode # Always evaluate subagents
False
or (iteration + 1) % self._config.judge_every_n_turns == 0
or not real_tool_results # no real tool calls = natural stop
)
@@ -1789,7 +1720,7 @@ class EventLoopNode(NodeProtocol):
missing = self._get_missing_output_keys(
accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
)
if missing and self._judge is not None and not self._mark_complete_flag:
if missing and self._judge is not None :
hint = (
f"Task incomplete. Required outputs not yet produced: {missing}. "
f"Follow your system prompt instructions to complete the work."
@@ -1988,7 +1919,7 @@ class EventLoopNode(NodeProtocol):
image_content: Optional list of OpenAI-style image blocks to attach.
"""
logger.debug(
"[EventLoopNode.inject_event] content_len=%d,"
"[AgentLoop.inject_event] content_len=%d,"
" is_client_input=%s, has_images=%s,"
" queue_size_before=%d",
len(content) if content else 0,
@@ -1998,15 +1929,15 @@ class EventLoopNode(NodeProtocol):
)
try:
await self._injection_queue.put((content, is_client_input, image_content))
logger.debug("[EventLoopNode.inject_event] Message queued successfully")
logger.debug("[AgentLoop.inject_event] Message queued successfully")
except Exception as e:
logger.exception("[EventLoopNode.inject_event] Failed to queue message: %s", e)
logger.exception("[AgentLoop.inject_event] Failed to queue message: %s", e)
raise
try:
self._input_ready.set()
logger.debug("[EventLoopNode.inject_event] _input_ready.set() called")
logger.debug("[AgentLoop.inject_event] _input_ready.set() called")
except Exception as e:
logger.exception("[EventLoopNode.inject_event] Failed to set _input_ready: %s", e)
logger.exception("[AgentLoop.inject_event] Failed to set _input_ready: %s", e)
raise
async def inject_trigger(self, trigger: TriggerEvent) -> None:
@@ -2157,7 +2088,6 @@ class EventLoopNode(NodeProtocol):
ask_user_prompt = ""
ask_user_options: list[str] | None = None
queen_input_requested = False
reported_to_parent = False
# Accumulate ALL tool calls across inner iterations for L3 logging.
# Unlike real_tool_results (reset each inner iteration), this persists.
logged_tool_calls: list[dict] = []
@@ -2231,16 +2161,28 @@ class EventLoopNode(NodeProtocol):
):
if isinstance(event, TextDeltaEvent):
accumulated_text = event.snapshot
await self._publish_text_delta(
stream_id,
node_id,
event.content,
event.snapshot,
ctx,
execution_id,
iteration=iteration,
inner_turn=inner_turn,
)
# Filter <think>...</think> blocks from client output.
# Content inside think tags is internal reasoning -- only
# the text after </think> is shown to the user.
_content = event.content
if "<think>" in event.snapshot and "</think>" not in event.snapshot:
_content = "" # still inside think block
elif "</think>" in _content:
# End of think block -- emit only text after the tag
_content = _content.split("</think>", 1)[-1]
elif "<think>" in _content:
_content = "" # opening tag in this chunk
if _content:
await self._publish_text_delta(
stream_id,
node_id,
_content,
event.snapshot,
ctx,
execution_id,
iteration=iteration,
inner_turn=inner_turn,
)
elif isinstance(event, ToolCallEvent):
_tc.append(event)
@@ -2348,10 +2290,27 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
final_system_prompt,
final_messages,
reported_to_parent,
False,
)
# Execute tool calls — framework tools (set_output, ask_user)
# Priority drain: if user sent a message while the LLM was
# streaming, inject it into the conversation NOW -- before tool
# execution. The LLM will see it on the next inner turn.
if not self._injection_queue.empty():
while not self._injection_queue.empty():
_inj_content, _inj_client, _inj_images = (
self._injection_queue.get_nowait()
)
if _inj_client:
await conversation.add_user_message(_inj_content)
logger.info(
"[%s] Priority-injected user message mid-turn (%d chars)",
node_id, len(_inj_content),
)
else:
await conversation.add_user_message(_inj_content)
# Execute tool calls -- framework tools (set_output, ask_user)
# run inline; real MCP tools run in parallel.
real_tool_results: list[dict] = []
limit_hit = False
@@ -2361,13 +2320,12 @@ class EventLoopNode(NodeProtocol):
)
# Phase 1: triage — handle framework tools immediately,
# queue real tools and subagents for parallel execution.
# queue real tools for parallel execution.
results_by_id: dict[str, ToolResult] = {}
timing_by_id: dict[
str, dict[str, Any]
] = {} # tool_use_id -> {start_timestamp, duration_s}
pending_real: list[ToolCallEvent] = []
pending_subagent: list[ToolCallEvent] = []
for tc in tool_calls:
tool_call_count += 1
@@ -2610,76 +2568,6 @@ class EventLoopNode(NodeProtocol):
)
results_by_id[tc.tool_use_id] = result
elif tc.tool_name == "delegate_to_sub_agent":
# Guard: in continuous mode the LLM may see delegate
# calls from a previous node's conversation history and
# attempt to re-use the tool on a node that doesn't own
# it. Only accept if the tool was actually offered.
if not any(t.name == "delegate_to_sub_agent" for t in tools):
logger.warning(
"[%s] LLM called delegate_to_sub_agent but tool "
"was not offered to this node — rejecting",
node_id,
)
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=(
"ERROR: delegate_to_sub_agent is not available "
"on this node. This tool belongs to a different "
"node in the workflow."
),
is_error=True,
)
results_by_id[tc.tool_use_id] = result
continue
# --- Framework-level subagent delegation ---
# Queue for parallel execution in Phase 2
logger.info(
"🔄 LLM requesting subagent delegation: agent_id='%s', task='%s'",
tc.tool_input.get("agent_id", "?"),
(tc.tool_input.get("task", "")[:100] + "...")
if len(tc.tool_input.get("task", "")) > 100
else tc.tool_input.get("task", ""),
)
pending_subagent.append(tc)
elif tc.tool_name == "report_to_parent":
# --- Report from sub-agent to parent (optionally blocking) ---
reported_to_parent = True
msg = tc.tool_input.get("message", "")
data = tc.tool_input.get("data")
wait = tc.tool_input.get("wait_for_response", False)
mark_complete = tc.tool_input.get("mark_complete", False)
response = None
if ctx.report_callback:
try:
response = await ctx.report_callback(
msg,
data,
wait_for_response=wait,
)
except Exception:
logger.warning(
"[%s] report_to_parent callback failed (swallowed)",
node_id,
exc_info=True,
)
if mark_complete:
self._mark_complete_flag = True
logger.info(
"[%s] mark_complete=True — subagent will accept on this iteration",
node_id,
)
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=response if (wait and response) else "Report sent to parent.",
is_error=False,
)
results_by_id[tc.tool_use_id] = result
else:
# --- Real tool: check for truncated args, else queue ---
if "_raw" in tc.tool_input:
@@ -2754,175 +2642,6 @@ class EventLoopNode(NodeProtocol):
result = raw
results_by_id[tc.tool_use_id] = self._truncate_tool_result(result, tc.tool_name)
# Phase 2b: execute subagent delegations in parallel.
if pending_subagent:
_subagent_timeout = self._config.subagent_timeout_seconds
_inactivity_timeout = self._config.subagent_inactivity_timeout_seconds
async def _timed_subagent(
_ctx: NodeContext,
_tc: ToolCallEvent,
_acc: OutputAccumulator = accumulator,
_wall_timeout: float = _subagent_timeout,
_activity_timeout: float = _inactivity_timeout,
) -> tuple[ToolResult | BaseException, str, float]:
_s = time.time()
_iso = datetime.now(UTC).isoformat()
_last_activity = _s
_activity_event = asyncio.Event()
async def _watchdog() -> None:
"""Watchdog that times out only after inactivity period."""
nonlocal _last_activity
while True:
_now = time.time()
_inactive_for = _now - _last_activity
_remaining = _activity_timeout - _inactive_for
if _remaining <= 0:
# Inactivity timeout reached
return
try:
await asyncio.wait_for(_activity_event.wait(), timeout=_remaining)
_activity_event.clear()
except TimeoutError:
# Check again in case activity happened during wait
continue
async def _run_with_activity_timeout(
_coro,
) -> ToolResult:
"""Run subagent with activity-based timeout."""
_watchdog_task = asyncio.create_task(_watchdog())
try:
_result = await _coro
return _result
finally:
_watchdog_task.cancel()
try:
await _watchdog_task
except asyncio.CancelledError:
pass
try:
# Subscribe to subagent activity events to reset inactivity timer
async def _on_subagent_activity(event) -> None:
nonlocal _last_activity
_last_activity = time.time()
_activity_event.set()
_sub_id = None
if self._event_bus and _activity_timeout > 0:
from framework.runtime.event_bus import EventType
_sub_id = self._event_bus.subscribe(
event_types=[
EventType.TOOL_CALL_STARTED,
EventType.LLM_TEXT_DELTA,
EventType.EXECUTION_STARTED,
],
handler=_on_subagent_activity,
)
try:
_coro = self._execute_subagent(
_ctx,
_tc.tool_input.get("agent_id", ""),
_tc.tool_input.get("task", ""),
accumulator=_acc,
)
if _activity_timeout > 0:
# Use activity-based timeout with wall-clock max
_result_coro = _run_with_activity_timeout(_coro)
if _wall_timeout > 0:
_r = await asyncio.wait_for(_result_coro, timeout=_wall_timeout)
else:
_r = await _result_coro
elif _wall_timeout > 0:
_r = await asyncio.wait_for(_coro, timeout=_wall_timeout)
else:
_r = await _coro
finally:
if _sub_id and self._event_bus:
self._event_bus.unsubscribe(_sub_id)
except TimeoutError:
_agent_id = _tc.tool_input.get("agent_id", "unknown")
_elapsed = time.time() - _s
logger.warning(
"Subagent '%s' timed out after %.0fs (inactivity threshold: %.0fs)",
_agent_id,
_elapsed,
_activity_timeout if _activity_timeout > 0 else _wall_timeout,
)
_r = ToolResult(
tool_use_id=_tc.tool_use_id,
content=(
f"Subagent '{_agent_id}' timed out after "
f"{_elapsed:.0f}s of inactivity. "
"The subagent was not making progress. "
"Try a simpler task or break it into smaller pieces."
),
is_error=True,
)
except BaseException as _exc:
_r = _exc
_dur = round(time.time() - _s, 3)
return _r, _iso, _dur
subagent_timed = await asyncio.gather(
*(_timed_subagent(ctx, tc) for tc in pending_subagent),
return_exceptions=True,
)
for tc, entry in zip(pending_subagent, subagent_timed, strict=True):
if isinstance(entry, BaseException):
raw = entry
_start_iso = datetime.now(UTC).isoformat()
_dur_s = 0
else:
raw, _start_iso, _dur_s = entry
_sa_timing = {
"start_timestamp": _start_iso,
"duration_s": _dur_s,
}
if isinstance(raw, BaseException):
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=json.dumps(
{
"message": f"Sub-agent execution raised: {raw}",
"data": None,
"metadata": {"success": False, "error": str(raw)},
}
),
is_error=True,
)
else:
# Attach the tool_use_id to the result
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=raw.content,
is_error=raw.is_error,
)
# Route through _truncate_tool_result so large
# subagent results are saved to spillover files
# and survive pruning (instead of being "cleared
# from context" with no recovery path).
result = self._truncate_tool_result(result, "delegate_to_sub_agent")
results_by_id[tc.tool_use_id] = result
logged_tool_calls.append(
{
"tool_use_id": tc.tool_use_id,
"tool_name": "delegate_to_sub_agent",
"tool_input": tc.tool_input,
"content": result.content,
"is_error": result.is_error,
**_sa_timing,
}
)
# Phase 3: record results into conversation in original order,
# build logged/real lists, and publish completed events.
for tc in tool_calls[:executed_in_batch]:
@@ -2936,8 +2655,6 @@ class EventLoopNode(NodeProtocol):
"ask_user",
"ask_user_multiple",
"escalate",
"delegate_to_sub_agent",
"report_to_parent",
):
tool_entry = {
"tool_use_id": tc.tool_use_id,
@@ -3056,7 +2773,7 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
final_system_prompt,
final_messages,
reported_to_parent,
False,
)
# --- Mid-turn pruning: prevent context blowup within a single turn ---
@@ -3090,7 +2807,7 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
final_system_prompt,
final_messages,
reported_to_parent,
False,
)
# Tool calls processed -- loop back to stream with updated conversation
@@ -3118,16 +2835,6 @@ class EventLoopNode(NodeProtocol):
"""Build the synthetic escalate tool. Delegates to synthetic_tools module."""
return build_escalate_tool()
def _build_delegate_tool(
self, sub_agents: list[str], node_registry: dict[str, Any]
) -> Tool | None:
"""Build the synthetic delegate_to_sub_agent tool. Delegates to synthetic_tools module."""
return build_delegate_tool(sub_agents, node_registry)
def _build_report_to_parent_tool(self) -> Tool:
"""Build the synthetic report_to_parent tool. Delegates to synthetic_tools module."""
return build_report_to_parent_tool()
def _handle_set_output(
self,
tool_input: dict[str, Any],
@@ -3151,7 +2858,7 @@ class EventLoopNode(NodeProtocol):
) -> JudgeVerdict:
"""Evaluate the current state. Delegates to judge_pipeline module."""
return await judge_turn(
mark_complete_flag=self._mark_complete_flag,
mark_complete_flag=False,
judge=self._judge,
ctx=ctx,
conversation=conversation,
@@ -3176,7 +2883,7 @@ class EventLoopNode(NodeProtocol):
Delegates to :func:`extract_tool_call_history` in conversation.py.
"""
from framework.graph.conversation import extract_tool_call_history
from framework.agent_loop.conversation import extract_tool_call_history
return extract_tool_call_history(conversation.messages, max_entries=max_entries)
@@ -3781,46 +3488,3 @@ class EventLoopNode(NodeProtocol):
# Subagent Execution
# -------------------------------------------------------------------
async def _execute_subagent(
self,
ctx: NodeContext,
agent_id: str,
task: str,
*,
accumulator: OutputAccumulator | None = None,
) -> ToolResult:
"""Execute a subagent and return the result as a ToolResult.
The subagent:
- Gets a fresh conversation with just the task
- Has read-only access to the parent's readable data buffer
- Cannot delegate to its own subagents (prevents recursion)
- Returns its output in structured JSON format
Args:
ctx: Parent node's context (for data buffer, tools, LLM access).
agent_id: The node ID of the subagent to invoke.
task: The task description to give the subagent.
accumulator: Parent's OutputAccumulator — provides outputs that
have been set via ``set_output`` but not yet written to
data buffer (which only happens after the node completes).
Returns:
ToolResult with structured JSON output containing:
- message: Human-readable summary
- data: Subagent's output (free-form JSON)
- metadata: Execution metadata (success, tokens, latency)
"""
return await execute_subagent(
ctx=ctx,
agent_id=agent_id,
task=task,
accumulator=accumulator,
event_bus=self._event_bus,
config=self._config,
tool_executor=self._tool_executor,
conversation_store=self._conversation_store,
subagent_instance_counter=self._subagent_instance_counter,
event_loop_node_cls=type(self),
escalation_receiver_cls=_EscalationReceiver,
)
@@ -324,7 +324,7 @@ def _try_extract_key(content: str, key: str) -> str | None:
3. Colon format: ``key: value``.
4. Equals format: ``key = value``.
"""
from framework.graph.node import find_json_object
from framework.orchestrator.node import find_json_object
# 1. Whole message is JSON
try:
@@ -0,0 +1,7 @@
"""Agent loop internals -- compaction, judge, tools, subagent execution.
Re-exports from legacy locations for the new import path.
"""
from framework.agent_loop.internals.compaction import * # noqa: F401, F403
from framework.agent_loop.internals.synthetic_tools import * # noqa: F401, F403
@@ -19,11 +19,11 @@ from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from framework.graph.conversation import Message, NodeConversation
from framework.graph.event_loop.event_publishing import publish_context_usage
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
from framework.graph.node import NodeContext
from framework.runtime.event_bus import EventBus
from framework.agent_loop.conversation import Message, NodeConversation
from framework.agent_loop.internals.event_publishing import publish_context_usage
from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator
from framework.orchestrator.node import NodeContext
from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -368,8 +368,8 @@ async def llm_compact(
in half and each half is summarised independently. Tool history is
appended once at the top-level call (``_depth == 0``).
"""
from framework.graph.conversation import extract_tool_call_history
from framework.graph.event_loop.tool_result_handler import is_context_too_large_error
from framework.agent_loop.conversation import extract_tool_call_history
from framework.agent_loop.internals.tool_result_handler import is_context_too_large_error
if _depth > max_depth:
raise RuntimeError(f"LLM compaction recursion limit ({max_depth})")
@@ -724,7 +724,7 @@ async def log_compaction(
)
if event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
event_data: dict[str, Any] = {
"level": level,
@@ -861,6 +861,6 @@ def _extract_tool_call_history(conversation: NodeConversation) -> str:
directly (vs. the module-level extract_tool_call_history in conversation.py
which works on raw message lists).
"""
from framework.graph.conversation import extract_tool_call_history
from framework.agent_loop.conversation import extract_tool_call_history
return extract_tool_call_history(list(conversation.messages))
@@ -14,9 +14,9 @@ from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
from framework.graph.conversation import ConversationStore, NodeConversation
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator, TriggerEvent
from framework.graph.node import NodeContext
from framework.agent_loop.conversation import ConversationStore, NodeConversation
from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator, TriggerEvent
from framework.orchestrator.node import NodeContext
from framework.llm.capabilities import supports_image_tool_results
logger = logging.getLogger(__name__)
@@ -9,10 +9,10 @@ from __future__ import annotations
import logging
import time
from framework.graph.conversation import NodeConversation
from framework.graph.event_loop.types import HookContext
from framework.graph.node import NodeContext
from framework.runtime.event_bus import EventBus
from framework.agent_loop.conversation import NodeConversation
from framework.agent_loop.internals.types import HookContext
from framework.orchestrator.node import NodeContext
from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -177,7 +177,7 @@ async def publish_context_usage(
if not event_bus:
return
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
estimated = conversation.estimate_tokens()
max_tokens = conversation._max_context_tokens
@@ -5,9 +5,9 @@ from __future__ import annotations
import logging
from collections.abc import Callable
from framework.graph.conversation import NodeConversation
from framework.graph.event_loop.types import JudgeProtocol, JudgeVerdict, OutputAccumulator
from framework.graph.node import NodeContext
from framework.agent_loop.conversation import NodeConversation
from framework.agent_loop.internals.types import JudgeProtocol, JudgeVerdict, OutputAccumulator
from framework.orchestrator.node import NodeContext
logger = logging.getLogger(__name__)
@@ -155,7 +155,7 @@ async def judge_turn(
# Level 2b: conversation-aware quality check (if success_criteria set)
if ctx.node_spec.success_criteria and ctx.llm:
from framework.graph.conversation_judge import evaluate_phase_completion
from framework.orchestrator.conversation_judge import evaluate_phase_completion
verdict = await evaluate_phase_completion(
llm=ctx.llm,
@@ -204,118 +204,6 @@ def build_escalate_tool() -> Tool:
},
)
def build_delegate_tool(sub_agents: list[str], node_registry: dict[str, Any]) -> Tool | None:
    """Construct the synthetic ``delegate_to_sub_agent`` tool.

    Args:
        sub_agents: Node IDs that may be invoked as subagents.
        node_registry: Mapping of node_id -> NodeSpec, used to pull each
            subagent's description into the tool prompt.

    Returns:
        The Tool definition, or None when ``sub_agents`` is empty.
    """
    if not sub_agents:
        return None

    def _describe(agent_id: str) -> str:
        # One "- id: description" bullet per subagent; fall back to a
        # placeholder when the registry has no (truthy) spec for the id.
        spec = node_registry.get(agent_id)
        if spec:
            return f"- {agent_id}: {getattr(spec, 'description', '(no description)')}"
        return f"- {agent_id}: (not found in registry)"

    description_text = (
        "Delegate a task to a specialized sub-agent. The sub-agent runs "
        "autonomously with read-only access to current memory and returns "
        "its result. Use this to parallelize work or leverage specialized capabilities.\n\n"
        "Available sub-agents:\n"
        + "\n".join(_describe(agent_id) for agent_id in sub_agents)
    )
    parameter_schema = {
        "type": "object",
        "properties": {
            "agent_id": {
                "type": "string",
                "description": f"The sub-agent to invoke. Must be one of: {sub_agents}",
                "enum": sub_agents,
            },
            "task": {
                "type": "string",
                "description": (
                    "The task description for the sub-agent to execute. "
                    "Be specific about what you want the sub-agent to do and "
                    "what information to return."
                ),
            },
        },
        "required": ["agent_id", "task"],
    }
    return Tool(
        name="delegate_to_sub_agent",
        description=description_text,
        parameters=parameter_schema,
    )
def build_report_to_parent_tool() -> Tool:
    """Construct the synthetic ``report_to_parent`` tool for sub-agents.

    A sub-agent calls this to push one-way progress updates, partial
    findings, or status reports to the parent node (and to external
    observers via the event bus) without blocking execution.

    Two flags modify that default:
    - ``wait_for_response=True`` blocks the sub-agent until the parent
      relays the user's response — used for escalation (e.g. login
      pages, CAPTCHAs).
    - ``mark_complete=True`` terminates the sub-agent immediately after
      the report is sent — no need to call ``set_output`` for each
      output key.
    """
    tool_description = (
        "Send a report to the parent agent. By default this is fire-and-forget: "
        "the parent receives the report but does not respond. "
        "Set wait_for_response=true to BLOCK until the user replies — use this "
        "when you need human intervention (e.g. login pages, CAPTCHAs, "
        "authentication walls). The user's response is returned as the tool result. "
        "Set mark_complete=true to finish your task and terminate immediately "
        "after sending the report — use this when your findings are in the "
        "message/data fields and you don't need to call set_output."
    )
    parameter_schema = {
        "type": "object",
        "properties": {
            "message": {
                "type": "string",
                "description": "A human-readable status or progress message.",
            },
            "data": {
                "type": "object",
                "description": "Optional structured data to include with the report.",
            },
            "wait_for_response": {
                "type": "boolean",
                "description": (
                    "If true, block execution until the user responds. "
                    "Use for escalation scenarios requiring human intervention."
                ),
                "default": False,
            },
            "mark_complete": {
                "type": "boolean",
                "description": (
                    "If true, terminate the sub-agent immediately after sending "
                    "this report. The report message and data are delivered to the "
                    "parent as the final result. No set_output calls are needed."
                ),
                "default": False,
            },
        },
        "required": ["message"],
    }
    return Tool(
        name="report_to_parent",
        description=tool_description,
        parameters=parameter_schema,
    )
def handle_set_output(
tool_input: dict[str, Any],
output_keys: list[str] | None,
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Literal, Protocol, runtime_checkable
from framework.graph.conversation import (
from framework.agent_loop.conversation import (
ConversationStore,
)
@@ -68,7 +68,7 @@ class LoopConfig:
max_output_value_chars: int = 2_000
# Stream retry.
max_stream_retries: int = 3
max_stream_retries: int = 5
stream_retry_backoff_base: float = 2.0
stream_retry_max_delay: float = 60.0
+9 -1
View File
@@ -8,6 +8,14 @@ FRAMEWORK_AGENTS_DIR = Path(__file__).parent
def list_framework_agents() -> list[Path]:
"""List all framework agent directories."""
return sorted(
[p for p in FRAMEWORK_AGENTS_DIR.iterdir() if p.is_dir() and (p / "agent.py").exists()],
[
p
for p in FRAMEWORK_AGENTS_DIR.iterdir()
if p.is_dir()
and (
(p / "agent.json").exists()
or (p / "agent.py").exists()
)
],
key=lambda p: p.name,
)
@@ -21,15 +21,15 @@ from pathlib import Path
from typing import TYPE_CHECKING
from framework.config import get_max_context_tokens
from framework.graph import Goal, NodeSpec, SuccessCriterion
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.orchestrator import Goal, NodeSpec, SuccessCriterion
from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.orchestrator import ExecutionResult
from framework.llm import LiteLLMProvider
from framework.runner.mcp_registry import MCPRegistry
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from framework.loader.mcp_registry import MCPRegistry
from framework.loader.tool_registry import ToolRegistry
from framework.host.agent_host import AgentHost
from framework.host.execution_manager import EntryPointSpec
from .config import default_config
from .nodes import build_tester_node
@@ -37,7 +37,7 @@ from .nodes import build_tester_node
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from framework.runner import AgentRunner
from framework.loader import AgentLoader
logger = logging.getLogger(__name__)
@@ -233,7 +233,7 @@ requires_account_selection = True
"""Signal TUI to show account picker before starting the agent."""
def configure_for_account(runner: AgentRunner, account: dict) -> None:
def configure_for_account(runner: AgentLoader, account: dict) -> None:
"""Scope the tester node's tools to the selected provider.
Handles both Aden accounts (account= routing) and local accounts
@@ -325,7 +325,7 @@ def _activate_local_account(credential_id: str, alias: str) -> None:
def _configure_aden_node(
runner: AgentRunner,
runner: AgentLoader,
provider: str,
alias: str,
detail: str,
@@ -368,7 +368,7 @@ or any other identifier — always use the alias exactly as shown.
def _configure_local_node(
runner: AgentRunner,
runner: AgentLoader,
provider: str,
alias: str,
identity: dict,
@@ -497,7 +497,7 @@ class CredentialTesterAgent:
def __init__(self, config=None):
self.config = config or default_config
self._selected_account: dict | None = None
self._agent_runtime: AgentRuntime | None = None
self._agent_runtime: AgentHost | None = None
self._tool_registry: ToolRegistry | None = None
self._storage_path: Path | None = None
@@ -613,7 +613,7 @@ class CredentialTesterAgent:
graph = self._build_graph()
self._agent_runtime = create_agent_runtime(
self._agent_runtime = AgentHost(
graph=graph,
goal=goal,
storage_path=self._storage_path,
@@ -1,6 +1,6 @@
"""Node definitions for Credential Tester agent."""
from framework.graph import NodeSpec
from framework.orchestrator import NodeSpec
def build_tester_node(
+54 -29
View File
@@ -27,8 +27,8 @@ def _get_last_active(agent_path: Path) -> str | None:
"""Return the most recent updated_at timestamp across all sessions.
Checks both worker sessions (``~/.hive/agents/{name}/sessions/``) and
queen sessions (``~/.hive/queen/session/``) whose ``meta.json`` references
the same *agent_path*.
queen sessions (``~/.hive/agents/queens/default/sessions/``) whose
``meta.json`` references the same *agent_path*.
"""
from datetime import datetime
@@ -53,7 +53,9 @@ def _get_last_active(agent_path: Path) -> str | None:
continue
# 2. Queen sessions
queen_sessions_dir = Path.home() / ".hive" / "queen" / "session"
from framework.config import QUEENS_DIR
queen_sessions_dir = QUEENS_DIR / "default" / "sessions"
if queen_sessions_dir.exists():
resolved = agent_path.resolve()
for d in queen_sessions_dir.iterdir():
@@ -112,13 +114,33 @@ def _count_runs(agent_name: str) -> int:
def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
"""Extract node count, tool count, and tags from an agent directory.
Prefers agent.py (AST-parsed) over agent.json for node/tool counts
since agent.json may be stale. Tags are only available from agent.json.
Checks agent.json (declarative) first, then agent.py (legacy).
"""
import ast
node_count, tool_count, tags = 0, 0, []
# Declarative JSON agents (preferred)
agent_json = agent_path / "agent.json"
if agent_json.exists():
try:
data = json.loads(agent_json.read_text(encoding="utf-8"))
if isinstance(data, dict):
json_nodes = data.get("nodes", [])
node_count = len(json_nodes)
tools: set[str] = set()
for n in json_nodes:
node_tools = n.get("tools", {})
if isinstance(node_tools, dict):
tools.update(node_tools.get("allowed", []))
elif isinstance(node_tools, list):
tools.update(node_tools)
tool_count = len(tools)
return node_count, tool_count, tags
except Exception:
pass
# Legacy: agent.py (AST-parsed)
agent_py = agent_path / "agent.py"
if agent_py.exists():
try:
@@ -132,39 +154,31 @@ def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
except Exception:
pass
agent_json = agent_path / "agent.json"
if agent_json.exists():
try:
data = json.loads(agent_json.read_text(encoding="utf-8"))
json_nodes = data.get("graph", {}).get("nodes", []) or data.get("nodes", [])
if node_count == 0:
node_count = len(json_nodes)
tools: set[str] = set()
for n in json_nodes:
tools.update(n.get("tools", []))
tool_count = len(tools)
tags = data.get("agent", {}).get("tags", [])
except Exception:
pass
return node_count, tool_count, tags
def discover_agents() -> dict[str, list[AgentEntry]]:
"""Discover agents from all known sources grouped by category."""
from framework.runner.cli import (
from framework.loader.cli import (
_extract_python_agent_metadata,
_get_framework_agents_dir,
_is_valid_agent_dir,
)
from framework.config import COLONIES_DIR
groups: dict[str, list[AgentEntry]] = {}
sources = [
("Your Agents", Path("exports")),
("Your Agents", COLONIES_DIR),
("Your Agents", Path("exports")), # compat fallback
("Framework", _get_framework_agents_dir()),
("Examples", Path("examples/templates")),
]
# Track seen agent directory names to avoid duplicates when the same
# agent exists in both colonies/ and exports/ (colonies takes priority).
_seen_agent_names: set[str] = set()
for category, base_dir in sources:
if not base_dir.exists():
continue
@@ -172,6 +186,9 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
if not _is_valid_agent_dir(path):
continue
if path.name in _seen_agent_names:
continue
_seen_agent_names.add(path.name)
name, desc = _extract_python_agent_metadata(path)
config_fallback_name = path.name.replace("_", " ").title()
@@ -179,13 +196,19 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
node_count, tool_count, tags = _extract_agent_stats(path)
if not used_config:
agent_json = path / "agent.json"
if agent_json.exists():
# Try agent.json (declarative) for metadata
agent_json_path = path / "agent.json"
if agent_json_path.exists():
try:
data = json.loads(agent_json.read_text(encoding="utf-8"))
meta = data.get("agent", {})
name = meta.get("name", name)
desc = meta.get("description", desc)
data = json.loads(
agent_json_path.read_text(encoding="utf-8"),
)
if isinstance(data, dict):
raw_name = data.get("name", name)
if "-" in raw_name and " " not in raw_name:
raw_name = raw_name.replace("-", " ").title()
name = raw_name
desc = data.get("description", desc)
except Exception:
pass
@@ -204,6 +227,8 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
)
)
if entries:
groups[category] = entries
existing = groups.get(category, [])
existing.extend(entries)
groups[category] = existing
return groups
+3 -9
View File
@@ -1,19 +1,13 @@
"""
Queen Native agent builder for the Hive framework.
"""Queen -- the agent builder for the Hive framework."""
Deeply understands the agent framework and produces complete Python packages
with goals, nodes, edges, system prompts, MCP configuration, and tests
from natural language specifications.
"""
from .agent import queen_goal, queen_graph
from .agent import queen_goal, queen_loop_config
from .config import AgentMetadata, RuntimeConfig, default_config, metadata
__version__ = "1.0.0"
__all__ = [
"queen_goal",
"queen_graph",
"queen_loop_config",
"RuntimeConfig",
"AgentMetadata",
"default_config",
+16 -25
View File
@@ -1,38 +1,29 @@
"""Queen graph definition."""
"""Queen agent definition.
from framework.graph import Goal
from framework.graph.edge import GraphSpec
The queen is a single AgentLoop -- no graph, no orchestrator.
Loaded by queen_orchestrator.create_queen().
"""
from framework.orchestrator.goal import Goal
from .nodes import queen_node
# ---------------------------------------------------------------------------
# Queen graph — the primary persistent conversation.
# Loaded by queen_orchestrator.create_queen(), NOT by AgentRunner.
# ---------------------------------------------------------------------------
queen_goal = Goal(
id="queen-manager",
name="Queen Manager",
description=(
"Manage the worker agent lifecycle and serve as the user's primary interactive interface."
"Manage the worker agent lifecycle and serve as the "
"user's primary interactive interface."
),
success_criteria=[],
constraints=[],
)
queen_graph = GraphSpec(
id="queen-graph",
goal_id=queen_goal.id,
version="1.0.0",
entry_node="queen",
entry_points={"start": "queen"},
terminal_nodes=[],
pause_nodes=[],
nodes=[queen_node],
edges=[],
conversation_mode="continuous",
loop_config={
"max_iterations": 999_999,
"max_tool_calls_per_turn": 30,
},
)
# Loop config -- used by queen_orchestrator to build LoopConfig
queen_loop_config = {
"max_iterations": 999_999,
"max_tool_calls_per_turn": 30,
"max_context_tokens": 180_000,
}
__all__ = ["queen_goal", "queen_loop_config", "queen_node"]
@@ -0,0 +1,3 @@
{
"include": ["gcu-tools"]
}
+109 -121
View File
@@ -2,7 +2,7 @@
from pathlib import Path
from framework.graph import NodeSpec
from framework.orchestrator import NodeSpec
# Load reference docs at import time so they're always in the system prompt.
# No voluntary read_file() calls needed — the LLM gets everything upfront.
@@ -37,7 +37,7 @@ _appendices = _build_appendices()
# GCU guide — shared between planning and building via _shared_building_knowledge.
_gcu_section = (
("\n\n# GCU Nodes — Browser Automation\n\n" + _gcu_guide)
("\n\n# Browser Automation Nodes\n\n" + _gcu_guide)
if _is_gcu_enabled() and _gcu_guide
else ""
)
@@ -81,7 +81,6 @@ _QUEEN_PLANNING_TOOLS = [
"save_agent_draft",
"confirm_and_build",
# Scaffold + transition to building (requires confirm_and_build first)
"initialize_and_build_agent",
# Load existing agent (after user confirms)
"load_built_agent",
]
@@ -172,7 +171,7 @@ _shared_building_knowledge = (
## Paths (MANDATORY)
**Always use RELATIVE paths** \
(e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`).
(e.g. `exports/agent_name/agent.json`).
**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail.
The project root is implicit.
@@ -182,14 +181,18 @@ When designing worker nodes or writing worker system prompts, reference these \
tool names NOT the coder-tools names (read_file, write_file, etc.).
Worker data tools (for large results and spillover):
- save_data(filename, data, data_dir) save data to a file for later retrieval
- load_data(filename, data_dir, offset_bytes?, limit_bytes?) load data \
with byte-based pagination
- list_data_files(data_dir) list available data files
- append_data(filename, data, data_dir) append to a file incrementally
- edit_data(filename, old_text, new_text, data_dir) find-and-replace in a data file
- serve_file_to_user(filename, data_dir, label?, open_in_browser?) \
generate a clickable file URI for the user
Worker data tools (from files-tools MCP server):
- read_file(path) read a file
- write_file(path, content) write/create a file
- list_files(path) list directory contents
- search_files(pattern, path) regex search in files
Worker data tools (from hive-tools MCP server):
- csv_read, csv_write, csv_append CSV operations
- pdf_read read PDF files
All tools are registered in the global MCP registry (~/.hive/mcp_registry/). \
Workers get tools from: hive-tools, gcu-tools, files-tools.
IMPORTANT: Do NOT tell workers to use read_file, write_file, edit_file, \
search_files, or list_directory those are YOUR tools, not theirs.
@@ -204,7 +207,7 @@ _planning_knowledge = """\
# Core Mandates (Planning)
- **DO NOT propose a complete goal on your own.** Instead, \
collaborate with the user to define it.
- **NEVER call `initialize_and_build_agent` without explicit user approval.** \
- **NEVER call `confirm_and_build` without explicit user approval.** \
Present the full design first and wait for the user to confirm before building.
- **Discover tools dynamically.** NEVER reference tools from static \
docs. Always run list_agent_tools() to see what actually exists.
@@ -252,9 +255,9 @@ When the stakeholder describes what they want, mentally construct:
**After the user responds, assess fit and gaps together.** Be honest and specific. \
Reference tools from list_agent_tools() AND built-in capabilities:
- **GCU browser automation** (`node_type="gcu"`) provides full Playwright-based \
- **Browser automation provides full Playwright-based \
browser control (navigation, clicking, typing, scrolling, JS-rendered pages, \
multi-tab). Do NOT list browser automation as missing use GCU nodes.
multi-tab). Do NOT list browser automation as missing — use browser nodes with tools: {policy: "all"}.
Present a short **Framework Fit Assessment**:
- **Works well**: 2-4 strengths for this use case
@@ -306,14 +309,11 @@ explicitly on a node. Available types:
- **io** (dusty purple, parallelogram): External data input/output
- **document** (steel blue, wavy rect): Report or document generation
- **database** (muted teal, cylinder): Database or data store
- **subprocess** (dark cyan, subroutine): Delegated sub-agent / predefined process
- **browser** (deep blue, hexagon): GCU browser automation / sub-agent \
delegation. At build time, browser nodes are dissolved into the parent \
node's sub_agents list. Use for any GCU or sub-agent leaf node.
- **browser** (deep blue, hexagon): Browser automation node (uses gcu-tools).
Auto-detection works well for most cases: first node start, nodes with \
no outgoing edges terminal, nodes with multiple conditional outgoing \
edges decision, GCU nodes browser, nodes mentioning "database" \
edges decision, browser tool nodes browser, nodes mentioning "database" \
database, nodes mentioning "report/document" document, I/O tools like \
send_email io. Everything else defaults to process. Set flowchart_type \
explicitly only when auto-detection would be wrong.
@@ -354,48 +354,19 @@ gather → [Valid data?] →Yes→ transform → deliver
In the draft: the `[Valid data?]` node has `flowchart_type: "decision"`, \
`decision_clause: "Data passes validation checks?"`, with labeled yes/no edges.
## Sub-Agent Nodes — Planning-Only Delegation
## Browser Automation Nodes
Sub-agent nodes (dark teal subroutines) are **planning-only** visual elements \
that show which nodes delegate to sub-agents. At `confirm_and_build()`, \
sub-agent nodes are **dissolved** into their parent node:
- The sub-agent node's ID is added to the predecessor's `sub_agents` list
- The sub-agent node and its connecting edge are removed
- At runtime, the parent node can invoke the sub-agent via `delegate_to_sub_agent`
**Rules for sub-agent nodes (INCLUDING GCU nodes):**
- GCU nodes are auto-detected as `flowchart_type: "browser"` (hexagon)
- Connect from the managing parent node to the sub-agent node
- Sub-agent nodes must be **leaf nodes** NO outgoing edges to other nodes
- At build time, browser/GCU nodes are dissolved into the parent's \
`sub_agents` list, just like decision nodes are dissolved into criteria
**CRITICAL: GCU nodes (`node_type: "gcu"`) are ALWAYS sub-agents.** \
They MUST NOT appear in the linear flow. NEVER chain GCU nodes \
sequentially (A gcu1 gcu2 B is WRONG). Instead, attach them \
as leaves to the parent that orchestrates them:
Browser nodes are regular `event_loop` nodes with browser tools \
(from the gcu-tools MCP server) in their tool list. They are wired \
into the graph with edges like any other node:
```
WRONG: intake gcu_find_prospect gcu_scan_mutuals check_results
WRONG: decision_node gcu_node (as a yes/no branch)
RIGHT: intake (sub_agents: [gcu_find, gcu_scan]) check_results
research browser_scan analyze_results
```
The parent node delegates to its GCU sub-agents and collects results. \
The main flow continues from the parent, not from the GCU node. \
GCU nodes MUST NOT be children of decision nodes decision nodes \
dissolve at build time, which would leave the GCU as a dangling \
workflow step.
Use `tools: {policy: "all"}` to give browser nodes access to all \
browser tools, or list specific ones with `policy: "explicit"`.
**How to show delegation in the flowchart:**
```
research (deep_searcher) browser/GCU node, leaf
research [Enough results?] decision node
```
After dissolution: `research` node gets `sub_agents: ["deep_searcher"]` \
and `success_criteria: "Enough results?"`.
If the worker agent start from some initial input it is okay. \
The queen(you) owns intake: you gathers user requirements, then calls \
If the worker agent starts from some initial input it is okay. \
The queen (you) owns intake: you gather user requirements, then call \
`run_agent_with_input(task)` with a structured task description. \
When building the agent, design the entry node's `input_keys` to \
match what the queen will provide at run time. Worker nodes should \
@@ -411,14 +382,14 @@ You MUST get explicit user approval before ANY code is generated.
2. **WAIT for user response.** Do NOT proceed without it.
3. Handle the response:
- If **Approve / Proceed**: Call confirm_and_build(), then \
initialize_and_build_agent(agent_name, nodes)
confirm_and_build(agent_name)
- If **Adjust scope**: Discuss changes, update the draft with \
save_agent_draft() again, and re-ask
- If **More questions**: Answer them honestly, then ask again
- If **Reconsider**: Discuss alternatives. If they decide to proceed, \
that's their informed choice
**NEVER call initialize_and_build_agent without first calling \
**NEVER call confirm_and_build without first calling \
confirm_and_build().** The system will block the transition if you try.
"""
@@ -477,53 +448,75 @@ When a user says "my agent is failing" or "debug this agent":
## 5. Implement
**You should only reach this step after the user has approved the draft design \
in the planning phase. The draft metadata will pre-populate descriptions, \
goals, success criteria, and node metadata in the generated files.**
and you have called `confirm_and_build(agent_name="my_agent")`.**
Call `initialize_and_build_agent(agent_name, nodes)` to generate all package \
files. The agent_name must be snake_case (e.g., "my_agent"). Pass node names \
as comma-separated string (e.g., "gather,process,review").
The tool creates: config.py, nodes/__init__.py, agent.py, \
__init__.py, __main__.py, mcp_servers.json, tests/conftest.py.
`confirm_and_build` created the agent directory (returned in agent_path). \
Now write the complete agent config directly:
The generated files are **structurally complete** with correct imports, \
class definition, `validate()` method, `default_agent` export, and \
`__init__.py` re-exports. They pass validation as-is.
```
write_file("<colony_path>/agent.json", <complete JSON config>)
```
`mcp_servers.json` is auto-generated with hive-tools as the default. \
Do NOT manually create or overwrite `mcp_servers.json`.
The agent.json must include ALL of these in one write:
- `name`, `version`, `description`
- `goal` with `description`, `success_criteria`, `constraints`
- `identity_prompt` (agent-level behavior)
- `nodes` each with `id`, `description`, `system_prompt`, `tools`, \
`input_keys`, `output_keys`, `success_criteria`
- `edges` connecting all nodes with proper conditions
- `entry_node`, `terminal_nodes`
- `mcp_servers` REQUIRED. Always include all three: \
`[{"name": "hive-tools"}, {"name": "gcu-tools"}, {"name": "files-tools"}]`
- `loop_config` `max_iterations`, `max_context_tokens`
### Customizing generated files
**Write the COMPLETE config in one `write_file` call. No TODOs, no placeholders.** \
The queen writes final production-ready system prompts directly.
**CRITICAL: Use `edit_file` to customize TODO placeholders. \
NEVER use `write_file` to rewrite generated files from scratch. \
Rewriting breaks imports, class structure, and causes validation failures.**
**There are NO Python files.** The framework loads agent.json directly.
Safe to edit with `edit_file`:
- System prompts, tools, input_keys, output_keys, success_criteria in \
nodes/__init__.py
- Goal description, success criteria values, constraint values, edge \
definitions, identity_prompt in agent.py
- CLI options in __main__.py
- For triggers (timers/webhooks), add entries to triggers.json in the \
agent's export directory
MCP servers are loaded from the global registry by name. Available servers:
- `hive-tools` web search, email, CRM, calendar, 100+ integrations
- `gcu-tools` browser automation (click, type, navigate, screenshot)
- `files-tools` file I/O (read, write, edit, search, list)
Do NOT modify or rewrite:
- Import statements at top of agent.py (they are correct)
- The agent class definition, `validate()`, `_build_graph()`, `_setup()`, \
or lifecycle methods (start/stop/run)
- `__init__.py` exports (all required variables are already re-exported)
- `default_agent = ClassName()` at bottom of agent.py
**Template variables:** Add a `variables:` section at the top of agent.json \
and use `{{variable_name}}` in system prompts for config injection:
```yaml
variables:
spreadsheet_id: "1ZVx..."
nodes:
- id: start
system_prompt: |
Use spreadsheet: {{spreadsheet_id}}
```
### Tool access in nodes
Each node declares its tool access policy:
```yaml
# Explicit list (recommended)
tools:
policy: explicit
allowed: [web_search, write_file]
# All tools (for browser automation nodes)
tools:
policy: all
# No tools (for handoff/summary nodes)
tools:
policy: none
```
## 6. Verify and Load
Call `validate_agent_package("{name}")` after initialization. \
It runs structural checks (class validation, graph validation, tool \
validation, tests) and returns a consolidated result. If anything \
fails: read the error, fix with edit_file, re-validate. Up to 3x.
fails: read the error, fix with read_file+write_file, re-validate. Up to 3x.
When validation passes, immediately call \
`load_built_agent("exports/{name}")` to load the agent into the \
`load_built_agent("<agent_path>")` to load the agent into the \
session. This switches to STAGING phase and shows the graph in the \
visualizer. Do NOT wait for user input between validation and loading.
"""
@@ -625,13 +618,11 @@ document, database, subprocess, etc.) with unique shapes and colors. Set \
flowchart_type on a node to override. Nodes need only an id. \
Use decision nodes (flowchart_type: "decision", with decision_clause and \
labeled yes/no edges) to make conditional branching explicit. \
GCU/sub-agent nodes (node_type: "gcu") are auto-detected as browser \
hexagons and connected as leaf nodes to their parent.
- confirm_and_build() Record user confirmation of the draft. Dissolves \
planning-only nodes (decision predecessor criteria; browser/GCU \
predecessor sub_agents list). Call this ONLY after the user explicitly \
approves via ask_user.
- initialize_and_build_agent(agent_name?, nodes?) Scaffold the agent package \
- confirm_and_build(agent_name) Scaffold the agent package \
and transition to BUILDING phase. For new agents, this REQUIRES \
save_agent_draft() + confirm_and_build() first. The draft metadata is used to \
pre-populate the generated files. Without agent_name: transition to BUILDING \
@@ -647,8 +638,8 @@ phase. Only use this when the user explicitly asks to work with an existing agen
2. Call save_agent_draft() to create visual draft present to user
3. Call ask_user() to get explicit approval
4. Call confirm_and_build() to record approval
5. Call initialize_and_build_agent() to scaffold and start building
For diagnosis of existing agents, call initialize_and_build_agent() \
5. Call confirm_and_build() to scaffold and start building
For diagnosis of existing agents, call confirm_and_build() \
(no args) after agreeing on a fix plan with the user.
"""
@@ -884,7 +875,7 @@ that changes the structure, call save_agent_draft() again so they see the \
update in real-time. The flowchart is a live collaboration tool.
8. When the design is stable, use ask_user to get explicit approval
9. Call confirm_and_build() after the user approves
10. Call initialize_and_build_agent(agent_name, nodes) to scaffold and start building
10. Call confirm_and_build(agent_name) to scaffold and start building
**The flowchart is your shared whiteboard.** Don't describe changes in text \
and then ask "should I update the draft?" just update it. If the user says \
@@ -895,7 +886,7 @@ see every structural change reflected in the visualizer as you discuss it.
**CRITICAL: Planning Building boundary.** You MUST get explicit user \
confirmation before moving to building. The sequence is:
save_agent_draft() iterate with user ask_user() confirm_and_build() \
initialize_and_build_agent()
confirm_and_build()
Skipping any of these steps will be blocked by the system.
Remember: DO NOT write or edit any files yet. This is a read-only exploration \
@@ -911,7 +902,7 @@ your priority is diagnosis, not new design:
2. Summarize the root cause to the user
3. Propose a fix plan (what to change, what behavior to adjust)
4. Get user approval via ask_user
5. Call initialize_and_build_agent() (no args) to transition to building and implement the fix
5. Call confirm_and_build() (no args) to transition to building and implement the fix
Do NOT start the full discovery workflow (tool discovery, gap analysis) in \
diagnosis mode — you already have a built agent, you just need to fix it.
@@ -947,7 +938,7 @@ delegate agent construction to the worker, even as a "research" subtask.
## Keeping the flowchart in sync during building
When you make structural changes to the agent (add/remove/rename nodes, \
change edges, modify sub-agent assignments), call save_agent_draft() to \
change edges, modify node connections), call save_agent_draft() to \
update the flowchart. During building, this auto-dissolves planning-only \
nodes without needing user re-confirmation. The user sees the updated \
flowchart immediately.
@@ -966,15 +957,15 @@ user says "replan", "go back", "let's redesign", "change the approach", \
## CRITICAL — Graph topology errors require replanning, not code edits
If you discover that the agent graph has structural problems GCU nodes \
If you discover that the agent graph has structural problems browser nodes \
in the linear flow, missing edges, wrong node connections, incorrect \
sub-agent assignments you MUST call replan_agent() and fix the draft. \
Do NOT attempt to fix topology by editing agent.py directly. The graph \
node connections you MUST call replan_agent() and fix the draft. \
Do NOT attempt to fix topology by editing agent.json directly. The graph \
structure is defined by the draft dissolution code-gen pipeline. \
Editing code to rewire nodes bypasses the flowchart and creates drift \
between what the user sees and what the code does.
Editing the config to rewire nodes bypasses the flowchart and creates drift \
between what the user sees and what the config does.
**WRONG:** "Let me fix agent.py to remove GCU nodes from edges..."
**WRONG:** "Let me fix agent.json to remove browser nodes from edges..."
**RIGHT:** Call replan_agent(), fix the draft with save_agent_draft(), \
get user approval, then confirm_and_build() the corrected code is \
generated automatically.
@@ -1100,18 +1091,15 @@ You wake up when:
If the user asks for progress, call get_graph_status() ONCE and report. \
If the summary mentions issues, follow up with get_graph_status(focus="issues").
## Subagent delegations (browser automation, GCU)
## Browser automation nodes
When the worker delegates to a subagent (e.g., GCU browser automation), expect it \
to take 2-5 minutes. During this time:
- Progress will show 0% — this is NORMAL. The subagent only calls set_output at the end.
- Check get_graph_status(focus="full") for "subagent_activity" this shows the \
subagent's latest reasoning text and confirms it is making real progress.
- Do NOT conclude the subagent is stuck just because progress is 0% or because \
you see repeated browser_click/browser_snapshot calls that is the expected \
pattern for web scraping.
- Only intervene if: the subagent has been running for 5+ minutes with no new \
subagent_activity updates, OR the judge escalates.
Browser nodes may take 2-5 minutes for web scraping tasks. During this time:
- Progress will show 0% until the node calls set_output at the end.
- Check get_graph_status(focus="full") for activity updates.
- Do NOT conclude it is stuck just because you see repeated \
browser_click/browser_snapshot calls — that is expected for web scraping.
- Only intervene if: the node has been running for 5+ minutes with no new \
activity updates, OR the judge escalates.
## Handling worker termination ([WORKER_TERMINAL])
@@ -1143,11 +1131,11 @@ escalations. If the user gave you instructions (e.g., "just retry on errors", \
CRITICAL escalation relay protocol:
When an escalation requires user input (auth blocks, human review), the worker \
or its subagent is BLOCKED and waiting for your response. You MUST follow this \
is BLOCKED and waiting for your response. You MUST follow this \
exact two-step sequence:
Step 1: call ask_user() to get the user's answer.
Step 2: call inject_message() with the user's answer IMMEDIATELY after.
If you skip Step 2, the worker/subagent stays blocked FOREVER and the task hangs. \
If you skip Step 2, the worker stays blocked FOREVER and the task hangs. \
NEVER respond to the user without also calling inject_message() to unblock \
the worker. Even if the user says "skip" or "cancel", you must still relay that \
decision via inject_message() so the worker can clean up.
@@ -1233,7 +1221,7 @@ _queen_tools_docs = (
+ "\n\n### Phase transitions\n"
"- save_agent_draft(...) → creates visual-only draft graph (stays in PLANNING)\n"
"- confirm_and_build() → records user approval of draft (stays in PLANNING)\n"
"- initialize_and_build_agent(agent_name?, nodes?) → scaffolds package + switches to "
"- confirm_and_build(agent_name) → scaffolds package + switches to "
"BUILDING (requires draft + confirmation for new agents)\n"
"- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n"
"- load_built_agent(path) → switches to STAGING phase\n"
+28 -7
View File
@@ -1,9 +1,15 @@
"""Queen global memory helpers.
Global memory lives in ``~/.hive/queen/global_memory/`` and stores durable
cross-session knowledge about the user (profile, preferences, environment,
feedback). Each memory is an individual ``.md`` file with optional YAML
frontmatter (name, type, description).
Memory hierarchy::
~/.hive/memories/
global/ # shared across all queens and colonies
colonies/{name}/ # colony-scoped memories
agents/queens/{name}/ # queen-specific memories
agents/{name}/ # per-worker-agent memories
Each memory is an individual ``.md`` file with optional YAML frontmatter
(name, type, description).
"""
from __future__ import annotations
@@ -21,7 +27,7 @@ logger = logging.getLogger(__name__)
GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] = ("profile", "preference", "environment", "feedback")
_HIVE_QUEEN_DIR = Path.home() / ".hive" / "queen"
from framework.config import MEMORIES_DIR
MAX_FILES: int = 200
MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file
@@ -31,8 +37,23 @@ _HEADER_LINE_LIMIT: int = 30
def global_memory_dir() -> Path:
    """Return the global memory directory (shared across all queens/colonies).

    Memories live under ``MEMORIES_DIR / "global"``; each memory is an
    individual ``.md`` file (see module docstring for the full hierarchy).
    """
    # NOTE(review): the previous body carried both the pre-refactor return
    # (``_HIVE_QUEEN_DIR / "global_memory"``) and the new one, leaving the
    # new path unreachable dead code. Only the new location is kept.
    return MEMORIES_DIR / "global"
def colony_memory_dir(colony_name: str) -> Path:
    """Path to the memory directory scoped to the colony *colony_name*."""
    return MEMORIES_DIR.joinpath("colonies", colony_name)
def queen_memory_dir(queen_name: str = "default") -> Path:
    """Path to the memory directory belonging to the queen *queen_name*.

    Defaults to the ``"default"`` queen when no name is given.
    """
    return MEMORIES_DIR.joinpath("agents", "queens", queen_name)
def agent_memory_dir(agent_name: str) -> Path:
    """Path to the per-worker-agent memory directory for *agent_name*."""
    return MEMORIES_DIR.joinpath("agents", agent_name)
# ---------------------------------------------------------------------------
+13 -1
View File
@@ -91,7 +91,19 @@ async def select_memories(
resp.stop_reason,
)
return []
data = json.loads(raw)
# Some models wrap JSON in markdown fences or add preamble text.
# Try to extract the JSON object if raw parse fails.
try:
data = json.loads(raw)
except json.JSONDecodeError:
import re
m = re.search(r"\{.*\}", raw, re.DOTALL)
if m:
data = json.loads(m.group())
else:
logger.warning("recall: LLM returned non-JSON: %.200s", raw)
return []
selected = data.get("selected_memories", [])
valid_names = {f.filename for f in files}
result = [s for s in selected if s in valid_names][:max_results]
@@ -25,10 +25,7 @@
14. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
## GCU Errors
15. **Manually wiring browser tools on event_loop nodes** — Use `node_type="gcu"` which auto-includes browser tools. Do NOT manually list browser tool names.
16. **Using GCU nodes as regular graph nodes** — GCU nodes are subagents only. They must ONLY appear in `sub_agents=["gcu-node-id"]` and be invoked via `delegate_to_sub_agent()`. Never connect via edges or use as entry/terminal nodes.
17. **Reusing the same GCU node ID for parallel tasks** — Each concurrent browser task needs a distinct GCU node ID (e.g. `gcu-site-a`, `gcu-site-b`). Two `delegate_to_sub_agent` calls with the same `agent_id` share a browser profile and will interfere with each other's pages.
18. **Passing `profile=` in GCU tool calls** — Profile isolation for parallel subagents is automatic. The framework injects a unique profile per subagent via an asyncio `ContextVar`. Hardcoding `profile="default"` in a GCU system prompt breaks this isolation.
15. **Manually wiring browser tools on event_loop nodes** — Browser nodes use tools: {policy: "all"} to get all browser tools.
## Worker Agent Errors
19. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Route worker review/approval through queen escalation instead of direct worker HITL.
@@ -0,0 +1,227 @@
# Declarative Agent File Templates
Agents are defined as a single `agent.yaml` file. No Python code needed.
The runner loads this file directly -- no `agent.py`, `config.py`, or
`nodes/__init__.py` required.
## agent.yaml -- Complete Agent Definition
```yaml
name: my-agent
version: 1.0.0
description: What this agent does.
metadata:
intro_message: Welcome! What would you like me to do?
# Template variables -- substituted into system_prompt and identity_prompt
# via {{variable_name}} syntax. Use this for config values that appear
# in prompts (spreadsheet IDs, API endpoints, account names, etc.)
variables:
spreadsheet_id: "1ZVxWDL..."
sheet_name: "contacts"
goal:
description: What this agent achieves.
success_criteria:
- "First success criterion"
- "Second success criterion"
constraints:
- "Hard constraint the agent must respect"
identity_prompt: |
You are a helpful agent.
conversation_mode: continuous # always "continuous" for Hive agents
loop_config:
max_iterations: 100
max_tool_calls_per_turn: 30
max_context_tokens: 32000
# MCP servers to connect (resolved by name from ~/.hive/mcp_registry/)
mcp_servers:
- name: hive-tools
- name: gcu-tools
nodes:
# Node 1: Process (autonomous entry node)
# The queen handles intake and passes structured input via
# run_agent_with_input(task). NO client-facing intake node.
- id: process
name: Process
description: Execute the task using available tools
max_node_visits: 0 # 0 = unlimited (forever-alive agents)
input_keys: [user_request, feedback]
output_keys: [results]
nullable_output_keys: [feedback]
tools:
policy: explicit
allowed: [web_search, web_scrape, save_data, load_data, list_data_files]
success_criteria: Results are complete and accurate.
system_prompt: |
You are a processing agent. Your task is in memory under "user_request".
If "feedback" is present, this is a revision.
Work in phases:
1. Use tools to gather/process data
2. Analyze results
3. Call set_output in a SEPARATE turn:
- set_output("results", "structured results")
# Node 2: Handoff (autonomous)
- id: handoff
name: Handoff
description: Prepare worker results for queen review
max_node_visits: 0
input_keys: [results, user_request]
output_keys: [next_action, feedback, worker_summary]
nullable_output_keys: [feedback, worker_summary]
tools:
policy: none # handoff nodes don't need tools
success_criteria: Results are packaged for queen decision-making.
system_prompt: |
Do NOT talk to the user directly. The queen is the only user interface.
If blocked, call escalate(reason, context) then set:
- set_output("next_action", "escalated")
- set_output("feedback", "what help is needed")
Otherwise summarize and set:
- set_output("worker_summary", "short summary for queen")
- set_output("next_action", "done") or "revise"
- set_output("feedback", "what to revise") only when revising
edges:
- from_node: process
to_node: handoff
# Feedback loop
- from_node: handoff
to_node: process
condition: conditional
condition_expr: "str(next_action).lower() == 'revise'"
priority: 2
# Escalation loop
- from_node: handoff
to_node: process
condition: conditional
condition_expr: "str(next_action).lower() == 'escalated'"
priority: 3
# Loop back for next task
- from_node: handoff
to_node: process
condition: conditional
condition_expr: "str(next_action).lower() == 'done'"
entry_node: process
terminal_nodes: [] # [] = forever-alive
```
## Key differences from Python templates
| Before (Python) | After (YAML) |
|-------------------------------------|----------------------------------------|
| `agent.py` (250 lines boilerplate) | Not needed |
| `config.py` (dataclass + metadata) | `variables:` + `metadata:` in YAML |
| `nodes/__init__.py` (NodeSpec calls)| `nodes:` list in YAML |
| `__init__.py`, `__main__.py` | Not needed |
| f-string config injection | `{{variable_name}}` templates |
| `mcp_servers.json` (separate file) | `mcp_servers:` in YAML (or keep file) |
## Node types
| Type | Description | Tools |
|--------------|---------------------------------------|--------------------------|
| `event_loop` | LLM-driven orchestration (default) | Explicit list or `none` |
| `gcu` | Browser automation via GCU tools | `policy: all` (auto) |
## Tool access policies
```yaml
# Explicit list (recommended for most nodes)
tools:
policy: explicit
allowed: [web_search, save_data]
# All tools (for browser automation nodes)
tools:
policy: all
# No tools (for handoff/summary nodes)
tools:
policy: none
```
## Edge conditions
| Condition | When to use |
|---------------|-------------------------------------------------------|
| `on_success` | Default. Next node after current succeeds. |
| `on_failure` | Fallback path when current node fails. |
| `always` | Always traverse regardless of outcome. |
| `conditional` | Evaluate `condition_expr` against shared memory keys. |
| `llm_decide` | Let the LLM decide at runtime. |
## Template variables
Use `{{variable_name}}` in `system_prompt` and `identity_prompt`.
Variables are defined in the top-level `variables:` map.
```yaml
variables:
spreadsheet_id: "1ZVxWDL..."
api_endpoint: "https://api.example.com"
nodes:
- id: start
system_prompt: |
Connect to spreadsheet: {{spreadsheet_id}}
API endpoint: {{api_endpoint}}
```
## Entry points
Default is a single manual entry point. For timer/scheduled triggers:
```yaml
entry_points:
- id: default
trigger_type: manual
- id: daily-check
trigger_type: timer
trigger_config:
interval_minutes: 30
```
## mcp_servers.json -- Still Supported
The `mcp_servers.json` file is still loaded automatically if present alongside
`agent.yaml`. You can also inline servers in the YAML:
```yaml
mcp_servers:
- name: hive-tools
- name: gcu-tools
```
Both approaches work. The JSON file takes precedence for backward compatibility.
## Migration from Python agents
Run the migration tool to convert existing agents:
```bash
uv run python -m framework.tools.migrate_agent exports/my_agent
```
This generates `agent.yaml` from the existing `agent.py` + `nodes/` + `config.py`.
The original files are left untouched. Once verified, you can delete the Python files.
## Files after migration
```
my_agent/
agent.yaml # The only required file
mcp_servers.json # Optional (can inline in YAML)
flowchart.json # Optional (auto-generated)
```
@@ -1,306 +1,193 @@
# Hive Agent Framework Condensed Reference
# Hive Agent Framework -- Condensed Reference
## Architecture
Agents are Python packages in `exports/`:
Agents are declarative JSON configs in `exports/`:
```
exports/my_agent/
├── __init__.py # MUST re-export ALL module-level vars from agent.py
├── __main__.py # CLI (run, tui, info, validate, shell)
├── agent.py # Graph construction (goal, edges, agent class)
├── config.py # Runtime config
├── nodes/__init__.py # Node definitions (NodeSpec)
├── mcp_servers.json # MCP tool server config
└── tests/ # pytest tests
agent.json # The entire agent definition
mcp_servers.json # MCP tool server config (optional, prefer registry refs)
```
## Agent Loading Contract
No Python files. No `__init__.py`, `__main__.py`, `config.py`, or `nodes/`.
`AgentRunner.load()` imports the package (`__init__.py`) and reads these
module-level variables via `getattr()`:
## Agent Loading
| Variable | Required | Default if missing | Consequence |
|----------|----------|--------------------|-------------|
| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" |
| `nodes` | YES | `None` | **FATAL** — same error |
| `edges` | YES | `None` | **FATAL** — same error |
| `entry_node` | no | `nodes[0].id` | Probably wrong node |
| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails |
| `terminal_nodes` | **YES** | `[]` | **FATAL** — graph must have at least one terminal node |
| `pause_nodes` | no | `[]` | OK |
| `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
| `identity_prompt` | no | not passed | No agent-level identity |
| `loop_config` | no | `{}` | No iteration limits |
| `triggers.json` (file) | no | not present | No triggers (timers, webhooks) |
`AgentLoader.load()` reads `agent.json` and builds the execution graph.
If `agent.py` exists (legacy), it's loaded as a Python module instead.
**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from
`agent.py`. Missing exports silently fall back to defaults, causing
hard-to-debug failures.
## agent.json Schema
**Why `default_agent.validate()` is NOT sufficient:**
`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges).
These are always correct because the constructor references agent.py's module
vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`),
not the class. So `validate()` passes while `AgentRunner.load()` fails.
Always test with `AgentRunner.load("exports/{name}")` — this is the same
code path the TUI and `hive run` use.
## Goal
Defines success criteria and constraints:
```python
goal = Goal(
id="kebab-case-id",
name="Display Name",
description="What the agent does",
success_criteria=[
SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25),
],
constraints=[
Constraint(id="c-id", description="...", constraint_type="hard", category="quality"),
],
)
```json
{
"name": "my-agent",
"version": "1.0.0",
"description": "What this agent does",
"goal": {
"description": "What to achieve",
"success_criteria": ["criterion 1", "criterion 2"],
"constraints": ["constraint 1"]
},
"identity_prompt": "You are a helpful agent.",
"conversation_mode": "continuous",
"loop_config": {
"max_iterations": 100,
"max_tool_calls_per_turn": 30,
"max_context_tokens": 32000
},
"mcp_servers": [
{"name": "hive-tools"},
{"name": "gcu-tools"}
],
"variables": {
"spreadsheet_id": "1ZVx..."
},
"nodes": [...],
"edges": [...],
"entry_node": "process",
"terminal_nodes": []
}
```
- 3-5 success criteria, weights sum to 1.0
- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional)
## NodeSpec Fields
## Template Variables
Use `{{variable_name}}` in `system_prompt` and `identity_prompt`. Variables
are defined in the top-level `variables` object:
```json
{
"variables": {"sheet_id": "1ZVx..."},
"nodes": [{
"id": "start",
"system_prompt": "Use sheet: {{sheet_id}}"
}]
}
```
## Node Fields
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| id | str | required | kebab-case identifier |
| name | str | required | Display name |
| name | str | id | Display name |
| description | str | required | What the node does |
| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) |
| input_keys | list[str] | required | Memory keys this node reads |
| output_keys | list[str] | required | Memory keys this node writes via set_output |
| node_type | str | "event_loop" | `"event_loop"` |
| input_keys | list | [] | Memory keys this node reads |
| output_keys | list | [] | Memory keys this node writes via set_output |
| system_prompt | str | "" | LLM instructions |
| tools | list[str] | [] | Tool names from MCP servers |
| client_facing | bool | False | Deprecated compatibility field. Queen interactivity is implicit; workers should escalate instead |
| nullable_output_keys | list[str] | [] | Keys that may remain unset |
| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops |
| max_retries | int | 3 | Retries on failure |
| tools | object | {} | Tool access policy (see below) |
| nullable_output_keys | list | [] | Keys that may remain unset |
| max_node_visits | int | 1 | 0=unlimited (for forever-alive agents) |
| success_criteria | str | "" | Natural language for judge evaluation |
| client_facing | bool | false | Whether output is shown to user |
## EdgeSpec Fields
## Tool Access Policies
Each node declares its tools via a policy object:
```json
{"tools": {"policy": "explicit", "allowed": ["web_search", "save_data"]}}
{"tools": {"policy": "all"}}
{"tools": {"policy": "none"}}
```
- `explicit` (default): only named tools. Empty `allowed` = zero tools.
- `all`: all tools from registry (e.g. for browser automation nodes).
- `none`: no tools (for handoff/summary nodes).
## Edge Fields
| Field | Type | Description |
|-------|------|-------------|
| id | str | kebab-case identifier |
| source | str | Source node ID |
| target | str | Target node ID |
| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL |
| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) |
| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) |
| from_node | str | Source node ID |
| to_node | str | Target node ID |
| condition | str | `on_success`, `on_failure`, `always`, `conditional` |
| condition_expr | str | Python expression for conditional routing |
| priority | int | Higher = evaluated first |
condition_expr examples:
- `"needs_more_research == True"`
- `"str(next_action).lower() == 'revise'"`
## Key Patterns
### STEP 1/STEP 2 (Client-Facing Nodes)
```
**STEP 1 — Respond to the user (text only, NO tool calls):**
[Present information, ask questions]
**STEP 2 — After the user responds, call set_output:**
- set_output("key", "value based on user response")
```
This prevents premature set_output before user interaction.
### Fewer, Richer Nodes (CRITICAL)
**Hard limit: 3-6 nodes for most agents.** Never exceed 6 unless the user
explicitly requests a complex multi-phase pipeline.
**Hard limit: 3-6 nodes for most agents.** Each node boundary serializes
outputs and destroys in-context information. Merge unless:
1. Client-facing boundary (different interaction models)
2. Disjoint tool sets
3. Parallel execution (fan-out branches)
Each node boundary serializes outputs to the shared buffer and **destroys** all
in-context information: tool call results, intermediate reasoning, conversation
history. A research node that searches, fetches, and analyzes in ONE node keeps
all source material in its conversation context. Split across 3 nodes, each
downstream node only sees the serialized summary string.
**Decision framework — merge unless ANY of these apply:**
1. **Client-facing boundary** — Autonomous and client-facing work MUST be
separate nodes (different interaction models)
2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web
search vs database), separate nodes make sense
3. **Parallel execution** — Fan-out branches must be separate nodes
**Red flags that you have too many nodes:**
- A node with 0 tools (pure LLM reasoning) → merge into predecessor/successor
- A node that sets only 1 trivial output → collapse into predecessor
- Multiple consecutive autonomous nodes → combine into one rich node
- A "report" node that presents analysis → merge into the client-facing node
- A "confirm" or "schedule" node that doesn't call any external service → remove
**Typical agent structure (2 nodes):**
**Typical structure (2 nodes):**
```
process (autonomous) ←→ review (queen-mediated)
```
The queen owns intake — she gathers requirements from the user, then
passes structured input via `run_agent_with_input(task)`. When building
the agent, design the entry node's `input_keys` to match what the queen
will provide at run time. Worker agents should NOT have a client-facing
intake node. Mid-execution review/approval should happen through queen
escalation rather than direct worker HITL.
For simpler agents, just 1 autonomous node:
```
process (autonomous) — loops back to itself
process (autonomous) <-> review (queen-mediated)
```
### nullable_output_keys
For inputs that only arrive on certain edges:
```python
research_node = NodeSpec(
input_keys=["brief", "feedback"],
nullable_output_keys=["feedback"], # Only present on feedback edge
max_node_visits=3,
)
```
### Mutually Exclusive Outputs
For routing decisions:
```python
review_node = NodeSpec(
output_keys=["approved", "feedback"],
nullable_output_keys=["approved", "feedback"], # Node sets one or the other
)
```
### Continuous Loop Pattern
Mark the primary event_loop node as terminal: `terminal_nodes=["process"]`.
The node has `output_keys` and can complete when the agent finishes its work.
Use `conversation_mode="continuous"` to preserve context across transitions.
The queen owns intake. Worker agents should NOT have a client-facing intake
node. Mid-execution review should happen through queen escalation.
### set_output
- Synthetic tool injected by framework
- Call separately from real tool calls (separate turn)
- `set_output("key", "value")` stores to the shared buffer
## Edge Conditions
| Condition | When |
|-----------|------|
| ON_SUCCESS | Node completed successfully |
| ON_FAILURE | Node failed |
| ALWAYS | Unconditional |
| CONDITIONAL | condition_expr evaluates to True against memory |
condition_expr examples:
- `"needs_more_research == True"`
- `"str(next_action).lower() == 'new_agent'"`
- `"feedback is not None"`
## Graph Lifecycle
### Graph Lifecycle
| Pattern | terminal_nodes | When |
|---------|---------------|------|
| **Continuous loop** | `["node-with-output-keys"]` | **DEFAULT for all agents** |
| Continuous loop | `["node-with-output-keys"]` | DEFAULT for all agents |
| Linear | `["last-node"]` | One-shot/batch agents |
**Every graph must have at least one terminal node.** Terminal nodes
define where execution ends. For interactive agents that loop continuously,
mark the primary event_loop node as terminal (it has `output_keys` and can
complete at any point). The framework default for `max_node_visits` is 0
(unbounded), so nodes work correctly in continuous loops without explicit
override. Only set `max_node_visits > 0` in one-shot agents with feedback loops.
Every node must have at least one outgoing edge — no dead ends.
Every graph must have at least one terminal node.
## Continuous Conversation Mode
### Continuous Conversation Mode
`conversation_mode` has ONLY two valid states:
- `"continuous"` recommended for interactive agents
- Omit entirely isolated per-node conversations (each node starts fresh)
- `"continuous"` -- recommended (context carries across node transitions)
- Omit entirely -- isolated per-node conversations
**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`,
`"adaptive"`, `"shared"`. These do not exist in the framework.
When `conversation_mode="continuous"`:
- Same conversation thread carries across node transitions
- Layered system prompts: identity (agent-level) + narrative + focus (per-node)
- Transition markers inserted at boundaries
- Compaction happens opportunistically at phase transitions
**INVALID values:** `"client_facing"`, `"interactive"`, `"shared"`.
## loop_config
Only three valid keys:
```python
loop_config = {
"max_iterations": 100, # Max LLM turns per node visit
"max_tool_calls_per_turn": 20, # Max tool calls per LLM response
"max_context_tokens": 32000, # Triggers conversation compaction
```json
{
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_context_tokens": 32000
}
```
**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`,
`"temperature"`. These are silently ignored or cause errors.
## Data Tools (Spillover)
For large data that exceeds context:
- `save_data(filename, data)` — Write to session data dir
- `load_data(filename, offset, limit)` — Read with pagination
- `list_data_files()` — List files
- `serve_file_to_user(filename, label)` — Clickable file:// URI
- `save_data(filename, data)` -- write to session data dir
- `load_data(filename, offset, limit)` -- read with pagination
- `list_data_files()` -- list files
- `serve_file_to_user(filename, label)` -- clickable file URI
`data_dir` is auto-injected by framework — LLM never sees it.
`data_dir` is auto-injected by framework.
## Fan-Out / Fan-In
Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather().
- Parallel nodes must have disjoint output_keys
- Only one branch may have client_facing nodes
- Fan-in node gets all outputs in the shared buffer
Multiple `on_success` edges from same source = parallel execution.
Parallel nodes must have disjoint output_keys.
## Judge System
- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
- **SchemaJudge**: Validates against Pydantic model
- **Custom**: Implement `evaluate(context) -> JudgeVerdict`
Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.
## Triggers (Timers, Webhooks)
For agents that react to external events, create a `triggers.json` file
in the agent's export directory:
```json
[
{
"id": "daily-check",
"name": "Daily Check",
"trigger_type": "timer",
"trigger_config": {"cron": "0 9 * * *"},
"task": "Run the daily check process"
}
]
```
### Key Fields
- `trigger_type`: `"timer"` or `"webhook"`
- `trigger_config`: `{"cron": "0 9 * * *"}` or `{"interval_minutes": 20}`
- `task`: describes what the worker should do when the trigger fires
- Triggers can also be created/removed at runtime via `set_trigger` / `remove_trigger` queen tools
## Tool Discovery
Do NOT rely on a static tool list — it will be outdated. Always call
`list_agent_tools()` with NO arguments first to see ALL available tools.
Only use `group=` or `output_schema=` as follow-up calls after seeing the
full list.
Always call `list_agent_tools()` first to see available tools.
Do NOT rely on a static tool list.
```
list_agent_tools() # ALWAYS call this first
list_agent_tools(group="gmail", output_schema="full") # then drill into a category
list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools
list_agent_tools() # full summary
list_agent_tools(group="gmail", output_schema="full") # drill into category
```
After building, run `validate_agent_package("{name}")` to check everything at once.
Common tool categories (verify via list_agent_tools):
- **Web**: search, scrape, PDF
- **Data**: save/load/append/list data files, serve to user
- **File**: view, write, replace, diff, list, grep
- **Communication**: email, gmail, slack, telegram
- **CRM**: hubspot, apollo, calcom
- **GitHub**: stargazers, user profiles, repos
- **Vision**: image analysis
- **Time**: current time
After building, run `validate_agent_package("{name}")` to check everything.
@@ -1,158 +1,53 @@
# GCU Browser Automation Guide
# Browser Automation Guide
## When to Use GCU Nodes
## When to Use Browser Nodes
Use `node_type="gcu"` when:
- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs)
- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content)
- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files
Use browser nodes (with `tools: {policy: "all"}`) when:
- The task requires interacting with web pages (clicking, typing, navigating)
- No API is available for the target service
- The user is already logged in to the target site
Do NOT use GCU for:
- Static content that `web_scrape` handles fine
- API-accessible data (use the API directly)
- PDF/file processing
- Anything that doesn't require a browser UI
## What Browser Nodes Are
## What GCU Nodes Are
- Regular `event_loop` nodes with browser tools from gcu-tools MCP server
- Set `tools: {policy: "all"}` to give access to all browser tools
- Wire into the graph with edges like any other node
- No special node_type needed
- `node_type="gcu"` — a declarative enhancement over `event_loop`
- Framework auto-prepends browser best-practices system prompt
- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server
- Same underlying `EventLoopNode` class — no new imports needed
- `tools=[]` is correct — tools are auto-populated at runtime
## Available Browser Tools
## GCU Architecture Pattern
All tools are prefixed with `browser_`:
- `browser_start`, `browser_open` -- launch/navigate
- `browser_click`, `browser_fill`, `browser_type` -- interact
- `browser_snapshot` -- read page content (preferred over screenshot)
- `browser_screenshot` -- visual capture
- `browser_scroll`, `browser_wait` -- navigation helpers
- `browser_evaluate` -- run JavaScript
GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges.
## System Prompt Tips for Browser Nodes
- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work
- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")`
- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False`
- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)`
## GCU Node Definition Template
```python
gcu_browser_node = NodeSpec(
id="gcu-browser-worker",
name="Browser Worker",
description="Browser subagent that does X.",
node_type="gcu",
client_facing=False,
max_node_visits=1,
input_keys=[],
output_keys=["result"],
tools=[], # Auto-populated with all browser tools
system_prompt="""\
You are a browser agent. Your job: [specific task].
## Workflow
1. browser_start (only if no browser is running yet)
2. browser_open(url=TARGET_URL) — note the returned targetId
3. browser_snapshot to read the page
4. [task-specific steps]
5. set_output("result", JSON)
## Output format
set_output("result", JSON) with:
- [field]: [type and description]
""",
)
```
1. Use browser_snapshot() to read page content (NOT browser_get_text)
2. Use browser_wait(seconds=2-3) after navigation for page load
3. If you hit an auth wall, call set_output with an error and move on
4. Keep tool calls per turn <= 10 for reliability
```
## Parent Node Template (orchestrating GCU subagents)
```python
orchestrator_node = NodeSpec(
id="orchestrator",
...
node_type="event_loop",
sub_agents=["gcu-browser-worker"],
system_prompt="""\
...
delegate_to_sub_agent(
agent_id="gcu-browser-worker",
task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]."
)
...
""",
tools=[], # Orchestrator doesn't need browser tools
)
```
## mcp_servers.json with GCU
## Example
```json
{
"hive-tools": { ... },
"gcu-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
"cwd": "../../tools",
"description": "GCU tools for browser automation"
}
"id": "scan-profiles",
"name": "Scan LinkedIn Profiles",
"description": "Navigate LinkedIn search results and collect profile data",
"tools": {"policy": "all"},
"input_keys": ["search_url"],
"output_keys": ["profiles"],
"system_prompt": "Navigate to the search URL, paginate through results..."
}
```
Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine.
## GCU System Prompt Best Practices
Key rules to bake into GCU node prompts:
- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML
- Always `browser_wait` after navigation
- Use large scroll amounts (~2000-5000) for lazy-loaded content
- For spillover files, use `run_command` with grep, not `read_file`
- If auth wall detected, report immediately — don't attempt login
- Keep tool calls per turn ≤10
- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call
## Multiple Concurrent GCU Subagents
When a task can be parallelized across multiple sites or profiles, declare a distinct GCU
node for each and invoke them all in the same LLM turn. The framework batches all
`delegate_to_sub_agent` calls made in one turn and runs them with `asyncio.gather`, so
they execute concurrently — not sequentially.
**Each GCU subagent automatically gets its own isolated browser context** — no `profile=`
argument is needed in tool calls. The framework derives a unique profile from the subagent's
node ID and instance counter and injects it via an asyncio `ContextVar` before the subagent
runs.
### Example: three sites in parallel
```python
# Three distinct GCU nodes
gcu_site_a = NodeSpec(id="gcu-site-a", node_type="gcu", ...)
gcu_site_b = NodeSpec(id="gcu-site-b", node_type="gcu", ...)
gcu_site_c = NodeSpec(id="gcu-site-c", node_type="gcu", ...)
orchestrator = NodeSpec(
id="orchestrator",
node_type="event_loop",
sub_agents=["gcu-site-a", "gcu-site-b", "gcu-site-c"],
system_prompt="""\
Call all three subagents in a single response to run them in parallel:
delegate_to_sub_agent(agent_id="gcu-site-a", task="Scrape prices from site A")
delegate_to_sub_agent(agent_id="gcu-site-b", task="Scrape prices from site B")
delegate_to_sub_agent(agent_id="gcu-site-c", task="Scrape prices from site C")
""",
)
Connected via regular edges:
```
search-setup -> scan-profiles -> process-results
```
**Rules:**
- Use distinct node IDs for each concurrent task — sharing an ID shares the browser context.
- The GCU node prompts do not need to mention `profile=`; isolation is automatic.
- Cleanup is automatic at session end, but GCU nodes can call `browser_stop()` explicitly
if they want to release resources mid-run.
## GCU Anti-Patterns
- Using `browser_screenshot` to read text (use `browser_snapshot` instead; screenshots are for visual context only)
- Re-navigating after scrolling (resets scroll position)
- Attempting login on auth walls
- Forgetting `target_id` in multi-tab scenarios
- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern
- Making GCU nodes `client_facing=True` (they should be autonomous subagents)
@@ -2,7 +2,7 @@
A lightweight side agent that runs after each queen LLM turn. It inspects
recent conversation messages and extracts durable user knowledge into
individual memory files in ``~/.hive/memories/global/``.
Two reflection types:
- **Short reflection**: after conversational queen turns. Distills
@@ -493,7 +493,7 @@ async def subscribe_reflection_triggers(
Call this once during queen setup. Returns a list of event-bus
subscription IDs for cleanup during session teardown.
"""
from framework.runtime.event_bus import EventType
from framework.host.event_bus import EventType
mem_dir = memory_dir or global_memory_dir()
_lock = asyncio.Lock()
@@ -22,10 +22,10 @@ def mock_mode():
@pytest_asyncio.fixture(scope="session")
async def runner(tmp_path_factory, mock_mode):
from framework.runner.runner import AgentRunner
from framework.loader.agent_loader import AgentLoader
storage = tmp_path_factory.mktemp("agent_storage")
r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
r = AgentLoader.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
r._setup()
yield r
await r.cleanup_async()
+2 -2
View File
@@ -79,7 +79,7 @@ def main():
subparsers = parser.add_subparsers(dest="command", required=True)
# Register runner commands (run, info, validate, list, shell)
from framework.runner.cli import register_commands
from framework.loader.cli import register_commands
register_commands(subparsers)
@@ -99,7 +99,7 @@ def main():
register_debugger_commands(subparsers)
# Register MCP registry commands (mcp install, mcp add, ...)
from framework.runner.mcp_registry_cli import register_mcp_commands
from framework.loader.mcp_registry_cli import register_mcp_commands
register_mcp_commands(subparsers)
+67 -12
View File
@@ -12,13 +12,47 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
from framework.graph.edge import DEFAULT_MAX_TOKENS
from framework.orchestrator.edge import DEFAULT_MAX_TOKENS
# ---------------------------------------------------------------------------
# Hive home directory structure
# ---------------------------------------------------------------------------
HIVE_HOME = Path.home() / ".hive"
QUEENS_DIR = HIVE_HOME / "agents" / "queens"
COLONIES_DIR = HIVE_HOME / "colonies"
MEMORIES_DIR = HIVE_HOME / "memories"
def queen_dir(queen_name: str = "default") -> Path:
"""Return the storage directory for a named queen agent."""
return QUEENS_DIR / queen_name
def colony_dir(colony_name: str) -> Path:
"""Return the directory for a named colony."""
return COLONIES_DIR / colony_name
def memory_dir(scope: str, name: str | None = None) -> Path:
"""Return memory dir for a scope.
Examples::
memory_dir("global") -> ~/.hive/memories/global
memory_dir("colonies", "my_agent") -> ~/.hive/memories/colonies/my_agent
memory_dir("agents/queens", "default")-> ~/.hive/memories/agents/queens/default
memory_dir("agents", "worker_name") -> ~/.hive/memories/agents/worker_name
"""
base = MEMORIES_DIR / scope
return base / name if name else base
# ---------------------------------------------------------------------------
# Low-level config file access
# ---------------------------------------------------------------------------
HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
HIVE_CONFIG_FILE = HIVE_HOME / "configuration.json"
# Hive LLM router endpoint (Anthropic-compatible).
# litellm's Anthropic handler appends /v1/messages, so this is just the base host.
@@ -130,7 +164,7 @@ def get_worker_api_key() -> str | None:
# Worker-specific subscription / env var
if worker_llm.get("use_claude_code_subscription"):
try:
from framework.runner.runner import get_claude_code_token
from framework.loader.agent_loader import get_claude_code_token
token = get_claude_code_token()
if token:
@@ -140,7 +174,7 @@ def get_worker_api_key() -> str | None:
if worker_llm.get("use_codex_subscription"):
try:
from framework.runner.runner import get_codex_token
from framework.loader.agent_loader import get_codex_token
token = get_codex_token()
if token:
@@ -150,7 +184,7 @@ def get_worker_api_key() -> str | None:
if worker_llm.get("use_kimi_code_subscription"):
try:
from framework.runner.runner import get_kimi_code_token
from framework.loader.agent_loader import get_kimi_code_token
token = get_kimi_code_token()
if token:
@@ -160,7 +194,7 @@ def get_worker_api_key() -> str | None:
if worker_llm.get("use_antigravity_subscription"):
try:
from framework.runner.runner import get_antigravity_token
from framework.loader.agent_loader import get_antigravity_token
token = get_antigravity_token()
if token:
@@ -216,7 +250,7 @@ def get_worker_llm_extra_kwargs() -> dict[str, Any]:
"User-Agent": "CodexBar",
}
try:
from framework.runner.runner import get_codex_account_id
from framework.loader.agent_loader import get_codex_account_id
account_id = get_codex_account_id()
if account_id:
@@ -263,22 +297,43 @@ def get_max_context_tokens() -> int:
return get_hive_config().get("llm", {}).get("max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS)
def get_api_keys() -> list[str] | None:
"""Return a list of API keys if ``api_keys`` is configured, else ``None``.
This supports key-pool rotation: configure multiple keys in
``~/.hive/configuration.json`` under ``llm.api_keys`` and the
:class:`~framework.llm.key_pool.KeyPool` will rotate through them.
"""
llm = get_hive_config().get("llm", {})
keys = llm.get("api_keys")
if keys and isinstance(keys, list) and len(keys) > 0:
return [k for k in keys if k] # filter empties
return None
def get_api_key() -> str | None:
"""Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code.
Priority:
0. Explicit key pool (``api_keys`` list) -- returns first key for
single-key callers; full pool available via :func:`get_api_keys`.
1. Claude Code subscription (``use_claude_code_subscription: true``)
reads the OAuth token from ``~/.claude/.credentials.json``.
2. Codex subscription (``use_codex_subscription: true``)
reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``.
3. Environment variable named in ``api_key_env_var``.
"""
# If an explicit key pool is configured, use the first key.
pool_keys = get_api_keys()
if pool_keys:
return pool_keys[0]
llm = get_hive_config().get("llm", {})
# Claude Code subscription: read OAuth token directly
if llm.get("use_claude_code_subscription"):
try:
from framework.runner.runner import get_claude_code_token
from framework.loader.agent_loader import get_claude_code_token
token = get_claude_code_token()
if token:
@@ -289,7 +344,7 @@ def get_api_key() -> str | None:
# Codex subscription: read OAuth token from Keychain / auth.json
if llm.get("use_codex_subscription"):
try:
from framework.runner.runner import get_codex_token
from framework.loader.agent_loader import get_codex_token
token = get_codex_token()
if token:
@@ -300,7 +355,7 @@ def get_api_key() -> str | None:
# Kimi Code subscription: read API key from ~/.kimi/config.toml
if llm.get("use_kimi_code_subscription"):
try:
from framework.runner.runner import get_kimi_code_token
from framework.loader.agent_loader import get_kimi_code_token
token = get_kimi_code_token()
if token:
@@ -311,7 +366,7 @@ def get_api_key() -> str | None:
# Antigravity subscription: read OAuth token from accounts JSON
if llm.get("use_antigravity_subscription"):
try:
from framework.runner.runner import get_antigravity_token
from framework.loader.agent_loader import get_antigravity_token
token = get_antigravity_token()
if token:
@@ -468,7 +523,7 @@ def get_llm_extra_kwargs() -> dict[str, Any]:
"User-Agent": "CodexBar",
}
try:
from framework.runner.runner import get_codex_account_id
from framework.loader.agent_loader import get_codex_account_id
account_id = get_codex_account_id()
if account_id:
+21 -7
View File
@@ -36,7 +36,7 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from framework.graph import NodeSpec
from framework.orchestrator import NodeSpec
logger = logging.getLogger(__name__)
@@ -533,7 +533,9 @@ class CredentialSetupSession:
def load_agent_nodes(agent_path: str | Path) -> list:
"""Load NodeSpec list from an agent's agent.py or agent.json.
"""Load NodeSpec list from an agent directory.
Checks agent.json (declarative) first, then agent.py (legacy).
Args:
agent_path: Path to agent directory.
@@ -542,16 +544,28 @@ def load_agent_nodes(agent_path: str | Path) -> list:
List of NodeSpec objects (empty list if agent can't be loaded).
"""
agent_path = Path(agent_path)
agent_json_file = agent_path / "agent.json"
agent_py = agent_path / "agent.py"
agent_json = agent_path / "agent.json"
if agent_py.exists():
if agent_json_file.exists():
return _load_nodes_from_json_declarative(agent_json_file)
elif agent_py.exists():
return _load_nodes_from_python_agent(agent_path)
elif agent_json.exists():
return _load_nodes_from_json_agent(agent_json)
return []
def _load_nodes_from_json_declarative(agent_json: Path) -> list:
"""Load nodes from a declarative JSON agent."""
try:
from framework.loader.agent_loader import load_agent_config
data = json.loads(agent_json.read_text(encoding="utf-8"))
graph, _ = load_agent_config(data)
return list(graph.nodes)
except Exception:
return []
def _load_nodes_from_python_agent(agent_path: Path) -> list:
"""Load nodes from a Python-based agent."""
import importlib.util
@@ -590,7 +604,7 @@ def _load_nodes_from_json_agent(agent_json: Path) -> list:
with open(agent_json, encoding="utf-8-sig") as f:
data = json.load(f)
from framework.graph import NodeSpec
from framework.orchestrator import NodeSpec
nodes_data = data.get("graph", {}).get("nodes", [])
nodes = []
-65
View File
@@ -1,65 +0,0 @@
"""Graph structures: Goals, Nodes, Edges, and Execution."""
from framework.graph.context import GraphContext
from framework.graph.context_handoff import ContextHandoff, HandoffContext
from framework.graph.conversation import ConversationStore, Message, NodeConversation
from framework.graph.edge import DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.event_loop_node import (
EventLoopNode,
JudgeProtocol,
JudgeVerdict,
LoopConfig,
OutputAccumulator,
)
from framework.graph.executor import GraphExecutor
from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec
from framework.graph.worker_agent import (
Activation,
FanOutTag,
FanOutTracker,
WorkerAgent,
WorkerCompletion,
WorkerLifecycle,
)
__all__ = [
# Goal
"Goal",
"SuccessCriterion",
"Constraint",
"GoalStatus",
# Node
"NodeSpec",
"NodeContext",
"NodeResult",
"NodeProtocol",
# Edge
"EdgeSpec",
"EdgeCondition",
"GraphSpec",
"DEFAULT_MAX_TOKENS",
# Executor
"GraphExecutor",
# Conversation
"NodeConversation",
"ConversationStore",
"Message",
# Event Loop
"EventLoopNode",
"LoopConfig",
"OutputAccumulator",
"JudgeProtocol",
"JudgeVerdict",
# Context Handoff
"ContextHandoff",
"HandoffContext",
# Worker Agent
"WorkerAgent",
"WorkerLifecycle",
"WorkerCompletion",
"Activation",
"FanOutTag",
"FanOutTracker",
"GraphContext",
]
@@ -1,6 +0,0 @@
"""EventLoopNode subpackage — modular components of the event loop orchestrator.
All public symbols are re-exported by the parent ``event_loop_node.py`` for
backward compatibility. Internal consumers may import directly from these
submodules for clarity.
"""
@@ -1,370 +0,0 @@
"""Subagent execution for the event loop.
Handles the full subagent lifecycle: validation, context setup, tool filtering,
conversation store derivation, execution, and cleanup.
"""
from __future__ import annotations
import json
import logging
import time
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import TYPE_CHECKING, Any
from framework.graph.conversation import ConversationStore
from framework.graph.event_loop.judge_pipeline import SubagentJudge
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
from framework.graph.node import DataBuffer, NodeContext
from framework.llm.provider import ToolResult, ToolUse
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.event_bus import EventBus
if TYPE_CHECKING:
from framework.graph.event_loop_node import EventLoopNode
logger = logging.getLogger(__name__)
async def execute_subagent(
    ctx: NodeContext,
    agent_id: str,
    task: str,
    *,
    config: LoopConfig,
    event_loop_node_cls: type[EventLoopNode],
    escalation_receiver_cls: Callable[[], Any],
    accumulator: OutputAccumulator | None = None,
    event_bus: EventBus | None = None,
    tool_executor: Callable[[ToolUse], ToolResult | Awaitable[ToolResult]] | None = None,
    conversation_store: ConversationStore | None = None,
    subagent_instance_counter: dict[str, int] | None = None,
) -> ToolResult:
    """Execute a subagent and return the result as a ToolResult.

    The subagent:
    - Gets a fresh conversation with just the task
    - Has read-only access to the parent's readable memory
    - Cannot delegate to its own subagents (prevents recursion)
    - Returns its output in structured JSON format

    Args:
        ctx: Parent node's context (for memory, tools, LLM access).
        agent_id: The node ID of the subagent to invoke.
        task: The task description to give the subagent.
        config: LoopConfig for iteration/tool limits.
        event_loop_node_cls: Concrete EventLoopNode class used to run the subagent.
        escalation_receiver_cls: Factory producing a receiver that blocks until
            the queen answers an escalation.
        accumulator: Parent's OutputAccumulator.
        event_bus: EventBus for lifecycle events.
        tool_executor: Tool executor callable.
        conversation_store: Parent conversation store (for deriving subagent store).
        subagent_instance_counter: Mutable counter dict for unique subagent paths.

    Returns:
        ToolResult with structured JSON output.
    """
    # Log subagent invocation start.
    # BUG FIX: the previous format string used implicit string-literal
    # concatenation -- '+ "\n" "🤖 SUBAGENT INVOCATION\n" "=" * 60' -- where
    # the three adjacent literals fuse into ONE string before the `* 60`
    # applies, so the whole banner text was repeated 60 times in the log.
    # Build the separator explicitly instead.
    banner = "=" * 60
    logger.info(
        "\n%s\n🤖 SUBAGENT INVOCATION\n%s\nParent Node: %s\nSubagent ID: %s\nTask: %s\n%s",
        banner,
        banner,
        ctx.node_id,
        agent_id,
        task[:500] + "..." if len(task) > 500 else task,  # truncate huge tasks
        banner,
    )

    # 1. Validate agent exists in registry
    if agent_id not in ctx.node_registry:
        return ToolResult(
            tool_use_id="",
            content=json.dumps(
                {
                    "message": f"Sub-agent '{agent_id}' not found in registry",
                    "data": None,
                    "metadata": {"agent_id": agent_id, "success": False, "error": "not_found"},
                }
            ),
            is_error=True,
        )

    subagent_spec = ctx.node_registry[agent_id]

    # 2. Create read-only memory snapshot
    parent_data = ctx.buffer.read_all()
    # Merge in-flight outputs from the parent's accumulator.
    # Committed buffer values win on key collisions.
    if accumulator:
        for key, value in accumulator.to_dict().items():
            if key not in parent_data:
                parent_data[key] = value

    subagent_buffer = DataBuffer()
    for key, value in parent_data.items():
        subagent_buffer.write(key, value, validate=False)

    read_keys = set(parent_data.keys()) | set(subagent_spec.input_keys or [])
    scoped_buffer = subagent_buffer.with_permissions(
        read_keys=list(read_keys),
        write_keys=[],  # Read-only!
    )

    # 2b. Compute instance counter early so the callback and child context
    # share the same stable node_id for this subagent invocation.
    if subagent_instance_counter is not None:
        subagent_instance_counter.setdefault(agent_id, 0)
        subagent_instance_counter[agent_id] += 1
        subagent_instance = str(subagent_instance_counter[agent_id])
    else:
        subagent_instance = "1"

    # First invocation keeps the short id; repeats get a numeric suffix.
    if subagent_instance == "1":
        sa_node_id = f"{ctx.node_id}:subagent:{agent_id}"
    else:
        sa_node_id = f"{ctx.node_id}:subagent:{agent_id}:{subagent_instance}"

    # 2c. Set up report callback (one-way channel to parent / event bus)
    subagent_reports: list[dict] = []

    async def _report_callback(
        message: str,
        data: dict | None = None,
        *,
        wait_for_response: bool = False,
    ) -> str | None:
        subagent_reports.append({"message": message, "data": data, "timestamp": time.time()})
        if event_bus:
            await event_bus.emit_subagent_report(
                stream_id=ctx.node_id,
                node_id=sa_node_id,
                subagent_id=agent_id,
                message=message,
                data=data,
                execution_id=ctx.execution_id,
            )
        if not wait_for_response:
            return None
        if not event_bus:
            logger.warning(
                "Subagent '%s' requested user response but no event_bus available",
                agent_id,
            )
            return None

        # Create isolated receiver and register for input routing
        import uuid

        escalation_id = f"{ctx.node_id}:escalation:{uuid.uuid4().hex[:8]}"
        receiver = escalation_receiver_cls()
        registry = ctx.shared_node_registry
        registry[escalation_id] = receiver
        try:
            await event_bus.emit_escalation_requested(
                stream_id=ctx.stream_id or ctx.node_id,
                node_id=escalation_id,
                reason=f"Subagent report (wait_for_response) from {agent_id}",
                context=message,
                execution_id=ctx.execution_id,
            )
            # Block until queen responds
            return await receiver.wait()
        finally:
            # Always deregister, even if the wait is cancelled.
            registry.pop(escalation_id, None)

    # 3. Filter tools for subagent
    subagent_tool_names = set(subagent_spec.tools or [])
    tool_source = ctx.all_tools if ctx.all_tools else ctx.available_tools

    # GCU auto-population: browser nodes with no explicit tool list get
    # everything except delegation (no nested subagents).
    if subagent_spec.node_type == "gcu" and not subagent_tool_names:
        subagent_tools = [t for t in tool_source if t.name != "delegate_to_sub_agent"]
    else:
        subagent_tools = [
            t
            for t in tool_source
            if t.name in subagent_tool_names and t.name != "delegate_to_sub_agent"
        ]

    missing = subagent_tool_names - {t.name for t in subagent_tools}
    if missing:
        logger.warning(
            "Subagent '%s' requested tools not found in catalog: %s",
            agent_id,
            sorted(missing),
        )

    logger.info(
        "📦 Subagent '%s' configuration:\n"
        " - System prompt: %s\n"
        " - Tools available (%d): %s\n"
        " - Memory keys inherited: %s",
        agent_id,
        (subagent_spec.system_prompt[:200] + "...")
        if subagent_spec.system_prompt and len(subagent_spec.system_prompt) > 200
        else subagent_spec.system_prompt,
        len(subagent_tools),
        [t.name for t in subagent_tools],
        list(parent_data.keys()),
    )

    # 4. Build subagent context
    max_iter = min(config.max_iterations, 10)  # hard cap per delegation
    subagent_ctx = NodeContext(
        runtime=ctx.runtime,
        node_id=sa_node_id,
        node_spec=subagent_spec,
        buffer=scoped_buffer,
        input_data={"task": task, **parent_data},
        llm=ctx.llm,
        available_tools=subagent_tools,
        goal_context=(
            f"Your specific task: {task}\n\n"
            f"COMPLETION REQUIREMENTS:\n"
            f"When your task is done, you MUST call set_output() "
            f"for each required key: {subagent_spec.output_keys}\n"
            f"Alternatively, call report_to_parent(mark_complete=true) "
            f"with your findings in message/data.\n"
            + (
                "Before finishing, call browser_close_finished() to clean up your browser tabs.\n"
                if subagent_spec.node_type == "gcu"
                else ""
            )
            + f"You have a maximum of {max_iter} turns to complete this task."
        ),
        goal=ctx.goal,
        max_tokens=ctx.max_tokens,
        runtime_logger=ctx.runtime_logger,
        is_subagent_mode=True,  # Prevents nested delegation
        report_callback=_report_callback,
        node_registry={},  # Empty - no nested subagents
        shared_node_registry=ctx.shared_node_registry,  # For escalation routing
    )

    # 5. Create and execute subagent EventLoopNode
    subagent_conv_store = None
    if conversation_store is not None:
        from framework.storage.conversation_store import FileConversationStore

        # NOTE(review): reaches into the private `_base` attribute of the
        # parent store -- assumes FileConversationStore layout; confirm if
        # other ConversationStore implementations are ever passed here.
        parent_base = getattr(conversation_store, "_base", None)
        if parent_base is not None:
            conversations_dir = parent_base.parent
            subagent_dir_name = f"{agent_id}-{subagent_instance}"
            subagent_store_path = conversations_dir / subagent_dir_name
            subagent_conv_store = FileConversationStore(base_path=subagent_store_path)

    # Derive a subagent-scoped spillover dir
    subagent_spillover = None
    if config.spillover_dir:
        subagent_spillover = str(Path(config.spillover_dir) / agent_id / subagent_instance)

    subagent_node = event_loop_node_cls(
        event_bus=event_bus,
        judge=SubagentJudge(task=task, max_iterations=max_iter),
        config=LoopConfig(
            max_iterations=max_iter,
            max_tool_calls_per_turn=config.max_tool_calls_per_turn,
            tool_call_overflow_margin=config.tool_call_overflow_margin,
            max_context_tokens=config.max_context_tokens,
            stall_detection_threshold=config.stall_detection_threshold,
            max_tool_result_chars=config.max_tool_result_chars,
            spillover_dir=subagent_spillover,
        ),
        tool_executor=tool_executor,
        conversation_store=subagent_conv_store,
    )

    # Each subagent instance gets its own unique browser profile so concurrent
    # subagents don't share tab groups. The profile is set as execution context
    # so the tool registry auto-injects it into every browser_* MCP tool call.
    _gcu_profile = f"{agent_id}:{subagent_instance}"
    _profile_token = ToolRegistry.set_execution_context(profile=_gcu_profile)
    try:
        logger.info("🚀 Starting subagent '%s' execution...", agent_id)
        start_time = time.time()

        result = await subagent_node.execute(subagent_ctx)

        latency_ms = int((time.time() - start_time) * 1000)
        separator = "-" * 60
        logger.info(
            "\n%s\n"
            "✅ SUBAGENT '%s' COMPLETED\n"
            "%s\n"
            "Success: %s\n"
            "Latency: %dms\n"
            "Tokens used: %s\n"
            "Output keys: %s\n"
            "%s",
            separator,
            agent_id,
            separator,
            result.success,
            latency_ms,
            result.tokens_used,
            list(result.output.keys()) if result.output else [],
            separator,
        )

        result_json = {
            "message": (
                f"Sub-agent '{agent_id}' completed successfully"
                if result.success
                else f"Sub-agent '{agent_id}' failed: {result.error}"
            ),
            "data": result.output,
            "reports": subagent_reports if subagent_reports else None,
            "metadata": {
                "agent_id": agent_id,
                "success": result.success,
                "tokens_used": result.tokens_used,
                "latency_ms": latency_ms,
                "report_count": len(subagent_reports),
            },
        }
        return ToolResult(
            tool_use_id="",
            content=json.dumps(result_json, indent=2, default=str),
            is_error=not result.success,
        )
    except Exception as e:
        logger.exception(
            "\n" + "!" * 60 + "\n❌ SUBAGENT '%s' FAILED\nError: %s\n" + "!" * 60,
            agent_id,
            str(e),
        )
        result_json = {
            "message": f"Sub-agent '{agent_id}' raised exception: {e}",
            "data": None,
            "metadata": {
                "agent_id": agent_id,
                "success": False,
                "error": str(e),
            },
        }
        return ToolResult(
            tool_use_id="",
            content=json.dumps(result_json, indent=2),
            is_error=True,
        )
    finally:
        ToolRegistry.reset_execution_context(_profile_token)
        # Close the tab group this subagent created, if any.
        # Best-effort: browser may not be running or gcu may not be installed.
        try:
            from gcu.browser.bridge import get_bridge
            from gcu.browser.tools.lifecycle import _contexts

            bridge = get_bridge()
            ctx_entry = _contexts.pop(_gcu_profile, None)
            if bridge and bridge.is_connected and ctx_entry:
                group_id = ctx_entry.get("groupId")
                if group_id is not None:
                    await bridge.destroy_context(group_id)
        except Exception:
            pass
+11
View File
@@ -0,0 +1,11 @@
"""Host layer -- how agents are triggered and hosted."""
from framework.host.agent_host import ( # noqa: F401
AgentHost,
AgentRuntimeConfig,
)
from framework.host.event_bus import AgentEvent, EventBus, EventType # noqa: F401
from framework.host.execution_manager import ( # noqa: F401
EntryPointSpec,
ExecutionManager,
)
File diff suppressed because it is too large Load Diff
@@ -18,18 +18,18 @@ from dataclasses import dataclass, field
from datetime import datetime
from typing import TYPE_CHECKING, Any
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.executor import ExecutionResult, GraphExecutor
from framework.runtime.event_bus import EventBus
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
from framework.runtime.stream_runtime import StreamRuntime, StreamRuntimeAdapter
from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator
from framework.host.event_bus import EventBus
from framework.host.shared_state import IsolationLevel, SharedBufferManager
from framework.host.stream_runtime import StreamDecisionTracker, StreamRuntimeAdapter
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.goal import Goal
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.goal import Goal
from framework.llm.provider import LLMProvider, Tool
from framework.runtime.event_bus import AgentEvent
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.host.event_bus import AgentEvent
from framework.host.outcome_aggregator import OutcomeAggregator
from framework.storage.concurrent import ConcurrentStorage
from framework.storage.session_store import SessionStore
@@ -133,7 +133,7 @@ class ExecutionContext:
status: str = "pending" # pending, running, completed, failed, paused
class ExecutionStream:
class ExecutionManager:
"""
Manages concurrent executions for a single entry point.
@@ -262,7 +262,7 @@ class ExecutionStream:
)
# Create stream-scoped runtime
self._runtime = StreamRuntime(
self._runtime = StreamDecisionTracker(
stream_id=stream_id,
storage=storage,
outcome_aggregator=outcome_aggregator,
@@ -271,7 +271,7 @@ class ExecutionStream:
# Execution tracking
self._active_executions: dict[str, ExecutionContext] = {}
self._execution_tasks: dict[str, asyncio.Task] = {}
self._active_executors: dict[str, GraphExecutor] = {}
self._active_executors: dict[str, Orchestrator] = {}
self._cancel_reasons: dict[str, str] = {}
self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
self._execution_result_times: dict[str, float] = {}
@@ -301,7 +301,7 @@ class ExecutionStream:
# Emit stream started event
if self._scoped_event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
@@ -426,7 +426,7 @@ class ExecutionStream:
# Emit stream stopped event
if self._scoped_event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
@@ -668,7 +668,7 @@ class ExecutionStream:
# Create per-execution runtime logger
runtime_logger = None
if self._runtime_log_store:
from framework.runtime.runtime_logger import RuntimeLogger
from framework.tracker.runtime_logger import RuntimeLogger
runtime_logger = RuntimeLogger(
store=self._runtime_log_store, agent_id=self.graph.id
@@ -697,12 +697,7 @@ class ExecutionStream:
# forward so the next attempt resumes at the failed node.
while True:
# Create executor for this execution.
# Each execution gets its own storage under sessions/{exec_id}/
# so conversations, spillover, and data files are all scoped
# to this execution. The executor sets data_dir via execution
# context (contextvars) so data tools and spillover share the
# same session-scoped directory.
executor = GraphExecutor(
executor = Orchestrator(
runtime=runtime_adapter,
llm=self._llm,
tools=self._tools,
@@ -763,7 +758,7 @@ class ExecutionStream:
# Emit resurrection event
if self._scoped_event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
@@ -1119,7 +1114,7 @@ class ExecutionStream:
Each stream only executes from its own entry_node, but the full
graph must validate with all entry points accounted for.
"""
from framework.graph.edge import GraphSpec
from framework.orchestrator.edge import GraphSpec
# Merge entry points: this stream's entry + original graph's primary
# entry + any other entry points. This ensures all nodes are
@@ -14,8 +14,8 @@ from typing import TYPE_CHECKING, Any
from framework.schemas.decision import Decision, Outcome
if TYPE_CHECKING:
from framework.graph.goal import Goal
from framework.runtime.event_bus import EventBus
from framework.orchestrator.goal import Goal
from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -18,12 +18,12 @@ from framework.schemas.run import Run, RunStatus
from framework.storage.concurrent import ConcurrentStorage
if TYPE_CHECKING:
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.host.outcome_aggregator import OutcomeAggregator
logger = logging.getLogger(__name__)
class StreamRuntime:
class StreamDecisionTracker:
"""
Thread-safe runtime for a single execution stream.
@@ -431,7 +431,7 @@ class StreamRuntimeAdapter:
by providing the same API as Runtime but routing to a specific execution.
"""
def __init__(self, stream_runtime: StreamRuntime, execution_id: str):
def __init__(self, stream_runtime: StreamDecisionTracker, execution_id: str):
"""
Create adapter for a specific execution.
@@ -13,7 +13,7 @@ from dataclasses import dataclass
from aiohttp import web
from framework.runtime.event_bus import EventBus
from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
+101
View File
@@ -0,0 +1,101 @@
"""Thread-safe API key pool with round-robin rotation and health tracking.
When multiple API keys are configured, the pool rotates through them on each
request. Keys that hit rate limits are temporarily cooled-down so the next
call automatically uses a healthy key -- no sleep required.
"""
from __future__ import annotations
import logging
import threading
import time
from dataclasses import dataclass
logger = logging.getLogger(__name__)


@dataclass
class KeyHealth:
    """Mutable health counters tracked for a single API key."""

    # Monotonic timestamp (time.monotonic) until which the key is considered
    # rate-limited; 0.0 means the key has never been throttled.
    rate_limited_until: float = 0.0
    consecutive_errors: int = 0
    total_requests: int = 0
    total_successes: int = 0


class KeyPool:
    """Round-robin pool of API keys with per-key health bookkeeping.

    Every mutation happens under a single lock, so one pool can be shared
    by concurrent LLM calls (e.g. parallel tool execution) without races.
    """

    def __init__(self, keys: list[str]) -> None:
        """Create a pool over *keys*; raises ValueError when *keys* is empty."""
        if not keys:
            raise ValueError("KeyPool requires at least one key")
        self._keys = list(keys)
        self._index = 0
        self._health: dict[str, KeyHealth] = {key: KeyHealth() for key in keys}
        self._lock = threading.Lock()

    @property
    def size(self) -> int:
        """Number of keys managed by the pool."""
        return len(self._keys)

    def get_key(self) -> str:
        """Return the next key that is not cooling down (round-robin).

        When every key is rate-limited, the key whose cooldown expires
        earliest is returned so the caller can proceed with minimal delay.
        """
        with self._lock:
            now = time.monotonic()
            remaining = len(self._keys)
            chosen = None
            while remaining:
                remaining -= 1
                candidate = self._keys[self._index]
                self._index = (self._index + 1) % len(self._keys)
                if self._health[candidate].rate_limited_until <= now:
                    chosen = candidate
                    break
            if chosen is None:
                # Everything is throttled -- pick whichever recovers first.
                chosen = min(
                    self._keys,
                    key=lambda k: self._health[k].rate_limited_until,
                )
            self._health[chosen].total_requests += 1
            return chosen

    def mark_rate_limited(self, key: str, retry_after: float = 60.0) -> None:
        """Put *key* on cooldown for *retry_after* seconds."""
        with self._lock:
            entry = self._health.get(key)
            if entry is None:
                return
            entry.rate_limited_until = time.monotonic() + retry_after
            entry.consecutive_errors += 1
            logger.info(
                "[key-pool] Key ...%s rate-limited for %.0fs (errors=%d)",
                key[-6:],
                retry_after,
                entry.consecutive_errors,
            )

    def mark_success(self, key: str) -> None:
        """Reset the error streak for *key* and bump its success counter."""
        with self._lock:
            entry = self._health.get(key)
            if entry is None:
                return
            entry.consecutive_errors = 0
            entry.total_successes += 1

    def get_stats(self) -> dict[str, dict]:
        """Return per-key health stats, keyed by the key's last 6 chars."""
        with self._lock:
            now = time.monotonic()
            stats: dict[str, dict] = {}
            for key in self._keys:
                entry = self._health[key]
                stats[f"...{key[-6:]}"] = {
                    "healthy": entry.rate_limited_until <= now,
                    "requests": entry.total_requests,
                    "successes": entry.total_successes,
                    "consecutive_errors": entry.consecutive_errors,
                }
            return stats
+70 -6
View File
@@ -7,6 +7,8 @@ Groq, and local models.
See: https://docs.litellm.ai/docs/providers
"""
from __future__ import annotations
import ast
import asyncio
import hashlib
@@ -18,7 +20,10 @@ import time
from collections.abc import AsyncIterator
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from framework.llm.key_pool import KeyPool
try:
import litellm
@@ -561,6 +566,7 @@ class LiteLLMProvider(LLMProvider):
model: str = "gpt-4o-mini",
api_key: str | None = None,
api_base: str | None = None,
api_keys: list[str] | None = None,
**kwargs: Any,
):
"""
@@ -573,6 +579,9 @@ class LiteLLMProvider(LLMProvider):
look for the appropriate env var (OPENAI_API_KEY,
ANTHROPIC_API_KEY, etc.)
api_base: Custom API base URL (for proxies or local deployments)
api_keys: Optional list of API keys for key-pool rotation. When
provided with 2+ keys, a :class:`KeyPool` is created and
keys are rotated on rate-limit errors.
**kwargs: Additional arguments passed to litellm.completion()
"""
# Kimi For Coding exposes an Anthropic-compatible endpoint at
@@ -594,11 +603,24 @@ class LiteLLMProvider(LLMProvider):
if api_base and api_base.rstrip("/").endswith("/v1"):
api_base = api_base.rstrip("/")[:-3]
self.model = model
self.api_key = api_key
# Key pool: when multiple keys are provided, enable rotation.
self._key_pool: KeyPool | None = None
if api_keys and len(api_keys) > 1:
from framework.llm.key_pool import KeyPool
self._key_pool = KeyPool(api_keys)
self.api_key = api_keys[0] # default for OAuth detection below
logger.info(
"[litellm] Key pool enabled with %d keys for model %s",
len(api_keys),
model,
)
else:
self.api_key = api_key or (api_keys[0] if api_keys else None)
self.api_base = api_base or self._default_api_base_for_model(_original_model)
self.extra_kwargs = kwargs
# Detect Claude Code OAuth subscription by checking the api_key prefix.
self._claude_code_oauth = bool(api_key and api_key.startswith("sk-ant-oat"))
self._claude_code_oauth = bool(self.api_key and self.api_key.startswith("sk-ant-oat"))
if self._claude_code_oauth:
# Anthropic requires a specific User-Agent for OAuth requests.
eh = self.extra_kwargs.setdefault("extra_headers", {})
@@ -669,10 +691,20 @@ class LiteLLMProvider(LLMProvider):
def _completion_with_rate_limit_retry(
self, max_retries: int | None = None, **kwargs: Any
) -> Any:
"""Call litellm.completion with retry on 429 rate limit errors and empty responses."""
"""Call litellm.completion with retry on 429 rate limit errors and empty responses.
When a :class:`KeyPool` is configured, rate-limited keys are rotated
automatically so the next attempt uses a different key -- no sleep
needed between attempts.
"""
model = kwargs.get("model", self.model)
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
for attempt in range(retries + 1):
# Rotate key from pool when available.
current_key: str | None = None
if self._key_pool:
current_key = self._key_pool.get_key()
kwargs["api_key"] = current_key
try:
response = litellm.completion(**kwargs) # type: ignore[union-attr]
@@ -747,8 +779,22 @@ class LiteLLMProvider(LLMProvider):
time.sleep(wait)
continue
if self._key_pool and current_key:
self._key_pool.mark_success(current_key)
return response
except RateLimitError as e:
# Key pool: mark the offending key and rotate immediately.
if self._key_pool and current_key:
self._key_pool.mark_rate_limited(current_key, retry_after=60.0)
# When we have other healthy keys, skip the sleep -- the
# next iteration will pick a different key automatically.
if attempt < retries:
logger.info(
"[retry] Key pool rotating away from ...%s on 429",
current_key[-6:],
)
continue
# Dump full request to file for debugging
messages = kwargs.get("messages", [])
token_count, token_method = _estimate_tokens(model, messages)
@@ -761,7 +807,7 @@ class LiteLLMProvider(LLMProvider):
if attempt == retries:
logger.error(
f"[retry] GAVE UP on {model} after {retries + 1} "
f"attempts rate limit error: {e!s}. "
f"attempts -- rate limit error: {e!s}. "
f"~{token_count} tokens ({token_method}). "
f"Full request dumped to: {dump_path}"
)
@@ -880,10 +926,16 @@ class LiteLLMProvider(LLMProvider):
"""Async version of _completion_with_rate_limit_retry.
Uses litellm.acompletion and asyncio.sleep instead of blocking calls.
When a :class:`KeyPool` is configured, rate-limited keys are rotated.
"""
model = kwargs.get("model", self.model)
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
for attempt in range(retries + 1):
# Rotate key from pool when available.
current_key: str | None = None
if self._key_pool:
current_key = self._key_pool.get_key()
kwargs["api_key"] = current_key
try:
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
@@ -952,8 +1004,20 @@ class LiteLLMProvider(LLMProvider):
await asyncio.sleep(wait)
continue
if self._key_pool and current_key:
self._key_pool.mark_success(current_key)
return response
except RateLimitError as e:
# Key pool: mark the offending key and rotate immediately.
if self._key_pool and current_key:
self._key_pool.mark_rate_limited(current_key, retry_after=60.0)
if attempt < retries:
logger.info(
"[async-retry] Key pool rotating away from ...%s on 429",
current_key[-6:],
)
continue
messages = kwargs.get("messages", [])
token_count, token_method = _estimate_tokens(model, messages)
dump_path = _dump_failed_request(
@@ -965,7 +1029,7 @@ class LiteLLMProvider(LLMProvider):
if attempt == retries:
logger.error(
f"[async-retry] GAVE UP on {model} after {retries + 1} "
f"attempts rate limit error: {e!s}. "
f"attempts -- rate limit error: {e!s}. "
f"~{token_count} tokens ({token_method}). "
f"Full request dumped to: {dump_path}"
)
+4
View File
@@ -0,0 +1,4 @@
"""Loader layer -- agent loading from disk (JSON config, MCP, credentials)."""
from framework.loader.agent_loader import AgentLoader # noqa: F401
from framework.loader.tool_registry import ToolRegistry # noqa: F401
@@ -13,21 +13,20 @@ from framework.config import get_hive_config, get_max_context_tokens, get_prefer
from framework.credentials.validation import (
ensure_credential_key_env as _ensure_credential_key_env,
)
from framework.graph import Goal
from framework.graph.edge import (
from framework.orchestrator import Goal
from framework.orchestrator.edge import (
DEFAULT_MAX_TOKENS,
EdgeCondition,
EdgeSpec,
GraphSpec,
)
from framework.graph.executor import ExecutionResult
from framework.graph.node import NodeSpec
from framework.orchestrator.orchestrator import ExecutionResult
from framework.orchestrator.node import NodeSpec
from framework.llm.provider import LLMProvider, Tool
from framework.runner.preload_validation import run_preload_validation
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from framework.runtime.runtime_log_store import RuntimeLogStore
from framework.loader.preload_validation import run_preload_validation
from framework.loader.tool_registry import ToolRegistry
from framework.host.agent_host import AgentHost, AgentRuntimeConfig
from framework.host.execution_manager import EntryPointSpec
from framework.tools.flowchart_utils import generate_fallback_flowchart
logger = logging.getLogger(__name__)
@@ -881,6 +880,172 @@ class ValidationResult:
missing_credentials: list[str] = field(default_factory=list)
def _resolve_template_vars(text: str | None, variables: dict[str, str]) -> str | None:
"""Resolve ``{{variable_name}}`` placeholders in *text*."""
if text is None or not variables:
return text
import re
def _replace(m: re.Match) -> str:
key = m.group(1).strip()
return variables.get(key, m.group(0))
return re.sub(r"\{\{(.+?)\}\}", _replace, text)
def load_agent_config(data: str | dict) -> tuple[GraphSpec, Goal]:
    """Load ``GraphSpec`` and ``Goal`` from a declarative :class:`AgentConfig`.

    The declarative format uses a ``name`` key at the top level, unlike the
    legacy export format which uses ``graph``/``goal`` keys. The runner
    auto-detects the format in :meth:`AgentLoader.load`.

    Template variables in ``config.variables`` are resolved in all
    ``system_prompt`` and ``identity_prompt`` fields via ``{{var_name}}``.

    Args:
        data: JSON string or already-parsed dict matching the
            :class:`AgentConfig` schema.

    Returns:
        Tuple of (GraphSpec, Goal)

    Raises:
        json.JSONDecodeError: If *data* is a string that is not valid JSON.
        Exception: Whatever ``AgentConfig.model_validate`` raises on a
            schema mismatch (pydantic-style validation -- confirm exact type).
    """
    # Local imports -- NOTE(review): presumably to avoid import cycles at
    # module load time; confirm against package layout.
    from framework.orchestrator.edge import EdgeCondition, EdgeSpec
    from framework.orchestrator.goal import Constraint, Goal as GoalModel, SuccessCriterion
    from framework.schemas.agent_config import AgentConfig
    if isinstance(data, str):
        data = json.loads(data)
    config = AgentConfig.model_validate(data)
    # Template variables, applied to system/identity prompts below.
    tvars = config.variables
    # Build Goal
    # Success criteria get synthetic sequential ids; metric "llm_judge" with
    # an empty target -- presumably evaluated by an LLM judge downstream,
    # TODO confirm against the Goal/judge implementation.
    success_criteria = [
        SuccessCriterion(
            id=f"sc-{i}",
            description=sc,
            metric="llm_judge",
            target="",
        )
        for i, sc in enumerate(config.goal.success_criteria)
    ]
    # All declarative constraints are treated as hard, general-category.
    constraints = [
        Constraint(
            id=f"c-{i}",
            description=c,
            constraint_type="hard",
            category="general",
        )
        for i, c in enumerate(config.goal.constraints)
    ]
    goal = GoalModel(
        id=f"{config.name}-goal",
        name=config.name,
        description=config.goal.description,
        success_criteria=success_criteria,
        constraints=constraints,
    )
    # Build nodes
    # Maps the declarative condition strings onto the EdgeCondition enum;
    # unknown strings fall back to ON_SUCCESS (see the .get() below).
    condition_map = {
        "always": EdgeCondition.ALWAYS,
        "on_success": EdgeCondition.ON_SUCCESS,
        "on_failure": EdgeCondition.ON_FAILURE,
        "conditional": EdgeCondition.CONDITIONAL,
        "llm_decide": EdgeCondition.LLM_DECIDE,
    }
    nodes = []
    for nc in config.nodes:
        # Resolve tool access: node-level config -> agent-level fallback
        # Note: a node with policy "explicit" but an empty allowed-list
        # falls through to the agent-level branch below.
        if nc.tools.policy == "explicit" and nc.tools.allowed:
            tools_list = nc.tools.allowed
            tool_policy = "explicit"
        elif nc.tools.policy == "none":
            tools_list = []
            tool_policy = "none"
        elif nc.tools.policy == "all":
            tools_list = []
            tool_policy = "all"
        else:
            # Inherit agent-level tool config
            if config.tools.policy == "explicit" and config.tools.allowed:
                tools_list = config.tools.allowed
            else:
                tools_list = []
            tool_policy = config.tools.policy
        # Required NodeSpec fields; node-level system prompts get template
        # variables resolved here.
        node_kwargs: dict = {
            "id": nc.id,
            "name": nc.name or nc.id,
            "description": nc.description or "",
            "node_type": nc.node_type,
            "system_prompt": _resolve_template_vars(nc.system_prompt, tvars),
            "tools": tools_list,
            "tool_access_policy": tool_policy,
            "model": nc.model,
            "input_keys": nc.input_keys,
            "output_keys": nc.output_keys,
            "nullable_output_keys": nc.nullable_output_keys,
            "max_iterations": nc.max_iterations,
            "success_criteria": nc.success_criteria,
            "skip_judge": nc.skip_judge,
        }
        # Optional fields -- only pass when set (avoids overriding defaults)
        if nc.client_facing:
            node_kwargs["client_facing"] = nc.client_facing
        if nc.max_node_visits != 1:
            node_kwargs["max_node_visits"] = nc.max_node_visits
        if nc.failure_criteria:
            node_kwargs["failure_criteria"] = nc.failure_criteria
        if nc.max_retries is not None:
            node_kwargs["max_retries"] = nc.max_retries
        nodes.append(NodeSpec(**node_kwargs))
    # Build edges
    # Edge ids encode index plus endpoints for readable debugging output.
    edges = []
    for i, ec in enumerate(config.edges):
        edges.append(
            EdgeSpec(
                id=f"e-{i}-{ec.from_node}-{ec.to_node}",
                source=ec.from_node,
                target=ec.to_node,
                condition=condition_map.get(ec.condition, EdgeCondition.ON_SUCCESS),
                condition_expr=ec.condition_expr,
                priority=ec.priority,
                input_mapping=ec.input_mapping,
            )
        )
    # Build entry_points dict for GraphSpec
    # Entry points without their own entry_node fall back to the graph-level
    # entry_node; with no declared entry points a single "default" is used.
    entry_points_dict: dict = {}
    if config.entry_points:
        for ep in config.entry_points:
            entry_points_dict[ep.id] = ep.entry_node or config.entry_node
    else:
        entry_points_dict = {"default": config.entry_node}
    # Build GraphSpec
    # The agent-level identity prompt also gets template resolution; it is
    # coerced to "" because _resolve_template_vars may return None.
    graph_kwargs: dict = {
        "id": f"{config.name}-graph",
        "goal_id": goal.id,
        "version": config.version,
        "entry_node": config.entry_node,
        "entry_points": entry_points_dict,
        "terminal_nodes": config.terminal_nodes,
        "pause_nodes": config.pause_nodes,
        "nodes": nodes,
        "edges": edges,
        "max_tokens": config.max_tokens,
        "loop_config": dict(config.loop_config),
        "conversation_mode": config.conversation_mode,
        "identity_prompt": _resolve_template_vars(
            config.identity_prompt, tvars
        ) or "",
    }
    graph = GraphSpec(**graph_kwargs)
    return graph, goal
def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
"""
Load GraphSpec and Goal from export_graph() output.
@@ -942,7 +1107,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
)
# Build Goal
from framework.graph.goal import Constraint, SuccessCriterion
from framework.orchestrator.goal import Constraint, SuccessCriterion
success_criteria = []
for sc_data in goal_data.get("success_criteria", []):
@@ -979,7 +1144,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
return graph, goal
class AgentRunner:
class AgentLoader:
"""
Loads and runs exported agents with minimal boilerplate.
@@ -991,15 +1156,15 @@ class AgentRunner:
Usage:
# Simple usage
runner = AgentRunner.load("exports/outbound-sales-agent")
runner = AgentLoader.load("exports/outbound-sales-agent")
result = await runner.run({"lead_id": "123"})
# With context manager
async with AgentRunner.load("exports/outbound-sales-agent") as runner:
async with AgentLoader.load("exports/outbound-sales-agent") as runner:
result = await runner.run({"lead_id": "123"})
# With custom tools
runner = AgentRunner.load("exports/outbound-sales-agent")
runner = AgentLoader.load("exports/outbound-sales-agent")
runner.register_tool("my_tool", my_tool_func)
result = await runner.run({"lead_id": "123"})
"""
@@ -1027,7 +1192,7 @@ class AgentRunner:
credential_store: Any | None = None,
):
"""
Initialize the runner (use AgentRunner.load() instead).
Initialize the runner (use AgentLoader.load() instead).
Args:
agent_path: Path to agent folder
@@ -1082,7 +1247,7 @@ class AgentRunner:
self._approval_callback: Callable | None = None
# AgentRuntime — unified execution path for all agents
self._agent_runtime: AgentRuntime | None = None
self._agent_runtime: AgentHost | None = None
# Pre-load validation: structural checks + credentials.
# Fails fast with actionable guidance — no MCP noise on screen.
run_preload_validation(
@@ -1101,14 +1266,7 @@ class AgentRunner:
os.environ["HIVE_AGENT_NAME"] = agent_path.name
os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path)
# Auto-discover MCP servers from mcp_servers.json
mcp_config_path = agent_path / "mcp_servers.json"
if mcp_config_path.exists():
self._load_mcp_servers_from_config(mcp_config_path)
# Auto-discover registry-selected MCP servers from mcp_registry.json
self._load_registry_mcp_servers(agent_path)
# MCP tools are loaded by McpRegistryStage in the pipeline during AgentHost.start()
@staticmethod
def _import_agent_module(agent_path: Path):
"""Import an agent package from its directory path.
@@ -1158,7 +1316,7 @@ class AgentRunner:
interactive: bool = True,
skip_credential_validation: bool | None = None,
credential_store: Any | None = None,
) -> "AgentRunner":
) -> "AgentLoader":
"""
Load an agent from an export folder.
@@ -1299,21 +1457,22 @@ class AgentRunner:
runner._agent_skills = agent_skills
return runner
# Fallback: load from agent.json (legacy JSON-based agents)
# Fallback: load from agent.json (declarative config)
agent_json_path = agent_path / "agent.json"
if not agent_json_path.is_file():
raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")
with open(agent_json_path, encoding="utf-8") as f:
export_data = f.read()
export_data = agent_json_path.read_text(encoding="utf-8")
if not export_data.strip():
raise ValueError(f"Empty agent export file: {agent_json_path}")
raise ValueError(f"Empty agent.json: {agent_json_path}")
try:
graph, goal = load_agent_export(export_data)
except json.JSONDecodeError as exc:
raise ValueError(f"Invalid JSON in agent export file: {agent_json_path}") from exc
parsed = json.loads(export_data)
graph, goal = load_agent_config(parsed)
logger.info(
"Loaded declarative agent config from agent.json (name=%s)",
parsed.get("name"),
)
# Generate flowchart.json if missing (for legacy JSON-based agents)
generate_fallback_flowchart(graph, goal, agent_path)
@@ -1396,60 +1555,6 @@ class AgentRunner:
}
return self._tool_registry.register_mcp_server(server_config)
def _load_mcp_servers_from_config(self, config_path: Path) -> None:
"""Load and register MCP servers from a configuration file."""
self._tool_registry.load_mcp_config(config_path)
def _load_registry_mcp_servers(self, agent_path: Path) -> None:
"""Load and register MCP servers selected via ``mcp_registry.json``."""
registry_json = agent_path / "mcp_registry.json"
if registry_json.is_file():
self._tool_registry.set_mcp_registry_agent_path(agent_path)
else:
self._tool_registry.set_mcp_registry_agent_path(None)
from framework.runner.mcp_registry import MCPRegistry
try:
registry = MCPRegistry()
registry.initialize()
server_configs, selection_max_tools = registry.load_agent_selection(agent_path)
except Exception as exc:
logger.warning(
"Failed to load MCP registry servers for '%s': %s",
agent_path.name,
exc,
)
return
if not server_configs:
return
results = self._tool_registry.load_registry_servers(
server_configs,
preserve_existing_tools=True,
log_collisions=True,
max_tools=selection_max_tools,
)
loaded = [result for result in results if result["status"] == "loaded"]
skipped = [result for result in results if result["status"] != "loaded"]
logger.info(
"Loaded %d/%d MCP registry server(s) for agent '%s'",
len(loaded),
len(results),
agent_path.name,
)
if skipped:
logger.info(
"Skipped MCP registry servers for agent '%s': %s",
agent_path.name,
[
{"server": result["server"], "reason": result["skipped_reason"]}
for result in skipped
],
)
def set_approval_callback(self, callback: Callable) -> None:
"""
Set a callback for human-in-the-loop approval during execution.
@@ -1460,272 +1565,119 @@ class AgentRunner:
self._approval_callback = callback
def _setup(self, event_bus=None) -> None:
"""Set up runtime, LLM, and executor."""
# Configure structured logging (auto-detects JSON vs human-readable)
"""Set up runtime via pipeline stages.
Builds a pipeline with the default stages (LLM, credentials, MCP,
skills) and passes it to AgentHost. The stages initialize during
``AgentHost.start()`` and inject tools/LLM/credentials/skills.
"""
from framework.observability import configure_logging
from framework.pipeline.stages.credential_resolver import CredentialResolverStage
from framework.pipeline.stages.llm_provider import LlmProviderStage
from framework.pipeline.stages.mcp_registry import McpRegistryStage
from framework.pipeline.stages.skill_registry import SkillRegistryStage
from framework.skills.config import SkillsConfig
configure_logging(level="INFO", format="auto")
# Set up session context for tools (agent_id)
# Set up session context for tools
agent_id = self.graph.id or "unknown"
self._tool_registry.set_session_context(agent_id=agent_id)
self._tool_registry.set_session_context(
agent_id=agent_id,
)
# Read MCP server refs from agent.json
mcp_refs = []
agent_json = self.agent_path / "agent.json"
if agent_json.exists():
try:
import json as _json
# Create LLM provider
# Uses LiteLLM which auto-detects the provider from model name
# Skip if already injected (e.g. worker agents with a pre-built LLM)
if self._llm is not None:
pass # LLM already configured externally
elif self.mock_mode:
# Use mock LLM for testing without real API calls
from framework.llm.mock import MockLLMProvider
data = _json.loads(agent_json.read_text(encoding="utf-8"))
mcp_refs = data.get("mcp_servers", [])
except Exception:
pass
self._llm = MockLLMProvider(model=self.model)
else:
from framework.llm.litellm import LiteLLMProvider
# Check if a subscription mode is configured
config = get_hive_config()
llm_config = config.get("llm", {})
use_claude_code = llm_config.get("use_claude_code_subscription", False)
use_codex = llm_config.get("use_codex_subscription", False)
use_kimi_code = llm_config.get("use_kimi_code_subscription", False)
use_antigravity = llm_config.get("use_antigravity_subscription", False)
api_base = llm_config.get("api_base")
api_key = None
if use_claude_code:
# Get OAuth token from Claude Code subscription
api_key = get_claude_code_token()
if not api_key:
logger.warning(
"Claude Code subscription configured but no token found. "
"Run 'claude' to authenticate, then try again."
)
elif use_codex:
# Get OAuth token from Codex subscription
api_key = get_codex_token()
if not api_key:
logger.warning(
"Codex subscription configured but no token found. "
"Run 'codex' to authenticate, then try again."
)
elif use_kimi_code:
# Get API key from Kimi Code CLI config (~/.kimi/config.toml)
api_key = get_kimi_code_token()
if not api_key:
logger.warning(
"Kimi Code subscription configured but no key found. "
"Run 'kimi /login' to authenticate, then try again."
)
elif use_antigravity:
pass # AntigravityProvider handles credentials internally
if api_key and use_claude_code:
# Use litellm's built-in Anthropic OAuth support.
# The lowercase "authorization" key triggers OAuth detection which
# adds the required anthropic-beta and browser-access headers.
self._llm = LiteLLMProvider(
model=self.model,
api_key=api_key,
api_base=api_base,
extra_headers={"authorization": f"Bearer {api_key}"},
)
elif api_key and use_codex:
# OpenAI Codex subscription routes through the ChatGPT backend
# (chatgpt.com/backend-api/codex/responses), NOT the standard
# OpenAI API. The consumer OAuth token lacks platform API scopes.
extra_headers: dict[str, str] = {
"Authorization": f"Bearer {api_key}",
"User-Agent": "CodexBar",
}
account_id = get_codex_account_id()
if account_id:
extra_headers["ChatGPT-Account-Id"] = account_id
self._llm = LiteLLMProvider(
model=self.model,
api_key=api_key,
api_base="https://chatgpt.com/backend-api/codex",
extra_headers=extra_headers,
store=False,
allowed_openai_params=["store"],
)
elif api_key and use_kimi_code:
# Kimi Code subscription uses the Kimi coding API (OpenAI-compatible).
# The api_base is set automatically by LiteLLMProvider for kimi/ models.
self._llm = LiteLLMProvider(
model=self.model,
api_key=api_key,
api_base=api_base,
)
elif use_antigravity:
# Direct OAuth to Google's internal Cloud Code Assist gateway.
# No local proxy required — AntigravityProvider handles token
# refresh and Gemini-format request/response conversion natively.
from framework.llm.antigravity import AntigravityProvider # noqa: PLC0415
provider = AntigravityProvider(model=self.model)
if not provider.has_credentials():
print(
"Warning: Antigravity credentials not found. "
"Run: uv run python core/antigravity_auth.py auth account add"
)
self._llm = provider
else:
# Local models (e.g. Ollama) don't need an API key
if self._is_local_model(self.model):
self._llm = LiteLLMProvider(
model=self.model,
api_base=api_base,
)
else:
# Fall back to environment variable
# First check api_key_env_var from config (set by quickstart)
api_key_env = llm_config.get("api_key_env_var") or self._get_api_key_env_var(
self.model
)
if api_key_env and os.environ.get(api_key_env):
self._llm = LiteLLMProvider(
model=self.model,
api_key=os.environ[api_key_env],
api_base=api_base,
)
else:
# Fall back to credential store
api_key = self._get_api_key_from_credential_store()
if api_key:
self._llm = LiteLLMProvider(
model=self.model, api_key=api_key, api_base=api_base
)
# Set env var so downstream code (e.g. cleanup LLM in
# node._extract_json) can also find it
if api_key_env:
os.environ[api_key_env] = api_key
elif api_key_env:
logger.warning(
"%s not set. LLM calls will fail. "
"Set it with: export %s=your-api-key",
api_key_env,
api_key_env,
)
# Fail fast if the agent needs an LLM but none was configured
if self._llm is None:
has_llm_nodes = any(
node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
)
if has_llm_nodes:
from framework.credentials.models import CredentialError
if self._is_local_model(self.model):
raise CredentialError(
f"Failed to initialize LLM for local model '{self.model}'. "
f"Ensure your local LLM server is running "
f"(e.g. 'ollama serve' for Ollama)."
)
api_key_env = self._get_api_key_env_var(self.model)
hint = (
f"Set it with: export {api_key_env}=your-api-key"
if api_key_env
else "Configure an API key for your LLM provider."
)
raise CredentialError(f"LLM API key not found for model '{self.model}'. {hint}")
# For GCU nodes: auto-register GCU MCP server if needed, then expand tool lists
has_gcu_nodes = any(node.node_type == "gcu" for node in self.graph.nodes)
if has_gcu_nodes:
from framework.graph.gcu import GCU_MCP_SERVER_CONFIG, GCU_SERVER_NAME
# Auto-register GCU MCP server if tools aren't loaded yet
gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
if not gcu_tool_names:
# Resolve cwd to repo-level tools/ (not relative to agent_path)
gcu_config = dict(GCU_MCP_SERVER_CONFIG)
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
gcu_config["cwd"] = str(_repo_root / "tools")
self._tool_registry.register_mcp_server(gcu_config)
gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
# Expand each GCU node's tools list to include all GCU server tools
if gcu_tool_names:
for node in self.graph.nodes:
if node.node_type == "gcu":
existing = set(node.tools)
for tool_name in sorted(gcu_tool_names):
if tool_name not in existing:
node.tools.append(tool_name)
# For event_loop/gcu nodes: auto-register file tools MCP server, then expand tool lists
has_loop_nodes = any(node.node_type in ("event_loop", "gcu") for node in self.graph.nodes)
if has_loop_nodes:
from framework.graph.files import FILES_MCP_SERVER_CONFIG, FILES_MCP_SERVER_NAME
files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
if not files_tool_names:
# Resolve cwd to repo-level tools/ (not relative to agent_path)
files_config = dict(FILES_MCP_SERVER_CONFIG)
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
files_config["cwd"] = str(_repo_root / "tools")
self._tool_registry.register_mcp_server(files_config)
files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
if files_tool_names:
for node in self.graph.nodes:
if node.node_type in ("event_loop", "gcu"):
existing = set(node.tools)
for tool_name in sorted(files_tool_names):
if tool_name not in existing:
node.tools.append(tool_name)
# Get tools for runtime
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
# Collect connected account info for system prompt injection
accounts_prompt = ""
accounts_data: list[dict] | None = None
tool_provider_map: dict[str, str] | None = None
try:
from aden_tools.credentials.store_adapter import CredentialStoreAdapter
if self._credential_store is not None:
adapter = CredentialStoreAdapter(store=self._credential_store)
else:
adapter = CredentialStoreAdapter.default()
accounts_data = adapter.get_all_account_info()
tool_provider_map = adapter.get_tool_provider_map()
if accounts_data:
from framework.graph.prompting import build_accounts_prompt
accounts_prompt = build_accounts_prompt(accounts_data, tool_provider_map)
except Exception:
pass # Best-effort — agent works without account info
# Skill configuration — the runtime handles discovery, loading, trust-gating and
# prompt rasterization. The runner just builds the config.
from framework.skills.config import SkillsConfig
from framework.skills.manager import SkillsManagerConfig
skills_manager_config = SkillsManagerConfig(
skills_config=SkillsConfig.from_agent_vars(
default_skills=getattr(self, "_agent_default_skills", None),
skills=getattr(self, "_agent_skills", None),
# Build default pipeline stages
# Default infrastructure stages (always present)
pipeline_stages = [
LlmProviderStage(
model=self.model,
mock_mode=self.mock_mode,
llm=self._llm,
),
project_root=self.agent_path,
interactive=self._interactive,
)
CredentialResolverStage(
credential_store=self._credential_store,
),
McpRegistryStage(
server_refs=mcp_refs,
agent_path=self.agent_path,
tool_registry=self._tool_registry,
),
SkillRegistryStage(
project_root=self.agent_path,
interactive=self._interactive,
skills_config=SkillsConfig.from_agent_vars(
default_skills=getattr(self, "_agent_default_skills", None),
skills=getattr(self, "_agent_skills", None),
),
),
]
self._setup_agent_runtime(
tools,
tool_executor,
accounts_prompt=accounts_prompt,
accounts_data=accounts_data,
tool_provider_map=tool_provider_map,
# Merge user-configured stages from ~/.hive/configuration.json
from framework.config import get_hive_config
from framework.pipeline.registry import build_pipeline_from_config
hive_config = get_hive_config()
user_stages_config = hive_config.get("pipeline", {}).get("stages", [])
if user_stages_config:
user_pipeline = build_pipeline_from_config(user_stages_config)
pipeline_stages.extend(user_pipeline.stages)
# Merge agent-level overrides from agent.json pipeline field
if agent_json.exists():
try:
agent_pipeline = (
_json.loads(agent_json.read_text(encoding="utf-8"))
.get("pipeline", {})
.get("stages", [])
)
if agent_pipeline:
agent_stages = build_pipeline_from_config(agent_pipeline)
pipeline_stages.extend(agent_stages.stages)
except Exception:
pass
# Create AgentHost directly (no wrapper)
from framework.host.execution_manager import EntryPointSpec
from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.tracker.runtime_log_store import RuntimeLogStore
self._agent_runtime = AgentHost(
graph=self.graph,
goal=self.goal,
storage_path=self._storage_path,
runtime_log_store=RuntimeLogStore(
base_path=self._storage_path / "runtime_logs",
),
checkpoint_config=CheckpointConfig(
enabled=True,
checkpoint_on_node_complete=True,
checkpoint_max_age_days=7,
async_checkpoint=True,
),
graph_id=self.graph.id or self.agent_path.name,
event_bus=event_bus,
skills_manager_config=skills_manager_config,
pipeline_stages=pipeline_stages,
)
self._agent_runtime.register_entry_point(
EntryPointSpec(
id="default",
name="Default",
entry_node=self.graph.entry_node,
trigger_type="manual",
isolation_level="shared",
),
)
self._agent_runtime.intro_message = self.intro_message
def _get_api_key_env_var(self, model: str) -> str | None:
"""Get the environment variable name for the API key based on model name."""
@@ -1833,83 +1785,6 @@ class AgentRunner:
)
return model.lower().startswith(LOCAL_PREFIXES)
def _setup_agent_runtime(
self,
tools: list,
tool_executor: Callable | None,
accounts_prompt: str = "",
accounts_data: list[dict] | None = None,
tool_provider_map: dict[str, str] | None = None,
event_bus=None,
skills_catalog_prompt: str = "",
protocols_prompt: str = "",
skill_dirs: list[str] | None = None,
skills_manager_config=None,
) -> None:
"""Set up multi-entry-point execution using AgentRuntime."""
entry_points = []
# Always create a primary entry point for the graph's entry node.
# For multi-entry-point agents this ensures the primary path (e.g.
# user-facing rule setup) is reachable alongside async entry points.
if self.graph.entry_node:
entry_points.insert(
0,
EntryPointSpec(
id="default",
name="Default",
entry_node=self.graph.entry_node,
trigger_type="manual",
isolation_level="shared",
),
)
# Create AgentRuntime with all entry points
log_store = RuntimeLogStore(base_path=self._storage_path / "runtime_logs")
# Enable checkpointing by default for resumable sessions
from framework.graph.checkpoint_config import CheckpointConfig
checkpoint_config = CheckpointConfig(
enabled=True,
checkpoint_on_node_start=False, # Only checkpoint after nodes complete
checkpoint_on_node_complete=True,
checkpoint_max_age_days=7,
async_checkpoint=True, # Non-blocking
)
# Handle runtime_config - only pass through if it's actually an AgentRuntimeConfig.
# Agents may export a RuntimeConfig (LLM settings) or queen-generated custom classes
# that would crash AgentRuntime if passed through.
runtime_config = None
if self.runtime_config is not None:
from framework.runtime.agent_runtime import AgentRuntimeConfig
if isinstance(self.runtime_config, AgentRuntimeConfig):
runtime_config = self.runtime_config
self._agent_runtime = create_agent_runtime(
graph=self.graph,
goal=self.goal,
storage_path=self._storage_path,
entry_points=entry_points,
llm=self._llm,
tools=tools,
tool_executor=tool_executor,
runtime_log_store=log_store,
checkpoint_config=checkpoint_config,
config=runtime_config,
graph_id=self.graph.id or self.agent_path.name,
accounts_prompt=accounts_prompt,
accounts_data=accounts_data,
tool_provider_map=tool_provider_map,
event_bus=event_bus,
skills_manager_config=skills_manager_config,
)
# Pass intro_message through for TUI display
self._agent_runtime.intro_message = self.intro_message
# ------------------------------------------------------------------
# Execution modes
#
@@ -1990,7 +1865,7 @@ class AgentRunner:
sub_ids: list[str] = []
if has_queen and sys.stdin.isatty():
from framework.runtime.event_bus import EventType
from framework.host.event_bus import EventType
runtime = self._agent_runtime
@@ -2246,7 +2121,7 @@ class AgentRunner:
except ImportError:
# aden_tools not installed - fall back to direct check
has_llm_nodes = any(
node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
node.node_type == "event_loop" for node in self.graph.nodes
)
if has_llm_nodes:
api_key_env = self._get_api_key_env_var(self.model)
@@ -2283,7 +2158,7 @@ class AgentRunner:
# Run synchronous cleanup
self.cleanup()
async def __aenter__(self) -> "AgentRunner":
async def __aenter__(self) -> "AgentLoader":
"""Context manager entry."""
self._setup()
if self._agent_runtime is not None:
@@ -19,7 +19,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
run_parser.add_argument(
"agent_path",
type=str,
help="Path to agent folder (containing agent.json)",
help="Path to agent folder (containing agent.json or agent.py)",
)
run_parser.add_argument(
"--input",
@@ -87,7 +87,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
info_parser.add_argument(
"agent_path",
type=str,
help="Path to agent folder (containing agent.json)",
help="Path to agent folder (containing agent.json or agent.py)",
)
info_parser.add_argument(
"--json",
@@ -105,7 +105,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
validate_parser.add_argument(
"agent_path",
type=str,
help="Path to agent folder (containing agent.json)",
help="Path to agent folder (containing agent.json or agent.py)",
)
validate_parser.set_defaults(func=cmd_validate)
@@ -310,7 +310,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):
Updated runner if user proceeds, None if user aborts.
"""
from framework.credentials.setup import CredentialSetupSession
from framework.runner import AgentRunner
from framework.loader import AgentLoader
while True:
print()
@@ -328,7 +328,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):
if result.success:
# Reload runner with updated credentials
try:
runner = AgentRunner.load(agent_path, model=model)
runner = AgentLoader.load(agent_path, model=model)
except Exception as e:
print(f"Error reloading agent: {e}")
return None
@@ -342,7 +342,7 @@ def cmd_run(args: argparse.Namespace) -> int:
from framework.credentials.models import CredentialError
from framework.observability import configure_logging
from framework.runner import AgentRunner
from framework.loader import AgentLoader
# Set logging level (quiet by default for cleaner output)
if args.quiet:
@@ -390,7 +390,7 @@ def cmd_run(args: argparse.Namespace) -> int:
# Standard execution
# AgentRunner handles credential setup interactively when stdin is a TTY.
try:
runner = AgentRunner.load(
runner = AgentLoader.load(
args.agent_path,
model=args.model,
)
@@ -528,10 +528,10 @@ def cmd_run(args: argparse.Namespace) -> int:
def cmd_info(args: argparse.Namespace) -> int:
"""Show agent information."""
from framework.credentials.models import CredentialError
from framework.runner import AgentRunner
from framework.loader import AgentLoader
try:
runner = AgentRunner.load(args.agent_path)
runner = AgentLoader.load(args.agent_path)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return 1
@@ -595,10 +595,10 @@ def cmd_info(args: argparse.Namespace) -> int:
def cmd_validate(args: argparse.Namespace) -> int:
"""Validate an exported agent."""
from framework.credentials.models import CredentialError
from framework.runner import AgentRunner
from framework.loader import AgentLoader
try:
runner = AgentRunner.load(args.agent_path)
runner = AgentLoader.load(args.agent_path)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return 1
@@ -632,7 +632,7 @@ def cmd_validate(args: argparse.Namespace) -> int:
def cmd_list(args: argparse.Namespace) -> int:
"""List available agents."""
from framework.runner import AgentRunner
from framework.loader import AgentLoader
directory = Path(args.directory)
if not directory.exists():
@@ -644,7 +644,7 @@ def cmd_list(args: argparse.Namespace) -> int:
for path in directory.iterdir():
if _is_valid_agent_dir(path):
try:
runner = AgentRunner.load(path)
runner = AgentLoader.load(path)
info = runner.info()
agents.append(
{
@@ -686,7 +686,7 @@ def cmd_list(args: argparse.Namespace) -> int:
def _interactive_approval(request):
"""Interactive approval callback for HITL mode."""
from framework.graph import ApprovalDecision, ApprovalResult
from framework.orchestrator import ApprovalDecision, ApprovalResult
print()
print("=" * 60)
@@ -775,7 +775,7 @@ def cmd_shell(args: argparse.Namespace) -> int:
from framework.credentials.models import CredentialError
from framework.observability import configure_logging
from framework.runner import AgentRunner
from framework.loader import AgentLoader
configure_logging(level="INFO")
@@ -789,7 +789,7 @@ def cmd_shell(args: argparse.Namespace) -> int:
return 1
try:
runner = AgentRunner.load(agent_path)
runner = AgentLoader.load(agent_path)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return 1
@@ -1004,17 +1004,35 @@ def _get_framework_agents_dir() -> Path:
def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
"""Extract name and description from a Python-based agent's config.py.
"""Extract name and description from an agent directory.
Uses AST parsing to safely extract values without executing code.
Checks agent.json first (declarative), then falls back to config.py
(legacy Python). Uses AST parsing for Python to avoid executing code.
Returns (name, description) tuple, with fallbacks if parsing fails.
"""
import ast
config_path = agent_path / "config.py"
fallback_name = agent_path.name.replace("_", " ").title()
fallback_desc = "(Python-based agent)"
# Declarative agent: read from agent.json
agent_json = agent_path / "agent.json"
if agent_json.exists():
try:
import json
data = json.loads(agent_json.read_text(encoding="utf-8"))
if isinstance(data, dict):
name = data.get("name", fallback_name)
# Convert kebab-case to Title Case for display
if "-" in name and " " not in name:
name = name.replace("-", " ").title()
desc = data.get("description", fallback_desc)
return name, desc
except Exception:
pass
config_path = agent_path / "config.py"
if not config_path.exists():
return fallback_name, fallback_desc
@@ -1083,7 +1101,7 @@ def _is_valid_agent_dir(path: Path) -> bool:
def _has_agents(directory: Path) -> bool:
"""Check if a directory contains any valid agents (folders with agent.json or agent.py)."""
"""Check if a directory contains any valid agents."""
if not directory.exists():
return False
return any(_is_valid_agent_dir(p) for p in directory.iterdir())
@@ -14,7 +14,7 @@ from typing import Any, Literal
import httpx
from framework.runner.mcp_errors import MCPToolNotFoundError
from framework.loader.mcp_errors import MCPToolNotFoundError
logger = logging.getLogger(__name__)
@@ -5,7 +5,7 @@ import threading
import httpx
from framework.runner.mcp_client import MCPClient, MCPServerConfig
from framework.loader.mcp_client import MCPClient, MCPServerConfig
logger = logging.getLogger(__name__)
@@ -14,9 +14,9 @@ from typing import Any, Literal
import httpx
from framework.runner.mcp_client import MCPClient, MCPServerConfig
from framework.runner.mcp_connection_manager import MCPConnectionManager
from framework.runner.mcp_errors import (
from framework.loader.mcp_client import MCPClient, MCPServerConfig
from framework.loader.mcp_connection_manager import MCPConnectionManager
from framework.loader.mcp_errors import (
MCPError,
MCPErrorCode,
MCPInstallError,
@@ -28,7 +28,7 @@ from typing import Any
def _get_registry(base_path: Path | None = None):
"""Initialize and return an MCPRegistry instance."""
from framework.runner.mcp_registry import MCPRegistry
from framework.loader.mcp_registry import MCPRegistry
registry = MCPRegistry(base_path=base_path)
registry.initialize()
@@ -11,8 +11,8 @@ from dataclasses import dataclass, field
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.node import NodeSpec
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.node import NodeSpec
logger = logging.getLogger(__name__)
@@ -262,15 +262,21 @@ class ToolRegistry:
is_error=False,
)
registry_ref = self
def executor(tool_use: ToolUse) -> ToolResult:
if tool_use.name not in self._tools:
# Check if credential files changed (lightweight dir listing).
# If new OAuth tokens appeared, restarts MCP servers to pick them up.
registry_ref.resync_mcp_servers_if_needed()
if tool_use.name not in registry_ref._tools:
return ToolResult(
tool_use_id=tool_use.id,
content=json.dumps({"error": f"Unknown tool: {tool_use.name}"}),
is_error=True,
)
registered = self._tools[tool_use.name]
registered = registry_ref._tools[tool_use.name]
try:
result = registered.executor(tool_use.input)
@@ -635,8 +641,8 @@ class ToolRegistry:
Number of tools registered from this server
"""
try:
from framework.runner.mcp_client import MCPClient, MCPServerConfig
from framework.runner.mcp_connection_manager import MCPConnectionManager
from framework.loader.mcp_client import MCPClient, MCPServerConfig
from framework.loader.mcp_connection_manager import MCPConnectionManager
# Build config object
config = MCPServerConfig(
@@ -883,7 +889,7 @@ class ToolRegistry:
"""Re-run ``mcp_registry.json`` resolution and register servers (post-resync)."""
if self._mcp_registry_agent_path is None:
return
from framework.runner.mcp_registry import MCPRegistry
from framework.loader.mcp_registry import MCPRegistry
try:
reg = MCPRegistry()
@@ -922,6 +928,11 @@ class ToolRegistry:
clients and re-loads them so the new subprocess picks up the fresh
credentials.
Note: Individual credential TTL/refresh is handled by the MCP server
process internally -- it resolves tokens from the credential store
on every tool call, not at startup. This method only handles the case
where entirely new credential files appear.
Returns True if a resync was performed, False otherwise.
"""
if not self._mcp_clients or self._mcp_config_path is None:
@@ -975,7 +986,7 @@ class ToolRegistry:
server_name = self._mcp_client_servers.get(client_id, client.config.name)
try:
if client_id in self._mcp_managed_clients:
from framework.runner.mcp_connection_manager import MCPConnectionManager
from framework.loader.mcp_connection_manager import MCPConnectionManager
MCPConnectionManager.get_instance().release(server_name)
else:
+27
View File
@@ -0,0 +1,27 @@
"""Orchestrator layer -- how agents are composed via graphs.
Lazy imports to avoid circular dependencies with graph/event_loop/*.
"""
def __getattr__(name: str):
if name in ("GraphContext",):
from framework.orchestrator.context import GraphContext
return GraphContext
if name in ("DEFAULT_MAX_TOKENS", "EdgeCondition", "EdgeSpec", "GraphSpec"):
from framework.orchestrator import edge as _e
return getattr(_e, name)
if name in ("Orchestrator", "ExecutionResult"):
from framework.orchestrator import orchestrator as _o
return getattr(_o, name)
if name in ("Constraint", "Goal", "GoalStatus", "SuccessCriterion"):
from framework.orchestrator import goal as _g
return getattr(_g, name)
if name in ("DataBuffer", "NodeContext", "NodeProtocol", "NodeResult", "NodeSpec"):
from framework.orchestrator import node as _n
return getattr(_n, name)
if name in ("NodeWorker", "Activation", "FanOutTag", "FanOutTracker",
"WorkerCompletion", "WorkerLifecycle"):
from framework.orchestrator import node_worker as _nw
return getattr(_nw, name)
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -16,7 +16,7 @@ from collections.abc import AsyncIterator
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from framework.runtime.event_bus import EventBus
from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -13,10 +13,10 @@ import asyncio
from dataclasses import dataclass, field
from typing import Any
from framework.graph.edge import GraphSpec
from framework.graph.goal import Goal
from framework.graph.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
from framework.runtime.core import Runtime
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.goal import Goal
from framework.orchestrator.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
from framework.tracker.decision_tracker import DecisionTracker
@dataclass
@@ -26,7 +26,7 @@ class GraphContext:
graph: GraphSpec
goal: Goal
buffer: DataBuffer
runtime: Runtime
runtime: DecisionTracker
llm: Any # LLMProvider
tools: list[Any] # list[Tool]
tool_executor: Any # Callable
@@ -106,7 +106,7 @@ def build_node_accounts_prompt(
resolved = accounts_prompt
if accounts_data and tool_provider_map:
from framework.graph.prompting import build_accounts_prompt
from framework.orchestrator.prompting import build_accounts_prompt
filtered = build_accounts_prompt(
accounts_data,
@@ -125,11 +125,27 @@ def _resolve_available_tools(
tools: list[Any],
override_tools: list[Any] | None,
) -> list[Any]:
"""Select tools available to the current node."""
"""Select tools available to the current node.
Respects ``node_spec.tool_access_policy``:
- ``"all"`` -- all tools from the registry (no filtering).
- ``"explicit"`` -- only tools whose name appears in ``node_spec.tools``.
If the list is empty, **no tools** are given (default-deny).
- ``"none"`` -- no tools at all.
"""
if override_tools is not None:
return list(override_tools)
policy = getattr(node_spec, "tool_access_policy", "explicit")
if policy == "none":
return []
if policy == "all":
return list(tools)
# "explicit" (default): only tools named in node_spec.tools.
if not node_spec.tools:
return []
@@ -149,7 +165,7 @@ def _derive_input_data(buffer: DataBuffer, input_keys: list[str]) -> dict[str, A
def build_node_context(
*,
runtime: Runtime,
runtime: DecisionTracker,
node_spec: NodeSpec,
buffer: DataBuffer,
goal: Goal,
@@ -234,9 +250,6 @@ def build_node_context(
execution_id=execution_id,
run_id=run_id,
stream_id=stream_id,
node_registry=node_registry or {},
all_tools=list(all_tools or tools),
shared_node_registry=shared_node_registry or {},
dynamic_tools_provider=dynamic_tools_provider,
dynamic_prompt_provider=dynamic_prompt_provider,
dynamic_memory_provider=dynamic_memory_provider,
@@ -308,9 +321,6 @@ def build_node_context_from_graph_context(
execution_id=gc.execution_id,
run_id=gc.run_id,
stream_id=gc.stream_id,
node_registry=node_registry or gc.node_spec_registry,
all_tools=gc.tools,
shared_node_registry=gc.node_registry,
dynamic_tools_provider=gc.dynamic_tools_provider,
dynamic_prompt_provider=gc.dynamic_prompt_provider,
dynamic_memory_provider=gc.dynamic_memory_provider,
@@ -6,10 +6,10 @@ import logging
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from framework.graph.conversation import _try_extract_key
from framework.agent_loop.conversation import _try_extract_key
if TYPE_CHECKING:
from framework.graph.conversation import NodeConversation
from framework.agent_loop.conversation import NodeConversation
from framework.llm.provider import LLMProvider
logger = logging.getLogger(__name__)
@@ -15,7 +15,7 @@ import logging
from dataclasses import dataclass
from typing import Any
from framework.graph.conversation import NodeConversation
from framework.agent_loop.conversation import NodeConversation
from framework.llm.provider import LLMProvider
logger = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ from typing import Any
from pydantic import BaseModel, Field, model_validator
from framework.graph.safe_eval import safe_eval
from framework.orchestrator.safe_eval import safe_eval
logger = logging.getLogger(__name__)
@@ -538,13 +538,6 @@ class GraphSpec(BaseModel):
for edge in self.get_outgoing_edges(current):
to_visit.append(edge.target)
# Also mark sub-agents as reachable (they're invoked via delegate_to_sub_agent, not edges)
for node in self.nodes:
if node.id in reachable:
sub_agents = getattr(node, "sub_agents", []) or []
for sub_agent_id in sub_agents:
reachable.add(sub_agent_id)
for node in self.nodes:
if node.id not in reachable:
# Skip if node is a pause node or entry point target
@@ -583,48 +576,4 @@ class GraphSpec(BaseModel):
else:
seen_keys[key] = node_id
# GCU nodes must only be used as subagents
gcu_node_ids = {n.id for n in self.nodes if n.node_type == "gcu"}
if gcu_node_ids:
# GCU nodes must not be entry nodes
if self.entry_node in gcu_node_ids:
errors.append(
f"GCU node '{self.entry_node}' is used as entry node. "
"GCU nodes must only be used as subagents via delegate_to_sub_agent()."
)
# GCU nodes must not be terminal nodes
for term in self.terminal_nodes:
if term in gcu_node_ids:
errors.append(
f"GCU node '{term}' is used as terminal node. "
"GCU nodes must only be used as subagents."
)
# GCU nodes must not be connected via edges
for edge in self.edges:
if edge.source in gcu_node_ids:
errors.append(
f"GCU node '{edge.source}' is used as edge source (edge '{edge.id}'). "
"GCU nodes must only be used as subagents, not connected via edges."
)
if edge.target in gcu_node_ids:
errors.append(
f"GCU node '{edge.target}' is used as edge target (edge '{edge.id}'). "
"GCU nodes must only be used as subagents, not connected via edges."
)
# GCU nodes must be referenced in at least one parent's sub_agents
referenced_subagents = set()
for node in self.nodes:
for sa_id in node.sub_agents or []:
referenced_subagents.add(sa_id)
orphaned = gcu_node_ids - referenced_subagents
for nid in orphaned:
errors.append(
f"GCU node '{nid}' is not referenced in any node's sub_agents list. "
"GCU nodes must be declared as subagents of a parent node."
)
return {"errors": errors, "warnings": warnings}
@@ -1,34 +1,14 @@
"""GCU (browser automation) node type constants.
"""Browser automation best-practices prompt.
A ``gcu`` node is an ``event_loop`` node with two automatic enhancements:
1. A canonical browser best-practices system prompt is prepended.
2. All tools from the GCU MCP server are auto-included.
This module provides ``GCU_BROWSER_SYSTEM_PROMPT`` -- a canonical set of
browser automation guidelines that can be included in any node's system
prompt that uses browser tools from the gcu-tools MCP server.
No new ``NodeProtocol`` subclass is required — the ``gcu`` type is purely a declarative
signal processed by the runner and executor at setup time.
Browser tools are registered via the global MCP registry (gcu-tools).
Nodes that need browser access declare ``tools: {policy: "all"}`` in their
agent.json config.
"""
# ---------------------------------------------------------------------------
# MCP server identity
# ---------------------------------------------------------------------------
GCU_SERVER_NAME = "gcu-tools"
"""Name used to identify the GCU MCP server in ``mcp_servers.json``."""
GCU_MCP_SERVER_CONFIG: dict = {
"name": GCU_SERVER_NAME,
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
"cwd": "../../tools",
"description": "GCU tools for browser automation",
}
"""Default stdio config for the GCU MCP server (relative to exports/<agent>/)."""
# ---------------------------------------------------------------------------
# Browser best-practices system prompt
# ---------------------------------------------------------------------------
GCU_BROWSER_SYSTEM_PROMPT = """\
# Browser Automation Best Practices
@@ -25,7 +25,7 @@ from typing import Any
from pydantic import BaseModel, Field
from framework.llm.provider import LLMProvider, Tool
from framework.runtime.core import Runtime
from framework.tracker.decision_tracker import DecisionTracker
logger = logging.getLogger(__name__)
@@ -144,15 +144,19 @@ class NodeSpec(BaseModel):
# For LLM nodes
system_prompt: str | None = Field(default=None, description="System prompt for LLM nodes")
tools: list[str] = Field(default_factory=list, description="Tool names this node can use")
tool_access_policy: str = Field(
default="explicit",
description=(
"Tool access policy for this node. "
"'all' = all tools from registry, "
"'explicit' = only tools listed in `tools` (default, recommended), "
"'none' = no tools at all."
),
)
model: str | None = Field(
default=None, description="Specific model to use (defaults to graph default)"
)
# For subagent delegation
sub_agents: list[str] = Field(
default_factory=list,
description="Node IDs that can be invoked as subagents from this node",
)
# For function nodes
function: str | None = Field(
default=None, description="Function name or path for function nodes"
@@ -459,7 +463,7 @@ class NodeContext:
"""
# Core runtime
runtime: Runtime
runtime: DecisionTracker
# Node identity
node_id: str
@@ -526,20 +530,6 @@ class NodeContext:
# Falls back to node_id when not set (legacy / standalone executor).
stream_id: str = ""
# Subagent mode
is_subagent_mode: bool = False # True when running as a subagent (prevents nested delegation)
report_callback: Any = None # async (message: str, data: dict | None) -> None
node_registry: dict[str, "NodeSpec"] = field(default_factory=dict) # For subagent lookup
# Full tool catalog (unfiltered) — used by _execute_subagent to resolve
# subagent tools that aren't in the parent node's filtered available_tools.
all_tools: list[Tool] = field(default_factory=list)
# Shared reference to the executor's node_registry — used by subagent
# escalation (_EscalationReceiver) to register temporary receivers that
# the inject_input() routing chain can find.
shared_node_registry: dict[str, Any] = field(default_factory=dict)
# Dynamic tool provider — when set, EventLoopNode rebuilds the tool
# list from this callback at the start of each iteration. Used by
# the queen to switch between building-mode and running-mode tools.
@@ -19,15 +19,15 @@ from dataclasses import dataclass, field
from enum import StrEnum
from typing import Any
from framework.graph.context import GraphContext, build_node_context_from_graph_context
from framework.graph.edge import EdgeCondition, EdgeSpec
from framework.graph.node import (
from framework.orchestrator.context import GraphContext, build_node_context_from_graph_context
from framework.orchestrator.edge import EdgeCondition, EdgeSpec
from framework.orchestrator.node import (
NodeContext,
NodeProtocol,
NodeResult,
NodeSpec,
)
from framework.graph.validator import OutputValidator
from framework.orchestrator.validator import OutputValidator
logger = logging.getLogger(__name__)
@@ -109,7 +109,7 @@ class RetryState:
# ---------------------------------------------------------------------------
class WorkerAgent:
class NodeWorker:
"""First-class autonomous worker for one node in the graph.
Lifecycle:
@@ -355,7 +355,7 @@ class WorkerAgent:
# Only skip retries for actual EventLoopNode instances (they handle
# retries internally). Custom NodeProtocol impls registered via
# register_node should be retried by the executor.
from framework.graph.event_loop_node import EventLoopNode as _ELN
from framework.agent_loop.agent_loop import AgentLoop as _ELN
if isinstance(node_impl, _ELN):
max_retries = 0
@@ -603,10 +603,10 @@ class WorkerAgent:
return self._node_impl
# Auto-create EventLoopNode
if self.node_spec.node_type in ("event_loop", "gcu"):
from framework.graph.event_loop.types import LoopConfig
from framework.graph.event_loop_node import EventLoopNode
from framework.graph.node import warn_if_deprecated_client_facing
if self.node_spec.node_type == "event_loop":
from framework.agent_loop.internals.types import LoopConfig
from framework.agent_loop.agent_loop import AgentLoop
from framework.orchestrator.node import warn_if_deprecated_client_facing
conv_store = None
if gc.storage_path:
@@ -619,7 +619,7 @@ class WorkerAgent:
warn_if_deprecated_client_facing(self.node_spec)
default_max_iter = 100 if self.node_spec.supports_direct_user_io() else 50
node = EventLoopNode(
node = AgentLoop(
event_bus=gc.event_bus,
judge=None,
config=LoopConfig(
@@ -734,7 +734,7 @@ class WorkerAgent:
if not next_spec or next_spec.node_type != "event_loop":
return
from framework.graph.prompting import (
from framework.orchestrator.prompting import (
TransitionSpec,
build_narrative,
build_system_prompt_for_node_context,
@@ -16,21 +16,21 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.context import GraphContext, build_node_context
from framework.graph.conversation import LEGACY_RUN_ID
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.goal import Goal
from framework.graph.node import (
from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.orchestrator.context import GraphContext, build_node_context
from framework.agent_loop.conversation import LEGACY_RUN_ID
from framework.orchestrator.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.orchestrator.goal import Goal
from framework.orchestrator.node import (
DataBuffer,
NodeProtocol,
NodeResult,
NodeSpec,
)
from framework.graph.validator import OutputValidator
from framework.orchestrator.validator import OutputValidator
from framework.llm.provider import LLMProvider, Tool
from framework.observability import set_trace_context
from framework.runtime.core import Runtime
from framework.tracker.decision_tracker import DecisionTracker
from framework.schemas.checkpoint import Checkpoint
from framework.storage.checkpoint_store import CheckpointStore
from framework.utils.io import atomic_write
@@ -112,7 +112,7 @@ class ParallelExecutionConfig:
branch_timeout_seconds: float = 300.0
class GraphExecutor:
class Orchestrator:
"""
Executes agent graphs.
@@ -133,7 +133,7 @@ class GraphExecutor:
def __init__(
self,
runtime: Runtime,
runtime: DecisionTracker,
llm: LLMProvider | None = None,
tools: list[Tool] | None = None,
tool_executor: Callable | None = None,
@@ -165,7 +165,7 @@ class GraphExecutor:
Initialize the executor.
Args:
runtime: Runtime for decision logging
runtime: DecisionTracker for decision logging
llm: LLM provider for LLM nodes
tools: Available tools
tool_executor: Function to execute tools
@@ -202,7 +202,7 @@ class GraphExecutor:
self.validator = OutputValidator()
self.logger = logging.getLogger(__name__)
self.logger.debug(
"[GraphExecutor.__init__] Created with"
"[Orchestrator.__init__] Created with"
" stream_id=%s, execution_id=%s,"
" initial node_registry keys: %s",
stream_id,
@@ -361,8 +361,8 @@ class GraphExecutor:
Uses the same recursive binary-search splitting as EventLoopNode.
"""
from framework.graph.conversation import extract_tool_call_history
from framework.graph.event_loop_node import _is_context_too_large_error
from framework.agent_loop.conversation import extract_tool_call_history
from framework.agent_loop.agent_loop import _is_context_too_large_error
if _depth > self._PHASE_LLM_MAX_DEPTH:
raise RuntimeError("Phase LLM compaction recursion limit")
@@ -690,7 +690,7 @@ class GraphExecutor:
# and spillover files share the same session-scoped directory.
_ctx_token = None
if self._storage_path:
from framework.runner.tool_registry import ToolRegistry
from framework.loader.tool_registry import ToolRegistry
_ctx_token = ToolRegistry.set_execution_context(
data_dir=str(self._storage_path / "data"),
@@ -712,13 +712,12 @@ class GraphExecutor:
finally:
if _ctx_token is not None:
from framework.runner.tool_registry import ToolRegistry
from framework.loader.tool_registry import ToolRegistry
ToolRegistry.reset_execution_context(_ctx_token)
VALID_NODE_TYPES = {
"event_loop",
"gcu",
}
# Node types removed in v0.5 — provide migration guidance
REMOVED_NODE_TYPES = {
@@ -736,11 +735,11 @@ class GraphExecutor:
# Check registry first
if node_spec.id in self.node_registry:
logger.debug(
"[GraphExecutor._get_node_implementation] Found node '%s' in registry", node_spec.id
"[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id
)
return self.node_registry[node_spec.id]
logger.debug(
"[GraphExecutor._get_node_implementation]"
"[Orchestrator._get_node_implementation]"
" Node '%s' not in registry (keys: %s),"
" creating new",
node_spec.id,
@@ -764,10 +763,10 @@ class GraphExecutor:
)
# Create based on type
if node_spec.node_type in ("event_loop", "gcu"):
if node_spec.node_type == "event_loop":
# Auto-create EventLoopNode with sensible defaults.
# Custom configs can still be pre-registered via node_registry.
from framework.graph.event_loop_node import EventLoopNode, LoopConfig
from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
# Create a FileConversationStore if a storage path is available
conv_store = None
@@ -787,13 +786,13 @@ class GraphExecutor:
if self._storage_path:
spillover = str(self._storage_path / "data")
from framework.graph.node import warn_if_deprecated_client_facing
from framework.orchestrator.node import warn_if_deprecated_client_facing
warn_if_deprecated_client_facing(node_spec)
lc = self._loop_config
default_max_iter = 100 if node_spec.supports_direct_user_io() else 50
node = EventLoopNode(
node = AgentLoop(
event_bus=self._event_bus,
judge=None, # implicit judge: accept when output_keys are filled
config=LoopConfig(
@@ -812,7 +811,7 @@ class GraphExecutor:
# Cache so inject_event() is reachable for queen interaction and escalation routing
self.node_registry[node_spec.id] = node
logger.debug(
"[GraphExecutor._get_node_implementation]"
"[Orchestrator._get_node_implementation]"
" Cached node '%s' in node_registry,"
" registry now has keys: %s",
node_spec.id,
@@ -998,10 +997,10 @@ class GraphExecutor:
branch_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
effective_max_retries = node_spec.max_retries
# Only override for actual EventLoopNode instances, not custom NodeProtocol impls
from framework.graph.event_loop_node import EventLoopNode
# Only override for actual AgentLoop instances, not custom NodeProtocol impls
from framework.agent_loop.agent_loop import AgentLoop as _AgentLoop # noqa: F811
if isinstance(branch_impl, EventLoopNode) and effective_max_retries > 1:
if isinstance(branch_impl, _AgentLoop) and effective_max_retries > 1:
self.logger.warning(
f"EventLoopNode '{node_spec.id}' has "
f"max_retries={effective_max_retries}. Overriding "
@@ -1042,9 +1041,6 @@ class GraphExecutor:
execution_id=self._execution_id,
run_id=self._run_id,
stream_id=self._stream_id,
node_registry=node_registry,
all_tools=self.tools,
shared_node_registry=self.node_registry,
dynamic_tools_provider=self.dynamic_tools_provider,
dynamic_prompt_provider=self.dynamic_prompt_provider,
dynamic_memory_provider=self.dynamic_memory_provider,
@@ -1293,14 +1289,14 @@ class GraphExecutor:
Replaces the imperative while-loop with autonomous workers that
self-activate based on edge conditions and fan-out tracking.
"""
from framework.graph.worker_agent import (
from framework.orchestrator.node_worker import (
Activation,
FanOutTag,
WorkerAgent,
NodeWorker,
WorkerCompletion,
WorkerLifecycle,
)
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
# Build shared graph context
gc = GraphContext(
@@ -1339,9 +1335,9 @@ class GraphExecutor:
)
# Create one WorkerAgent per node
workers: dict[str, WorkerAgent] = {}
workers: dict[str, NodeWorker] = {}
for node_spec in graph.nodes:
workers[node_spec.id] = WorkerAgent(node_spec=node_spec, graph_context=gc)
workers[node_spec.id] = NodeWorker(node_spec=node_spec, graph_context=gc)
# Identify entry workers (graph entry node, not based on edge count)
# A node can be the entry point AND have incoming feedback edges.
@@ -1442,7 +1438,7 @@ class GraphExecutor:
def _route_activation(
activation: Activation,
workers_map: dict[str, WorkerAgent],
workers_map: dict[str, NodeWorker],
pending_tasks_map: dict[str, asyncio.Task],
*,
has_event_subscription: bool,
@@ -9,7 +9,7 @@ import json
from pathlib import Path
from typing import TYPE_CHECKING
from framework.graph.prompting import (
from framework.orchestrator.prompting import (
EXECUTION_SCOPE_PREAMBLE,
TransitionSpec,
build_accounts_prompt,
@@ -19,7 +19,7 @@ from framework.graph.prompting import (
)
if TYPE_CHECKING:
from framework.graph.node import DataBuffer, NodeSpec
from framework.orchestrator.node import DataBuffer, NodeSpec
_with_datetime = stamp_prompt_datetime
@@ -36,7 +36,7 @@ def compose_system_prompt(
node_type_preamble: str | None = None,
) -> str:
"""Compatibility wrapper for the legacy function signature."""
from framework.graph.prompting import NodePromptSpec
from framework.orchestrator.prompting import NodePromptSpec
spec = NodePromptSpec(
identity_prompt=identity_prompt or "",
@@ -66,7 +66,6 @@ def compose_system_prompt(
protocols_prompt=spec.protocols_prompt,
node_type=spec.node_type,
output_keys=spec.output_keys,
is_subagent_mode=spec.is_subagent_mode,
)
return build_system_prompt(spec)
@@ -135,7 +134,7 @@ def build_transition_marker(
)
from framework.graph.prompting import build_transition_message # noqa: E402
from framework.orchestrator.prompting import build_transition_message # noqa: E402
__all__ = [
"EXECUTION_SCOPE_PREAMBLE",
@@ -12,8 +12,8 @@ from datetime import datetime
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.node import DataBuffer
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.node import DataBuffer
# Injected into every worker node's system prompt so the LLM understands
@@ -40,7 +40,6 @@ class NodePromptSpec:
memory_prompt: str = ""
node_type: str = "event_loop"
output_keys: tuple[str, ...] = ()
is_subagent_mode: bool = False
@dataclass(frozen=True)
@@ -165,7 +164,6 @@ def build_prompt_spec_from_node_context(
memory_prompt=resolved_memory_prompt,
node_type=ctx.node_spec.node_type,
output_keys=tuple(ctx.node_spec.output_keys or ()),
is_subagent_mode=bool(getattr(ctx, "is_subagent_mode", False)),
)
@@ -195,13 +193,10 @@ def build_system_prompt(spec: NodePromptSpec) -> str:
if spec.narrative:
parts.append(f"\n--- Context (what has happened so far) ---\n{spec.narrative}")
if not spec.is_subagent_mode and spec.node_type in ("event_loop", "gcu") and spec.output_keys:
if not False and spec.node_type == "event_loop" and spec.output_keys:
parts.append(f"\n{EXECUTION_SCOPE_PREAMBLE}")
if spec.node_type == "gcu":
from framework.graph.gcu import GCU_BROWSER_SYSTEM_PROMPT
parts.append(f"\n{GCU_BROWSER_SYSTEM_PROMPT}")
if spec.focus_prompt:
parts.append(f"\n--- Current Focus ---\n{spec.focus_prompt}")
+32
View File
@@ -0,0 +1,32 @@
"""Pipeline middleware for the agent runtime.
Stages run in order when :meth:`AgentRuntime.trigger` receives a request.
Each stage can pass the context through, transform the input data, or reject
the request entirely. This is the runtime-level analogue of AstrBot's
pipeline architecture and lets operators compose rate limiting, validation,
cost guards, and custom pre/post-processing without patching core code.
"""
from framework.pipeline.registry import (
build_pipeline_from_config,
build_stage,
register,
)
from framework.pipeline.runner import PipelineRunner
from framework.pipeline.stage import (
PipelineContext,
PipelineRejectedError,
PipelineResult,
PipelineStage,
)
__all__ = [
"PipelineContext",
"PipelineRejectedError",
"PipelineResult",
"PipelineRunner",
"PipelineStage",
"build_pipeline_from_config",
"build_stage",
"register",
]
@@ -0,0 +1,44 @@
"""Execution-level middleware protocol.
Unlike :class:`PipelineStage` (which gates ``AgentHost.trigger()`` at the
request level), execution middleware runs at the start of **every** execution
attempt inside ``ExecutionManager._run_execution()`` -- including resurrection
retries.
Use this for concerns that must re-evaluate per attempt:
- Cost tracking (charge per attempt, not per trigger)
- Tool scoping (different tools on retry)
- Checkpoint config overrides
- Per-execution logging/tracing setup
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any
@dataclass
class ExecutionContext:
    """Context passed to execution middleware.

    One instance describes a single execution attempt; middleware may
    mutate and return it from ``on_execution_start`` to transform the
    attempt's parameters.
    """
    # Unique execution run ID.
    execution_id: str
    # Entry point / pipeline that triggered the execution.
    stream_id: str
    # Run this execution belongs to.
    run_id: str
    # Input payload handed to the execution.
    input_data: dict[str, Any]
    # Optional session-scoped state carried into the execution.
    session_state: dict[str, Any] | None = None
    # Attempt counter, starting at 1; incremented on resurrection
    # retries (per the module docstring) — presumably by the caller.
    attempt: int = 1
    # Free-form scratch space for middleware to pass data along.
    metadata: dict[str, Any] = field(default_factory=dict)
class ExecutionMiddleware(ABC):
    """Base class for per-execution middleware.

    Subclasses implement :meth:`on_execution_start`, which fires before
    every execution attempt — including resurrection retries — unlike
    request-level ``PipelineStage`` hooks that run once per trigger.
    """
    @abstractmethod
    async def on_execution_start(self, ctx: ExecutionContext) -> ExecutionContext:
        """Called before each execution attempt (including resurrections).

        Modify and return *ctx* to transform execution parameters.
        Raise to abort the execution.
        """
+107
View File
@@ -0,0 +1,107 @@
"""Pipeline stage registry -- maps type names to stage classes.
Stages self-register via the ``@register`` decorator. The
``build_pipeline_from_config`` function reads a declarative config
(from ``~/.hive/configuration.json`` or ``agent.json``) and
instantiates the corresponding stage objects.
Example config::
{
"pipeline": {
"stages": [
{"type": "rate_limit", "order": 200, "config": {"max_requests_per_minute": 60}},
{"type": "cost_guard", "order": 300, "config": {"max_cost_per_request": 0.50}}
]
}
}
"""
from __future__ import annotations
import logging
from typing import Any
from framework.pipeline.runner import PipelineRunner
from framework.pipeline.stage import PipelineStage
logger = logging.getLogger(__name__)
_STAGE_REGISTRY: dict[str, type[PipelineStage]] = {}
def register(name: str):
    """Class decorator binding a stage class to *name* in the registry.

    Usage::

        @register("rate_limit")
        class RateLimitStage(PipelineStage):
            ...
    """

    def _bind(stage_cls: type[PipelineStage]) -> type[PipelineStage]:
        # A later registration under the same name silently replaces the
        # earlier one, matching plain dict-assignment semantics.
        _STAGE_REGISTRY[name] = stage_cls
        return stage_cls

    return _bind
def get_registered_stages() -> dict[str, type[PipelineStage]]:
    """Return a snapshot copy of the stage registry (name -> class)."""
    return {**_STAGE_REGISTRY}
def build_stage(spec: dict[str, Any]) -> PipelineStage:
    """Instantiate a single stage from a config spec.

    Args:
        spec: Dict with ``type`` (required), ``order`` (optional, overrides
            the stage class's default ordering), and ``config`` (optional
            kwargs dict forwarded to the stage constructor).

    Returns:
        The configured ``PipelineStage`` instance.

    Raises:
        KeyError: If ``spec`` lacks a ``type`` key, or the stage type is
            not registered.
    """
    # Built-in stages self-register on import. build_pipeline_from_config
    # triggers that import before calling us, but build_stage is also part
    # of the public API; trigger it here too so direct callers don't hit a
    # spurious "Available: (none)" KeyError.
    _ensure_builtins_registered()
    stage_type = spec["type"]
    if stage_type not in _STAGE_REGISTRY:
        available = ", ".join(sorted(_STAGE_REGISTRY)) or "(none)"
        raise KeyError(
            f"Unknown pipeline stage type '{stage_type}'. "
            f"Available: {available}"
        )
    cls = _STAGE_REGISTRY[stage_type]
    config = spec.get("config", {})
    stage = cls(**config)
    if "order" in spec:
        # Instance attribute shadows the class-level default order.
        stage.order = spec["order"]
    return stage
def build_pipeline_from_config(
    stages_config: list[dict[str, Any]],
) -> PipelineRunner:
    """Construct a ``PipelineRunner`` from a declarative stages list.

    Each entry has the shape ``{"type": "...", "order": N, "config": {...}}``.
    """
    # Built-in stages register themselves on import; make sure that import
    # has happened before any spec lookup.
    _ensure_builtins_registered()
    return PipelineRunner([build_stage(entry) for entry in stages_config])
def _ensure_builtins_registered() -> None:
    """Import built-in stage modules so their ``@register`` decorators fire.

    No-op once the registry is populated. Each module is imported
    independently: previously a single failing import aborted the shared
    ``try`` block and silently skipped every remaining built-in stage.
    """
    if _STAGE_REGISTRY:
        return  # already populated
    import importlib

    builtin_modules = (
        "framework.pipeline.stages.cost_guard",
        "framework.pipeline.stages.credential_resolver",
        "framework.pipeline.stages.input_validation",
        "framework.pipeline.stages.llm_provider",
        "framework.pipeline.stages.mcp_registry",
        "framework.pipeline.stages.rate_limit",
        "framework.pipeline.stages.skill_registry",
    )
    for module_name in builtin_modules:
        try:
            importlib.import_module(module_name)
        except ImportError:
            # Best-effort: a missing optional dependency for one stage must
            # not prevent the other built-ins from registering.
            logger.debug(
                "Built-in pipeline stage module %s unavailable", module_name
            )
+111
View File
@@ -0,0 +1,111 @@
"""Pipeline runner -- executes registered stages in order."""
from __future__ import annotations
import logging
from typing import Any
from framework.pipeline.stage import (
PipelineContext,
PipelineRejectedError,
PipelineStage,
)
logger = logging.getLogger(__name__)
class PipelineRunner:
    """Runs :class:`PipelineStage` objects in ascending ``order``.

    :class:`AgentRuntime` invokes the runner on every trigger. Ties in
    ``order`` are broken by registration order (``sorted`` is stable).
    A stage returning a ``reject`` result short-circuits the remaining
    stages, surfacing as :class:`PipelineRejectedError` from the trigger.
    """

    def __init__(self, stages: list[PipelineStage] | None = None) -> None:
        # Sort once up front; add_stage() maintains the invariant afterwards.
        self._stages: list[PipelineStage] = sorted(
            stages or [], key=lambda stage: stage.order
        )

    @property
    def stages(self) -> list[PipelineStage]:
        # Defensive copy: callers must not mutate the internal ordering.
        return list(self._stages)

    def add_stage(self, stage: PipelineStage) -> None:
        """Register *stage* after construction, preserving order sorting."""
        self._stages.append(stage)
        self._stages.sort(key=lambda s: s.order)

    async def initialize_all(self) -> None:
        """Run every stage's ``initialize`` hook, logging progress."""
        for stage in self._stages:
            stage_name = stage.__class__.__name__
            logger.info("[pipeline] Initializing %s (order=%d)", stage_name, stage.order)
            await stage.initialize()
            logger.info("[pipeline] %s initialized", stage_name)
        if not self._stages:
            return
        logger.info(
            "[pipeline] Ready: %d stages [%s]",
            len(self._stages),
            " -> ".join(s.__class__.__name__ for s in self._stages),
        )

    async def run(self, ctx: PipelineContext) -> PipelineContext:
        """Run all stages over *ctx*.

        Returns the (possibly transformed) context; raises
        ``PipelineRejectedError`` as soon as any stage rejects.
        """
        if not self._stages:
            return ctx

        import time

        started = time.perf_counter()
        logger.info(
            "[pipeline] Running %d stages for entry_point=%s",
            len(self._stages),
            ctx.entry_point_id,
        )
        for stage in self._stages:
            stage_name = stage.__class__.__name__
            stage_started = time.perf_counter()
            outcome = await stage.process(ctx)
            stage_ms = (time.perf_counter() - stage_started) * 1000
            if outcome.action == "reject":
                why = outcome.rejection_reason or "(no reason given)"
                logger.warning(
                    "[pipeline] REJECTED by %s (%.1fms): %s",
                    stage_name, stage_ms, why,
                )
                raise PipelineRejectedError(stage_name, why)
            if outcome.action == "transform":
                logger.info(
                    "[pipeline] %s TRANSFORMED input (%.1fms)",
                    stage_name, stage_ms,
                )
                # A transform with no payload still counts as handled;
                # only replace input_data when the stage supplied one.
                if outcome.input_data is not None:
                    ctx.input_data = outcome.input_data
            else:
                logger.info(
                    "[pipeline] %s passed (%.1fms)",
                    stage_name, stage_ms,
                )
        logger.info(
            "[pipeline] Complete (%.1fms total)",
            (time.perf_counter() - started) * 1000,
        )
        return ctx

    async def run_post(self, ctx: PipelineContext, result: Any) -> Any:
        """Fold *result* through every stage's ``post_process`` hook.

        Each stage may transform the value; the final value is returned.
        Exceptions are logged and swallowed — post-processing must never
        break an execution that already succeeded — so on failure the
        previous good value carries forward.
        """
        accumulated = result
        for stage in self._stages:
            try:
                accumulated = await stage.post_process(ctx, accumulated)
            except Exception:
                logger.exception(
                    "Pipeline post_process raised in %s; continuing with previous result",
                    stage.__class__.__name__,
                )
        return accumulated
+77
View File
@@ -0,0 +1,77 @@
"""Pipeline stage base class and request/response types."""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Literal
class PipelineRejectedError(Exception):
    """Raised by ``AgentHost.trigger`` when a stage rejects the request.

    Carries the rejecting stage's class name and human-readable reason
    so callers can report which gate fired.
    """

    def __init__(self, stage_name: str, reason: str) -> None:
        self.stage_name = stage_name
        self.reason = reason
        super().__init__(f"Pipeline rejected by {stage_name}: {reason}")
@dataclass
class PipelineContext:
    """Carries request data through the pipeline.

    One instance is created per trigger; stages may read any field and
    (via a ``transform`` result) replace ``input_data``.
    """
    # Entry point the request targets.
    entry_point_id: str
    # Request payload; replaced when a stage returns a transform result.
    input_data: dict[str, Any]
    # Optional ID for correlating related requests.
    correlation_id: str | None = None
    # Optional session-scoped state (e.g. used for rate-limit bucketing).
    session_state: dict[str, Any] | None = None
    # Free-form scratch space for stages to pass data along
    # (e.g. "estimated_cost" read by the cost guard).
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class PipelineResult:
    """Outcome of a stage's ``process`` call.

    ``action`` semantics:
      - "continue": pass the context through unchanged.
      - "reject": abort the pipeline; ``rejection_reason`` explains why.
      - "transform": replace the context's input with ``input_data``
        (when it is not ``None``).
    """
    action: Literal["continue", "reject", "transform"] = "continue"
    # Replacement payload; only consulted when action == "transform".
    input_data: dict[str, Any] | None = None
    # Human-readable explanation; only consulted when action == "reject".
    rejection_reason: str | None = None
class PipelineStage(ABC):
    """Base class for all middleware stages.

    Infrastructure stages (LLM, MCP, credentials, skills) set typed
    attributes during ``initialize()`` that the host reads after all
    stages have initialized. Request-level stages (rate limit, input
    validation, cost guard) implement ``process()``.

    Attributes set by infrastructure stages:
        llm: LLM provider instance (set by LlmProviderStage)
        tool_registry: ToolRegistry with discovered MCP tools (set by McpRegistryStage)
        accounts_prompt: Connected accounts system prompt block (set by CredentialResolverStage)
        accounts_data: Raw account info list (set by CredentialResolverStage)
        tool_provider_map: Tool name -> provider mapping (set by CredentialResolverStage)
        skills_manager: SkillsManager instance (set by SkillRegistryStage)
    """
    # Execution position: stages run in ascending order (ties broken by
    # registration order). Subclasses override this class attribute, and
    # build_stage() may override it per-instance from config.
    order: int = 100
    # Infrastructure stage outputs -- typed so _apply_pipeline_results
    # doesn't need hasattr() sniffing.
    llm: Any = None
    tool_registry: Any = None
    accounts_prompt: str = ""
    accounts_data: list[dict] | None = None
    tool_provider_map: dict[str, str] | None = None
    skills_manager: Any = None
    async def initialize(self) -> None:
        """Called once when the runtime starts.

        Default is a no-op; infrastructure stages override this to build
        their outputs (LLM provider, tool registry, prompts, ...).
        """
        return None
    @abstractmethod
    async def process(self, ctx: PipelineContext) -> PipelineResult:
        """Process the incoming request.

        Return a ``PipelineResult`` whose ``action`` is "continue",
        "reject", or "transform".
        """
    async def post_process(self, ctx: PipelineContext, result: Any) -> Any:
        """Optional post-execution hook. Default: pass-through.

        Receives the execution result (possibly already transformed by
        earlier stages) and returns the value to carry forward.
        """
        return result
@@ -0,0 +1,19 @@
"""Built-in pipeline stages."""
from framework.pipeline.stages.cost_guard import CostGuardStage
from framework.pipeline.stages.credential_resolver import CredentialResolverStage
from framework.pipeline.stages.input_validation import InputValidationStage
from framework.pipeline.stages.llm_provider import LlmProviderStage
from framework.pipeline.stages.mcp_registry import McpRegistryStage
from framework.pipeline.stages.rate_limit import RateLimitStage
from framework.pipeline.stages.skill_registry import SkillRegistryStage
__all__ = [
"CostGuardStage",
"CredentialResolverStage",
"InputValidationStage",
"LlmProviderStage",
"McpRegistryStage",
"RateLimitStage",
"SkillRegistryStage",
]
@@ -0,0 +1,35 @@
"""Cost guard stage -- reject requests over a pre-flight budget."""
from __future__ import annotations
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
@register("cost_guard")
class CostGuardStage(PipelineStage):
"""Reject requests whose estimated cost exceeds the per-request budget.
The cost estimate must be populated in ``ctx.metadata["estimated_cost"]``
by an earlier stage (or by the caller). When no estimate is present,
the stage passes through.
"""
order = 300
def __init__(self, max_cost_per_request: float = 1.0) -> None:
self._budget = max_cost_per_request
async def process(self, ctx: PipelineContext) -> PipelineResult:
estimated = ctx.metadata.get("estimated_cost")
if estimated is None:
return PipelineResult(action="continue")
if estimated > self._budget:
return PipelineResult(
action="reject",
rejection_reason=(
f"Estimated cost ${estimated:.4f} exceeds budget "
f"${self._budget:.4f}"
),
)
return PipelineResult(action="continue")
@@ -0,0 +1,58 @@
"""Credential resolver pipeline stage.
Resolves connected accounts at startup. Individual credential TTL/refresh
is handled by MCP server processes internally -- they resolve tokens from
the credential store on every tool call.
"""
from __future__ import annotations
import logging
from typing import Any
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
logger = logging.getLogger(__name__)
@register("credential_resolver")
class CredentialResolverStage(PipelineStage):
"""Resolve connected accounts for system prompt injection."""
order = 40
def __init__(self, credential_store: Any = None, **kwargs: Any) -> None:
self._credential_store = credential_store
self.accounts_prompt = ""
self.accounts_data: list[dict] | None = None
self.tool_provider_map: dict[str, str] | None = None
async def initialize(self) -> None:
try:
from aden_tools.credentials.store_adapter import (
CredentialStoreAdapter,
)
from framework.orchestrator.prompting import build_accounts_prompt
if self._credential_store is not None:
adapter = CredentialStoreAdapter(store=self._credential_store)
else:
adapter = CredentialStoreAdapter.default()
self.accounts_data = adapter.get_all_account_info()
self.tool_provider_map = adapter.get_tool_provider_map()
if self.accounts_data:
self.accounts_prompt = build_accounts_prompt(
self.accounts_data, self.tool_provider_map,
)
logger.info(
"[pipeline] CredentialResolverStage: %d accounts",
len(self.accounts_data or []),
)
except Exception:
logger.debug(
"Credential resolution failed (non-fatal)", exc_info=True,
)
async def process(self, ctx: PipelineContext) -> PipelineResult:
return PipelineResult(action="continue")
@@ -0,0 +1,47 @@
"""Input validation stage.
Rejects requests whose ``input_data`` does not match the entry point's
declared input schema. Uses a user-provided schema map:
``{entry_point_id: {required_key: expected_type, ...}}``.
"""
from __future__ import annotations
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
@register("input_validation")
class InputValidationStage(PipelineStage):
"""Validate ``input_data`` against per-entry-point schemas.
The schema is a simple dict mapping key -> expected Python type.
For richer validation, substitute a Pydantic-based stage.
"""
order = 100
def __init__(self, schemas: dict[str, dict[str, type]] | None = None) -> None:
self._schemas = schemas or {}
async def process(self, ctx: PipelineContext) -> PipelineResult:
schema = self._schemas.get(ctx.entry_point_id)
if not schema:
return PipelineResult(action="continue")
for key, expected_type in schema.items():
if key not in ctx.input_data:
return PipelineResult(
action="reject",
rejection_reason=f"Missing required input key: '{key}'",
)
value = ctx.input_data[key]
if not isinstance(value, expected_type):
return PipelineResult(
action="reject",
rejection_reason=(
f"Input key '{key}' has type {type(value).__name__}, "
f"expected {expected_type.__name__}"
),
)
return PipelineResult(action="continue")
@@ -0,0 +1,95 @@
"""LLM provider pipeline stage.
Resolves the LLM provider from global config. This is the ONLY place
the LLM gets created for worker agents.
"""
from __future__ import annotations
import logging
from typing import Any
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
logger = logging.getLogger(__name__)
@register("llm_provider")
class LlmProviderStage(PipelineStage):
"""Resolve LLM provider and make it available."""
order = 10
def __init__(
self,
model: str | None = None,
mock_mode: bool = False,
llm: Any = None,
**kwargs: Any,
) -> None:
self._model = model
self._mock_mode = mock_mode
self.llm = llm # Pre-injected LLM (e.g. from session)
async def initialize(self) -> None:
if self.llm is not None:
return # Already injected
from framework.config import (
get_api_key,
get_api_keys,
get_hive_config,
get_preferred_model,
)
model = self._model or get_preferred_model()
if self._mock_mode:
from framework.llm.mock import MockLLMProvider
self.llm = MockLLMProvider(model=model)
return
config = get_hive_config()
llm_config = config.get("llm", {})
api_base = llm_config.get("api_base")
# Check for Antigravity (special provider)
if llm_config.get("use_antigravity_subscription"):
try:
from framework.llm.antigravity import AntigravityProvider
provider = AntigravityProvider(model=model)
if provider.has_credentials():
self.llm = provider
logger.info("[pipeline] LlmProviderStage: Antigravity")
return
except Exception:
pass
from framework.llm.litellm import LiteLLMProvider
api_key = get_api_key()
api_keys = get_api_keys()
if api_keys and len(api_keys) > 1:
self.llm = LiteLLMProvider(
model=model, api_keys=api_keys, api_base=api_base,
)
elif api_key:
extra = {}
if api_key.startswith("sk-ant-oat"):
extra["extra_headers"] = {
"authorization": f"Bearer {api_key}"
}
self.llm = LiteLLMProvider(
model=model, api_key=api_key, api_base=api_base, **extra,
)
else:
self.llm = LiteLLMProvider(model=model, api_base=api_base)
logger.info("[pipeline] LlmProviderStage: %s", model)
async def process(self, ctx: PipelineContext) -> PipelineResult:
return PipelineResult(action="continue")
@@ -0,0 +1,92 @@
"""MCP registry pipeline stage.
Resolves MCP server references from the agent config against the global
registry and registers tools. This is the ONLY place MCP tools get loaded.
"""
from __future__ import annotations
import logging
from dataclasses import asdict
from pathlib import Path
from typing import Any
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
logger = logging.getLogger(__name__)
@register("mcp_registry")
class McpRegistryStage(PipelineStage):
"""Resolve MCP tools from the global registry."""
order = 50
def __init__(
self,
server_refs: list[dict[str, Any]] | None = None,
agent_path: str | Path | None = None,
tool_registry: Any = None,
**kwargs: Any,
) -> None:
self._server_refs = server_refs or []
self._agent_path = Path(agent_path) if agent_path else None
self._tool_registry = tool_registry
async def initialize(self) -> None:
"""Connect to MCP servers and discover tools."""
if self._tool_registry is None:
from framework.loader.tool_registry import ToolRegistry
self._tool_registry = ToolRegistry()
from framework.loader.mcp_registry import MCPRegistry
registry = MCPRegistry()
mcp_loaded = False
# 1. From agent.json mcp_servers refs
if self._server_refs:
names = [ref["name"] for ref in self._server_refs if ref.get("name")]
if names:
configs = registry.resolve_for_agent(include=names)
if configs:
self._tool_registry.load_registry_servers(
[asdict(c) for c in configs]
)
mcp_loaded = True
logger.info(
"[pipeline] McpRegistryStage: loaded %d servers: %s",
len(configs),
names,
)
# 2. Legacy: mcp_servers.json
if not mcp_loaded and self._agent_path:
mcp_json = self._agent_path / "mcp_servers.json"
if mcp_json.exists():
self._tool_registry.load_mcp_config(mcp_json)
mcp_loaded = True
# 3. Fallback: all servers from global registry
if not mcp_loaded:
configs = registry.resolve_for_agent(profile="all")
if configs:
self._tool_registry.load_registry_servers(
[asdict(c) for c in configs]
)
logger.info(
"[pipeline] McpRegistryStage: loaded %d servers (fallback)",
len(configs),
)
total = len(self._tool_registry.get_tools())
logger.info("[pipeline] McpRegistryStage: %d tools available", total)
async def process(self, ctx: PipelineContext) -> PipelineResult:
return PipelineResult(action="continue")
@property
def tool_registry(self):
return self._tool_registry
@@ -0,0 +1,44 @@
"""Per-(entry-point, session) rate limiting stage."""
from __future__ import annotations
import time
from collections import defaultdict
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
@register("rate_limit")
class RateLimitStage(PipelineStage):
"""Reject requests that exceed ``max_requests_per_minute`` per session.
The key is ``<entry_point_id>:<session_id>``. When no session_id is
present in ``session_state``, a single shared "default" bucket is used.
"""
order = 200
def __init__(self, max_requests_per_minute: int = 60) -> None:
self._max_rpm = max_requests_per_minute
self._timestamps: dict[str, list[float]] = defaultdict(list)
async def process(self, ctx: PipelineContext) -> PipelineResult:
session_id = "default"
if ctx.session_state:
session_id = str(ctx.session_state.get("session_id", "default"))
key = f"{ctx.entry_point_id}:{session_id}"
now = time.monotonic()
# Prune entries older than 60s.
self._timestamps[key] = [t for t in self._timestamps[key] if now - t < 60.0]
if len(self._timestamps[key]) >= self._max_rpm:
return PipelineResult(
action="reject",
rejection_reason=(
f"Rate limit exceeded: {self._max_rpm} req/min "
f"for session '{session_id}'"
),
)
self._timestamps[key].append(now)
return PipelineResult(action="continue")
@@ -0,0 +1,55 @@
"""Skill registry pipeline stage.
Discovers and loads skills. This is the ONLY place skills get loaded.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
logger = logging.getLogger(__name__)
@register("skill_registry")
class SkillRegistryStage(PipelineStage):
"""Discover skills and provide prompts."""
order = 60
def __init__(
self,
project_root: str | Path | None = None,
interactive: bool = True,
skills_config: Any = None,
**kwargs: Any,
) -> None:
self._project_root = Path(project_root) if project_root else None
self._interactive = interactive
self._skills_config = skills_config
self.skills_manager: Any = None
async def initialize(self) -> None:
from framework.skills.config import SkillsConfig
from framework.skills.manager import SkillsManager, SkillsManagerConfig
config = SkillsManagerConfig(
skills_config=self._skills_config or SkillsConfig(),
project_root=self._project_root,
interactive=self._interactive,
)
self.skills_manager = SkillsManager(config)
self.skills_manager.load()
await self.skills_manager.start_watching()
logger.info(
"[pipeline] SkillRegistryStage: catalog=%d chars, protocols=%d chars",
len(self.skills_manager.skills_catalog_prompt),
len(self.skills_manager.protocols_prompt),
)
async def process(self, ctx: PipelineContext) -> PipelineResult:
return PipelineResult(action="continue")
-27
View File
@@ -1,27 +0,0 @@
"""Agent Runner - load and run exported agents."""
from framework.runner.mcp_registry import MCPRegistry
from framework.runner.protocol import (
AgentMessage,
CapabilityLevel,
CapabilityResponse,
MessageType,
OrchestratorResult,
)
from framework.runner.runner import AgentInfo, AgentRunner, ValidationResult
from framework.runner.tool_registry import ToolRegistry, tool
__all__ = [
# Single agent
"AgentRunner",
"AgentInfo",
"ValidationResult",
"ToolRegistry",
"MCPRegistry",
"tool",
"AgentMessage",
"MessageType",
"CapabilityLevel",
"CapabilityResponse",
"OrchestratorResult",
]
-493
View File
@@ -1,493 +0,0 @@
# Event Types and Schema Reference
The Hive runtime uses a pub/sub `EventBus` for inter-component communication and observability. Every event is an `AgentEvent` dataclass published through `EventBus.publish()`.
## Event Envelope (`AgentEvent`)
Every event shares a common envelope:
| Field | Type | Description |
| ---------------- | ----------------- | ------------------------------------------------------------ |
| `type` | `EventType` (str) | Event type identifier (see below) |
| `stream_id` | `str` | Entry point / pipeline that emitted the event |
| `node_id` | `str \| None` | Graph node that emitted the event |
| `execution_id` | `str \| None` | Unique execution run ID (UUID, set by `ExecutionStream`) |
| `graph_id` | `str \| None` | Graph that emitted the event (set by `GraphScopedEventBus`) |
| `data` | `dict` | Event-type-specific payload (see individual schemas below) |
| `timestamp` | `datetime` | When the event was created |
| `correlation_id` | `str \| None` | Optional ID for tracking related events across streams |
### Identity Fields
The identity tuple `(graph_id, stream_id, node_id, execution_id)` uniquely locates any event:
- **`graph_id`** — Which graph produced the event. Set automatically by `GraphScopedEventBus` (a subclass that stamps `graph_id` on every `publish()` call). Values: `"worker"`, `"judge"`, `"queen"`, or the graph spec ID.
- **`stream_id`** — Which entry point / pipeline. Corresponds to `EntryPointSpec.id` in the graph definition. For single-entry-point graphs, this equals the entry point name (e.g. `"default"`, `"health_check"`).
- **`node_id`** — Which specific node emitted the event. For `EventLoopNode` events, this is the node spec ID.
- **`execution_id`** — UUID identifying a specific execution run. Multiple concurrent executions of the same entry point each get a unique `execution_id`.
---
## Execution Lifecycle
### `execution_started`
A new graph execution has begun.
| Data Field | Type | Description |
| ---------- | ------ | ------------------------------- |
| `input` | `dict` | Input data passed to the graph |
**Emitted by:** `ExecutionStream._run_execution()`
---
### `execution_completed`
A graph execution finished successfully.
| Data Field | Type | Description |
| ---------- | ------ | ----------------- |
| `output` | `dict` | Final output data |
**Emitted by:** `ExecutionStream._run_execution()`
**Queen notification:** When a worker execution completes, the session manager \
injects a `[WORKER_TERMINAL]` notification into the queen with the output summary. \
The queen reports to the user and asks what to do next.
---
### `execution_failed`
A graph execution failed with an error.
| Data Field | Type | Description |
| ---------- | ----- | ------------- |
| `error` | `str` | Error message |
**Emitted by:** `ExecutionStream._run_execution()`
**Queen notification:** When a worker execution fails, the session manager \
injects a `[WORKER_TERMINAL]` notification into the queen with the error. \
The queen reports to the user and helps troubleshoot.
---
### `execution_paused`
Execution has been paused (Ctrl+Z or HITL approval).
| Data Field | Type | Description |
| ---------- | ----- | ----------------- |
| `reason` | `str` | Why it was paused |
**Emitted by:** `GraphExecutor.execute()`
---
### `execution_resumed`
Execution has resumed from a paused state.
| Data Field | Type | Description |
| ---------- | ---- | ----------- |
| *(none)* | | |
**Emitted by:** `GraphExecutor.execute()`
---
## Node Event-Loop Lifecycle
These events track the inner loop of `EventLoopNode` — the multi-turn LLM streaming loop that powers most agent nodes.
### `node_loop_started`
An EventLoopNode has begun its execution loop.
| Data Field | Type | Description |
| ---------------- | ---------- | ------------------------------- |
| `max_iterations` | `int\|null`| Maximum iterations configured |
**Emitted by:** `EventLoopNode._publish_loop_started()`, `GraphExecutor` (for function nodes in parallel branches)
---
### `node_loop_iteration`
An EventLoopNode has started a new iteration (one LLM turn).
| Data Field | Type | Description |
| ----------- | ----- | ------------------------- |
| `iteration` | `int` | Zero-based iteration index |
**Emitted by:** `EventLoopNode._publish_iteration()`
---
### `node_loop_completed`
An EventLoopNode has finished its execution loop.
| Data Field | Type | Description |
| ------------ | ----- | -------------------------------------- |
| `iterations` | `int` | Total number of iterations completed |
**Emitted by:** `EventLoopNode._publish_loop_completed()`, `GraphExecutor` (for function nodes in parallel branches)
---
## LLM Streaming
### `llm_text_delta`
Incremental text output from the LLM (non-client-facing nodes only).
| Data Field | Type | Description |
| ---------- | ----- | ---------------------------------------- |
| `content` | `str` | New text chunk (delta) |
| `snapshot` | `str` | Full accumulated text so far |
**Emitted by:** `EventLoopNode._publish_text_delta()` when `client_facing=False`
---
### `llm_reasoning_delta`
Incremental reasoning/thinking output from the LLM.
| Data Field | Type | Description |
| ---------- | ----- | ------------------- |
| `content` | `str` | New reasoning chunk |
**Emitted by:** Not currently wired in `EventLoopNode` (reserved for extended thinking models).
---
## Tool Lifecycle
### `tool_call_started`
The LLM has requested a tool call and execution is about to begin.
| Data Field | Type | Description |
| ------------ | ------ | ------------------------------------ |
| `tool_use_id`| `str` | Unique ID for this tool invocation |
| `tool_name` | `str` | Name of the tool being called |
| `tool_input` | `dict` | Arguments passed to the tool |
**Emitted by:** `EventLoopNode._publish_tool_started()`
---
### `tool_call_completed`
A tool call has finished executing.
| Data Field | Type | Description |
| ------------ | ------ | -------------------------------------- |
| `tool_use_id`| `str` | Same ID from `tool_call_started` |
| `tool_name` | `str` | Name of the tool |
| `result`     | `str`  | Tool execution result (may be truncated) |
| `is_error` | `bool` | Whether the tool returned an error |
**Emitted by:** `EventLoopNode._publish_tool_completed()`
---
## Client I/O
These events are emitted by the queen's interactive turns. They drive the TUI's chat interface.
### `client_output_delta`
Incremental text output meant for the human operator.
| Data Field | Type | Description |
| ---------- | ----- | ---------------------------- |
| `content` | `str` | New text chunk (delta) |
| `snapshot` | `str` | Full accumulated text so far |
**Emitted by:** `EventLoopNode._publish_text_delta()` for queen/user-facing output
---
### `client_input_requested`
The node is waiting for human input (via `ask_user` tool or auto-block on text-only turns).
| Data Field | Type | Description |
| ---------- | ----- | ------------------------------------------------- |
| `prompt` | `str` | Optional prompt/question shown to the user |
**Emitted by:** `EventLoopNode._await_user_input()`, doom loop handler
The TUI subscribes to this event to show the input prompt and focus the chat input. After the user types, `inject_event()` is called on the node to unblock it.
---
## Internal Node Observability
### `node_internal_output`
Output from a non-client-facing node (for debugging/monitoring).
| Data Field | Type | Description |
| ---------- | ----- | ---------------- |
| `content` | `str` | Output text |
**Emitted by:** Available via `emit_node_internal_output()` — not currently wired in the default `EventLoopNode`.
---
### `node_input_blocked`
A non-client-facing node is blocked waiting for input.
| Data Field | Type | Description |
| ---------- | ----- | --------------- |
| `prompt` | `str` | Block reason |
**Emitted by:** Available via `emit_node_input_blocked()` — reserved for future use.
---
### `node_stalled`
The node's LLM has produced identical responses for several consecutive turns (stall detection).
| Data Field | Type | Description |
| ---------- | ----- | ------------------------------------------------- |
| `reason` | `str` | Always `"Consecutive identical responses detected"`|
**Emitted by:** `EventLoopNode._publish_stalled()`
---
### `node_tool_doom_loop`
The LLM is calling the same tool(s) with identical arguments repeatedly (doom loop detection).
| Data Field | Type | Description |
| ------------- | ----- | ------------------------------------ |
| `description` | `str` | Human-readable doom loop description |
**Emitted by:** `EventLoopNode` doom loop handler
---
## Judge Decisions
### `judge_verdict`
The judge (custom or implicit) has evaluated the current iteration.
| Data Field | Type | Description |
| ------------ | ----- | ---------------------------------------------------- |
| `action` | `str` | `"ACCEPT"`, `"RETRY"`, `"ESCALATE"`, or `"CONTINUE"` |
| `feedback` | `str` | Judge feedback (empty for ACCEPT/CONTINUE) |
| `judge_type` | `str` | `"custom"` (explicit JudgeProtocol) or `"implicit"` (stop-reason heuristic) |
| `iteration` | `int` | Which iteration this verdict applies to |
**Emitted by:** `EventLoopNode._publish_judge_verdict()`
**Verdict meanings:**
- **ACCEPT** — Output meets requirements; node exits successfully.
- **RETRY** — Output needs improvement; loop continues with feedback injected.
- **ESCALATE** — Problem cannot be solved at this level; triggers escalation.
- **CONTINUE** — Implicit verdict: LLM called tools, so it's making progress — let it keep going.
---
## Output Tracking
### `output_key_set`
A node has set an output key via the `set_output` synthetic tool.
| Data Field | Type | Description |
| ---------- | ----- | ----------------- |
| `key` | `str` | Output key name |
**Emitted by:** `EventLoopNode._publish_output_key_set()`
---
## Retry & Edge Tracking
### `node_retry`
A transient error occurred during an LLM call and the node is retrying.
| Data Field | Type | Description |
| ------------- | ----- | ---------------------------------- |
| `retry_count` | `int` | Current retry attempt number |
| `max_retries` | `int` | Maximum retries configured |
| `error` | `str` | Error message (truncated to 500ch) |
**Emitted by:** `EventLoopNode` (stream retry handler), `GraphExecutor` (node-level retry)
---
### `edge_traversed`
The executor has traversed an edge from one node to another.
| Data Field | Type | Description |
| ---------------- | ----- | ---------------------------------------------- |
| `source_node` | `str` | Node ID the edge starts from |
| `target_node` | `str` | Node ID the edge goes to |
| `edge_condition` | `str` | Edge condition: `"router"`, `"on_success"`, etc. |
**Emitted by:** `GraphExecutor.execute()` — after router decisions, condition-based edges, and fallback edges.
---
## Context Management
### `context_compacted`
Not currently emitted — reserved for future use when `NodeConversation` compacts history.
---
## State Changes
### `state_changed`
A shared buffer key has been modified.
| Data Field | Type | Description |
| ----------- | ----- | ---------------------------------- |
| `key` | `str` | Buffer key that changed |
| `old_value` | `Any` | Previous value |
| `new_value` | `Any` | New value |
| `scope` | `str` | Scope of the change |
**Emitted by:** Available via `emit_state_changed()` — not currently wired in default execution.
---
### `state_conflict`
Not currently emitted — reserved for concurrent write conflict detection.
---
## Goal Tracking
### `goal_progress`
Goal completion progress update.
| Data Field | Type | Description |
| ----------------- | ------- | ------------------------------------ |
| `progress`        | `float` | 0.0–1.0 completion fraction          |
| `criteria_status` | `dict` | Per-criterion status |
**Emitted by:** Available via `emit_goal_progress()` — not currently wired in default execution.
---
### `goal_achieved`
Not currently emitted — reserved for explicit goal completion signals.
---
### `constraint_violation`
A goal constraint has been violated.
| Data Field | Type | Description |
| --------------- | ----- | ------------------------ |
| `constraint_id` | `str` | Which constraint failed |
| `description` | `str` | What went wrong |
**Emitted by:** Available via `emit_constraint_violation()`.
---
## Stream Lifecycle
### `stream_started` / `stream_stopped`
Not currently emitted — reserved for `ExecutionStream` lifecycle tracking.
---
## External Triggers
### `webhook_received`
An external webhook has been received.
| Data Field | Type | Description |
| -------------- | ------ | ---------------------------- |
| `path` | `str` | Webhook URL path |
| `method` | `str` | HTTP method |
| `headers` | `dict` | HTTP headers |
| `payload` | `dict` | Request body |
| `query_params` | `dict` | URL query parameters |
**Emitted by:** Webhook server integration.
Note: `node_id` is not set on this event; `stream_id` is the webhook source ID.
---
## Escalation
### `escalation_requested`
An agent has requested handoff to the Hive Coder (via the `escalate` synthetic tool).
| Data Field | Type | Description |
| ---------- | ----- | ------------------------------- |
| `reason` | `str` | Why escalation is needed |
| `context` | `str` | Additional context for the coder|
**Emitted by:** `EventLoopNode` when the LLM calls `escalate`.
---
## Custom Events
### `custom`
User-defined events with arbitrary payloads. No schema enforced.
---
## Subscription & Filtering
Events can be filtered when subscribing:
```python
bus.subscribe(
event_types=[EventType.TOOL_CALL_STARTED, EventType.TOOL_CALL_COMPLETED],
handler=my_handler,
filter_stream="default", # Only events from this stream
filter_node="planner", # Only events from this node
filter_execution="exec-uuid", # Only events from this execution
filter_graph="worker", # Only events from this graph
)
```
## Debug Event Logging
Set `HIVE_DEBUG_EVENTS=1` to write every published event to a JSONL file at `~/.hive/event_logs/<timestamp>.jsonl`. Each line is the full JSON serialization of an `AgentEvent`:
```json
{
"type": "tool_call_started",
"stream_id": "default",
"node_id": "planner",
"execution_id": "a1b2c3d4-...",
"graph_id": "worker",
"data": {"tool_use_id": "tu_1", "tool_name": "web_search", "tool_input": {"query": "..."}},
"timestamp": "2026-02-24T12:00:00.000000",
"correlation_id": null
}
```
-171
View File
@@ -1,171 +0,0 @@
# Agent Runtime
Unified execution system for all Hive agents. Every agent — single-entry or multi-entry, headless or dashboard — runs through the same runtime stack.
## Topology
```
AgentRunner.load(agent_path)
|
AgentRunner
(factory + public API)
|
_setup_agent_runtime()
|
AgentRuntime
(lifecycle + orchestration)
/ | \
Stream A Stream B Stream C ← one per entry point
| | |
GraphExecutor GraphExecutor GraphExecutor
| | |
Node → Node → Node (graph traversal)
```
Single-entry agents get a `"default"` entry point automatically. There is no separate code path.
## Components
| Component | File | Role |
|---|---|---|
| `AgentRunner` | `runner/runner.py` | Load agents, configure tools/LLM, expose high-level API |
| `AgentRuntime` | `runtime/agent_runtime.py` | Lifecycle management, entry point routing, event bus |
| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |
## Programming Interface
### AgentRunner (high-level)
```python
from framework.runner import AgentRunner
# Load and run
runner = AgentRunner.load("exports/my_agent", model="anthropic/claude-sonnet-4-20250514")
result = await runner.run({"query": "hello"})
# Resume from paused session
result = await runner.run({"query": "continue"}, session_state=saved_state)
# Lifecycle
await runner.start() # Start the runtime
await runner.stop() # Stop the runtime
exec_id = await runner.trigger("default", {}) # Non-blocking trigger
entry_points = runner.get_entry_points() # List entry points
# Context manager
async with AgentRunner.load("exports/my_agent") as runner:
result = await runner.run({"query": "hello"})
# Cleanup
runner.cleanup() # Synchronous
await runner.cleanup_async() # Asynchronous
```
### AgentRuntime (lower-level)
```python
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
# Create runtime with entry points
runtime = create_agent_runtime(
graph=graph,
goal=goal,
storage_path=Path("~/.hive/agents/my_agent"),
entry_points=[
EntryPointSpec(id="default", name="Default", entry_node="start", trigger_type="manual"),
],
llm=llm,
tools=tools,
tool_executor=tool_executor,
checkpoint_config=checkpoint_config,
)
# Lifecycle
await runtime.start()
await runtime.stop()
# Execution
exec_id = await runtime.trigger("default", {"query": "hello"}) # Non-blocking
result = await runtime.trigger_and_wait("default", {"query": "hello"}) # Blocking
result = await runtime.trigger_and_wait("default", {}, session_state=state) # Resume
# Client-facing node I/O
await runtime.inject_input(node_id="chat", content="user response")
# Events
sub_id = runtime.subscribe_to_events(
event_types=[EventType.CLIENT_OUTPUT_DELTA],
handler=my_handler,
)
runtime.unsubscribe_from_events(sub_id)
# Inspection
runtime.is_running # bool
runtime.event_bus # EventBus
runtime.state_manager # SharedBufferManager
runtime.get_stats() # Runtime statistics
```
## Execution Flow
1. `AgentRunner.run()` calls `AgentRuntime.trigger_and_wait()`
2. `AgentRuntime` routes to the `ExecutionStream` for the entry point
3. `ExecutionStream` creates a `GraphExecutor` and calls `execute()`
4. `GraphExecutor` traverses nodes, dispatches tools, manages checkpoints
5. `ExecutionResult` flows back up through the stack
6. `ExecutionStream` writes session state to disk
## Session Resume
All execution paths support session resume:
```python
# First run (agent pauses at a client-facing node)
result = await runner.run({"query": "start task"})
# result.paused_at = "review-node"
# result.session_state = {"memory": {...}, "paused_at": "review-node", ...}
# Resume
result = await runner.run({"input": "approved"}, session_state=result.session_state)
```
Session state flows: `AgentRunner.run()` → `AgentRuntime.trigger_and_wait()` → `ExecutionStream.execute()` → `GraphExecutor.execute()`.
Checkpoints are saved at node boundaries (`sessions/{id}/checkpoints/`) for crash recovery.
## Event Bus
The `EventBus` provides real-time execution visibility:
| Event | When |
|---|---|
| `NODE_STARTED` | Node begins execution |
| `NODE_COMPLETED` | Node finishes |
| `TOOL_CALL_STARTED` | Tool invocation begins |
| `TOOL_CALL_COMPLETED` | Tool invocation finishes |
| `CLIENT_OUTPUT_DELTA` | Agent streams text to user |
| `CLIENT_INPUT_REQUESTED` | Agent needs user input |
| `EXECUTION_COMPLETED` | Full execution finishes |
In headless mode, `AgentRunner` subscribes to `CLIENT_OUTPUT_DELTA` and `CLIENT_INPUT_REQUESTED` to print output and read stdin. The web dashboard subscribes to route events to the frontend.
## Storage Layout
```
~/.hive/agents/{agent_name}/
sessions/
session_YYYYMMDD_HHMMSS_{uuid}/
state.json # Session state (status, memory, progress)
checkpoints/ # Node-boundary snapshots
logs/
summary.json # Execution summary
details.jsonl # Detailed event log
tool_logs.jsonl # Tool call log
runtime_logs/ # Cross-session runtime logs
```
-5
View File
@@ -1,5 +0,0 @@
"""Runtime core for agent execution."""
from framework.runtime.core import Runtime
__all__ = ["Runtime"]
-1
View File
@@ -1 +0,0 @@
"""Tests for runtime components."""
@@ -1,869 +0,0 @@
"""
Tests for AgentRuntime and multi-entry-point execution.
Tests:
1. AgentRuntime creation and lifecycle
2. Entry point registration
3. Concurrent executions across streams
4. SharedBufferManager isolation levels
5. OutcomeAggregator goal evaluation
6. EventBus pub/sub
"""
import asyncio
import tempfile
from pathlib import Path
import pytest
from framework.graph import Goal
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.goal import Constraint, SuccessCriterion
from framework.graph.node import NodeSpec
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
from framework.runtime.execution_stream import EntryPointSpec
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
from framework.schemas.session_state import SessionState, SessionTimestamps
# === Test Fixtures ===
@pytest.fixture
def sample_goal():
    """Goal with one weighted success criterion and one hard constraint."""
    criteria = [
        SuccessCriterion(
            id="sc-1",
            description="Process all requests",
            metric="requests_processed",
            target="100%",
            weight=1.0,
        ),
    ]
    constraints = [
        Constraint(
            id="c-1",
            description="Must not exceed rate limits",
            constraint_type="hard",
            category="operational",
        ),
    ]
    return Goal(
        id="test-goal",
        name="Test Goal",
        description="A goal for testing multi-entry-point execution",
        success_criteria=criteria,
        constraints=constraints,
    )
@pytest.fixture
def sample_graph():
    """Graph with two event-loop entry nodes converging on one terminal node."""
    webhook_node = NodeSpec(
        id="process-webhook",
        name="Process Webhook",
        description="Process incoming webhook",
        node_type="event_loop",
        input_keys=["webhook_data"],
        output_keys=["result"],
    )
    api_node = NodeSpec(
        id="process-api",
        name="Process API Request",
        description="Process API request",
        node_type="event_loop",
        input_keys=["request_data"],
        output_keys=["result"],
    )
    terminal_node = NodeSpec(
        id="complete",
        name="Complete",
        description="Execution complete",
        node_type="terminal",
        input_keys=["result"],
        output_keys=["final_result"],
    )
    # Both entry nodes route to the terminal node on success.
    edges = [
        EdgeSpec(
            id="webhook-to-complete",
            source="process-webhook",
            target="complete",
            condition=EdgeCondition.ON_SUCCESS,
        ),
        EdgeSpec(
            id="api-to-complete",
            source="process-api",
            target="complete",
            condition=EdgeCondition.ON_SUCCESS,
        ),
    ]
    return GraphSpec(
        id="test-graph",
        goal_id="test-goal",
        version="1.0.0",
        entry_node="process-webhook",
        entry_points={"start": "process-webhook"},
        terminal_nodes=["complete"],
        pause_nodes=[],
        nodes=[webhook_node, api_node, terminal_node],
        edges=edges,
    )
@pytest.fixture
def temp_storage():
    """Yield a scratch directory that is removed on fixture teardown."""
    with tempfile.TemporaryDirectory() as scratch:
        yield Path(scratch)
# === SharedBufferManager Tests ===
class TestSharedBufferManager:
    """Tests for SharedBufferManager."""

    def test_create_buffer(self):
        """An execution-scoped buffer records its execution and stream IDs."""
        mgr = SharedBufferManager()
        buf = mgr.create_buffer(
            execution_id="exec-1",
            stream_id="webhook",
            isolation=IsolationLevel.SHARED,
        )
        assert buf is not None
        # Inspect private fields directly; no public accessors are exercised here.
        assert buf._execution_id == "exec-1"
        assert buf._stream_id == "webhook"

    @pytest.mark.asyncio
    async def test_isolated_state(self):
        """Writes under ISOLATED must not leak between executions."""
        mgr = SharedBufferManager()
        first = mgr.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
        second = mgr.create_buffer("exec-2", "stream-1", IsolationLevel.ISOLATED)
        await first.write("key", "value1")
        await second.write("key", "value2")
        assert await first.read("key") == "value1"
        assert await second.read("key") == "value2"

    @pytest.mark.asyncio
    async def test_shared_state(self):
        """A global-scope write under SHARED is visible to every execution."""
        mgr = SharedBufferManager()
        mgr.create_buffer("exec-1", "stream-1", IsolationLevel.SHARED)
        mgr.create_buffer("exec-2", "stream-1", IsolationLevel.SHARED)
        await mgr.write(
            key="global_key",
            value="global_value",
            execution_id="exec-1",
            stream_id="stream-1",
            isolation=IsolationLevel.SHARED,
            scope="global",
        )
        # Both executions must observe the globally-scoped value.
        for exec_id in ("exec-1", "exec-2"):
            observed = await mgr.read(
                "global_key", exec_id, "stream-1", IsolationLevel.SHARED
            )
            assert observed == "global_value"

    def test_cleanup_execution(self):
        """cleanup_execution drops all per-execution state."""
        mgr = SharedBufferManager()
        mgr.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
        assert "exec-1" in mgr._execution_state
        mgr.cleanup_execution("exec-1")
        assert "exec-1" not in mgr._execution_state
class TestSessionState:
    """Tests for session state data-buffer compatibility."""

    def test_legacy_memory_alias_populates_data_buffer(self):
        """Legacy `memory` payloads should still hydrate the session buffer."""
        legacy_memory = {"rules": "keep starred mail"}
        timestamps = SessionTimestamps(
            started_at="2026-01-01T00:00:00",
            updated_at="2026-01-01T00:00:00",
        )
        session = SessionState(
            session_id="session-1",
            goal_id="goal-1",
            timestamps=timestamps,
            memory=legacy_memory,
        )
        # The legacy alias must populate both attributes and the serialized dict.
        assert session.data_buffer == legacy_memory
        assert session.memory == legacy_memory
        assert session.to_session_state_dict()["data_buffer"] == legacy_memory
# === EventBus Tests ===
class TestEventBus:
    """Tests for EventBus pub/sub."""

    @pytest.mark.asyncio
    async def test_publish_subscribe(self):
        """A subscribed handler receives a matching published event."""
        bus = EventBus()
        seen = []

        async def capture(event: AgentEvent):
            seen.append(event)

        bus.subscribe(event_types=[EventType.EXECUTION_STARTED], handler=capture)
        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_STARTED,
                stream_id="webhook",
                execution_id="exec-1",
                data={"test": "data"},
            )
        )
        # Give the async handler a chance to run.
        await asyncio.sleep(0.1)
        assert len(seen) == 1
        assert seen[0].type == EventType.EXECUTION_STARTED
        assert seen[0].stream_id == "webhook"

    @pytest.mark.asyncio
    async def test_stream_filter(self):
        """filter_stream limits delivery to events from one stream."""
        bus = EventBus()
        seen = []

        async def capture(event: AgentEvent):
            seen.append(event)

        bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=capture,
            filter_stream="webhook",
        )
        # "webhook" should be delivered; "api" should be filtered out.
        for stream in ("webhook", "api"):
            await bus.publish(
                AgentEvent(type=EventType.EXECUTION_STARTED, stream_id=stream)
            )
        await asyncio.sleep(0.1)
        assert len(seen) == 1
        assert seen[0].stream_id == "webhook"

    def test_unsubscribe(self):
        """unsubscribe removes the subscription and reports success."""
        bus = EventBus()

        async def noop(event: AgentEvent):
            pass

        sub_id = bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=noop,
        )
        assert sub_id in bus._subscriptions
        assert bus.unsubscribe(sub_id) is True
        assert sub_id not in bus._subscriptions

    @pytest.mark.asyncio
    async def test_wait_for(self):
        """wait_for resolves once the awaited event type is published."""
        bus = EventBus()

        # Begin waiting before the event exists, then publish it.
        waiter = asyncio.create_task(
            bus.wait_for(event_type=EventType.EXECUTION_COMPLETED, timeout=1.0)
        )
        await asyncio.sleep(0.1)
        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_COMPLETED,
                stream_id="webhook",
                execution_id="exec-1",
            )
        )
        event = await waiter
        assert event is not None
        assert event.type == EventType.EXECUTION_COMPLETED
# === OutcomeAggregator Tests ===
class TestOutcomeAggregator:
    """Tests for OutcomeAggregator."""

    def test_record_decision(self, sample_goal):
        """Recording a decision updates both the counter and the log."""
        from framework.schemas.decision import Decision, DecisionType

        agg = OutcomeAggregator(sample_goal)
        agg.record_decision(
            "webhook",
            "exec-1",
            Decision(
                id="dec-1",
                node_id="process-webhook",
                intent="Process incoming webhook",
                decision_type=DecisionType.PATH_CHOICE,
                options=[],
                chosen_option_id="opt-1",
                reasoning="Standard processing path",
            ),
        )
        assert agg._total_decisions == 1
        assert len(agg._decisions) == 1

    @pytest.mark.asyncio
    async def test_evaluate_goal_progress(self, sample_goal):
        """Progress evaluation returns every expected report section."""
        agg = OutcomeAggregator(sample_goal)
        progress = await agg.evaluate_goal_progress()
        for key in (
            "overall_progress",
            "criteria_status",
            "constraint_violations",
            "recommendation",
        ):
            assert key in progress

    def test_record_constraint_violation(self, sample_goal):
        """A recorded violation is stored with its constraint ID."""
        agg = OutcomeAggregator(sample_goal)
        agg.record_constraint_violation(
            constraint_id="c-1",
            description="Rate limit exceeded",
            violation_details="More than 100 requests/minute",
            stream_id="webhook",
            execution_id="exec-1",
        )
        assert len(agg._constraint_violations) == 1
        assert agg._constraint_violations[0].constraint_id == "c-1"
# === AgentRuntime Tests ===
class TestAgentRuntime:
    """Tests for AgentRuntime orchestration."""

    @staticmethod
    def _build_runtime(graph, goal, storage):
        """Construct a bare AgentRuntime for a test case."""
        return AgentRuntime(graph=graph, goal=goal, storage_path=storage)

    @staticmethod
    def _webhook_entry():
        """Build the standard webhook entry point spec used by several tests."""
        return EntryPointSpec(
            id="webhook",
            name="Webhook Handler",
            entry_node="process-webhook",
            trigger_type="webhook",
        )

    def test_register_entry_point(self, sample_graph, sample_goal, temp_storage):
        """A registered entry point appears in the runtime's registry."""
        runtime = self._build_runtime(sample_graph, sample_goal, temp_storage)
        runtime.register_entry_point(
            EntryPointSpec(
                id="manual",
                name="Manual Trigger",
                entry_node="process-webhook",
                trigger_type="manual",
            )
        )
        assert "manual" in runtime._entry_points
        assert len(runtime.get_entry_points()) == 1

    def test_register_duplicate_entry_point_fails(self, sample_graph, sample_goal, temp_storage):
        """Registering the same entry point ID twice raises ValueError."""
        runtime = self._build_runtime(sample_graph, sample_goal, temp_storage)
        spec = self._webhook_entry()
        runtime.register_entry_point(spec)
        with pytest.raises(ValueError, match="already registered"):
            runtime.register_entry_point(spec)

    def test_register_invalid_entry_node_fails(self, sample_graph, sample_goal, temp_storage):
        """Entry points must reference a node that exists in the graph."""
        runtime = self._build_runtime(sample_graph, sample_goal, temp_storage)
        bad_spec = EntryPointSpec(
            id="invalid",
            name="Invalid Entry",
            entry_node="nonexistent-node",
            trigger_type="manual",
        )
        with pytest.raises(ValueError, match="not found in graph"):
            runtime.register_entry_point(bad_spec)

    @pytest.mark.asyncio
    async def test_start_stop_lifecycle(self, sample_graph, sample_goal, temp_storage):
        """start() spins up one stream per entry point; stop() tears them down."""
        runtime = self._build_runtime(sample_graph, sample_goal, temp_storage)
        runtime.register_entry_point(self._webhook_entry())
        assert not runtime.is_running
        await runtime.start()
        assert runtime.is_running
        assert "webhook" in runtime._streams
        await runtime.stop()
        assert not runtime.is_running
        assert len(runtime._streams) == 0

    @pytest.mark.asyncio
    async def test_trigger_requires_running(self, sample_graph, sample_goal, temp_storage):
        """trigger() on a stopped runtime raises RuntimeError."""
        runtime = self._build_runtime(sample_graph, sample_goal, temp_storage)
        runtime.register_entry_point(self._webhook_entry())
        with pytest.raises(RuntimeError, match="not running"):
            await runtime.trigger("webhook", {"test": "data"})
# === GraphSpec Validation Tests ===
# === Integration Tests ===
class TestCreateAgentRuntime:
    """Tests for the create_agent_runtime factory."""

    def test_create_with_entry_points(self, sample_graph, sample_goal, temp_storage):
        """The factory registers every entry point it is handed."""
        specs = [
            EntryPointSpec(
                id="webhook",
                name="Webhook",
                entry_node="process-webhook",
                trigger_type="webhook",
            ),
            EntryPointSpec(
                id="api",
                name="API",
                entry_node="process-api",
                trigger_type="api",
            ),
        ]
        runtime = create_agent_runtime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
            entry_points=specs,
        )
        assert len(runtime.get_entry_points()) == 2
        for entry_id in ("webhook", "api"):
            assert entry_id in runtime._entry_points
# === Timer Entry Point Tests ===
class TestTimerEntryPoints:
    """Tests for timer-driven entry points (interval and cron).

    All tests register a timer entry point against the ``process-webhook``
    node and inspect the runtime's timer bookkeeping (``_timer_tasks`` /
    ``_timer_next_fire``) rather than waiting for a timer to actually fire.
    """

    @pytest.mark.asyncio
    async def test_interval_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
        """Test that interval_minutes timer creates an async task."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-interval",
            name="Interval Timer",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={"interval_minutes": 60},
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            assert len(runtime._timer_tasks) == 1
            assert not runtime._timer_tasks[0].done()
            # Give the async task a moment to set next_fire
            await asyncio.sleep(0.05)
            assert "timer-interval" in runtime._timer_next_fire
        finally:
            await runtime.stop()
        # stop() cancels and clears all timer tasks.
        assert len(runtime._timer_tasks) == 0

    @pytest.mark.asyncio
    async def test_cron_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
        """Test that cron expression timer creates an async task."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-cron",
            name="Cron Timer",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={"cron": "*/5 * * * *"},  # Every 5 minutes
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            assert len(runtime._timer_tasks) == 1
            assert not runtime._timer_tasks[0].done()
            # Give the async task a moment to set next_fire
            await asyncio.sleep(0.05)
            assert "timer-cron" in runtime._timer_next_fire
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_invalid_cron_expression_skipped(
        self, sample_graph, sample_goal, temp_storage, caplog
    ):
        """Test that an invalid cron expression logs a warning and skips."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-bad-cron",
            name="Bad Cron Timer",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={"cron": "not a cron expression"},
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            # No timer task is created for an unparseable expression.
            assert len(runtime._timer_tasks) == 0
            # Case-insensitive match on the warning; the previous
            # `or "Invalid cron" in caplog.text` disjunct was redundant
            # with the lowercased check and has been dropped.
            assert "invalid cron" in caplog.text.lower()
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_cron_takes_priority_over_interval(
        self, sample_graph, sample_goal, temp_storage, caplog
    ):
        """Test that when both cron and interval_minutes are set, cron wins."""
        import logging

        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-both",
            name="Both Timer",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={"cron": "0 9 * * *", "interval_minutes": 30},
        )
        runtime.register_entry_point(entry_spec)
        with caplog.at_level(logging.INFO):
            await runtime.start()
        try:
            assert len(runtime._timer_tasks) == 1
            # Should log cron, not interval
            assert any("cron" in r.message.lower() for r in caplog.records)
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_no_interval_or_cron_warns(self, sample_graph, sample_goal, temp_storage, caplog):
        """Test that timer with neither cron nor interval_minutes logs a warning."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-empty",
            name="Empty Timer",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={},
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            assert len(runtime._timer_tasks) == 0
            assert "no 'cron' or valid 'interval_minutes'" in caplog.text
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_cron_immediate_fires_first(self, sample_graph, sample_goal, temp_storage):
        """Test that run_immediately=True with cron doesn't set next_fire before first run."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-cron-immediate",
            name="Cron Immediate",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={"cron": "0 0 * * *", "run_immediately": True},
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            assert len(runtime._timer_tasks) == 1
            # With run_immediately, the task enters the while loop directly,
            # so _timer_next_fire is NOT set before the first trigger attempt
            # (it pops it at the top of the loop)
            # Give it a moment to start executing
            await asyncio.sleep(0.05)
            # Task should still be running (it will try to trigger and likely fail
            # since there's no LLM, but the task itself continues)
            assert not runtime._timer_tasks[0].done()
        finally:
            await runtime.stop()
# === Cancel All Tasks Tests ===
class TestCancelAllTasks:
    """Tests for cancel_all_tasks and cancel_all_tasks_async.

    Running work is simulated by injecting never-completing asyncio tasks
    into the streams' ``_execution_tasks`` maps; no real graph execution
    takes place.
    """

    @pytest.mark.asyncio
    async def test_cancel_all_tasks_async_returns_false_when_no_tasks(
        self, sample_graph, sample_goal, temp_storage
    ):
        """Test that cancel_all_tasks_async returns False with no running tasks."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="webhook",
            name="Webhook",
            entry_node="process-webhook",
            trigger_type="webhook",
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            result = await runtime.cancel_all_tasks_async()
            assert result is False
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_cancel_all_tasks_async_cancels_running_task(
        self, sample_graph, sample_goal, temp_storage
    ):
        """Test that cancel_all_tasks_async cancels a running task and returns True."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="webhook",
            name="Webhook",
            entry_node="process-webhook",
            trigger_type="webhook",
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            # Inject a fake running task into the stream
            stream = runtime._streams["webhook"]

            async def hang_forever():
                # Await a future that never resolves. get_running_loop()
                # replaces the deprecated get_event_loop() call; inside a
                # coroutine both return the running loop.
                await asyncio.get_running_loop().create_future()

            fake_task = asyncio.ensure_future(hang_forever())
            stream._execution_tasks["fake-exec"] = fake_task
            result = await runtime.cancel_all_tasks_async()
            assert result is True
            # Let the CancelledError propagate
            try:
                await fake_task
            except asyncio.CancelledError:
                pass
            assert fake_task.cancelled()
            # Clean up
            del stream._execution_tasks["fake-exec"]
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_cancel_all_tasks_async_cancels_multiple_tasks_across_streams(
        self, sample_graph, sample_goal, temp_storage
    ):
        """Test that cancel_all_tasks_async cancels tasks across multiple streams."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        # Register two entry points so we get two streams
        runtime.register_entry_point(
            EntryPointSpec(
                id="stream-a",
                name="Stream A",
                entry_node="process-webhook",
                trigger_type="webhook",
            )
        )
        runtime.register_entry_point(
            EntryPointSpec(
                id="stream-b",
                name="Stream B",
                entry_node="process-webhook",
                trigger_type="webhook",
            )
        )
        await runtime.start()
        try:

            async def hang_forever():
                # get_running_loop() replaces the deprecated get_event_loop().
                await asyncio.get_running_loop().create_future()

            stream_a = runtime._streams["stream-a"]
            stream_b = runtime._streams["stream-b"]
            # Two tasks in stream A, one task in stream B
            task_a1 = asyncio.ensure_future(hang_forever())
            task_a2 = asyncio.ensure_future(hang_forever())
            task_b1 = asyncio.ensure_future(hang_forever())
            stream_a._execution_tasks["exec-a1"] = task_a1
            stream_a._execution_tasks["exec-a2"] = task_a2
            stream_b._execution_tasks["exec-b1"] = task_b1
            result = await runtime.cancel_all_tasks_async()
            assert result is True
            # Let CancelledErrors propagate
            for task in [task_a1, task_a2, task_b1]:
                try:
                    await task
                except asyncio.CancelledError:
                    pass
                assert task.cancelled()
            # Clean up
            del stream_a._execution_tasks["exec-a1"]
            del stream_a._execution_tasks["exec-a2"]
            del stream_b._execution_tasks["exec-b1"]
        finally:
            await runtime.stop()
# Allow running this test module directly (python <file>) outside the pytest CLI.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
@@ -1,268 +0,0 @@
"""Tests for webhook idempotency key support in AgentRuntime.trigger()."""
import asyncio
import time
from collections import OrderedDict
from unittest.mock import AsyncMock, MagicMock
import pytest
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
def _make_runtime(ttl=300.0, max_keys=10000):
    """Build a bare AgentRuntime carrying only the idempotency-cache state.

    ``object.__new__`` bypasses ``__init__`` and its heavy dependencies
    (storage, LLM, skills); these tests only touch the cache and config.
    """
    rt = object.__new__(AgentRuntime)
    rt._config = AgentRuntimeConfig(
        idempotency_ttl_seconds=ttl, idempotency_max_keys=max_keys
    )
    rt._running = True
    rt._lock = asyncio.Lock()
    # The cache proper: key -> execution id (insertion-ordered for FIFO
    # eviction) plus key -> insertion timestamp.
    rt._idempotency_keys = OrderedDict()
    rt._idempotency_times = {}
    rt._entry_points = {}
    rt._streams = {}
    rt._graphs = {}
    rt._graph_id = "primary"
    rt._active_graph_id = "primary"
    return rt
def _make_runtime_with_stream(ttl=300.0, max_keys=10000):
    """Create a mock runtime whose stream.execute() returns unique IDs."""
    rt = _make_runtime(ttl=ttl, max_keys=max_keys)
    # Mutable cell instead of nonlocal so the closure stays a one-liner body.
    calls = [0]

    async def _fake_execute(*args, **kwargs):
        calls[0] += 1
        return f"session-{calls[0]:04d}"

    stream = MagicMock()
    stream.execute = _fake_execute
    rt._streams = {"webhook": stream}
    rt._entry_points = {"webhook": MagicMock()}
    return rt
class TestIdempotencyConfig:
    """Verify idempotency configuration defaults."""

    def test_default_ttl(self):
        """Keys are remembered for five minutes by default."""
        assert AgentRuntimeConfig().idempotency_ttl_seconds == 300.0

    def test_default_max_keys(self):
        """The cache caps out at 10k keys by default."""
        assert AgentRuntimeConfig().idempotency_max_keys == 10000

    def test_custom_config(self):
        """Both knobs are overridable at construction time."""
        cfg = AgentRuntimeConfig(idempotency_ttl_seconds=60.0, idempotency_max_keys=100)
        assert cfg.idempotency_ttl_seconds == 60.0
        assert cfg.idempotency_max_keys == 100
class TestIdempotencyCache:
    """Exercise the idempotency cache and its pruning logic directly."""

    @staticmethod
    def _seed(runtime, key, exec_id, age=0.0):
        """Insert *key* -> *exec_id* into the cache, backdated by *age* seconds."""
        runtime._idempotency_keys[key] = exec_id
        runtime._idempotency_times[key] = time.time() - age

    def test_cache_stores_and_retrieves_key(self):
        rt = _make_runtime()
        self._seed(rt, "stripe-evt-123", "exec-001")
        assert rt._idempotency_keys.get("stripe-evt-123") == "exec-001"

    def test_cache_returns_none_for_unknown_key(self):
        rt = _make_runtime()
        assert rt._idempotency_keys.get("unknown") is None

    def test_prune_removes_expired_keys(self):
        rt = _make_runtime(ttl=0.1)
        self._seed(rt, "old-key", "exec-old", age=1.0)  # already past the TTL
        rt._prune_idempotency_keys()
        assert "old-key" not in rt._idempotency_keys
        assert "old-key" not in rt._idempotency_times

    def test_prune_keeps_fresh_keys(self):
        rt = _make_runtime(ttl=300.0)
        self._seed(rt, "fresh-key", "exec-fresh")
        rt._prune_idempotency_keys()
        assert "fresh-key" in rt._idempotency_keys

    def test_prune_respects_max_keys(self):
        rt = _make_runtime(max_keys=2)
        for i in range(3):
            self._seed(rt, f"key-{i}", f"exec-{i}")
        rt._prune_idempotency_keys()
        assert len(rt._idempotency_keys) == 2
        # FIFO eviction: the oldest insertion (key-0) goes first.
        assert "key-0" not in rt._idempotency_keys
        assert "key-1" in rt._idempotency_keys
        assert "key-2" in rt._idempotency_keys

    def test_prune_evicts_fifo(self):
        rt = _make_runtime(max_keys=1)
        self._seed(rt, "first", "exec-1")
        self._seed(rt, "second", "exec-2")
        rt._prune_idempotency_keys()
        assert len(rt._idempotency_keys) == 1
        assert "second" in rt._idempotency_keys
        assert "first" not in rt._idempotency_keys

    def test_mixed_expired_and_max_size(self):
        rt = _make_runtime(ttl=0.1, max_keys=2)
        self._seed(rt, "expired", "exec-e", age=1.0)
        self._seed(rt, "fresh-1", "exec-f1")
        self._seed(rt, "fresh-2", "exec-f2")
        rt._prune_idempotency_keys()
        # The expired entry is dropped, leaving exactly max_keys fresh ones.
        assert "expired" not in rt._idempotency_keys
        assert "fresh-1" in rt._idempotency_keys
        assert "fresh-2" in rt._idempotency_keys
class TestTriggerIdempotency:
    """Tests for trigger() idempotency deduplication."""

    @staticmethod
    def _param(func, name):
        """Return the inspect.Parameter called *name* on *func*, or None."""
        import inspect

        return inspect.signature(func).parameters.get(name)

    def test_trigger_accepts_idempotency_key(self):
        """trigger() accepts idempotency_key as a keyword argument."""
        assert self._param(AgentRuntime.trigger, "idempotency_key") is not None

    def test_idempotency_key_defaults_to_none(self):
        """idempotency_key defaults to None (backward compatible)."""
        assert self._param(AgentRuntime.trigger, "idempotency_key").default is None

    def test_trigger_and_wait_accepts_idempotency_key(self):
        """trigger_and_wait() also accepts idempotency_key."""
        assert self._param(AgentRuntime.trigger_and_wait, "idempotency_key") is not None

    def test_trigger_and_wait_idempotency_key_defaults_to_none(self):
        """trigger_and_wait() idempotency_key defaults to None."""
        assert self._param(AgentRuntime.trigger_and_wait, "idempotency_key").default is None

    @pytest.mark.asyncio
    async def test_duplicate_key_returns_cached_id(self):
        """Same idempotency key within TTL returns the cached execution ID."""
        rt = _make_runtime_with_stream()
        first = await rt.trigger("webhook", {}, idempotency_key="stripe-evt-001")
        second = await rt.trigger("webhook", {}, idempotency_key="stripe-evt-001")
        assert first == second == "session-0001"

    @pytest.mark.asyncio
    async def test_different_keys_produce_different_ids(self):
        """Different idempotency keys start separate executions."""
        rt = _make_runtime_with_stream()
        id_a = await rt.trigger("webhook", {}, idempotency_key="evt-aaa")
        id_b = await rt.trigger("webhook", {}, idempotency_key="evt-bbb")
        assert id_a != id_b
        assert (id_a, id_b) == ("session-0001", "session-0002")

    @pytest.mark.asyncio
    async def test_none_key_always_starts_new_execution(self):
        """key=None (default) skips dedup — every call starts fresh."""
        rt = _make_runtime_with_stream()
        first = await rt.trigger("webhook", {})
        second = await rt.trigger("webhook", {})
        assert first != second
        assert len(rt._idempotency_keys) == 0  # nothing cached

    @pytest.mark.asyncio
    async def test_expired_key_allows_new_execution(self):
        """After TTL expires, the same key starts a new execution."""
        rt = _make_runtime_with_stream(ttl=0.1)
        first = await rt.trigger("webhook", {}, idempotency_key="evt-expire")
        # Backdate the cached timestamp so the key looks expired
        rt._idempotency_times["evt-expire"] = time.time() - 1.0
        second = await rt.trigger("webhook", {}, idempotency_key="evt-expire")
        assert first != second
        assert first == "session-0001"
        assert second == "session-0002"

    @pytest.mark.asyncio
    async def test_stream_not_found_does_not_cache(self):
        """If entry point doesn't exist, nothing is cached."""
        rt = _make_runtime_with_stream()
        with pytest.raises(ValueError, match="not found"):
            await rt.trigger("nonexistent", {}, idempotency_key="evt-orphan")
        assert "evt-orphan" not in rt._idempotency_keys

    @pytest.mark.asyncio
    async def test_execute_error_does_not_cache(self):
        """If stream.execute() raises, nothing is cached so retries can go through."""
        rt = _make_runtime()
        failing_stream = MagicMock()
        failing_stream.execute = AsyncMock(side_effect=RuntimeError("stream not running"))
        rt._streams = {"webhook": failing_stream}
        rt._entry_points = {"webhook": MagicMock()}
        with pytest.raises(RuntimeError, match="stream not running"):
            await rt.trigger("webhook", {}, idempotency_key="evt-123")
        assert "evt-123" not in rt._idempotency_keys

    @pytest.mark.asyncio
    async def test_cache_holds_real_execution_id(self):
        """Cached value matches the actual execution ID from execute()."""
        rt = _make_runtime_with_stream()
        exec_id = await rt.trigger("webhook", {}, idempotency_key="evt-real")
        assert rt._idempotency_keys.get("evt-real") == exec_id == "session-0001"
@@ -1,29 +0,0 @@
"""Tests for custom session-backed runtime logging paths."""
from pathlib import Path
from unittest.mock import MagicMock
from framework.graph.executor import GraphExecutor
from framework.runtime.runtime_log_store import RuntimeLogStore
from framework.runtime.runtime_logger import RuntimeLogger
def test_graph_executor_uses_custom_session_dir_name_for_runtime_logs():
    """The runtime-log session id is derived from the sessions/<name> directory."""
    session_dir = Path("/tmp/test-agent/sessions/my-custom-session")
    executor = GraphExecutor(runtime=MagicMock(), storage_path=session_dir)
    assert executor._get_runtime_log_session_id() == "my-custom-session"
def test_runtime_logger_creates_session_log_dir_for_custom_session_id(tmp_path):
    """start_run() honours a custom session id and creates its logs directory."""
    agent_base = tmp_path / ".hive" / "agents" / "test_agent"
    agent_base.mkdir(parents=True)
    logger = RuntimeLogger(store=RuntimeLogStore(agent_base), agent_id="test-agent")
    run_id = logger.start_run(goal_id="goal-1", session_id="my-custom-session")
    # The custom session id is used verbatim as the run id and on disk.
    assert run_id == "my-custom-session"
    assert (agent_base / "sessions" / "my-custom-session" / "logs").is_dir()
@@ -1,716 +0,0 @@
"""
Tests for WebhookServer and event-driven entry points.
"""
import asyncio
import hashlib
import hmac as hmac_mod
import json
import tempfile
from pathlib import Path
from unittest.mock import patch
import aiohttp
import pytest
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
from framework.runtime.execution_stream import EntryPointSpec
from framework.runtime.webhook_server import (
WebhookRoute,
WebhookServer,
WebhookServerConfig,
)
def _make_server(event_bus: EventBus, routes: list[WebhookRoute] | None = None):
    """Build a WebhookServer on 127.0.0.1 with an OS-assigned port (port=0)."""
    server = WebhookServer(event_bus, WebhookServerConfig(host="127.0.0.1", port=0))
    if routes:
        for route in routes:
            server.add_route(route)
    return server
def _base_url(server: WebhookServer) -> str:
    """Base URL of a *running* server (its port is only assigned while running)."""
    return "http://127.0.0.1:" + str(server.port)
class TestWebhookServerLifecycle:
    """Tests for server start/stop."""

    @pytest.mark.asyncio
    async def test_start_stop(self):
        """start() binds a port; stop() releases it and clears the port."""
        server = _make_server(
            EventBus(),
            [WebhookRoute(source_id="test", path="/webhooks/test", methods=["POST"])],
        )
        await server.start()
        assert server.is_running
        assert server.port is not None
        await server.stop()
        assert not server.is_running
        assert server.port is None

    @pytest.mark.asyncio
    async def test_no_routes_skips_start(self):
        """A server with no routes declines to start at all."""
        server = _make_server(EventBus())  # no routes
        await server.start()
        assert not server.is_running

    @pytest.mark.asyncio
    async def test_stop_when_not_started(self):
        """stop() on a never-started server is a harmless no-op."""
        server = _make_server(EventBus())
        await server.stop()
        assert not server.is_running
class TestWebhookEventPublishing:
    """Tests for HTTP request -> EventBus event publishing."""

    @pytest.mark.asyncio
    async def test_post_publishes_webhook_received(self):
        """A POST to a registered route publishes one WEBHOOK_RECEIVED event."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="gh", path="/webhooks/github", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/github",
                    json={"action": "opened", "number": 42},
                ) as resp:
                    # 202 Accepted: the request is queued for async handling.
                    assert resp.status == 202
                    body = await resp.json()
                    assert body["status"] == "accepted"
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
            event = received[0]
            assert event.type == EventType.WEBHOOK_RECEIVED
            assert event.stream_id == "gh"
            assert event.data["path"] == "/webhooks/github"
            assert event.data["method"] == "POST"
            assert event.data["payload"] == {"action": "opened", "number": 42}
            assert isinstance(event.data["headers"], dict)
            assert event.data["query_params"] == {}
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_query_params_included(self):
        """URL query parameters are forwarded on the published event."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="hook", path="/webhooks/hook", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/hook?source=test&v=2",
                    json={"data": "hello"},
                ) as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
            assert received[0].data["query_params"] == {"source": "test", "v": "2"}
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_non_json_body(self):
        """A non-JSON body is wrapped under a "raw_body" key in the payload."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="raw", path="/webhooks/raw", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/raw",
                    data=b"plain text body",
                    headers={"Content-Type": "text/plain"},
                ) as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
            assert received[0].data["payload"] == {"raw_body": "plain text body"}
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_empty_body(self):
        """An empty request body results in an empty payload dict."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="empty", path="/webhooks/empty", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(f"{_base_url(server)}/webhooks/empty") as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
            assert received[0].data["payload"] == {}
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_multiple_routes(self):
        """Each registered route publishes events under its own source_id."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]),
                WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/a", json={"from": "a"}
                ) as resp:
                    assert resp.status == 202
                async with session.post(
                    f"{_base_url(server)}/webhooks/b", json={"from": "b"}
                ) as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 2
            stream_ids = {e.stream_id for e in received}
            assert stream_ids == {"a", "b"}
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_filter_stream_subscription(self):
        """Subscribers can filter by stream_id (source_id)."""
        bus = EventBus()
        a_events = []
        b_events = []

        async def handle_a(event):
            a_events.append(event)

        async def handle_b(event):
            b_events.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_a, filter_stream="a")
        bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_b, filter_stream="b")
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]),
                WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                await session.post(f"{_base_url(server)}/webhooks/a", json={"x": 1})
                await session.post(f"{_base_url(server)}/webhooks/b", json={"x": 2})
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            # Each filtered handler sees only its own stream's event.
            assert len(a_events) == 1
            assert a_events[0].data["payload"] == {"x": 1}
            assert len(b_events) == 1
            assert b_events[0].data["payload"] == {"x": 2}
        finally:
            await server.stop()
class TestHMACVerification:
    """Tests for HMAC-SHA256 signature verification."""

    @pytest.mark.asyncio
    async def test_valid_signature_accepted(self):
        """A correctly signed request is accepted (202) and published."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        secret = "test-secret-key"
        server = _make_server(
            bus,
            [
                WebhookRoute(
                    source_id="secure",
                    path="/webhooks/secure",
                    methods=["POST"],
                    secret=secret,
                ),
            ],
        )
        await server.start()
        try:
            # Sign the exact request bytes; header uses the GitHub-style
            # "sha256=<hexdigest>" format.
            body = json.dumps({"event": "push"}).encode()
            sig = hmac_mod.new(secret.encode(), body, hashlib.sha256).hexdigest()
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/secure",
                    data=body,
                    headers={
                        "Content-Type": "application/json",
                        "X-Hub-Signature-256": f"sha256={sig}",
                    },
                ) as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_invalid_signature_rejected(self):
        """A wrong signature is rejected with 401 and nothing is published."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(
                    source_id="secure",
                    path="/webhooks/secure",
                    methods=["POST"],
                    secret="real-secret",
                ),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/secure",
                    json={"event": "push"},
                    headers={"X-Hub-Signature-256": "sha256=invalidsignature"},
                ) as resp:
                    assert resp.status == 401
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 0  # No event published
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_missing_signature_rejected(self):
        """Omitting the signature header on a secured route yields 401."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(
                    source_id="secure",
                    path="/webhooks/secure",
                    methods=["POST"],
                    secret="my-secret",
                ),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                # No X-Hub-Signature-256 header
                async with session.post(
                    f"{_base_url(server)}/webhooks/secure",
                    json={"event": "push"},
                ) as resp:
                    assert resp.status == 401
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 0
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_no_secret_skips_verification(self):
        """Routes without a secret accept any request."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(
                    source_id="open",
                    path="/webhooks/open",
                    methods=["POST"],
                    secret=None,
                ),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/open",
                    json={"data": "test"},
                ) as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
        finally:
            await server.stop()
class TestEventDrivenEntryPoints:
    """Tests for event-driven entry points wired through AgentRuntime."""

    def _make_graph_and_goal(self):
        """Minimal graph + goal for testing entry point triggering."""
        from framework.graph import Goal
        from framework.graph.edge import GraphSpec
        from framework.graph.goal import SuccessCriterion
        from framework.graph.node import NodeSpec

        # Single event_loop node so the graph validates; it is never executed
        # in these tests (trigger() is patched out).
        nodes = [
            NodeSpec(
                id="process-event",
                name="Process Event",
                description="Process incoming event",
                node_type="event_loop",
                input_keys=["event"],
                output_keys=["result"],
            ),
        ]
        graph = GraphSpec(
            id="test-graph",
            goal_id="test-goal",
            version="1.0.0",
            entry_node="process-event",
            entry_points={"start": "process-event"},
            terminal_nodes=[],
            pause_nodes=[],
            nodes=nodes,
            edges=[],
        )
        goal = Goal(
            id="test-goal",
            name="Test Goal",
            description="Test",
            success_criteria=[
                SuccessCriterion(
                    id="sc-1",
                    description="Done",
                    metric="done",
                    target="yes",
                    weight=1.0,
                ),
            ],
        )
        return graph, goal

    @pytest.mark.asyncio
    async def test_event_entry_point_subscribes_to_bus(self):
        """Entry point with trigger_type='event' subscribes and triggers on matching events."""
        graph, goal = self._make_graph_and_goal()
        # webhook_port=0 asks the OS for a free port.
        config = AgentRuntimeConfig(
            webhook_host="127.0.0.1",
            webhook_port=0,
            webhook_routes=[
                {"source_id": "gh", "path": "/webhooks/github"},
            ],
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            runtime = AgentRuntime(
                graph=graph,
                goal=goal,
                storage_path=Path(tmpdir),
                config=config,
            )
            runtime.register_entry_point(
                EntryPointSpec(
                    id="gh-handler",
                    name="GitHub Handler",
                    entry_node="process-event",
                    trigger_type="event",
                    trigger_config={
                        "event_types": ["webhook_received"],
                        "filter_stream": "gh",
                    },
                )
            )
            trigger_calls = []

            # Patch trigger() so no real graph execution (and no LLM) runs.
            async def mock_trigger(ep_id, data, **kwargs):
                trigger_calls.append((ep_id, data))

            with patch.object(runtime, "trigger", side_effect=mock_trigger):
                await runtime.start()
                try:
                    assert runtime.webhook_server is not None
                    assert runtime.webhook_server.is_running
                    port = runtime.webhook_server.port
                    async with aiohttp.ClientSession() as session:
                        async with session.post(
                            f"http://127.0.0.1:{port}/webhooks/github",
                            json={"action": "push", "ref": "main"},
                        ) as resp:
                            assert resp.status == 202
                    # Allow HTTP -> event bus -> entry point dispatch to settle.
                    await asyncio.sleep(0.1)
                    assert len(trigger_calls) == 1
                    ep_id, data = trigger_calls[0]
                    assert ep_id == "gh-handler"
                    assert "event" in data
                    assert data["event"]["type"] == "webhook_received"
                    assert data["event"]["stream_id"] == "gh"
                    assert data["event"]["data"]["payload"] == {
                        "action": "push",
                        "ref": "main",
                    }
                finally:
                    await runtime.stop()
                    # stop() tears the webhook server down entirely.
                    assert runtime.webhook_server is None

    @pytest.mark.asyncio
    async def test_event_entry_point_filter_stream(self):
        """Entry point only triggers for matching stream_id (source_id)."""
        graph, goal = self._make_graph_and_goal()
        config = AgentRuntimeConfig(
            webhook_routes=[
                {"source_id": "github", "path": "/webhooks/github"},
                {"source_id": "stripe", "path": "/webhooks/stripe"},
            ],
            webhook_port=0,
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            runtime = AgentRuntime(
                graph=graph,
                goal=goal,
                storage_path=Path(tmpdir),
                config=config,
            )
            runtime.register_entry_point(
                EntryPointSpec(
                    id="gh-only",
                    name="GitHub Only",
                    entry_node="process-event",
                    trigger_type="event",
                    trigger_config={
                        "event_types": ["webhook_received"],
                        "filter_stream": "github",
                    },
                )
            )
            trigger_calls = []

            async def mock_trigger(ep_id, data, **kwargs):
                trigger_calls.append((ep_id, data))

            with patch.object(runtime, "trigger", side_effect=mock_trigger):
                await runtime.start()
                try:
                    port = runtime.webhook_server.port
                    async with aiohttp.ClientSession() as session:
                        # POST to stripe — should NOT trigger
                        await session.post(
                            f"http://127.0.0.1:{port}/webhooks/stripe",
                            json={"type": "payment"},
                        )
                        # POST to github — should trigger
                        await session.post(
                            f"http://127.0.0.1:{port}/webhooks/github",
                            json={"action": "opened"},
                        )
                    await asyncio.sleep(0.1)
                    assert len(trigger_calls) == 1
                    assert trigger_calls[0][0] == "gh-only"
                finally:
                    await runtime.stop()

    @pytest.mark.asyncio
    async def test_no_webhook_routes_skips_server(self):
        """Runtime without webhook_routes does not start a webhook server."""
        graph, goal = self._make_graph_and_goal()
        with tempfile.TemporaryDirectory() as tmpdir:
            runtime = AgentRuntime(
                graph=graph,
                goal=goal,
                storage_path=Path(tmpdir),
            )
            runtime.register_entry_point(
                EntryPointSpec(
                    id="manual",
                    name="Manual",
                    entry_node="process-event",
                    trigger_type="manual",
                )
            )
            await runtime.start()
            try:
                assert runtime.webhook_server is None
            finally:
                await runtime.stop()

    @pytest.mark.asyncio
    async def test_event_entry_point_custom_event(self):
        """Entry point can subscribe to CUSTOM events, not just webhooks."""
        graph, goal = self._make_graph_and_goal()
        with tempfile.TemporaryDirectory() as tmpdir:
            runtime = AgentRuntime(
                graph=graph,
                goal=goal,
                storage_path=Path(tmpdir),
            )
            runtime.register_entry_point(
                EntryPointSpec(
                    id="custom-handler",
                    name="Custom Handler",
                    entry_node="process-event",
                    trigger_type="event",
                    trigger_config={
                        "event_types": ["custom"],
                    },
                )
            )
            trigger_calls = []

            async def mock_trigger(ep_id, data, **kwargs):
                trigger_calls.append((ep_id, data))

            with patch.object(runtime, "trigger", side_effect=mock_trigger):
                await runtime.start()
                try:
                    # Publish directly on the runtime's bus — no HTTP involved.
                    await runtime.event_bus.publish(
                        AgentEvent(
                            type=EventType.CUSTOM,
                            stream_id="some-source",
                            data={"key": "value"},
                        )
                    )
                    await asyncio.sleep(0.1)
                    assert len(trigger_calls) == 1
                    assert trigger_calls[0][0] == "custom-handler"
                    assert trigger_calls[0][1]["event"]["type"] == "custom"
                    assert trigger_calls[0][1]["event"]["data"]["key"] == "value"
                finally:
                    await runtime.stop()
+192
View File
@@ -0,0 +1,192 @@
"""Declarative agent configuration schema.
Allows defining agents via JSON/YAML config files instead of Python modules.
The ``AgentConfig`` model is the top-level schema loaded from ``agent.json``.
The runner detects this format by checking for a ``name`` key at the top level.
Template variables
------------------
System prompts and identity_prompt support ``{{variable_name}}`` placeholders.
These are resolved at load time from ``AgentConfig.variables``.
"""
from __future__ import annotations
from pydantic import BaseModel, Field
class ToolAccessConfig(BaseModel):
    """Declarative tool access policy.

    Controls which tools a node/agent has access to.

    * ``all`` -- every tool from the registry.
    * ``explicit`` -- only tools listed in ``allowed`` (default; empty = zero tools).
    * ``none`` -- no tools at all.

    ``denied`` is subtracted after ``allowed`` is applied, so it can carve
    exceptions out of either policy.
    """

    # NOTE(review): the policy string is not validated by this model; an
    # unknown value is accepted here -- presumably rejected by the loader.
    # TODO confirm.
    policy: str = Field(
        default="explicit",
        description="One of: 'all', 'explicit', 'none'.",
    )
    allowed: list[str] = Field(
        default_factory=list,
        description="Tool names when policy='explicit'.",
    )
    denied: list[str] = Field(
        default_factory=list,
        description="Tool names to deny (applied after allowed).",
    )
class NodeConfig(BaseModel):
    """Declarative node definition.

    One entry per node in the execution graph. Optional fields left unset
    fall back to agent-level defaults where the schema provides them
    (``tools``, ``model``).
    """

    # Unique node ID; referenced by edges, entry_node, terminal/pause lists.
    id: str
    name: str | None = None
    description: str | None = None
    # Only "event_loop" is documented as a valid value in this schema.
    node_type: str = Field(
        default="event_loop",
        description="event_loop",
    )
    # Node-specific system prompt; supports {{variable}} templating
    # (resolved at load time from AgentConfig.variables, per module docstring).
    system_prompt: str | None = None
    # Defaults to policy='explicit' with an empty allow list, i.e. no tools.
    tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig)
    # Per-node model override; None presumably means "inherit the
    # agent-level model" -- TODO confirm in the loader.
    model: str | None = None
    # Keys this node reads from / writes to shared state (names only;
    # enforcement happens outside this schema).
    input_keys: list[str] = Field(default_factory=list)
    output_keys: list[str] = Field(default_factory=list)
    # Output keys that are allowed to be missing/None.
    nullable_output_keys: list[str] = Field(default_factory=list)
    max_iterations: int = 30
    max_node_visits: int = 1
    # Whether this node interacts directly with the end user.
    client_facing: bool = False
    # Free-text per-node criteria; presumably consumed by a judge step
    # (skip_judge below suggests judging is per-node). TODO confirm.
    success_criteria: str | None = None
    failure_criteria: str | None = None
    skip_judge: bool = False
    # None presumably means "use the framework default retry count".
    max_retries: int | None = None
class EdgeConfig(BaseModel):
    """Declarative edge definition.

    Connects two nodes; ``condition`` decides when the edge is taken.
    """

    from_node: str = Field(description="Source node ID.")
    to_node: str = Field(description="Target node ID.")
    condition: str = Field(
        default="on_success",
        description="always | on_success | on_failure | conditional | llm_decide",
    )
    # Expression used when condition='conditional' (presumed from the
    # name -- evaluation semantics are not visible here; TODO confirm).
    condition_expr: str | None = None
    # Key-to-key mapping applied when data flows across this edge.
    # NOTE(review): mapping direction (target<-source vs source->target)
    # is not visible here -- confirm against the loader/orchestrator.
    input_mapping: dict[str, str] = Field(default_factory=dict)
    # Tie-breaker when multiple edges are eligible; which direction wins
    # is decided by the orchestrator, not this schema.
    priority: int = 1
class GoalConfig(BaseModel):
    """Simplified goal definition for declarative config.

    A trimmed-down counterpart of the runtime Goal: a free-text
    description plus optional success criteria and constraints.
    """

    # What the agent is trying to achieve, in plain language.
    description: str
    # Conditions that count as success for a run.
    success_criteria: list[str] = Field(default_factory=list)
    # Hard rules the agent must not violate while pursuing the goal.
    constraints: list[str] = Field(default_factory=list)
class EntryPointConfig(BaseModel):
    """Entry point configuration.

    Declarative counterpart of the runtime ``EntryPointSpec``: names the
    node a run starts from and how the run is triggered.
    """

    # Stable identifier; "default" matches the implicit entry point created
    # when AgentConfig.entry_points is empty.
    id: str = "default"
    name: str = "Default"
    entry_node: str | None = None  # defaults to AgentConfig.entry_node
    trigger_type: str = Field(
        default="manual",
        # "event" added to the documented values: the runtime accepts
        # event-driven entry points (EntryPointSpec with
        # trigger_type="event"), so the declarative schema should say so.
        description="manual | scheduled | timer | event",
    )
    # Trigger-specific options (e.g. event_types / filter_stream for
    # event triggers); free-form by design.
    trigger_config: dict = Field(default_factory=dict)
    isolation_level: str = "shared"
    # None presumably means "no concurrency cap" -- TODO confirm.
    max_concurrent: int | None = None
class MCPServerRef(BaseModel):
    """Reference to an MCP server to connect for this agent.

    ``name`` identifies the server; ``config`` carries optional
    server-specific options (None = use the server's defaults).
    """

    name: str
    # Free-form settings; shape depends on the referenced server.
    config: dict | None = None
class MetadataConfig(BaseModel):
    """Agent metadata for display / intro messages."""

    # Message shown when a session with the agent starts; empty = none.
    intro_message: str = ""
class AgentConfig(BaseModel):
    """Top-level declarative agent configuration.

    Load from ``agent.json`` and pass to
    :func:`framework.runner.runner.load_agent_config` to build the
    ``GraphSpec`` + ``Goal`` pair.

    Example (YAML)::

        name: lead-enrichment-agent
        version: 1.0.0
        variables:
          spreadsheet_id: "1ZVx..."
          sheet_name: "contacts"
        goal:
          description: "Enrich leads in Google Sheets"
          success_criteria:
            - "All unprocessed leads enriched"
          constraints:
            - "Browser-only research"
        identity_prompt: |
          You are the Lead Enrichment Agent...
        nodes:
          - id: start
            tools: {policy: explicit, allowed: [google_sheets_get_values]}
            system_prompt: |
              Spreadsheet ID: {{spreadsheet_id}}
        ...
    """

    # Agent name; the runner detects this config format by the presence of
    # a top-level "name" key (per module docstring).
    name: str
    version: str = "1.0.0"
    description: str | None = None
    metadata: MetadataConfig = Field(default_factory=MetadataConfig)

    # Template variables -- substituted into prompts via {{var_name}}
    # at load time (system_prompt and identity_prompt).
    variables: dict[str, str] = Field(default_factory=dict)

    # Goal
    goal: GoalConfig

    # Graph structure
    nodes: list[NodeConfig]
    edges: list[EdgeConfig]
    # Must match a NodeConfig.id; where execution begins by default.
    entry_node: str
    terminal_nodes: list[str] = Field(default_factory=list)
    pause_nodes: list[str] = Field(default_factory=list)

    # Entry points (if omitted, a single "default" manual entry is created)
    entry_points: list[EntryPointConfig] = Field(default_factory=list)

    # Agent-level tool defaults (nodes inherit unless they override)
    tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig)
    mcp_servers: list[MCPServerRef] = Field(default_factory=list)

    # LLM / execution
    model: str | None = None
    max_tokens: int = 4096
    conversation_mode: str = "continuous"
    # Shared identity preamble; supports {{variable}} templating.
    identity_prompt: str = ""
    # Loop limits; lambda factory so each instance gets a fresh dict.
    loop_config: dict = Field(
        default_factory=lambda: {
            "max_iterations": 100,
            "max_tool_calls_per_turn": 30,
            "max_context_tokens": 32000,
        },
    )

    # Pipeline overrides (per-agent, merged with global config)
    pipeline: dict = Field(
        default_factory=dict,
        description="Per-agent pipeline stage overrides. Same format as global pipeline config.",
    )

    # Resource limits
    # None presumably means "no spend cap per run" -- TODO confirm where
    # this is enforced.
    max_cost_per_run: float | None = None
+1 -1
View File
@@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Any
from pydantic import AliasChoices, BaseModel, Field, computed_field
if TYPE_CHECKING:
from framework.graph.executor import ExecutionResult
from framework.orchestrator.orchestrator import ExecutionResult
from framework.schemas.run import Run
+6 -2
View File
@@ -28,8 +28,11 @@ def _get_allowed_agent_roots() -> tuple[Path, ...]:
"""
global _ALLOWED_AGENT_ROOTS
if _ALLOWED_AGENT_ROOTS is None:
from framework.config import COLONIES_DIR
_ALLOWED_AGENT_ROOTS = (
(_REPO_ROOT / "exports").resolve(),
COLONIES_DIR.resolve(), # ~/.hive/colonies/
(_REPO_ROOT / "exports").resolve(), # compat fallback
(_REPO_ROOT / "examples").resolve(),
(Path.home() / ".hive" / "agents").resolve(),
)
@@ -53,7 +56,8 @@ def validate_agent_path(agent_path: str | Path) -> Path:
if resolved.is_relative_to(root) and resolved != root:
return resolved
raise ValueError(
"agent_path must be inside an allowed directory (exports/, examples/, or ~/.hive/agents/)"
"agent_path must be inside an allowed directory "
"(~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)"
)

Some files were not shown because too many files have changed in this diff Show More