diff --git a/.gitignore b/.gitignore index 54798a34..dccd6d8e 100644 --- a/.gitignore +++ b/.gitignore @@ -70,6 +70,8 @@ tmp/ temp/ exports/* +exports.old* +artifacts/* .claude/settings.local.json diff --git a/core/framework/__init__.py b/core/framework/__init__.py index 27909db6..438323c5 100644 --- a/core/framework/__init__.py +++ b/core/framework/__init__.py @@ -1,71 +1,23 @@ -""" -Aden Hive Framework: A goal-driven agent runtime optimized for Builder observability. +"""Hive Agent Framework. -The runtime is designed around DECISIONS, not just actions. Every significant -choice the agent makes is captured with: -- What it was trying to do (intent) -- What options it considered -- What it chose and why -- What happened as a result -- Whether that was good or bad (evaluated post-hoc) - -This gives the Builder LLM the information it needs to improve agent behavior. - -## Testing Framework - -The framework includes a Goal-Based Testing system (Goal → Agent → Eval): -- Generate tests from Goal success_criteria and constraints -- Mandatory user approval before tests are stored -- Parallel test execution with error categorization -- Debug tools with fix suggestions - -See `framework.testing` for details. 
+Core classes: + AgentHost -- hosts agents, manages entry points and pipeline + Orchestrator -- routes between nodes in a graph + AgentLoop -- the LLM + tool execution loop (one per node) + AgentLoader -- loads agent.json from disk, builds pipeline + DecisionTracker -- records decisions for post-hoc analysis """ -from framework.llm import LLMProvider - -try: - from framework.llm import AnthropicProvider # noqa: F401 -except ImportError: - pass -from framework.runner import AgentRunner -from framework.runtime.core import Runtime -from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome -from framework.schemas.run import Problem, Run, RunSummary - -# Testing framework -from framework.testing import ( - ApprovalStatus, - DebugTool, - ErrorCategory, - Test, - TestResult, - TestStorage, - TestSuiteResult, -) +from framework.agent_loop import AgentLoop +from framework.host import AgentHost +from framework.loader import AgentLoader +from framework.orchestrator import Orchestrator +from framework.tracker import DecisionTracker __all__ = [ - # Schemas - "Decision", - "Option", - "Outcome", - "DecisionEvaluation", - "Run", - "RunSummary", - "Problem", - # Runtime - "Runtime", - # LLM - "LLMProvider", - "AnthropicProvider", - # Runner - "AgentRunner", - # Testing - "Test", - "TestResult", - "TestSuiteResult", - "TestStorage", - "ApprovalStatus", - "ErrorCategory", - "DebugTool", + "AgentHost", + "AgentLoader", + "AgentLoop", + "DecisionTracker", + "Orchestrator", ] diff --git a/core/framework/agent_loop/__init__.py b/core/framework/agent_loop/__init__.py new file mode 100644 index 00000000..845428b1 --- /dev/null +++ b/core/framework/agent_loop/__init__.py @@ -0,0 +1,32 @@ +"""Agent loop -- the core agent execution primitive.""" + +from framework.agent_loop.conversation import ( # noqa: F401 + ConversationStore, + Message, + NodeConversation, +) + +# Lazy import to avoid circular dependency with graph/event_loop/ +# (graph/event_loop/* imports 
framework.graph.conversation which is a shim +# pointing here, which would trigger agent_loop.py loading, which imports +# graph/event_loop/* again) + + +def __getattr__(name: str): + if name in ("AgentLoop", "JudgeProtocol", "JudgeVerdict", "LoopConfig", "OutputAccumulator"): + from framework.agent_loop.agent_loop import ( + AgentLoop, + JudgeProtocol, + JudgeVerdict, + LoopConfig, + OutputAccumulator, + ) + _exports = { + "AgentLoop": AgentLoop, + "JudgeProtocol": JudgeProtocol, + "JudgeVerdict": JudgeVerdict, + "LoopConfig": LoopConfig, + "OutputAccumulator": OutputAccumulator, + } + return _exports[name] + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/core/framework/graph/event_loop_node.py b/core/framework/agent_loop/agent_loop.py similarity index 87% rename from core/framework/graph/event_loop_node.py rename to core/framework/agent_loop/agent_loop.py index 61f447af..39f47dd4 100644 --- a/core/framework/graph/event_loop_node.py +++ b/core/framework/agent_loop/agent_loop.py @@ -21,16 +21,16 @@ from collections.abc import Awaitable, Callable from datetime import UTC, datetime from typing import Any -from framework.graph.conversation import ConversationStore, NodeConversation -from framework.graph.event_loop import types as event_loop_types -from framework.graph.event_loop.compaction import ( +from framework.agent_loop.conversation import ConversationStore, NodeConversation +from framework.agent_loop.internals import types as event_loop_types +from framework.agent_loop.internals.compaction import ( build_emergency_summary, build_llm_compaction_prompt, compact, format_messages_for_summary, llm_compact, ) -from framework.graph.event_loop.cursor_persistence import ( +from framework.agent_loop.internals.cursor_persistence import ( RestoredState, check_pause, drain_injection_queue, @@ -38,7 +38,7 @@ from framework.graph.event_loop.cursor_persistence import ( restore, write_cursor, ) -from 
framework.graph.event_loop.event_publishing import ( +from framework.agent_loop.internals.event_publishing import ( generate_action_plan, log_skip_judge, publish_context_usage, @@ -54,27 +54,24 @@ from framework.graph.event_loop.event_publishing import ( publish_tool_started, run_hooks, ) -from framework.graph.event_loop.judge_pipeline import ( +from framework.agent_loop.internals.judge_pipeline import ( SubagentJudge as SharedSubagentJudge, judge_turn, ) -from framework.graph.event_loop.stall_detector import ( +from framework.agent_loop.internals.stall_detector import ( fingerprint_tool_calls, is_stalled, is_tool_doom_loop, ngram_similarity, ) -from framework.graph.event_loop.subagent_executor import execute_subagent -from framework.graph.event_loop.synthetic_tools import ( +from framework.agent_loop.internals.synthetic_tools import ( build_ask_user_multiple_tool, build_ask_user_tool, - build_delegate_tool, build_escalate_tool, - build_report_to_parent_tool, build_set_output_tool, handle_set_output, ) -from framework.graph.event_loop.tool_result_handler import ( +from framework.agent_loop.internals.tool_result_handler import ( build_json_preview, execute_tool, extract_json_metadata, @@ -82,12 +79,12 @@ from framework.graph.event_loop.tool_result_handler import ( restore_spill_counter, truncate_tool_result, ) -from framework.graph.event_loop.types import ( +from framework.agent_loop.internals.types import ( JudgeProtocol, JudgeVerdict, TriggerEvent, ) -from framework.graph.node import NodeContext, NodeProtocol, NodeResult +from framework.orchestrator.node import NodeContext, NodeProtocol, NodeResult from framework.llm.capabilities import supports_image_tool_results from framework.llm.provider import Tool, ToolResult, ToolUse from framework.llm.stream_events import ( @@ -96,8 +93,8 @@ from framework.llm.stream_events import ( TextDeltaEvent, ToolCallEvent, ) -from framework.runtime.event_bus import EventBus -from framework.runtime.llm_debug_logger import 
log_llm_turn +from framework.host.event_bus import EventBus +from framework.tracker.llm_debug_logger import log_llm_turn logger = logging.getLogger(__name__) @@ -163,43 +160,9 @@ def _is_context_too_large_error(exc: BaseException) -> bool: # --------------------------------------------------------------------------- -# Escalation receiver (temporary routing target for subagent → user input) # --------------------------------------------------------------------------- -class _EscalationReceiver: - """Temporary receiver registered in node_registry for subagent escalation routing. - - When a subagent calls ``report_to_parent(wait_for_response=True)``, the callback - creates one of these, registers it under a unique escalation ID in the executor's - ``node_registry``, and awaits ``wait()``. The TUI / runner calls - ``inject_input(escalation_id, content)`` which the ``ExecutionStream`` routes here - via ``inject_event()`` — matching the same ``hasattr(node, "inject_event")`` check - used for regular ``EventLoopNode`` instances. 
- """ - - def __init__(self) -> None: - self._event = asyncio.Event() - self._response: str | None = None - self._awaiting_input = True # So inject_message() can prefer us - - async def inject_event( - self, - content: str, - *, - is_client_input: bool = False, - image_content: list[dict] | None = None, - ) -> None: - """Called by ExecutionStream.inject_input() when the user responds.""" - self._response = content - self._event.set() - - async def wait(self) -> str | None: - """Block until inject_event() delivers the user's response.""" - await self._event.wait() - return self._response - - # --------------------------------------------------------------------------- # Judge protocol (simple 3-action interface for event loop evaluation) # --------------------------------------------------------------------------- @@ -224,7 +187,7 @@ OutputAccumulator = event_loop_types.OutputAccumulator # --------------------------------------------------------------------------- -class EventLoopNode(NodeProtocol): +class AgentLoop(NodeProtocol): """Multi-turn LLM streaming loop with tool execution and judge evaluation. Lifecycle: @@ -284,9 +247,6 @@ class EventLoopNode(NodeProtocol): # Monotonic counter for spillover file naming (web_search_1.txt, etc.) self._spill_counter: int = 0 # Subagent mark_complete: when True, _evaluate returns ACCEPT immediately - self._mark_complete_flag = False - # Counter for subagent instances (1, 2, 3, ...) - self._subagent_instance_counter: dict[str, int] = {} def validate_input(self, ctx: NodeContext) -> list[str]: """Validate hard requirements only. 
@@ -307,7 +267,7 @@ class EventLoopNode(NodeProtocol): async def execute(self, ctx: NodeContext) -> NodeResult: """Run the event loop.""" logger.debug( - "[EventLoopNode.execute] Starting execution for node=%s, stream=%s", + "[AgentLoop.execute] Starting execution for node=%s, stream=%s", ctx.node_id, ctx.stream_id, ) @@ -320,7 +280,7 @@ class EventLoopNode(NodeProtocol): # Store skill dirs for AS-9 file-read interception in _execute_tool self._skill_dirs: list[str] = ctx.skill_dirs logger.debug( - "[EventLoopNode.execute] node_id=%s, execution_id=%s, max_iterations=%d", + "[AgentLoop.execute] node_id=%s, execution_id=%s, max_iterations=%d", node_id, execution_id, self._config.max_iterations, @@ -402,7 +362,7 @@ class EventLoopNode(NodeProtocol): # execution preamble and node-type preamble. The stored # prompt may be stale after code changes or when runtime- # injected context (e.g. worker identity) has changed. - from framework.graph.prompting import build_system_prompt_for_node_context + from framework.orchestrator.prompting import build_system_prompt_for_node_context _current_prompt = build_system_prompt_for_node_context(ctx) if conversation.system_prompt != _current_prompt: @@ -425,7 +385,7 @@ class EventLoopNode(NodeProtocol): await self._conversation_store.clear() # Fresh conversation: either isolated mode or first node in continuous mode. - from framework.graph.prompting import build_system_prompt_for_node_context + from framework.orchestrator.prompting import build_system_prompt_for_node_context system_prompt = build_system_prompt_for_node_context(ctx) @@ -484,7 +444,7 @@ class EventLoopNode(NodeProtocol): # 2a. Guard: ensure at least one non-system message exists. # A restored conversation may have 0 messages if phase_id filtering # removes them all, or if a prior run stored metadata without messages - # (e.g. subagent that failed before the first LLM call). + # (e.g. node that failed before the first LLM call). 
if conversation.message_count == 0: initial_message = self._build_initial_message(ctx) if initial_message: @@ -502,37 +462,10 @@ class EventLoopNode(NodeProtocol): tools.append(self._build_ask_user_tool()) if stream_id == "queen": tools.append(self._build_ask_user_multiple_tool()) - # Workers/subagents can escalate blockers to the queen. + # Workers can escalate blockers to the queen. if stream_id not in ("queen", "judge"): tools.append(self._build_escalate_tool()) - # Add delegate_to_sub_agent tool if: - # - Node has sub_agents defined - # - We are NOT in subagent mode (prevents nested delegation) - if not ctx.is_subagent_mode: - sub_agents = getattr(ctx.node_spec, "sub_agents", None) or [] - if sub_agents: - delegate_tool = self._build_delegate_tool(sub_agents, ctx.node_registry) - if delegate_tool: - tools.append(delegate_tool) - logger.info( - "[%s] delegate_to_sub_agent injected (sub_agents=%s)", - node_id, - sub_agents, - ) - else: - logger.error( - "[%s] _build_delegate_tool returned None for sub_agents=%s", - node_id, - sub_agents, - ) - else: - logger.debug("[%s] Skipped delegate tool (is_subagent_mode=True)", node_id) - - # Add report_to_parent tool for sub-agents with a report callback - if ctx.is_subagent_mode and ctx.report_callback is not None: - tools.append(self._build_report_to_parent_tool()) - logger.info( "[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s", node_id, @@ -565,11 +498,11 @@ class EventLoopNode(NodeProtocol): # 6. Main loop logger.debug( - "[EventLoopNode.execute] Entering main loop, start_iteration=%d", start_iteration + "[AgentLoop.execute] Entering main loop, start_iteration=%d", start_iteration ) for iteration in range(start_iteration, self._config.max_iterations): iter_start = time.time() - logger.debug("[EventLoopNode.execute] iteration=%d starting", iteration) + logger.debug("[AgentLoop.execute] iteration=%d starting", iteration) # 6a. 
Check pause (no current-iteration data yet — only log_node_complete needed) if await self._check_pause(ctx, conversation, iteration): @@ -601,18 +534,18 @@ class EventLoopNode(NodeProtocol): # 6b. Drain injection queue logger.debug( - "[EventLoopNode.execute] iteration=%d: draining injection queue...", iteration + "[AgentLoop.execute] iteration=%d: draining injection queue...", iteration ) drained_injections = await self._drain_injection_queue(conversation, ctx) logger.debug( - "[EventLoopNode.execute] iteration=%d: drained %d injections", + "[AgentLoop.execute] iteration=%d: drained %d injections", iteration, drained_injections, ) # 6b1. Drain trigger queue (framework-level signals) drained_triggers = await self._drain_trigger_queue(conversation) logger.debug( - "[EventLoopNode.execute] iteration=%d: drained %d triggers", + "[AgentLoop.execute] iteration=%d: drained %d triggers", iteration, drained_triggers, ) @@ -685,8 +618,6 @@ class EventLoopNode(NodeProtocol): "ask_user", "ask_user_multiple", "escalate", - "delegate_to_sub_agent", - "report_to_parent", } synthetic = [t for t in tools if t.name in _synthetic_names] tools.clear() @@ -696,11 +627,11 @@ class EventLoopNode(NodeProtocol): # 6b3. 
Dynamic prompt refresh (phase switching / memory refresh) if ctx.dynamic_prompt_provider is not None or ctx.dynamic_memory_provider is not None: if ctx.dynamic_prompt_provider is not None: - from framework.graph.prompting import stamp_prompt_datetime + from framework.orchestrator.prompting import stamp_prompt_datetime _new_prompt = stamp_prompt_datetime(ctx.dynamic_prompt_provider()) else: - from framework.graph.prompting import build_system_prompt_for_node_context + from framework.orchestrator.prompting import build_system_prompt_for_node_context _new_prompt = build_system_prompt_for_node_context(ctx) if _new_prompt != conversation.system_prompt: @@ -743,7 +674,7 @@ class EventLoopNode(NodeProtocol): len(conversation.messages), ) logger.debug( - "[EventLoopNode.execute] iteration=%d: entering _run_single_turn loop", iteration + "[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration ) _stream_retry_count = 0 _turn_cancelled = False @@ -752,7 +683,7 @@ class EventLoopNode(NodeProtocol): while True: try: logger.debug( - "[EventLoopNode.execute] iteration=%d: calling _run_single_turn (retry=%d)", + "[AgentLoop.execute] iteration=%d: calling _run_single_turn (retry=%d)", iteration, _stream_retry_count, ) @@ -768,12 +699,12 @@ class EventLoopNode(NodeProtocol): queen_input_requested, request_system_prompt, request_messages, - reported_to_parent, + _, ) = await self._run_single_turn( ctx, conversation, tools, iteration, accumulator ) logger.debug( - "[EventLoopNode.execute] iteration=%d:" + "[AgentLoop.execute] iteration=%d:" " _run_single_turn completed successfully", iteration, ) @@ -842,13 +773,13 @@ class EventLoopNode(NodeProtocol): break # success — exit retry loop except TurnCancelled: - logger.debug("[EventLoopNode.execute] iteration=%d: TurnCancelled", iteration) + logger.debug("[AgentLoop.execute] iteration=%d: TurnCancelled", iteration) _turn_cancelled = True break except Exception as e: logger.debug( - "[EventLoopNode.execute] 
iteration=%d:" + "[AgentLoop.execute] iteration=%d:" " Exception in _run_single_turn: %s (%s)", iteration, type(e).__name__, @@ -1024,7 +955,7 @@ class EventLoopNode(NodeProtocol): and not outputs_set and not user_input_requested and not queen_input_requested - and not reported_to_parent + ) if truly_empty and accumulator is not None: missing = self._get_missing_output_keys( @@ -1276,14 +1207,14 @@ class EventLoopNode(NodeProtocol): # blocking and resumption. _is_worker = ( stream_id not in ("queen", "judge") - and not ctx.is_subagent_mode + and not False and not ctx.supports_direct_user_io and self._event_bus is not None ) _worker_no_tool_turn = ( not real_tool_results and not outputs_set - and not reported_to_parent + and not queen_input_requested and not user_input_requested ) @@ -1733,7 +1664,7 @@ class EventLoopNode(NodeProtocol): # 6i. Judge evaluation should_judge = ( - ctx.is_subagent_mode # Always evaluate subagents + False or (iteration + 1) % self._config.judge_every_n_turns == 0 or not real_tool_results # no real tool calls = natural stop ) @@ -1789,7 +1720,7 @@ class EventLoopNode(NodeProtocol): missing = self._get_missing_output_keys( accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys ) - if missing and self._judge is not None and not self._mark_complete_flag: + if missing and self._judge is not None : hint = ( f"Task incomplete. Required outputs not yet produced: {missing}. " f"Follow your system prompt instructions to complete the work." @@ -1988,7 +1919,7 @@ class EventLoopNode(NodeProtocol): image_content: Optional list of OpenAI-style image blocks to attach. 
""" logger.debug( - "[EventLoopNode.inject_event] content_len=%d," + "[AgentLoop.inject_event] content_len=%d," " is_client_input=%s, has_images=%s," " queue_size_before=%d", len(content) if content else 0, @@ -1998,15 +1929,15 @@ class EventLoopNode(NodeProtocol): ) try: await self._injection_queue.put((content, is_client_input, image_content)) - logger.debug("[EventLoopNode.inject_event] Message queued successfully") + logger.debug("[AgentLoop.inject_event] Message queued successfully") except Exception as e: - logger.exception("[EventLoopNode.inject_event] Failed to queue message: %s", e) + logger.exception("[AgentLoop.inject_event] Failed to queue message: %s", e) raise try: self._input_ready.set() - logger.debug("[EventLoopNode.inject_event] _input_ready.set() called") + logger.debug("[AgentLoop.inject_event] _input_ready.set() called") except Exception as e: - logger.exception("[EventLoopNode.inject_event] Failed to set _input_ready: %s", e) + logger.exception("[AgentLoop.inject_event] Failed to set _input_ready: %s", e) raise async def inject_trigger(self, trigger: TriggerEvent) -> None: @@ -2157,7 +2088,6 @@ class EventLoopNode(NodeProtocol): ask_user_prompt = "" ask_user_options: list[str] | None = None queen_input_requested = False - reported_to_parent = False # Accumulate ALL tool calls across inner iterations for L3 logging. # Unlike real_tool_results (reset each inner iteration), this persists. logged_tool_calls: list[dict] = [] @@ -2231,16 +2161,28 @@ class EventLoopNode(NodeProtocol): ): if isinstance(event, TextDeltaEvent): accumulated_text = event.snapshot - await self._publish_text_delta( - stream_id, - node_id, - event.content, - event.snapshot, - ctx, - execution_id, - iteration=iteration, - inner_turn=inner_turn, - ) + # Filter ... blocks from client output. + # Content inside think tags is internal reasoning -- only + # the text after is shown to the user. 
+ _content = event.content + if "" in event.snapshot and "" not in event.snapshot: + _content = "" # still inside think block + elif "" in _content: + # End of think block -- emit only text after the tag + _content = _content.split("", 1)[-1] + elif "" in _content: + _content = "" # opening tag in this chunk + if _content: + await self._publish_text_delta( + stream_id, + node_id, + _content, + event.snapshot, + ctx, + execution_id, + iteration=iteration, + inner_turn=inner_turn, + ) elif isinstance(event, ToolCallEvent): _tc.append(event) @@ -2348,10 +2290,27 @@ class EventLoopNode(NodeProtocol): queen_input_requested, final_system_prompt, final_messages, - reported_to_parent, + False, ) - # Execute tool calls — framework tools (set_output, ask_user) + # Priority drain: if user sent a message while the LLM was + # streaming, inject it into the conversation NOW -- before tool + # execution. The LLM will see it on the next inner turn. + if not self._injection_queue.empty(): + while not self._injection_queue.empty(): + _inj_content, _inj_client, _inj_images = ( + self._injection_queue.get_nowait() + ) + if _inj_client: + await conversation.add_user_message(_inj_content) + logger.info( + "[%s] Priority-injected user message mid-turn (%d chars)", + node_id, len(_inj_content), + ) + else: + await conversation.add_user_message(_inj_content) + + # Execute tool calls -- framework tools (set_output, ask_user) # run inline; real MCP tools run in parallel. real_tool_results: list[dict] = [] limit_hit = False @@ -2361,13 +2320,12 @@ class EventLoopNode(NodeProtocol): ) # Phase 1: triage — handle framework tools immediately, - # queue real tools and subagents for parallel execution. + # queue real tools for parallel execution. 
results_by_id: dict[str, ToolResult] = {} timing_by_id: dict[ str, dict[str, Any] ] = {} # tool_use_id -> {start_timestamp, duration_s} pending_real: list[ToolCallEvent] = [] - pending_subagent: list[ToolCallEvent] = [] for tc in tool_calls: tool_call_count += 1 @@ -2610,76 +2568,6 @@ class EventLoopNode(NodeProtocol): ) results_by_id[tc.tool_use_id] = result - elif tc.tool_name == "delegate_to_sub_agent": - # Guard: in continuous mode the LLM may see delegate - # calls from a previous node's conversation history and - # attempt to re-use the tool on a node that doesn't own - # it. Only accept if the tool was actually offered. - if not any(t.name == "delegate_to_sub_agent" for t in tools): - logger.warning( - "[%s] LLM called delegate_to_sub_agent but tool " - "was not offered to this node — rejecting", - node_id, - ) - result = ToolResult( - tool_use_id=tc.tool_use_id, - content=( - "ERROR: delegate_to_sub_agent is not available " - "on this node. This tool belongs to a different " - "node in the workflow." 
- ), - is_error=True, - ) - results_by_id[tc.tool_use_id] = result - continue - # --- Framework-level subagent delegation --- - # Queue for parallel execution in Phase 2 - logger.info( - "🔄 LLM requesting subagent delegation: agent_id='%s', task='%s'", - tc.tool_input.get("agent_id", "?"), - (tc.tool_input.get("task", "")[:100] + "...") - if len(tc.tool_input.get("task", "")) > 100 - else tc.tool_input.get("task", ""), - ) - pending_subagent.append(tc) - - elif tc.tool_name == "report_to_parent": - # --- Report from sub-agent to parent (optionally blocking) --- - reported_to_parent = True - msg = tc.tool_input.get("message", "") - data = tc.tool_input.get("data") - wait = tc.tool_input.get("wait_for_response", False) - mark_complete = tc.tool_input.get("mark_complete", False) - response = None - - if ctx.report_callback: - try: - response = await ctx.report_callback( - msg, - data, - wait_for_response=wait, - ) - except Exception: - logger.warning( - "[%s] report_to_parent callback failed (swallowed)", - node_id, - exc_info=True, - ) - - if mark_complete: - self._mark_complete_flag = True - logger.info( - "[%s] mark_complete=True — subagent will accept on this iteration", - node_id, - ) - - result = ToolResult( - tool_use_id=tc.tool_use_id, - content=response if (wait and response) else "Report sent to parent.", - is_error=False, - ) - results_by_id[tc.tool_use_id] = result - else: # --- Real tool: check for truncated args, else queue --- if "_raw" in tc.tool_input: @@ -2754,175 +2642,6 @@ class EventLoopNode(NodeProtocol): result = raw results_by_id[tc.tool_use_id] = self._truncate_tool_result(result, tc.tool_name) - # Phase 2b: execute subagent delegations in parallel. 
- if pending_subagent: - _subagent_timeout = self._config.subagent_timeout_seconds - _inactivity_timeout = self._config.subagent_inactivity_timeout_seconds - - async def _timed_subagent( - _ctx: NodeContext, - _tc: ToolCallEvent, - _acc: OutputAccumulator = accumulator, - _wall_timeout: float = _subagent_timeout, - _activity_timeout: float = _inactivity_timeout, - ) -> tuple[ToolResult | BaseException, str, float]: - _s = time.time() - _iso = datetime.now(UTC).isoformat() - _last_activity = _s - _activity_event = asyncio.Event() - - async def _watchdog() -> None: - """Watchdog that times out only after inactivity period.""" - nonlocal _last_activity - while True: - _now = time.time() - _inactive_for = _now - _last_activity - _remaining = _activity_timeout - _inactive_for - - if _remaining <= 0: - # Inactivity timeout reached - return - - try: - await asyncio.wait_for(_activity_event.wait(), timeout=_remaining) - _activity_event.clear() - except TimeoutError: - # Check again in case activity happened during wait - continue - - async def _run_with_activity_timeout( - _coro, - ) -> ToolResult: - """Run subagent with activity-based timeout.""" - _watchdog_task = asyncio.create_task(_watchdog()) - try: - _result = await _coro - return _result - finally: - _watchdog_task.cancel() - try: - await _watchdog_task - except asyncio.CancelledError: - pass - - try: - # Subscribe to subagent activity events to reset inactivity timer - async def _on_subagent_activity(event) -> None: - nonlocal _last_activity - _last_activity = time.time() - _activity_event.set() - - _sub_id = None - if self._event_bus and _activity_timeout > 0: - from framework.runtime.event_bus import EventType - - _sub_id = self._event_bus.subscribe( - event_types=[ - EventType.TOOL_CALL_STARTED, - EventType.LLM_TEXT_DELTA, - EventType.EXECUTION_STARTED, - ], - handler=_on_subagent_activity, - ) - - try: - _coro = self._execute_subagent( - _ctx, - _tc.tool_input.get("agent_id", ""), - _tc.tool_input.get("task", 
""), - accumulator=_acc, - ) - - if _activity_timeout > 0: - # Use activity-based timeout with wall-clock max - _result_coro = _run_with_activity_timeout(_coro) - if _wall_timeout > 0: - _r = await asyncio.wait_for(_result_coro, timeout=_wall_timeout) - else: - _r = await _result_coro - elif _wall_timeout > 0: - _r = await asyncio.wait_for(_coro, timeout=_wall_timeout) - else: - _r = await _coro - finally: - if _sub_id and self._event_bus: - self._event_bus.unsubscribe(_sub_id) - - except TimeoutError: - _agent_id = _tc.tool_input.get("agent_id", "unknown") - _elapsed = time.time() - _s - logger.warning( - "Subagent '%s' timed out after %.0fs (inactivity threshold: %.0fs)", - _agent_id, - _elapsed, - _activity_timeout if _activity_timeout > 0 else _wall_timeout, - ) - _r = ToolResult( - tool_use_id=_tc.tool_use_id, - content=( - f"Subagent '{_agent_id}' timed out after " - f"{_elapsed:.0f}s of inactivity. " - "The subagent was not making progress. " - "Try a simpler task or break it into smaller pieces." 
- ), - is_error=True, - ) - except BaseException as _exc: - _r = _exc - _dur = round(time.time() - _s, 3) - return _r, _iso, _dur - - subagent_timed = await asyncio.gather( - *(_timed_subagent(ctx, tc) for tc in pending_subagent), - return_exceptions=True, - ) - for tc, entry in zip(pending_subagent, subagent_timed, strict=True): - if isinstance(entry, BaseException): - raw = entry - _start_iso = datetime.now(UTC).isoformat() - _dur_s = 0 - else: - raw, _start_iso, _dur_s = entry - _sa_timing = { - "start_timestamp": _start_iso, - "duration_s": _dur_s, - } - if isinstance(raw, BaseException): - result = ToolResult( - tool_use_id=tc.tool_use_id, - content=json.dumps( - { - "message": f"Sub-agent execution raised: {raw}", - "data": None, - "metadata": {"success": False, "error": str(raw)}, - } - ), - is_error=True, - ) - else: - # Attach the tool_use_id to the result - result = ToolResult( - tool_use_id=tc.tool_use_id, - content=raw.content, - is_error=raw.is_error, - ) - # Route through _truncate_tool_result so large - # subagent results are saved to spillover files - # and survive pruning (instead of being "cleared - # from context" with no recovery path). - result = self._truncate_tool_result(result, "delegate_to_sub_agent") - results_by_id[tc.tool_use_id] = result - logged_tool_calls.append( - { - "tool_use_id": tc.tool_use_id, - "tool_name": "delegate_to_sub_agent", - "tool_input": tc.tool_input, - "content": result.content, - "is_error": result.is_error, - **_sa_timing, - } - ) - # Phase 3: record results into conversation in original order, # build logged/real lists, and publish completed events. 
for tc in tool_calls[:executed_in_batch]: @@ -2936,8 +2655,6 @@ class EventLoopNode(NodeProtocol): "ask_user", "ask_user_multiple", "escalate", - "delegate_to_sub_agent", - "report_to_parent", ): tool_entry = { "tool_use_id": tc.tool_use_id, @@ -3056,7 +2773,7 @@ class EventLoopNode(NodeProtocol): queen_input_requested, final_system_prompt, final_messages, - reported_to_parent, + False, ) # --- Mid-turn pruning: prevent context blowup within a single turn --- @@ -3090,7 +2807,7 @@ class EventLoopNode(NodeProtocol): queen_input_requested, final_system_prompt, final_messages, - reported_to_parent, + False, ) # Tool calls processed -- loop back to stream with updated conversation @@ -3118,16 +2835,6 @@ class EventLoopNode(NodeProtocol): """Build the synthetic escalate tool. Delegates to synthetic_tools module.""" return build_escalate_tool() - def _build_delegate_tool( - self, sub_agents: list[str], node_registry: dict[str, Any] - ) -> Tool | None: - """Build the synthetic delegate_to_sub_agent tool. Delegates to synthetic_tools module.""" - return build_delegate_tool(sub_agents, node_registry) - - def _build_report_to_parent_tool(self) -> Tool: - """Build the synthetic report_to_parent tool. Delegates to synthetic_tools module.""" - return build_report_to_parent_tool() - def _handle_set_output( self, tool_input: dict[str, Any], @@ -3151,7 +2858,7 @@ class EventLoopNode(NodeProtocol): ) -> JudgeVerdict: """Evaluate the current state. Delegates to judge_pipeline module.""" return await judge_turn( - mark_complete_flag=self._mark_complete_flag, + mark_complete_flag=False, judge=self._judge, ctx=ctx, conversation=conversation, @@ -3176,7 +2883,7 @@ class EventLoopNode(NodeProtocol): Delegates to :func:`extract_tool_call_history` in conversation.py. 
""" - from framework.graph.conversation import extract_tool_call_history + from framework.agent_loop.conversation import extract_tool_call_history return extract_tool_call_history(conversation.messages, max_entries=max_entries) @@ -3781,46 +3488,3 @@ class EventLoopNode(NodeProtocol): # Subagent Execution # ------------------------------------------------------------------- - async def _execute_subagent( - self, - ctx: NodeContext, - agent_id: str, - task: str, - *, - accumulator: OutputAccumulator | None = None, - ) -> ToolResult: - """Execute a subagent and return the result as a ToolResult. - - The subagent: - - Gets a fresh conversation with just the task - - Has read-only access to the parent's readable data buffer - - Cannot delegate to its own subagents (prevents recursion) - - Returns its output in structured JSON format - - Args: - ctx: Parent node's context (for data buffer, tools, LLM access). - agent_id: The node ID of the subagent to invoke. - task: The task description to give the subagent. - accumulator: Parent's OutputAccumulator — provides outputs that - have been set via ``set_output`` but not yet written to - data buffer (which only happens after the node completes). 
- - Returns: - ToolResult with structured JSON output containing: - - message: Human-readable summary - - data: Subagent's output (free-form JSON) - - metadata: Execution metadata (success, tokens, latency) - """ - return await execute_subagent( - ctx=ctx, - agent_id=agent_id, - task=task, - accumulator=accumulator, - event_bus=self._event_bus, - config=self._config, - tool_executor=self._tool_executor, - conversation_store=self._conversation_store, - subagent_instance_counter=self._subagent_instance_counter, - event_loop_node_cls=type(self), - escalation_receiver_cls=_EscalationReceiver, - ) diff --git a/core/framework/graph/conversation.py b/core/framework/agent_loop/conversation.py similarity index 99% rename from core/framework/graph/conversation.py rename to core/framework/agent_loop/conversation.py index 40b720fc..dcd9da70 100644 --- a/core/framework/graph/conversation.py +++ b/core/framework/agent_loop/conversation.py @@ -324,7 +324,7 @@ def _try_extract_key(content: str, key: str) -> str | None: 3. Colon format: ``key: value``. 4. Equals format: ``key = value``. """ - from framework.graph.node import find_json_object + from framework.orchestrator.node import find_json_object # 1. Whole message is JSON try: diff --git a/core/framework/agent_loop/internals/__init__.py b/core/framework/agent_loop/internals/__init__.py new file mode 100644 index 00000000..45601913 --- /dev/null +++ b/core/framework/agent_loop/internals/__init__.py @@ -0,0 +1,7 @@ +"""Agent loop internals -- compaction, judge, tools, subagent execution. + +Re-exports from legacy locations for the new import path. 
+""" + +from framework.agent_loop.internals.compaction import * # noqa: F401, F403 +from framework.agent_loop.internals.synthetic_tools import * # noqa: F401, F403 diff --git a/core/framework/graph/event_loop/compaction.py b/core/framework/agent_loop/internals/compaction.py similarity index 97% rename from core/framework/graph/event_loop/compaction.py rename to core/framework/agent_loop/internals/compaction.py index a22da05a..1b54be9a 100644 --- a/core/framework/graph/event_loop/compaction.py +++ b/core/framework/agent_loop/internals/compaction.py @@ -19,11 +19,11 @@ from datetime import UTC, datetime from pathlib import Path from typing import Any -from framework.graph.conversation import Message, NodeConversation -from framework.graph.event_loop.event_publishing import publish_context_usage -from framework.graph.event_loop.types import LoopConfig, OutputAccumulator -from framework.graph.node import NodeContext -from framework.runtime.event_bus import EventBus +from framework.agent_loop.conversation import Message, NodeConversation +from framework.agent_loop.internals.event_publishing import publish_context_usage +from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator +from framework.orchestrator.node import NodeContext +from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) @@ -368,8 +368,8 @@ async def llm_compact( in half and each half is summarised independently. Tool history is appended once at the top-level call (``_depth == 0``). 
""" - from framework.graph.conversation import extract_tool_call_history - from framework.graph.event_loop.tool_result_handler import is_context_too_large_error + from framework.agent_loop.conversation import extract_tool_call_history + from framework.agent_loop.internals.tool_result_handler import is_context_too_large_error if _depth > max_depth: raise RuntimeError(f"LLM compaction recursion limit ({max_depth})") @@ -724,7 +724,7 @@ async def log_compaction( ) if event_bus: - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType event_data: dict[str, Any] = { "level": level, @@ -861,6 +861,6 @@ def _extract_tool_call_history(conversation: NodeConversation) -> str: directly (vs. the module-level extract_tool_call_history in conversation.py which works on raw message lists). """ - from framework.graph.conversation import extract_tool_call_history + from framework.agent_loop.conversation import extract_tool_call_history return extract_tool_call_history(list(conversation.messages)) diff --git a/core/framework/graph/event_loop/cursor_persistence.py b/core/framework/agent_loop/internals/cursor_persistence.py similarity index 97% rename from core/framework/graph/event_loop/cursor_persistence.py rename to core/framework/agent_loop/internals/cursor_persistence.py index 65f7d5fc..627fee8c 100644 --- a/core/framework/graph/event_loop/cursor_persistence.py +++ b/core/framework/agent_loop/internals/cursor_persistence.py @@ -14,9 +14,9 @@ from collections.abc import Awaitable, Callable from dataclasses import dataclass from typing import Any -from framework.graph.conversation import ConversationStore, NodeConversation -from framework.graph.event_loop.types import LoopConfig, OutputAccumulator, TriggerEvent -from framework.graph.node import NodeContext +from framework.agent_loop.conversation import ConversationStore, NodeConversation +from framework.agent_loop.internals.types import LoopConfig, 
OutputAccumulator, TriggerEvent +from framework.orchestrator.node import NodeContext from framework.llm.capabilities import supports_image_tool_results logger = logging.getLogger(__name__) diff --git a/core/framework/graph/event_loop/event_publishing.py b/core/framework/agent_loop/internals/event_publishing.py similarity index 97% rename from core/framework/graph/event_loop/event_publishing.py rename to core/framework/agent_loop/internals/event_publishing.py index 85846620..69e487ab 100644 --- a/core/framework/graph/event_loop/event_publishing.py +++ b/core/framework/agent_loop/internals/event_publishing.py @@ -9,10 +9,10 @@ from __future__ import annotations import logging import time -from framework.graph.conversation import NodeConversation -from framework.graph.event_loop.types import HookContext -from framework.graph.node import NodeContext -from framework.runtime.event_bus import EventBus +from framework.agent_loop.conversation import NodeConversation +from framework.agent_loop.internals.types import HookContext +from framework.orchestrator.node import NodeContext +from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) @@ -177,7 +177,7 @@ async def publish_context_usage( if not event_bus: return - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType estimated = conversation.estimate_tokens() max_tokens = conversation._max_context_tokens diff --git a/core/framework/graph/event_loop/judge_pipeline.py b/core/framework/agent_loop/internals/judge_pipeline.py similarity index 95% rename from core/framework/graph/event_loop/judge_pipeline.py rename to core/framework/agent_loop/internals/judge_pipeline.py index 281d3991..2bebe4a7 100644 --- a/core/framework/graph/event_loop/judge_pipeline.py +++ b/core/framework/agent_loop/internals/judge_pipeline.py @@ -5,9 +5,9 @@ from __future__ import annotations import logging from collections.abc import Callable -from 
framework.graph.conversation import NodeConversation -from framework.graph.event_loop.types import JudgeProtocol, JudgeVerdict, OutputAccumulator -from framework.graph.node import NodeContext +from framework.agent_loop.conversation import NodeConversation +from framework.agent_loop.internals.types import JudgeProtocol, JudgeVerdict, OutputAccumulator +from framework.orchestrator.node import NodeContext logger = logging.getLogger(__name__) @@ -155,7 +155,7 @@ async def judge_turn( # Level 2b: conversation-aware quality check (if success_criteria set) if ctx.node_spec.success_criteria and ctx.llm: - from framework.graph.conversation_judge import evaluate_phase_completion + from framework.orchestrator.conversation_judge import evaluate_phase_completion verdict = await evaluate_phase_completion( llm=ctx.llm, diff --git a/core/framework/graph/event_loop/stall_detector.py b/core/framework/agent_loop/internals/stall_detector.py similarity index 100% rename from core/framework/graph/event_loop/stall_detector.py rename to core/framework/agent_loop/internals/stall_detector.py diff --git a/core/framework/graph/event_loop/synthetic_tools.py b/core/framework/agent_loop/internals/synthetic_tools.py similarity index 69% rename from core/framework/graph/event_loop/synthetic_tools.py rename to core/framework/agent_loop/internals/synthetic_tools.py index fa9d19d5..5a5bf3c3 100644 --- a/core/framework/graph/event_loop/synthetic_tools.py +++ b/core/framework/agent_loop/internals/synthetic_tools.py @@ -204,118 +204,6 @@ def build_escalate_tool() -> Tool: }, ) - -def build_delegate_tool(sub_agents: list[str], node_registry: dict[str, Any]) -> Tool | None: - """Build the synthetic delegate_to_sub_agent tool for subagent invocation. - - Args: - sub_agents: List of node IDs that can be invoked as subagents. - node_registry: Map of node_id -> NodeSpec for looking up subagent descriptions. - - Returns: - Tool definition if sub_agents is non-empty, None otherwise. 
- """ - if not sub_agents: - return None - - agent_descriptions = [] - for agent_id in sub_agents: - spec = node_registry.get(agent_id) - if spec: - desc = getattr(spec, "description", "(no description)") - agent_descriptions.append(f"- {agent_id}: {desc}") - else: - agent_descriptions.append(f"- {agent_id}: (not found in registry)") - - return Tool( - name="delegate_to_sub_agent", - description=( - "Delegate a task to a specialized sub-agent. The sub-agent runs " - "autonomously with read-only access to current memory and returns " - "its result. Use this to parallelize work or leverage specialized capabilities.\n\n" - "Available sub-agents:\n" + "\n".join(agent_descriptions) - ), - parameters={ - "type": "object", - "properties": { - "agent_id": { - "type": "string", - "description": f"The sub-agent to invoke. Must be one of: {sub_agents}", - "enum": sub_agents, - }, - "task": { - "type": "string", - "description": ( - "The task description for the sub-agent to execute. " - "Be specific about what you want the sub-agent to do and " - "what information to return." - ), - }, - }, - "required": ["agent_id", "task"], - }, - ) - - -def build_report_to_parent_tool() -> Tool: - """Build the synthetic report_to_parent tool for sub-agent progress reports. - - Sub-agents call this to send one-way progress updates, partial findings, - or status reports to the parent node (and external observers via event bus) - without blocking execution. - - When ``wait_for_response`` is True, the sub-agent blocks until the parent - relays the user's response — used for escalation (e.g. login pages, CAPTCHAs). - - When ``mark_complete`` is True, the sub-agent terminates immediately after - sending the report — no need to call set_output for each output key. - """ - return Tool( - name="report_to_parent", - description=( - "Send a report to the parent agent. By default this is fire-and-forget: " - "the parent receives the report but does not respond. 
" - "Set wait_for_response=true to BLOCK until the user replies — use this " - "when you need human intervention (e.g. login pages, CAPTCHAs, " - "authentication walls). The user's response is returned as the tool result. " - "Set mark_complete=true to finish your task and terminate immediately " - "after sending the report — use this when your findings are in the " - "message/data fields and you don't need to call set_output." - ), - parameters={ - "type": "object", - "properties": { - "message": { - "type": "string", - "description": "A human-readable status or progress message.", - }, - "data": { - "type": "object", - "description": "Optional structured data to include with the report.", - }, - "wait_for_response": { - "type": "boolean", - "description": ( - "If true, block execution until the user responds. " - "Use for escalation scenarios requiring human intervention." - ), - "default": False, - }, - "mark_complete": { - "type": "boolean", - "description": ( - "If true, terminate the sub-agent immediately after sending " - "this report. The report message and data are delivered to the " - "parent as the final result. No set_output calls are needed." 
- ), - "default": False, - }, - }, - "required": ["message"], - }, - ) - - def handle_set_output( tool_input: dict[str, Any], output_keys: list[str] | None, diff --git a/core/framework/graph/event_loop/tool_result_handler.py b/core/framework/agent_loop/internals/tool_result_handler.py similarity index 100% rename from core/framework/graph/event_loop/tool_result_handler.py rename to core/framework/agent_loop/internals/tool_result_handler.py diff --git a/core/framework/graph/event_loop/types.py b/core/framework/agent_loop/internals/types.py similarity index 98% rename from core/framework/graph/event_loop/types.py rename to core/framework/agent_loop/internals/types.py index 69357fce..3a100f65 100644 --- a/core/framework/graph/event_loop/types.py +++ b/core/framework/agent_loop/internals/types.py @@ -9,7 +9,7 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any, Literal, Protocol, runtime_checkable -from framework.graph.conversation import ( +from framework.agent_loop.conversation import ( ConversationStore, ) @@ -68,7 +68,7 @@ class LoopConfig: max_output_value_chars: int = 2_000 # Stream retry. 
- max_stream_retries: int = 3 + max_stream_retries: int = 5 stream_retry_backoff_base: float = 2.0 stream_retry_max_delay: float = 60.0 diff --git a/core/framework/agents/__init__.py b/core/framework/agents/__init__.py index 561d96a0..46c0a5f8 100644 --- a/core/framework/agents/__init__.py +++ b/core/framework/agents/__init__.py @@ -8,6 +8,14 @@ FRAMEWORK_AGENTS_DIR = Path(__file__).parent def list_framework_agents() -> list[Path]: """List all framework agent directories.""" return sorted( - [p for p in FRAMEWORK_AGENTS_DIR.iterdir() if p.is_dir() and (p / "agent.py").exists()], + [ + p + for p in FRAMEWORK_AGENTS_DIR.iterdir() + if p.is_dir() + and ( + (p / "agent.json").exists() + or (p / "agent.py").exists() + ) + ], key=lambda p: p.name, ) diff --git a/core/framework/agents/credential_tester/agent.py b/core/framework/agents/credential_tester/agent.py index c78823f2..32336a72 100644 --- a/core/framework/agents/credential_tester/agent.py +++ b/core/framework/agents/credential_tester/agent.py @@ -21,15 +21,15 @@ from pathlib import Path from typing import TYPE_CHECKING from framework.config import get_max_context_tokens -from framework.graph import Goal, NodeSpec, SuccessCriterion -from framework.graph.checkpoint_config import CheckpointConfig -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult +from framework.orchestrator import Goal, NodeSpec, SuccessCriterion +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult from framework.llm import LiteLLMProvider -from framework.runner.mcp_registry import MCPRegistry -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.mcp_registry import MCPRegistry +from 
framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config from .nodes import build_tester_node @@ -37,7 +37,7 @@ from .nodes import build_tester_node logger = logging.getLogger(__name__) if TYPE_CHECKING: - from framework.runner import AgentRunner + from framework.loader import AgentLoader logger = logging.getLogger(__name__) @@ -233,7 +233,7 @@ requires_account_selection = True """Signal TUI to show account picker before starting the agent.""" -def configure_for_account(runner: AgentRunner, account: dict) -> None: +def configure_for_account(runner: AgentLoader, account: dict) -> None: """Scope the tester node's tools to the selected provider. Handles both Aden accounts (account= routing) and local accounts @@ -325,7 +325,7 @@ def _activate_local_account(credential_id: str, alias: str) -> None: def _configure_aden_node( - runner: AgentRunner, + runner: AgentLoader, provider: str, alias: str, detail: str, @@ -368,7 +368,7 @@ or any other identifier — always use the alias exactly as shown. 
def _configure_local_node( - runner: AgentRunner, + runner: AgentLoader, provider: str, alias: str, identity: dict, @@ -497,7 +497,7 @@ class CredentialTesterAgent: def __init__(self, config=None): self.config = config or default_config self._selected_account: dict | None = None - self._agent_runtime: AgentRuntime | None = None + self._agent_runtime: AgentHost | None = None self._tool_registry: ToolRegistry | None = None self._storage_path: Path | None = None @@ -613,7 +613,7 @@ class CredentialTesterAgent: graph = self._build_graph() - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=graph, goal=goal, storage_path=self._storage_path, diff --git a/core/framework/agents/credential_tester/nodes/__init__.py b/core/framework/agents/credential_tester/nodes/__init__.py index 31b1ac7e..682ca08e 100644 --- a/core/framework/agents/credential_tester/nodes/__init__.py +++ b/core/framework/agents/credential_tester/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Credential Tester agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec def build_tester_node( diff --git a/core/framework/agents/discovery.py b/core/framework/agents/discovery.py index 2cbf712b..8f978636 100644 --- a/core/framework/agents/discovery.py +++ b/core/framework/agents/discovery.py @@ -27,8 +27,8 @@ def _get_last_active(agent_path: Path) -> str | None: """Return the most recent updated_at timestamp across all sessions. Checks both worker sessions (``~/.hive/agents/{name}/sessions/``) and - queen sessions (``~/.hive/queen/session/``) whose ``meta.json`` references - the same *agent_path*. + queen sessions (``~/.hive/agents/queens/default/sessions/``) whose + ``meta.json`` references the same *agent_path*. """ from datetime import datetime @@ -53,7 +53,9 @@ def _get_last_active(agent_path: Path) -> str | None: continue # 2. 
Queen sessions - queen_sessions_dir = Path.home() / ".hive" / "queen" / "session" + from framework.config import QUEENS_DIR + + queen_sessions_dir = QUEENS_DIR / "default" / "sessions" if queen_sessions_dir.exists(): resolved = agent_path.resolve() for d in queen_sessions_dir.iterdir(): @@ -112,13 +114,33 @@ def _count_runs(agent_name: str) -> int: def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]: """Extract node count, tool count, and tags from an agent directory. - Prefers agent.py (AST-parsed) over agent.json for node/tool counts - since agent.json may be stale. Tags are only available from agent.json. + Checks agent.json (declarative) first, then agent.py (legacy). """ import ast node_count, tool_count, tags = 0, 0, [] + # Declarative JSON agents (preferred) + agent_json = agent_path / "agent.json" + if agent_json.exists(): + try: + data = json.loads(agent_json.read_text(encoding="utf-8")) + if isinstance(data, dict): + json_nodes = data.get("nodes", []) + node_count = len(json_nodes) + tools: set[str] = set() + for n in json_nodes: + node_tools = n.get("tools", {}) + if isinstance(node_tools, dict): + tools.update(node_tools.get("allowed", [])) + elif isinstance(node_tools, list): + tools.update(node_tools) + tool_count = len(tools) + return node_count, tool_count, tags + except Exception: + pass + + # Legacy: agent.py (AST-parsed) agent_py = agent_path / "agent.py" if agent_py.exists(): try: @@ -132,39 +154,31 @@ def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]: except Exception: pass - agent_json = agent_path / "agent.json" - if agent_json.exists(): - try: - data = json.loads(agent_json.read_text(encoding="utf-8")) - json_nodes = data.get("graph", {}).get("nodes", []) or data.get("nodes", []) - if node_count == 0: - node_count = len(json_nodes) - tools: set[str] = set() - for n in json_nodes: - tools.update(n.get("tools", [])) - tool_count = len(tools) - tags = data.get("agent", {}).get("tags", []) - except 
Exception: - pass - return node_count, tool_count, tags def discover_agents() -> dict[str, list[AgentEntry]]: """Discover agents from all known sources grouped by category.""" - from framework.runner.cli import ( + from framework.loader.cli import ( _extract_python_agent_metadata, _get_framework_agents_dir, _is_valid_agent_dir, ) + from framework.config import COLONIES_DIR + groups: dict[str, list[AgentEntry]] = {} sources = [ - ("Your Agents", Path("exports")), + ("Your Agents", COLONIES_DIR), + ("Your Agents", Path("exports")), # compat fallback ("Framework", _get_framework_agents_dir()), ("Examples", Path("examples/templates")), ] + # Track seen agent directory names to avoid duplicates when the same + # agent exists in both colonies/ and exports/ (colonies takes priority). + _seen_agent_names: set[str] = set() + for category, base_dir in sources: if not base_dir.exists(): continue @@ -172,6 +186,9 @@ def discover_agents() -> dict[str, list[AgentEntry]]: for path in sorted(base_dir.iterdir(), key=lambda p: p.name): if not _is_valid_agent_dir(path): continue + if path.name in _seen_agent_names: + continue + _seen_agent_names.add(path.name) name, desc = _extract_python_agent_metadata(path) config_fallback_name = path.name.replace("_", " ").title() @@ -179,13 +196,19 @@ def discover_agents() -> dict[str, list[AgentEntry]]: node_count, tool_count, tags = _extract_agent_stats(path) if not used_config: - agent_json = path / "agent.json" - if agent_json.exists(): + # Try agent.json (declarative) for metadata + agent_json_path = path / "agent.json" + if agent_json_path.exists(): try: - data = json.loads(agent_json.read_text(encoding="utf-8")) - meta = data.get("agent", {}) - name = meta.get("name", name) - desc = meta.get("description", desc) + data = json.loads( + agent_json_path.read_text(encoding="utf-8"), + ) + if isinstance(data, dict): + raw_name = data.get("name", name) + if "-" in raw_name and " " not in raw_name: + raw_name = raw_name.replace("-", " ").title() 
+ name = raw_name + desc = data.get("description", desc) except Exception: pass @@ -204,6 +227,8 @@ def discover_agents() -> dict[str, list[AgentEntry]]: ) ) if entries: - groups[category] = entries + existing = groups.get(category, []) + existing.extend(entries) + groups[category] = existing return groups diff --git a/core/framework/agents/queen/__init__.py b/core/framework/agents/queen/__init__.py index caff6298..f86488f6 100644 --- a/core/framework/agents/queen/__init__.py +++ b/core/framework/agents/queen/__init__.py @@ -1,19 +1,13 @@ -""" -Queen — Native agent builder for the Hive framework. +"""Queen -- the agent builder for the Hive framework.""" -Deeply understands the agent framework and produces complete Python packages -with goals, nodes, edges, system prompts, MCP configuration, and tests -from natural language specifications. -""" - -from .agent import queen_goal, queen_graph +from .agent import queen_goal, queen_loop_config from .config import AgentMetadata, RuntimeConfig, default_config, metadata __version__ = "1.0.0" __all__ = [ "queen_goal", - "queen_graph", + "queen_loop_config", "RuntimeConfig", "AgentMetadata", "default_config", diff --git a/core/framework/agents/queen/agent.py b/core/framework/agents/queen/agent.py index e6583354..ba59f963 100644 --- a/core/framework/agents/queen/agent.py +++ b/core/framework/agents/queen/agent.py @@ -1,38 +1,29 @@ -"""Queen graph definition.""" +"""Queen agent definition. -from framework.graph import Goal -from framework.graph.edge import GraphSpec +The queen is a single AgentLoop -- no graph, no orchestrator. +Loaded by queen_orchestrator.create_queen(). +""" + +from framework.orchestrator.goal import Goal from .nodes import queen_node -# --------------------------------------------------------------------------- -# Queen graph — the primary persistent conversation. -# Loaded by queen_orchestrator.create_queen(), NOT by AgentRunner. 
-# --------------------------------------------------------------------------- - queen_goal = Goal( id="queen-manager", name="Queen Manager", description=( - "Manage the worker agent lifecycle and serve as the user's primary interactive interface." + "Manage the worker agent lifecycle and serve as the " + "user's primary interactive interface." ), success_criteria=[], constraints=[], ) -queen_graph = GraphSpec( - id="queen-graph", - goal_id=queen_goal.id, - version="1.0.0", - entry_node="queen", - entry_points={"start": "queen"}, - terminal_nodes=[], - pause_nodes=[], - nodes=[queen_node], - edges=[], - conversation_mode="continuous", - loop_config={ - "max_iterations": 999_999, - "max_tool_calls_per_turn": 30, - }, -) +# Loop config -- used by queen_orchestrator to build LoopConfig +queen_loop_config = { + "max_iterations": 999_999, + "max_tool_calls_per_turn": 30, + "max_context_tokens": 180_000, +} + +__all__ = ["queen_goal", "queen_loop_config", "queen_node"] diff --git a/core/framework/agents/queen/mcp_registry.json b/core/framework/agents/queen/mcp_registry.json new file mode 100644 index 00000000..80e62804 --- /dev/null +++ b/core/framework/agents/queen/mcp_registry.json @@ -0,0 +1,3 @@ +{ + "include": ["gcu-tools"] +} diff --git a/core/framework/agents/queen/nodes/__init__.py b/core/framework/agents/queen/nodes/__init__.py index 3f447541..e078afa4 100644 --- a/core/framework/agents/queen/nodes/__init__.py +++ b/core/framework/agents/queen/nodes/__init__.py @@ -2,7 +2,7 @@ from pathlib import Path -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Load reference docs at import time so they're always in the system prompt. # No voluntary read_file() calls needed — the LLM gets everything upfront. @@ -37,7 +37,7 @@ _appendices = _build_appendices() # GCU guide — shared between planning and building via _shared_building_knowledge. 
_gcu_section = ( - ("\n\n# GCU Nodes — Browser Automation\n\n" + _gcu_guide) + ("\n\n# Browser Automation Nodes\n\n" + _gcu_guide) if _is_gcu_enabled() and _gcu_guide else "" ) @@ -81,7 +81,6 @@ _QUEEN_PLANNING_TOOLS = [ "save_agent_draft", "confirm_and_build", # Scaffold + transition to building (requires confirm_and_build first) - "initialize_and_build_agent", # Load existing agent (after user confirms) "load_built_agent", ] @@ -172,7 +171,7 @@ _shared_building_knowledge = ( ## Paths (MANDATORY) **Always use RELATIVE paths** \ -(e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`). +(e.g. `exports/agent_name/agent.json`). **Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail. The project root is implicit. @@ -182,14 +181,18 @@ When designing worker nodes or writing worker system prompts, reference these \ tool names — NOT the coder-tools names (read_file, write_file, etc.). Worker data tools (for large results and spillover): -- save_data(filename, data, data_dir) — save data to a file for later retrieval -- load_data(filename, data_dir, offset_bytes?, limit_bytes?) — load data \ -with byte-based pagination -- list_data_files(data_dir) — list available data files -- append_data(filename, data, data_dir) — append to a file incrementally -- edit_data(filename, old_text, new_text, data_dir) — find-and-replace in a data file -- serve_file_to_user(filename, data_dir, label?, open_in_browser?) — \ -generate a clickable file URI for the user +Worker data tools (from files-tools MCP server): +- read_file(path) — read a file +- write_file(path, content) — write/create a file +- list_files(path) — list directory contents +- search_files(pattern, path) — regex search in files + +Worker data tools (from hive-tools MCP server): +- csv_read, csv_write, csv_append — CSV operations +- pdf_read — read PDF files + +All tools are registered in the global MCP registry (~/.hive/mcp_registry/). 
\ +Workers get tools from: hive-tools, gcu-tools, files-tools. IMPORTANT: Do NOT tell workers to use read_file, write_file, edit_file, \ search_files, or list_directory — those are YOUR tools, not theirs. @@ -204,7 +207,7 @@ _planning_knowledge = """\ # Core Mandates (Planning) - **DO NOT propose a complete goal on your own.** Instead, \ collaborate with the user to define it. -- **NEVER call `initialize_and_build_agent` without explicit user approval.** \ +- **NEVER call `confirm_and_build` without explicit user approval.** \ Present the full design first and wait for the user to confirm before building. - **Discover tools dynamically.** NEVER reference tools from static \ docs. Always run list_agent_tools() to see what actually exists. @@ -252,9 +255,9 @@ When the stakeholder describes what they want, mentally construct: **After the user responds, assess fit and gaps together.** Be honest and specific. \ Reference tools from list_agent_tools() AND built-in capabilities: -- **GCU browser automation** (`node_type="gcu"`) provides full Playwright-based \ +- **Browser automation provides full Playwright-based \ browser control (navigation, clicking, typing, scrolling, JS-rendered pages, \ -multi-tab). Do NOT list browser automation as missing — use GCU nodes. +multi-tab). Do NOT list browser automation as missing — use browser nodes with tools: {policy: "all"}. Present a short **Framework Fit Assessment**: - **Works well**: 2-4 strengths for this use case @@ -306,14 +309,11 @@ explicitly on a node. Available types: - **io** (dusty purple, parallelogram): External data input/output - **document** (steel blue, wavy rect): Report or document generation - **database** (muted teal, cylinder): Database or data store -- **subprocess** (dark cyan, subroutine): Delegated sub-agent / predefined process -- **browser** (deep blue, hexagon): GCU browser automation / sub-agent \ -delegation. At build time, browser nodes are dissolved into the parent \ -node's sub_agents list. 
Use for any GCU or sub-agent leaf node. +- **browser** (deep blue, hexagon): Browser automation node (uses gcu-tools). Auto-detection works well for most cases: first node → start, nodes with \ no outgoing edges → terminal, nodes with multiple conditional outgoing \ -edges → decision, GCU nodes → browser, nodes mentioning "database" → \ +edges → decision, browser tool nodes → browser, nodes mentioning "database" → \ database, nodes mentioning "report/document" → document, I/O tools like \ send_email → io. Everything else defaults to process. Set flowchart_type \ explicitly only when auto-detection would be wrong. @@ -354,48 +354,19 @@ gather → [Valid data?] →Yes→ transform → deliver In the draft: the `[Valid data?]` node has `flowchart_type: "decision"`, \ `decision_clause: "Data passes validation checks?"`, with labeled yes/no edges. -## Sub-Agent Nodes — Planning-Only Delegation +## Browser Automation Nodes -Sub-agent nodes (dark teal subroutines) are **planning-only** visual elements \ -that show which nodes delegate to sub-agents. At `confirm_and_build()`, \ -sub-agent nodes are **dissolved** into their parent node: - -- The sub-agent node's ID is added to the predecessor's `sub_agents` list -- The sub-agent node and its connecting edge are removed -- At runtime, the parent node can invoke the sub-agent via `delegate_to_sub_agent` - -**Rules for sub-agent nodes (INCLUDING GCU nodes):** -- GCU nodes are auto-detected as `flowchart_type: "browser"` (hexagon) -- Connect from the managing parent node to the sub-agent node -- Sub-agent nodes must be **leaf nodes** — NO outgoing edges to other nodes -- At build time, browser/GCU nodes are dissolved into the parent's \ -`sub_agents` list, just like decision nodes are dissolved into criteria - -**CRITICAL: GCU nodes (`node_type: "gcu"`) are ALWAYS sub-agents.** \ -They MUST NOT appear in the linear flow. NEVER chain GCU nodes \ -sequentially (A → gcu1 → gcu2 → B is WRONG). 
+The queen (you) owns intake: you gather user requirements, then call \
+write_file("<agent_path>/agent.json", <complete agent.json config>)
+The agent.json must include ALL of these in one write: +- `name`, `version`, `description` +- `goal` with `description`, `success_criteria`, `constraints` +- `identity_prompt` (agent-level behavior) +- `nodes` — each with `id`, `description`, `system_prompt`, `tools`, \ +`input_keys`, `output_keys`, `success_criteria` +- `edges` — connecting all nodes with proper conditions +- `entry_node`, `terminal_nodes` +- `mcp_servers` — REQUIRED. Always include all three: \ +`[{"name": "hive-tools"}, {"name": "gcu-tools"}, {"name": "files-tools"}]` +- `loop_config` — `max_iterations`, `max_context_tokens` -### Customizing generated files +**Write the COMPLETE config in one `write_file` call. No TODOs, no placeholders.** \ +The queen writes final production-ready system prompts directly. -**CRITICAL: Use `edit_file` to customize TODO placeholders. \ -NEVER use `write_file` to rewrite generated files from scratch. \ -Rewriting breaks imports, class structure, and causes validation failures.** +**There are NO Python files.** The framework loads agent.json directly. -Safe to edit with `edit_file`: -- System prompts, tools, input_keys, output_keys, success_criteria in \ -nodes/__init__.py -- Goal description, success criteria values, constraint values, edge \ -definitions, identity_prompt in agent.py -- CLI options in __main__.py -- For triggers (timers/webhooks), add entries to triggers.json in the \ -agent's export directory +MCP servers are loaded from the global registry by name. 
+`load_built_agent("<agent_path>")` to load the agent into the \
+5. Call confirm_and_build(agent_name) to scaffold and start building
-  initialize_and_build_agent()
+  confirm_and_build(agent_name)
@@ -966,15 +957,15 @@ user says "replan", "go back", "let's redesign", "change the approach", \ ## CRITICAL — Graph topology errors require replanning, not code edits -If you discover that the agent graph has structural problems — GCU nodes \ +If you discover that the agent graph has structural problems — browser nodes \ in the linear flow, missing edges, wrong node connections, incorrect \ -sub-agent assignments — you MUST call replan_agent() and fix the draft. \ -Do NOT attempt to fix topology by editing agent.py directly. The graph \ +node connections — you MUST call replan_agent() and fix the draft. \ +Do NOT attempt to fix topology by editing agent.json directly. The graph \ structure is defined by the draft → dissolution → code-gen pipeline. \ -Editing code to rewire nodes bypasses the flowchart and creates drift \ -between what the user sees and what the code does. +Editing the config to rewire nodes bypasses the flowchart and creates drift \ +between what the user sees and what the config does. -**WRONG:** "Let me fix agent.py to remove GCU nodes from edges..." +**WRONG:** "Let me fix agent.json to remove browser nodes from edges..." **RIGHT:** Call replan_agent(), fix the draft with save_agent_draft(), \ get user approval, then confirm_and_build() → the corrected code is \ generated automatically. @@ -1100,18 +1091,15 @@ You wake up when: If the user asks for progress, call get_graph_status() ONCE and report. \ If the summary mentions issues, follow up with get_graph_status(focus="issues"). -## Subagent delegations (browser automation, GCU) +## Browser automation nodes -When the worker delegates to a subagent (e.g., GCU browser automation), expect it \ -to take 2-5 minutes. During this time: -- Progress will show 0% — this is NORMAL. The subagent only calls set_output at the end. -- Check get_graph_status(focus="full") for "subagent_activity" — this shows the \ -subagent's latest reasoning text and confirms it is making real progress. 
+is BLOCKED and waiting for your response. You MUST follow this \
→ creates visual-only draft graph (stays in PLANNING)\n" "- confirm_and_build() → records user approval of draft (stays in PLANNING)\n" - "- initialize_and_build_agent(agent_name?, nodes?) → scaffolds package + switches to " + "- confirm_and_build(agent_name) → scaffolds package + switches to " "BUILDING (requires draft + confirmation for new agents)\n" "- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n" "- load_built_agent(path) → switches to STAGING phase\n" diff --git a/core/framework/agents/queen/queen_memory_v2.py b/core/framework/agents/queen/queen_memory_v2.py index bf41a5a6..d2e61b6f 100644 --- a/core/framework/agents/queen/queen_memory_v2.py +++ b/core/framework/agents/queen/queen_memory_v2.py @@ -1,9 +1,15 @@ """Queen global memory helpers. -Global memory lives in ``~/.hive/queen/global_memory/`` and stores durable -cross-session knowledge about the user (profile, preferences, environment, -feedback). Each memory is an individual ``.md`` file with optional YAML -frontmatter (name, type, description). +Memory hierarchy:: + + ~/.hive/memories/ + global/ # shared across all queens and colonies + colonies/{name}/ # colony-scoped memories + agents/queens/{name}/ # queen-specific memories + agents/{name}/ # per-worker-agent memories + +Each memory is an individual ``.md`` file with optional YAML frontmatter +(name, type, description). """ from __future__ import annotations @@ -21,7 +27,7 @@ logger = logging.getLogger(__name__) GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] 
= ("profile", "preference", "environment", "feedback") -_HIVE_QUEEN_DIR = Path.home() / ".hive" / "queen" +from framework.config import MEMORIES_DIR MAX_FILES: int = 200 MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file @@ -31,8 +37,23 @@ _HEADER_LINE_LIMIT: int = 30 def global_memory_dir() -> Path: - """Return the queen-global memory directory.""" - return _HIVE_QUEEN_DIR / "global_memory" + """Return the global memory directory (shared across all queens/colonies).""" + return MEMORIES_DIR / "global" + + +def colony_memory_dir(colony_name: str) -> Path: + """Return the memory directory for a named colony.""" + return MEMORIES_DIR / "colonies" / colony_name + + +def queen_memory_dir(queen_name: str = "default") -> Path: + """Return the memory directory for a named queen.""" + return MEMORIES_DIR / "agents" / "queens" / queen_name + + +def agent_memory_dir(agent_name: str) -> Path: + """Return the memory directory for a worker agent.""" + return MEMORIES_DIR / "agents" / agent_name # --------------------------------------------------------------------------- diff --git a/core/framework/agents/queen/recall_selector.py b/core/framework/agents/queen/recall_selector.py index ad1676eb..f2ec6d21 100644 --- a/core/framework/agents/queen/recall_selector.py +++ b/core/framework/agents/queen/recall_selector.py @@ -91,7 +91,19 @@ async def select_memories( resp.stop_reason, ) return [] - data = json.loads(raw) + # Some models wrap JSON in markdown fences or add preamble text. + # Try to extract the JSON object if raw parse fails. 
+ try: + data = json.loads(raw) + except json.JSONDecodeError: + import re + + m = re.search(r"\{.*\}", raw, re.DOTALL) + if m: + data = json.loads(m.group()) + else: + logger.warning("recall: LLM returned non-JSON: %.200s", raw) + return [] selected = data.get("selected_memories", []) valid_names = {f.filename for f in files} result = [s for s in selected if s in valid_names][:max_results] diff --git a/core/framework/agents/queen/reference/anti_patterns.md b/core/framework/agents/queen/reference/anti_patterns.md index 4e6bf085..1fa10218 100644 --- a/core/framework/agents/queen/reference/anti_patterns.md +++ b/core/framework/agents/queen/reference/anti_patterns.md @@ -25,10 +25,7 @@ 14. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path. ## GCU Errors -15. **Manually wiring browser tools on event_loop nodes** — Use `node_type="gcu"` which auto-includes browser tools. Do NOT manually list browser tool names. -16. **Using GCU nodes as regular graph nodes** — GCU nodes are subagents only. They must ONLY appear in `sub_agents=["gcu-node-id"]` and be invoked via `delegate_to_sub_agent()`. Never connect via edges or use as entry/terminal nodes. -17. **Reusing the same GCU node ID for parallel tasks** — Each concurrent browser task needs a distinct GCU node ID (e.g. `gcu-site-a`, `gcu-site-b`). Two `delegate_to_sub_agent` calls with the same `agent_id` share a browser profile and will interfere with each other's pages. -18. **Passing `profile=` in GCU tool calls** — Profile isolation for parallel subagents is automatic. The framework injects a unique profile per subagent via an asyncio `ContextVar`. Hardcoding `profile="default"` in a GCU system prompt breaks this isolation. +15. **Manually wiring browser tools on event_loop nodes** — Browser nodes use tools: {policy: "all"} to get all browser tools. ## Worker Agent Errors 19. **Adding client-facing intake node to workers** — The queen owns intake. 
Workers should start with an autonomous processing node. Route worker review/approval through queen escalation instead of direct worker HITL. diff --git a/core/framework/agents/queen/reference/file_templates_declarative.md b/core/framework/agents/queen/reference/file_templates_declarative.md new file mode 100644 index 00000000..97afbb81 --- /dev/null +++ b/core/framework/agents/queen/reference/file_templates_declarative.md @@ -0,0 +1,227 @@ +# Declarative Agent File Templates + +Agents are defined as a single `agent.yaml` file. No Python code needed. +The runner loads this file directly -- no `agent.py`, `config.py`, or +`nodes/__init__.py` required. + +## agent.yaml -- Complete Agent Definition + +```yaml +name: my-agent +version: 1.0.0 +description: What this agent does. + +metadata: + intro_message: Welcome! What would you like me to do? + +# Template variables -- substituted into system_prompt and identity_prompt +# via {{variable_name}} syntax. Use this for config values that appear +# in prompts (spreadsheet IDs, API endpoints, account names, etc.) +variables: + spreadsheet_id: "1ZVxWDL..." + sheet_name: "contacts" + +goal: + description: What this agent achieves. + success_criteria: + - "First success criterion" + - "Second success criterion" + constraints: + - "Hard constraint the agent must respect" + +identity_prompt: | + You are a helpful agent. + +conversation_mode: continuous # always "continuous" for Hive agents + +loop_config: + max_iterations: 100 + max_tool_calls_per_turn: 30 + max_context_tokens: 32000 + +# MCP servers to connect (resolved by name from ~/.hive/mcp_registry/) +mcp_servers: + - name: hive-tools + - name: gcu-tools + +nodes: + # Node 1: Process (autonomous entry node) + # The queen handles intake and passes structured input via + # run_agent_with_input(task). NO client-facing intake node. 
+ - id: process + name: Process + description: Execute the task using available tools + max_node_visits: 0 # 0 = unlimited (forever-alive agents) + input_keys: [user_request, feedback] + output_keys: [results] + nullable_output_keys: [feedback] + tools: + policy: explicit + allowed: [web_search, web_scrape, save_data, load_data, list_data_files] + success_criteria: Results are complete and accurate. + system_prompt: | + You are a processing agent. Your task is in memory under "user_request". + If "feedback" is present, this is a revision. + + Work in phases: + 1. Use tools to gather/process data + 2. Analyze results + 3. Call set_output in a SEPARATE turn: + - set_output("results", "structured results") + + # Node 2: Handoff (autonomous) + - id: handoff + name: Handoff + description: Prepare worker results for queen review + max_node_visits: 0 + input_keys: [results, user_request] + output_keys: [next_action, feedback, worker_summary] + nullable_output_keys: [feedback, worker_summary] + tools: + policy: none # handoff nodes don't need tools + success_criteria: Results are packaged for queen decision-making. + system_prompt: | + Do NOT talk to the user directly. The queen is the only user interface. 
+ + If blocked, call escalate(reason, context) then set: + - set_output("next_action", "escalated") + - set_output("feedback", "what help is needed") + + Otherwise summarize and set: + - set_output("worker_summary", "short summary for queen") + - set_output("next_action", "done") or "revise" + - set_output("feedback", "what to revise") only when revising + +edges: + - from_node: process + to_node: handoff + # Feedback loop + - from_node: handoff + to_node: process + condition: conditional + condition_expr: "str(next_action).lower() == 'revise'" + priority: 2 + # Escalation loop + - from_node: handoff + to_node: process + condition: conditional + condition_expr: "str(next_action).lower() == 'escalated'" + priority: 3 + # Loop back for next task + - from_node: handoff + to_node: process + condition: conditional + condition_expr: "str(next_action).lower() == 'done'" + +entry_node: process +terminal_nodes: [] # [] = forever-alive +``` + +## Key differences from Python templates + +| Before (Python) | After (YAML) | +|-------------------------------------|----------------------------------------| +| `agent.py` (250 lines boilerplate) | Not needed | +| `config.py` (dataclass + metadata) | `variables:` + `metadata:` in YAML | +| `nodes/__init__.py` (NodeSpec calls)| `nodes:` list in YAML | +| `__init__.py`, `__main__.py` | Not needed | +| f-string config injection | `{{variable_name}}` templates | +| `mcp_servers.json` (separate file) | `mcp_servers:` in YAML (or keep file) | + +## Node types + +| Type | Description | Tools | +|--------------|---------------------------------------|--------------------------| +| `event_loop` | LLM-driven orchestration (default) | Explicit list or `none` | +| `gcu` | Browser automation via GCU tools | `policy: all` (auto) | + +## Tool access policies + +```yaml +# Explicit list (recommended for most nodes) +tools: + policy: explicit + allowed: [web_search, save_data] + +# All tools (for browser automation nodes) +tools: + policy: all + +# 
No tools (for handoff/summary nodes) +tools: + policy: none +``` + +## Edge conditions + +| Condition | When to use | +|---------------|-------------------------------------------------------| +| `on_success` | Default. Next node after current succeeds. | +| `on_failure` | Fallback path when current node fails. | +| `always` | Always traverse regardless of outcome. | +| `conditional` | Evaluate `condition_expr` against shared memory keys. | +| `llm_decide` | Let the LLM decide at runtime. | + +## Template variables + +Use `{{variable_name}}` in `system_prompt` and `identity_prompt`. +Variables are defined in the top-level `variables:` map. + +```yaml +variables: + spreadsheet_id: "1ZVxWDL..." + api_endpoint: "https://api.example.com" + +nodes: + - id: start + system_prompt: | + Connect to spreadsheet: {{spreadsheet_id}} + API endpoint: {{api_endpoint}} +``` + +## Entry points + +Default is a single manual entry point. For timer/scheduled triggers: + +```yaml +entry_points: + - id: default + trigger_type: manual + - id: daily-check + trigger_type: timer + trigger_config: + interval_minutes: 30 +``` + +## mcp_servers.json -- Still Supported + +The `mcp_servers.json` file is still loaded automatically if present alongside +`agent.yaml`. You can also inline servers in the YAML: + +```yaml +mcp_servers: + - name: hive-tools + - name: gcu-tools +``` + +Both approaches work. The JSON file takes precedence for backward compatibility. + +## Migration from Python agents + +Run the migration tool to convert existing agents: + +```bash +uv run python -m framework.tools.migrate_agent exports/my_agent +``` + +This generates `agent.yaml` from the existing `agent.py` + `nodes/` + `config.py`. +The original files are left untouched. Once verified, you can delete the Python files. 
+ +## Files after migration + +``` +my_agent/ + agent.yaml # The only required file + mcp_servers.json # Optional (can inline in YAML) + flowchart.json # Optional (auto-generated) +``` diff --git a/core/framework/agents/queen/reference/framework_guide.md b/core/framework/agents/queen/reference/framework_guide.md index 5f93511a..1a0f7e4f 100644 --- a/core/framework/agents/queen/reference/framework_guide.md +++ b/core/framework/agents/queen/reference/framework_guide.md @@ -1,306 +1,193 @@ -# Hive Agent Framework — Condensed Reference +# Hive Agent Framework -- Condensed Reference ## Architecture -Agents are Python packages in `exports/`: +Agents are declarative JSON configs in `exports/`: ``` exports/my_agent/ -├── __init__.py # MUST re-export ALL module-level vars from agent.py -├── __main__.py # CLI (run, tui, info, validate, shell) -├── agent.py # Graph construction (goal, edges, agent class) -├── config.py # Runtime config -├── nodes/__init__.py # Node definitions (NodeSpec) -├── mcp_servers.json # MCP tool server config -└── tests/ # pytest tests + agent.json # The entire agent definition + mcp_servers.json # MCP tool server config (optional, prefer registry refs) ``` -## Agent Loading Contract +No Python files. No `__init__.py`, `__main__.py`, `config.py`, or `nodes/`. 
-`AgentRunner.load()` imports the package (`__init__.py`) and reads these -module-level variables via `getattr()`: +## Agent Loading -| Variable | Required | Default if missing | Consequence | -|----------|----------|--------------------|-------------| -| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" | -| `nodes` | YES | `None` | **FATAL** — same error | -| `edges` | YES | `None` | **FATAL** — same error | -| `entry_node` | no | `nodes[0].id` | Probably wrong node | -| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails | -| `terminal_nodes` | **YES** | `[]` | **FATAL** — graph must have at least one terminal node | -| `pause_nodes` | no | `[]` | OK | -| `conversation_mode` | no | not passed | Isolated mode (no context carryover) | -| `identity_prompt` | no | not passed | No agent-level identity | -| `loop_config` | no | `{}` | No iteration limits | -| `triggers.json` (file) | no | not present | No triggers (timers, webhooks) | +`AgentLoader.load()` reads `agent.json` and builds the execution graph. +If `agent.py` exists (legacy), it's loaded as a Python module instead. -**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from -`agent.py`. Missing exports silently fall back to defaults, causing -hard-to-debug failures. +## agent.json Schema -**Why `default_agent.validate()` is NOT sufficient:** -`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges). -These are always correct because the constructor references agent.py's module -vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`), -not the class. So `validate()` passes while `AgentRunner.load()` fails. -Always test with `AgentRunner.load("exports/{name}")` — this is the same -code path the TUI and `hive run` use. 
- -## Goal - -Defines success criteria and constraints: -```python -goal = Goal( - id="kebab-case-id", - name="Display Name", - description="What the agent does", - success_criteria=[ - SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25), - ], - constraints=[ - Constraint(id="c-id", description="...", constraint_type="hard", category="quality"), - ], -) +```json +{ + "name": "my-agent", + "version": "1.0.0", + "description": "What this agent does", + "goal": { + "description": "What to achieve", + "success_criteria": ["criterion 1", "criterion 2"], + "constraints": ["constraint 1"] + }, + "identity_prompt": "You are a helpful agent.", + "conversation_mode": "continuous", + "loop_config": { + "max_iterations": 100, + "max_tool_calls_per_turn": 30, + "max_context_tokens": 32000 + }, + "mcp_servers": [ + {"name": "hive-tools"}, + {"name": "gcu-tools"} + ], + "variables": { + "spreadsheet_id": "1ZVx..." + }, + "nodes": [...], + "edges": [...], + "entry_node": "process", + "terminal_nodes": [] +} ``` -- 3-5 success criteria, weights sum to 1.0 -- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional) -## NodeSpec Fields +## Template Variables + +Use `{{variable_name}}` in `system_prompt` and `identity_prompt`. 
Variables +are defined in the top-level `variables` object: + +```json +{ + "variables": {"sheet_id": "1ZVx..."}, + "nodes": [{ + "id": "start", + "system_prompt": "Use sheet: {{sheet_id}}" + }] +} +``` + +## Node Fields | Field | Type | Default | Description | |-------|------|---------|-------------| | id | str | required | kebab-case identifier | -| name | str | required | Display name | +| name | str | id | Display name | | description | str | required | What the node does | -| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) | -| input_keys | list[str] | required | Memory keys this node reads | -| output_keys | list[str] | required | Memory keys this node writes via set_output | +| node_type | str | "event_loop" | `"event_loop"` | +| input_keys | list | [] | Memory keys this node reads | +| output_keys | list | [] | Memory keys this node writes via set_output | | system_prompt | str | "" | LLM instructions | -| tools | list[str] | [] | Tool names from MCP servers | -| client_facing | bool | False | Deprecated compatibility field. 
Queen interactivity is implicit; workers should escalate instead | -| nullable_output_keys | list[str] | [] | Keys that may remain unset | -| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops | -| max_retries | int | 3 | Retries on failure | +| tools | object | {} | Tool access policy (see below) | +| nullable_output_keys | list | [] | Keys that may remain unset | +| max_node_visits | int | 1 | 0=unlimited (for forever-alive agents) | | success_criteria | str | "" | Natural language for judge evaluation | +| client_facing | bool | false | Whether output is shown to user | -## EdgeSpec Fields +## Tool Access Policies + +Each node declares its tools via a policy object: + +```json +{"tools": {"policy": "explicit", "allowed": ["web_search", "save_data"]}} +{"tools": {"policy": "all"}} +{"tools": {"policy": "none"}} +``` + +- `explicit` (default): only named tools. Empty `allowed` = zero tools. +- `all`: all tools from registry (e.g. for browser automation nodes). +- `none`: no tools (for handoff/summary nodes). 
+ +## Edge Fields | Field | Type | Description | |-------|------|-------------| -| id | str | kebab-case identifier | -| source | str | Source node ID | -| target | str | Target node ID | -| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL | -| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) | -| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) | +| from_node | str | Source node ID | +| to_node | str | Target node ID | +| condition | str | `on_success`, `on_failure`, `always`, `conditional` | +| condition_expr | str | Python expression for conditional routing | +| priority | int | Higher = evaluated first | + +condition_expr examples: +- `"needs_more_research == True"` +- `"str(next_action).lower() == 'revise'"` ## Key Patterns -### STEP 1/STEP 2 (Client-Facing Nodes) -``` -**STEP 1 — Respond to the user (text only, NO tool calls):** -[Present information, ask questions] - -**STEP 2 — After the user responds, call set_output:** -- set_output("key", "value based on user response") -``` -This prevents premature set_output before user interaction. - ### Fewer, Richer Nodes (CRITICAL) -**Hard limit: 3-6 nodes for most agents.** Never exceed 6 unless the user -explicitly requests a complex multi-phase pipeline. +**Hard limit: 3-6 nodes for most agents.** Each node boundary serializes +outputs and destroys in-context information. Merge unless: +1. Client-facing boundary (different interaction models) +2. Disjoint tool sets +3. Parallel execution (fan-out branches) -Each node boundary serializes outputs to the shared buffer and **destroys** all -in-context information: tool call results, intermediate reasoning, conversation -history. A research node that searches, fetches, and analyzes in ONE node keeps -all source material in its conversation context. Split across 3 nodes, each -downstream node only sees the serialized summary string. 
- -**Decision framework — merge unless ANY of these apply:** -1. **Client-facing boundary** — Autonomous and client-facing work MUST be - separate nodes (different interaction models) -2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web - search vs database), separate nodes make sense -3. **Parallel execution** — Fan-out branches must be separate nodes - -**Red flags that you have too many nodes:** -- A node with 0 tools (pure LLM reasoning) → merge into predecessor/successor -- A node that sets only 1 trivial output → collapse into predecessor -- Multiple consecutive autonomous nodes → combine into one rich node -- A "report" node that presents analysis → merge into the client-facing node -- A "confirm" or "schedule" node that doesn't call any external service → remove - -**Typical agent structure (2 nodes):** +**Typical structure (2 nodes):** ``` -process (autonomous) ←→ review (queen-mediated) -``` -The queen owns intake — she gathers requirements from the user, then -passes structured input via `run_agent_with_input(task)`. When building -the agent, design the entry node's `input_keys` to match what the queen -will provide at run time. Worker agents should NOT have a client-facing -intake node. Mid-execution review/approval should happen through queen -escalation rather than direct worker HITL. 
- -For simpler agents, just 1 autonomous node: -``` -process (autonomous) — loops back to itself +process (autonomous) <-> review (queen-mediated) ``` -### nullable_output_keys -For inputs that only arrive on certain edges: -```python -research_node = NodeSpec( - input_keys=["brief", "feedback"], - nullable_output_keys=["feedback"], # Only present on feedback edge - max_node_visits=3, -) -``` - -### Mutually Exclusive Outputs -For routing decisions: -```python -review_node = NodeSpec( - output_keys=["approved", "feedback"], - nullable_output_keys=["approved", "feedback"], # Node sets one or the other -) -``` - -### Continuous Loop Pattern -Mark the primary event_loop node as terminal: `terminal_nodes=["process"]`. -The node has `output_keys` and can complete when the agent finishes its work. -Use `conversation_mode="continuous"` to preserve context across transitions. +The queen owns intake. Worker agents should NOT have a client-facing intake +node. Mid-execution review should happen through queen escalation. ### set_output - Synthetic tool injected by framework - Call separately from real tool calls (separate turn) - `set_output("key", "value")` stores to the shared buffer -## Edge Conditions - -| Condition | When | -|-----------|------| -| ON_SUCCESS | Node completed successfully | -| ON_FAILURE | Node failed | -| ALWAYS | Unconditional | -| CONDITIONAL | condition_expr evaluates to True against memory | - -condition_expr examples: -- `"needs_more_research == True"` -- `"str(next_action).lower() == 'new_agent'"` -- `"feedback is not None"` - -## Graph Lifecycle +### Graph Lifecycle | Pattern | terminal_nodes | When | |---------|---------------|------| -| **Continuous loop** | `["node-with-output-keys"]` | **DEFAULT for all agents** | +| Continuous loop | `["node-with-output-keys"]` | DEFAULT for all agents | | Linear | `["last-node"]` | One-shot/batch agents | -**Every graph must have at least one terminal node.** Terminal nodes -define where execution ends. 
For interactive agents that loop continuously, -mark the primary event_loop node as terminal (it has `output_keys` and can -complete at any point). The framework default for `max_node_visits` is 0 -(unbounded), so nodes work correctly in continuous loops without explicit -override. Only set `max_node_visits > 0` in one-shot agents with feedback loops. -Every node must have at least one outgoing edge — no dead ends. +Every graph must have at least one terminal node. -## Continuous Conversation Mode +### Continuous Conversation Mode `conversation_mode` has ONLY two valid states: -- `"continuous"` — recommended for interactive agents -- Omit entirely — isolated per-node conversations (each node starts fresh) +- `"continuous"` -- recommended (context carries across node transitions) +- Omit entirely -- isolated per-node conversations -**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`, -`"adaptive"`, `"shared"`. These do not exist in the framework. - -When `conversation_mode="continuous"`: -- Same conversation thread carries across node transitions -- Layered system prompts: identity (agent-level) + narrative + focus (per-node) -- Transition markers inserted at boundaries -- Compaction happens opportunistically at phase transitions +**INVALID values:** `"client_facing"`, `"interactive"`, `"shared"`. ## loop_config Only three valid keys: -```python -loop_config = { - "max_iterations": 100, # Max LLM turns per node visit - "max_tool_calls_per_turn": 20, # Max tool calls per LLM response - "max_context_tokens": 32000, # Triggers conversation compaction +```json +{ + "max_iterations": 100, + "max_tool_calls_per_turn": 20, + "max_context_tokens": 32000 } ``` -**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`, -`"temperature"`. These are silently ignored or cause errors. 
## Data Tools (Spillover) For large data that exceeds context: -- `save_data(filename, data)` — Write to session data dir -- `load_data(filename, offset, limit)` — Read with pagination -- `list_data_files()` — List files -- `serve_file_to_user(filename, label)` — Clickable file:// URI +- `save_data(filename, data)` -- write to session data dir +- `load_data(filename, offset, limit)` -- read with pagination +- `list_data_files()` -- list files +- `serve_file_to_user(filename, label)` -- clickable file URI -`data_dir` is auto-injected by framework — LLM never sees it. +`data_dir` is auto-injected by framework. ## Fan-Out / Fan-In -Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather(). -- Parallel nodes must have disjoint output_keys -- Only one branch may have client_facing nodes -- Fan-in node gets all outputs in the shared buffer +Multiple `on_success` edges from same source = parallel execution. +Parallel nodes must have disjoint output_keys. ## Judge System - **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set - **SchemaJudge**: Validates against Pydantic model -- **Custom**: Implement `evaluate(context) -> JudgeVerdict` - -Judge is the SOLE acceptance mechanism — no ad-hoc framework gating. 
- -## Triggers (Timers, Webhooks) - -For agents that react to external events, create a `triggers.json` file -in the agent's export directory: - -```json -[ - { - "id": "daily-check", - "name": "Daily Check", - "trigger_type": "timer", - "trigger_config": {"cron": "0 9 * * *"}, - "task": "Run the daily check process" - } -] -``` - -### Key Fields -- `trigger_type`: `"timer"` or `"webhook"` -- `trigger_config`: `{"cron": "0 9 * * *"}` or `{"interval_minutes": 20}` -- `task`: describes what the worker should do when the trigger fires -- Triggers can also be created/removed at runtime via `set_trigger` / `remove_trigger` queen tools ## Tool Discovery -Do NOT rely on a static tool list — it will be outdated. Always call -`list_agent_tools()` with NO arguments first to see ALL available tools. -Only use `group=` or `output_schema=` as follow-up calls after seeing the -full list. +Always call `list_agent_tools()` first to see available tools. +Do NOT rely on a static tool list. ``` -list_agent_tools() # ALWAYS call this first -list_agent_tools(group="gmail", output_schema="full") # then drill into a category -list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools +list_agent_tools() # full summary +list_agent_tools(group="gmail", output_schema="full") # drill into category ``` -After building, run `validate_agent_package("{name}")` to check everything at once. - -Common tool categories (verify via list_agent_tools): -- **Web**: search, scrape, PDF -- **Data**: save/load/append/list data files, serve to user -- **File**: view, write, replace, diff, list, grep -- **Communication**: email, gmail, slack, telegram -- **CRM**: hubspot, apollo, calcom -- **GitHub**: stargazers, user profiles, repos -- **Vision**: image analysis -- **Time**: current time +After building, run `validate_agent_package("{name}")` to check everything. 
diff --git a/core/framework/agents/queen/reference/gcu_guide.md b/core/framework/agents/queen/reference/gcu_guide.md index c27db24d..cf254637 100644 --- a/core/framework/agents/queen/reference/gcu_guide.md +++ b/core/framework/agents/queen/reference/gcu_guide.md @@ -1,158 +1,53 @@ -# GCU Browser Automation Guide +# Browser Automation Guide -## When to Use GCU Nodes +## When to Use Browser Nodes -Use `node_type="gcu"` when: -- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs) -- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content) -- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files +Use browser nodes (with `tools: {policy: "all"}`) when: +- The task requires interacting with web pages (clicking, typing, navigating) +- No API is available for the target service +- The user is already logged in to the target site -Do NOT use GCU for: -- Static content that `web_scrape` handles fine -- API-accessible data (use the API directly) -- PDF/file processing -- Anything that doesn't require a browser UI +## What Browser Nodes Are -## What GCU Nodes Are +- Regular `event_loop` nodes with browser tools from gcu-tools MCP server +- Set `tools: {policy: "all"}` to give access to all browser tools +- Wire into the graph with edges like any other node +- No special node_type needed -- `node_type="gcu"` — a declarative enhancement over `event_loop` -- Framework auto-prepends browser best-practices system prompt -- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server -- Same underlying `EventLoopNode` class — no new imports needed -- `tools=[]` is correct — tools are auto-populated at runtime +## Available Browser Tools -## GCU Architecture Pattern +All tools are prefixed with `browser_`: +- `browser_start`, `browser_open` -- launch/navigate +- `browser_click`, 
`browser_fill`, `browser_type` -- interact +- `browser_snapshot` -- read page content (preferred over screenshot) +- `browser_screenshot` -- visual capture +- `browser_scroll`, `browser_wait` -- navigation helpers +- `browser_evaluate` -- run JavaScript -GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges. +## System Prompt Tips for Browser Nodes -- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work -- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")` -- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False` -- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)` - -## GCU Node Definition Template - -```python -gcu_browser_node = NodeSpec( - id="gcu-browser-worker", - name="Browser Worker", - description="Browser subagent that does X.", - node_type="gcu", - client_facing=False, - max_node_visits=1, - input_keys=[], - output_keys=["result"], - tools=[], # Auto-populated with all browser tools - system_prompt="""\ -You are a browser agent. Your job: [specific task]. - -## Workflow -1. browser_start (only if no browser is running yet) -2. browser_open(url=TARGET_URL) — note the returned targetId -3. browser_snapshot to read the page -4. [task-specific steps] -5. set_output("result", JSON) - -## Output format -set_output("result", JSON) with: -- [field]: [type and description] -""", -) +``` +1. Use browser_snapshot() to read page content (NOT browser_get_text) +2. Use browser_wait(seconds=2-3) after navigation for page load +3. If you hit an auth wall, call set_output with an error and move on +4. Keep tool calls per turn <= 10 for reliability ``` -## Parent Node Template (orchestrating GCU subagents) - -```python -orchestrator_node = NodeSpec( - id="orchestrator", - ... 
- node_type="event_loop", - sub_agents=["gcu-browser-worker"], - system_prompt="""\ -... -delegate_to_sub_agent( - agent_id="gcu-browser-worker", - task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]." -) -... -""", - tools=[], # Orchestrator doesn't need browser tools -) -``` - -## mcp_servers.json with GCU +## Example ```json { - "hive-tools": { ... }, - "gcu-tools": { - "transport": "stdio", - "command": "uv", - "args": ["run", "python", "-m", "gcu.server", "--stdio"], - "cwd": "../../tools", - "description": "GCU tools for browser automation" - } + "id": "scan-profiles", + "name": "Scan LinkedIn Profiles", + "description": "Navigate LinkedIn search results and collect profile data", + "tools": {"policy": "all"}, + "input_keys": ["search_url"], + "output_keys": ["profiles"], + "system_prompt": "Navigate to the search URL, paginate through results..." } ``` -Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine. - -## GCU System Prompt Best Practices - -Key rules to bake into GCU node prompts: - -- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML -- Always `browser_wait` after navigation -- Use large scroll amounts (~2000-5000) for lazy-loaded content -- For spillover files, use `run_command` with grep, not `read_file` -- If auth wall detected, report immediately — don't attempt login -- Keep tool calls per turn ≤10 -- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call - -## Multiple Concurrent GCU Subagents - -When a task can be parallelized across multiple sites or profiles, declare a distinct GCU -node for each and invoke them all in the same LLM turn. The framework batches all -`delegate_to_sub_agent` calls made in one turn and runs them with `asyncio.gather`, so -they execute concurrently — not sequentially. 
- -**Each GCU subagent automatically gets its own isolated browser context** — no `profile=` -argument is needed in tool calls. The framework derives a unique profile from the subagent's -node ID and instance counter and injects it via an asyncio `ContextVar` before the subagent -runs. - -### Example: three sites in parallel - -```python -# Three distinct GCU nodes -gcu_site_a = NodeSpec(id="gcu-site-a", node_type="gcu", ...) -gcu_site_b = NodeSpec(id="gcu-site-b", node_type="gcu", ...) -gcu_site_c = NodeSpec(id="gcu-site-c", node_type="gcu", ...) - -orchestrator = NodeSpec( - id="orchestrator", - node_type="event_loop", - sub_agents=["gcu-site-a", "gcu-site-b", "gcu-site-c"], - system_prompt="""\ -Call all three subagents in a single response to run them in parallel: - delegate_to_sub_agent(agent_id="gcu-site-a", task="Scrape prices from site A") - delegate_to_sub_agent(agent_id="gcu-site-b", task="Scrape prices from site B") - delegate_to_sub_agent(agent_id="gcu-site-c", task="Scrape prices from site C") -""", -) +Connected via regular edges: +``` +search-setup -> scan-profiles -> process-results ``` - -**Rules:** -- Use distinct node IDs for each concurrent task — sharing an ID shares the browser context. -- The GCU node prompts do not need to mention `profile=`; isolation is automatic. -- Cleanup is automatic at session end, but GCU nodes can call `browser_stop()` explicitly - if they want to release resources mid-run. 
- -## GCU Anti-Patterns - -- Using `browser_screenshot` to read text (use `browser_snapshot` instead; screenshots are for visual context only) -- Re-navigating after scrolling (resets scroll position) -- Attempting login on auth walls -- Forgetting `target_id` in multi-tab scenarios -- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern -- Making GCU nodes `client_facing=True` (they should be autonomous subagents) diff --git a/core/framework/agents/queen/reflection_agent.py b/core/framework/agents/queen/reflection_agent.py index 4bb20d64..d2033717 100644 --- a/core/framework/agents/queen/reflection_agent.py +++ b/core/framework/agents/queen/reflection_agent.py @@ -2,7 +2,7 @@ A lightweight side agent that runs after each queen LLM turn. It inspects recent conversation messages and extracts durable user knowledge into -individual memory files in ``~/.hive/queen/global_memory/``. +individual memory files in ``~/.hive/memories/global/``. Two reflection types: - **Short reflection**: after conversational queen turns. Distills @@ -493,7 +493,7 @@ async def subscribe_reflection_triggers( Call this once during queen setup. Returns a list of event-bus subscription IDs for cleanup during session teardown. 
""" - from framework.runtime.event_bus import EventType + from framework.host.event_bus import EventType mem_dir = memory_dir or global_memory_dir() _lock = asyncio.Lock() diff --git a/core/framework/agents/queen/tests/conftest.py b/core/framework/agents/queen/tests/conftest.py index de518df2..d34d2bf2 100644 --- a/core/framework/agents/queen/tests/conftest.py +++ b/core/framework/agents/queen/tests/conftest.py @@ -22,10 +22,10 @@ def mock_mode(): @pytest_asyncio.fixture(scope="session") async def runner(tmp_path_factory, mock_mode): - from framework.runner.runner import AgentRunner + from framework.loader.agent_loader import AgentLoader storage = tmp_path_factory.mktemp("agent_storage") - r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage) + r = AgentLoader.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage) r._setup() yield r await r.cleanup_async() diff --git a/core/framework/cli.py b/core/framework/cli.py index e7752922..86c51cd1 100644 --- a/core/framework/cli.py +++ b/core/framework/cli.py @@ -79,7 +79,7 @@ def main(): subparsers = parser.add_subparsers(dest="command", required=True) # Register runner commands (run, info, validate, list, shell) - from framework.runner.cli import register_commands + from framework.loader.cli import register_commands register_commands(subparsers) @@ -99,7 +99,7 @@ def main(): register_debugger_commands(subparsers) # Register MCP registry commands (mcp install, mcp add, ...) 
- from framework.runner.mcp_registry_cli import register_mcp_commands + from framework.loader.mcp_registry_cli import register_mcp_commands register_mcp_commands(subparsers) diff --git a/core/framework/config.py b/core/framework/config.py index 095f49fc..341f0ed4 100644 --- a/core/framework/config.py +++ b/core/framework/config.py @@ -12,13 +12,47 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any -from framework.graph.edge import DEFAULT_MAX_TOKENS +from framework.orchestrator.edge import DEFAULT_MAX_TOKENS + +# --------------------------------------------------------------------------- +# Hive home directory structure +# --------------------------------------------------------------------------- + +HIVE_HOME = Path.home() / ".hive" +QUEENS_DIR = HIVE_HOME / "agents" / "queens" +COLONIES_DIR = HIVE_HOME / "colonies" +MEMORIES_DIR = HIVE_HOME / "memories" + + +def queen_dir(queen_name: str = "default") -> Path: + """Return the storage directory for a named queen agent.""" + return QUEENS_DIR / queen_name + + +def colony_dir(colony_name: str) -> Path: + """Return the directory for a named colony.""" + return COLONIES_DIR / colony_name + + +def memory_dir(scope: str, name: str | None = None) -> Path: + """Return memory dir for a scope. 
+ + Examples:: + + memory_dir("global") -> ~/.hive/memories/global + memory_dir("colonies", "my_agent") -> ~/.hive/memories/colonies/my_agent + memory_dir("agents/queens", "default")-> ~/.hive/memories/agents/queens/default + memory_dir("agents", "worker_name") -> ~/.hive/memories/agents/worker_name + """ + base = MEMORIES_DIR / scope + return base / name if name else base + # --------------------------------------------------------------------------- # Low-level config file access # --------------------------------------------------------------------------- -HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json" +HIVE_CONFIG_FILE = HIVE_HOME / "configuration.json" # Hive LLM router endpoint (Anthropic-compatible). # litellm's Anthropic handler appends /v1/messages, so this is just the base host. @@ -130,7 +164,7 @@ def get_worker_api_key() -> str | None: # Worker-specific subscription / env var if worker_llm.get("use_claude_code_subscription"): try: - from framework.runner.runner import get_claude_code_token + from framework.loader.agent_loader import get_claude_code_token token = get_claude_code_token() if token: @@ -140,7 +174,7 @@ def get_worker_api_key() -> str | None: if worker_llm.get("use_codex_subscription"): try: - from framework.runner.runner import get_codex_token + from framework.loader.agent_loader import get_codex_token token = get_codex_token() if token: @@ -150,7 +184,7 @@ def get_worker_api_key() -> str | None: if worker_llm.get("use_kimi_code_subscription"): try: - from framework.runner.runner import get_kimi_code_token + from framework.loader.agent_loader import get_kimi_code_token token = get_kimi_code_token() if token: @@ -160,7 +194,7 @@ def get_worker_api_key() -> str | None: if worker_llm.get("use_antigravity_subscription"): try: - from framework.runner.runner import get_antigravity_token + from framework.loader.agent_loader import get_antigravity_token token = get_antigravity_token() if token: @@ -216,7 +250,7 @@ def 
get_worker_llm_extra_kwargs() -> dict[str, Any]: "User-Agent": "CodexBar", } try: - from framework.runner.runner import get_codex_account_id + from framework.loader.agent_loader import get_codex_account_id account_id = get_codex_account_id() if account_id: @@ -263,22 +297,43 @@ def get_max_context_tokens() -> int: return get_hive_config().get("llm", {}).get("max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS) +def get_api_keys() -> list[str] | None: + """Return a list of API keys if ``api_keys`` is configured, else ``None``. + + This supports key-pool rotation: configure multiple keys in + ``~/.hive/configuration.json`` under ``llm.api_keys`` and the + :class:`~framework.llm.key_pool.KeyPool` will rotate through them. + """ + llm = get_hive_config().get("llm", {}) + keys = llm.get("api_keys") + if keys and isinstance(keys, list) and len(keys) > 0: + return [k for k in keys if k] # filter empties + return None + + def get_api_key() -> str | None: """Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code. Priority: + 0. Explicit key pool (``api_keys`` list) -- returns first key for + single-key callers; full pool available via :func:`get_api_keys`. 1. Claude Code subscription (``use_claude_code_subscription: true``) reads the OAuth token from ``~/.claude/.credentials.json``. 2. Codex subscription (``use_codex_subscription: true``) reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``. 3. Environment variable named in ``api_key_env_var``. """ + # If an explicit key pool is configured, use the first key. 
+ pool_keys = get_api_keys() + if pool_keys: + return pool_keys[0] + llm = get_hive_config().get("llm", {}) # Claude Code subscription: read OAuth token directly if llm.get("use_claude_code_subscription"): try: - from framework.runner.runner import get_claude_code_token + from framework.loader.agent_loader import get_claude_code_token token = get_claude_code_token() if token: @@ -289,7 +344,7 @@ def get_api_key() -> str | None: # Codex subscription: read OAuth token from Keychain / auth.json if llm.get("use_codex_subscription"): try: - from framework.runner.runner import get_codex_token + from framework.loader.agent_loader import get_codex_token token = get_codex_token() if token: @@ -300,7 +355,7 @@ def get_api_key() -> str | None: # Kimi Code subscription: read API key from ~/.kimi/config.toml if llm.get("use_kimi_code_subscription"): try: - from framework.runner.runner import get_kimi_code_token + from framework.loader.agent_loader import get_kimi_code_token token = get_kimi_code_token() if token: @@ -311,7 +366,7 @@ def get_api_key() -> str | None: # Antigravity subscription: read OAuth token from accounts JSON if llm.get("use_antigravity_subscription"): try: - from framework.runner.runner import get_antigravity_token + from framework.loader.agent_loader import get_antigravity_token token = get_antigravity_token() if token: @@ -468,7 +523,7 @@ def get_llm_extra_kwargs() -> dict[str, Any]: "User-Agent": "CodexBar", } try: - from framework.runner.runner import get_codex_account_id + from framework.loader.agent_loader import get_codex_account_id account_id = get_codex_account_id() if account_id: diff --git a/core/framework/credentials/setup.py b/core/framework/credentials/setup.py index dfee6bf4..1bb4e90d 100644 --- a/core/framework/credentials/setup.py +++ b/core/framework/credentials/setup.py @@ -36,7 +36,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from framework.graph import NodeSpec + from framework.orchestrator 
import NodeSpec logger = logging.getLogger(__name__) @@ -533,7 +533,9 @@ class CredentialSetupSession: def load_agent_nodes(agent_path: str | Path) -> list: - """Load NodeSpec list from an agent's agent.py or agent.json. + """Load NodeSpec list from an agent directory. + + Checks agent.json (declarative) first, then agent.py (legacy). Args: agent_path: Path to agent directory. @@ -542,16 +544,28 @@ def load_agent_nodes(agent_path: str | Path) -> list: List of NodeSpec objects (empty list if agent can't be loaded). """ agent_path = Path(agent_path) + agent_json_file = agent_path / "agent.json" agent_py = agent_path / "agent.py" - agent_json = agent_path / "agent.json" - if agent_py.exists(): + if agent_json_file.exists(): + return _load_nodes_from_json_declarative(agent_json_file) + elif agent_py.exists(): return _load_nodes_from_python_agent(agent_path) - elif agent_json.exists(): - return _load_nodes_from_json_agent(agent_json) return [] +def _load_nodes_from_json_declarative(agent_json: Path) -> list: + """Load nodes from a declarative JSON agent.""" + try: + from framework.loader.agent_loader import load_agent_config + + data = json.loads(agent_json.read_text(encoding="utf-8")) + graph, _ = load_agent_config(data) + return list(graph.nodes) + except Exception: + return [] + + def _load_nodes_from_python_agent(agent_path: Path) -> list: """Load nodes from a Python-based agent.""" import importlib.util @@ -590,7 +604,7 @@ def _load_nodes_from_json_agent(agent_json: Path) -> list: with open(agent_json, encoding="utf-8-sig") as f: data = json.load(f) - from framework.graph import NodeSpec + from framework.orchestrator import NodeSpec nodes_data = data.get("graph", {}).get("nodes", []) nodes = [] diff --git a/core/framework/graph/__init__.py b/core/framework/graph/__init__.py deleted file mode 100644 index a6751ddf..00000000 --- a/core/framework/graph/__init__.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Graph structures: Goals, Nodes, Edges, and Execution.""" - -from 
framework.graph.context import GraphContext -from framework.graph.context_handoff import ContextHandoff, HandoffContext -from framework.graph.conversation import ConversationStore, Message, NodeConversation -from framework.graph.edge import DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec -from framework.graph.event_loop_node import ( - EventLoopNode, - JudgeProtocol, - JudgeVerdict, - LoopConfig, - OutputAccumulator, -) -from framework.graph.executor import GraphExecutor -from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion -from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec -from framework.graph.worker_agent import ( - Activation, - FanOutTag, - FanOutTracker, - WorkerAgent, - WorkerCompletion, - WorkerLifecycle, -) - -__all__ = [ - # Goal - "Goal", - "SuccessCriterion", - "Constraint", - "GoalStatus", - # Node - "NodeSpec", - "NodeContext", - "NodeResult", - "NodeProtocol", - # Edge - "EdgeSpec", - "EdgeCondition", - "GraphSpec", - "DEFAULT_MAX_TOKENS", - # Executor - "GraphExecutor", - # Conversation - "NodeConversation", - "ConversationStore", - "Message", - # Event Loop - "EventLoopNode", - "LoopConfig", - "OutputAccumulator", - "JudgeProtocol", - "JudgeVerdict", - # Context Handoff - "ContextHandoff", - "HandoffContext", - # Worker Agent - "WorkerAgent", - "WorkerLifecycle", - "WorkerCompletion", - "Activation", - "FanOutTag", - "FanOutTracker", - "GraphContext", -] diff --git a/core/framework/graph/event_loop/__init__.py b/core/framework/graph/event_loop/__init__.py deleted file mode 100644 index 1ec8a803..00000000 --- a/core/framework/graph/event_loop/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""EventLoopNode subpackage — modular components of the event loop orchestrator. - -All public symbols are re-exported by the parent ``event_loop_node.py`` for -backward compatibility. Internal consumers may import directly from these -submodules for clarity. 
-""" diff --git a/core/framework/graph/event_loop/subagent_executor.py b/core/framework/graph/event_loop/subagent_executor.py deleted file mode 100644 index cd5c207c..00000000 --- a/core/framework/graph/event_loop/subagent_executor.py +++ /dev/null @@ -1,370 +0,0 @@ -"""Subagent execution for the event loop. - -Handles the full subagent lifecycle: validation, context setup, tool filtering, -conversation store derivation, execution, and cleanup. -""" - -from __future__ import annotations - -import json -import logging -import time -from collections.abc import Awaitable, Callable -from pathlib import Path -from typing import TYPE_CHECKING, Any - -from framework.graph.conversation import ConversationStore -from framework.graph.event_loop.judge_pipeline import SubagentJudge -from framework.graph.event_loop.types import LoopConfig, OutputAccumulator -from framework.graph.node import DataBuffer, NodeContext -from framework.llm.provider import ToolResult, ToolUse -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.event_bus import EventBus - -if TYPE_CHECKING: - from framework.graph.event_loop_node import EventLoopNode - -logger = logging.getLogger(__name__) - - -async def execute_subagent( - ctx: NodeContext, - agent_id: str, - task: str, - *, - config: LoopConfig, - event_loop_node_cls: type[EventLoopNode], - escalation_receiver_cls: Callable[[], Any], - accumulator: OutputAccumulator | None = None, - event_bus: EventBus | None = None, - tool_executor: Callable[[ToolUse], ToolResult | Awaitable[ToolResult]] | None = None, - conversation_store: ConversationStore | None = None, - subagent_instance_counter: dict[str, int] | None = None, -) -> ToolResult: - """Execute a subagent and return the result as a ToolResult. 
- - The subagent: - - Gets a fresh conversation with just the task - - Has read-only access to the parent's readable memory - - Cannot delegate to its own subagents (prevents recursion) - - Returns its output in structured JSON format - - Args: - ctx: Parent node's context (for memory, tools, LLM access). - agent_id: The node ID of the subagent to invoke. - task: The task description to give the subagent. - accumulator: Parent's OutputAccumulator. - event_bus: EventBus for lifecycle events. - config: LoopConfig for iteration/tool limits. - tool_executor: Tool executor callable. - conversation_store: Parent conversation store (for deriving subagent store). - subagent_instance_counter: Mutable counter dict for unique subagent paths. - - Returns: - ToolResult with structured JSON output. - """ - # Log subagent invocation start - logger.info( - "\n" + "=" * 60 + "\n" - "🤖 SUBAGENT INVOCATION\n" - "=" * 60 + "\n" - "Parent Node: %s\n" - "Subagent ID: %s\n" - "Task: %s\n" + "=" * 60, - ctx.node_id, - agent_id, - task[:500] + "..." if len(task) > 500 else task, - ) - - # 1. Validate agent exists in registry - if agent_id not in ctx.node_registry: - return ToolResult( - tool_use_id="", - content=json.dumps( - { - "message": f"Sub-agent '{agent_id}' not found in registry", - "data": None, - "metadata": {"agent_id": agent_id, "success": False, "error": "not_found"}, - } - ), - is_error=True, - ) - - subagent_spec = ctx.node_registry[agent_id] - - # 2. Create read-only memory snapshot - parent_data = ctx.buffer.read_all() - - # Merge in-flight outputs from the parent's accumulator. 
- if accumulator: - for key, value in accumulator.to_dict().items(): - if key not in parent_data: - parent_data[key] = value - - subagent_buffer = DataBuffer() - for key, value in parent_data.items(): - subagent_buffer.write(key, value, validate=False) - - read_keys = set(parent_data.keys()) | set(subagent_spec.input_keys or []) - scoped_buffer = subagent_buffer.with_permissions( - read_keys=list(read_keys), - write_keys=[], # Read-only! - ) - - # 2b. Compute instance counter early so the callback and child context - # share the same stable node_id for this subagent invocation. - if subagent_instance_counter is not None: - subagent_instance_counter.setdefault(agent_id, 0) - subagent_instance_counter[agent_id] += 1 - subagent_instance = str(subagent_instance_counter[agent_id]) - else: - subagent_instance = "1" - - if subagent_instance == "1": - sa_node_id = f"{ctx.node_id}:subagent:{agent_id}" - else: - sa_node_id = f"{ctx.node_id}:subagent:{agent_id}:{subagent_instance}" - - # 2c. Set up report callback (one-way channel to parent / event bus) - subagent_reports: list[dict] = [] - - async def _report_callback( - message: str, - data: dict | None = None, - *, - wait_for_response: bool = False, - ) -> str | None: - subagent_reports.append({"message": message, "data": data, "timestamp": time.time()}) - if event_bus: - await event_bus.emit_subagent_report( - stream_id=ctx.node_id, - node_id=sa_node_id, - subagent_id=agent_id, - message=message, - data=data, - execution_id=ctx.execution_id, - ) - - if not wait_for_response: - return None - - if not event_bus: - logger.warning( - "Subagent '%s' requested user response but no event_bus available", - agent_id, - ) - return None - - # Create isolated receiver and register for input routing - import uuid - - escalation_id = f"{ctx.node_id}:escalation:{uuid.uuid4().hex[:8]}" - receiver = escalation_receiver_cls() - registry = ctx.shared_node_registry - - registry[escalation_id] = receiver - try: - await 
event_bus.emit_escalation_requested( - stream_id=ctx.stream_id or ctx.node_id, - node_id=escalation_id, - reason=f"Subagent report (wait_for_response) from {agent_id}", - context=message, - execution_id=ctx.execution_id, - ) - # Block until queen responds - return await receiver.wait() - finally: - registry.pop(escalation_id, None) - - # 3. Filter tools for subagent - subagent_tool_names = set(subagent_spec.tools or []) - tool_source = ctx.all_tools if ctx.all_tools else ctx.available_tools - - # GCU auto-population - if subagent_spec.node_type == "gcu" and not subagent_tool_names: - subagent_tools = [t for t in tool_source if t.name != "delegate_to_sub_agent"] - else: - subagent_tools = [ - t - for t in tool_source - if t.name in subagent_tool_names and t.name != "delegate_to_sub_agent" - ] - - missing = subagent_tool_names - {t.name for t in subagent_tools} - if missing: - logger.warning( - "Subagent '%s' requested tools not found in catalog: %s", - agent_id, - sorted(missing), - ) - - logger.info( - "📦 Subagent '%s' configuration:\n" - " - System prompt: %s\n" - " - Tools available (%d): %s\n" - " - Memory keys inherited: %s", - agent_id, - (subagent_spec.system_prompt[:200] + "...") - if subagent_spec.system_prompt and len(subagent_spec.system_prompt) > 200 - else subagent_spec.system_prompt, - len(subagent_tools), - [t.name for t in subagent_tools], - list(parent_data.keys()), - ) - - # 4. 
Build subagent context - max_iter = min(config.max_iterations, 10) - subagent_ctx = NodeContext( - runtime=ctx.runtime, - node_id=sa_node_id, - node_spec=subagent_spec, - buffer=scoped_buffer, - input_data={"task": task, **parent_data}, - llm=ctx.llm, - available_tools=subagent_tools, - goal_context=( - f"Your specific task: {task}\n\n" - f"COMPLETION REQUIREMENTS:\n" - f"When your task is done, you MUST call set_output() " - f"for each required key: {subagent_spec.output_keys}\n" - f"Alternatively, call report_to_parent(mark_complete=true) " - f"with your findings in message/data.\n" - + ( - "Before finishing, call browser_close_finished() to clean up your browser tabs.\n" - if subagent_spec.node_type == "gcu" - else "" - ) - + f"You have a maximum of {max_iter} turns to complete this task." - ), - goal=ctx.goal, - max_tokens=ctx.max_tokens, - runtime_logger=ctx.runtime_logger, - is_subagent_mode=True, # Prevents nested delegation - report_callback=_report_callback, - node_registry={}, # Empty - no nested subagents - shared_node_registry=ctx.shared_node_registry, # For escalation routing - ) - - # 5. 
Create and execute subagent EventLoopNode - subagent_conv_store = None - if conversation_store is not None: - from framework.storage.conversation_store import FileConversationStore - - parent_base = getattr(conversation_store, "_base", None) - if parent_base is not None: - conversations_dir = parent_base.parent - subagent_dir_name = f"{agent_id}-{subagent_instance}" - subagent_store_path = conversations_dir / subagent_dir_name - subagent_conv_store = FileConversationStore(base_path=subagent_store_path) - - # Derive a subagent-scoped spillover dir - subagent_spillover = None - if config.spillover_dir: - subagent_spillover = str(Path(config.spillover_dir) / agent_id / subagent_instance) - - subagent_node = event_loop_node_cls( - event_bus=event_bus, - judge=SubagentJudge(task=task, max_iterations=max_iter), - config=LoopConfig( - max_iterations=max_iter, - max_tool_calls_per_turn=config.max_tool_calls_per_turn, - tool_call_overflow_margin=config.tool_call_overflow_margin, - max_context_tokens=config.max_context_tokens, - stall_detection_threshold=config.stall_detection_threshold, - max_tool_result_chars=config.max_tool_result_chars, - spillover_dir=subagent_spillover, - ), - tool_executor=tool_executor, - conversation_store=subagent_conv_store, - ) - - # Each subagent instance gets its own unique browser profile so concurrent - # subagents don't share tab groups. The profile is set as execution context - # so the tool registry auto-injects it into every browser_* MCP tool call. 
- _gcu_profile = f"{agent_id}:{subagent_instance}" - _profile_token = ToolRegistry.set_execution_context(profile=_gcu_profile) - - try: - logger.info("🚀 Starting subagent '%s' execution...", agent_id) - start_time = time.time() - result = await subagent_node.execute(subagent_ctx) - latency_ms = int((time.time() - start_time) * 1000) - - separator = "-" * 60 - logger.info( - "\n%s\n" - "✅ SUBAGENT '%s' COMPLETED\n" - "%s\n" - "Success: %s\n" - "Latency: %dms\n" - "Tokens used: %s\n" - "Output keys: %s\n" - "%s", - separator, - agent_id, - separator, - result.success, - latency_ms, - result.tokens_used, - list(result.output.keys()) if result.output else [], - separator, - ) - - result_json = { - "message": ( - f"Sub-agent '{agent_id}' completed successfully" - if result.success - else f"Sub-agent '{agent_id}' failed: {result.error}" - ), - "data": result.output, - "reports": subagent_reports if subagent_reports else None, - "metadata": { - "agent_id": agent_id, - "success": result.success, - "tokens_used": result.tokens_used, - "latency_ms": latency_ms, - "report_count": len(subagent_reports), - }, - } - - return ToolResult( - tool_use_id="", - content=json.dumps(result_json, indent=2, default=str), - is_error=not result.success, - ) - - except Exception as e: - logger.exception( - "\n" + "!" * 60 + "\n❌ SUBAGENT '%s' FAILED\nError: %s\n" + "!" * 60, - agent_id, - str(e), - ) - result_json = { - "message": f"Sub-agent '{agent_id}' raised exception: {e}", - "data": None, - "metadata": { - "agent_id": agent_id, - "success": False, - "error": str(e), - }, - } - return ToolResult( - tool_use_id="", - content=json.dumps(result_json, indent=2), - is_error=True, - ) - finally: - ToolRegistry.reset_execution_context(_profile_token) - # Close the tab group this subagent created, if any. 
- try: - from gcu.browser.bridge import get_bridge - from gcu.browser.tools.lifecycle import _contexts - - bridge = get_bridge() - ctx_entry = _contexts.pop(_gcu_profile, None) - if bridge and bridge.is_connected and ctx_entry: - group_id = ctx_entry.get("groupId") - if group_id is not None: - await bridge.destroy_context(group_id) - except Exception: - pass diff --git a/core/framework/host/__init__.py b/core/framework/host/__init__.py new file mode 100644 index 00000000..c432b680 --- /dev/null +++ b/core/framework/host/__init__.py @@ -0,0 +1,11 @@ +"""Host layer -- how agents are triggered and hosted.""" + +from framework.host.agent_host import ( # noqa: F401 + AgentHost, + AgentRuntimeConfig, +) +from framework.host.event_bus import AgentEvent, EventBus, EventType # noqa: F401 +from framework.host.execution_manager import ( # noqa: F401 + EntryPointSpec, + ExecutionManager, +) diff --git a/core/framework/runtime/agent_runtime.py b/core/framework/host/agent_host.py similarity index 74% rename from core/framework/runtime/agent_runtime.py rename to core/framework/host/agent_host.py index 320e5371..8cb367b5 100644 --- a/core/framework/runtime/agent_runtime.py +++ b/core/framework/host/agent_host.py @@ -16,20 +16,21 @@ from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Any -from framework.graph.checkpoint_config import CheckpointConfig -from framework.graph.executor import ExecutionResult -from framework.runtime.event_bus import EventBus -from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream -from framework.runtime.outcome_aggregator import OutcomeAggregator -from framework.runtime.runtime_log_store import RuntimeLogStore -from framework.runtime.shared_state import SharedBufferManager +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.orchestrator import ExecutionResult +from framework.host.event_bus import EventBus +from framework.host.execution_manager 
import EntryPointSpec, ExecutionManager +from framework.host.outcome_aggregator import OutcomeAggregator +from framework.tracker.runtime_log_store import RuntimeLogStore +from framework.host.shared_state import SharedBufferManager from framework.storage.concurrent import ConcurrentStorage from framework.storage.session_store import SessionStore if TYPE_CHECKING: - from framework.graph.edge import GraphSpec - from framework.graph.goal import Goal + from framework.orchestrator.edge import GraphSpec + from framework.orchestrator.goal import Goal from framework.llm.provider import LLMProvider, Tool + from framework.pipeline.stage import PipelineStage from framework.skills.manager import SkillsManagerConfig logger = logging.getLogger(__name__) @@ -37,7 +38,7 @@ logger = logging.getLogger(__name__) @dataclass class AgentRuntimeConfig: - """Configuration for AgentRuntime.""" + """Configuration for AgentHost.""" max_concurrent_executions: int = 100 cache_ttl: float = 60.0 @@ -62,14 +63,14 @@ class _GraphRegistration: graph: "GraphSpec" goal: "Goal" entry_points: dict[str, EntryPointSpec] - streams: dict[str, ExecutionStream] # ep_id -> stream (NOT namespaced) + streams: dict[str, ExecutionManager] # ep_id -> stream (NOT namespaced) storage_subpath: str # relative to session root, e.g. "graphs/email_agent" event_subscriptions: list[str] = field(default_factory=list) timer_tasks: list[asyncio.Task] = field(default_factory=list) timer_next_fire: dict[str, float] = field(default_factory=dict) -class AgentRuntime: +class AgentHost: """ Top-level runtime that manages agent lifecycle and concurrent executions. @@ -142,6 +143,7 @@ class AgentRuntime: skills_catalog_prompt: str = "", protocols_prompt: str = "", skill_dirs: list[str] | None = None, + pipeline_stages: "list[PipelineStage] | None" = None, ): """ Initialize agent runtime. @@ -171,6 +173,7 @@ class AgentRuntime: skills_catalog_prompt: Deprecated. Pre-rendered skills catalog. protocols_prompt: Deprecated. 
Pre-rendered operational protocols. """ + from framework.pipeline.runner import PipelineRunner from framework.skills.manager import SkillsManager self.graph = graph @@ -180,6 +183,14 @@ class AgentRuntime: self._checkpoint_config = checkpoint_config self.accounts_prompt = accounts_prompt + # Pipeline middleware: runs before every trigger() dispatch. + # Accepts either pre-built stage objects or loads from config. + if pipeline_stages: + self._pipeline = PipelineRunner(pipeline_stages) + else: + self._pipeline = self._load_pipeline_from_config() + + # --- Skill lifecycle: runtime owns the SkillsManager --- if skills_manager_config is not None: # New path: config-driven, runtime handles loading @@ -246,7 +257,7 @@ class AgentRuntime: # Entry points and streams (primary graph) self._entry_points: dict[str, EntryPointSpec] = {} - self._streams: dict[str, ExecutionStream] = {} + self._streams: dict[str, ExecutionManager] = {} # Webhook server (created on start if webhook_routes configured) self._webhook_server: Any = None @@ -270,7 +281,7 @@ class AgentRuntime: self.intro_message: str = "" # ------------------------------------------------------------------ - # Skill prompt accessors (read by ExecutionStream constructors) + # Skill prompt accessors (read by ExecutionManager constructors) # ------------------------------------------------------------------ @property @@ -335,9 +346,14 @@ class AgentRuntime: # Start storage await self._storage.start() - # Create streams for each entry point + # Initialize pipeline stages FIRST -- they inject LLM, tools, + # credentials, and skills into the host before streams are created. 
+ await self._pipeline.initialize_all() + self._apply_pipeline_results() + + # Create streams for each entry point (uses pipeline results) for ep_id, spec in self._entry_points.items(): - stream = ExecutionStream( + stream = ExecutionManager( stream_id=ep_id, entry_spec=spec, graph=self.graph, @@ -370,7 +386,7 @@ class AgentRuntime: # Start webhook server if routes are configured if self._config.webhook_routes: - from framework.runtime.webhook_server import ( + from framework.host.webhook_server import ( WebhookRoute, WebhookServer, WebhookServerConfig, @@ -394,7 +410,7 @@ class AgentRuntime: await self._webhook_server.start() # Subscribe event-driven entry points to EventBus - from framework.runtime.event_bus import EventType as _ET + from framework.host.event_bus import EventType as _ET for ep_id, spec in self._entry_points.items(): if spec.trigger_type != "event": @@ -458,332 +474,345 @@ class AgentRuntime: self._event_subscriptions.append(sub_id) # Start timer-driven entry points - for ep_id, spec in self._entry_points.items(): - if spec.trigger_type != "timer": - continue + await self._start_timers() - tc = spec.trigger_config - cron_expr = tc.get("cron") - _raw_interval = tc.get("interval_minutes") - interval = float(_raw_interval) if _raw_interval is not None else None - run_immediately = tc.get("run_immediately", False) - - if cron_expr: - # Cron expression mode — takes priority over interval_minutes - try: - from croniter import croniter - except ImportError as e: - raise RuntimeError( - "croniter is required for cron-based entry points. 
" - "Install it with: uv pip install croniter" - ) from e - - try: - if not croniter.is_valid(cron_expr): - raise ValueError(f"Invalid cron expression: {cron_expr}") - except ValueError as e: - logger.warning( - "Entry point '%s' has invalid cron config: %s", - ep_id, - e, - ) - continue - - def _make_cron_timer( - entry_point_id: str, - expr: str, - immediate: bool, - idle_timeout: float = 300, - ): - async def _cron_loop(): - from croniter import croniter - - _persistent_session_id: str | None = None - if not immediate: - cron = croniter(expr, datetime.now()) - next_dt = cron.get_next(datetime) - sleep_secs = (next_dt - datetime.now()).total_seconds() - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) - await asyncio.sleep(max(0, sleep_secs)) - while self._running: - # Calculate next fire time upfront (used by skip paths too) - cron = croniter(expr, datetime.now()) - next_dt = cron.get_next(datetime) - sleep_secs = (next_dt - datetime.now()).total_seconds() - - # Gate: skip tick if timers are explicitly paused - if self._timers_paused: - logger.debug( - "Cron '%s': paused, skipping tick", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) - await asyncio.sleep(max(0, sleep_secs)) - continue - - # Gate: skip tick if ANY stream is actively working. - # If the execution is idle (no LLM/tool activity - # beyond idle_timeout) let the timer proceed — - # execute() will cancel the stale execution. 
- _any_active = False - _min_idle = float("inf") - for _s in self._streams.values(): - if _s.active_execution_ids: - _any_active = True - _idle = _s.agent_idle_seconds - if _idle < _min_idle: - _min_idle = _idle - logger.info( - "Cron '%s': gate — active=%s, idle=%.1fs, timeout=%ds", - entry_point_id, - _any_active, - _min_idle, - idle_timeout, - ) - if _any_active and _min_idle < idle_timeout: - logger.info( - "Cron '%s': agent actively working, skipping tick", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) - await asyncio.sleep(max(0, sleep_secs)) - continue - - self._timer_next_fire.pop(entry_point_id, None) - try: - ep_spec = self._entry_points.get(entry_point_id) - is_isolated = ep_spec and ep_spec.isolation_level == "isolated" - if is_isolated: - if _persistent_session_id: - session_state = { - "resume_session_id": _persistent_session_id - } - else: - session_state = None - else: - session_state = self._get_primary_session_state( - exclude_entry_point=entry_point_id - ) - # Gate: skip tick if no active session - if session_state is None: - logger.debug( - "Cron '%s': no active session, skipping", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) - await asyncio.sleep(max(0, sleep_secs)) - continue - - exec_id = await self.trigger( - entry_point_id, - { - "event": { - "source": "timer", - "reason": "scheduled", - } - }, - session_state=session_state, - ) - if not _persistent_session_id and is_isolated: - _persistent_session_id = exec_id - logger.info( - "Cron fired for entry point '%s' (expr: %s)", - entry_point_id, - expr, - ) - except Exception: - logger.error( - "Cron trigger failed for '%s'", - entry_point_id, - exc_info=True, - ) - # Calculate next fire from now - cron = croniter(expr, datetime.now()) - next_dt = cron.get_next(datetime) - sleep_secs = (next_dt - datetime.now()).total_seconds() - self._timer_next_fire[entry_point_id] = ( - 
time.monotonic() + sleep_secs - ) - await asyncio.sleep(max(0, sleep_secs)) - - return _cron_loop - - task = asyncio.create_task( - _make_cron_timer( - ep_id, - cron_expr, - run_immediately, - idle_timeout=float(tc.get("idle_timeout_seconds", 300)), - )() - ) - self._timer_tasks.append(task) - logger.info( - "Started cron timer for entry point '%s' with expression '%s'%s", - ep_id, - cron_expr, - " (immediate first run)" if run_immediately else "", - ) - - elif interval and interval > 0: - # Fixed interval mode (original behavior) - def _make_timer( - entry_point_id: str, - mins: float, - immediate: bool, - idle_timeout: float = 300, - ): - async def _timer_loop(): - interval_secs = mins * 60 - _persistent_session_id: str | None = None - if not immediate: - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) - await asyncio.sleep(interval_secs) - while self._running: - # Gate: skip tick if timers are explicitly paused - if self._timers_paused: - logger.debug( - "Timer '%s': paused, skipping tick", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) - await asyncio.sleep(interval_secs) - continue - - # Gate: skip tick if agent is actively working. - # Gate: skip tick if ANY stream is actively working. 
- _any_active = False - _min_idle = float("inf") - for _s in self._streams.values(): - if _s.active_execution_ids: - _any_active = True - _idle = _s.agent_idle_seconds - if _idle < _min_idle: - _min_idle = _idle - logger.info( - "Timer '%s': gate — active=%s, idle=%.1fs, timeout=%ds", - entry_point_id, - _any_active, - _min_idle, - idle_timeout, - ) - if _any_active and _min_idle < idle_timeout: - logger.info( - "Timer '%s': agent actively working, skipping tick", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) - await asyncio.sleep(interval_secs) - continue - - self._timer_next_fire.pop(entry_point_id, None) - try: - ep_spec = self._entry_points.get(entry_point_id) - is_isolated = ep_spec and ep_spec.isolation_level == "isolated" - if is_isolated: - if _persistent_session_id: - session_state = { - "resume_session_id": _persistent_session_id - } - else: - session_state = None - else: - session_state = self._get_primary_session_state( - exclude_entry_point=entry_point_id - ) - # Gate: skip tick if no active session - if session_state is None: - logger.debug( - "Timer '%s': no active session, skipping", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) - await asyncio.sleep(interval_secs) - continue - - exec_id = await self.trigger( - entry_point_id, - { - "event": { - "source": "timer", - "reason": "scheduled", - } - }, - session_state=session_state, - ) - if not _persistent_session_id and is_isolated: - _persistent_session_id = exec_id - logger.info( - "Timer fired for entry point '%s' (next in %s min)", - entry_point_id, - mins, - ) - except Exception: - logger.error( - "Timer trigger failed for '%s'", - entry_point_id, - exc_info=True, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) - await asyncio.sleep(interval_secs) - - return _timer_loop - - task = asyncio.create_task( - _make_timer( - ep_id, - interval, - 
run_immediately, - idle_timeout=float(tc.get("idle_timeout_seconds", 300)), - )() - ) - self._timer_tasks.append(task) - logger.info( - "Started timer for entry point '%s' every %s min%s", - ep_id, - interval, - " (immediate first run)" if run_immediately else "", - ) - - else: - logger.warning( - "Entry point '%s' has trigger_type='timer' " - "but no 'cron' or valid 'interval_minutes' in trigger_config", - ep_id, - ) - - # Register primary graph - self._graphs[self._graph_id] = _GraphRegistration( - graph=self.graph, - goal=self.goal, - entry_points=dict(self._entry_points), - streams=dict(self._streams), - storage_subpath="", - event_subscriptions=list(self._event_subscriptions), - timer_tasks=list(self._timer_tasks), - timer_next_fire=self._timer_next_fire, - ) + # Start skill hot-reload watcher (no-op if watchfiles not installed) + await self._skills_manager.start_watching() self._running = True self._timers_paused = False - logger.info(f"AgentRuntime started with {len(self._streams)} streams") + n_stages = len(self._pipeline.stages) + logger.info( + "AgentHost started with %d streams, %d pipeline stages", + len(self._streams), + n_stages, + ) + + async def _start_timers(self) -> None: + """Start timer-driven entry points (extracted from start()).""" + for ep_id, spec in self._entry_points.items(): + if spec.trigger_type != "timer": + continue + + tc = spec.trigger_config + cron_expr = tc.get("cron") + _raw_interval = tc.get("interval_minutes") + interval = float(_raw_interval) if _raw_interval is not None else None + run_immediately = tc.get("run_immediately", False) + + if cron_expr: + # Cron expression mode — takes priority over interval_minutes + try: + from croniter import croniter + except ImportError as e: + raise RuntimeError( + "croniter is required for cron-based entry points. 
" + "Install it with: uv pip install croniter" + ) from e + + try: + if not croniter.is_valid(cron_expr): + raise ValueError(f"Invalid cron expression: {cron_expr}") + except ValueError as e: + logger.warning( + "Entry point '%s' has invalid cron config: %s", + ep_id, + e, + ) + continue + + def _make_cron_timer( + entry_point_id: str, + expr: str, + immediate: bool, + idle_timeout: float = 300, + ): + async def _cron_loop(): + from croniter import croniter + + _persistent_session_id: str | None = None + if not immediate: + cron = croniter(expr, datetime.now()) + next_dt = cron.get_next(datetime) + sleep_secs = (next_dt - datetime.now()).total_seconds() + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + sleep_secs + ) + await asyncio.sleep(max(0, sleep_secs)) + while self._running: + # Calculate next fire time upfront (used by skip paths too) + cron = croniter(expr, datetime.now()) + next_dt = cron.get_next(datetime) + sleep_secs = (next_dt - datetime.now()).total_seconds() + + # Gate: skip tick if timers are explicitly paused + if self._timers_paused: + logger.debug( + "Cron '%s': paused, skipping tick", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + sleep_secs + ) + await asyncio.sleep(max(0, sleep_secs)) + continue + + # Gate: skip tick if ANY stream is actively working. + # If the execution is idle (no LLM/tool activity + # beyond idle_timeout) let the timer proceed — + # execute() will cancel the stale execution. 
+ _any_active = False + _min_idle = float("inf") + for _s in self._streams.values(): + if _s.active_execution_ids: + _any_active = True + _idle = _s.agent_idle_seconds + if _idle < _min_idle: + _min_idle = _idle + logger.info( + "Cron '%s': gate — active=%s, idle=%.1fs, timeout=%ds", + entry_point_id, + _any_active, + _min_idle, + idle_timeout, + ) + if _any_active and _min_idle < idle_timeout: + logger.info( + "Cron '%s': agent actively working, skipping tick", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + sleep_secs + ) + await asyncio.sleep(max(0, sleep_secs)) + continue + + self._timer_next_fire.pop(entry_point_id, None) + try: + ep_spec = self._entry_points.get(entry_point_id) + is_isolated = ep_spec and ep_spec.isolation_level == "isolated" + if is_isolated: + if _persistent_session_id: + session_state = { + "resume_session_id": _persistent_session_id + } + else: + session_state = None + else: + session_state = self._get_primary_session_state( + exclude_entry_point=entry_point_id + ) + # Gate: skip tick if no active session + if session_state is None: + logger.debug( + "Cron '%s': no active session, skipping", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + sleep_secs + ) + await asyncio.sleep(max(0, sleep_secs)) + continue + + exec_id = await self.trigger( + entry_point_id, + { + "event": { + "source": "timer", + "reason": "scheduled", + } + }, + session_state=session_state, + ) + if not _persistent_session_id and is_isolated: + _persistent_session_id = exec_id + logger.info( + "Cron fired for entry point '%s' (expr: %s)", + entry_point_id, + expr, + ) + except Exception: + logger.error( + "Cron trigger failed for '%s'", + entry_point_id, + exc_info=True, + ) + # Calculate next fire from now + cron = croniter(expr, datetime.now()) + next_dt = cron.get_next(datetime) + sleep_secs = (next_dt - datetime.now()).total_seconds() + self._timer_next_fire[entry_point_id] = ( + 
time.monotonic() + sleep_secs + ) + await asyncio.sleep(max(0, sleep_secs)) + + return _cron_loop + + task = asyncio.create_task( + _make_cron_timer( + ep_id, + cron_expr, + run_immediately, + idle_timeout=float(tc.get("idle_timeout_seconds", 300)), + )() + ) + self._timer_tasks.append(task) + logger.info( + "Started cron timer for entry point '%s' with expression '%s'%s", + ep_id, + cron_expr, + " (immediate first run)" if run_immediately else "", + ) + + elif interval and interval > 0: + # Fixed interval mode (original behavior) + def _make_timer( + entry_point_id: str, + mins: float, + immediate: bool, + idle_timeout: float = 300, + ): + async def _timer_loop(): + interval_secs = mins * 60 + _persistent_session_id: str | None = None + if not immediate: + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + interval_secs + ) + await asyncio.sleep(interval_secs) + while self._running: + # Gate: skip tick if timers are explicitly paused + if self._timers_paused: + logger.debug( + "Timer '%s': paused, skipping tick", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + interval_secs + ) + await asyncio.sleep(interval_secs) + continue + + # Gate: skip tick if agent is actively working. + # Gate: skip tick if ANY stream is actively working. 
+ _any_active = False + _min_idle = float("inf") + for _s in self._streams.values(): + if _s.active_execution_ids: + _any_active = True + _idle = _s.agent_idle_seconds + if _idle < _min_idle: + _min_idle = _idle + logger.info( + "Timer '%s': gate — active=%s, idle=%.1fs, timeout=%ds", + entry_point_id, + _any_active, + _min_idle, + idle_timeout, + ) + if _any_active and _min_idle < idle_timeout: + logger.info( + "Timer '%s': agent actively working, skipping tick", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + interval_secs + ) + await asyncio.sleep(interval_secs) + continue + + self._timer_next_fire.pop(entry_point_id, None) + try: + ep_spec = self._entry_points.get(entry_point_id) + is_isolated = ep_spec and ep_spec.isolation_level == "isolated" + if is_isolated: + if _persistent_session_id: + session_state = { + "resume_session_id": _persistent_session_id + } + else: + session_state = None + else: + session_state = self._get_primary_session_state( + exclude_entry_point=entry_point_id + ) + # Gate: skip tick if no active session + if session_state is None: + logger.debug( + "Timer '%s': no active session, skipping", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + interval_secs + ) + await asyncio.sleep(interval_secs) + continue + + exec_id = await self.trigger( + entry_point_id, + { + "event": { + "source": "timer", + "reason": "scheduled", + } + }, + session_state=session_state, + ) + if not _persistent_session_id and is_isolated: + _persistent_session_id = exec_id + logger.info( + "Timer fired for entry point '%s' (next in %s min)", + entry_point_id, + mins, + ) + except Exception: + logger.error( + "Timer trigger failed for '%s'", + entry_point_id, + exc_info=True, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + interval_secs + ) + await asyncio.sleep(interval_secs) + + return _timer_loop + + task = asyncio.create_task( + _make_timer( + ep_id, + interval, + 
run_immediately, + idle_timeout=float(tc.get("idle_timeout_seconds", 300)), + )() + ) + self._timer_tasks.append(task) + logger.info( + "Started timer for entry point '%s' every %s min%s", + ep_id, + interval, + " (immediate first run)" if run_immediately else "", + ) + + else: + logger.warning( + "Entry point '%s' has trigger_type='timer' " + "but no 'cron' or valid 'interval_minutes' in trigger_config", + ep_id, + ) + + # Register primary graph + self._graphs[self._graph_id] = _GraphRegistration( + graph=self.graph, + goal=self.goal, + entry_points=dict(self._entry_points), + streams=dict(self._streams), + storage_subpath="", + event_subscriptions=list(self._event_subscriptions), + timer_tasks=list(self._timer_tasks), + timer_next_fire=self._timer_next_fire, + ) + async def stop(self) -> None: """Stop the agent runtime and all streams.""" @@ -818,11 +847,14 @@ class AgentRuntime: self._streams.clear() self._graphs.clear() + # Stop skill hot-reload watcher + await self._skills_manager.stop_watching() + # Stop storage await self._storage.stop() self._running = False - logger.info("AgentRuntime stopped") + logger.info("AgentHost stopped") def pause_timers(self) -> None: """Pause all timer-driven entry points. @@ -841,7 +873,7 @@ class AgentRuntime: self, entry_point_id: str, graph_id: str | None = None, - ) -> ExecutionStream | None: + ) -> ExecutionManager | None: """Find the stream for an entry point, searching the active graph first. 
Lookup order: @@ -865,6 +897,57 @@ class AgentRuntime: # Primary graph (also stored in self._streams) return self._streams.get(entry_point_id) + def _apply_pipeline_results(self) -> None: + """Read typed attributes from pipeline stages after initialization.""" + for stage in self._pipeline.stages: + name = stage.__class__.__name__ + + if stage.tool_registry is not None: + tools = list(stage.tool_registry.get_tools().values()) + if tools: + self._tools = tools + self._tool_executor = stage.tool_registry.get_executor() + logger.info("Pipeline: %d tools from %s", len(tools), name) + + if stage.llm is not None and self._llm is None: + self._llm = stage.llm + logger.info("Pipeline: LLM from %s", name) + + if stage.accounts_prompt: + self._accounts_prompt = stage.accounts_prompt + self._accounts_data = stage.accounts_data + self._tool_provider_map = stage.tool_provider_map + + if stage.skills_manager is not None: + self._skills_manager = stage.skills_manager + + + @staticmethod + def _load_pipeline_from_config(): + """Build pipeline from ``~/.hive/configuration.json`` ``pipeline`` key. + + Returns an empty pipeline if no config is set. + """ + from framework.config import get_hive_config + from framework.pipeline.registry import build_pipeline_from_config + from framework.pipeline.runner import PipelineRunner + + config = get_hive_config() + stages_config = config.get("pipeline", {}).get("stages", []) + if not stages_config: + return PipelineRunner([]) + return build_pipeline_from_config(stages_config) + + async def _reload_pipeline(self) -> None: + """Hot-reload pipeline from config. 
Atomic swap.""" + new_pipeline = self._load_pipeline_from_config() + await new_pipeline.initialize_all() + self._pipeline = new_pipeline + logger.info( + "Pipeline reloaded: %d stages", + len(new_pipeline.stages), + ) + def _prune_idempotency_keys(self) -> None: """Prune expired idempotency keys based on TTL and max size.""" ttl = self._config.idempotency_ttl_seconds @@ -915,7 +998,7 @@ class AgentRuntime: RuntimeError: If runtime not running """ if not self._running: - raise RuntimeError("AgentRuntime is not running") + raise RuntimeError("AgentHost is not running") # Idempotency check: return cached execution_id for duplicate keys. if idempotency_key is not None: @@ -929,6 +1012,21 @@ class AgentRuntime: ) return cached + # Run pipeline middleware (rate limiting, validation, cost guards, ...) + # Raises PipelineRejectedError if any stage rejects. + if self._pipeline.stages: + from framework.pipeline.stage import PipelineContext + + pipeline_ctx = PipelineContext( + entry_point_id=entry_point_id, + input_data=input_data, + correlation_id=correlation_id, + session_state=session_state, + ) + pipeline_ctx = await self._pipeline.run(pipeline_ctx) + # Stages may have transformed the input_data. 
+ input_data = pipeline_ctx.input_data + stream = self._resolve_stream(entry_point_id, graph_id) if stream is None: raise ValueError(f"Entry point '{entry_point_id}' not found") @@ -1023,9 +1121,9 @@ class AgentRuntime: graph_log_store = RuntimeLogStore(graph_base / "runtime_logs") # Create streams for each entry point - streams: dict[str, ExecutionStream] = {} + streams: dict[str, ExecutionManager] = {} for ep_id, spec in entry_points.items(): - stream = ExecutionStream( + stream = ExecutionManager( stream_id=f"{graph_id}::{ep_id}", entry_spec=spec, graph=graph, @@ -1055,7 +1153,7 @@ class AgentRuntime: streams[ep_id] = stream # Set up event-driven subscriptions - from framework.runtime.event_bus import EventType as _ET + from framework.host.event_bus import EventType as _ET event_subs: list[str] = [] for ep_id, spec in entry_points.items(): @@ -1492,7 +1590,7 @@ class AgentRuntime: # Search primary graph's streams for an active session. # Skip isolated streams — they have their own session directories # and must never be used as a shared session. 
- all_streams: list[tuple[str, ExecutionStream]] = [] + all_streams: list[tuple[str, ExecutionManager]] = [] for _gid, reg in self._graphs.items(): for ep_id, stream in reg.streams.items(): # Skip isolated entry points — they run in their own namespace @@ -1653,7 +1751,7 @@ class AgentRuntime: return max(0.0, mono - time.monotonic()) return None - def get_stream(self, entry_point_id: str) -> ExecutionStream | None: + def get_stream(self, entry_point_id: str) -> ExecutionManager | None: """Get a specific execution stream.""" return self._streams.get(entry_point_id) @@ -1820,94 +1918,3 @@ class AgentRuntime: # === CONVENIENCE FACTORY === -def create_agent_runtime( - graph: "GraphSpec", - goal: "Goal", - storage_path: str | Path, - entry_points: list[EntryPointSpec], - llm: "LLMProvider | None" = None, - tools: list["Tool"] | None = None, - tool_executor: Callable | None = None, - config: AgentRuntimeConfig | None = None, - runtime_log_store: Any = None, - enable_logging: bool = True, - checkpoint_config: CheckpointConfig | None = None, - graph_id: str | None = None, - accounts_prompt: str = "", - accounts_data: list[dict] | None = None, - tool_provider_map: dict[str, str] | None = None, - event_bus: "EventBus | None" = None, - skills_manager_config: "SkillsManagerConfig | None" = None, - # Deprecated — pass skills_manager_config instead. - skills_catalog_prompt: str = "", - protocols_prompt: str = "", - skill_dirs: list[str] | None = None, -) -> AgentRuntime: - """ - Create and configure an AgentRuntime with entry points. - - Convenience factory that creates runtime and registers entry points. - Runtime logging is enabled by default for observability. 
- - Args: - graph: Graph specification - goal: Goal driving execution - storage_path: Path for persistent storage - entry_points: Entry point specifications - llm: LLM provider - tools: Available tools - tool_executor: Tool executor function - config: Runtime configuration - runtime_log_store: Optional RuntimeLogStore for per-execution logging. - If None and enable_logging=True, creates one automatically. - enable_logging: Whether to enable runtime logging (default: True). - Set to False to disable logging entirely. - checkpoint_config: Optional checkpoint configuration for resumable sessions. - If None, uses default checkpointing behavior. - graph_id: Optional identifier for the primary graph (defaults to "primary"). - accounts_data: Raw account data for per-node prompt generation. - tool_provider_map: Tool name to provider name mapping for account routing. - event_bus: Optional external EventBus to share with other components. - skills_catalog_prompt: Available skills catalog for system prompt. - protocols_prompt: Default skill operational protocols for system prompt. - skill_dirs: Skill base directories for Tier 3 resource access. - skills_manager_config: Skill configuration — the runtime owns - discovery, loading, and prompt renderation internally. - skills_catalog_prompt: Deprecated. Pre-rendered skills catalog. - protocols_prompt: Deprecated. Pre-rendered operational protocols. 
- - Returns: - Configured AgentRuntime (not yet started) - """ - # Auto-create runtime log store if logging is enabled and not provided - if enable_logging and runtime_log_store is None: - from framework.runtime.runtime_log_store import RuntimeLogStore - - storage_path_obj = Path(storage_path) if isinstance(storage_path, str) else storage_path - runtime_log_store = RuntimeLogStore(storage_path_obj / "runtime_logs") - - runtime = AgentRuntime( - graph=graph, - goal=goal, - storage_path=storage_path, - llm=llm, - tools=tools, - tool_executor=tool_executor, - config=config, - runtime_log_store=runtime_log_store, - checkpoint_config=checkpoint_config, - graph_id=graph_id, - accounts_prompt=accounts_prompt, - accounts_data=accounts_data, - tool_provider_map=tool_provider_map, - event_bus=event_bus, - skills_manager_config=skills_manager_config, - skills_catalog_prompt=skills_catalog_prompt, - protocols_prompt=protocols_prompt, - skill_dirs=skill_dirs, - ) - - for spec in entry_points: - runtime.register_entry_point(spec) - - return runtime diff --git a/core/framework/runtime/event_bus.py b/core/framework/host/event_bus.py similarity index 100% rename from core/framework/runtime/event_bus.py rename to core/framework/host/event_bus.py diff --git a/core/framework/runtime/execution_stream.py b/core/framework/host/execution_manager.py similarity index 97% rename from core/framework/runtime/execution_stream.py rename to core/framework/host/execution_manager.py index 6ff2f89b..39b9aadf 100644 --- a/core/framework/runtime/execution_stream.py +++ b/core/framework/host/execution_manager.py @@ -18,18 +18,18 @@ from dataclasses import dataclass, field from datetime import datetime from typing import TYPE_CHECKING, Any -from framework.graph.checkpoint_config import CheckpointConfig -from framework.graph.executor import ExecutionResult, GraphExecutor -from framework.runtime.event_bus import EventBus -from framework.runtime.shared_state import IsolationLevel, SharedBufferManager -from 
framework.runtime.stream_runtime import StreamRuntime, StreamRuntimeAdapter +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator +from framework.host.event_bus import EventBus +from framework.host.shared_state import IsolationLevel, SharedBufferManager +from framework.host.stream_runtime import StreamDecisionTracker, StreamRuntimeAdapter if TYPE_CHECKING: - from framework.graph.edge import GraphSpec - from framework.graph.goal import Goal + from framework.orchestrator.edge import GraphSpec + from framework.orchestrator.goal import Goal from framework.llm.provider import LLMProvider, Tool - from framework.runtime.event_bus import AgentEvent - from framework.runtime.outcome_aggregator import OutcomeAggregator + from framework.host.event_bus import AgentEvent + from framework.host.outcome_aggregator import OutcomeAggregator from framework.storage.concurrent import ConcurrentStorage from framework.storage.session_store import SessionStore @@ -133,7 +133,7 @@ class ExecutionContext: status: str = "pending" # pending, running, completed, failed, paused -class ExecutionStream: +class ExecutionManager: """ Manages concurrent executions for a single entry point. 
@@ -262,7 +262,7 @@ class ExecutionStream: ) # Create stream-scoped runtime - self._runtime = StreamRuntime( + self._runtime = StreamDecisionTracker( stream_id=stream_id, storage=storage, outcome_aggregator=outcome_aggregator, @@ -271,7 +271,7 @@ class ExecutionStream: # Execution tracking self._active_executions: dict[str, ExecutionContext] = {} self._execution_tasks: dict[str, asyncio.Task] = {} - self._active_executors: dict[str, GraphExecutor] = {} + self._active_executors: dict[str, Orchestrator] = {} self._cancel_reasons: dict[str, str] = {} self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict() self._execution_result_times: dict[str, float] = {} @@ -301,7 +301,7 @@ class ExecutionStream: # Emit stream started event if self._scoped_event_bus: - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType await self._scoped_event_bus.publish( AgentEvent( @@ -426,7 +426,7 @@ class ExecutionStream: # Emit stream stopped event if self._scoped_event_bus: - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType await self._scoped_event_bus.publish( AgentEvent( @@ -668,7 +668,7 @@ class ExecutionStream: # Create per-execution runtime logger runtime_logger = None if self._runtime_log_store: - from framework.runtime.runtime_logger import RuntimeLogger + from framework.tracker.runtime_logger import RuntimeLogger runtime_logger = RuntimeLogger( store=self._runtime_log_store, agent_id=self.graph.id @@ -697,12 +697,7 @@ class ExecutionStream: # forward so the next attempt resumes at the failed node. while True: # Create executor for this execution. - # Each execution gets its own storage under sessions/{exec_id}/ - # so conversations, spillover, and data files are all scoped - # to this execution. 
The executor sets data_dir via execution - # context (contextvars) so data tools and spillover share the - # same session-scoped directory. - executor = GraphExecutor( + executor = Orchestrator( runtime=runtime_adapter, llm=self._llm, tools=self._tools, @@ -763,7 +758,7 @@ class ExecutionStream: # Emit resurrection event if self._scoped_event_bus: - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType await self._scoped_event_bus.publish( AgentEvent( @@ -1119,7 +1114,7 @@ class ExecutionStream: Each stream only executes from its own entry_node, but the full graph must validate with all entry points accounted for. """ - from framework.graph.edge import GraphSpec + from framework.orchestrator.edge import GraphSpec # Merge entry points: this stream's entry + original graph's primary # entry + any other entry points. This ensures all nodes are diff --git a/core/framework/runtime/outcome_aggregator.py b/core/framework/host/outcome_aggregator.py similarity index 99% rename from core/framework/runtime/outcome_aggregator.py rename to core/framework/host/outcome_aggregator.py index 9bf8c596..164a8ceb 100644 --- a/core/framework/runtime/outcome_aggregator.py +++ b/core/framework/host/outcome_aggregator.py @@ -14,8 +14,8 @@ from typing import TYPE_CHECKING, Any from framework.schemas.decision import Decision, Outcome if TYPE_CHECKING: - from framework.graph.goal import Goal - from framework.runtime.event_bus import EventBus + from framework.orchestrator.goal import Goal + from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) diff --git a/core/framework/runtime/shared_state.py b/core/framework/host/shared_state.py similarity index 100% rename from core/framework/runtime/shared_state.py rename to core/framework/host/shared_state.py diff --git a/core/framework/runtime/stream_runtime.py b/core/framework/host/stream_runtime.py similarity index 98% rename from 
core/framework/runtime/stream_runtime.py rename to core/framework/host/stream_runtime.py index 9c1a5131..eeecce0c 100644 --- a/core/framework/runtime/stream_runtime.py +++ b/core/framework/host/stream_runtime.py @@ -18,12 +18,12 @@ from framework.schemas.run import Run, RunStatus from framework.storage.concurrent import ConcurrentStorage if TYPE_CHECKING: - from framework.runtime.outcome_aggregator import OutcomeAggregator + from framework.host.outcome_aggregator import OutcomeAggregator logger = logging.getLogger(__name__) -class StreamRuntime: +class StreamDecisionTracker: """ Thread-safe runtime for a single execution stream. @@ -431,7 +431,7 @@ class StreamRuntimeAdapter: by providing the same API as Runtime but routing to a specific execution. """ - def __init__(self, stream_runtime: StreamRuntime, execution_id: str): + def __init__(self, stream_runtime: StreamDecisionTracker, execution_id: str): """ Create adapter for a specific execution. diff --git a/core/framework/runtime/triggers.py b/core/framework/host/triggers.py similarity index 100% rename from core/framework/runtime/triggers.py rename to core/framework/host/triggers.py diff --git a/core/framework/runtime/webhook_server.py b/core/framework/host/webhook_server.py similarity index 99% rename from core/framework/runtime/webhook_server.py rename to core/framework/host/webhook_server.py index 3d8a5754..b33dcaba 100644 --- a/core/framework/runtime/webhook_server.py +++ b/core/framework/host/webhook_server.py @@ -13,7 +13,7 @@ from dataclasses import dataclass from aiohttp import web -from framework.runtime.event_bus import EventBus +from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) diff --git a/core/framework/llm/key_pool.py b/core/framework/llm/key_pool.py new file mode 100644 index 00000000..9790e1ba --- /dev/null +++ b/core/framework/llm/key_pool.py @@ -0,0 +1,101 @@ +"""Thread-safe API key pool with round-robin rotation and health tracking. 
+ +When multiple API keys are configured, the pool rotates through them on each +request. Keys that hit rate limits are temporarily cooled-down so the next +call automatically uses a healthy key -- no sleep required. +""" + +from __future__ import annotations + +import logging +import threading +import time +from dataclasses import dataclass + +logger = logging.getLogger(__name__) + + +@dataclass +class KeyHealth: + """Per-key health counters.""" + + rate_limited_until: float = 0.0 # monotonic timestamp + consecutive_errors: int = 0 + total_requests: int = 0 + total_successes: int = 0 + + +class KeyPool: + """Round-robin key pool with health tracking. + + Thread-safe: all mutations protected by a lock so concurrent LLM calls + (e.g. parallel tool execution in EventLoopNode) don't race. + """ + + def __init__(self, keys: list[str]) -> None: + if not keys: + raise ValueError("KeyPool requires at least one key") + self._keys = list(keys) + self._index = 0 + self._health: dict[str, KeyHealth] = {k: KeyHealth() for k in keys} + self._lock = threading.Lock() + + @property + def size(self) -> int: + return len(self._keys) + + def get_key(self) -> str: + """Return the next healthy key (round-robin). + + If every key is currently rate-limited, returns the one whose cooldown + expires soonest so the caller can proceed with minimal delay. + """ + with self._lock: + now = time.monotonic() + for _ in range(len(self._keys)): + key = self._keys[self._index] + self._index = (self._index + 1) % len(self._keys) + health = self._health[key] + if health.rate_limited_until <= now: + health.total_requests += 1 + return key + # All rate-limited -- pick the one that expires soonest. 
+ soonest = min(self._keys, key=lambda k: self._health[k].rate_limited_until) + self._health[soonest].total_requests += 1 + return soonest + + def mark_rate_limited(self, key: str, retry_after: float = 60.0) -> None: + """Mark *key* as rate-limited for *retry_after* seconds.""" + with self._lock: + health = self._health.get(key) + if health: + health.rate_limited_until = time.monotonic() + retry_after + health.consecutive_errors += 1 + logger.info( + "[key-pool] Key ...%s rate-limited for %.0fs (errors=%d)", + key[-6:], + retry_after, + health.consecutive_errors, + ) + + def mark_success(self, key: str) -> None: + """Record a successful call on *key*.""" + with self._lock: + health = self._health.get(key) + if health: + health.consecutive_errors = 0 + health.total_successes += 1 + + def get_stats(self) -> dict[str, dict]: + """Return health stats keyed by the last 6 chars of each key.""" + with self._lock: + now = time.monotonic() + return { + f"...{k[-6:]}": { + "healthy": self._health[k].rate_limited_until <= now, + "requests": self._health[k].total_requests, + "successes": self._health[k].total_successes, + "consecutive_errors": self._health[k].consecutive_errors, + } + for k in self._keys + } diff --git a/core/framework/llm/litellm.py b/core/framework/llm/litellm.py index 89e01b14..d4e2def6 100644 --- a/core/framework/llm/litellm.py +++ b/core/framework/llm/litellm.py @@ -7,6 +7,8 @@ Groq, and local models. 
See: https://docs.litellm.ai/docs/providers """ +from __future__ import annotations + import ast import asyncio import hashlib @@ -18,7 +20,10 @@ import time from collections.abc import AsyncIterator from datetime import datetime from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from framework.llm.key_pool import KeyPool try: import litellm @@ -561,6 +566,7 @@ class LiteLLMProvider(LLMProvider): model: str = "gpt-4o-mini", api_key: str | None = None, api_base: str | None = None, + api_keys: list[str] | None = None, **kwargs: Any, ): """ @@ -573,6 +579,9 @@ class LiteLLMProvider(LLMProvider): look for the appropriate env var (OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.) api_base: Custom API base URL (for proxies or local deployments) + api_keys: Optional list of API keys for key-pool rotation. When + provided with 2+ keys, a :class:`KeyPool` is created and + keys are rotated on rate-limit errors. **kwargs: Additional arguments passed to litellm.completion() """ # Kimi For Coding exposes an Anthropic-compatible endpoint at @@ -594,11 +603,24 @@ class LiteLLMProvider(LLMProvider): if api_base and api_base.rstrip("/").endswith("/v1"): api_base = api_base.rstrip("/")[:-3] self.model = model - self.api_key = api_key + # Key pool: when multiple keys are provided, enable rotation. + self._key_pool: KeyPool | None = None + if api_keys and len(api_keys) > 1: + from framework.llm.key_pool import KeyPool + + self._key_pool = KeyPool(api_keys) + self.api_key = api_keys[0] # default for OAuth detection below + logger.info( + "[litellm] Key pool enabled with %d keys for model %s", + len(api_keys), + model, + ) + else: + self.api_key = api_key or (api_keys[0] if api_keys else None) self.api_base = api_base or self._default_api_base_for_model(_original_model) self.extra_kwargs = kwargs # Detect Claude Code OAuth subscription by checking the api_key prefix. 
- self._claude_code_oauth = bool(api_key and api_key.startswith("sk-ant-oat")) + self._claude_code_oauth = bool(self.api_key and self.api_key.startswith("sk-ant-oat")) if self._claude_code_oauth: # Anthropic requires a specific User-Agent for OAuth requests. eh = self.extra_kwargs.setdefault("extra_headers", {}) @@ -669,10 +691,20 @@ class LiteLLMProvider(LLMProvider): def _completion_with_rate_limit_retry( self, max_retries: int | None = None, **kwargs: Any ) -> Any: - """Call litellm.completion with retry on 429 rate limit errors and empty responses.""" + """Call litellm.completion with retry on 429 rate limit errors and empty responses. + + When a :class:`KeyPool` is configured, rate-limited keys are rotated + automatically so the next attempt uses a different key -- no sleep + needed between attempts. + """ model = kwargs.get("model", self.model) retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES for attempt in range(retries + 1): + # Rotate key from pool when available. + current_key: str | None = None + if self._key_pool: + current_key = self._key_pool.get_key() + kwargs["api_key"] = current_key try: response = litellm.completion(**kwargs) # type: ignore[union-attr] @@ -747,8 +779,22 @@ class LiteLLMProvider(LLMProvider): time.sleep(wait) continue + if self._key_pool and current_key: + self._key_pool.mark_success(current_key) return response except RateLimitError as e: + # Key pool: mark the offending key and rotate immediately. + if self._key_pool and current_key: + self._key_pool.mark_rate_limited(current_key, retry_after=60.0) + # When we have other healthy keys, skip the sleep -- the + # next iteration will pick a different key automatically. 
+ if attempt < retries: + logger.info( + "[retry] Key pool rotating away from ...%s on 429", + current_key[-6:], + ) + continue + # Dump full request to file for debugging messages = kwargs.get("messages", []) token_count, token_method = _estimate_tokens(model, messages) @@ -761,7 +807,7 @@ class LiteLLMProvider(LLMProvider): if attempt == retries: logger.error( f"[retry] GAVE UP on {model} after {retries + 1} " - f"attempts — rate limit error: {e!s}. " + f"attempts -- rate limit error: {e!s}. " f"~{token_count} tokens ({token_method}). " f"Full request dumped to: {dump_path}" ) @@ -880,10 +926,16 @@ class LiteLLMProvider(LLMProvider): """Async version of _completion_with_rate_limit_retry. Uses litellm.acompletion and asyncio.sleep instead of blocking calls. + When a :class:`KeyPool` is configured, rate-limited keys are rotated. """ model = kwargs.get("model", self.model) retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES for attempt in range(retries + 1): + # Rotate key from pool when available. + current_key: str | None = None + if self._key_pool: + current_key = self._key_pool.get_key() + kwargs["api_key"] = current_key try: response = await litellm.acompletion(**kwargs) # type: ignore[union-attr] @@ -952,8 +1004,20 @@ class LiteLLMProvider(LLMProvider): await asyncio.sleep(wait) continue + if self._key_pool and current_key: + self._key_pool.mark_success(current_key) return response except RateLimitError as e: + # Key pool: mark the offending key and rotate immediately. 
+ if self._key_pool and current_key: + self._key_pool.mark_rate_limited(current_key, retry_after=60.0) + if attempt < retries: + logger.info( + "[async-retry] Key pool rotating away from ...%s on 429", + current_key[-6:], + ) + continue + messages = kwargs.get("messages", []) token_count, token_method = _estimate_tokens(model, messages) dump_path = _dump_failed_request( @@ -965,7 +1029,7 @@ class LiteLLMProvider(LLMProvider): if attempt == retries: logger.error( f"[async-retry] GAVE UP on {model} after {retries + 1} " - f"attempts — rate limit error: {e!s}. " + f"attempts -- rate limit error: {e!s}. " f"~{token_count} tokens ({token_method}). " f"Full request dumped to: {dump_path}" ) diff --git a/core/framework/loader/__init__.py b/core/framework/loader/__init__.py new file mode 100644 index 00000000..1b00d9b4 --- /dev/null +++ b/core/framework/loader/__init__.py @@ -0,0 +1,4 @@ +"""Loader layer -- agent loading from disk (JSON config, MCP, credentials).""" + +from framework.loader.agent_loader import AgentLoader # noqa: F401 +from framework.loader.tool_registry import ToolRegistry # noqa: F401 diff --git a/core/framework/runner/runner.py b/core/framework/loader/agent_loader.py similarity index 77% rename from core/framework/runner/runner.py rename to core/framework/loader/agent_loader.py index e8735250..5adca172 100644 --- a/core/framework/runner/runner.py +++ b/core/framework/loader/agent_loader.py @@ -13,21 +13,20 @@ from framework.config import get_hive_config, get_max_context_tokens, get_prefer from framework.credentials.validation import ( ensure_credential_key_env as _ensure_credential_key_env, ) -from framework.graph import Goal -from framework.graph.edge import ( +from framework.orchestrator import Goal +from framework.orchestrator.edge import ( DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec, ) -from framework.graph.executor import ExecutionResult -from framework.graph.node import NodeSpec +from framework.orchestrator.orchestrator import 
ExecutionResult +from framework.orchestrator.node import NodeSpec from framework.llm.provider import LLMProvider, Tool -from framework.runner.preload_validation import run_preload_validation -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec -from framework.runtime.runtime_log_store import RuntimeLogStore +from framework.loader.preload_validation import run_preload_validation +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost, AgentRuntimeConfig +from framework.host.execution_manager import EntryPointSpec from framework.tools.flowchart_utils import generate_fallback_flowchart logger = logging.getLogger(__name__) @@ -881,6 +880,172 @@ class ValidationResult: missing_credentials: list[str] = field(default_factory=list) +def _resolve_template_vars(text: str | None, variables: dict[str, str]) -> str | None: + """Resolve ``{{variable_name}}`` placeholders in *text*.""" + if text is None or not variables: + return text + import re + + def _replace(m: re.Match) -> str: + key = m.group(1).strip() + return variables.get(key, m.group(0)) + + return re.sub(r"\{\{(.+?)\}\}", _replace, text) + + +def load_agent_config(data: str | dict) -> tuple[GraphSpec, Goal]: + """Load ``GraphSpec`` and ``Goal`` from a declarative :class:`AgentConfig`. + + The declarative format uses a ``name`` key at the top level, unlike the + legacy export format which uses ``graph``/``goal`` keys. The runner + auto-detects the format in :meth:`AgentLoader.load`. + + Template variables in ``config.variables`` are resolved in all + ``system_prompt`` and ``identity_prompt`` fields via ``{{var_name}}``. 
+ + Returns: + Tuple of (GraphSpec, Goal) + """ + from framework.orchestrator.edge import EdgeCondition, EdgeSpec + from framework.orchestrator.goal import Constraint, Goal as GoalModel, SuccessCriterion + from framework.schemas.agent_config import AgentConfig + + if isinstance(data, str): + data = json.loads(data) + + config = AgentConfig.model_validate(data) + tvars = config.variables + + # Build Goal + success_criteria = [ + SuccessCriterion( + id=f"sc-{i}", + description=sc, + metric="llm_judge", + target="", + ) + for i, sc in enumerate(config.goal.success_criteria) + ] + constraints = [ + Constraint( + id=f"c-{i}", + description=c, + constraint_type="hard", + category="general", + ) + for i, c in enumerate(config.goal.constraints) + ] + goal = GoalModel( + id=f"{config.name}-goal", + name=config.name, + description=config.goal.description, + success_criteria=success_criteria, + constraints=constraints, + ) + + # Build nodes + condition_map = { + "always": EdgeCondition.ALWAYS, + "on_success": EdgeCondition.ON_SUCCESS, + "on_failure": EdgeCondition.ON_FAILURE, + "conditional": EdgeCondition.CONDITIONAL, + "llm_decide": EdgeCondition.LLM_DECIDE, + } + + nodes = [] + for nc in config.nodes: + # Resolve tool access: node-level config -> agent-level fallback + if nc.tools.policy == "explicit" and nc.tools.allowed: + tools_list = nc.tools.allowed + tool_policy = "explicit" + elif nc.tools.policy == "none": + tools_list = [] + tool_policy = "none" + elif nc.tools.policy == "all": + tools_list = [] + tool_policy = "all" + else: + # Inherit agent-level tool config + if config.tools.policy == "explicit" and config.tools.allowed: + tools_list = config.tools.allowed + else: + tools_list = [] + tool_policy = config.tools.policy + + node_kwargs: dict = { + "id": nc.id, + "name": nc.name or nc.id, + "description": nc.description or "", + "node_type": nc.node_type, + "system_prompt": _resolve_template_vars(nc.system_prompt, tvars), + "tools": tools_list, + 
"tool_access_policy": tool_policy, + "model": nc.model, + "input_keys": nc.input_keys, + "output_keys": nc.output_keys, + "nullable_output_keys": nc.nullable_output_keys, + "max_iterations": nc.max_iterations, + "success_criteria": nc.success_criteria, + "skip_judge": nc.skip_judge, + } + # Optional fields -- only pass when set (avoids overriding defaults) + if nc.client_facing: + node_kwargs["client_facing"] = nc.client_facing + if nc.max_node_visits != 1: + node_kwargs["max_node_visits"] = nc.max_node_visits + if nc.failure_criteria: + node_kwargs["failure_criteria"] = nc.failure_criteria + if nc.max_retries is not None: + node_kwargs["max_retries"] = nc.max_retries + + nodes.append(NodeSpec(**node_kwargs)) + + # Build edges + edges = [] + for i, ec in enumerate(config.edges): + edges.append( + EdgeSpec( + id=f"e-{i}-{ec.from_node}-{ec.to_node}", + source=ec.from_node, + target=ec.to_node, + condition=condition_map.get(ec.condition, EdgeCondition.ON_SUCCESS), + condition_expr=ec.condition_expr, + priority=ec.priority, + input_mapping=ec.input_mapping, + ) + ) + + # Build entry_points dict for GraphSpec + entry_points_dict: dict = {} + if config.entry_points: + for ep in config.entry_points: + entry_points_dict[ep.id] = ep.entry_node or config.entry_node + else: + entry_points_dict = {"default": config.entry_node} + + # Build GraphSpec + graph_kwargs: dict = { + "id": f"{config.name}-graph", + "goal_id": goal.id, + "version": config.version, + "entry_node": config.entry_node, + "entry_points": entry_points_dict, + "terminal_nodes": config.terminal_nodes, + "pause_nodes": config.pause_nodes, + "nodes": nodes, + "edges": edges, + "max_tokens": config.max_tokens, + "loop_config": dict(config.loop_config), + "conversation_mode": config.conversation_mode, + "identity_prompt": _resolve_template_vars( + config.identity_prompt, tvars + ) or "", + } + + graph = GraphSpec(**graph_kwargs) + return graph, goal + + def load_agent_export(data: str | dict) -> tuple[GraphSpec, 
Goal]: """ Load GraphSpec and Goal from export_graph() output. @@ -942,7 +1107,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]: ) # Build Goal - from framework.graph.goal import Constraint, SuccessCriterion + from framework.orchestrator.goal import Constraint, SuccessCriterion success_criteria = [] for sc_data in goal_data.get("success_criteria", []): @@ -979,7 +1144,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]: return graph, goal -class AgentRunner: +class AgentLoader: """ Loads and runs exported agents with minimal boilerplate. @@ -991,15 +1156,15 @@ class AgentRunner: Usage: # Simple usage - runner = AgentRunner.load("exports/outbound-sales-agent") + runner = AgentLoader.load("exports/outbound-sales-agent") result = await runner.run({"lead_id": "123"}) # With context manager - async with AgentRunner.load("exports/outbound-sales-agent") as runner: + async with AgentLoader.load("exports/outbound-sales-agent") as runner: result = await runner.run({"lead_id": "123"}) # With custom tools - runner = AgentRunner.load("exports/outbound-sales-agent") + runner = AgentLoader.load("exports/outbound-sales-agent") runner.register_tool("my_tool", my_tool_func) result = await runner.run({"lead_id": "123"}) """ @@ -1027,7 +1192,7 @@ class AgentRunner: credential_store: Any | None = None, ): """ - Initialize the runner (use AgentRunner.load() instead). + Initialize the runner (use AgentLoader.load() instead). Args: agent_path: Path to agent folder @@ -1082,7 +1247,7 @@ class AgentRunner: self._approval_callback: Callable | None = None # AgentRuntime — unified execution path for all agents - self._agent_runtime: AgentRuntime | None = None + self._agent_runtime: AgentHost | None = None # Pre-load validation: structural checks + credentials. # Fails fast with actionable guidance — no MCP noise on screen. 
run_preload_validation( @@ -1101,14 +1266,7 @@ class AgentRunner: os.environ["HIVE_AGENT_NAME"] = agent_path.name os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path) - # Auto-discover MCP servers from mcp_servers.json - mcp_config_path = agent_path / "mcp_servers.json" - if mcp_config_path.exists(): - self._load_mcp_servers_from_config(mcp_config_path) - - # Auto-discover registry-selected MCP servers from mcp_registry.json - self._load_registry_mcp_servers(agent_path) - + # MCP tools are loaded by McpRegistryStage in the pipeline during AgentHost.start() @staticmethod def _import_agent_module(agent_path: Path): """Import an agent package from its directory path. @@ -1158,7 +1316,7 @@ class AgentRunner: interactive: bool = True, skip_credential_validation: bool | None = None, credential_store: Any | None = None, - ) -> "AgentRunner": + ) -> "AgentLoader": """ Load an agent from an export folder. @@ -1299,21 +1457,22 @@ class AgentRunner: runner._agent_skills = agent_skills return runner - # Fallback: load from agent.json (legacy JSON-based agents) + # Fallback: load from agent.json (declarative config) agent_json_path = agent_path / "agent.json" + if not agent_json_path.is_file(): raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}") - with open(agent_json_path, encoding="utf-8") as f: - export_data = f.read() - + export_data = agent_json_path.read_text(encoding="utf-8") if not export_data.strip(): - raise ValueError(f"Empty agent export file: {agent_json_path}") + raise ValueError(f"Empty agent.json: {agent_json_path}") - try: - graph, goal = load_agent_export(export_data) - except json.JSONDecodeError as exc: - raise ValueError(f"Invalid JSON in agent export file: {agent_json_path}") from exc + parsed = json.loads(export_data) + graph, goal = load_agent_config(parsed) + logger.info( + "Loaded declarative agent config from agent.json (name=%s)", + parsed.get("name"), + ) # Generate flowchart.json if missing (for legacy JSON-based 
agents) generate_fallback_flowchart(graph, goal, agent_path) @@ -1396,60 +1555,6 @@ class AgentRunner: } return self._tool_registry.register_mcp_server(server_config) - def _load_mcp_servers_from_config(self, config_path: Path) -> None: - """Load and register MCP servers from a configuration file.""" - self._tool_registry.load_mcp_config(config_path) - - def _load_registry_mcp_servers(self, agent_path: Path) -> None: - """Load and register MCP servers selected via ``mcp_registry.json``.""" - registry_json = agent_path / "mcp_registry.json" - if registry_json.is_file(): - self._tool_registry.set_mcp_registry_agent_path(agent_path) - else: - self._tool_registry.set_mcp_registry_agent_path(None) - - from framework.runner.mcp_registry import MCPRegistry - - try: - registry = MCPRegistry() - registry.initialize() - server_configs, selection_max_tools = registry.load_agent_selection(agent_path) - except Exception as exc: - logger.warning( - "Failed to load MCP registry servers for '%s': %s", - agent_path.name, - exc, - ) - return - - if not server_configs: - return - - results = self._tool_registry.load_registry_servers( - server_configs, - preserve_existing_tools=True, - log_collisions=True, - max_tools=selection_max_tools, - ) - loaded = [result for result in results if result["status"] == "loaded"] - skipped = [result for result in results if result["status"] != "loaded"] - - logger.info( - "Loaded %d/%d MCP registry server(s) for agent '%s'", - len(loaded), - len(results), - agent_path.name, - ) - if skipped: - logger.info( - "Skipped MCP registry servers for agent '%s': %s", - agent_path.name, - [ - {"server": result["server"], "reason": result["skipped_reason"]} - for result in skipped - ], - ) - def set_approval_callback(self, callback: Callable) -> None: """ Set a callback for human-in-the-loop approval during execution. 
@@ -1460,272 +1565,119 @@ class AgentRunner: self._approval_callback = callback def _setup(self, event_bus=None) -> None: - """Set up runtime, LLM, and executor.""" - # Configure structured logging (auto-detects JSON vs human-readable) + """Set up runtime via pipeline stages. + + Builds a pipeline with the default stages (LLM, credentials, MCP, + skills) and passes it to AgentHost. The stages initialize during + ``AgentHost.start()`` and inject tools/LLM/credentials/skills. + """ from framework.observability import configure_logging + from framework.pipeline.stages.credential_resolver import CredentialResolverStage + from framework.pipeline.stages.llm_provider import LlmProviderStage + from framework.pipeline.stages.mcp_registry import McpRegistryStage + from framework.pipeline.stages.skill_registry import SkillRegistryStage + from framework.skills.config import SkillsConfig configure_logging(level="INFO", format="auto") - # Set up session context for tools (agent_id) + # Set up session context for tools agent_id = self.graph.id or "unknown" + self._tool_registry.set_session_context(agent_id=agent_id) - self._tool_registry.set_session_context( - agent_id=agent_id, - ) + # Read MCP server refs from agent.json + mcp_refs = [] + agent_json = self.agent_path / "agent.json" + if agent_json.exists(): + try: + import json as _json - # Create LLM provider - # Uses LiteLLM which auto-detects the provider from model name - # Skip if already injected (e.g. 
worker agents with a pre-built LLM) - if self._llm is not None: - pass # LLM already configured externally - elif self.mock_mode: - # Use mock LLM for testing without real API calls - from framework.llm.mock import MockLLMProvider + data = _json.loads(agent_json.read_text(encoding="utf-8")) + mcp_refs = data.get("mcp_servers", []) + except Exception: + pass - self._llm = MockLLMProvider(model=self.model) - else: - from framework.llm.litellm import LiteLLMProvider - - # Check if a subscription mode is configured - config = get_hive_config() - llm_config = config.get("llm", {}) - use_claude_code = llm_config.get("use_claude_code_subscription", False) - use_codex = llm_config.get("use_codex_subscription", False) - use_kimi_code = llm_config.get("use_kimi_code_subscription", False) - use_antigravity = llm_config.get("use_antigravity_subscription", False) - api_base = llm_config.get("api_base") - - api_key = None - if use_claude_code: - # Get OAuth token from Claude Code subscription - api_key = get_claude_code_token() - if not api_key: - logger.warning( - "Claude Code subscription configured but no token found. " - "Run 'claude' to authenticate, then try again." - ) - elif use_codex: - # Get OAuth token from Codex subscription - api_key = get_codex_token() - if not api_key: - logger.warning( - "Codex subscription configured but no token found. " - "Run 'codex' to authenticate, then try again." - ) - elif use_kimi_code: - # Get API key from Kimi Code CLI config (~/.kimi/config.toml) - api_key = get_kimi_code_token() - if not api_key: - logger.warning( - "Kimi Code subscription configured but no key found. " - "Run 'kimi /login' to authenticate, then try again." - ) - elif use_antigravity: - pass # AntigravityProvider handles credentials internally - - if api_key and use_claude_code: - # Use litellm's built-in Anthropic OAuth support. - # The lowercase "authorization" key triggers OAuth detection which - # adds the required anthropic-beta and browser-access headers. 
- self._llm = LiteLLMProvider( - model=self.model, - api_key=api_key, - api_base=api_base, - extra_headers={"authorization": f"Bearer {api_key}"}, - ) - elif api_key and use_codex: - # OpenAI Codex subscription routes through the ChatGPT backend - # (chatgpt.com/backend-api/codex/responses), NOT the standard - # OpenAI API. The consumer OAuth token lacks platform API scopes. - extra_headers: dict[str, str] = { - "Authorization": f"Bearer {api_key}", - "User-Agent": "CodexBar", - } - account_id = get_codex_account_id() - if account_id: - extra_headers["ChatGPT-Account-Id"] = account_id - self._llm = LiteLLMProvider( - model=self.model, - api_key=api_key, - api_base="https://chatgpt.com/backend-api/codex", - extra_headers=extra_headers, - store=False, - allowed_openai_params=["store"], - ) - elif api_key and use_kimi_code: - # Kimi Code subscription uses the Kimi coding API (OpenAI-compatible). - # The api_base is set automatically by LiteLLMProvider for kimi/ models. - self._llm = LiteLLMProvider( - model=self.model, - api_key=api_key, - api_base=api_base, - ) - elif use_antigravity: - # Direct OAuth to Google's internal Cloud Code Assist gateway. - # No local proxy required — AntigravityProvider handles token - # refresh and Gemini-format request/response conversion natively. - from framework.llm.antigravity import AntigravityProvider # noqa: PLC0415 - - provider = AntigravityProvider(model=self.model) - if not provider.has_credentials(): - print( - "Warning: Antigravity credentials not found. " - "Run: uv run python core/antigravity_auth.py auth account add" - ) - self._llm = provider - else: - # Local models (e.g. 
Ollama) don't need an API key - if self._is_local_model(self.model): - self._llm = LiteLLMProvider( - model=self.model, - api_base=api_base, - ) - else: - # Fall back to environment variable - # First check api_key_env_var from config (set by quickstart) - api_key_env = llm_config.get("api_key_env_var") or self._get_api_key_env_var( - self.model - ) - if api_key_env and os.environ.get(api_key_env): - self._llm = LiteLLMProvider( - model=self.model, - api_key=os.environ[api_key_env], - api_base=api_base, - ) - else: - # Fall back to credential store - api_key = self._get_api_key_from_credential_store() - if api_key: - self._llm = LiteLLMProvider( - model=self.model, api_key=api_key, api_base=api_base - ) - # Set env var so downstream code (e.g. cleanup LLM in - # node._extract_json) can also find it - if api_key_env: - os.environ[api_key_env] = api_key - elif api_key_env: - logger.warning( - "%s not set. LLM calls will fail. " - "Set it with: export %s=your-api-key", - api_key_env, - api_key_env, - ) - - # Fail fast if the agent needs an LLM but none was configured - if self._llm is None: - has_llm_nodes = any( - node.node_type in ("event_loop", "gcu") for node in self.graph.nodes - ) - if has_llm_nodes: - from framework.credentials.models import CredentialError - - if self._is_local_model(self.model): - raise CredentialError( - f"Failed to initialize LLM for local model '{self.model}'. " - f"Ensure your local LLM server is running " - f"(e.g. 'ollama serve' for Ollama)." - ) - api_key_env = self._get_api_key_env_var(self.model) - hint = ( - f"Set it with: export {api_key_env}=your-api-key" - if api_key_env - else "Configure an API key for your LLM provider." - ) - raise CredentialError(f"LLM API key not found for model '{self.model}'. 
{hint}") - - # For GCU nodes: auto-register GCU MCP server if needed, then expand tool lists - has_gcu_nodes = any(node.node_type == "gcu" for node in self.graph.nodes) - if has_gcu_nodes: - from framework.graph.gcu import GCU_MCP_SERVER_CONFIG, GCU_SERVER_NAME - - # Auto-register GCU MCP server if tools aren't loaded yet - gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME) - if not gcu_tool_names: - # Resolve cwd to repo-level tools/ (not relative to agent_path) - gcu_config = dict(GCU_MCP_SERVER_CONFIG) - _repo_root = Path(__file__).resolve().parent.parent.parent.parent - gcu_config["cwd"] = str(_repo_root / "tools") - self._tool_registry.register_mcp_server(gcu_config) - gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME) - - # Expand each GCU node's tools list to include all GCU server tools - if gcu_tool_names: - for node in self.graph.nodes: - if node.node_type == "gcu": - existing = set(node.tools) - for tool_name in sorted(gcu_tool_names): - if tool_name not in existing: - node.tools.append(tool_name) - - # For event_loop/gcu nodes: auto-register file tools MCP server, then expand tool lists - has_loop_nodes = any(node.node_type in ("event_loop", "gcu") for node in self.graph.nodes) - if has_loop_nodes: - from framework.graph.files import FILES_MCP_SERVER_CONFIG, FILES_MCP_SERVER_NAME - - files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME) - if not files_tool_names: - # Resolve cwd to repo-level tools/ (not relative to agent_path) - files_config = dict(FILES_MCP_SERVER_CONFIG) - _repo_root = Path(__file__).resolve().parent.parent.parent.parent - files_config["cwd"] = str(_repo_root / "tools") - self._tool_registry.register_mcp_server(files_config) - files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME) - - if files_tool_names: - for node in self.graph.nodes: - if node.node_type in ("event_loop", "gcu"): - existing = set(node.tools) - for tool_name 
in sorted(files_tool_names): - if tool_name not in existing: - node.tools.append(tool_name) - - # Get tools for runtime - tools = list(self._tool_registry.get_tools().values()) - tool_executor = self._tool_registry.get_executor() - - # Collect connected account info for system prompt injection - accounts_prompt = "" - accounts_data: list[dict] | None = None - tool_provider_map: dict[str, str] | None = None - try: - from aden_tools.credentials.store_adapter import CredentialStoreAdapter - - if self._credential_store is not None: - adapter = CredentialStoreAdapter(store=self._credential_store) - else: - adapter = CredentialStoreAdapter.default() - accounts_data = adapter.get_all_account_info() - tool_provider_map = adapter.get_tool_provider_map() - if accounts_data: - from framework.graph.prompting import build_accounts_prompt - - accounts_prompt = build_accounts_prompt(accounts_data, tool_provider_map) - except Exception: - pass # Best-effort — agent works without account info - - # Skill configuration — the runtime handles discovery, loading, trust-gating and - # prompt rasterization. The runner just builds the config. 
- from framework.skills.config import SkillsConfig - from framework.skills.manager import SkillsManagerConfig - - skills_manager_config = SkillsManagerConfig( - skills_config=SkillsConfig.from_agent_vars( - default_skills=getattr(self, "_agent_default_skills", None), - skills=getattr(self, "_agent_skills", None), + # Build default pipeline stages + # Default infrastructure stages (always present) + pipeline_stages = [ + LlmProviderStage( + model=self.model, + mock_mode=self.mock_mode, + llm=self._llm, ), - project_root=self.agent_path, - interactive=self._interactive, - ) + CredentialResolverStage( + credential_store=self._credential_store, + ), + McpRegistryStage( + server_refs=mcp_refs, + agent_path=self.agent_path, + tool_registry=self._tool_registry, + ), + SkillRegistryStage( + project_root=self.agent_path, + interactive=self._interactive, + skills_config=SkillsConfig.from_agent_vars( + default_skills=getattr(self, "_agent_default_skills", None), + skills=getattr(self, "_agent_skills", None), + ), + ), + ] - self._setup_agent_runtime( - tools, - tool_executor, - accounts_prompt=accounts_prompt, - accounts_data=accounts_data, - tool_provider_map=tool_provider_map, + # Merge user-configured stages from ~/.hive/configuration.json + from framework.config import get_hive_config + from framework.pipeline.registry import build_pipeline_from_config + + hive_config = get_hive_config() + user_stages_config = hive_config.get("pipeline", {}).get("stages", []) + if user_stages_config: + user_pipeline = build_pipeline_from_config(user_stages_config) + pipeline_stages.extend(user_pipeline.stages) + + # Merge agent-level overrides from agent.json pipeline field + if agent_json.exists(): + try: + agent_pipeline = ( + _json.loads(agent_json.read_text(encoding="utf-8")) + .get("pipeline", {}) + .get("stages", []) + ) + if agent_pipeline: + agent_stages = build_pipeline_from_config(agent_pipeline) + pipeline_stages.extend(agent_stages.stages) + except Exception: + pass + + # 
Create AgentHost directly (no wrapper) + from framework.host.execution_manager import EntryPointSpec + from framework.orchestrator.checkpoint_config import CheckpointConfig + from framework.tracker.runtime_log_store import RuntimeLogStore + + self._agent_runtime = AgentHost( + graph=self.graph, + goal=self.goal, + storage_path=self._storage_path, + runtime_log_store=RuntimeLogStore( + base_path=self._storage_path / "runtime_logs", + ), + checkpoint_config=CheckpointConfig( + enabled=True, + checkpoint_on_node_complete=True, + checkpoint_max_age_days=7, + async_checkpoint=True, + ), + graph_id=self.graph.id or self.agent_path.name, event_bus=event_bus, - skills_manager_config=skills_manager_config, + pipeline_stages=pipeline_stages, ) + self._agent_runtime.register_entry_point( + EntryPointSpec( + id="default", + name="Default", + entry_node=self.graph.entry_node, + trigger_type="manual", + isolation_level="shared", + ), + ) + self._agent_runtime.intro_message = self.intro_message def _get_api_key_env_var(self, model: str) -> str | None: """Get the environment variable name for the API key based on model name.""" @@ -1833,83 +1785,6 @@ class AgentRunner: ) return model.lower().startswith(LOCAL_PREFIXES) - def _setup_agent_runtime( - self, - tools: list, - tool_executor: Callable | None, - accounts_prompt: str = "", - accounts_data: list[dict] | None = None, - tool_provider_map: dict[str, str] | None = None, - event_bus=None, - skills_catalog_prompt: str = "", - protocols_prompt: str = "", - skill_dirs: list[str] | None = None, - skills_manager_config=None, - ) -> None: - """Set up multi-entry-point execution using AgentRuntime.""" - entry_points = [] - - # Always create a primary entry point for the graph's entry node. - # For multi-entry-point agents this ensures the primary path (e.g. - # user-facing rule setup) is reachable alongside async entry points. 
- if self.graph.entry_node: - entry_points.insert( - 0, - EntryPointSpec( - id="default", - name="Default", - entry_node=self.graph.entry_node, - trigger_type="manual", - isolation_level="shared", - ), - ) - - # Create AgentRuntime with all entry points - log_store = RuntimeLogStore(base_path=self._storage_path / "runtime_logs") - - # Enable checkpointing by default for resumable sessions - from framework.graph.checkpoint_config import CheckpointConfig - - checkpoint_config = CheckpointConfig( - enabled=True, - checkpoint_on_node_start=False, # Only checkpoint after nodes complete - checkpoint_on_node_complete=True, - checkpoint_max_age_days=7, - async_checkpoint=True, # Non-blocking - ) - - # Handle runtime_config - only pass through if it's actually an AgentRuntimeConfig. - # Agents may export a RuntimeConfig (LLM settings) or queen-generated custom classes - # that would crash AgentRuntime if passed through. - runtime_config = None - if self.runtime_config is not None: - from framework.runtime.agent_runtime import AgentRuntimeConfig - - if isinstance(self.runtime_config, AgentRuntimeConfig): - runtime_config = self.runtime_config - - self._agent_runtime = create_agent_runtime( - graph=self.graph, - goal=self.goal, - storage_path=self._storage_path, - entry_points=entry_points, - llm=self._llm, - tools=tools, - tool_executor=tool_executor, - runtime_log_store=log_store, - checkpoint_config=checkpoint_config, - config=runtime_config, - graph_id=self.graph.id or self.agent_path.name, - accounts_prompt=accounts_prompt, - accounts_data=accounts_data, - tool_provider_map=tool_provider_map, - event_bus=event_bus, - skills_manager_config=skills_manager_config, - ) - - # Pass intro_message through for TUI display - self._agent_runtime.intro_message = self.intro_message - # ------------------------------------------------------------------ # Execution modes # @@ -1990,7 +1865,7 @@ class AgentRunner: sub_ids: list[str] = [] if has_queen and sys.stdin.isatty(): - from 
framework.runtime.event_bus import EventType + from framework.host.event_bus import EventType runtime = self._agent_runtime @@ -2246,7 +2121,7 @@ class AgentRunner: except ImportError: # aden_tools not installed - fall back to direct check has_llm_nodes = any( - node.node_type in ("event_loop", "gcu") for node in self.graph.nodes + node.node_type == "event_loop" for node in self.graph.nodes ) if has_llm_nodes: api_key_env = self._get_api_key_env_var(self.model) @@ -2283,7 +2158,7 @@ class AgentRunner: # Run synchronous cleanup self.cleanup() - async def __aenter__(self) -> "AgentRunner": + async def __aenter__(self) -> "AgentLoader": """Context manager entry.""" self._setup() if self._agent_runtime is not None: diff --git a/core/framework/runner/cli.py b/core/framework/loader/cli.py similarity index 96% rename from core/framework/runner/cli.py rename to core/framework/loader/cli.py index 3e94afb9..bd83fa8e 100644 --- a/core/framework/runner/cli.py +++ b/core/framework/loader/cli.py @@ -19,7 +19,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None: run_parser.add_argument( "agent_path", type=str, - help="Path to agent folder (containing agent.json)", + help="Path to agent folder (containing agent.json or agent.py)", ) run_parser.add_argument( "--input", @@ -87,7 +87,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None: info_parser.add_argument( "agent_path", type=str, - help="Path to agent folder (containing agent.json)", + help="Path to agent folder (containing agent.json or agent.py)", ) info_parser.add_argument( "--json", @@ -105,7 +105,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None: validate_parser.add_argument( "agent_path", type=str, - help="Path to agent folder (containing agent.json)", + help="Path to agent folder (containing agent.json or agent.py)", ) validate_parser.set_defaults(func=cmd_validate) @@ -310,7 +310,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None 
= None): Updated runner if user proceeds, None if user aborts. """ from framework.credentials.setup import CredentialSetupSession - from framework.runner import AgentRunner + from framework.loader import AgentLoader while True: print() @@ -328,7 +328,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None): if result.success: # Reload runner with updated credentials try: - runner = AgentRunner.load(agent_path, model=model) + runner = AgentLoader.load(agent_path, model=model) except Exception as e: print(f"Error reloading agent: {e}") return None @@ -342,7 +342,7 @@ def cmd_run(args: argparse.Namespace) -> int: from framework.credentials.models import CredentialError from framework.observability import configure_logging - from framework.runner import AgentRunner + from framework.loader import AgentLoader # Set logging level (quiet by default for cleaner output) if args.quiet: @@ -390,7 +390,7 @@ def cmd_run(args: argparse.Namespace) -> int: # Standard execution # AgentRunner handles credential setup interactively when stdin is a TTY. 
try: - runner = AgentRunner.load( + runner = AgentLoader.load( args.agent_path, model=args.model, ) @@ -528,10 +528,10 @@ def cmd_run(args: argparse.Namespace) -> int: def cmd_info(args: argparse.Namespace) -> int: """Show agent information.""" from framework.credentials.models import CredentialError - from framework.runner import AgentRunner + from framework.loader import AgentLoader try: - runner = AgentRunner.load(args.agent_path) + runner = AgentLoader.load(args.agent_path) except CredentialError as e: print(f"\n{e}", file=sys.stderr) return 1 @@ -595,10 +595,10 @@ def cmd_info(args: argparse.Namespace) -> int: def cmd_validate(args: argparse.Namespace) -> int: """Validate an exported agent.""" from framework.credentials.models import CredentialError - from framework.runner import AgentRunner + from framework.loader import AgentLoader try: - runner = AgentRunner.load(args.agent_path) + runner = AgentLoader.load(args.agent_path) except CredentialError as e: print(f"\n{e}", file=sys.stderr) return 1 @@ -632,7 +632,7 @@ def cmd_validate(args: argparse.Namespace) -> int: def cmd_list(args: argparse.Namespace) -> int: """List available agents.""" - from framework.runner import AgentRunner + from framework.loader import AgentLoader directory = Path(args.directory) if not directory.exists(): @@ -644,7 +644,7 @@ def cmd_list(args: argparse.Namespace) -> int: for path in directory.iterdir(): if _is_valid_agent_dir(path): try: - runner = AgentRunner.load(path) + runner = AgentLoader.load(path) info = runner.info() agents.append( { @@ -686,7 +686,7 @@ def cmd_list(args: argparse.Namespace) -> int: def _interactive_approval(request): """Interactive approval callback for HITL mode.""" - from framework.graph import ApprovalDecision, ApprovalResult + from framework.orchestrator import ApprovalDecision, ApprovalResult print() print("=" * 60) @@ -775,7 +775,7 @@ def cmd_shell(args: argparse.Namespace) -> int: from framework.credentials.models import CredentialError from 
framework.observability import configure_logging - from framework.runner import AgentRunner + from framework.loader import AgentLoader configure_logging(level="INFO") @@ -789,7 +789,7 @@ def cmd_shell(args: argparse.Namespace) -> int: return 1 try: - runner = AgentRunner.load(agent_path) + runner = AgentLoader.load(agent_path) except CredentialError as e: print(f"\n{e}", file=sys.stderr) return 1 @@ -1004,17 +1004,35 @@ def _get_framework_agents_dir() -> Path: def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]: - """Extract name and description from a Python-based agent's config.py. + """Extract name and description from an agent directory. - Uses AST parsing to safely extract values without executing code. + Checks agent.json first (declarative), then falls back to config.py + (legacy Python). Uses AST parsing for Python to avoid executing code. Returns (name, description) tuple, with fallbacks if parsing fails. """ import ast - config_path = agent_path / "config.py" fallback_name = agent_path.name.replace("_", " ").title() fallback_desc = "(Python-based agent)" + # Declarative agent: read from agent.json + agent_json = agent_path / "agent.json" + if agent_json.exists(): + try: + import json + + data = json.loads(agent_json.read_text(encoding="utf-8")) + if isinstance(data, dict): + name = data.get("name", fallback_name) + # Convert kebab-case to Title Case for display + if "-" in name and " " not in name: + name = name.replace("-", " ").title() + desc = data.get("description", fallback_desc) + return name, desc + except Exception: + pass + + config_path = agent_path / "config.py" if not config_path.exists(): return fallback_name, fallback_desc @@ -1083,7 +1101,7 @@ def _is_valid_agent_dir(path: Path) -> bool: def _has_agents(directory: Path) -> bool: - """Check if a directory contains any valid agents (folders with agent.json or agent.py).""" + """Check if a directory contains any valid agents.""" if not directory.exists(): return False 
return any(_is_valid_agent_dir(p) for p in directory.iterdir()) diff --git a/core/framework/runner/mcp_client.py b/core/framework/loader/mcp_client.py similarity index 99% rename from core/framework/runner/mcp_client.py rename to core/framework/loader/mcp_client.py index df665571..d2e36273 100644 --- a/core/framework/runner/mcp_client.py +++ b/core/framework/loader/mcp_client.py @@ -14,7 +14,7 @@ from typing import Any, Literal import httpx -from framework.runner.mcp_errors import MCPToolNotFoundError +from framework.loader.mcp_errors import MCPToolNotFoundError logger = logging.getLogger(__name__) diff --git a/core/framework/runner/mcp_connection_manager.py b/core/framework/loader/mcp_connection_manager.py similarity index 99% rename from core/framework/runner/mcp_connection_manager.py rename to core/framework/loader/mcp_connection_manager.py index 98bb9a24..f5118d94 100644 --- a/core/framework/runner/mcp_connection_manager.py +++ b/core/framework/loader/mcp_connection_manager.py @@ -5,7 +5,7 @@ import threading import httpx -from framework.runner.mcp_client import MCPClient, MCPServerConfig +from framework.loader.mcp_client import MCPClient, MCPServerConfig logger = logging.getLogger(__name__) diff --git a/core/framework/runner/mcp_errors.py b/core/framework/loader/mcp_errors.py similarity index 100% rename from core/framework/runner/mcp_errors.py rename to core/framework/loader/mcp_errors.py diff --git a/core/framework/runner/mcp_registry.py b/core/framework/loader/mcp_registry.py similarity index 99% rename from core/framework/runner/mcp_registry.py rename to core/framework/loader/mcp_registry.py index 4de4bb93..adaaebaa 100644 --- a/core/framework/runner/mcp_registry.py +++ b/core/framework/loader/mcp_registry.py @@ -14,9 +14,9 @@ from typing import Any, Literal import httpx -from framework.runner.mcp_client import MCPClient, MCPServerConfig -from framework.runner.mcp_connection_manager import MCPConnectionManager -from framework.runner.mcp_errors import ( 
+from framework.loader.mcp_client import MCPClient, MCPServerConfig +from framework.loader.mcp_connection_manager import MCPConnectionManager +from framework.loader.mcp_errors import ( MCPError, MCPErrorCode, MCPInstallError, diff --git a/core/framework/runner/mcp_registry_cli.py b/core/framework/loader/mcp_registry_cli.py similarity index 99% rename from core/framework/runner/mcp_registry_cli.py rename to core/framework/loader/mcp_registry_cli.py index b84b59dc..ccaa4861 100644 --- a/core/framework/runner/mcp_registry_cli.py +++ b/core/framework/loader/mcp_registry_cli.py @@ -28,7 +28,7 @@ from typing import Any def _get_registry(base_path: Path | None = None): """Initialize and return an MCPRegistry instance.""" - from framework.runner.mcp_registry import MCPRegistry + from framework.loader.mcp_registry import MCPRegistry registry = MCPRegistry(base_path=base_path) registry.initialize() diff --git a/core/framework/runner/preload_validation.py b/core/framework/loader/preload_validation.py similarity index 98% rename from core/framework/runner/preload_validation.py rename to core/framework/loader/preload_validation.py index c04ceabf..9e76e3d4 100644 --- a/core/framework/runner/preload_validation.py +++ b/core/framework/loader/preload_validation.py @@ -11,8 +11,8 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING if TYPE_CHECKING: - from framework.graph.edge import GraphSpec - from framework.graph.node import NodeSpec + from framework.orchestrator.edge import GraphSpec + from framework.orchestrator.node import NodeSpec logger = logging.getLogger(__name__) diff --git a/core/framework/runner/protocol.py b/core/framework/loader/protocol.py similarity index 100% rename from core/framework/runner/protocol.py rename to core/framework/loader/protocol.py diff --git a/core/framework/runner/tool_registry.py b/core/framework/loader/tool_registry.py similarity index 97% rename from core/framework/runner/tool_registry.py rename to 
core/framework/loader/tool_registry.py index 5ea8154f..4c862e44 100644 --- a/core/framework/runner/tool_registry.py +++ b/core/framework/loader/tool_registry.py @@ -262,15 +262,21 @@ class ToolRegistry: is_error=False, ) + registry_ref = self + def executor(tool_use: ToolUse) -> ToolResult: - if tool_use.name not in self._tools: + # Check if credential files changed (lightweight dir listing). + # If new OAuth tokens appeared, restarts MCP servers to pick them up. + registry_ref.resync_mcp_servers_if_needed() + + if tool_use.name not in registry_ref._tools: return ToolResult( tool_use_id=tool_use.id, content=json.dumps({"error": f"Unknown tool: {tool_use.name}"}), is_error=True, ) - registered = self._tools[tool_use.name] + registered = registry_ref._tools[tool_use.name] try: result = registered.executor(tool_use.input) @@ -635,8 +641,8 @@ class ToolRegistry: Number of tools registered from this server """ try: - from framework.runner.mcp_client import MCPClient, MCPServerConfig - from framework.runner.mcp_connection_manager import MCPConnectionManager + from framework.loader.mcp_client import MCPClient, MCPServerConfig + from framework.loader.mcp_connection_manager import MCPConnectionManager # Build config object config = MCPServerConfig( @@ -883,7 +889,7 @@ class ToolRegistry: """Re-run ``mcp_registry.json`` resolution and register servers (post-resync).""" if self._mcp_registry_agent_path is None: return - from framework.runner.mcp_registry import MCPRegistry + from framework.loader.mcp_registry import MCPRegistry try: reg = MCPRegistry() @@ -922,6 +928,11 @@ class ToolRegistry: clients and re-loads them so the new subprocess picks up the fresh credentials. + Note: Individual credential TTL/refresh is handled by the MCP server + process internally -- it resolves tokens from the credential store + on every tool call, not at startup. This method only handles the case + where entirely new credential files appear. 
+ Returns True if a resync was performed, False otherwise. """ if not self._mcp_clients or self._mcp_config_path is None: @@ -975,7 +986,7 @@ class ToolRegistry: server_name = self._mcp_client_servers.get(client_id, client.config.name) try: if client_id in self._mcp_managed_clients: - from framework.runner.mcp_connection_manager import MCPConnectionManager + from framework.loader.mcp_connection_manager import MCPConnectionManager MCPConnectionManager.get_instance().release(server_name) else: diff --git a/core/framework/orchestrator/__init__.py b/core/framework/orchestrator/__init__.py new file mode 100644 index 00000000..6ffc277a --- /dev/null +++ b/core/framework/orchestrator/__init__.py @@ -0,0 +1,27 @@ +"""Orchestrator layer -- how agents are composed via graphs. + +Lazy imports to avoid circular dependencies with graph/event_loop/*. +""" + + +def __getattr__(name: str): + if name in ("GraphContext",): + from framework.orchestrator.context import GraphContext + return GraphContext + if name in ("DEFAULT_MAX_TOKENS", "EdgeCondition", "EdgeSpec", "GraphSpec"): + from framework.orchestrator import edge as _e + return getattr(_e, name) + if name in ("Orchestrator", "ExecutionResult"): + from framework.orchestrator import orchestrator as _o + return getattr(_o, name) + if name in ("Constraint", "Goal", "GoalStatus", "SuccessCriterion"): + from framework.orchestrator import goal as _g + return getattr(_g, name) + if name in ("DataBuffer", "NodeContext", "NodeProtocol", "NodeResult", "NodeSpec"): + from framework.orchestrator import node as _n + return getattr(_n, name) + if name in ("NodeWorker", "Activation", "FanOutTag", "FanOutTracker", + "WorkerCompletion", "WorkerLifecycle"): + from framework.orchestrator import node_worker as _nw + return getattr(_nw, name) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/core/framework/graph/checkpoint_config.py b/core/framework/orchestrator/checkpoint_config.py similarity index 100% rename 
from core/framework/graph/checkpoint_config.py rename to core/framework/orchestrator/checkpoint_config.py diff --git a/core/framework/graph/client_io.py b/core/framework/orchestrator/client_io.py similarity index 99% rename from core/framework/graph/client_io.py rename to core/framework/orchestrator/client_io.py index 992b5818..1fbc66c9 100644 --- a/core/framework/graph/client_io.py +++ b/core/framework/orchestrator/client_io.py @@ -16,7 +16,7 @@ from collections.abc import AsyncIterator from typing import TYPE_CHECKING if TYPE_CHECKING: - from framework.runtime.event_bus import EventBus + from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) diff --git a/core/framework/graph/context.py b/core/framework/orchestrator/context.py similarity index 91% rename from core/framework/graph/context.py rename to core/framework/orchestrator/context.py index 5b4fba4d..381c0474 100644 --- a/core/framework/graph/context.py +++ b/core/framework/orchestrator/context.py @@ -13,10 +13,10 @@ import asyncio from dataclasses import dataclass, field from typing import Any -from framework.graph.edge import GraphSpec -from framework.graph.goal import Goal -from framework.graph.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec -from framework.runtime.core import Runtime +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.goal import Goal +from framework.orchestrator.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec +from framework.tracker.decision_tracker import DecisionTracker @dataclass @@ -26,7 +26,7 @@ class GraphContext: graph: GraphSpec goal: Goal buffer: DataBuffer - runtime: Runtime + runtime: DecisionTracker llm: Any # LLMProvider tools: list[Any] # list[Tool] tool_executor: Any # Callable @@ -106,7 +106,7 @@ def build_node_accounts_prompt( resolved = accounts_prompt if accounts_data and tool_provider_map: - from framework.graph.prompting import build_accounts_prompt + from framework.orchestrator.prompting 
import build_accounts_prompt filtered = build_accounts_prompt( accounts_data, @@ -125,11 +125,27 @@ def _resolve_available_tools( tools: list[Any], override_tools: list[Any] | None, ) -> list[Any]: - """Select tools available to the current node.""" + """Select tools available to the current node. + + Respects ``node_spec.tool_access_policy``: + - ``"all"`` -- all tools from the registry (no filtering). + - ``"explicit"`` -- only tools whose name appears in ``node_spec.tools``. + If the list is empty, **no tools** are given (default-deny). + - ``"none"`` -- no tools at all. + """ if override_tools is not None: return list(override_tools) + policy = getattr(node_spec, "tool_access_policy", "explicit") + + if policy == "none": + return [] + + if policy == "all": + return list(tools) + + # "explicit" (default): only tools named in node_spec.tools. if not node_spec.tools: return [] @@ -149,7 +165,7 @@ def _derive_input_data(buffer: DataBuffer, input_keys: list[str]) -> dict[str, A def build_node_context( *, - runtime: Runtime, + runtime: DecisionTracker, node_spec: NodeSpec, buffer: DataBuffer, goal: Goal, @@ -234,9 +250,6 @@ def build_node_context( execution_id=execution_id, run_id=run_id, stream_id=stream_id, - node_registry=node_registry or {}, - all_tools=list(all_tools or tools), - shared_node_registry=shared_node_registry or {}, dynamic_tools_provider=dynamic_tools_provider, dynamic_prompt_provider=dynamic_prompt_provider, dynamic_memory_provider=dynamic_memory_provider, @@ -308,9 +321,6 @@ def build_node_context_from_graph_context( execution_id=gc.execution_id, run_id=gc.run_id, stream_id=gc.stream_id, - node_registry=node_registry or gc.node_spec_registry, - all_tools=gc.tools, - shared_node_registry=gc.node_registry, dynamic_tools_provider=gc.dynamic_tools_provider, dynamic_prompt_provider=gc.dynamic_prompt_provider, dynamic_memory_provider=gc.dynamic_memory_provider, diff --git a/core/framework/graph/context_handoff.py 
b/core/framework/orchestrator/context_handoff.py similarity index 98% rename from core/framework/graph/context_handoff.py rename to core/framework/orchestrator/context_handoff.py index 69831506..0d9a7e54 100644 --- a/core/framework/graph/context_handoff.py +++ b/core/framework/orchestrator/context_handoff.py @@ -6,10 +6,10 @@ import logging from dataclasses import dataclass from typing import TYPE_CHECKING, Any -from framework.graph.conversation import _try_extract_key +from framework.agent_loop.conversation import _try_extract_key if TYPE_CHECKING: - from framework.graph.conversation import NodeConversation + from framework.agent_loop.conversation import NodeConversation from framework.llm.provider import LLMProvider logger = logging.getLogger(__name__) diff --git a/core/framework/graph/conversation_judge.py b/core/framework/orchestrator/conversation_judge.py similarity index 99% rename from core/framework/graph/conversation_judge.py rename to core/framework/orchestrator/conversation_judge.py index 298776b4..e5a57a06 100644 --- a/core/framework/graph/conversation_judge.py +++ b/core/framework/orchestrator/conversation_judge.py @@ -15,7 +15,7 @@ import logging from dataclasses import dataclass from typing import Any -from framework.graph.conversation import NodeConversation +from framework.agent_loop.conversation import NodeConversation from framework.llm.provider import LLMProvider logger = logging.getLogger(__name__) diff --git a/core/framework/graph/edge.py b/core/framework/orchestrator/edge.py similarity index 89% rename from core/framework/graph/edge.py rename to core/framework/orchestrator/edge.py index 284f66f8..a617edb9 100644 --- a/core/framework/graph/edge.py +++ b/core/framework/orchestrator/edge.py @@ -29,7 +29,7 @@ from typing import Any from pydantic import BaseModel, Field, model_validator -from framework.graph.safe_eval import safe_eval +from framework.orchestrator.safe_eval import safe_eval logger = logging.getLogger(__name__) @@ -538,13 +538,6 @@ 
class GraphSpec(BaseModel): for edge in self.get_outgoing_edges(current): to_visit.append(edge.target) - # Also mark sub-agents as reachable (they're invoked via delegate_to_sub_agent, not edges) - for node in self.nodes: - if node.id in reachable: - sub_agents = getattr(node, "sub_agents", []) or [] - for sub_agent_id in sub_agents: - reachable.add(sub_agent_id) - for node in self.nodes: if node.id not in reachable: # Skip if node is a pause node or entry point target @@ -583,48 +576,4 @@ class GraphSpec(BaseModel): else: seen_keys[key] = node_id - # GCU nodes must only be used as subagents - gcu_node_ids = {n.id for n in self.nodes if n.node_type == "gcu"} - if gcu_node_ids: - # GCU nodes must not be entry nodes - if self.entry_node in gcu_node_ids: - errors.append( - f"GCU node '{self.entry_node}' is used as entry node. " - "GCU nodes must only be used as subagents via delegate_to_sub_agent()." - ) - - # GCU nodes must not be terminal nodes - for term in self.terminal_nodes: - if term in gcu_node_ids: - errors.append( - f"GCU node '{term}' is used as terminal node. " - "GCU nodes must only be used as subagents." - ) - - # GCU nodes must not be connected via edges - for edge in self.edges: - if edge.source in gcu_node_ids: - errors.append( - f"GCU node '{edge.source}' is used as edge source (edge '{edge.id}'). " - "GCU nodes must only be used as subagents, not connected via edges." - ) - if edge.target in gcu_node_ids: - errors.append( - f"GCU node '{edge.target}' is used as edge target (edge '{edge.id}'). " - "GCU nodes must only be used as subagents, not connected via edges." - ) - - # GCU nodes must be referenced in at least one parent's sub_agents - referenced_subagents = set() - for node in self.nodes: - for sa_id in node.sub_agents or []: - referenced_subagents.add(sa_id) - - orphaned = gcu_node_ids - referenced_subagents - for nid in orphaned: - errors.append( - f"GCU node '{nid}' is not referenced in any node's sub_agents list. 
" - "GCU nodes must be declared as subagents of a parent node." - ) - return {"errors": errors, "warnings": warnings} diff --git a/core/framework/graph/files.py b/core/framework/orchestrator/files.py similarity index 100% rename from core/framework/graph/files.py rename to core/framework/orchestrator/files.py diff --git a/core/framework/graph/gcu.py b/core/framework/orchestrator/gcu.py similarity index 86% rename from core/framework/graph/gcu.py rename to core/framework/orchestrator/gcu.py index c336faf4..a68d2d11 100644 --- a/core/framework/graph/gcu.py +++ b/core/framework/orchestrator/gcu.py @@ -1,34 +1,14 @@ -"""GCU (browser automation) node type constants. +"""Browser automation best-practices prompt. -A ``gcu`` node is an ``event_loop`` node with two automatic enhancements: -1. A canonical browser best-practices system prompt is prepended. -2. All tools from the GCU MCP server are auto-included. +This module provides ``GCU_BROWSER_SYSTEM_PROMPT`` -- a canonical set of +browser automation guidelines that can be included in any node's system +prompt that uses browser tools from the gcu-tools MCP server. -No new ``NodeProtocol`` subclass — the ``gcu`` type is purely a declarative -signal processed by the runner and executor at setup time. +Browser tools are registered via the global MCP registry (gcu-tools). +Nodes that need browser access declare ``tools: {policy: "all"}`` in their +agent.json config. 
""" -# --------------------------------------------------------------------------- -# MCP server identity -# --------------------------------------------------------------------------- - -GCU_SERVER_NAME = "gcu-tools" -"""Name used to identify the GCU MCP server in ``mcp_servers.json``.""" - -GCU_MCP_SERVER_CONFIG: dict = { - "name": GCU_SERVER_NAME, - "transport": "stdio", - "command": "uv", - "args": ["run", "python", "-m", "gcu.server", "--stdio"], - "cwd": "../../tools", - "description": "GCU tools for browser automation", -} -"""Default stdio config for the GCU MCP server (relative to exports//).""" - -# --------------------------------------------------------------------------- -# Browser best-practices system prompt -# --------------------------------------------------------------------------- - GCU_BROWSER_SYSTEM_PROMPT = """\ # Browser Automation Best Practices diff --git a/core/framework/graph/goal.py b/core/framework/orchestrator/goal.py similarity index 100% rename from core/framework/graph/goal.py rename to core/framework/orchestrator/goal.py diff --git a/core/framework/graph/node.py b/core/framework/orchestrator/node.py similarity index 95% rename from core/framework/graph/node.py rename to core/framework/orchestrator/node.py index a430f8d2..6c474817 100644 --- a/core/framework/graph/node.py +++ b/core/framework/orchestrator/node.py @@ -25,7 +25,7 @@ from typing import Any from pydantic import BaseModel, Field from framework.llm.provider import LLMProvider, Tool -from framework.runtime.core import Runtime +from framework.tracker.decision_tracker import DecisionTracker logger = logging.getLogger(__name__) @@ -144,15 +144,19 @@ class NodeSpec(BaseModel): # For LLM nodes system_prompt: str | None = Field(default=None, description="System prompt for LLM nodes") tools: list[str] = Field(default_factory=list, description="Tool names this node can use") + tool_access_policy: str = Field( + default="explicit", + description=( + "Tool access policy for this 
node. " + "'all' = all tools from registry, " + "'explicit' = only tools listed in `tools` (default, recommended), " + "'none' = no tools at all." + ), + ) model: str | None = Field( default=None, description="Specific model to use (defaults to graph default)" ) - # For subagent delegation - sub_agents: list[str] = Field( - default_factory=list, - description="Node IDs that can be invoked as subagents from this node", - ) # For function nodes function: str | None = Field( default=None, description="Function name or path for function nodes" @@ -459,7 +463,7 @@ class NodeContext: """ # Core runtime - runtime: Runtime + runtime: DecisionTracker # Node identity node_id: str @@ -526,20 +530,6 @@ class NodeContext: # Falls back to node_id when not set (legacy / standalone executor). stream_id: str = "" - # Subagent mode - is_subagent_mode: bool = False # True when running as a subagent (prevents nested delegation) - report_callback: Any = None # async (message: str, data: dict | None) -> None - node_registry: dict[str, "NodeSpec"] = field(default_factory=dict) # For subagent lookup - - # Full tool catalog (unfiltered) — used by _execute_subagent to resolve - # subagent tools that aren't in the parent node's filtered available_tools. - all_tools: list[Tool] = field(default_factory=list) - - # Shared reference to the executor's node_registry — used by subagent - # escalation (_EscalationReceiver) to register temporary receivers that - # the inject_input() routing chain can find. - shared_node_registry: dict[str, Any] = field(default_factory=dict) - # Dynamic tool provider — when set, EventLoopNode rebuilds the tool # list from this callback at the start of each iteration. Used by # the queen to switch between building-mode and running-mode tools. 
diff --git a/core/framework/graph/worker_agent.py b/core/framework/orchestrator/node_worker.py similarity index 97% rename from core/framework/graph/worker_agent.py rename to core/framework/orchestrator/node_worker.py index ce9d8b4e..436096f9 100644 --- a/core/framework/graph/worker_agent.py +++ b/core/framework/orchestrator/node_worker.py @@ -19,15 +19,15 @@ from dataclasses import dataclass, field from enum import StrEnum from typing import Any -from framework.graph.context import GraphContext, build_node_context_from_graph_context -from framework.graph.edge import EdgeCondition, EdgeSpec -from framework.graph.node import ( +from framework.orchestrator.context import GraphContext, build_node_context_from_graph_context +from framework.orchestrator.edge import EdgeCondition, EdgeSpec +from framework.orchestrator.node import ( NodeContext, NodeProtocol, NodeResult, NodeSpec, ) -from framework.graph.validator import OutputValidator +from framework.orchestrator.validator import OutputValidator logger = logging.getLogger(__name__) @@ -109,7 +109,7 @@ class RetryState: # --------------------------------------------------------------------------- -class WorkerAgent: +class NodeWorker: """First-class autonomous worker for one node in the graph. Lifecycle: @@ -355,7 +355,7 @@ class WorkerAgent: # Only skip retries for actual EventLoopNode instances (they handle # retries internally). Custom NodeProtocol impls registered via # register_node should be retried by the executor. 
- from framework.graph.event_loop_node import EventLoopNode as _ELN + from framework.agent_loop.agent_loop import AgentLoop as _ELN if isinstance(node_impl, _ELN): max_retries = 0 @@ -603,10 +603,10 @@ class WorkerAgent: return self._node_impl # Auto-create EventLoopNode - if self.node_spec.node_type in ("event_loop", "gcu"): - from framework.graph.event_loop.types import LoopConfig - from framework.graph.event_loop_node import EventLoopNode - from framework.graph.node import warn_if_deprecated_client_facing + if self.node_spec.node_type == "event_loop": + from framework.agent_loop.internals.types import LoopConfig + from framework.agent_loop.agent_loop import AgentLoop + from framework.orchestrator.node import warn_if_deprecated_client_facing conv_store = None if gc.storage_path: @@ -619,7 +619,7 @@ class WorkerAgent: warn_if_deprecated_client_facing(self.node_spec) default_max_iter = 100 if self.node_spec.supports_direct_user_io() else 50 - node = EventLoopNode( + node = AgentLoop( event_bus=gc.event_bus, judge=None, config=LoopConfig( @@ -734,7 +734,7 @@ class WorkerAgent: if not next_spec or next_spec.node_type != "event_loop": return - from framework.graph.prompting import ( + from framework.orchestrator.prompting import ( TransitionSpec, build_narrative, build_system_prompt_for_node_context, diff --git a/core/framework/graph/executor.py b/core/framework/orchestrator/orchestrator.py similarity index 97% rename from core/framework/graph/executor.py rename to core/framework/orchestrator/orchestrator.py index c2015744..666b021a 100644 --- a/core/framework/graph/executor.py +++ b/core/framework/orchestrator/orchestrator.py @@ -16,21 +16,21 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any -from framework.graph.checkpoint_config import CheckpointConfig -from framework.graph.context import GraphContext, build_node_context -from framework.graph.conversation import LEGACY_RUN_ID -from framework.graph.edge import EdgeCondition, 
EdgeSpec, GraphSpec -from framework.graph.goal import Goal -from framework.graph.node import ( +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.context import GraphContext, build_node_context +from framework.agent_loop.conversation import LEGACY_RUN_ID +from framework.orchestrator.edge import EdgeCondition, EdgeSpec, GraphSpec +from framework.orchestrator.goal import Goal +from framework.orchestrator.node import ( DataBuffer, NodeProtocol, NodeResult, NodeSpec, ) -from framework.graph.validator import OutputValidator +from framework.orchestrator.validator import OutputValidator from framework.llm.provider import LLMProvider, Tool from framework.observability import set_trace_context -from framework.runtime.core import Runtime +from framework.tracker.decision_tracker import DecisionTracker from framework.schemas.checkpoint import Checkpoint from framework.storage.checkpoint_store import CheckpointStore from framework.utils.io import atomic_write @@ -112,7 +112,7 @@ class ParallelExecutionConfig: branch_timeout_seconds: float = 300.0 -class GraphExecutor: +class Orchestrator: """ Executes agent graphs. @@ -133,7 +133,7 @@ class GraphExecutor: def __init__( self, - runtime: Runtime, + runtime: DecisionTracker, llm: LLMProvider | None = None, tools: list[Tool] | None = None, tool_executor: Callable | None = None, @@ -165,7 +165,7 @@ class GraphExecutor: Initialize the executor. 
Args: - runtime: Runtime for decision logging + runtime: DecisionTracker for decision logging llm: LLM provider for LLM nodes tools: Available tools tool_executor: Function to execute tools @@ -202,7 +202,7 @@ class GraphExecutor: self.validator = OutputValidator() self.logger = logging.getLogger(__name__) self.logger.debug( - "[GraphExecutor.__init__] Created with" + "[Orchestrator.__init__] Created with" " stream_id=%s, execution_id=%s," " initial node_registry keys: %s", stream_id, @@ -361,8 +361,8 @@ class GraphExecutor: Uses the same recursive binary-search splitting as EventLoopNode. """ - from framework.graph.conversation import extract_tool_call_history - from framework.graph.event_loop_node import _is_context_too_large_error + from framework.agent_loop.conversation import extract_tool_call_history + from framework.agent_loop.agent_loop import _is_context_too_large_error if _depth > self._PHASE_LLM_MAX_DEPTH: raise RuntimeError("Phase LLM compaction recursion limit") @@ -690,7 +690,7 @@ class GraphExecutor: # and spillover files share the same session-scoped directory. 
_ctx_token = None if self._storage_path: - from framework.runner.tool_registry import ToolRegistry + from framework.loader.tool_registry import ToolRegistry _ctx_token = ToolRegistry.set_execution_context( data_dir=str(self._storage_path / "data"), @@ -712,13 +712,12 @@ class GraphExecutor: finally: if _ctx_token is not None: - from framework.runner.tool_registry import ToolRegistry + from framework.loader.tool_registry import ToolRegistry ToolRegistry.reset_execution_context(_ctx_token) VALID_NODE_TYPES = { "event_loop", - "gcu", } # Node types removed in v0.5 — provide migration guidance REMOVED_NODE_TYPES = { @@ -736,11 +735,11 @@ class GraphExecutor: # Check registry first if node_spec.id in self.node_registry: logger.debug( - "[GraphExecutor._get_node_implementation] Found node '%s' in registry", node_spec.id + "[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id ) return self.node_registry[node_spec.id] logger.debug( - "[GraphExecutor._get_node_implementation]" + "[Orchestrator._get_node_implementation]" " Node '%s' not in registry (keys: %s)," " creating new", node_spec.id, @@ -764,10 +763,10 @@ class GraphExecutor: ) # Create based on type - if node_spec.node_type in ("event_loop", "gcu"): + if node_spec.node_type == "event_loop": # Auto-create EventLoopNode with sensible defaults. # Custom configs can still be pre-registered via node_registry. 
- from framework.graph.event_loop_node import EventLoopNode, LoopConfig + from framework.agent_loop.agent_loop import AgentLoop, LoopConfig # Create a FileConversationStore if a storage path is available conv_store = None @@ -787,13 +786,13 @@ class GraphExecutor: if self._storage_path: spillover = str(self._storage_path / "data") - from framework.graph.node import warn_if_deprecated_client_facing + from framework.orchestrator.node import warn_if_deprecated_client_facing warn_if_deprecated_client_facing(node_spec) lc = self._loop_config default_max_iter = 100 if node_spec.supports_direct_user_io() else 50 - node = EventLoopNode( + node = AgentLoop( event_bus=self._event_bus, judge=None, # implicit judge: accept when output_keys are filled config=LoopConfig( @@ -812,7 +811,7 @@ class GraphExecutor: # Cache so inject_event() is reachable for queen interaction and escalation routing self.node_registry[node_spec.id] = node logger.debug( - "[GraphExecutor._get_node_implementation]" + "[Orchestrator._get_node_implementation]" " Cached node '%s' in node_registry," " registry now has keys: %s", node_spec.id, @@ -998,10 +997,10 @@ class GraphExecutor: branch_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model) effective_max_retries = node_spec.max_retries - # Only override for actual EventLoopNode instances, not custom NodeProtocol impls - from framework.graph.event_loop_node import EventLoopNode + # Only override for actual AgentLoop instances, not custom NodeProtocol impls + from framework.agent_loop.agent_loop import AgentLoop as _AgentLoop # noqa: F811 - if isinstance(branch_impl, EventLoopNode) and effective_max_retries > 1: + if isinstance(branch_impl, _AgentLoop) and effective_max_retries > 1: self.logger.warning( f"EventLoopNode '{node_spec.id}' has " f"max_retries={effective_max_retries}. 
Overriding " @@ -1042,9 +1041,6 @@ class GraphExecutor: execution_id=self._execution_id, run_id=self._run_id, stream_id=self._stream_id, - node_registry=node_registry, - all_tools=self.tools, - shared_node_registry=self.node_registry, dynamic_tools_provider=self.dynamic_tools_provider, dynamic_prompt_provider=self.dynamic_prompt_provider, dynamic_memory_provider=self.dynamic_memory_provider, @@ -1293,14 +1289,14 @@ class GraphExecutor: Replaces the imperative while-loop with autonomous workers that self-activate based on edge conditions and fan-out tracking. """ - from framework.graph.worker_agent import ( + from framework.orchestrator.node_worker import ( Activation, FanOutTag, - WorkerAgent, + NodeWorker, WorkerCompletion, WorkerLifecycle, ) - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType # Build shared graph context gc = GraphContext( @@ -1339,9 +1335,9 @@ class GraphExecutor: ) # Create one WorkerAgent per node - workers: dict[str, WorkerAgent] = {} + workers: dict[str, NodeWorker] = {} for node_spec in graph.nodes: - workers[node_spec.id] = WorkerAgent(node_spec=node_spec, graph_context=gc) + workers[node_spec.id] = NodeWorker(node_spec=node_spec, graph_context=gc) # Identify entry workers (graph entry node, not based on edge count) # A node can be the entry point AND have incoming feedback edges. 
@@ -1442,7 +1438,7 @@ class GraphExecutor: def _route_activation( activation: Activation, - workers_map: dict[str, WorkerAgent], + workers_map: dict[str, NodeWorker], pending_tasks_map: dict[str, asyncio.Task], *, has_event_subscription: bool, diff --git a/core/framework/graph/prompt_composer.py b/core/framework/orchestrator/prompt_composer.py similarity index 94% rename from core/framework/graph/prompt_composer.py rename to core/framework/orchestrator/prompt_composer.py index b83c047e..92bbaca0 100644 --- a/core/framework/graph/prompt_composer.py +++ b/core/framework/orchestrator/prompt_composer.py @@ -9,7 +9,7 @@ import json from pathlib import Path from typing import TYPE_CHECKING -from framework.graph.prompting import ( +from framework.orchestrator.prompting import ( EXECUTION_SCOPE_PREAMBLE, TransitionSpec, build_accounts_prompt, @@ -19,7 +19,7 @@ from framework.graph.prompting import ( ) if TYPE_CHECKING: - from framework.graph.node import DataBuffer, NodeSpec + from framework.orchestrator.node import DataBuffer, NodeSpec _with_datetime = stamp_prompt_datetime @@ -36,7 +36,7 @@ def compose_system_prompt( node_type_preamble: str | None = None, ) -> str: """Compatibility wrapper for the legacy function signature.""" - from framework.graph.prompting import NodePromptSpec + from framework.orchestrator.prompting import NodePromptSpec spec = NodePromptSpec( identity_prompt=identity_prompt or "", @@ -66,7 +66,6 @@ def compose_system_prompt( protocols_prompt=spec.protocols_prompt, node_type=spec.node_type, output_keys=spec.output_keys, - is_subagent_mode=spec.is_subagent_mode, ) return build_system_prompt(spec) @@ -135,7 +134,7 @@ def build_transition_marker( ) -from framework.graph.prompting import build_transition_message # noqa: E402 +from framework.orchestrator.prompting import build_transition_message # noqa: E402 __all__ = [ "EXECUTION_SCOPE_PREAMBLE", diff --git a/core/framework/graph/prompting.py b/core/framework/orchestrator/prompting.py similarity index 95% 
rename from core/framework/graph/prompting.py rename to core/framework/orchestrator/prompting.py index 072abf83..b76faa9b 100644 --- a/core/framework/graph/prompting.py +++ b/core/framework/orchestrator/prompting.py @@ -12,8 +12,8 @@ from datetime import datetime from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from framework.graph.edge import GraphSpec - from framework.graph.node import DataBuffer + from framework.orchestrator.edge import GraphSpec + from framework.orchestrator.node import DataBuffer # Injected into every worker node's system prompt so the LLM understands @@ -40,7 +40,6 @@ class NodePromptSpec: memory_prompt: str = "" node_type: str = "event_loop" output_keys: tuple[str, ...] = () - is_subagent_mode: bool = False @dataclass(frozen=True) @@ -165,7 +164,6 @@ def build_prompt_spec_from_node_context( memory_prompt=resolved_memory_prompt, node_type=ctx.node_spec.node_type, output_keys=tuple(ctx.node_spec.output_keys or ()), - is_subagent_mode=bool(getattr(ctx, "is_subagent_mode", False)), ) @@ -195,13 +193,10 @@ def build_system_prompt(spec: NodePromptSpec) -> str: if spec.narrative: parts.append(f"\n--- Context (what has happened so far) ---\n{spec.narrative}") - if not spec.is_subagent_mode and spec.node_type in ("event_loop", "gcu") and spec.output_keys: + if not False and spec.node_type == "event_loop" and spec.output_keys: parts.append(f"\n{EXECUTION_SCOPE_PREAMBLE}") - if spec.node_type == "gcu": - from framework.graph.gcu import GCU_BROWSER_SYSTEM_PROMPT - parts.append(f"\n{GCU_BROWSER_SYSTEM_PROMPT}") if spec.focus_prompt: parts.append(f"\n--- Current Focus ---\n{spec.focus_prompt}") diff --git a/core/framework/graph/safe_eval.py b/core/framework/orchestrator/safe_eval.py similarity index 100% rename from core/framework/graph/safe_eval.py rename to core/framework/orchestrator/safe_eval.py diff --git a/core/framework/graph/validator.py b/core/framework/orchestrator/validator.py similarity index 100% rename from 
core/framework/graph/validator.py rename to core/framework/orchestrator/validator.py diff --git a/core/framework/pipeline/__init__.py b/core/framework/pipeline/__init__.py new file mode 100644 index 00000000..da2793a7 --- /dev/null +++ b/core/framework/pipeline/__init__.py @@ -0,0 +1,32 @@ +"""Pipeline middleware for the agent runtime. + +Stages run in order when :meth:`AgentRuntime.trigger` receives a request. +Each stage can pass the context through, transform the input data, or reject +the request entirely. This is the runtime-level analogue of AstrBot's +pipeline architecture and lets operators compose rate limiting, validation, +cost guards, and custom pre/post-processing without patching core code. +""" + +from framework.pipeline.registry import ( + build_pipeline_from_config, + build_stage, + register, +) +from framework.pipeline.runner import PipelineRunner +from framework.pipeline.stage import ( + PipelineContext, + PipelineRejectedError, + PipelineResult, + PipelineStage, +) + +__all__ = [ + "PipelineContext", + "PipelineRejectedError", + "PipelineResult", + "PipelineRunner", + "PipelineStage", + "build_pipeline_from_config", + "build_stage", + "register", +] diff --git a/core/framework/pipeline/execution_middleware.py b/core/framework/pipeline/execution_middleware.py new file mode 100644 index 00000000..cdebfc99 --- /dev/null +++ b/core/framework/pipeline/execution_middleware.py @@ -0,0 +1,44 @@ +"""Execution-level middleware protocol. + +Unlike :class:`PipelineStage` (which gates ``AgentHost.trigger()`` at the +request level), execution middleware runs at the start of **every** execution +attempt inside ``ExecutionManager._run_execution()`` -- including resurrection +retries. 
+ +Use this for concerns that must re-evaluate per attempt: +- Cost tracking (charge per attempt, not per trigger) +- Tool scoping (different tools on retry) +- Checkpoint config overrides +- Per-execution logging/tracing setup +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class ExecutionContext: + """Context passed to execution middleware.""" + + execution_id: str + stream_id: str + run_id: str + input_data: dict[str, Any] + session_state: dict[str, Any] | None = None + attempt: int = 1 + metadata: dict[str, Any] = field(default_factory=dict) + + +class ExecutionMiddleware(ABC): + """Base class for per-execution middleware.""" + + @abstractmethod + async def on_execution_start(self, ctx: ExecutionContext) -> ExecutionContext: + """Called before each execution attempt (including resurrections). + + Modify and return *ctx* to transform execution parameters. + Raise to abort the execution. + """ diff --git a/core/framework/pipeline/registry.py b/core/framework/pipeline/registry.py new file mode 100644 index 00000000..f46f32c2 --- /dev/null +++ b/core/framework/pipeline/registry.py @@ -0,0 +1,107 @@ +"""Pipeline stage registry -- maps type names to stage classes. + +Stages self-register via the ``@register`` decorator. The +``build_pipeline_from_config`` function reads a declarative config +(from ``~/.hive/configuration.json`` or ``agent.json``) and +instantiates the corresponding stage objects. 
+ +Example config:: + + { + "pipeline": { + "stages": [ + {"type": "rate_limit", "order": 200, "config": {"max_requests_per_minute": 60}}, + {"type": "cost_guard", "order": 300, "config": {"max_cost_per_request": 0.50}} + ] + } + } +""" + +from __future__ import annotations + +import logging +from typing import Any + +from framework.pipeline.runner import PipelineRunner +from framework.pipeline.stage import PipelineStage + +logger = logging.getLogger(__name__) + +_STAGE_REGISTRY: dict[str, type[PipelineStage]] = {} + + +def register(name: str): + """Decorator to register a pipeline stage class by type name. + + Usage:: + + @register("rate_limit") + class RateLimitStage(PipelineStage): + ... + """ + + def decorator(cls: type[PipelineStage]) -> type[PipelineStage]: + _STAGE_REGISTRY[name] = cls + return cls + + return decorator + + +def get_registered_stages() -> dict[str, type[PipelineStage]]: + """Return a copy of the stage registry.""" + return dict(_STAGE_REGISTRY) + + +def build_stage(spec: dict[str, Any]) -> PipelineStage: + """Instantiate a single stage from a config spec. + + Args: + spec: Dict with ``type`` (required), ``order`` (optional), + and ``config`` (optional kwargs dict). + + Raises: + KeyError: If the stage type is not registered. + """ + stage_type = spec["type"] + if stage_type not in _STAGE_REGISTRY: + available = ", ".join(sorted(_STAGE_REGISTRY)) or "(none)" + raise KeyError( + f"Unknown pipeline stage type '{stage_type}'. " + f"Available: {available}" + ) + cls = _STAGE_REGISTRY[stage_type] + config = spec.get("config", {}) + stage = cls(**config) + if "order" in spec: + stage.order = spec["order"] + return stage + + +def build_pipeline_from_config( + stages_config: list[dict[str, Any]], +) -> PipelineRunner: + """Build a ``PipelineRunner`` from a declarative stages list. + + Each entry is ``{"type": "...", "order": N, "config": {...}}``. 
+ """ + # Import built-in stages so they self-register + _ensure_builtins_registered() + + stages = [build_stage(s) for s in stages_config] + return PipelineRunner(stages) + + +def _ensure_builtins_registered() -> None: + """Import built-in stage modules so their ``@register`` decorators fire.""" + if _STAGE_REGISTRY: + return # already populated + try: + import framework.pipeline.stages.cost_guard # noqa: F401 + import framework.pipeline.stages.credential_resolver # noqa: F401 + import framework.pipeline.stages.input_validation # noqa: F401 + import framework.pipeline.stages.llm_provider # noqa: F401 + import framework.pipeline.stages.mcp_registry # noqa: F401 + import framework.pipeline.stages.rate_limit # noqa: F401 + import framework.pipeline.stages.skill_registry # noqa: F401 + except ImportError: + pass diff --git a/core/framework/pipeline/runner.py b/core/framework/pipeline/runner.py new file mode 100644 index 00000000..7d05deb9 --- /dev/null +++ b/core/framework/pipeline/runner.py @@ -0,0 +1,111 @@ +"""Pipeline runner -- executes registered stages in order.""" + +from __future__ import annotations + +import logging +from typing import Any + +from framework.pipeline.stage import ( + PipelineContext, + PipelineRejectedError, + PipelineStage, +) + +logger = logging.getLogger(__name__) + + +class PipelineRunner: + """Executes a list of :class:`PipelineStage` instances in ``order``. + + The runner is the orchestration layer that :class:`AgentRuntime` calls + on every trigger. Stages execute in ascending ``order`` (ties broken by + registration order). A stage returning ``reject`` short-circuits the + pipeline and causes the trigger to raise :class:`PipelineRejectedError`. 
+ """ + + def __init__(self, stages: list[PipelineStage] | None = None) -> None: + self._stages: list[PipelineStage] = sorted(stages or [], key=lambda s: s.order) + + @property + def stages(self) -> list[PipelineStage]: + return list(self._stages) + + def add_stage(self, stage: PipelineStage) -> None: + """Add a stage after construction (for dynamic registration).""" + self._stages.append(stage) + self._stages.sort(key=lambda s: s.order) + + async def initialize_all(self) -> None: + """Call ``initialize`` on every registered stage.""" + for stage in self._stages: + name = stage.__class__.__name__ + logger.info("[pipeline] Initializing %s (order=%d)", name, stage.order) + await stage.initialize() + logger.info("[pipeline] %s initialized", name) + if self._stages: + logger.info( + "[pipeline] Ready: %d stages [%s]", + len(self._stages), + " -> ".join(s.__class__.__name__ for s in self._stages), + ) + + async def run(self, ctx: PipelineContext) -> PipelineContext: + """Run all stages. Raises ``PipelineRejectedError`` on rejection. + + Returns the (possibly transformed) context. 
+ """ + if not self._stages: + return ctx + import time + + pipeline_start = time.perf_counter() + logger.info( + "[pipeline] Running %d stages for entry_point=%s", + len(self._stages), + ctx.entry_point_id, + ) + for stage in self._stages: + stage_name = stage.__class__.__name__ + t0 = time.perf_counter() + result = await stage.process(ctx) + elapsed_ms = (time.perf_counter() - t0) * 1000 + if result.action == "reject": + reason = result.rejection_reason or "(no reason given)" + logger.warning( + "[pipeline] REJECTED by %s (%.1fms): %s", + stage_name, elapsed_ms, reason, + ) + raise PipelineRejectedError(stage_name, reason) + if result.action == "transform": + logger.info( + "[pipeline] %s TRANSFORMED input (%.1fms)", + stage_name, elapsed_ms, + ) + if result.input_data is not None: + ctx.input_data = result.input_data + else: + logger.info( + "[pipeline] %s passed (%.1fms)", + stage_name, elapsed_ms, + ) + total_ms = (time.perf_counter() - pipeline_start) * 1000 + logger.info("[pipeline] Complete (%.1fms total)", total_ms) + return ctx + + async def run_post(self, ctx: PipelineContext, result: Any) -> Any: + """Run all stages' ``post_process`` hooks in order. + + Each stage can transform the result; the final value is returned. + Exceptions are logged and swallowed -- post-processing must not + break a successful execution. 
+ """ + current = result + for stage in self._stages: + try: + current = await stage.post_process(ctx, current) + except Exception: + logger.exception( + "Pipeline post_process raised in %s; continuing with previous result", + stage.__class__.__name__, + ) + return current diff --git a/core/framework/pipeline/stage.py b/core/framework/pipeline/stage.py new file mode 100644 index 00000000..e250189c --- /dev/null +++ b/core/framework/pipeline/stage.py @@ -0,0 +1,77 @@ +"""Pipeline stage base class and request/response types.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, Literal + + +class PipelineRejectedError(Exception): + """Raised by ``AgentHost.trigger`` when a stage rejects the request.""" + + def __init__(self, stage_name: str, reason: str) -> None: + super().__init__(f"Pipeline rejected by {stage_name}: {reason}") + self.stage_name = stage_name + self.reason = reason + + +@dataclass +class PipelineContext: + """Carries request data through the pipeline.""" + + entry_point_id: str + input_data: dict[str, Any] + correlation_id: str | None = None + session_state: dict[str, Any] | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class PipelineResult: + """Outcome of a stage's ``process`` call.""" + + action: Literal["continue", "reject", "transform"] = "continue" + input_data: dict[str, Any] | None = None + rejection_reason: str | None = None + + +class PipelineStage(ABC): + """Base class for all middleware stages. + + Infrastructure stages (LLM, MCP, credentials, skills) set typed + attributes during ``initialize()`` that the host reads after all + stages have initialized. Request-level stages (rate limit, input + validation, cost guard) implement ``process()``. 
+ + Attributes set by infrastructure stages: + llm: LLM provider instance (set by LlmProviderStage) + tool_registry: ToolRegistry with discovered MCP tools (set by McpRegistryStage) + accounts_prompt: Connected accounts system prompt block (set by CredentialResolverStage) + accounts_data: Raw account info list (set by CredentialResolverStage) + tool_provider_map: Tool name -> provider mapping (set by CredentialResolverStage) + skills_manager: SkillsManager instance (set by SkillRegistryStage) + """ + + order: int = 100 + + # Infrastructure stage outputs -- typed so _apply_pipeline_results + # doesn't need hasattr() sniffing. + llm: Any = None + tool_registry: Any = None + accounts_prompt: str = "" + accounts_data: list[dict] | None = None + tool_provider_map: dict[str, str] | None = None + skills_manager: Any = None + + async def initialize(self) -> None: + """Called once when the runtime starts.""" + return None + + @abstractmethod + async def process(self, ctx: PipelineContext) -> PipelineResult: + """Process the incoming request.""" + + async def post_process(self, ctx: PipelineContext, result: Any) -> Any: + """Optional post-execution hook. 
Default: pass-through.""" + return result diff --git a/core/framework/pipeline/stages/__init__.py b/core/framework/pipeline/stages/__init__.py new file mode 100644 index 00000000..6a9105be --- /dev/null +++ b/core/framework/pipeline/stages/__init__.py @@ -0,0 +1,19 @@ +"""Built-in pipeline stages.""" + +from framework.pipeline.stages.cost_guard import CostGuardStage +from framework.pipeline.stages.credential_resolver import CredentialResolverStage +from framework.pipeline.stages.input_validation import InputValidationStage +from framework.pipeline.stages.llm_provider import LlmProviderStage +from framework.pipeline.stages.mcp_registry import McpRegistryStage +from framework.pipeline.stages.rate_limit import RateLimitStage +from framework.pipeline.stages.skill_registry import SkillRegistryStage + +__all__ = [ + "CostGuardStage", + "CredentialResolverStage", + "InputValidationStage", + "LlmProviderStage", + "McpRegistryStage", + "RateLimitStage", + "SkillRegistryStage", +] diff --git a/core/framework/pipeline/stages/cost_guard.py b/core/framework/pipeline/stages/cost_guard.py new file mode 100644 index 00000000..4850fe3b --- /dev/null +++ b/core/framework/pipeline/stages/cost_guard.py @@ -0,0 +1,35 @@ +"""Cost guard stage -- reject requests over a pre-flight budget.""" + +from __future__ import annotations + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + + +@register("cost_guard") +class CostGuardStage(PipelineStage): + """Reject requests whose estimated cost exceeds the per-request budget. + + The cost estimate must be populated in ``ctx.metadata["estimated_cost"]`` + by an earlier stage (or by the caller). When no estimate is present, + the stage passes through. 
+ """ + + order = 300 + + def __init__(self, max_cost_per_request: float = 1.0) -> None: + self._budget = max_cost_per_request + + async def process(self, ctx: PipelineContext) -> PipelineResult: + estimated = ctx.metadata.get("estimated_cost") + if estimated is None: + return PipelineResult(action="continue") + if estimated > self._budget: + return PipelineResult( + action="reject", + rejection_reason=( + f"Estimated cost ${estimated:.4f} exceeds budget " + f"${self._budget:.4f}" + ), + ) + return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/credential_resolver.py b/core/framework/pipeline/stages/credential_resolver.py new file mode 100644 index 00000000..b76df37f --- /dev/null +++ b/core/framework/pipeline/stages/credential_resolver.py @@ -0,0 +1,58 @@ +"""Credential resolver pipeline stage. + +Resolves connected accounts at startup. Individual credential TTL/refresh +is handled by MCP server processes internally -- they resolve tokens from +the credential store on every tool call. 
+""" + +from __future__ import annotations + +import logging +from typing import Any + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + +logger = logging.getLogger(__name__) + + +@register("credential_resolver") +class CredentialResolverStage(PipelineStage): + """Resolve connected accounts for system prompt injection.""" + + order = 40 + + def __init__(self, credential_store: Any = None, **kwargs: Any) -> None: + self._credential_store = credential_store + self.accounts_prompt = "" + self.accounts_data: list[dict] | None = None + self.tool_provider_map: dict[str, str] | None = None + + async def initialize(self) -> None: + try: + from aden_tools.credentials.store_adapter import ( + CredentialStoreAdapter, + ) + from framework.orchestrator.prompting import build_accounts_prompt + + if self._credential_store is not None: + adapter = CredentialStoreAdapter(store=self._credential_store) + else: + adapter = CredentialStoreAdapter.default() + self.accounts_data = adapter.get_all_account_info() + self.tool_provider_map = adapter.get_tool_provider_map() + if self.accounts_data: + self.accounts_prompt = build_accounts_prompt( + self.accounts_data, self.tool_provider_map, + ) + logger.info( + "[pipeline] CredentialResolverStage: %d accounts", + len(self.accounts_data or []), + ) + except Exception: + logger.debug( + "Credential resolution failed (non-fatal)", exc_info=True, + ) + + async def process(self, ctx: PipelineContext) -> PipelineResult: + return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/input_validation.py b/core/framework/pipeline/stages/input_validation.py new file mode 100644 index 00000000..3a025c0e --- /dev/null +++ b/core/framework/pipeline/stages/input_validation.py @@ -0,0 +1,47 @@ +"""Input validation stage. + +Rejects requests whose ``input_data`` does not match the entry point's +declared input schema. 
Uses a user-provided schema map: +``{entry_point_id: {required_key: expected_type, ...}}``. +""" + +from __future__ import annotations + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + + +@register("input_validation") +class InputValidationStage(PipelineStage): + """Validate ``input_data`` against per-entry-point schemas. + + The schema is a simple dict mapping key -> expected Python type. + For richer validation, substitute a Pydantic-based stage. + """ + + order = 100 + + def __init__(self, schemas: dict[str, dict[str, type]] | None = None) -> None: + self._schemas = schemas or {} + + async def process(self, ctx: PipelineContext) -> PipelineResult: + schema = self._schemas.get(ctx.entry_point_id) + if not schema: + return PipelineResult(action="continue") + + for key, expected_type in schema.items(): + if key not in ctx.input_data: + return PipelineResult( + action="reject", + rejection_reason=f"Missing required input key: '{key}'", + ) + value = ctx.input_data[key] + if not isinstance(value, expected_type): + return PipelineResult( + action="reject", + rejection_reason=( + f"Input key '{key}' has type {type(value).__name__}, " + f"expected {expected_type.__name__}" + ), + ) + return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/llm_provider.py b/core/framework/pipeline/stages/llm_provider.py new file mode 100644 index 00000000..899342f2 --- /dev/null +++ b/core/framework/pipeline/stages/llm_provider.py @@ -0,0 +1,95 @@ +"""LLM provider pipeline stage. + +Resolves the LLM provider from global config. This is the ONLY place +the LLM gets created for worker agents. 
+""" + +from __future__ import annotations + +import logging +from typing import Any + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + +logger = logging.getLogger(__name__) + + +@register("llm_provider") +class LlmProviderStage(PipelineStage): + """Resolve LLM provider and make it available.""" + + order = 10 + + def __init__( + self, + model: str | None = None, + mock_mode: bool = False, + llm: Any = None, + **kwargs: Any, + ) -> None: + self._model = model + self._mock_mode = mock_mode + self.llm = llm # Pre-injected LLM (e.g. from session) + + async def initialize(self) -> None: + if self.llm is not None: + return # Already injected + + from framework.config import ( + get_api_key, + get_api_keys, + get_hive_config, + get_preferred_model, + ) + + model = self._model or get_preferred_model() + + if self._mock_mode: + from framework.llm.mock import MockLLMProvider + + self.llm = MockLLMProvider(model=model) + return + + config = get_hive_config() + llm_config = config.get("llm", {}) + api_base = llm_config.get("api_base") + + # Check for Antigravity (special provider) + if llm_config.get("use_antigravity_subscription"): + try: + from framework.llm.antigravity import AntigravityProvider + + provider = AntigravityProvider(model=model) + if provider.has_credentials(): + self.llm = provider + logger.info("[pipeline] LlmProviderStage: Antigravity") + return + except Exception: + pass + + from framework.llm.litellm import LiteLLMProvider + + api_key = get_api_key() + api_keys = get_api_keys() + + if api_keys and len(api_keys) > 1: + self.llm = LiteLLMProvider( + model=model, api_keys=api_keys, api_base=api_base, + ) + elif api_key: + extra = {} + if api_key.startswith("sk-ant-oat"): + extra["extra_headers"] = { + "authorization": f"Bearer {api_key}" + } + self.llm = LiteLLMProvider( + model=model, api_key=api_key, api_base=api_base, **extra, + ) + else: + self.llm = 
LiteLLMProvider(model=model, api_base=api_base) + + logger.info("[pipeline] LlmProviderStage: %s", model) + + async def process(self, ctx: PipelineContext) -> PipelineResult: + return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/mcp_registry.py b/core/framework/pipeline/stages/mcp_registry.py new file mode 100644 index 00000000..989cfd98 --- /dev/null +++ b/core/framework/pipeline/stages/mcp_registry.py @@ -0,0 +1,92 @@ +"""MCP registry pipeline stage. + +Resolves MCP server references from the agent config against the global +registry and registers tools. This is the ONLY place MCP tools get loaded. +""" + +from __future__ import annotations + +import logging +from dataclasses import asdict +from pathlib import Path +from typing import Any + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + +logger = logging.getLogger(__name__) + + +@register("mcp_registry") +class McpRegistryStage(PipelineStage): + """Resolve MCP tools from the global registry.""" + + order = 50 + + def __init__( + self, + server_refs: list[dict[str, Any]] | None = None, + agent_path: str | Path | None = None, + tool_registry: Any = None, + **kwargs: Any, + ) -> None: + self._server_refs = server_refs or [] + self._agent_path = Path(agent_path) if agent_path else None + self._tool_registry = tool_registry + + async def initialize(self) -> None: + """Connect to MCP servers and discover tools.""" + if self._tool_registry is None: + from framework.loader.tool_registry import ToolRegistry + + self._tool_registry = ToolRegistry() + + from framework.loader.mcp_registry import MCPRegistry + + registry = MCPRegistry() + mcp_loaded = False + + # 1. 
From agent.json mcp_servers refs + if self._server_refs: + names = [ref["name"] for ref in self._server_refs if ref.get("name")] + if names: + configs = registry.resolve_for_agent(include=names) + if configs: + self._tool_registry.load_registry_servers( + [asdict(c) for c in configs] + ) + mcp_loaded = True + logger.info( + "[pipeline] McpRegistryStage: loaded %d servers: %s", + len(configs), + names, + ) + + # 2. Legacy: mcp_servers.json + if not mcp_loaded and self._agent_path: + mcp_json = self._agent_path / "mcp_servers.json" + if mcp_json.exists(): + self._tool_registry.load_mcp_config(mcp_json) + mcp_loaded = True + + # 3. Fallback: all servers from global registry + if not mcp_loaded: + configs = registry.resolve_for_agent(profile="all") + if configs: + self._tool_registry.load_registry_servers( + [asdict(c) for c in configs] + ) + logger.info( + "[pipeline] McpRegistryStage: loaded %d servers (fallback)", + len(configs), + ) + + total = len(self._tool_registry.get_tools()) + logger.info("[pipeline] McpRegistryStage: %d tools available", total) + + async def process(self, ctx: PipelineContext) -> PipelineResult: + return PipelineResult(action="continue") + + @property + def tool_registry(self): + return self._tool_registry diff --git a/core/framework/pipeline/stages/rate_limit.py b/core/framework/pipeline/stages/rate_limit.py new file mode 100644 index 00000000..364c10fa --- /dev/null +++ b/core/framework/pipeline/stages/rate_limit.py @@ -0,0 +1,44 @@ +"""Per-(entry-point, session) rate limiting stage.""" + +from __future__ import annotations + +import time +from collections import defaultdict + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + + +@register("rate_limit") +class RateLimitStage(PipelineStage): + """Reject requests that exceed ``max_requests_per_minute`` per session. + + The key is ``:``. 
When no session_id is + present in ``session_state``, a single shared "default" bucket is used. + """ + + order = 200 + + def __init__(self, max_requests_per_minute: int = 60) -> None: + self._max_rpm = max_requests_per_minute + self._timestamps: dict[str, list[float]] = defaultdict(list) + + async def process(self, ctx: PipelineContext) -> PipelineResult: + session_id = "default" + if ctx.session_state: + session_id = str(ctx.session_state.get("session_id", "default")) + key = f"{ctx.entry_point_id}:{session_id}" + + now = time.monotonic() + # Prune entries older than 60s. + self._timestamps[key] = [t for t in self._timestamps[key] if now - t < 60.0] + if len(self._timestamps[key]) >= self._max_rpm: + return PipelineResult( + action="reject", + rejection_reason=( + f"Rate limit exceeded: {self._max_rpm} req/min " + f"for session '{session_id}'" + ), + ) + self._timestamps[key].append(now) + return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/skill_registry.py b/core/framework/pipeline/stages/skill_registry.py new file mode 100644 index 00000000..71a73a69 --- /dev/null +++ b/core/framework/pipeline/stages/skill_registry.py @@ -0,0 +1,55 @@ +"""Skill registry pipeline stage. + +Discovers and loads skills. This is the ONLY place skills get loaded. 
+""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Any + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + +logger = logging.getLogger(__name__) + + +@register("skill_registry") +class SkillRegistryStage(PipelineStage): + """Discover skills and provide prompts.""" + + order = 60 + + def __init__( + self, + project_root: str | Path | None = None, + interactive: bool = True, + skills_config: Any = None, + **kwargs: Any, + ) -> None: + self._project_root = Path(project_root) if project_root else None + self._interactive = interactive + self._skills_config = skills_config + self.skills_manager: Any = None + + async def initialize(self) -> None: + from framework.skills.config import SkillsConfig + from framework.skills.manager import SkillsManager, SkillsManagerConfig + + config = SkillsManagerConfig( + skills_config=self._skills_config or SkillsConfig(), + project_root=self._project_root, + interactive=self._interactive, + ) + self.skills_manager = SkillsManager(config) + self.skills_manager.load() + await self.skills_manager.start_watching() + logger.info( + "[pipeline] SkillRegistryStage: catalog=%d chars, protocols=%d chars", + len(self.skills_manager.skills_catalog_prompt), + len(self.skills_manager.protocols_prompt), + ) + + async def process(self, ctx: PipelineContext) -> PipelineResult: + return PipelineResult(action="continue") diff --git a/core/framework/runner/__init__.py b/core/framework/runner/__init__.py deleted file mode 100644 index 376866a7..00000000 --- a/core/framework/runner/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Agent Runner - load and run exported agents.""" - -from framework.runner.mcp_registry import MCPRegistry -from framework.runner.protocol import ( - AgentMessage, - CapabilityLevel, - CapabilityResponse, - MessageType, - OrchestratorResult, -) -from framework.runner.runner import AgentInfo, 
AgentRunner, ValidationResult -from framework.runner.tool_registry import ToolRegistry, tool - -__all__ = [ - # Single agent - "AgentRunner", - "AgentInfo", - "ValidationResult", - "ToolRegistry", - "MCPRegistry", - "tool", - "AgentMessage", - "MessageType", - "CapabilityLevel", - "CapabilityResponse", - "OrchestratorResult", -] diff --git a/core/framework/runtime/EVENT_TYPES.md b/core/framework/runtime/EVENT_TYPES.md deleted file mode 100644 index 22d3cc6a..00000000 --- a/core/framework/runtime/EVENT_TYPES.md +++ /dev/null @@ -1,493 +0,0 @@ -# Event Types and Schema Reference - -The Hive runtime uses a pub/sub `EventBus` for inter-component communication and observability. Every event is an `AgentEvent` dataclass published through `EventBus.publish()`. - -## Event Envelope (`AgentEvent`) - -Every event shares a common envelope: - -| Field | Type | Description | -| ---------------- | ----------------- | ------------------------------------------------------------ | -| `type` | `EventType` (str) | Event type identifier (see below) | -| `stream_id` | `str` | Entry point / pipeline that emitted the event | -| `node_id` | `str \| None` | Graph node that emitted the event | -| `execution_id` | `str \| None` | Unique execution run ID (UUID, set by `ExecutionStream`) | -| `graph_id` | `str \| None` | Graph that emitted the event (set by `GraphScopedEventBus`) | -| `data` | `dict` | Event-type-specific payload (see individual schemas below) | -| `timestamp` | `datetime` | When the event was created | -| `correlation_id` | `str \| None` | Optional ID for tracking related events across streams | - -### Identity Fields - -The identity tuple `(graph_id, stream_id, node_id, execution_id)` uniquely locates any event: - -- **`graph_id`** — Which graph produced the event. Set automatically by `GraphScopedEventBus` (a subclass that stamps `graph_id` on every `publish()` call). Values: `"worker"`, `"judge"`, `"queen"`, or the graph spec ID. 
-- **`stream_id`** — Which entry point / pipeline. Corresponds to `EntryPointSpec.id` in the graph definition. For single-entry-point graphs, this equals the entry point name (e.g. `"default"`, `"health_check"`). -- **`node_id`** — Which specific node emitted the event. For `EventLoopNode` events, this is the node spec ID. -- **`execution_id`** — UUID identifying a specific execution run. Multiple concurrent executions of the same entry point each get a unique `execution_id`. - ---- - -## Execution Lifecycle - -### `execution_started` - -A new graph execution has begun. - -| Data Field | Type | Description | -| ---------- | ------ | ------------------------------- | -| `input` | `dict` | Input data passed to the graph | - -**Emitted by:** `ExecutionStream._run_execution()` - ---- - -### `execution_completed` - -A graph execution finished successfully. - -| Data Field | Type | Description | -| ---------- | ------ | ----------------- | -| `output` | `dict` | Final output data | - -**Emitted by:** `ExecutionStream._run_execution()` - -**Queen notification:** When a worker execution completes, the session manager \ -injects a `[WORKER_TERMINAL]` notification into the queen with the output summary. \ -The queen reports to the user and asks what to do next. - ---- - -### `execution_failed` - -A graph execution failed with an error. - -| Data Field | Type | Description | -| ---------- | ----- | ------------- | -| `error` | `str` | Error message | - -**Emitted by:** `ExecutionStream._run_execution()` - -**Queen notification:** When a worker execution fails, the session manager \ -injects a `[WORKER_TERMINAL]` notification into the queen with the error. \ -The queen reports to the user and helps troubleshoot. - ---- - -### `execution_paused` - -Execution has been paused (Ctrl+Z or HITL approval). 
- -| Data Field | Type | Description | -| ---------- | ----- | ----------------- | -| `reason` | `str` | Why it was paused | - -**Emitted by:** `GraphExecutor.execute()` - ---- - -### `execution_resumed` - -Execution has resumed from a paused state. - -| Data Field | Type | Description | -| ---------- | ---- | ----------- | -| *(none)* | | | - -**Emitted by:** `GraphExecutor.execute()` - ---- - -## Node Event-Loop Lifecycle - -These events track the inner loop of `EventLoopNode` — the multi-turn LLM streaming loop that powers most agent nodes. - -### `node_loop_started` - -An EventLoopNode has begun its execution loop. - -| Data Field | Type | Description | -| ---------------- | ---------- | ------------------------------- | -| `max_iterations` | `int\|null`| Maximum iterations configured | - -**Emitted by:** `EventLoopNode._publish_loop_started()`, `GraphExecutor` (for function nodes in parallel branches) - ---- - -### `node_loop_iteration` - -An EventLoopNode has started a new iteration (one LLM turn). - -| Data Field | Type | Description | -| ----------- | ----- | ------------------------- | -| `iteration` | `int` | Zero-based iteration index | - -**Emitted by:** `EventLoopNode._publish_iteration()` - ---- - -### `node_loop_completed` - -An EventLoopNode has finished its execution loop. - -| Data Field | Type | Description | -| ------------ | ----- | -------------------------------------- | -| `iterations` | `int` | Total number of iterations completed | - -**Emitted by:** `EventLoopNode._publish_loop_completed()`, `GraphExecutor` (for function nodes in parallel branches) - ---- - -## LLM Streaming - -### `llm_text_delta` - -Incremental text output from the LLM (non-client-facing nodes only). 
- -| Data Field | Type | Description | -| ---------- | ----- | ---------------------------------------- | -| `content` | `str` | New text chunk (delta) | -| `snapshot` | `str` | Full accumulated text so far | - -**Emitted by:** `EventLoopNode._publish_text_delta()` when `client_facing=False` - ---- - -### `llm_reasoning_delta` - -Incremental reasoning/thinking output from the LLM. - -| Data Field | Type | Description | -| ---------- | ----- | ------------------- | -| `content` | `str` | New reasoning chunk | - -**Emitted by:** Not currently wired in `EventLoopNode` (reserved for extended thinking models). - ---- - -## Tool Lifecycle - -### `tool_call_started` - -The LLM has requested a tool call and execution is about to begin. - -| Data Field | Type | Description | -| ------------ | ------ | ------------------------------------ | -| `tool_use_id`| `str` | Unique ID for this tool invocation | -| `tool_name` | `str` | Name of the tool being called | -| `tool_input` | `dict` | Arguments passed to the tool | - -**Emitted by:** `EventLoopNode._publish_tool_started()` - ---- - -### `tool_call_completed` - -A tool call has finished executing. - -| Data Field | Type | Description | -| ------------ | ------ | -------------------------------------- | -| `tool_use_id`| `str` | Same ID from `tool_call_started` | -| `tool_name` | `str` | Name of the tool | -| `result` | `str` | Tool execution result (may be truncated)| -| `is_error` | `bool` | Whether the tool returned an error | - -**Emitted by:** `EventLoopNode._publish_tool_completed()` - ---- - -## Client I/O - -These events are emitted by the queen's interactive turns. They drive the TUI's chat interface. - -### `client_output_delta` - -Incremental text output meant for the human operator. 
- -| Data Field | Type | Description | -| ---------- | ----- | ---------------------------- | -| `content` | `str` | New text chunk (delta) | -| `snapshot` | `str` | Full accumulated text so far | - -**Emitted by:** `EventLoopNode._publish_text_delta()` for queen/user-facing output - ---- - -### `client_input_requested` - -The node is waiting for human input (via `ask_user` tool or auto-block on text-only turns). - -| Data Field | Type | Description | -| ---------- | ----- | ------------------------------------------------- | -| `prompt` | `str` | Optional prompt/question shown to the user | - -**Emitted by:** `EventLoopNode._await_user_input()`, doom loop handler - -The TUI subscribes to this event to show the input prompt and focus the chat input. After the user types, `inject_event()` is called on the node to unblock it. - ---- - -## Internal Node Observability - -### `node_internal_output` - -Output from a non-client-facing node (for debugging/monitoring). - -| Data Field | Type | Description | -| ---------- | ----- | ---------------- | -| `content` | `str` | Output text | - -**Emitted by:** Available via `emit_node_internal_output()` — not currently wired in the default `EventLoopNode`. - ---- - -### `node_input_blocked` - -A non-client-facing node is blocked waiting for input. - -| Data Field | Type | Description | -| ---------- | ----- | --------------- | -| `prompt` | `str` | Block reason | - -**Emitted by:** Available via `emit_node_input_blocked()` — reserved for future use. - ---- - -### `node_stalled` - -The node's LLM has produced identical responses for several consecutive turns (stall detection). 
- -| Data Field | Type | Description | -| ---------- | ----- | ------------------------------------------------- | -| `reason` | `str` | Always `"Consecutive identical responses detected"`| - -**Emitted by:** `EventLoopNode._publish_stalled()` - ---- - -### `node_tool_doom_loop` - -The LLM is calling the same tool(s) with identical arguments repeatedly (doom loop detection). - -| Data Field | Type | Description | -| ------------- | ----- | ------------------------------------ | -| `description` | `str` | Human-readable doom loop description | - -**Emitted by:** `EventLoopNode` doom loop handler - ---- - -## Judge Decisions - -### `judge_verdict` - -The judge (custom or implicit) has evaluated the current iteration. - -| Data Field | Type | Description | -| ------------ | ----- | ---------------------------------------------------- | -| `action` | `str` | `"ACCEPT"`, `"RETRY"`, `"ESCALATE"`, or `"CONTINUE"` | -| `feedback` | `str` | Judge feedback (empty for ACCEPT/CONTINUE) | -| `judge_type` | `str` | `"custom"` (explicit JudgeProtocol) or `"implicit"` (stop-reason heuristic) | -| `iteration` | `int` | Which iteration this verdict applies to | - -**Emitted by:** `EventLoopNode._publish_judge_verdict()` - -**Verdict meanings:** -- **ACCEPT** — Output meets requirements; node exits successfully. -- **RETRY** — Output needs improvement; loop continues with feedback injected. -- **ESCALATE** — Problem cannot be solved at this level; triggers escalation. -- **CONTINUE** — Implicit verdict: LLM called tools, so it's making progress — let it keep going. - ---- - -## Output Tracking - -### `output_key_set` - -A node has set an output key via the `set_output` synthetic tool. 
- -| Data Field | Type | Description | -| ---------- | ----- | ----------------- | -| `key` | `str` | Output key name | - -**Emitted by:** `EventLoopNode._publish_output_key_set()` - ---- - -## Retry & Edge Tracking - -### `node_retry` - -A transient error occurred during an LLM call and the node is retrying. - -| Data Field | Type | Description | -| ------------- | ----- | ---------------------------------- | -| `retry_count` | `int` | Current retry attempt number | -| `max_retries` | `int` | Maximum retries configured | -| `error` | `str` | Error message (truncated to 500ch) | - -**Emitted by:** `EventLoopNode` (stream retry handler), `GraphExecutor` (node-level retry) - ---- - -### `edge_traversed` - -The executor has traversed an edge from one node to another. - -| Data Field | Type | Description | -| ---------------- | ----- | ---------------------------------------------- | -| `source_node` | `str` | Node ID the edge starts from | -| `target_node` | `str` | Node ID the edge goes to | -| `edge_condition` | `str` | Edge condition: `"router"`, `"on_success"`, etc. | - -**Emitted by:** `GraphExecutor.execute()` — after router decisions, condition-based edges, and fallback edges. - ---- - -## Context Management - -### `context_compacted` - -Not currently emitted — reserved for future use when `NodeConversation` compacts history. - ---- - -## State Changes - -### `state_changed` - -A shared buffer key has been modified. - -| Data Field | Type | Description | -| ----------- | ----- | ---------------------------------- | -| `key` | `str` | Buffer key that changed | -| `old_value` | `Any` | Previous value | -| `new_value` | `Any` | New value | -| `scope` | `str` | Scope of the change | - -**Emitted by:** Available via `emit_state_changed()` — not currently wired in default execution. - ---- - -### `state_conflict` - -Not currently emitted — reserved for concurrent write conflict detection. 
- ---- - -## Goal Tracking - -### `goal_progress` - -Goal completion progress update. - -| Data Field | Type | Description | -| ----------------- | ------- | ------------------------------------ | -| `progress` | `float` | 0.0–1.0 completion fraction | -| `criteria_status` | `dict` | Per-criterion status | - -**Emitted by:** Available via `emit_goal_progress()` — not currently wired in default execution. - ---- - -### `goal_achieved` - -Not currently emitted — reserved for explicit goal completion signals. - ---- - -### `constraint_violation` - -A goal constraint has been violated. - -| Data Field | Type | Description | -| --------------- | ----- | ------------------------ | -| `constraint_id` | `str` | Which constraint failed | -| `description` | `str` | What went wrong | - -**Emitted by:** Available via `emit_constraint_violation()`. - ---- - -## Stream Lifecycle - -### `stream_started` / `stream_stopped` - -Not currently emitted — reserved for `ExecutionStream` lifecycle tracking. - ---- - -## External Triggers - -### `webhook_received` - -An external webhook has been received. - -| Data Field | Type | Description | -| -------------- | ------ | ---------------------------- | -| `path` | `str` | Webhook URL path | -| `method` | `str` | HTTP method | -| `headers` | `dict` | HTTP headers | -| `payload` | `dict` | Request body | -| `query_params` | `dict` | URL query parameters | - -**Emitted by:** Webhook server integration. - -Note: `node_id` is not set on this event; `stream_id` is the webhook source ID. - ---- - -## Escalation - -### `escalation_requested` - -An agent has requested handoff to the Hive Coder (via the `escalate` synthetic tool). - -| Data Field | Type | Description | -| ---------- | ----- | ------------------------------- | -| `reason` | `str` | Why escalation is needed | -| `context` | `str` | Additional context for the coder| - -**Emitted by:** `EventLoopNode` when the LLM calls `escalate`. 
- ---- - -## Custom Events - -### `custom` - -User-defined events with arbitrary payloads. No schema enforced. - ---- - -## Subscription & Filtering - -Events can be filtered when subscribing: - -```python -bus.subscribe( - event_types=[EventType.TOOL_CALL_STARTED, EventType.TOOL_CALL_COMPLETED], - handler=my_handler, - filter_stream="default", # Only events from this stream - filter_node="planner", # Only events from this node - filter_execution="exec-uuid", # Only events from this execution - filter_graph="worker", # Only events from this graph -) -``` - -## Debug Event Logging - -Set `HIVE_DEBUG_EVENTS=1` to write every published event to a JSONL file at `~/.hive/event_logs/.jsonl`. Each line is the full JSON serialization of an `AgentEvent`: - -```json -{ - "type": "tool_call_started", - "stream_id": "default", - "node_id": "planner", - "execution_id": "a1b2c3d4-...", - "graph_id": "worker", - "data": {"tool_use_id": "tu_1", "tool_name": "web_search", "tool_input": {"query": "..."}}, - "timestamp": "2026-02-24T12:00:00.000000", - "correlation_id": null -} -``` diff --git a/core/framework/runtime/README.md b/core/framework/runtime/README.md deleted file mode 100644 index 0159c2e1..00000000 --- a/core/framework/runtime/README.md +++ /dev/null @@ -1,171 +0,0 @@ -# Agent Runtime - -Unified execution system for all Hive agents. Every agent — single-entry or multi-entry, headless or dashboard — runs through the same runtime stack. - -## Topology - -``` - AgentRunner.load(agent_path) - | - AgentRunner - (factory + public API) - | - _setup_agent_runtime() - | - AgentRuntime - (lifecycle + orchestration) - / | \ - Stream A Stream B Stream C ← one per entry point - | | | - GraphExecutor GraphExecutor GraphExecutor - | | | - Node → Node → Node (graph traversal) -``` - -Single-entry agents get a `"default"` entry point automatically. There is no separate code path. 
- -## Components - -| Component | File | Role | -|---|---|---| -| `AgentRunner` | `runner/runner.py` | Load agents, configure tools/LLM, expose high-level API | -| `AgentRuntime` | `runtime/agent_runtime.py` | Lifecycle management, entry point routing, event bus | -| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence | -| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing | -| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) | -| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels | -| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams | -| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) | - -## Programming Interface - -### AgentRunner (high-level) - -```python -from framework.runner import AgentRunner - -# Load and run -runner = AgentRunner.load("exports/my_agent", model="anthropic/claude-sonnet-4-20250514") -result = await runner.run({"query": "hello"}) - -# Resume from paused session -result = await runner.run({"query": "continue"}, session_state=saved_state) - -# Lifecycle -await runner.start() # Start the runtime -await runner.stop() # Stop the runtime -exec_id = await runner.trigger("default", {}) # Non-blocking trigger -entry_points = runner.get_entry_points() # List entry points - -# Context manager -async with AgentRunner.load("exports/my_agent") as runner: - result = await runner.run({"query": "hello"}) - -# Cleanup -runner.cleanup() # Synchronous -await runner.cleanup_async() # Asynchronous -``` - -### AgentRuntime (lower-level) - -```python -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec - -# Create runtime with entry points -runtime = create_agent_runtime( - graph=graph, - goal=goal, - 
storage_path=Path("~/.hive/agents/my_agent"), - entry_points=[ - EntryPointSpec(id="default", name="Default", entry_node="start", trigger_type="manual"), - ], - llm=llm, - tools=tools, - tool_executor=tool_executor, - checkpoint_config=checkpoint_config, -) - -# Lifecycle -await runtime.start() -await runtime.stop() - -# Execution -exec_id = await runtime.trigger("default", {"query": "hello"}) # Non-blocking -result = await runtime.trigger_and_wait("default", {"query": "hello"}) # Blocking -result = await runtime.trigger_and_wait("default", {}, session_state=state) # Resume - -# Client-facing node I/O -await runtime.inject_input(node_id="chat", content="user response") - -# Events -sub_id = runtime.subscribe_to_events( - event_types=[EventType.CLIENT_OUTPUT_DELTA], - handler=my_handler, -) -runtime.unsubscribe_from_events(sub_id) - -# Inspection -runtime.is_running # bool -runtime.event_bus # EventBus -runtime.state_manager # SharedBufferManager -runtime.get_stats() # Runtime statistics -``` - -## Execution Flow - -1. `AgentRunner.run()` calls `AgentRuntime.trigger_and_wait()` -2. `AgentRuntime` routes to the `ExecutionStream` for the entry point -3. `ExecutionStream` creates a `GraphExecutor` and calls `execute()` -4. `GraphExecutor` traverses nodes, dispatches tools, manages checkpoints -5. `ExecutionResult` flows back up through the stack -6. `ExecutionStream` writes session state to disk - -## Session Resume - -All execution paths support session resume: - -```python -# First run (agent pauses at a client-facing node) -result = await runner.run({"query": "start task"}) -# result.paused_at = "review-node" -# result.session_state = {"memory": {...}, "paused_at": "review-node", ...} - -# Resume -result = await runner.run({"input": "approved"}, session_state=result.session_state) -``` - -Session state flows: `AgentRunner.run()` → `AgentRuntime.trigger_and_wait()` → `ExecutionStream.execute()` → `GraphExecutor.execute()`. 
- -Checkpoints are saved at node boundaries (`sessions/{id}/checkpoints/`) for crash recovery. - -## Event Bus - -The `EventBus` provides real-time execution visibility: - -| Event | When | -|---|---| -| `NODE_STARTED` | Node begins execution | -| `NODE_COMPLETED` | Node finishes | -| `TOOL_CALL_STARTED` | Tool invocation begins | -| `TOOL_CALL_COMPLETED` | Tool invocation finishes | -| `CLIENT_OUTPUT_DELTA` | Agent streams text to user | -| `CLIENT_INPUT_REQUESTED` | Agent needs user input | -| `EXECUTION_COMPLETED` | Full execution finishes | - -In headless mode, `AgentRunner` subscribes to `CLIENT_OUTPUT_DELTA` and `CLIENT_INPUT_REQUESTED` to print output and read stdin. The web dashboard subscribes to route events to the frontend. - -## Storage Layout - -``` -~/.hive/agents/{agent_name}/ - sessions/ - session_YYYYMMDD_HHMMSS_{uuid}/ - state.json # Session state (status, memory, progress) - checkpoints/ # Node-boundary snapshots - logs/ - summary.json # Execution summary - details.jsonl # Detailed event log - tool_logs.jsonl # Tool call log - runtime_logs/ # Cross-session runtime logs -``` diff --git a/core/framework/runtime/__init__.py b/core/framework/runtime/__init__.py deleted file mode 100644 index 26441d41..00000000 --- a/core/framework/runtime/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Runtime core for agent execution.""" - -from framework.runtime.core import Runtime - -__all__ = ["Runtime"] diff --git a/core/framework/runtime/tests/__init__.py b/core/framework/runtime/tests/__init__.py deleted file mode 100644 index 2e79aec4..00000000 --- a/core/framework/runtime/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for runtime components.""" diff --git a/core/framework/runtime/tests/test_agent_runtime.py b/core/framework/runtime/tests/test_agent_runtime.py deleted file mode 100644 index 561aba1b..00000000 --- a/core/framework/runtime/tests/test_agent_runtime.py +++ /dev/null @@ -1,869 +0,0 @@ -""" -Tests for AgentRuntime and multi-entry-point 
execution. - -Tests: -1. AgentRuntime creation and lifecycle -2. Entry point registration -3. Concurrent executions across streams -4. SharedBufferManager isolation levels -5. OutcomeAggregator goal evaluation -6. EventBus pub/sub -""" - -import asyncio -import tempfile -from pathlib import Path - -import pytest - -from framework.graph import Goal -from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec -from framework.graph.goal import Constraint, SuccessCriterion -from framework.graph.node import NodeSpec -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.event_bus import AgentEvent, EventBus, EventType -from framework.runtime.execution_stream import EntryPointSpec -from framework.runtime.outcome_aggregator import OutcomeAggregator -from framework.runtime.shared_state import IsolationLevel, SharedBufferManager -from framework.schemas.session_state import SessionState, SessionTimestamps - -# === Test Fixtures === - - -@pytest.fixture -def sample_goal(): - """Create a sample goal for testing.""" - return Goal( - id="test-goal", - name="Test Goal", - description="A goal for testing multi-entry-point execution", - success_criteria=[ - SuccessCriterion( - id="sc-1", - description="Process all requests", - metric="requests_processed", - target="100%", - weight=1.0, - ), - ], - constraints=[ - Constraint( - id="c-1", - description="Must not exceed rate limits", - constraint_type="hard", - category="operational", - ), - ], - ) - - -@pytest.fixture -def sample_graph(): - """Create a sample graph with multiple entry points.""" - nodes = [ - NodeSpec( - id="process-webhook", - name="Process Webhook", - description="Process incoming webhook", - node_type="event_loop", - input_keys=["webhook_data"], - output_keys=["result"], - ), - NodeSpec( - id="process-api", - name="Process API Request", - description="Process API request", - node_type="event_loop", - input_keys=["request_data"], - output_keys=["result"], - 
), - NodeSpec( - id="complete", - name="Complete", - description="Execution complete", - node_type="terminal", - input_keys=["result"], - output_keys=["final_result"], - ), - ] - - edges = [ - EdgeSpec( - id="webhook-to-complete", - source="process-webhook", - target="complete", - condition=EdgeCondition.ON_SUCCESS, - ), - EdgeSpec( - id="api-to-complete", - source="process-api", - target="complete", - condition=EdgeCondition.ON_SUCCESS, - ), - ] - - return GraphSpec( - id="test-graph", - goal_id="test-goal", - version="1.0.0", - entry_node="process-webhook", - entry_points={"start": "process-webhook"}, - terminal_nodes=["complete"], - pause_nodes=[], - nodes=nodes, - edges=edges, - ) - - -@pytest.fixture -def temp_storage(): - """Create a temporary storage directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) - - -# === SharedBufferManager Tests === - - -class TestSharedBufferManager: - """Tests for SharedBufferManager.""" - - def test_create_buffer(self): - """Test creating execution-scoped buffer.""" - manager = SharedBufferManager() - buffer = manager.create_buffer( - execution_id="exec-1", - stream_id="webhook", - isolation=IsolationLevel.SHARED, - ) - assert buffer is not None - assert buffer._execution_id == "exec-1" - assert buffer._stream_id == "webhook" - - @pytest.mark.asyncio - async def test_isolated_state(self): - """Test isolated state doesn't leak between executions.""" - manager = SharedBufferManager() - - buf1 = manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED) - buf2 = manager.create_buffer("exec-2", "stream-1", IsolationLevel.ISOLATED) - - await buf1.write("key", "value1") - await buf2.write("key", "value2") - - assert await buf1.read("key") == "value1" - assert await buf2.read("key") == "value2" - - @pytest.mark.asyncio - async def test_shared_state(self): - """Test shared state is visible across executions.""" - manager = SharedBufferManager() - - manager.create_buffer("exec-1", "stream-1", 
IsolationLevel.SHARED) - manager.create_buffer("exec-2", "stream-1", IsolationLevel.SHARED) - - # Write to global scope - await manager.write( - key="global_key", - value="global_value", - execution_id="exec-1", - stream_id="stream-1", - isolation=IsolationLevel.SHARED, - scope="global", - ) - - # Both should see it - value1 = await manager.read("global_key", "exec-1", "stream-1", IsolationLevel.SHARED) - value2 = await manager.read("global_key", "exec-2", "stream-1", IsolationLevel.SHARED) - - assert value1 == "global_value" - assert value2 == "global_value" - - def test_cleanup_execution(self): - """Test execution cleanup removes state.""" - manager = SharedBufferManager() - manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED) - - assert "exec-1" in manager._execution_state - - manager.cleanup_execution("exec-1") - - assert "exec-1" not in manager._execution_state - - -class TestSessionState: - """Tests for session state data-buffer compatibility.""" - - def test_legacy_memory_alias_populates_data_buffer(self): - """Legacy `memory` payloads should still hydrate the session buffer.""" - state = SessionState( - session_id="session-1", - goal_id="goal-1", - timestamps=SessionTimestamps( - started_at="2026-01-01T00:00:00", - updated_at="2026-01-01T00:00:00", - ), - memory={"rules": "keep starred mail"}, - ) - - assert state.data_buffer == {"rules": "keep starred mail"} - assert state.memory == {"rules": "keep starred mail"} - assert state.to_session_state_dict()["data_buffer"] == {"rules": "keep starred mail"} - - -# === EventBus Tests === - - -class TestEventBus: - """Tests for EventBus pub/sub.""" - - @pytest.mark.asyncio - async def test_publish_subscribe(self): - """Test basic publish/subscribe.""" - bus = EventBus() - received_events = [] - - async def handler(event: AgentEvent): - received_events.append(event) - - bus.subscribe( - event_types=[EventType.EXECUTION_STARTED], - handler=handler, - ) - - await bus.publish( - AgentEvent( - 
type=EventType.EXECUTION_STARTED, - stream_id="webhook", - execution_id="exec-1", - data={"test": "data"}, - ) - ) - - # Allow handler to run - await asyncio.sleep(0.1) - - assert len(received_events) == 1 - assert received_events[0].type == EventType.EXECUTION_STARTED - assert received_events[0].stream_id == "webhook" - - @pytest.mark.asyncio - async def test_stream_filter(self): - """Test filtering by stream ID.""" - bus = EventBus() - received_events = [] - - async def handler(event: AgentEvent): - received_events.append(event) - - bus.subscribe( - event_types=[EventType.EXECUTION_STARTED], - handler=handler, - filter_stream="webhook", - ) - - # Publish to webhook stream (should be received) - await bus.publish( - AgentEvent( - type=EventType.EXECUTION_STARTED, - stream_id="webhook", - ) - ) - - # Publish to api stream (should NOT be received) - await bus.publish( - AgentEvent( - type=EventType.EXECUTION_STARTED, - stream_id="api", - ) - ) - - await asyncio.sleep(0.1) - - assert len(received_events) == 1 - assert received_events[0].stream_id == "webhook" - - def test_unsubscribe(self): - """Test unsubscribing from events.""" - bus = EventBus() - - async def handler(event: AgentEvent): - pass - - sub_id = bus.subscribe( - event_types=[EventType.EXECUTION_STARTED], - handler=handler, - ) - - assert sub_id in bus._subscriptions - - result = bus.unsubscribe(sub_id) - - assert result is True - assert sub_id not in bus._subscriptions - - @pytest.mark.asyncio - async def test_wait_for(self): - """Test waiting for a specific event.""" - bus = EventBus() - - # Start waiting in background - async def wait_and_check(): - event = await bus.wait_for( - event_type=EventType.EXECUTION_COMPLETED, - timeout=1.0, - ) - return event - - wait_task = asyncio.create_task(wait_and_check()) - - # Publish the event - await asyncio.sleep(0.1) - await bus.publish( - AgentEvent( - type=EventType.EXECUTION_COMPLETED, - stream_id="webhook", - execution_id="exec-1", - ) - ) - - event = await 
wait_task - - assert event is not None - assert event.type == EventType.EXECUTION_COMPLETED - - -# === OutcomeAggregator Tests === - - -class TestOutcomeAggregator: - """Tests for OutcomeAggregator.""" - - def test_record_decision(self, sample_goal): - """Test recording decisions.""" - aggregator = OutcomeAggregator(sample_goal) - - from framework.schemas.decision import Decision, DecisionType - - decision = Decision( - id="dec-1", - node_id="process-webhook", - intent="Process incoming webhook", - decision_type=DecisionType.PATH_CHOICE, - options=[], - chosen_option_id="opt-1", - reasoning="Standard processing path", - ) - - aggregator.record_decision("webhook", "exec-1", decision) - - assert aggregator._total_decisions == 1 - assert len(aggregator._decisions) == 1 - - @pytest.mark.asyncio - async def test_evaluate_goal_progress(self, sample_goal): - """Test goal progress evaluation.""" - aggregator = OutcomeAggregator(sample_goal) - - progress = await aggregator.evaluate_goal_progress() - - assert "overall_progress" in progress - assert "criteria_status" in progress - assert "constraint_violations" in progress - assert "recommendation" in progress - - def test_record_constraint_violation(self, sample_goal): - """Test recording constraint violations.""" - aggregator = OutcomeAggregator(sample_goal) - - aggregator.record_constraint_violation( - constraint_id="c-1", - description="Rate limit exceeded", - violation_details="More than 100 requests/minute", - stream_id="webhook", - execution_id="exec-1", - ) - - assert len(aggregator._constraint_violations) == 1 - assert aggregator._constraint_violations[0].constraint_id == "c-1" - - -# === AgentRuntime Tests === - - -class TestAgentRuntime: - """Tests for AgentRuntime orchestration.""" - - def test_register_entry_point(self, sample_graph, sample_goal, temp_storage): - """Test registering entry points.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec 
= EntryPointSpec( - id="manual", - name="Manual Trigger", - entry_node="process-webhook", - trigger_type="manual", - ) - - runtime.register_entry_point(entry_spec) - - assert "manual" in runtime._entry_points - assert len(runtime.get_entry_points()) == 1 - - def test_register_duplicate_entry_point_fails(self, sample_graph, sample_goal, temp_storage): - """Test that duplicate entry point IDs fail.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="webhook", - name="Webhook Handler", - entry_node="process-webhook", - trigger_type="webhook", - ) - - runtime.register_entry_point(entry_spec) - - with pytest.raises(ValueError, match="already registered"): - runtime.register_entry_point(entry_spec) - - def test_register_invalid_entry_node_fails(self, sample_graph, sample_goal, temp_storage): - """Test that invalid entry nodes fail.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="invalid", - name="Invalid Entry", - entry_node="nonexistent-node", - trigger_type="manual", - ) - - with pytest.raises(ValueError, match="not found in graph"): - runtime.register_entry_point(entry_spec) - - @pytest.mark.asyncio - async def test_start_stop_lifecycle(self, sample_graph, sample_goal, temp_storage): - """Test runtime start/stop lifecycle.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="webhook", - name="Webhook Handler", - entry_node="process-webhook", - trigger_type="webhook", - ) - - runtime.register_entry_point(entry_spec) - - assert not runtime.is_running - - await runtime.start() - - assert runtime.is_running - assert "webhook" in runtime._streams - - await runtime.stop() - - assert not runtime.is_running - assert len(runtime._streams) == 0 - - @pytest.mark.asyncio - async def 
test_trigger_requires_running(self, sample_graph, sample_goal, temp_storage): - """Test that trigger fails if runtime not running.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="webhook", - name="Webhook Handler", - entry_node="process-webhook", - trigger_type="webhook", - ) - - runtime.register_entry_point(entry_spec) - - with pytest.raises(RuntimeError, match="not running"): - await runtime.trigger("webhook", {"test": "data"}) - - -# === GraphSpec Validation Tests === - - -# === Integration Tests === - - -class TestCreateAgentRuntime: - """Tests for the create_agent_runtime factory.""" - - def test_create_with_entry_points(self, sample_graph, sample_goal, temp_storage): - """Test factory creates runtime with entry points.""" - entry_points = [ - EntryPointSpec( - id="webhook", - name="Webhook", - entry_node="process-webhook", - trigger_type="webhook", - ), - EntryPointSpec( - id="api", - name="API", - entry_node="process-api", - trigger_type="api", - ), - ] - - runtime = create_agent_runtime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - entry_points=entry_points, - ) - - assert len(runtime.get_entry_points()) == 2 - assert "webhook" in runtime._entry_points - assert "api" in runtime._entry_points - - -# === Timer Entry Point Tests === - - -class TestTimerEntryPoints: - """Tests for timer-driven entry points (interval and cron).""" - - @pytest.mark.asyncio - async def test_interval_timer_starts_task(self, sample_graph, sample_goal, temp_storage): - """Test that interval_minutes timer creates an async task.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-interval", - name="Interval Timer", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={"interval_minutes": 60}, - ) - runtime.register_entry_point(entry_spec) - - await 
runtime.start() - try: - assert len(runtime._timer_tasks) == 1 - assert not runtime._timer_tasks[0].done() - # Give the async task a moment to set next_fire - await asyncio.sleep(0.05) - assert "timer-interval" in runtime._timer_next_fire - finally: - await runtime.stop() - - assert len(runtime._timer_tasks) == 0 - - @pytest.mark.asyncio - async def test_cron_timer_starts_task(self, sample_graph, sample_goal, temp_storage): - """Test that cron expression timer creates an async task.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-cron", - name="Cron Timer", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={"cron": "*/5 * * * *"}, # Every 5 minutes - ) - runtime.register_entry_point(entry_spec) - - await runtime.start() - try: - assert len(runtime._timer_tasks) == 1 - assert not runtime._timer_tasks[0].done() - # Give the async task a moment to set next_fire - await asyncio.sleep(0.05) - assert "timer-cron" in runtime._timer_next_fire - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_invalid_cron_expression_skipped( - self, sample_graph, sample_goal, temp_storage, caplog - ): - """Test that an invalid cron expression logs a warning and skips.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-bad-cron", - name="Bad Cron Timer", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={"cron": "not a cron expression"}, - ) - runtime.register_entry_point(entry_spec) - - await runtime.start() - try: - assert len(runtime._timer_tasks) == 0 - assert "invalid cron" in caplog.text.lower() or "Invalid cron" in caplog.text - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_cron_takes_priority_over_interval( - self, sample_graph, sample_goal, temp_storage, caplog - ): - """Test that when 
both cron and interval_minutes are set, cron wins.""" - import logging - - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-both", - name="Both Timer", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={"cron": "0 9 * * *", "interval_minutes": 30}, - ) - runtime.register_entry_point(entry_spec) - - with caplog.at_level(logging.INFO): - await runtime.start() - try: - assert len(runtime._timer_tasks) == 1 - # Should log cron, not interval - assert any("cron" in r.message.lower() for r in caplog.records) - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_no_interval_or_cron_warns(self, sample_graph, sample_goal, temp_storage, caplog): - """Test that timer with neither cron nor interval_minutes logs a warning.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-empty", - name="Empty Timer", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={}, - ) - runtime.register_entry_point(entry_spec) - - await runtime.start() - try: - assert len(runtime._timer_tasks) == 0 - assert "no 'cron' or valid 'interval_minutes'" in caplog.text - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_cron_immediate_fires_first(self, sample_graph, sample_goal, temp_storage): - """Test that run_immediately=True with cron doesn't set next_fire before first run.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-cron-immediate", - name="Cron Immediate", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={"cron": "0 0 * * *", "run_immediately": True}, - ) - runtime.register_entry_point(entry_spec) - - await runtime.start() - try: - assert len(runtime._timer_tasks) == 1 - # With run_immediately, the 
task enters the while loop directly, - # so _timer_next_fire is NOT set before the first trigger attempt - # (it pops it at the top of the loop) - # Give it a moment to start executing - await asyncio.sleep(0.05) - # Task should still be running (it will try to trigger and likely fail - # since there's no LLM, but the task itself continues) - assert not runtime._timer_tasks[0].done() - finally: - await runtime.stop() - - -# === Cancel All Tasks Tests === - - -class TestCancelAllTasks: - """Tests for cancel_all_tasks and cancel_all_tasks_async.""" - - @pytest.mark.asyncio - async def test_cancel_all_tasks_async_returns_false_when_no_tasks( - self, sample_graph, sample_goal, temp_storage - ): - """Test that cancel_all_tasks_async returns False with no running tasks.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="webhook", - name="Webhook", - entry_node="process-webhook", - trigger_type="webhook", - ) - runtime.register_entry_point(entry_spec) - await runtime.start() - - try: - result = await runtime.cancel_all_tasks_async() - assert result is False - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_cancel_all_tasks_async_cancels_running_task( - self, sample_graph, sample_goal, temp_storage - ): - """Test that cancel_all_tasks_async cancels a running task and returns True.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="webhook", - name="Webhook", - entry_node="process-webhook", - trigger_type="webhook", - ) - runtime.register_entry_point(entry_spec) - await runtime.start() - - try: - # Inject a fake running task into the stream - stream = runtime._streams["webhook"] - - async def hang_forever(): - await asyncio.get_event_loop().create_future() - - fake_task = asyncio.ensure_future(hang_forever()) - stream._execution_tasks["fake-exec"] = fake_task - - 
result = await runtime.cancel_all_tasks_async() - assert result is True - - # Let the CancelledError propagate - try: - await fake_task - except asyncio.CancelledError: - pass - assert fake_task.cancelled() - - # Clean up - del stream._execution_tasks["fake-exec"] - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_cancel_all_tasks_async_cancels_multiple_tasks_across_streams( - self, sample_graph, sample_goal, temp_storage - ): - """Test that cancel_all_tasks_async cancels tasks across multiple streams.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - # Register two entry points so we get two streams - runtime.register_entry_point( - EntryPointSpec( - id="stream-a", - name="Stream A", - entry_node="process-webhook", - trigger_type="webhook", - ) - ) - runtime.register_entry_point( - EntryPointSpec( - id="stream-b", - name="Stream B", - entry_node="process-webhook", - trigger_type="webhook", - ) - ) - await runtime.start() - - try: - - async def hang_forever(): - await asyncio.get_event_loop().create_future() - - stream_a = runtime._streams["stream-a"] - stream_b = runtime._streams["stream-b"] - - # Two tasks in stream A, one task in stream B - task_a1 = asyncio.ensure_future(hang_forever()) - task_a2 = asyncio.ensure_future(hang_forever()) - task_b1 = asyncio.ensure_future(hang_forever()) - - stream_a._execution_tasks["exec-a1"] = task_a1 - stream_a._execution_tasks["exec-a2"] = task_a2 - stream_b._execution_tasks["exec-b1"] = task_b1 - - result = await runtime.cancel_all_tasks_async() - assert result is True - - # Let CancelledErrors propagate - for task in [task_a1, task_a2, task_b1]: - try: - await task - except asyncio.CancelledError: - pass - assert task.cancelled() - - # Clean up - del stream_a._execution_tasks["exec-a1"] - del stream_a._execution_tasks["exec-a2"] - del stream_b._execution_tasks["exec-b1"] - finally: - await runtime.stop() - - -if __name__ == "__main__": - 
pytest.main([__file__, "-v"]) diff --git a/core/framework/runtime/tests/test_idempotency.py b/core/framework/runtime/tests/test_idempotency.py deleted file mode 100644 index 713e037b..00000000 --- a/core/framework/runtime/tests/test_idempotency.py +++ /dev/null @@ -1,268 +0,0 @@ -"""Tests for webhook idempotency key support in AgentRuntime.trigger().""" - -import asyncio -import time -from collections import OrderedDict -from unittest.mock import AsyncMock, MagicMock - -import pytest - -from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig - - -def _make_runtime(ttl=300.0, max_keys=10000): - """Create a minimal AgentRuntime with idempotency cache attributes. - - Uses ``object.__new__`` to skip ``__init__`` and its heavy dependencies - (storage, LLM, skills) — we only need the cache and config for these tests. - """ - runtime = object.__new__(AgentRuntime) - runtime._config = AgentRuntimeConfig(idempotency_ttl_seconds=ttl, idempotency_max_keys=max_keys) - runtime._running = True - runtime._lock = asyncio.Lock() - runtime._idempotency_keys = OrderedDict() - runtime._idempotency_times = {} - runtime._graphs = {} - runtime._active_graph_id = "primary" - runtime._graph_id = "primary" - runtime._streams = {} - runtime._entry_points = {} - return runtime - - -def _make_runtime_with_stream(ttl=300.0, max_keys=10000): - """Create a mock runtime whose stream.execute() returns unique IDs.""" - runtime = _make_runtime(ttl=ttl, max_keys=max_keys) - - call_count = 0 - - async def _fake_execute(*args, **kwargs): - nonlocal call_count - call_count += 1 - return f"session-{call_count:04d}" - - stream = MagicMock() - stream.execute = _fake_execute - runtime._streams = {"webhook": stream} - runtime._entry_points = {"webhook": MagicMock()} - return runtime - - -class TestIdempotencyConfig: - """Verify idempotency configuration defaults.""" - - def test_default_ttl(self): - config = AgentRuntimeConfig() - assert config.idempotency_ttl_seconds == 300.0 - - def 
test_default_max_keys(self): - config = AgentRuntimeConfig() - assert config.idempotency_max_keys == 10000 - - def test_custom_config(self): - config = AgentRuntimeConfig(idempotency_ttl_seconds=60.0, idempotency_max_keys=100) - assert config.idempotency_ttl_seconds == 60.0 - assert config.idempotency_max_keys == 100 - - -class TestIdempotencyCache: - """Test the idempotency cache and pruning logic directly.""" - - def test_cache_stores_and_retrieves_key(self): - runtime = _make_runtime() - runtime._idempotency_keys["stripe-evt-123"] = "exec-001" - runtime._idempotency_times["stripe-evt-123"] = time.time() - - assert runtime._idempotency_keys.get("stripe-evt-123") == "exec-001" - - def test_cache_returns_none_for_unknown_key(self): - runtime = _make_runtime() - assert runtime._idempotency_keys.get("unknown") is None - - def test_prune_removes_expired_keys(self): - runtime = _make_runtime(ttl=0.1) - - runtime._idempotency_keys["old-key"] = "exec-old" - runtime._idempotency_times["old-key"] = time.time() - 1.0 # expired - - runtime._prune_idempotency_keys() - - assert "old-key" not in runtime._idempotency_keys - assert "old-key" not in runtime._idempotency_times - - def test_prune_keeps_fresh_keys(self): - runtime = _make_runtime(ttl=300.0) - - runtime._idempotency_keys["fresh-key"] = "exec-fresh" - runtime._idempotency_times["fresh-key"] = time.time() - - runtime._prune_idempotency_keys() - - assert "fresh-key" in runtime._idempotency_keys - - def test_prune_respects_max_keys(self): - runtime = _make_runtime(max_keys=2) - - for i in range(3): - key = f"key-{i}" - runtime._idempotency_keys[key] = f"exec-{i}" - runtime._idempotency_times[key] = time.time() - - runtime._prune_idempotency_keys() - - assert len(runtime._idempotency_keys) == 2 - # Oldest (key-0) should be evicted - assert "key-0" not in runtime._idempotency_keys - assert "key-1" in runtime._idempotency_keys - assert "key-2" in runtime._idempotency_keys - - def test_prune_evicts_fifo(self): - runtime = 
_make_runtime(max_keys=1) - - runtime._idempotency_keys["first"] = "exec-1" - runtime._idempotency_times["first"] = time.time() - runtime._idempotency_keys["second"] = "exec-2" - runtime._idempotency_times["second"] = time.time() - - runtime._prune_idempotency_keys() - - assert len(runtime._idempotency_keys) == 1 - assert "second" in runtime._idempotency_keys - assert "first" not in runtime._idempotency_keys - - def test_mixed_expired_and_max_size(self): - runtime = _make_runtime(ttl=0.1, max_keys=2) - - # Add expired key - runtime._idempotency_keys["expired"] = "exec-e" - runtime._idempotency_times["expired"] = time.time() - 1.0 - - # Add fresh keys - runtime._idempotency_keys["fresh-1"] = "exec-f1" - runtime._idempotency_times["fresh-1"] = time.time() - runtime._idempotency_keys["fresh-2"] = "exec-f2" - runtime._idempotency_times["fresh-2"] = time.time() - - runtime._prune_idempotency_keys() - - assert "expired" not in runtime._idempotency_keys - assert "fresh-1" in runtime._idempotency_keys - assert "fresh-2" in runtime._idempotency_keys - - -class TestTriggerIdempotency: - """Tests for trigger() idempotency deduplication.""" - - def test_trigger_accepts_idempotency_key(self): - """trigger() accepts idempotency_key as a keyword argument.""" - import inspect - - sig = inspect.signature(AgentRuntime.trigger) - assert "idempotency_key" in sig.parameters - - def test_idempotency_key_defaults_to_none(self): - """idempotency_key defaults to None (backward compatible).""" - import inspect - - sig = inspect.signature(AgentRuntime.trigger) - assert sig.parameters["idempotency_key"].default is None - - def test_trigger_and_wait_accepts_idempotency_key(self): - """trigger_and_wait() also accepts idempotency_key.""" - import inspect - - sig = inspect.signature(AgentRuntime.trigger_and_wait) - assert "idempotency_key" in sig.parameters - - def test_trigger_and_wait_idempotency_key_defaults_to_none(self): - """trigger_and_wait() idempotency_key defaults to None.""" - import 
inspect - - sig = inspect.signature(AgentRuntime.trigger_and_wait) - assert sig.parameters["idempotency_key"].default is None - - @pytest.mark.asyncio - async def test_duplicate_key_returns_cached_id(self): - """Same idempotency key within TTL returns the cached execution ID.""" - runtime = _make_runtime_with_stream() - - first = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001") - second = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001") - - assert first == second - assert first == "session-0001" - - @pytest.mark.asyncio - async def test_different_keys_produce_different_ids(self): - """Different idempotency keys start separate executions.""" - runtime = _make_runtime_with_stream() - - id_a = await runtime.trigger("webhook", {}, idempotency_key="evt-aaa") - id_b = await runtime.trigger("webhook", {}, idempotency_key="evt-bbb") - - assert id_a != id_b - assert id_a == "session-0001" - assert id_b == "session-0002" - - @pytest.mark.asyncio - async def test_none_key_always_starts_new_execution(self): - """key=None (default) skips dedup — every call starts fresh.""" - runtime = _make_runtime_with_stream() - - id_1 = await runtime.trigger("webhook", {}) - id_2 = await runtime.trigger("webhook", {}) - - assert id_1 != id_2 - assert len(runtime._idempotency_keys) == 0 # nothing cached - - @pytest.mark.asyncio - async def test_expired_key_allows_new_execution(self): - """After TTL expires, the same key starts a new execution.""" - runtime = _make_runtime_with_stream(ttl=0.1) - - first = await runtime.trigger("webhook", {}, idempotency_key="evt-expire") - - # Backdate the cached timestamp so the key looks expired - runtime._idempotency_times["evt-expire"] = time.time() - 1.0 - - second = await runtime.trigger("webhook", {}, idempotency_key="evt-expire") - - assert first != second - assert first == "session-0001" - assert second == "session-0002" - - @pytest.mark.asyncio - async def test_stream_not_found_does_not_cache(self): - """If 
entry point doesn't exist, nothing is cached.""" - runtime = _make_runtime_with_stream() - - with pytest.raises(ValueError, match="not found"): - await runtime.trigger("nonexistent", {}, idempotency_key="evt-orphan") - - assert "evt-orphan" not in runtime._idempotency_keys - - @pytest.mark.asyncio - async def test_execute_error_does_not_cache(self): - """If stream.execute() raises, nothing is cached so retries can go through.""" - runtime = _make_runtime() - - failing_stream = MagicMock() - failing_stream.execute = AsyncMock(side_effect=RuntimeError("stream not running")) - runtime._streams = {"webhook": failing_stream} - runtime._entry_points = {"webhook": MagicMock()} - - with pytest.raises(RuntimeError, match="stream not running"): - await runtime.trigger("webhook", {}, idempotency_key="evt-123") - - assert "evt-123" not in runtime._idempotency_keys - - @pytest.mark.asyncio - async def test_cache_holds_real_execution_id(self): - """Cached value matches the actual execution ID from execute().""" - runtime = _make_runtime_with_stream() - - exec_id = await runtime.trigger("webhook", {}, idempotency_key="evt-real") - - cached = runtime._idempotency_keys.get("evt-real") - assert cached == exec_id - assert cached == "session-0001" diff --git a/core/framework/runtime/tests/test_runtime_logging_paths.py b/core/framework/runtime/tests/test_runtime_logging_paths.py deleted file mode 100644 index 3eb60ce0..00000000 --- a/core/framework/runtime/tests/test_runtime_logging_paths.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Tests for custom session-backed runtime logging paths.""" - -from pathlib import Path -from unittest.mock import MagicMock - -from framework.graph.executor import GraphExecutor -from framework.runtime.runtime_log_store import RuntimeLogStore -from framework.runtime.runtime_logger import RuntimeLogger - - -def test_graph_executor_uses_custom_session_dir_name_for_runtime_logs(): - executor = GraphExecutor( - runtime=MagicMock(), - 
storage_path=Path("/tmp/test-agent/sessions/my-custom-session"), - ) - - assert executor._get_runtime_log_session_id() == "my-custom-session" - - -def test_runtime_logger_creates_session_log_dir_for_custom_session_id(tmp_path): - base = tmp_path / ".hive" / "agents" / "test_agent" - base.mkdir(parents=True) - store = RuntimeLogStore(base) - logger = RuntimeLogger(store=store, agent_id="test-agent") - - run_id = logger.start_run(goal_id="goal-1", session_id="my-custom-session") - - assert run_id == "my-custom-session" - assert (base / "sessions" / "my-custom-session" / "logs").is_dir() diff --git a/core/framework/runtime/tests/test_webhook_server.py b/core/framework/runtime/tests/test_webhook_server.py deleted file mode 100644 index 13d2628c..00000000 --- a/core/framework/runtime/tests/test_webhook_server.py +++ /dev/null @@ -1,716 +0,0 @@ -""" -Tests for WebhookServer and event-driven entry points. -""" - -import asyncio -import hashlib -import hmac as hmac_mod -import json -import tempfile -from pathlib import Path -from unittest.mock import patch - -import aiohttp -import pytest - -from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig -from framework.runtime.event_bus import AgentEvent, EventBus, EventType -from framework.runtime.execution_stream import EntryPointSpec -from framework.runtime.webhook_server import ( - WebhookRoute, - WebhookServer, - WebhookServerConfig, -) - - -def _make_server(event_bus: EventBus, routes: list[WebhookRoute] | None = None): - """Helper to create a WebhookServer with port=0 for OS-assigned port.""" - config = WebhookServerConfig(host="127.0.0.1", port=0) - server = WebhookServer(event_bus, config) - for route in routes or []: - server.add_route(route) - return server - - -def _base_url(server: WebhookServer) -> str: - """Get the base URL for a running server.""" - return f"http://127.0.0.1:{server.port}" - - -class TestWebhookServerLifecycle: - """Tests for server start/stop.""" - - @pytest.mark.asyncio - 
async def test_start_stop(self): - bus = EventBus() - server = _make_server( - bus, - [ - WebhookRoute(source_id="test", path="/webhooks/test", methods=["POST"]), - ], - ) - - await server.start() - assert server.is_running - assert server.port is not None - - await server.stop() - assert not server.is_running - assert server.port is None - - @pytest.mark.asyncio - async def test_no_routes_skips_start(self): - bus = EventBus() - server = _make_server(bus) # no routes - - await server.start() - assert not server.is_running - - @pytest.mark.asyncio - async def test_stop_when_not_started(self): - bus = EventBus() - server = _make_server(bus) - - # Should be a no-op, not raise - await server.stop() - assert not server.is_running - - -class TestWebhookEventPublishing: - """Tests for HTTP request -> EventBus event publishing.""" - - @pytest.mark.asyncio - async def test_post_publishes_webhook_received(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute(source_id="gh", path="/webhooks/github", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/github", - json={"action": "opened", "number": 42}, - ) as resp: - assert resp.status == 202 - body = await resp.json() - assert body["status"] == "accepted" - - # Give event bus time to dispatch - await asyncio.sleep(0.05) - - assert len(received) == 1 - event = received[0] - assert event.type == EventType.WEBHOOK_RECEIVED - assert event.stream_id == "gh" - assert event.data["path"] == "/webhooks/github" - assert event.data["method"] == "POST" - assert event.data["payload"] == {"action": "opened", "number": 42} - assert isinstance(event.data["headers"], dict) - assert event.data["query_params"] == {} - finally: - await server.stop() - - @pytest.mark.asyncio - 
async def test_query_params_included(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute(source_id="hook", path="/webhooks/hook", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/hook?source=test&v=2", - json={"data": "hello"}, - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.05) - - assert len(received) == 1 - assert received[0].data["query_params"] == {"source": "test", "v": "2"} - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_non_json_body(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute(source_id="raw", path="/webhooks/raw", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/raw", - data=b"plain text body", - headers={"Content-Type": "text/plain"}, - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.05) - - assert len(received) == 1 - assert received[0].data["payload"] == {"raw_body": "plain text body"} - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_empty_body(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute(source_id="empty", path="/webhooks/empty", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post(f"{_base_url(server)}/webhooks/empty") as resp: - assert resp.status == 202 - - await 
asyncio.sleep(0.05) - - assert len(received) == 1 - assert received[0].data["payload"] == {} - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_multiple_routes(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]), - WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/a", json={"from": "a"} - ) as resp: - assert resp.status == 202 - - async with session.post( - f"{_base_url(server)}/webhooks/b", json={"from": "b"} - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.05) - - assert len(received) == 2 - stream_ids = {e.stream_id for e in received} - assert stream_ids == {"a", "b"} - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_filter_stream_subscription(self): - """Subscribers can filter by stream_id (source_id).""" - bus = EventBus() - a_events = [] - b_events = [] - - async def handle_a(event): - a_events.append(event) - - async def handle_b(event): - b_events.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_a, filter_stream="a") - bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_b, filter_stream="b") - - server = _make_server( - bus, - [ - WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]), - WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - await session.post(f"{_base_url(server)}/webhooks/a", json={"x": 1}) - await session.post(f"{_base_url(server)}/webhooks/b", json={"x": 2}) - - await asyncio.sleep(0.05) - - assert len(a_events) == 1 - assert a_events[0].data["payload"] == {"x": 1} - 
assert len(b_events) == 1 - assert b_events[0].data["payload"] == {"x": 2} - finally: - await server.stop() - - -class TestHMACVerification: - """Tests for HMAC-SHA256 signature verification.""" - - @pytest.mark.asyncio - async def test_valid_signature_accepted(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - secret = "test-secret-key" - server = _make_server( - bus, - [ - WebhookRoute( - source_id="secure", - path="/webhooks/secure", - methods=["POST"], - secret=secret, - ), - ], - ) - await server.start() - - try: - body = json.dumps({"event": "push"}).encode() - sig = hmac_mod.new(secret.encode(), body, hashlib.sha256).hexdigest() - - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/secure", - data=body, - headers={ - "Content-Type": "application/json", - "X-Hub-Signature-256": f"sha256={sig}", - }, - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.05) - assert len(received) == 1 - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_invalid_signature_rejected(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute( - source_id="secure", - path="/webhooks/secure", - methods=["POST"], - secret="real-secret", - ), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/secure", - json={"event": "push"}, - headers={"X-Hub-Signature-256": "sha256=invalidsignature"}, - ) as resp: - assert resp.status == 401 - - await asyncio.sleep(0.05) - assert len(received) == 0 # No event published - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_missing_signature_rejected(self): - bus = EventBus() - received = [] 
- - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute( - source_id="secure", - path="/webhooks/secure", - methods=["POST"], - secret="my-secret", - ), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - # No X-Hub-Signature-256 header - async with session.post( - f"{_base_url(server)}/webhooks/secure", - json={"event": "push"}, - ) as resp: - assert resp.status == 401 - - await asyncio.sleep(0.05) - assert len(received) == 0 - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_no_secret_skips_verification(self): - """Routes without a secret accept any request.""" - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute( - source_id="open", - path="/webhooks/open", - methods=["POST"], - secret=None, - ), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/open", - json={"data": "test"}, - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.05) - assert len(received) == 1 - finally: - await server.stop() - - -class TestEventDrivenEntryPoints: - """Tests for event-driven entry points wired through AgentRuntime.""" - - def _make_graph_and_goal(self): - """Minimal graph + goal for testing entry point triggering.""" - from framework.graph import Goal - from framework.graph.edge import GraphSpec - from framework.graph.goal import SuccessCriterion - from framework.graph.node import NodeSpec - - nodes = [ - NodeSpec( - id="process-event", - name="Process Event", - description="Process incoming event", - node_type="event_loop", - input_keys=["event"], - output_keys=["result"], - ), - ] - graph = GraphSpec( - id="test-graph", - goal_id="test-goal", - 
version="1.0.0", - entry_node="process-event", - entry_points={"start": "process-event"}, - terminal_nodes=[], - pause_nodes=[], - nodes=nodes, - edges=[], - ) - goal = Goal( - id="test-goal", - name="Test Goal", - description="Test", - success_criteria=[ - SuccessCriterion( - id="sc-1", - description="Done", - metric="done", - target="yes", - weight=1.0, - ), - ], - ) - return graph, goal - - @pytest.mark.asyncio - async def test_event_entry_point_subscribes_to_bus(self): - """Entry point with trigger_type='event' subscribes and triggers on matching events.""" - graph, goal = self._make_graph_and_goal() - - config = AgentRuntimeConfig( - webhook_host="127.0.0.1", - webhook_port=0, - webhook_routes=[ - {"source_id": "gh", "path": "/webhooks/github"}, - ], - ) - - with tempfile.TemporaryDirectory() as tmpdir: - runtime = AgentRuntime( - graph=graph, - goal=goal, - storage_path=Path(tmpdir), - config=config, - ) - - runtime.register_entry_point( - EntryPointSpec( - id="gh-handler", - name="GitHub Handler", - entry_node="process-event", - trigger_type="event", - trigger_config={ - "event_types": ["webhook_received"], - "filter_stream": "gh", - }, - ) - ) - - trigger_calls = [] - - async def mock_trigger(ep_id, data, **kwargs): - trigger_calls.append((ep_id, data)) - - with patch.object(runtime, "trigger", side_effect=mock_trigger): - await runtime.start() - - try: - assert runtime.webhook_server is not None - assert runtime.webhook_server.is_running - - port = runtime.webhook_server.port - async with aiohttp.ClientSession() as session: - async with session.post( - f"http://127.0.0.1:{port}/webhooks/github", - json={"action": "push", "ref": "main"}, - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.1) - - assert len(trigger_calls) == 1 - ep_id, data = trigger_calls[0] - assert ep_id == "gh-handler" - assert "event" in data - assert data["event"]["type"] == "webhook_received" - assert data["event"]["stream_id"] == "gh" - assert 
data["event"]["data"]["payload"] == { - "action": "push", - "ref": "main", - } - finally: - await runtime.stop() - - assert runtime.webhook_server is None - - @pytest.mark.asyncio - async def test_event_entry_point_filter_stream(self): - """Entry point only triggers for matching stream_id (source_id).""" - graph, goal = self._make_graph_and_goal() - - config = AgentRuntimeConfig( - webhook_routes=[ - {"source_id": "github", "path": "/webhooks/github"}, - {"source_id": "stripe", "path": "/webhooks/stripe"}, - ], - webhook_port=0, - ) - - with tempfile.TemporaryDirectory() as tmpdir: - runtime = AgentRuntime( - graph=graph, - goal=goal, - storage_path=Path(tmpdir), - config=config, - ) - - runtime.register_entry_point( - EntryPointSpec( - id="gh-only", - name="GitHub Only", - entry_node="process-event", - trigger_type="event", - trigger_config={ - "event_types": ["webhook_received"], - "filter_stream": "github", - }, - ) - ) - - trigger_calls = [] - - async def mock_trigger(ep_id, data, **kwargs): - trigger_calls.append((ep_id, data)) - - with patch.object(runtime, "trigger", side_effect=mock_trigger): - await runtime.start() - - try: - port = runtime.webhook_server.port - async with aiohttp.ClientSession() as session: - # POST to stripe — should NOT trigger - await session.post( - f"http://127.0.0.1:{port}/webhooks/stripe", - json={"type": "payment"}, - ) - # POST to github — should trigger - await session.post( - f"http://127.0.0.1:{port}/webhooks/github", - json={"action": "opened"}, - ) - - await asyncio.sleep(0.1) - - assert len(trigger_calls) == 1 - assert trigger_calls[0][0] == "gh-only" - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_no_webhook_routes_skips_server(self): - """Runtime without webhook_routes does not start a webhook server.""" - graph, goal = self._make_graph_and_goal() - - with tempfile.TemporaryDirectory() as tmpdir: - runtime = AgentRuntime( - graph=graph, - goal=goal, - storage_path=Path(tmpdir), - ) - - 
runtime.register_entry_point( - EntryPointSpec( - id="manual", - name="Manual", - entry_node="process-event", - trigger_type="manual", - ) - ) - - await runtime.start() - try: - assert runtime.webhook_server is None - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_event_entry_point_custom_event(self): - """Entry point can subscribe to CUSTOM events, not just webhooks.""" - graph, goal = self._make_graph_and_goal() - - with tempfile.TemporaryDirectory() as tmpdir: - runtime = AgentRuntime( - graph=graph, - goal=goal, - storage_path=Path(tmpdir), - ) - - runtime.register_entry_point( - EntryPointSpec( - id="custom-handler", - name="Custom Handler", - entry_node="process-event", - trigger_type="event", - trigger_config={ - "event_types": ["custom"], - }, - ) - ) - - trigger_calls = [] - - async def mock_trigger(ep_id, data, **kwargs): - trigger_calls.append((ep_id, data)) - - with patch.object(runtime, "trigger", side_effect=mock_trigger): - await runtime.start() - - try: - await runtime.event_bus.publish( - AgentEvent( - type=EventType.CUSTOM, - stream_id="some-source", - data={"key": "value"}, - ) - ) - - await asyncio.sleep(0.1) - - assert len(trigger_calls) == 1 - assert trigger_calls[0][0] == "custom-handler" - assert trigger_calls[0][1]["event"]["type"] == "custom" - assert trigger_calls[0][1]["event"]["data"]["key"] == "value" - finally: - await runtime.stop() diff --git a/core/framework/schemas/agent_config.py b/core/framework/schemas/agent_config.py new file mode 100644 index 00000000..7c65c844 --- /dev/null +++ b/core/framework/schemas/agent_config.py @@ -0,0 +1,192 @@ +"""Declarative agent configuration schema. + +Allows defining agents via JSON/YAML config files instead of Python modules. +The ``AgentConfig`` model is the top-level schema loaded from ``agent.json``. +The runner detects this format by checking for a ``name`` key at the top level. 
+ +Template variables +------------------ +System prompts and identity_prompt support ``{{variable_name}}`` placeholders. +These are resolved at load time from ``AgentConfig.variables``. +""" + +from __future__ import annotations + +from pydantic import BaseModel, Field + + +class ToolAccessConfig(BaseModel): + """Declarative tool access policy. + + Controls which tools a node/agent has access to. + + * ``all`` -- every tool from the registry. + * ``explicit`` -- only tools listed in ``allowed`` (default; empty = zero tools). + * ``none`` -- no tools at all. + """ + + policy: str = Field( + default="explicit", + description="One of: 'all', 'explicit', 'none'.", + ) + allowed: list[str] = Field( + default_factory=list, + description="Tool names when policy='explicit'.", + ) + denied: list[str] = Field( + default_factory=list, + description="Tool names to deny (applied after allowed).", + ) + + +class NodeConfig(BaseModel): + """Declarative node definition.""" + + id: str + name: str | None = None + description: str | None = None + node_type: str = Field( + default="event_loop", + description="event_loop", + ) + system_prompt: str | None = None + tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig) + model: str | None = None + input_keys: list[str] = Field(default_factory=list) + output_keys: list[str] = Field(default_factory=list) + nullable_output_keys: list[str] = Field(default_factory=list) + max_iterations: int = 30 + max_node_visits: int = 1 + client_facing: bool = False + success_criteria: str | None = None + failure_criteria: str | None = None + skip_judge: bool = False + max_retries: int | None = None + + +class EdgeConfig(BaseModel): + """Declarative edge definition.""" + + from_node: str = Field(description="Source node ID.") + to_node: str = Field(description="Target node ID.") + condition: str = Field( + default="on_success", + description="always | on_success | on_failure | conditional | llm_decide", + ) + condition_expr: str | None = None 
+ input_mapping: dict[str, str] = Field(default_factory=dict) + priority: int = 1 + + +class GoalConfig(BaseModel): + """Simplified goal definition for declarative config.""" + + description: str + success_criteria: list[str] = Field(default_factory=list) + constraints: list[str] = Field(default_factory=list) + + +class EntryPointConfig(BaseModel): + """Entry point configuration.""" + + id: str = "default" + name: str = "Default" + entry_node: str | None = None # defaults to AgentConfig.entry_node + trigger_type: str = Field( + default="manual", + description="manual | scheduled | timer", + ) + trigger_config: dict = Field(default_factory=dict) + isolation_level: str = "shared" + max_concurrent: int | None = None + + +class MCPServerRef(BaseModel): + """Reference to an MCP server to connect for this agent.""" + + name: str + config: dict | None = None + + +class MetadataConfig(BaseModel): + """Agent metadata for display / intro messages.""" + + intro_message: str = "" + + +class AgentConfig(BaseModel): + """Top-level declarative agent configuration. + + Load from ``agent.json`` and pass to + :func:`framework.runner.runner.load_agent_config` to build the + ``GraphSpec`` + ``Goal`` pair. + + Example (YAML):: + + name: lead-enrichment-agent + version: 1.0.0 + variables: + spreadsheet_id: "1ZVx..." + sheet_name: "contacts" + goal: + description: "Enrich leads in Google Sheets" + success_criteria: + - "All unprocessed leads enriched" + constraints: + - "Browser-only research" + identity_prompt: | + You are the Lead Enrichment Agent... + nodes: + - id: start + tools: {policy: explicit, allowed: [google_sheets_get_values]} + system_prompt: | + Spreadsheet ID: {{spreadsheet_id}} + ... 
+ """ + + name: str + version: str = "1.0.0" + description: str | None = None + metadata: MetadataConfig = Field(default_factory=MetadataConfig) + + # Template variables -- substituted into prompts via {{var_name}} + variables: dict[str, str] = Field(default_factory=dict) + + # Goal + goal: GoalConfig + + # Graph structure + nodes: list[NodeConfig] + edges: list[EdgeConfig] + entry_node: str + terminal_nodes: list[str] = Field(default_factory=list) + pause_nodes: list[str] = Field(default_factory=list) + + # Entry points (if omitted, a single "default" manual entry is created) + entry_points: list[EntryPointConfig] = Field(default_factory=list) + + # Agent-level tool defaults (nodes inherit unless they override) + tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig) + mcp_servers: list[MCPServerRef] = Field(default_factory=list) + + # LLM / execution + model: str | None = None + max_tokens: int = 4096 + conversation_mode: str = "continuous" + identity_prompt: str = "" + loop_config: dict = Field( + default_factory=lambda: { + "max_iterations": 100, + "max_tool_calls_per_turn": 30, + "max_context_tokens": 32000, + }, + ) + + # Pipeline overrides (per-agent, merged with global config) + pipeline: dict = Field( + default_factory=dict, + description="Per-agent pipeline stage overrides. 
Same format as global pipeline config.", + ) + + # Resource limits + max_cost_per_run: float | None = None diff --git a/core/framework/schemas/session_state.py b/core/framework/schemas/session_state.py index 7b143985..4fcecd7c 100644 --- a/core/framework/schemas/session_state.py +++ b/core/framework/schemas/session_state.py @@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Any from pydantic import AliasChoices, BaseModel, Field, computed_field if TYPE_CHECKING: - from framework.graph.executor import ExecutionResult + from framework.orchestrator.orchestrator import ExecutionResult from framework.schemas.run import Run diff --git a/core/framework/server/app.py b/core/framework/server/app.py index 32c2c081..e014f01d 100644 --- a/core/framework/server/app.py +++ b/core/framework/server/app.py @@ -28,8 +28,11 @@ def _get_allowed_agent_roots() -> tuple[Path, ...]: """ global _ALLOWED_AGENT_ROOTS if _ALLOWED_AGENT_ROOTS is None: + from framework.config import COLONIES_DIR + _ALLOWED_AGENT_ROOTS = ( - (_REPO_ROOT / "exports").resolve(), + COLONIES_DIR.resolve(), # ~/.hive/colonies/ + (_REPO_ROOT / "exports").resolve(), # compat fallback (_REPO_ROOT / "examples").resolve(), (Path.home() / ".hive" / "agents").resolve(), ) @@ -53,7 +56,8 @@ def validate_agent_path(agent_path: str | Path) -> Path: if resolved.is_relative_to(root) and resolved != root: return resolved raise ValueError( - "agent_path must be inside an allowed directory (exports/, examples/, or ~/.hive/agents/)" + "agent_path must be inside an allowed directory " + "(~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)" ) diff --git a/core/framework/server/queen_orchestrator.py b/core/framework/server/queen_orchestrator.py index 43f5c5dd..4022a5ab 100644 --- a/core/framework/server/queen_orchestrator.py +++ b/core/framework/server/queen_orchestrator.py @@ -32,7 +32,7 @@ async def create_queen( """ from framework.agents.queen.agent import ( queen_goal, - queen_graph as _queen_graph, + queen_loop_config as 
_base_loop_config, ) from framework.agents.queen.nodes import ( _QUEEN_BUILDING_TOOLS, @@ -65,18 +65,15 @@ async def create_queen( _shared_building_knowledge, ) from framework.agents.queen.nodes.thinking_hook import select_expert_persona - from framework.graph.event_loop_node import HookContext, HookResult - from framework.graph.executor import GraphExecutor - from framework.runner.mcp_registry import MCPRegistry - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.core import Runtime - from framework.runtime.event_bus import AgentEvent, EventType + from framework.agent_loop.agent_loop import HookContext, HookResult + from framework.loader.mcp_registry import MCPRegistry + from framework.loader.tool_registry import ToolRegistry + from framework.host.event_bus import AgentEvent, EventType from framework.tools.queen_lifecycle_tools import ( QueenPhaseState, register_queen_lifecycle_tools, ) - hive_home = Path.home() / ".hive" # ---- Tool registry ------------------------------------------------ queen_registry = ToolRegistry() @@ -194,7 +191,7 @@ async def create_queen( phase_state.global_memory_dir = global_dir # ---- Compose phase-specific prompts ------------------------------ - _orig_node = _queen_graph.nodes[0] + from framework.agents.queen.nodes import queen_node as _orig_node if worker_identity is None: worker_identity = ( @@ -348,61 +345,81 @@ async def create_queen( if set(available_tools) != set(declared_tools): missing = sorted(set(declared_tools) - registered_tool_names) if missing: - logger.warning("Queen: tools not available: %s", missing) + logger.debug("Queen: tools not yet available (registered on worker load): %s", missing) node_updates["tools"] = available_tools adjusted_node = _orig_node.model_copy(update=node_updates) _queen_loop_config = { - **(_queen_graph.loop_config or {}), + **_base_loop_config, "hooks": {"session_start": [_persona_hook]}, } - queen_graph = _queen_graph.model_copy( - update={"nodes": 
[adjusted_node], "loop_config": _queen_loop_config} - ) - # ---- Queen event loop -------------------------------------------- - queen_runtime = Runtime(hive_home / "queen") + # ---- Queen event loop (AgentLoop directly, no Orchestrator) ------- + from types import SimpleNamespace + + from framework.agent_loop.agent_loop import AgentLoop, LoopConfig + from framework.storage.conversation_store import FileConversationStore + from framework.orchestrator.node import DataBuffer, NodeContext async def _queen_loop(): logger.debug("[_queen_loop] Starting queen loop for session %s", session.id) try: - logger.debug("[_queen_loop] Creating GraphExecutor...") - executor = GraphExecutor( - runtime=queen_runtime, - llm=session.llm, - tools=queen_tools, - tool_executor=queen_tool_executor, + # Build LoopConfig from the queen graph's config + persona hook + lc = _queen_loop_config + queen_loop_config = LoopConfig( + max_iterations=lc.get("max_iterations", 999_999), + max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 30), + max_context_tokens=lc.get("max_context_tokens", 180_000), + hooks=lc.get("hooks", {}), + ) + + # Create AgentLoop directly -- no Orchestrator, no graph traversal + agent_loop = AgentLoop( event_bus=session.event_bus, + config=queen_loop_config, + tool_executor=queen_tool_executor, + conversation_store=FileConversationStore(queen_dir / "conversations"), + ) + + # Build NodeContext manually + from framework.tracker.decision_tracker import DecisionTracker + + ctx = NodeContext( + runtime=DecisionTracker(queen_dir), + node_id="queen", + node_spec=adjusted_node, + buffer=DataBuffer(), + llm=session.llm, + available_tools=queen_tools, + goal_context=queen_goal.description, + max_tokens=lc.get("max_tokens", 8192), stream_id="queen", - storage_path=queen_dir, - loop_config=_queen_loop_config, execution_id=session.id, dynamic_tools_provider=phase_state.get_current_tools, dynamic_prompt_provider=phase_state.get_current_prompt, iteration_metadata_provider=lambda: 
{"phase": phase_state.phase}, - skill_dirs=_queen_skill_dirs, - protocols_prompt=phase_state.protocols_prompt, skills_catalog_prompt=phase_state.skills_catalog_prompt, + protocols_prompt=phase_state.protocols_prompt, + skill_dirs=_queen_skill_dirs, + ) + + # Expose for chat handler injection (node_registry compat) + session.queen_executor = SimpleNamespace( + node_registry={"queen": agent_loop}, ) - session.queen_executor = executor - logger.debug("[_queen_loop] GraphExecutor created and stored in session.queen_executor") # Wire inject_notification so phase switches notify the queen LLM async def _inject_phase_notification(content: str) -> None: - node = executor.node_registry.get("queen") - if node is not None and hasattr(node, "inject_event"): - await node.inject_event(content) + await agent_loop.inject_event(content) phase_state.inject_notification = _inject_phase_notification # Auto-switch to editing when worker execution finishes. - # The worker stays loaded — queen can tweak config and re-run. async def _on_worker_done(event): if event.stream_id == "queen": return if phase_state.phase == "running": if event.type == EventType.EXECUTION_COMPLETED: - # Mark worker as configured after first successful run session.worker_configured = True output = event.data.get("output", {}) output_summary = "" @@ -420,7 +437,7 @@ async def create_queen( "Ask if they want to re-run with different input " "or tweak the configuration." ) - else: # EXECUTION_FAILED + else: error = event.data.get("error", "Unknown error") notification = ( "[WORKER_TERMINAL] Worker failed.\n" @@ -430,17 +447,14 @@ async def create_queen( "building/planning if code changes are needed." 
) - node = executor.node_registry.get("queen") - if node is not None and hasattr(node, "inject_event"): - await node.inject_event(notification) - + await agent_loop.inject_event(notification) await phase_state.switch_to_editing(source="auto") session.event_bus.subscribe( event_types=[EventType.EXECUTION_COMPLETED, EventType.EXECUTION_FAILED], handler=_on_worker_done, ) - session_manager._subscribe_worker_handoffs(session, executor) + session_manager._subscribe_worker_handoffs(session, session.queen_executor) # ---- Global memory reflection + recall ------------------------- from framework.agents.queen.reflection_agent import subscribe_reflection_triggers @@ -459,23 +473,23 @@ async def create_queen( len(phase_state.get_current_tools()), [t.name for t in phase_state.get_current_tools()], ) - logger.debug("[_queen_loop] Calling executor.execute()...") - result = await executor.execute( - graph=queen_graph, - goal=queen_goal, - input_data={"greeting": initial_prompt or "Session started."}, - session_state={"resume_session_id": session.id}, - ) - logger.debug( - "[_queen_loop] executor.execute() returned with success=%s", result.success - ) - if result.success: - logger.warning("Queen executor returned (should be forever-alive)") - else: - logger.error( - "Queen executor failed: %s", - result.error or "(no error message)", - ) + + # Set the first user message. + # When initial_prompt is None (user opens UI without ?prompt=), + # use a generic greeting so the queen has a user message to + # respond to. The user's real first question arrives via /chat. 
+ ctx.input_data = { + "user_request": initial_prompt or "Hello", + } + + # Run the queen -- forever-alive conversation loop + result = await agent_loop.execute(ctx) + + if result.stop_reason == "complete": + logger.warning("Queen returned (should be forever-alive)") + elif result.error: + logger.error("Queen failed: %s", result.error) + except asyncio.CancelledError: logger.info("[_queen_loop] Queen loop cancelled (normal shutdown)") raise @@ -484,7 +498,8 @@ async def create_queen( raise finally: logger.warning( - "[_queen_loop] Queen loop exiting — clearing queen_executor for session '%s'", + "[_queen_loop] Queen loop exiting — clearing queen_executor " + "for session '%s'", session.id, ) session.queen_executor = None diff --git a/core/framework/server/routes_events.py b/core/framework/server/routes_events.py index 3ef77428..f0c1616e 100644 --- a/core/framework/server/routes_events.py +++ b/core/framework/server/routes_events.py @@ -6,7 +6,7 @@ import logging from aiohttp import web from aiohttp.client_exceptions import ClientConnectionResetError as _AiohttpConnReset -from framework.runtime.event_bus import AgentEvent, EventType +from framework.host.event_bus import AgentEvent, EventType from framework.server.app import resolve_session logger = logging.getLogger(__name__) diff --git a/core/framework/server/routes_execution.py b/core/framework/server/routes_execution.py index f757746b..5efa9c94 100644 --- a/core/framework/server/routes_execution.py +++ b/core/framework/server/routes_execution.py @@ -8,7 +8,7 @@ from typing import Any from aiohttp import web from framework.credentials.validation import validate_agent_credentials -from framework.graph.conversation import LEGACY_RUN_ID +from framework.agent_loop.conversation import LEGACY_RUN_ID from framework.server.app import resolve_session, safe_path_segment, sessions_dir from framework.server.routes_sessions import _credential_error_response @@ -187,7 +187,7 @@ async def handle_chat(request: web.Request) -> 
web.Response: if node is not None and hasattr(node, "inject_event"): # Publish BEFORE inject_event so handlers (e.g. memory recall) # complete before the event loop unblocks and starts the LLM turn. - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType await session.event_bus.publish( AgentEvent( diff --git a/core/framework/server/routes_graphs.py b/core/framework/server/routes_graphs.py index 3b735f5f..16ce63d6 100644 --- a/core/framework/server/routes_graphs.py +++ b/core/framework/server/routes_graphs.py @@ -46,7 +46,7 @@ def _node_to_dict(node) -> dict: "client_facing": node.client_facing, "success_criteria": node.success_criteria, "system_prompt": node.system_prompt or "", - "sub_agents": node.sub_agents, + "sub_agents": getattr(node, "sub_agents", []), } diff --git a/core/framework/server/routes_sessions.py b/core/framework/server/routes_sessions.py index 04f71b08..ab98b3fb 100644 --- a/core/framework/server/routes_sessions.py +++ b/core/framework/server/routes_sessions.py @@ -527,7 +527,7 @@ async def handle_update_trigger_task(request: web.Request) -> web.Response: # Emit SSE event so the frontend updates the graph and detail panel bus = getattr(session, "event_bus", None) if bus: - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType await bus.publish( AgentEvent( @@ -583,7 +583,9 @@ async def handle_session_events_history(request: web.Request) -> web.Response: """ session_id = request.match_info["session_id"] - queen_dir = Path.home() / ".hive" / "queen" / "session" / session_id + from framework.server.session_manager import _queen_session_dir + + queen_dir = _queen_session_dir(session_id) events_path = queen_dir / "events.jsonl" if not events_path.exists(): return web.json_response({"events": [], "session_id": session_id}) @@ -608,7 +610,7 @@ async def handle_session_events_history(request: web.Request) -> 
web.Response: async def handle_session_history(request: web.Request) -> web.Response: """GET /api/sessions/history — all queen sessions on disk (live + cold). - Returns every session directory under ~/.hive/queen/session/, newest first. + Returns every queen session directory on disk, newest first. Live sessions have ``live: true, cold: false``; sessions that survived a server restart have ``live: false, cold: true``. """ @@ -634,7 +636,7 @@ async def handle_delete_history_session(request: web.Request) -> web.Response: """DELETE /api/sessions/history/{session_id} — permanently remove a session. Stops the live session (if still running) and deletes the queen session - directory from disk at ~/.hive/queen/session/{session_id}/. + directory from disk. This is the frontend 'delete from history' action. """ manager = _get_manager(request) @@ -645,7 +647,9 @@ async def handle_delete_history_session(request: web.Request) -> web.Response: await manager.stop_session(session_id) # Delete the queen session directory from disk - queen_session_dir = Path.home() / ".hive" / "queen" / "session" / session_id + from framework.server.session_manager import _queen_session_dir + + queen_session_dir = _queen_session_dir(session_id) if queen_session_dir.exists() and queen_session_dir.is_dir(): try: shutil.rmtree(queen_session_dir) @@ -743,7 +747,9 @@ async def handle_reveal_session_folder(request: web.Request) -> web.Response: session = manager.get_session(session_id) storage_session_id = (session.queen_resume_from or session.id) if session else session_id - folder = Path.home() / ".hive" / "queen" / "session" / storage_session_id + from framework.server.session_manager import _queen_session_dir + + folder = _queen_session_dir(storage_session_id) folder.mkdir(parents=True, exist_ok=True) try: diff --git a/core/framework/server/session_manager.py b/core/framework/server/session_manager.py index 25e3961f..e88ae445 100644 --- a/core/framework/server/session_manager.py +++ 
b/core/framework/server/session_manager.py @@ -19,11 +19,17 @@ from datetime import datetime from pathlib import Path from typing import Any -from framework.runtime.triggers import TriggerDefinition +from framework.config import QUEENS_DIR +from framework.host.triggers import TriggerDefinition logger = logging.getLogger(__name__) +def _queen_session_dir(session_id: str, queen_name: str = "default") -> Path: + """Return the on-disk directory for a queen session.""" + return QUEENS_DIR / queen_name / "sessions" / session_id + + @dataclass class Session: """A live session with a queen and optional worker.""" @@ -67,6 +73,10 @@ class Session: queen_resume_from: str | None = None # Queen session directory (set during _start_queen, used for shutdown reflection) queen_dir: Path | None = None + # Multi-queen support: which queen profile this session uses + queen_name: str = "default" + # Colony name: set when a worker is loaded from a colony + colony_name: str | None = None class SessionManager: @@ -86,6 +96,14 @@ class SessionManager: # reflections) so they aren't garbage-collected before completion. self._background_tasks: set[asyncio.Task] = set() + # Run one-time v2 directory structure migration + from framework.storage.migrate_v2 import run_migration + + try: + run_migration() + except Exception: + logger.warning("v2 migration failed (non-fatal)", exc_info=True) + # ------------------------------------------------------------------ # Session lifecycle # ------------------------------------------------------------------ @@ -100,7 +118,7 @@ class SessionManager: Internal helper — use create_session() or create_session_with_worker_graph(). 
""" from framework.config import RuntimeConfig, get_hive_config - from framework.runtime.event_bus import EventBus + from framework.host.event_bus import EventBus ts = datetime.now().strftime("%Y%m%d_%H%M%S") resolved_id = session_id or f"session_{ts}_{uuid.uuid4().hex[:8]}" @@ -194,9 +212,7 @@ class SessionManager: # is incomplete and will fail to import). if queen_resume_from: _resume_phase = None - _meta_path = ( - Path.home() / ".hive" / "queen" / "session" / queen_resume_from / "meta.json" - ) + _meta_path = _queen_session_dir(queen_resume_from) / "meta.json" if _meta_path.exists(): try: _meta = json.loads(_meta_path.read_text(encoding="utf-8")) @@ -281,7 +297,7 @@ class SessionManager: Sets up the runner, runtime, and session fields. Does NOT notify the queen — callers handle that step. """ - from framework.runner import AgentRunner + from framework.loader import AgentLoader agent_path = Path(agent_path) resolved_graph_id = graph_id or agent_path.name @@ -303,7 +319,7 @@ class SessionManager: resolved_model = model or session_model or self._model runner = await loop.run_in_executor( None, - lambda: AgentRunner.load( + lambda: AgentLoader.load( agent_path, model=resolved_model, interactive=False, @@ -536,7 +552,7 @@ class SessionManager: # Update meta.json so cold-restore can discover this session by agent_path storage_session_id = session.queen_resume_from or session.id - meta_path = Path.home() / ".hive" / "queen" / "session" / storage_session_id / "meta.json" + meta_path = _queen_session_dir(storage_session_id, session.queen_name) / "meta.json" try: _agent_name = ( session.worker_info.name @@ -644,10 +660,11 @@ class SessionManager: task = asyncio.create_task( asyncio.shield(run_shutdown_reflection(session.queen_dir, session.llm)), + name=f"shutdown-reflect-{session_id}", ) + logger.info("Session '%s': shutdown reflection spawned", session_id) self._background_tasks.add(task) task.add_done_callback(self._background_tasks.discard) - logger.info("Session 
'%s': shutdown reflection spawned", session_id) except Exception: logger.warning( "Session '%s': failed to spawn shutdown reflection", session_id, exc_info=True @@ -721,7 +738,7 @@ class SessionManager: def _subscribe_worker_handoffs(self, session: Session, executor: Any) -> None: """Subscribe queen to worker/subagent escalation handoff events.""" - from framework.runtime.event_bus import EventType as _ET + from framework.host.event_bus import EventType as _ET if session.worker_handoff_sub is not None: session.event_bus.unsubscribe(session.worker_handoff_sub) @@ -755,13 +772,11 @@ class SessionManager: session.queen_executor, ) - hive_home = Path.home() / ".hive" - # Determine which session directory to use for queen storage. # When queen_resume_from is set we write to the ORIGINAL session's # directory so that all messages accumulate in one place. storage_session_id = session.queen_resume_from or session.id - queen_dir = hive_home / "queen" / "session" / storage_session_id + queen_dir = _queen_session_dir(storage_session_id, session.queen_name) queen_dir.mkdir(parents=True, exist_ok=True) session.queen_dir = queen_dir @@ -920,7 +935,7 @@ class SessionManager: async def _emit_graph_loaded(self, session: Session) -> None: """Publish a WORKER_GRAPH_LOADED event so the frontend can update.""" - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType info = session.worker_info await session.event_bus.publish( @@ -939,7 +954,7 @@ class SessionManager: async def _emit_flowchart_on_restore(self, session: Session, agent_path: str | Path) -> None: """Emit FLOWCHART_MAP_UPDATED from persisted flowchart file on cold restore.""" - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType from framework.tools.flowchart_utils import load_flowchart_file original_draft, flowchart_map = load_flowchart_file(agent_path) @@ -982,7 +997,7 @@ class 
SessionManager: triggers: dict[str, TriggerDefinition], ) -> None: """Emit TRIGGER_AVAILABLE or TRIGGER_REMOVED events for each trigger.""" - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType event_type = ( EventType.TRIGGER_AVAILABLE if kind == "available" else EventType.TRIGGER_REMOVED @@ -1076,10 +1091,10 @@ class SessionManager: """Return disk metadata for a session that is no longer live in memory. Checks whether queen conversation files exist at - ~/.hive/queen/session/{session_id}/conversations/. Returns None when + ~/.hive/agents/queens/{name}/sessions/{session_id}/conversations/. Returns None when no data is found so callers can fall through to a 404. """ - queen_dir = Path.home() / ".hive" / "queen" / "session" / session_id + queen_dir = _queen_session_dir(session_id) convs_dir = queen_dir / "conversations" if not convs_dir.exists(): return None @@ -1134,7 +1149,7 @@ class SessionManager: @staticmethod def list_cold_sessions() -> list[dict]: """Return metadata for every queen session directory on disk, newest first.""" - queen_sessions_dir = Path.home() / ".hive" / "queen" / "session" + queen_sessions_dir = QUEENS_DIR / "default" / "sessions" if not queen_sessions_dir.exists(): return [] diff --git a/core/framework/server/tests/test_api.py b/core/framework/server/tests/test_api.py index ef7cc905..e7e60081 100644 --- a/core/framework/server/tests/test_api.py +++ b/core/framework/server/tests/test_api.py @@ -14,7 +14,7 @@ from unittest.mock import AsyncMock, MagicMock import pytest from aiohttp.test_utils import TestClient, TestServer -from framework.runtime.triggers import TriggerDefinition +from framework.host.triggers import TriggerDefinition from framework.server.app import create_app from framework.server.session_manager import Session @@ -1055,7 +1055,7 @@ class TestNodeCriteria: nodes, edges = nodes_and_edges # Create a real RuntimeLogStore pointed at the temp agent dir - from 
framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) @@ -1110,7 +1110,7 @@ class TestLogs: session_id, session_dir, state = sample_session tmp_path, agent_name, base = tmp_agent_dir - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1132,7 +1132,7 @@ class TestLogs: session_id, session_dir, state = custom_id_session tmp_path, agent_name, base = tmp_agent_dir - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1154,7 +1154,7 @@ class TestLogs: session_id, session_dir, state = sample_session tmp_path, agent_name, base = tmp_agent_dir - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1177,7 +1177,7 @@ class TestLogs: session_id, session_dir, state = sample_session tmp_path, agent_name, base = tmp_agent_dir - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1201,7 +1201,7 @@ class TestLogs: session_id, session_dir, state = sample_session tmp_path, agent_name, base = tmp_agent_dir - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1227,7 +1227,7 @@ class TestNodeLogs: tmp_path, agent_name, base = tmp_agent_dir nodes, edges = nodes_and_edges - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import 
RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1256,7 +1256,7 @@ class TestNodeLogs: @pytest.mark.asyncio async def test_node_logs_missing_session_id(self, nodes_and_edges): nodes, edges = nodes_and_edges - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(Path("/tmp/dummy")) session = _make_session(nodes=nodes, edges=edges, log_store=log_store) diff --git a/core/framework/skills/_default_skills/browser-automation/SKILL.md b/core/framework/skills/_default_skills/browser-automation/SKILL.md new file mode 100644 index 00000000..bc481fc4 --- /dev/null +++ b/core/framework/skills/_default_skills/browser-automation/SKILL.md @@ -0,0 +1,80 @@ +--- +name: hive.browser-automation +description: Best practices for browser automation via gcu-tools MCP server (reading pages, navigation, scrolling, tab management, shadow DOM, coordinates). +metadata: + author: hive + type: default-skill +--- + +## Operational Protocol: Browser Automation + +Follow these rules for reliable, efficient browser interaction. + +### Reading Pages +- ALWAYS prefer `browser_snapshot` over `browser_get_text("body")` -- it returns a compact ~1-5 KB accessibility tree vs 100+ KB of raw HTML. +- Interaction tools (`browser_click`, `browser_type`, `browser_fill`, `browser_scroll`, etc.) return a page snapshot automatically in their result. Use it to decide your next action -- do NOT call `browser_snapshot` separately after every action. Only call `browser_snapshot` when you need a fresh view without performing an action, or after setting `auto_snapshot=false`. +- Do NOT use `browser_screenshot` to read text -- use `browser_snapshot` for that (compact, searchable, fast). +- DO use `browser_screenshot` when you need visual context: charts, images, canvas elements, layout verification, or when the snapshot doesn't capture what you need. 
+- Only fall back to `browser_get_text` for extracting specific small elements by CSS selector. + +### Navigation & Waiting +- `browser_navigate` and `browser_open` already wait for the page to load. Do NOT call `browser_wait` with no arguments after navigation -- it wastes time. Only use `browser_wait` when you need a *specific element* or *text* to appear (pass `selector` or `text`). +- NEVER re-navigate to the same URL after scrolling -- this resets your scroll position and loses loaded content. + +### Scrolling +- Use large scroll amounts ~2000 when loading more content -- sites like twitter and linkedin have lazy loading for paging. +- The scroll result includes a snapshot automatically -- no need to call `browser_snapshot` separately. + +### Batching Actions +- You can call multiple tools in a single turn -- they execute in parallel. ALWAYS batch independent actions together. Examples: fill multiple form fields in one turn, navigate + snapshot in one turn, click + scroll if targeting different elements. +- When batching, set `auto_snapshot=false` on all but the last action to avoid redundant snapshots. +- Aim for 3-5 tool calls per turn minimum. One tool call per turn is wasteful. + +### Error Recovery +- If a tool fails, retry once with the same approach. +- If it fails a second time, STOP retrying and switch approach. +- If `browser_snapshot` fails, try `browser_get_text` with a specific small selector as fallback. +- If `browser_open` fails or page seems stale, `browser_stop`, then `browser_start`, then retry. + +### Tab Management +**Close tabs as soon as you are done with them** -- not only at the end of the task. After reading or extracting data from a tab, close it immediately. + +- Finished reading/extracting from a tab? `browser_close(target_id=...)` +- Completed a multi-tab workflow? `browser_close_finished()` to clean up all your tabs +- More than 3 tabs open? Stop and close finished ones before opening more +- Popup appeared that you didn't need? 
Close it immediately + +`browser_tabs` returns an `origin` field for each tab: +- `"agent"` -- you opened it; you own it; close it when done +- `"popup"` -- opened by a link or script; close after extracting what you need +- `"startup"` or `"user"` -- leave these alone unless the task requires it + +Never accumulate tabs. Treat every tab you open as a resource you must free. + +### Shadow DOM & Overlays +Some sites (LinkedIn messaging, etc.) render content inside closed shadow roots invisible to regular DOM queries. + +- `browser_shadow_query("#interop-outlet >>> #msg-overlay >>> p")` -- uses `>>>` to pierce shadow roots. Returns `rect` in CSS pixels and `physicalRect` ready for coordinate tools. +- `browser_get_rect(selector="...", pierce_shadow=true)` -- get physical rect for any element including shadow DOM. + +### Coordinate System +There are THREE coordinate spaces. Using the wrong one causes clicks/hovers to land in the wrong place. + +| Space | Used by | How to get | +|---|---|---| +| Physical pixels | `browser_click_coordinate` | `browser_coords` `physical_x/y` | +| CSS pixels | `getBoundingClientRect()`, `elementFromPoint` | `browser_coords` `css_x/y` | +| Screenshot pixels | What you see in the image | Raw position in screenshot | + +**Converting screenshot to physical**: `browser_coords(x, y)` then use `physical_x/y`. +**Converting CSS to physical**: multiply by `window.devicePixelRatio` (typically 1.6 on HiDPI). +**Never** pass raw `getBoundingClientRect()` values to coordinate tools without multiplying by DPR first. + +### Login & Auth Walls +- If you see a "Log in" or "Sign up" prompt, report the auth wall immediately -- do NOT attempt to log in. +- Check for cookie consent banners and dismiss them if they block content. + +### Efficiency +- Minimize tool calls -- combine actions where possible. +- When a snapshot result is saved to a spillover file, use `run_command` with grep to extract specific data rather than re-reading the full file. 
+- Call `set_output` in the same turn as your last browser action when possible -- don't waste a turn. diff --git a/core/framework/skills/catalog.py b/core/framework/skills/catalog.py index 3621dbe1..08d3285e 100644 --- a/core/framework/skills/catalog.py +++ b/core/framework/skills/catalog.py @@ -64,15 +64,14 @@ class SkillCatalog: Returns empty string if no community/user skills are discovered (default skills are handled separately by DefaultSkillManager). """ - # Filter out framework-scope skills (default skills) — they're - # injected via the protocols prompt, not the catalog - community_skills = [s for s in self._skills.values() if s.source_scope != "framework"] + # All skills go through the catalog for progressive disclosure. + all_skills = list(self._skills.values()) - if not community_skills: + if not all_skills: return "" lines = [""] - for skill in sorted(community_skills, key=lambda s: s.name): + for skill in sorted(all_skills, key=lambda s: s.name): lines.append(" ") lines.append(f" {escape(skill.name)}") lines.append(f" {escape(skill.description)}") diff --git a/core/framework/skills/discovery.py b/core/framework/skills/discovery.py index 2db1a78b..cd0ab6eb 100644 --- a/core/framework/skills/discovery.py +++ b/core/framework/skills/discovery.py @@ -56,6 +56,16 @@ class SkillDiscovery: def __init__(self, config: DiscoveryConfig | None = None): self._config = config or DiscoveryConfig() + self._scanned_dirs: list[Path] = [] + + @property + def scanned_directories(self) -> list[str]: + """Return the skill directories that were scanned during discovery. + + Populated after :meth:`discover` runs. Used by the hot-reload + watcher to know which directories to monitor for changes. + """ + return [str(d) for d in self._scanned_dirs if d.exists()] def discover(self) -> list[ParsedSkill]: """Scan all scopes and return deduplicated skill list. @@ -70,11 +80,13 @@ class SkillDiscovery: Later entries override earlier ones on name collision. 
""" all_skills: list[ParsedSkill] = [] + self._scanned_dirs = [] # Framework scope (lowest precedence) if not self._config.skip_framework_scope: framework_dir = Path(__file__).parent / "_default_skills" if framework_dir.is_dir(): + self._scanned_dirs.append(framework_dir) all_skills.extend(self._scan_scope(framework_dir, "framework")) # User scope @@ -84,11 +96,13 @@ class SkillDiscovery: # Cross-client (lower precedence within user scope) user_agents = home / ".agents" / "skills" if user_agents.is_dir(): + self._scanned_dirs.append(user_agents) all_skills.extend(self._scan_scope(user_agents, "user")) # Hive-specific (higher precedence within user scope) user_hive = home / ".hive" / "skills" if user_hive.is_dir(): + self._scanned_dirs.append(user_hive) all_skills.extend(self._scan_scope(user_hive, "user")) # Project scope (highest precedence) @@ -98,11 +112,13 @@ class SkillDiscovery: # Cross-client project_agents = root / ".agents" / "skills" if project_agents.is_dir(): + self._scanned_dirs.append(project_agents) all_skills.extend(self._scan_scope(project_agents, "project")) # Hive-specific project_hive = root / ".hive" / "skills" if project_hive.is_dir(): + self._scanned_dirs.append(project_hive) all_skills.extend(self._scan_scope(project_hive, "project")) resolved = self._resolve_collisions(all_skills) diff --git a/core/framework/skills/manager.py b/core/framework/skills/manager.py index 9c1b4b80..5f9006fd 100644 --- a/core/framework/skills/manager.py +++ b/core/framework/skills/manager.py @@ -68,6 +68,9 @@ class SkillsManager: self._protocols_prompt: str = "" self._allowlisted_dirs: list[str] = [] self._default_mgr: object = None # DefaultSkillManager, set after load() + # Hot-reload state + self._watched_dirs: list[str] = [] + self._watcher_task: object = None # asyncio.Task, set by start_watching() # ------------------------------------------------------------------ # Factory for backwards-compat bridge @@ -117,62 +120,140 @@ class SkillsManager: 
skills_config = self._config.skills_config - # 1. Community skill discovery (when project_root is available) - catalog_prompt = "" + # 1. Skill discovery -- always run to pick up framework skills; + # community/project skills only when project_root is available. + discovery = SkillDiscovery(DiscoveryConfig( + project_root=self._config.project_root, + skip_framework_scope=False, + )) + discovered = discovery.discover() + self._watched_dirs = discovery.scanned_directories + + # Trust-gate project-scope skills (AS-13) if self._config.project_root is not None and not self._config.skip_community_discovery: from framework.skills.trust import TrustGate - discovery = SkillDiscovery(DiscoveryConfig(project_root=self._config.project_root)) - discovered = discovery.discover() - - # Trust-gate project-scope skills (AS-13) discovered = TrustGate(interactive=self._config.interactive).filter_and_gate( discovered, project_dir=self._config.project_root ) - catalog = SkillCatalog(discovered) - self._allowlisted_dirs = catalog.allowlisted_dirs - catalog_prompt = catalog.to_prompt() + catalog = SkillCatalog(discovered) + self._allowlisted_dirs = catalog.allowlisted_dirs + catalog_prompt = catalog.to_prompt() - # Pre-activated community skills - if skills_config.skills: - pre_activated = catalog.build_pre_activated_prompt(skills_config.skills) - if pre_activated: - if catalog_prompt: - catalog_prompt = f"{catalog_prompt}\n\n{pre_activated}" - else: - catalog_prompt = pre_activated + # Pre-activated community skills + if skills_config.skills: + pre_activated = catalog.build_pre_activated_prompt(skills_config.skills) + if pre_activated: + if catalog_prompt: + catalog_prompt = f"{catalog_prompt}\n\n{pre_activated}" + else: + catalog_prompt = pre_activated - # 2. Default skills (always loaded unless explicitly disabled) + # 2. Default skills -- discovered via _default_skills/ and included + # in the catalog for progressive disclosure (no longer force-injected + # as protocols_prompt). 
DefaultSkillManager still handles config, + logging, and metadata. default_mgr = DefaultSkillManager(config=skills_config) default_mgr.load() default_mgr.log_active_skills() - protocols_prompt = default_mgr.build_protocols_prompt() self._default_mgr = default_mgr - # DX-3: Community skill startup summary - if self._config.project_root is not None and not self._config.skip_community_discovery: - community_count = len(catalog._skills) if catalog_prompt else 0 - pre_activated_count = len(skills_config.skills) if skills_config.skills else 0 - logger.info( - "Skills: %d community (%d catalog, %d pre-activated)", - community_count, - community_count, - pre_activated_count, - ) # 3. Cache self._catalog_prompt = catalog_prompt - self._protocols_prompt = protocols_prompt + self._protocols_prompt = "" # all skills use progressive disclosure now - if protocols_prompt: - logger.info( - "Skill system ready: protocols=%d chars, catalog=%d chars", - len(protocols_prompt), - len(catalog_prompt), - ) - else: + if not catalog_prompt: logger.warning("Skill system produced empty protocols_prompt") + # ------------------------------------------------------------------ + # Hot-reload: watch skill directories for SKILL.md changes. + # ------------------------------------------------------------------ + + async def start_watching(self) -> None: + """Start a background task watching skill directories for changes. + + When a ``SKILL.md`` file is added/modified/removed, the cached + ``skills_catalog_prompt`` is rebuilt. The next node iteration picks + up the new prompt automatically via the ``dynamic_prompt_provider``. + + Silently no-ops when ``watchfiles`` is not installed or when no + directories are being watched (e.g. bare mode, no project_root). 
+ """ + import asyncio + + try: + import watchfiles # noqa: F401 -- optional dep check + except ImportError: + logger.debug("watchfiles not installed; skill hot-reload disabled") + return + + if not self._watched_dirs: + logger.debug("No skill directories to watch; hot-reload skipped") + return + + if self._watcher_task is not None: + return # already watching + + self._watcher_task = asyncio.create_task( + self._watch_loop(), + name="skills-hot-reload", + ) + logger.info( + "Skill hot-reload enabled (watching %d directories)", + len(self._watched_dirs), + ) + + async def stop_watching(self) -> None: + """Cancel the background watcher task (if running).""" + import asyncio + + task = self._watcher_task + if task is None: + return + self._watcher_task = None + if not task.done(): # type: ignore[attr-defined] + task.cancel() # type: ignore[attr-defined] + try: + await task # type: ignore[misc] + except asyncio.CancelledError: + pass + + async def _watch_loop(self) -> None: + """Background coroutine that watches SKILL.md files and triggers reload.""" + import asyncio + + import watchfiles + + def _filter(_change: object, path: str) -> bool: + return path.endswith("SKILL.md") + + try: + async for changes in watchfiles.awatch( + *self._watched_dirs, + watch_filter=_filter, + debounce=1000, + ): + paths = [p for _, p in changes] + logger.info("SKILL.md changes detected: %s", paths) + try: + self._reload() + except Exception: + logger.exception("Skill reload failed; keeping previous prompts") + except asyncio.CancelledError: + raise + except Exception: + logger.exception("Skill watcher crashed; hot-reload disabled for this session") + + def _reload(self) -> None: + """Re-run discovery and rebuild cached prompts.""" + # Reset loaded flag so _do_load actually re-runs. 
+ self._loaded = False + self._do_load() + self._loaded = True + logger.info("Skills reloaded: protocols=%d chars, catalog=%d chars", + len(self._protocols_prompt), len(self._catalog_prompt)) + # ------------------------------------------------------------------ # Prompt accessors (consumed by downstream layers) # ------------------------------------------------------------------ diff --git a/core/framework/storage/migrate_v2.py b/core/framework/storage/migrate_v2.py new file mode 100644 index 00000000..33273926 --- /dev/null +++ b/core/framework/storage/migrate_v2.py @@ -0,0 +1,145 @@ +"""One-time migration to the v2 ~/.hive/ directory structure. + +Moves: +- exports/{name}/ -> ~/.hive/colonies/{name}/ +- ~/.hive/queen/session/{id}/ -> ~/.hive/agents/queens/default/sessions/{id}/ +- ~/.hive/queen/global_memory/ -> ~/.hive/memories/global/ + +Runs automatically on first startup when the marker file is absent. +Safe to re-run (skips already-migrated items). +""" + +from __future__ import annotations + +import json +import logging +import shutil +from pathlib import Path + +from framework.config import COLONIES_DIR, HIVE_HOME, MEMORIES_DIR, QUEENS_DIR + +logger = logging.getLogger(__name__) + +_MIGRATION_MARKER = HIVE_HOME / ".migrated-v2" + + +def needs_migration() -> bool: + """Return True if the v2 migration has not yet run.""" + return not _MIGRATION_MARKER.exists() + + +def run_migration(*, exports_dir: Path | None = None) -> None: + """Run the full v2 migration. 
Idempotent and safe to re-run.""" + if not needs_migration(): + return + + logger.info("migrate_v2: starting ~/.hive structure migration") + + _migrate_colonies(exports_dir or Path("exports")) + _migrate_queen_sessions() + _migrate_memories() + _cleanup_old_queen_dir() + + # Write marker + HIVE_HOME.mkdir(parents=True, exist_ok=True) + _MIGRATION_MARKER.write_text("1\n", encoding="utf-8") + logger.info("migrate_v2: migration complete") + + +def _migrate_colonies(exports_dir: Path) -> None: + """Copy exports/{name}/ -> ~/.hive/colonies/{name}/.""" + if not exports_dir.exists(): + return + + COLONIES_DIR.mkdir(parents=True, exist_ok=True) + migrated = 0 + + for agent_dir in sorted(exports_dir.iterdir()): + if not agent_dir.is_dir() or agent_dir.name.startswith("."): + continue + target = COLONIES_DIR / agent_dir.name + if target.exists(): + continue + try: + shutil.copytree(agent_dir, target) + migrated += 1 + except OSError: + logger.warning("migrate_v2: failed to copy %s", agent_dir, exc_info=True) + + if migrated: + logger.info("migrate_v2: copied %d agent(s) from exports/ to colonies/", migrated) + + +def _migrate_queen_sessions() -> None: + """Move ~/.hive/queen/session/{id}/ -> ~/.hive/agents/queens/default/sessions/{id}/.""" + old_sessions = HIVE_HOME / "queen" / "session" + if not old_sessions.exists(): + return + + new_sessions = QUEENS_DIR / "default" / "sessions" + new_sessions.mkdir(parents=True, exist_ok=True) + migrated = 0 + + for session_dir in sorted(old_sessions.iterdir()): + if not session_dir.is_dir(): + continue + target = new_sessions / session_dir.name + if target.exists(): + continue + try: + session_dir.rename(target) + migrated += 1 + except OSError: + logger.warning( + "migrate_v2: failed to move session %s", session_dir, exc_info=True + ) + + if migrated: + logger.info("migrate_v2: moved %d queen session(s) to new path", migrated) + + +def _migrate_memories() -> None: + """Move ~/.hive/queen/global_memory/ -> ~/.hive/memories/global/.""" + 
old_global = HIVE_HOME / "queen" / "global_memory" + if not old_global.exists(): + return + + new_global = MEMORIES_DIR / "global" + if new_global.exists(): + # Already has content -- merge individual files + merged = 0 + for f in old_global.iterdir(): + if f.is_file() and not (new_global / f.name).exists(): + try: + shutil.copy2(f, new_global / f.name) + merged += 1 + except OSError: + pass + if merged: + logger.info("migrate_v2: merged %d memory file(s) into global/", merged) + return + + new_global.mkdir(parents=True, exist_ok=True) + migrated = 0 + for f in old_global.iterdir(): + if f.is_file(): + try: + shutil.copy2(f, new_global / f.name) + migrated += 1 + except OSError: + pass + + if migrated: + logger.info("migrate_v2: copied %d memory file(s) to memories/global/", migrated) + + +def _cleanup_old_queen_dir() -> None: + """Remove ~/.hive/queen/ after all content has been migrated.""" + old_queen = HIVE_HOME / "queen" + if not old_queen.exists(): + return + try: + shutil.rmtree(old_queen) + logger.info("migrate_v2: removed old ~/.hive/queen/ directory") + except OSError: + logger.debug("migrate_v2: could not remove old queen dir", exc_info=True) diff --git a/core/framework/testing/prompts.py b/core/framework/testing/prompts.py index 08df7625..61ae340b 100644 --- a/core/framework/testing/prompts.py +++ b/core/framework/testing/prompts.py @@ -68,8 +68,8 @@ for _p in ["exports", "core"]: sys.path.insert(0, _path) import pytest -from framework.runner.runner import AgentRunner -from framework.runtime.event_bus import EventType +from framework.loader.agent_loader import AgentLoader +from framework.host.event_bus import EventType AGENT_PATH = Path(__file__).resolve().parents[1] diff --git a/core/framework/tools/flowchart_utils.py b/core/framework/tools/flowchart_utils.py index f436ceb8..0c2c37cb 100644 --- a/core/framework/tools/flowchart_utils.py +++ b/core/framework/tools/flowchart_utils.py @@ -119,12 +119,11 @@ def classify_flowchart_node( return 
FLOWCHART_REMAP[explicit] node_id = node["id"] - node_type = node.get("node_type", "event_loop") node_tools = set(node.get("tools") or []) desc = (node.get("description") or "").lower() # GCU / browser automation nodes → hexagon - if node_type == "gcu": + if False: # gcu removed return "browser" # Entry node (first node or no incoming edges) → start terminator diff --git a/core/framework/tools/migrate_agent.py b/core/framework/tools/migrate_agent.py new file mode 100644 index 00000000..52119c60 --- /dev/null +++ b/core/framework/tools/migrate_agent.py @@ -0,0 +1,273 @@ +"""Migrate a Python-based agent export to declarative agent.yaml. + +Usage:: + + uv run python -m framework.tools.migrate_agent exports/lead_enrichment_agent + +Reads agent.py, nodes/__init__.py, config.py, and mcp_servers.json from the +given directory and writes an ``agent.yaml`` file that is equivalent. The +original Python files are left untouched. + +After migration, verify with:: + + uv run python -c " + from framework.loader.agent_loader import load_agent_config + import yaml, pathlib + data = yaml.safe_load(pathlib.Path('exports/lead_enrichment_agent/agent.yaml').read_text()) + graph, goal = load_agent_config(data) + print(f'OK: {len(graph.nodes)} nodes, {len(graph.edges)} edges') + " +""" + +from __future__ import annotations + +import importlib +import importlib.util +import json +import logging +import sys +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + + +def _import_module_from_path(module_name: str, file_path: Path) -> Any: + """Import a Python file as a module.""" + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None or spec.loader is None: + raise ImportError(f"Cannot import {file_path}") + mod = importlib.util.module_from_spec(spec) + sys.modules[module_name] = mod + spec.loader.exec_module(mod) + return mod + + +def _node_to_dict(node: Any) -> dict: + """Convert a NodeSpec instance to a YAML-friendly 
dict.""" + d: dict[str, Any] = {"id": node.id} + if node.name and node.name != node.id: + d["name"] = node.name + if node.description: + d["description"] = node.description + if node.node_type != "event_loop": + d["node_type"] = node.node_type + if node.client_facing: + d["client_facing"] = True + if node.max_node_visits != 1: + d["max_node_visits"] = node.max_node_visits + + if node.input_keys: + d["input_keys"] = list(node.input_keys) + if node.output_keys: + d["output_keys"] = list(node.output_keys) + if node.nullable_output_keys: + d["nullable_output_keys"] = list(node.nullable_output_keys) + + # Tools + tools_list = list(node.tools) if node.tools else [] + if tools_list: + d["tools"] = {"policy": "explicit", "allowed": tools_list} + elif False: # gcu removed + d["tools"] = {"policy": "all"} + else: + d["tools"] = {"policy": "none"} + + if node.sub_agents: + d["sub_agents"] = list(node.sub_agents) + if node.success_criteria: + d["success_criteria"] = node.success_criteria + if getattr(node, "failure_criteria", None): + d["failure_criteria"] = node.failure_criteria + if getattr(node, "max_retries", None): + d["max_retries"] = node.max_retries + if getattr(node, "skip_judge", False): + d["skip_judge"] = True + if getattr(node, "max_iterations", 30) != 30: + d["max_iterations"] = node.max_iterations + + if node.system_prompt: + d["system_prompt"] = node.system_prompt + + return d + + +def _edge_to_dict(edge: Any) -> dict: + """Convert an EdgeSpec instance to a YAML-friendly dict.""" + d: dict[str, Any] = { + "from_node": edge.source, + "to_node": edge.target, + } + cond = str(edge.condition.value) if hasattr(edge.condition, "value") else str(edge.condition) + if cond != "on_success": + d["condition"] = cond + if edge.condition_expr: + d["condition"] = "conditional" + d["condition_expr"] = edge.condition_expr + if edge.priority and edge.priority != 1: + d["priority"] = edge.priority + if edge.input_mapping: + d["input_mapping"] = dict(edge.input_mapping) + return d 
+ + +def migrate_agent(agent_dir: str | Path) -> dict: + """Read a Python-based agent export and return the declarative config dict. + + The returned dict can be serialized to YAML or JSON. + """ + agent_dir = Path(agent_dir).resolve() + agent_py = agent_dir / "agent.py" + if not agent_py.exists(): + raise FileNotFoundError(f"No agent.py in {agent_dir}") + + # Make the agent importable as a package (handles relative imports) + parent = str(agent_dir.parent) + if parent not in sys.path: + sys.path.insert(0, parent) + + pkg_name = agent_dir.name + agent_mod = importlib.import_module(f"{pkg_name}.agent") + + # Extract module-level variables + goal = getattr(agent_mod, "goal", None) + nodes = getattr(agent_mod, "nodes", []) + edges = getattr(agent_mod, "edges", []) + entry_node = getattr(agent_mod, "entry_node", "") + terminal_nodes = getattr(agent_mod, "terminal_nodes", []) + pause_nodes = getattr(agent_mod, "pause_nodes", []) + conversation_mode = getattr(agent_mod, "conversation_mode", "continuous") + identity_prompt = getattr(agent_mod, "identity_prompt", "") + loop_config = getattr(agent_mod, "loop_config", {}) + + # Config / metadata + config_mod = None + config_py = agent_dir / "config.py" + if config_py.exists(): + try: + config_mod = importlib.import_module(f"{pkg_name}.config") + except ImportError: + pass + metadata = getattr(config_mod, "metadata", None) + default_config = getattr(config_mod, "default_config", None) + + # Agent name + name = agent_dir.name + if metadata and hasattr(metadata, "name"): + name = str(metadata.name).lower().replace(" ", "-") + + # Build config dict + config: dict[str, Any] = { + "name": name, + "version": getattr(metadata, "version", "1.0.0") if metadata else "1.0.0", + } + if goal and goal.description: + config["description"] = goal.description + if metadata and hasattr(metadata, "intro_message") and metadata.intro_message: + intro = metadata.intro_message + if intro and "TODO" not in intro: + config["metadata"] = 
{"intro_message": intro} + + # Variables (detect config fields injected into prompts) + variables: dict[str, str] = {} + _SKIP_CONFIG = {"model", "temperature", "max_tokens", "api_key", "api_base"} + if default_config: + for attr in dir(default_config): + if attr.startswith("_") or attr in _SKIP_CONFIG: + continue + val = getattr(default_config, attr) + if isinstance(val, str) and val: + variables[attr] = val + if variables: + config["variables"] = variables + + # Goal + if goal: + goal_dict: dict[str, Any] = {"description": goal.description} + if goal.success_criteria: + goal_dict["success_criteria"] = [sc.description for sc in goal.success_criteria] + if goal.constraints: + goal_dict["constraints"] = [c.description for c in goal.constraints] + config["goal"] = goal_dict + + # Identity / conversation / loop + if identity_prompt: + config["identity_prompt"] = identity_prompt + if conversation_mode and conversation_mode != "continuous": + config["conversation_mode"] = conversation_mode + if loop_config: + config["loop_config"] = dict(loop_config) + + # MCP servers + mcp_path = agent_dir / "mcp_servers.json" + if mcp_path.exists(): + with open(mcp_path) as f: + mcp_data = json.load(f) + if mcp_data: + config["mcp_servers"] = [{"name": name} for name in mcp_data] + + # Nodes + config["nodes"] = [_node_to_dict(n) for n in nodes] + + # Edges + config["edges"] = [_edge_to_dict(e) for e in edges] + + # Graph structure + config["entry_node"] = entry_node + if terminal_nodes: + config["terminal_nodes"] = terminal_nodes + if pause_nodes: + config["pause_nodes"] = pause_nodes + + return config + + +def write_yaml(config: dict, output_path: Path) -> None: + """Write config dict to YAML with clean formatting.""" + try: + import yaml + except ImportError: + raise ImportError("PyYAML required: uv pip install pyyaml") from None + + # Custom representer for multiline strings + def _str_representer(dumper: yaml.Dumper, data: str) -> Any: + if "\n" in data: + return 
dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + yaml.add_representer(str, _str_representer) + + with open(output_path, "w") as f: + yaml.dump( + config, f, + default_flow_style=False, sort_keys=False, + allow_unicode=True, width=120, + ) + + logger.info("Wrote %s", output_path) + + +def main() -> None: + """CLI entry point.""" + logging.basicConfig(level=logging.INFO, format="%(message)s") + + if len(sys.argv) < 2: + print("Usage: uv run python -m framework.tools.migrate_agent ") + sys.exit(1) + + agent_dir = Path(sys.argv[1]) + config = migrate_agent(agent_dir) + + output = agent_dir / "agent.yaml" + write_yaml(config, output) + print(f"Wrote {output}") + + n_nodes = len(config["nodes"]) + n_edges = len(config["edges"]) + print(f"\nMigrated {config['name']}: {n_nodes} nodes, {n_edges} edges") + print("\nVerify with:") + print(f' uv run python -c "import yaml, pathlib; from framework.loader.agent_loader import load_agent_config; load_agent_config(yaml.safe_load(pathlib.Path(\'{output}\').read_text()))"') + + +if __name__ == "__main__": + main() diff --git a/core/framework/tools/queen_lifecycle/__init__.py b/core/framework/tools/queen_lifecycle/__init__.py new file mode 100644 index 00000000..6f850e5b --- /dev/null +++ b/core/framework/tools/queen_lifecycle/__init__.py @@ -0,0 +1,10 @@ +"""Queen lifecycle tools -- split into per-tool modules. + +The main entry point is still ``register_queen_lifecycle_tools()`` in +``queen_lifecycle_tools.py``. This package provides the shared context +and individual tool registration functions. +""" + +from framework.tools.queen_lifecycle.context import QueenToolContext + +__all__ = ["QueenToolContext"] diff --git a/core/framework/tools/queen_lifecycle/context.py b/core/framework/tools/queen_lifecycle/context.py new file mode 100644 index 00000000..4da53ecb --- /dev/null +++ b/core/framework/tools/queen_lifecycle/context.py @@ -0,0 +1,52 @@ +"""Shared context for queen lifecycle tools. 
+ +All queen tools receive this context instead of closing over +individual variables from the registration function. +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass +from typing import Any + +logger = logging.getLogger(__name__) + + +@dataclass +class QueenToolContext: + """Shared state passed to all queen lifecycle tool implementations.""" + + session: Any # Session or WorkerSessionAdapter + session_manager: Any | None = None + manager_session_id: str | None = None + phase_state: Any | None = None # QueenPhaseState + registry: Any = None # ToolRegistry + + def get_runtime(self): + """Get current graph runtime from session (late-binding).""" + return getattr(self.session, "graph_runtime", None) + + def update_meta(self, updates: dict) -> None: + """Update session metadata JSON.""" + if self.session_manager is None or self.manager_session_id is None: + return + try: + srv_session = self.session_manager.get_session(self.manager_session_id) + if srv_session is None: + return + meta_path = getattr(srv_session, "meta_path", None) + if meta_path is None: + return + import pathlib + + meta_file = pathlib.Path(meta_path) + if meta_file.exists(): + data = json.loads(meta_file.read_text(encoding="utf-8")) + else: + data = {} + data.update(updates) + meta_file.write_text(json.dumps(data, indent=2) + "\n") + except Exception: + logger.debug("Failed to update session meta", exc_info=True) diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py index ee7e0cb9..229673ba 100644 --- a/core/framework/tools/queen_lifecycle_tools.py +++ b/core/framework/tools/queen_lifecycle_tools.py @@ -43,8 +43,8 @@ from pathlib import Path from typing import TYPE_CHECKING, Any from framework.credentials.models import CredentialError -from framework.runner.preload_validation import credential_errors_to_json, validate_credentials -from framework.runtime.event_bus import AgentEvent, 
EventType +from framework.loader.preload_validation import credential_errors_to_json, validate_credentials +from framework.host.event_bus import AgentEvent, EventType from framework.server.app import validate_agent_path from framework.tools.flowchart_utils import ( FLOWCHART_TYPES, @@ -55,9 +55,9 @@ from framework.tools.flowchart_utils import ( ) if TYPE_CHECKING: - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import AgentRuntime - from framework.runtime.event_bus import EventBus + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) @@ -323,7 +323,7 @@ class QueenPhaseState: ) -def build_worker_profile(runtime: AgentRuntime, agent_path: Path | str | None = None) -> str: +def build_worker_profile(runtime: AgentHost, agent_path: Path | str | None = None) -> str: """Build a worker capability profile from its graph/goal definition. 
Injected into the queen's system prompt so it knows what the worker @@ -452,7 +452,7 @@ async def _persist_active_triggers(session: Any, session_id: str) -> None: async def _start_trigger_timer(session: Any, trigger_id: str, tdef: Any) -> None: """Start an asyncio background task that fires the trigger on a timer.""" - from framework.graph.event_loop_node import TriggerEvent + from framework.agent_loop.agent_loop import TriggerEvent cron_expr = tdef.trigger_config.get("cron") interval_minutes = tdef.trigger_config.get("interval_minutes") @@ -513,8 +513,8 @@ async def _start_trigger_timer(session: Any, trigger_id: str, tdef: Any) -> None async def _start_trigger_webhook(session: Any, trigger_id: str, tdef: Any) -> None: """Subscribe to WEBHOOK_RECEIVED events and route matching ones to the queen.""" - from framework.graph.event_loop_node import TriggerEvent - from framework.runtime.webhook_server import WebhookRoute, WebhookServer, WebhookServerConfig + from framework.agent_loop.agent_loop import TriggerEvent + from framework.host.webhook_server import WebhookRoute, WebhookServer, WebhookServerConfig bus = session.event_bus path = tdef.trigger_config.get("path", "") @@ -722,54 +722,6 @@ def _dissolve_planning_nodes( nodes[:] = [n for n in nodes if n["id"] != d_id] del node_by_id[d_id] - # ── Dissolve sub-agent nodes ────────────────────────────── - # Sub-agent nodes are leaf delegates: parent -> subagent (no outgoing). - # Dissolution adds the subagent's ID to parent's sub_agents list. - subagent_ids = [ - n["id"] - for n in nodes - if n.get("flowchart_type") == "browser" or n.get("node_type") == "gcu" - ] - - for sa_id in subagent_ids: - sa_node = node_by_id.get(sa_id) - if sa_node is None: - continue - - in_edges = _incoming(sa_id) - out_edges = _outgoing(sa_id) - - # Validate: sub-agent nodes must be leaves (no outgoing edges) - if out_edges: - logger.warning( - "Sub-agent node '%s' has outgoing edges — they will be dropped " - "during dissolution. 
Sub-agent nodes should be leaf nodes.", - sa_id, - ) - - # Attach to each predecessor's sub_agents list - for ie in in_edges: - pred_id = ie["source"] - pred = node_by_id.get(pred_id) - if pred is None: - continue - - existing_subs = pred.get("sub_agents") or [] - if sa_id not in existing_subs: - existing_subs.append(sa_id) - pred["sub_agents"] = existing_subs - - # Record absorption - prev_absorbed = absorbed.get(pred_id, [pred_id]) - if sa_id not in prev_absorbed: - prev_absorbed.append(sa_id) - absorbed[pred_id] = prev_absorbed - - # Remove sub-agent node and all its edges - edges[:] = [e for e in edges if e["source"] != sa_id and e["target"] != sa_id] - nodes[:] = [n for n in nodes if n["id"] != sa_id] - del node_by_id[sa_id] - # Build complete flowchart_map (identity for non-absorbed nodes) flowchart_map: dict[str, list[str]] = {} for n in nodes: @@ -799,8 +751,11 @@ def _update_meta_json(session_manager, manager_session_id, updates: dict) -> Non srv_session = session_manager.get_session(manager_session_id) if not srv_session: return + from framework.config import QUEENS_DIR + storage_sid = getattr(srv_session, "queen_resume_from", None) or srv_session.id - meta_path = Path.home() / ".hive" / "queen" / "session" / storage_sid / "meta.json" + queen_name = getattr(srv_session, "queen_name", "default") + meta_path = QUEENS_DIR / queen_name / "sessions" / storage_sid / "meta.json" try: existing = {} if meta_path.exists(): @@ -816,7 +771,7 @@ def register_queen_lifecycle_tools( session: Any = None, session_id: str | None = None, # Legacy params — used by TUI when not passing a session object - graph_runtime: AgentRuntime | None = None, + graph_runtime: AgentHost | None = None, event_bus: EventBus | None = None, storage_path: Path | None = None, # Server context — enables load_built_agent tool @@ -1388,81 +1343,6 @@ def register_queen_lifecycle_tools( nodes[:] = [n for n in nodes if n["id"] != d_id] del node_by_id[d_id] - # ── Dissolve sub-agent nodes 
────────────────────────────── - # Sub-agent nodes are leaf delegates: parent → subagent (no outgoing). - # Dissolution adds the subagent's ID to parent's sub_agents list. - subagent_ids = [ - n["id"] - for n in nodes - if n.get("flowchart_type") == "browser" or n.get("node_type") == "gcu" - ] - - for sa_id in subagent_ids: - sa_node = node_by_id.get(sa_id) - if sa_node is None: - continue - - in_edges = _incoming(sa_id) - out_edges = _outgoing(sa_id) - - # Validate: sub-agent nodes must be leaves (no outgoing edges) - if out_edges: - logger.warning( - "Sub-agent node '%s' has outgoing edges — they will be dropped " - "during dissolution. Sub-agent nodes should be leaf nodes.", - sa_id, - ) - - # Attach to each predecessor's sub_agents list - for ie in in_edges: - pred_id = ie["source"] - pred = node_by_id.get(pred_id) - if pred is None: - continue - - existing_subs = pred.get("sub_agents") or [] - if sa_id not in existing_subs: - existing_subs.append(sa_id) - pred["sub_agents"] = existing_subs - - # Record absorption - prev_absorbed = absorbed.get(pred_id, [pred_id]) - if sa_id not in prev_absorbed: - prev_absorbed.append(sa_id) - absorbed[pred_id] = prev_absorbed - - # Remove sub-agent node and all its edges - edges[:] = [e for e in edges if e["source"] != sa_id and e["target"] != sa_id] - nodes[:] = [n for n in nodes if n["id"] != sa_id] - del node_by_id[sa_id] - - # ── Dissolve implicit sub-agents ───────────────────────── - # Nodes that appear in another node's sub_agents list but weren't - # caught above (e.g. GCU nodes with flowchart_type="browser" where - # the queen set sub_agents directly on the parent). 
- implicit_sa_ids: list[str] = [] - for n in nodes: - for sa_id in n.get("sub_agents") or []: - if sa_id in node_by_id and sa_id != n["id"]: - implicit_sa_ids.append(sa_id) - - for sa_id in implicit_sa_ids: - if sa_id not in node_by_id: - continue # already removed - - # Find which parent(s) reference this sub-agent - for n in nodes: - if sa_id in (n.get("sub_agents") or []) and n["id"] != sa_id: - prev_absorbed = absorbed.get(n["id"], [n["id"]]) - if sa_id not in prev_absorbed: - prev_absorbed.append(sa_id) - absorbed[n["id"]] = prev_absorbed - - # Remove the sub-agent node and its edges - edges[:] = [e for e in edges if e["source"] != sa_id and e["target"] != sa_id] - nodes[:] = [n for n in nodes if n["id"] != sa_id] - del node_by_id[sa_id] - # Build complete flowchart_map (identity for non-absorbed nodes) flowchart_map: dict[str, list[str]] = {} for n in nodes: @@ -1470,14 +1350,9 @@ def register_queen_lifecycle_tools( flowchart_map[nid] = absorbed.get(nid, [nid]) # Rebuild terminal_nodes (decision targets may have changed). - # Sub-agent nodes are leaf helpers, not endpoints — exclude them. 
- post_sa_ids: set[str] = set() - for n in nodes: - for sa_id in n.get("sub_agents") or []: - post_sa_ids.add(sa_id) sources = {e["source"] for e in edges} all_ids = {n["id"] for n in nodes} - terminal_ids = all_ids - sources - post_sa_ids + terminal_ids = all_ids - sources if not terminal_ids and nodes: terminal_ids = {nodes[-1]["id"]} @@ -1563,7 +1438,6 @@ def register_queen_lifecycle_tools( "input_keys": n.get("input_keys", []), "output_keys": n.get("output_keys", []), "success_criteria": n.get("success_criteria", ""), - "sub_agents": n.get("sub_agents", []), # Decision nodes: the yes/no question to evaluate "decision_clause": n.get("decision_clause", ""), # Explicit flowchart override (preserved for classification) @@ -1601,219 +1475,7 @@ def register_queen_lifecycle_tools( } ) - # ── GCU nodes cannot be children of decision nodes ───────── - # Decision nodes dissolve into their predecessor. If a GCU node - # is a decision child, after dissolution it would become a - # conditional workflow step — violating the leaf sub-agent rule. - # Rewire: move the GCU to the decision's predecessor as a - # sub-agent and remove the decision → GCU edge. 
- node_by_id_v = {n["id"]: n for n in validated_nodes} - decision_node_ids = { - n["id"] for n in validated_nodes if n.get("flowchart_type") == "decision" - } - gcu_node_ids = { - n["id"] - for n in validated_nodes - if n.get("node_type") == "gcu" or n.get("flowchart_type") == "browser" - } topology_corrections: list[str] = [] - if decision_node_ids and gcu_node_ids: - for d_id in decision_node_ids: - gcu_children = [ - e - for e in validated_edges - if e["source"] == d_id and e["target"] in gcu_node_ids - ] - if not gcu_children: - continue - d_parents = [e["source"] for e in validated_edges if e["target"] == d_id] - for gc_edge in gcu_children: - gc_id = gc_edge["target"] - logger.warning( - "GCU node '%s' is a child of decision node '%s' " - "— moving it to the decision's predecessor.", - gc_id, - d_id, - ) - topology_corrections.append( - f"GCU node '{gc_id}' was a child of decision " - f"node '{d_id}' — invalid because decision " - f"nodes dissolve at build time. Moved " - f"'{gc_id}' to predecessor as a sub-agent." - ) - # Remove the decision → GCU edge - validated_edges[:] = [ - e - for e in validated_edges - if not (e["source"] == d_id and e["target"] == gc_id) - ] - # Remove any outgoing edges from the GCU node - # (keep report edges back to predecessors) - validated_edges[:] = [ - e - for e in validated_edges - if e["source"] != gc_id or e["target"] in set(d_parents) - ] - # Assign GCU as sub-agent of predecessor(s) - for pid in d_parents: - parent = node_by_id_v.get(pid) - if parent is None: - continue - existing = parent.get("sub_agents") or [] - if gc_id not in existing: - existing.append(gc_id) - parent["sub_agents"] = existing - - # ── Enforce GCU / subagent leaf constraint ──────────────── - # GCU nodes and nodes with flowchart_type "subagent" are leaf - # delegates: they can only receive a delegate edge IN from - # their parent and send a report edge OUT back to that parent. 
- # Any other outgoing edges are design errors — strip them and - # auto-assign the node as a sub-agent of its predecessor. - leaf_node_ids: set[str] = set() - for n in validated_nodes: - if n.get("node_type") == "gcu" or n.get("flowchart_type") == "browser": - leaf_node_ids.add(n["id"]) - if leaf_node_ids: - for leaf_id in leaf_node_ids: - # Find edges where this leaf node is the source - out_edges = [e for e in validated_edges if e["source"] == leaf_id] - in_edges = [e for e in validated_edges if e["target"] == leaf_id] - - # Identify the parent (predecessor that connects IN) - parent_ids = [e["source"] for e in in_edges] - - if not out_edges: - # Already a proper leaf — still ensure sub_agents is set - for pid in parent_ids: - parent = node_by_id_v.get(pid) - if parent is None: - continue - existing = parent.get("sub_agents") or [] - if leaf_id not in existing: - existing.append(leaf_id) - parent["sub_agents"] = existing - continue - - # Strip all outgoing edges from the leaf node that - # don't go back to a parent (report edges are OK) - illegal_targets: list[str] = [] - for oe in out_edges: - if oe["target"] not in parent_ids: - illegal_targets.append(oe["target"]) - - if illegal_targets: - logger.warning( - "GCU/subagent node '%s' has illegal outgoing " - "edges to %s — stripping them. GCU nodes " - "must be leaf sub-agents.", - leaf_id, - illegal_targets, - ) - topology_corrections.append( - f"GCU node '{leaf_id}' had illegal edges to " - f"{illegal_targets} — stripped. GCU nodes MUST " - f"be leaf sub-agents, never in the linear flow." 
- ) - # Rewire: predecessor → leaf's targets (skip leaf) - for parent_id in parent_ids: - for tgt_id in illegal_targets: - validated_edges.append( - { - "id": f"edge-rewire-{len(validated_edges)}", - "source": parent_id, - "target": tgt_id, - "condition": "on_success", - "description": "", - "label": "", - } - ) - # Remove the illegal edges - validated_edges[:] = [ - e - for e in validated_edges - if not (e["source"] == leaf_id and e["target"] in set(illegal_targets)) - ] - - # Ensure the leaf is in its parent's sub_agents list - for pid in parent_ids: - parent = node_by_id_v.get(pid) - if parent is None: - continue - existing = parent.get("sub_agents") or [] - if leaf_id not in existing: - existing.append(leaf_id) - parent["sub_agents"] = existing - - # ── Remove orphaned GCU / subagent nodes ────────────────── - # After enforcing the leaf constraint, any GCU/subagent node - # that has zero edges AND is not in any parent's sub_agents - # list is orphaned — remove it and warn the queen. - all_edge_node_ids = set() - for e in validated_edges: - all_edge_node_ids.add(e["source"]) - all_edge_node_ids.add(e["target"]) - all_sa_refs: set[str] = set() - for n in validated_nodes: - for sa_id in n.get("sub_agents") or []: - all_sa_refs.add(sa_id) - - orphaned_ids: list[str] = [] - for lid in leaf_node_ids: - if lid not in all_edge_node_ids and lid not in all_sa_refs: - orphaned_ids.append(lid) - - if orphaned_ids: - for oid in orphaned_ids: - logger.warning( - "GCU/subagent node '%s' is orphaned (no edges, " - "not in any parent's sub_agents) — removing it.", - oid, - ) - topology_corrections.append( - f"GCU node '{oid}' was orphaned (no edges, not " - f"assigned as a sub-agent of any parent node) — " - f"removed. Add it to a parent node's sub_agents " - f"list and re-save the draft." 
- ) - validated_nodes[:] = [n for n in validated_nodes if n["id"] not in set(orphaned_ids)] - node_by_id_v = {n["id"]: n for n in validated_nodes} - - # Synthesize visual edges for sub-agents that are referenced in - # a parent's sub_agents list but have no connecting edge yet. - node_id_set = {n["id"] for n in validated_nodes} - existing_edge_pairs = {(e["source"], e["target"]) for e in validated_edges} - edge_counter = len(validated_edges) - for n in validated_nodes: - for sa_id in n.get("sub_agents") or []: - if sa_id not in node_id_set: - continue - if (n["id"], sa_id) not in existing_edge_pairs: - validated_edges.append( - { - "id": f"edge-subagent-{edge_counter}", - "source": n["id"], - "target": sa_id, - "condition": "always", - "description": "sub-agent delegation", - "label": "delegate", - } - ) - edge_counter += 1 - existing_edge_pairs.add((n["id"], sa_id)) - if (sa_id, n["id"]) not in existing_edge_pairs: - validated_edges.append( - { - "id": f"edge-subagent-{edge_counter}", - "source": sa_id, - "target": n["id"], - "condition": "always", - "description": "sub-agent report back", - "label": "report", - } - ) - edge_counter += 1 - existing_edge_pairs.add((sa_id, n["id"])) # ── Validate graph connectivity ───────────────────────────── # Every node must be reachable from the entry node. Disconnected @@ -1928,7 +1590,9 @@ def register_queen_lifecycle_tools( # Worker not loaded yet — resolve from draft name draft_name = draft.get("agent_name", "") if draft_name: - candidate = Path("exports") / draft_name + from framework.config import COLONIES_DIR + + candidate = COLONIES_DIR / draft_name if candidate.is_dir(): save_path = candidate _save_flowchart_file( @@ -2195,12 +1859,12 @@ def register_queen_lifecycle_tools( # Explicit user confirmation is required before transitioning from planning # to building. This tool records that confirmation and proceeds. 
- async def confirm_and_build() -> str: - """Confirm the draft and transition from planning to building phase. + async def confirm_and_build(*, agent_name: str | None = None) -> str: + """Confirm the draft, create agent directory, and transition to building. This tool should ONLY be called after the user has explicitly approved - the draft graph design via ask_user. It gates the planning→building - transition so the user always has a chance to review before code is written. + the draft graph design via ask_user. It creates the agent directory and + transitions to BUILDING phase. The queen then writes agent.json directly. """ if phase_state is None: return json.dumps({"error": "Phase state not available."}) @@ -2238,9 +1902,14 @@ def register_queen_lifecycle_tools( # Create agent folder early so flowchart and agent_path are available # throughout the entire BUILDING phase. - _agent_name = phase_state.draft_graph.get("agent_name", "").strip() + _agent_name = ( + agent_name + or phase_state.draft_graph.get("agent_name", "").strip() + ) if _agent_name: - _agent_folder = Path("exports") / _agent_name + from framework.config import COLONIES_DIR + + _agent_folder = COLONIES_DIR / _agent_name _agent_folder.mkdir(parents=True, exist_ok=True) _save_flowchart_file(_agent_folder, original_copy, fmap) phase_state.agent_path = str(_agent_folder) @@ -2271,20 +1940,30 @@ def register_queen_lifecycle_tools( f"{subagent_count} sub-agent node(s) dissolved into predecessor sub_agents" ) + # Transition to BUILDING phase + await phase_state.switch_to_building(source="tool") + _update_meta_json( + session_manager, manager_session_id, {"phase": "building"} + ) + phase_state.build_confirmed = False + + # No injection here -- the return message tells the queen what to do. + # Injecting would queue a BUILDING message that drains AFTER the queen + # may have already moved to STAGING via load_built_agent. 
+ return json.dumps( { "status": "confirmed", - "agent_name": phase_state.draft_graph.get("agent_name", ""), + "phase": "building", + "agent_name": _agent_name, + "agent_path": str(_agent_folder), "planning_nodes_dissolved": dissolved_count, - "decision_nodes_dissolved": decision_count, - "subagent_nodes_dissolved": subagent_count, "flowchart_map": fmap, "message": ( - "User has confirmed the design. " + "Design confirmed and directory created. " + ("; ".join(dissolution_parts) + ". " if dissolution_parts else "") - + "Now call initialize_and_build_agent(agent_name, nodes) to scaffold the " - "agent package and start building. The draft metadata will be " - "used to pre-populate the generated files." + + f"Now write the complete agent config to {_agent_folder}/agent.json " + "using write_file(). Include all system prompts, tools, edges, and goal." ), } ) @@ -2292,180 +1971,30 @@ def register_queen_lifecycle_tools( _confirm_tool = Tool( name="confirm_and_build", description=( - "Confirm the draft graph design and approve transition to building phase. " + "Confirm the draft graph design, create agent directory, and transition to building phase. " "ONLY call this after the user has explicitly approved the design via ask_user. " - "After confirmation, call initialize_and_build_agent() to scaffold and build." + "After confirmation, write the complete agent.json using write_file()." ), - parameters={"type": "object", "properties": {}}, + parameters={ + "type": "object", + "properties": { + "agent_name": { + "type": "string", + "description": "Snake_case name for the agent (e.g. 'linkedin_outreach'). 
" + "If omitted, uses the name from save_agent_draft().", + }, + }, + }, ) registry.register( "confirm_and_build", _confirm_tool, - lambda inputs: confirm_and_build(), + lambda inputs: confirm_and_build( + agent_name=inputs.get("agent_name"), + ), ) tools_registered += 1 - # --- initialize_and_build_agent wrapper (Planning → Building) ------------- - # With agent_name: scaffold a new agent via MCP tool, then switch to building. - # Without agent_name: just switch to building (for fixing an existing loaded agent). - - _existing_init = registry._tools.get("initialize_and_build_agent") - if _existing_init is not None: - _orig_init_executor = _existing_init.executor - - async def initialize_and_build_agent_wrapper(inputs: dict) -> str: - """Wrapper: scaffold or just switch to building phase.""" - agent_name = (inputs.get("agent_name") or "").strip() - - # Gate: when in planning phase and creating a new agent, - # require the user to have confirmed the draft first. - if ( - agent_name - and phase_state is not None - and phase_state.phase == "planning" - and not phase_state.build_confirmed - ): - if phase_state.draft_graph is None: - return json.dumps( - { - "error": ( - "Cannot transition to building without a draft. " - "Call save_agent_draft() first to create a visual draft of the " - "graph, present it to the user for review, then call " - "confirm_and_build() after the user approves." - ) - } - ) - return json.dumps( - { - "error": ( - "The user has not confirmed the draft design yet. " - "Present the draft to the user and call ask_user() to get " - "their approval. Then call confirm_and_build() before " - "calling initialize_and_build_agent()." - ) - } - ) - - # No agent_name → try to fall back to the session's current agent, - # or fail with actionable guidance. 
- if not agent_name: - # Try to resolve agent_name from the current session - fallback_path = getattr(session, "worker_path", None) - if fallback_path is not None: - agent_name = Path(fallback_path).name - else: - # Server path: check SessionManager - if session_manager is not None and manager_session_id: - srv_session = session_manager.get_session(manager_session_id) - if srv_session and getattr(srv_session, "worker_path", None): - fallback_path = srv_session.worker_path - agent_name = Path(fallback_path).name - - if not agent_name: - return json.dumps( - { - "error": ( - "No agent_name provided and no agent loaded in this session. " - "To fix: call list_agents() to find the agent name, then call " - "initialize_and_build_agent(agent_name='') to scaffold it." - ) - } - ) - - # Fall back succeeded — switch to building without scaffolding - logger.info( - "initialize_and_build_agent: no agent_name provided, " - "falling back to session agent '%s'", - agent_name, - ) - if phase_state is not None: - if fallback_path: - phase_state.agent_path = str(fallback_path) - await phase_state.switch_to_building(source="tool") - _update_meta_json(session_manager, manager_session_id, {"phase": "building"}) - if phase_state.inject_notification: - await phase_state.inject_notification( - "[PHASE CHANGE] Switched to BUILDING phase. " - "Start implementing the fix now." - ) - return json.dumps( - { - "status": "editing", - "phase": "building", - "agent_name": agent_name, - "warning": ( - f"No agent_name provided — using session agent '{agent_name}'. " - f"Agent files are at exports/{agent_name}/." - ), - "message": ( - "Switched to BUILDING phase. Full coding tools restored. " - "Implement the fix, then call load_built_agent(path) to reload." - ), - } - ) - - # Has agent_name → scaffold via MCP tool. - # If a draft exists, pass its metadata so the scaffolder can - # pre-populate descriptions, goals, and node metadata. 
- scaffold_inputs = dict(inputs) - draft = phase_state.draft_graph if phase_state else None - if draft and draft.get("agent_name") == agent_name: - scaffold_inputs["_draft"] = draft - - result = _orig_init_executor(scaffold_inputs) - # Handle both sync and async executors - if asyncio.iscoroutine(result) or asyncio.isfuture(result): - result = await result - # If result is a ToolResult, extract the text content - result_str = str(result) - if hasattr(result, "content"): - result_str = str(result.content) - try: - parsed = json.loads(result_str) - if parsed.get("success", True): - if phase_state is not None: - # Set agent_path so the frontend can query credentials - phase_state.agent_path = phase_state.agent_path or str( - Path("exports") / agent_name - ) - await phase_state.switch_to_building(source="tool") - _update_meta_json( - session_manager, manager_session_id, {"phase": "building"} - ) - # Reset draft state after successful scaffolding - phase_state.build_confirmed = False - # Persist flowchart now that the agent folder exists - if phase_state.original_draft_graph and phase_state.flowchart_map: - _save_flowchart_file( - Path("exports") / agent_name, - phase_state.original_draft_graph, - phase_state.flowchart_map, - ) - # Inject a continuation message so the queen starts - # building immediately instead of blocking for user input. - draft_hint = "" - if draft: - draft_hint = ( - " The draft metadata has been used to pre-populate " - "node descriptions, goal, and success criteria. " - "Review and refine the generated files." - ) - if phase_state.inject_notification: - await phase_state.inject_notification( - "[PHASE CHANGE] Agent scaffolded and switched to BUILDING phase. " - "Start implementing the agent nodes now." 
+ draft_hint - ) - except (json.JSONDecodeError, KeyError, TypeError): - pass - return result_str - - registry.register( - "initialize_and_build_agent", - _existing_init.tool, - lambda inputs: initialize_and_build_agent_wrapper(inputs), - ) - # --- stop_graph (Running → Staging) -------------------------------------- async def stop_graph_to_staging() -> str: @@ -2554,7 +2083,7 @@ def register_queen_lifecycle_tools( return s def _build_preamble( - runtime: AgentRuntime, + runtime: AgentHost, ) -> dict[str, Any]: """Build the lightweight preamble: status, node, elapsed, iteration. @@ -2712,9 +2241,9 @@ def register_queen_lifecycle_tools( return "\n".join(lines) - async def _format_memory(runtime: AgentRuntime) -> str: + async def _format_memory(runtime: AgentHost) -> str: """Format the worker's shared buffer snapshot and recent changes.""" - from framework.runtime.shared_state import IsolationLevel + from framework.host.shared_state import IsolationLevel lines = [] active_streams = runtime.get_active_streams() @@ -2865,7 +2394,7 @@ def register_queen_lifecycle_tools( header = f"{total} issue(s) detected." return header + "\n\n" + "\n".join(lines) - async def _format_progress(runtime: AgentRuntime, bus: EventBus) -> str: + async def _format_progress(runtime: AgentHost, bus: EventBus) -> str: """Format goal progress, token consumption, and execution outcomes.""" lines = [] @@ -2921,7 +2450,7 @@ def register_queen_lifecycle_tools( return "\n".join(lines) def _build_full_json( - runtime: AgentRuntime, + runtime: AgentHost, bus: EventBus, preamble: dict[str, Any], last_n: int, @@ -3475,50 +3004,59 @@ def register_queen_lifecycle_tools( if not resolved_path.exists(): return json.dumps({"error": f"Agent path does not exist: {agent_path}"}) - # Pre-check: verify the module exports goal/nodes/edges before - # attempting the full load. This gives the queen an actionable - # error message instead of a cryptic ImportError or TypeError. 
- try: - import importlib - import sys as _sys + # Pre-check: verify the agent can be loaded before attempting + # the full session load. Declarative (agent.json) agents skip + # the Python import check since AgentRunner.load() handles them. + _has_yaml = (resolved_path / "agent.json").exists() + if not _has_yaml: + # Legacy Python agent: verify module exports goal/nodes/edges + try: + import importlib + import sys as _sys - pkg_name = resolved_path.name - parent_dir = str(resolved_path.resolve().parent) - # Temporarily put parent on sys.path for import - if parent_dir not in _sys.path: - _sys.path.insert(0, parent_dir) - # Evict stale cached modules - stale = [n for n in _sys.modules if n == pkg_name or n.startswith(f"{pkg_name}.")] - for n in stale: - del _sys.modules[n] + pkg_name = resolved_path.name + parent_dir = str(resolved_path.resolve().parent) + if parent_dir not in _sys.path: + _sys.path.insert(0, parent_dir) + stale = [ + n for n in _sys.modules + if n == pkg_name or n.startswith(f"{pkg_name}.") + ] + for n in stale: + del _sys.modules[n] - mod = importlib.import_module(pkg_name) - missing_attrs = [ - attr for attr in ("goal", "nodes", "edges") if getattr(mod, attr, None) is None - ] - if missing_attrs: + mod = importlib.import_module(pkg_name) + missing_attrs = [ + attr + for attr in ("goal", "nodes", "edges") + if getattr(mod, attr, None) is None + ] + if missing_attrs: + return json.dumps( + { + "error": ( + f"Agent module '{pkg_name}' is missing module-level " + f"attributes: {', '.join(missing_attrs)}. " + f"Fix: in {pkg_name}/__init__.py, add " + f"'from .agent import {', '.join(missing_attrs)}' " + f"so that 'import {pkg_name}' exposes them at " + f"package level." + ) + } + ) + except Exception as pre_err: return json.dumps( { "error": ( - f"Agent module '{pkg_name}' is missing module-level " - f"attributes: {', '.join(missing_attrs)}. 
" - f"Fix: in {pkg_name}/__init__.py, add " - f"'from .agent import {', '.join(missing_attrs)}' " - f"so that 'import {pkg_name}' exposes them at package level." + f"Failed to import agent module " + f"'{resolved_path.name}': {pre_err}. " + f"Fix: ensure {resolved_path.name}/__init__.py " + f"exists and can be imported without errors " + f"(check syntax, missing dependencies, and " + f"relative imports)." ) } ) - except Exception as pre_err: - return json.dumps( - { - "error": ( - f"Failed to import agent module '{resolved_path.name}': {pre_err}. " - f"Fix: ensure {resolved_path.name}/__init__.py exists and can be " - f"imported without errors (check syntax, missing dependencies, " - f"and relative imports)." - ) - } - ) try: updated_session = await session_manager.load_graph( @@ -3635,7 +3173,7 @@ def register_queen_lifecycle_tools( description=( "Load a newly built agent as the worker in this session. " "After building and validating an agent, call this with the agent's " - "path (e.g. 'exports/my_agent') to make it available immediately. " + "path (e.g. '~/.hive/colonies/my_agent') to make it available immediately. " "The user will see the agent's graph and can interact with it." ), parameters={ @@ -3643,7 +3181,7 @@ def register_queen_lifecycle_tools( "properties": { "agent_path": { "type": "string", - "description": ("Path to the agent directory (e.g. 'exports/my_agent')"), + "description": ("Path to the agent directory (e.g. 
'~/.hive/colonies/my_agent')"), }, }, "required": ["agent_path"], @@ -3795,7 +3333,7 @@ def register_queen_lifecycle_tools( if tdef is None: if trigger_type and trigger_config: - from framework.runtime.triggers import TriggerDefinition + from framework.host.triggers import TriggerDefinition tdef = TriggerDefinition( id=trigger_id, diff --git a/core/framework/tools/session_graph_tools.py b/core/framework/tools/session_graph_tools.py index 8b068770..aadd3557 100644 --- a/core/framework/tools/session_graph_tools.py +++ b/core/framework/tools/session_graph_tools.py @@ -21,13 +21,13 @@ import logging from typing import TYPE_CHECKING if TYPE_CHECKING: - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import AgentRuntime + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost logger = logging.getLogger(__name__) -def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int: +def register_graph_tools(registry: ToolRegistry, runtime: AgentHost) -> int: """Register graph lifecycle tools bound to *runtime*. Returns the number of tools registered. @@ -41,12 +41,13 @@ def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int: async def load_agent(agent_path: str) -> str: """Load an agent graph from disk into the running session. - The agent is imported from *agent_path* (a directory containing - ``agent.py``). Its graph, goal, and entry points are registered - as a secondary graph on the runtime. Returns a JSON summary. + The agent is loaded from *agent_path* (a directory containing + ``agent.json`` or ``agent.py``). Its graph, goal, and entry points + are registered as a secondary graph on the runtime. Returns a JSON + summary. 
""" - from framework.runner.runner import AgentRunner - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.agent_loader import AgentLoader + from framework.host.execution_manager import EntryPointSpec from framework.server.app import validate_agent_path try: @@ -57,7 +58,7 @@ def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int: return json.dumps({"error": f"Agent path does not exist: {agent_path}"}) try: - runner = AgentRunner.load(path) + runner = AgentLoader.load(path) except Exception as exc: return json.dumps({"error": f"Failed to load agent: {exc}"}) @@ -105,7 +106,7 @@ def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int: "properties": { "agent_path": { "type": "string", - "description": "Path to the agent directory (containing agent.py)", + "description": "Path to the agent directory", }, }, "required": ["agent_path"], diff --git a/core/framework/tools/worker_monitoring_tools.py b/core/framework/tools/worker_monitoring_tools.py index 9d78708b..d1382020 100644 --- a/core/framework/tools/worker_monitoring_tools.py +++ b/core/framework/tools/worker_monitoring_tools.py @@ -23,7 +23,7 @@ from pathlib import Path from typing import TYPE_CHECKING if TYPE_CHECKING: - from framework.runner.tool_registry import ToolRegistry + from framework.loader.tool_registry import ToolRegistry logger = logging.getLogger(__name__) diff --git a/core/framework/tracker/__init__.py b/core/framework/tracker/__init__.py new file mode 100644 index 00000000..6cc52d06 --- /dev/null +++ b/core/framework/tracker/__init__.py @@ -0,0 +1,3 @@ +"""Tracker layer -- decision/run logging for Builder analysis.""" + +from framework.tracker.decision_tracker import DecisionTracker # noqa: F401 diff --git a/core/framework/runtime/core.py b/core/framework/tracker/decision_tracker.py similarity index 99% rename from core/framework/runtime/core.py rename to core/framework/tracker/decision_tracker.py index 
c61e8d96..5f134b7a 100644 --- a/core/framework/runtime/core.py +++ b/core/framework/tracker/decision_tracker.py @@ -21,7 +21,7 @@ from framework.storage.concurrent import ConcurrentStorage logger = logging.getLogger(__name__) -class Runtime: +class DecisionTracker: """ The runtime environment that agents execute within. diff --git a/core/framework/runtime/llm_debug_logger.py b/core/framework/tracker/llm_debug_logger.py similarity index 100% rename from core/framework/runtime/llm_debug_logger.py rename to core/framework/tracker/llm_debug_logger.py diff --git a/core/framework/runtime/runtime_log_schemas.py b/core/framework/tracker/runtime_log_schemas.py similarity index 100% rename from core/framework/runtime/runtime_log_schemas.py rename to core/framework/tracker/runtime_log_schemas.py diff --git a/core/framework/runtime/runtime_log_store.py b/core/framework/tracker/runtime_log_store.py similarity index 99% rename from core/framework/runtime/runtime_log_store.py rename to core/framework/tracker/runtime_log_store.py index 7be0942c..b3f98db2 100644 --- a/core/framework/runtime/runtime_log_store.py +++ b/core/framework/tracker/runtime_log_store.py @@ -29,7 +29,7 @@ import logging from datetime import UTC, datetime from pathlib import Path -from framework.runtime.runtime_log_schemas import ( +from framework.tracker.runtime_log_schemas import ( NodeDetail, NodeStepLog, RunDetailsLog, diff --git a/core/framework/runtime/runtime_logger.py b/core/framework/tracker/runtime_logger.py similarity index 98% rename from core/framework/runtime/runtime_logger.py rename to core/framework/tracker/runtime_logger.py index f816131c..0da112fa 100644 --- a/core/framework/runtime/runtime_logger.py +++ b/core/framework/tracker/runtime_logger.py @@ -27,13 +27,13 @@ from datetime import UTC, datetime from typing import Any from framework.observability import get_trace_context -from framework.runtime.runtime_log_schemas import ( +from framework.tracker.runtime_log_schemas import ( NodeDetail, 
NodeStepLog, RunSummaryLog, ToolCallLog, ) -from framework.runtime.runtime_log_store import RuntimeLogStore +from framework.tracker.runtime_log_store import RuntimeLogStore logger = logging.getLogger(__name__) diff --git a/examples/templates/competitive_intel_agent/__main__.py b/examples/templates/competitive_intel_agent/__main__.py index 50dfc8f6..286d7e97 100644 --- a/examples/templates/competitive_intel_agent/__main__.py +++ b/examples/templates/competitive_intel_agent/__main__.py @@ -121,10 +121,10 @@ def tui(verbose: bool, debug: bool) -> None: sys.exit(1) from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui() -> None: agent = CompetitiveIntelAgent() @@ -150,7 +150,7 @@ def tui(verbose: bool, debug: bool) -> None: tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/competitive_intel_agent/agent.py b/examples/templates/competitive_intel_agent/agent.py index 7ae2ea3f..879cab88 100644 --- a/examples/templates/competitive_intel_agent/agent.py +++ b/examples/templates/competitive_intel_agent/agent.py @@ -1,7 +1,7 @@ """Agent graph construction for Competitive Intelligence Agent.""" from typing import Any, TYPE_CHECKING -from framework.graph import ( +from framework.orchestrator import ( EdgeSpec, EdgeCondition, Goal, @@ -9,12 +9,12 @@ from framework.graph import ( Constraint, NodeSpec, ) -from framework.graph.edge import GraphSpec 
-from framework.graph.executor import ExecutionResult, GraphExecutor -from framework.runtime.event_bus import EventBus -from framework.runtime.core import Runtime +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator +from framework.host.event_bus import EventBus +from framework.tracker.decision_tracker import DecisionTracker as Runtime from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry +from framework.loader.tool_registry import ToolRegistry from .config import default_config, metadata, RuntimeConfig from .nodes import ( @@ -188,7 +188,7 @@ class CompetitiveIntelAgent: self.entry_points = entry_points self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes - self._executor: GraphExecutor | None = None + self._executor: Orchestrator | None = None self._graph: GraphSpec | None = None self._event_bus: EventBus | None = None self._tool_registry: ToolRegistry | None = None @@ -219,12 +219,12 @@ class CompetitiveIntelAgent: }, ) - def _setup(self) -> GraphExecutor: + def _setup(self) -> Orchestrator: """ Set up the executor with all components (runtime, LLM, tools). Returns: - An initialized GraphExecutor instance. + An initialized Orchestrator instance. 
""" from pathlib import Path @@ -250,7 +250,7 @@ class CompetitiveIntelAgent: self._graph = self._build_graph() runtime = Runtime(storage_path) - self._executor = GraphExecutor( + self._executor = Orchestrator( runtime=runtime, llm=llm, tools=tools, diff --git a/examples/templates/competitive_intel_agent/nodes/__init__.py b/examples/templates/competitive_intel_agent/nodes/__init__.py index 5d1b716d..449e6a64 100644 --- a/examples/templates/competitive_intel_agent/nodes/__init__.py +++ b/examples/templates/competitive_intel_agent/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Competitive Intelligence Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) intake_node: NodeSpec = NodeSpec( diff --git a/examples/templates/deep_research_agent/__main__.py b/examples/templates/deep_research_agent/__main__.py index 48c4f81a..adcfb4d4 100644 --- a/examples/templates/deep_research_agent/__main__.py +++ b/examples/templates/deep_research_agent/__main__.py @@ -74,10 +74,10 @@ def tui(verbose, debug): from pathlib import Path from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui(): agent = DeepResearchAgent() @@ -103,7 +103,7 @@ def tui(verbose, debug): tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/deep_research_agent/agent.py 
b/examples/templates/deep_research_agent/agent.py index 0ef6df69..d95b8a5e 100644 --- a/examples/templates/deep_research_agent/agent.py +++ b/examples/templates/deep_research_agent/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import ( @@ -244,7 +244,7 @@ class DeepResearchAgent: ) ] - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/deep_research_agent/nodes/__init__.py b/examples/templates/deep_research_agent/nodes/__init__.py index 9350f14d..00a7bbb5 100644 --- a/examples/templates/deep_research_agent/nodes/__init__.py +++ b/examples/templates/deep_research_agent/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Deep Research Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) # Brief conversation to clarify what the user wants researched. 
diff --git a/examples/templates/email_inbox_management/__main__.py b/examples/templates/email_inbox_management/__main__.py index d75b3e3c..58a22b70 100644 --- a/examples/templates/email_inbox_management/__main__.py +++ b/examples/templates/email_inbox_management/__main__.py @@ -83,10 +83,10 @@ def tui(mock, verbose, debug): from pathlib import Path from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui(): agent = InboxManagementAgent() @@ -118,7 +118,7 @@ def tui(mock, verbose, debug): tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/email_inbox_management/agent.py b/examples/templates/email_inbox_management/agent.py index 97df181d..ab805eb6 100644 --- a/examples/templates/email_inbox_management/agent.py +++ b/examples/templates/email_inbox_management/agent.py @@ -2,15 +2,15 @@ from pathlib import Path -from framework.graph import EdgeCondition, EdgeSpec, Goal, SuccessCriterion, Constraint -from framework.graph.checkpoint_config import CheckpointConfig -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult, GraphExecutor +from framework.orchestrator import EdgeCondition, EdgeSpec, Goal, SuccessCriterion, Constraint +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import 
ExecutionResult, Orchestrator from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.event_bus import EventBus -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.event_bus import EventBus +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import ( @@ -190,7 +190,7 @@ class EmailInboxManagementAgent: self.entry_points = entry_points self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes - self._executor: GraphExecutor | None = None + self._executor: Orchestrator | None = None self._graph: GraphSpec | None = None self._event_bus: EventBus | None = None self._tool_registry: ToolRegistry | None = None @@ -264,7 +264,7 @@ class EmailInboxManagementAgent: ), ] - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/email_inbox_management/nodes/__init__.py b/examples/templates/email_inbox_management/nodes/__init__.py index 89a56a09..407956c0 100644 --- a/examples/templates/email_inbox_management/nodes/__init__.py +++ b/examples/templates/email_inbox_management/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Inbox Management Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) # Receives user rules and max_emails, confirms understanding with user. 
diff --git a/examples/templates/email_inbox_management/tools.py b/examples/templates/email_inbox_management/tools.py index 27370650..5959bc7b 100644 --- a/examples/templates/email_inbox_management/tools.py +++ b/examples/templates/email_inbox_management/tools.py @@ -15,7 +15,7 @@ from pathlib import Path import httpx from framework.llm.provider import Tool, ToolResult, ToolUse -from framework.runner.tool_registry import _execution_context +from framework.loader.tool_registry import _execution_context logger = logging.getLogger(__name__) @@ -102,7 +102,7 @@ def _get_data_dir() -> str: ctx = _execution_context.get() if not ctx or "data_dir" not in ctx: raise RuntimeError( - "data_dir not set in execution context. Is the tool running inside a GraphExecutor?" + "data_dir not set in execution context. Is the tool running inside an Orchestrator?" ) return ctx["data_dir"] diff --git a/examples/templates/email_reply_agent/__main__.py b/examples/templates/email_reply_agent/__main__.py index 9858c770..4fd4086d 100644 --- a/examples/templates/email_reply_agent/__main__.py +++ b/examples/templates/email_reply_agent/__main__.py @@ -51,9 +51,9 @@ def tui(): from framework.tui.app import AdenTUI from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.execution_manager import EntryPointSpec async def run_tui(): agent = EmailReplyAgent() @@ -68,7 +68,7 @@ def tui(): api_key=agent.config.api_key, api_base=agent.config.api_base, ) - runtime = create_agent_runtime( + runtime = AgentHost( graph=agent._build_graph(), goal=agent.goal, storage_path=storage, diff --git a/examples/templates/email_reply_agent/agent.py b/examples/templates/email_reply_agent/agent.py index 03448409..434683d6 
100644 --- a/examples/templates/email_reply_agent/agent.py +++ b/examples/templates/email_reply_agent/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import intake_node, search_node, confirm_draft_node @@ -101,7 +101,7 @@ entry_points = {"start": "intake"} pause_nodes = [] terminal_nodes = [] -# Module-level vars read by AgentRunner.load() +# Module-level vars read by AgentLoader.load() conversation_mode = "continuous" identity_prompt = "You are a helpful email reply assistant that filters unreplied emails and sends personalized responses." 
loop_config = { @@ -159,7 +159,7 @@ class EmailReplyAgent: tools = list(self._tool_registry.get_tools().values()) tool_executor = self._tool_registry.get_executor() self._graph = self._build_graph() - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/email_reply_agent/nodes/__init__.py b/examples/templates/email_reply_agent/nodes/__init__.py index aaf69a95..71f827e9 100644 --- a/examples/templates/email_reply_agent/nodes/__init__.py +++ b/examples/templates/email_reply_agent/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Email Reply Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) intake_node = NodeSpec( diff --git a/examples/templates/email_reply_agent/tests/conftest.py b/examples/templates/email_reply_agent/tests/conftest.py index 2e9d1813..96f98ee1 100644 --- a/examples/templates/email_reply_agent/tests/conftest.py +++ b/examples/templates/email_reply_agent/tests/conftest.py @@ -25,6 +25,6 @@ def agent_module(): @pytest.fixture(scope="session") def runner_loaded(): """Load the agent through AgentRunner (structural only, no LLM needed).""" - from framework.runner.runner import AgentRunner + from framework.loader.agent_loader import AgentLoader - return AgentRunner.load(AGENT_PATH) + return AgentLoader.load(AGENT_PATH) diff --git a/examples/templates/email_reply_agent/tests/test_email_reply_agent.py b/examples/templates/email_reply_agent/tests/test_email_reply_agent.py index ec5f05c9..717c309e 100644 --- a/examples/templates/email_reply_agent/tests/test_email_reply_agent.py +++ b/examples/templates/email_reply_agent/tests/test_email_reply_agent.py @@ -77,7 +77,7 @@ class TestRunnerLoad: """Test AgentRunner can load the agent.""" def test_runner_load_succeeds(self, runner_loaded): - """AgentRunner.load() succeeds.""" + """AgentLoader.load() 
succeeds.""" assert runner_loaded is not None def test_runner_has_goal(self, runner_loaded): diff --git a/examples/templates/job_hunter/__main__.py b/examples/templates/job_hunter/__main__.py index 752ae545..bdf5726d 100644 --- a/examples/templates/job_hunter/__main__.py +++ b/examples/templates/job_hunter/__main__.py @@ -75,10 +75,10 @@ def tui(mock, verbose, debug): from pathlib import Path from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui(): agent = JobHunterAgent() @@ -106,7 +106,7 @@ def tui(mock, verbose, debug): tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/job_hunter/agent.py b/examples/templates/job_hunter/agent.py index 29d37efc..6e7d9036 100644 --- a/examples/templates/job_hunter/agent.py +++ b/examples/templates/job_hunter/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from 
framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config from .nodes import ( @@ -224,7 +224,7 @@ class JobHunterAgent: ) ] - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/job_hunter/nodes/__init__.py b/examples/templates/job_hunter/nodes/__init__.py index 9d6dc619..3b9841de 100644 --- a/examples/templates/job_hunter/nodes/__init__.py +++ b/examples/templates/job_hunter/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Job Hunter Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (simple) # Collect resume and identify strongest role types. 
diff --git a/examples/templates/local_business_extractor/agent.py b/examples/templates/local_business_extractor/agent.py index 82c1736d..822b040e 100644 --- a/examples/templates/local_business_extractor/agent.py +++ b/examples/templates/local_business_extractor/agent.py @@ -1,14 +1,14 @@ """Agent graph construction for Local Business Extractor.""" from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import map_search_gcu, extract_contacts_node, sheets_sync_node @@ -125,7 +125,7 @@ class LocalBusinessExtractor: tools = list(self._tool_registry.get_tools().values()) tool_executor = self._tool_registry.get_executor() self._graph = self._build_graph() - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/local_business_extractor/nodes/__init__.py b/examples/templates/local_business_extractor/nodes/__init__.py index 26c4e16c..f0b5727f 100644 --- 
a/examples/templates/local_business_extractor/nodes/__init__.py +++ b/examples/templates/local_business_extractor/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Local Business Extractor.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # GCU Subagent for Google Maps map_search_gcu = NodeSpec( diff --git a/examples/templates/meeting_scheduler/__main__.py b/examples/templates/meeting_scheduler/__main__.py index b6ff5493..6315d102 100644 --- a/examples/templates/meeting_scheduler/__main__.py +++ b/examples/templates/meeting_scheduler/__main__.py @@ -54,9 +54,9 @@ def tui(): from pathlib import Path from framework.tui.app import AdenTUI from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.execution_manager import EntryPointSpec async def run_tui(): agent = MeetingScheduler() @@ -71,7 +71,7 @@ def tui(): api_key=agent.config.api_key, api_base=agent.config.api_base, ) - runtime = create_agent_runtime( + runtime = AgentHost( graph=agent._build_graph(), goal=agent.goal, storage_path=storage, diff --git a/examples/templates/meeting_scheduler/agent.py b/examples/templates/meeting_scheduler/agent.py index f3de5e0c..ec077a33 100644 --- a/examples/templates/meeting_scheduler/agent.py +++ b/examples/templates/meeting_scheduler/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from 
framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import intake_node, schedule_node, confirm_node @@ -107,7 +107,7 @@ entry_points = {"start": "intake"} pause_nodes = [] terminal_nodes = [] # Forever-alive -# Module-level vars read by AgentRunner.load() +# Module-level vars read by AgentLoader.load() conversation_mode = "continuous" identity_prompt = "You are a helpful meeting scheduler assistant that manages calendar availability and sends confirmations." 
loop_config = { @@ -165,7 +165,7 @@ class MeetingScheduler: tools = list(self._tool_registry.get_tools().values()) tool_executor = self._tool_registry.get_executor() self._graph = self._build_graph() - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/meeting_scheduler/nodes/__init__.py b/examples/templates/meeting_scheduler/nodes/__init__.py index 5ccf3dae..81394f69 100644 --- a/examples/templates/meeting_scheduler/nodes/__init__.py +++ b/examples/templates/meeting_scheduler/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Meeting Scheduler.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) intake_node = NodeSpec( diff --git a/examples/templates/meeting_scheduler/tests/conftest.py b/examples/templates/meeting_scheduler/tests/conftest.py index d1e60437..289333c1 100644 --- a/examples/templates/meeting_scheduler/tests/conftest.py +++ b/examples/templates/meeting_scheduler/tests/conftest.py @@ -25,10 +25,10 @@ def agent_module(): @pytest.fixture(scope="session") def runner_loaded(): """Load the agent through AgentRunner (structural only, no LLM needed).""" - from framework.runner.runner import AgentRunner + from framework.loader.agent_loader import AgentLoader from framework.credentials.models import CredentialError try: - return AgentRunner.load(AGENT_PATH) + return AgentLoader.load(AGENT_PATH) except CredentialError: pytest.skip("Google OAuth credentials not configured") diff --git a/examples/templates/sdr_agent/agent.py b/examples/templates/sdr_agent/agent.py index 105cf3dc..b279ae3d 100644 --- a/examples/templates/sdr_agent/agent.py +++ b/examples/templates/sdr_agent/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.checkpoint_config import 
CheckpointConfig -from framework.graph.edge import AsyncEntryPointSpec, GraphSpec -from framework.graph.executor import ExecutionResult +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.edge import AsyncEntryPointSpec, GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import ( @@ -265,7 +265,7 @@ class SDRAgent: ), ] - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/sdr_agent/nodes/__init__.py b/examples/templates/sdr_agent/nodes/__init__.py index cbd274f8..6de8b3e5 100644 --- a/examples/templates/sdr_agent/nodes/__init__.py +++ b/examples/templates/sdr_agent/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for SDR Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) # Receives contact list and outreach goal, confirms with user before proceeding. 
diff --git a/examples/templates/sdr_agent/tools.py b/examples/templates/sdr_agent/tools.py index 26c24aab..4a3eb3f9 100644 --- a/examples/templates/sdr_agent/tools.py +++ b/examples/templates/sdr_agent/tools.py @@ -15,7 +15,7 @@ from __future__ import annotations import json from framework.llm.provider import Tool, ToolResult, ToolUse -from framework.runner.tool_registry import _execution_context +from framework.loader.tool_registry import _execution_context # --------------------------------------------------------------------------- # Tool definitions (auto-discovered by ToolRegistry.discover_from_module) @@ -56,7 +56,7 @@ def _get_data_dir() -> str: ctx = _execution_context.get() if not ctx or "data_dir" not in ctx: raise RuntimeError( - "data_dir not set in execution context. Is the tool running inside a GraphExecutor?" + "data_dir not set in execution context. Is the tool running inside a Orchestrator?" ) return ctx["data_dir"] diff --git a/examples/templates/tech_news_reporter/__main__.py b/examples/templates/tech_news_reporter/__main__.py index 711c0f23..f37e0b09 100644 --- a/examples/templates/tech_news_reporter/__main__.py +++ b/examples/templates/tech_news_reporter/__main__.py @@ -73,10 +73,10 @@ def tui(verbose, debug): from pathlib import Path from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui(): agent = TechNewsReporterAgent() @@ -101,7 +101,7 @@ def tui(verbose, debug): tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + 
runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/tech_news_reporter/agent.py b/examples/templates/tech_news_reporter/agent.py index ef65fbb3..1346184f 100644 --- a/examples/templates/tech_news_reporter/agent.py +++ b/examples/templates/tech_news_reporter/agent.py @@ -1,12 +1,12 @@ """Agent graph construction for Tech & AI News Reporter.""" -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult, GraphExecutor -from framework.runtime.event_bus import EventBus -from framework.runtime.core import Runtime +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator +from framework.host.event_bus import EventBus +from framework.tracker.decision_tracker import DecisionTracker as Runtime from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry +from framework.loader.tool_registry import ToolRegistry from .config import default_config, metadata from .nodes import ( @@ -131,7 +131,7 @@ class TechNewsReporterAgent: self.entry_points = entry_points self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes - self._executor: GraphExecutor | None = None + self._executor: Orchestrator | None = None self._graph: GraphSpec | None = None self._event_bus: EventBus | None = None self._tool_registry: ToolRegistry | None = None @@ -157,7 +157,7 @@ class TechNewsReporterAgent: }, ) - def _setup(self) -> GraphExecutor: + def _setup(self) -> Orchestrator: """Set up the executor with all components.""" from pathlib import Path @@ -183,7 +183,7 @@ class TechNewsReporterAgent: self._graph = self._build_graph() runtime = Runtime(storage_path) - self._executor = GraphExecutor( + self._executor 
= Orchestrator( runtime=runtime, llm=llm, tools=tools, diff --git a/examples/templates/tech_news_reporter/nodes/__init__.py b/examples/templates/tech_news_reporter/nodes/__init__.py index 2d0b9b27..a7e2fbe7 100644 --- a/examples/templates/tech_news_reporter/nodes/__init__.py +++ b/examples/templates/tech_news_reporter/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Tech & AI News Reporter.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) # Brief conversation to understand what topics the user cares about. diff --git a/examples/templates/twitter_news_agent/agent.py b/examples/templates/twitter_news_agent/agent.py index 0dffb4a2..50627cf3 100644 --- a/examples/templates/twitter_news_agent/agent.py +++ b/examples/templates/twitter_news_agent/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import fetch_node, process_node, review_node @@ -91,7 +91,7 @@ entry_points = {"start": "process-news"} pause_nodes = [] 
terminal_nodes = [] # Forever-alive -# Module-level vars read by AgentRunner.load() +# Module-level vars read by AgentLoader.load() conversation_mode = "continuous" identity_prompt = "You are a professional news analyst and researcher." loop_config = { @@ -149,7 +149,7 @@ class TwitterNewsAgent: tools = list(self._tool_registry.get_tools().values()) tool_executor = self._tool_registry.get_executor() self._graph = self._build_graph() - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/twitter_news_agent/nodes/__init__.py b/examples/templates/twitter_news_agent/nodes/__init__.py index bda48139..b21a493d 100644 --- a/examples/templates/twitter_news_agent/nodes/__init__.py +++ b/examples/templates/twitter_news_agent/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Twitter News Digest.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Browser subagent (GCU) to fetch tweets fetch_node = NodeSpec( diff --git a/examples/templates/vulnerability_assessment/__main__.py b/examples/templates/vulnerability_assessment/__main__.py index fa1382c1..921c5031 100644 --- a/examples/templates/vulnerability_assessment/__main__.py +++ b/examples/templates/vulnerability_assessment/__main__.py @@ -76,10 +76,10 @@ def tui(mock, verbose, debug): from pathlib import Path from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui(): agent = 
VulnerabilityResearcherAgent() @@ -107,7 +107,7 @@ def tui(mock, verbose, debug): tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/vulnerability_assessment/agent.py b/examples/templates/vulnerability_assessment/agent.py index 0cc79436..fbc2ffc8 100644 --- a/examples/templates/vulnerability_assessment/agent.py +++ b/examples/templates/vulnerability_assessment/agent.py @@ -1,12 +1,12 @@ """Agent graph construction for Passive Website Vulnerability Assessment.""" -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult, GraphExecutor -from framework.runtime.event_bus import EventBus -from framework.runtime.core import Runtime +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator +from framework.host.event_bus import EventBus +from framework.tracker.decision_tracker import DecisionTracker as Runtime from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry +from framework.loader.tool_registry import ToolRegistry from .config import default_config, metadata from .nodes import ( @@ -186,7 +186,7 @@ class VulnerabilityResearcherAgent: self.entry_points = entry_points self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes - self._executor: GraphExecutor | None = None + self._executor: Orchestrator | None = None self._graph: GraphSpec | None = None self._event_bus: EventBus | None = None self._tool_registry: ToolRegistry | None = None @@ -219,7 +219,7 @@ class VulnerabilityResearcherAgent: ), ) - def _setup(self, mock_mode=False) -> GraphExecutor: + 
def _setup(self, mock_mode=False) -> Orchestrator: """Set up the executor with all components.""" from pathlib import Path @@ -247,7 +247,7 @@ class VulnerabilityResearcherAgent: self._graph = self._build_graph() runtime = Runtime(storage_path) - self._executor = GraphExecutor( + self._executor = Orchestrator( runtime=runtime, llm=llm, tools=tools, diff --git a/examples/templates/vulnerability_assessment/nodes/__init__.py b/examples/templates/vulnerability_assessment/nodes/__init__.py index 17212d79..a88eb565 100644 --- a/examples/templates/vulnerability_assessment/nodes/__init__.py +++ b/examples/templates/vulnerability_assessment/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Passive Website Vulnerability Assessment.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) # Collect the target domain and confirm scanning scope. diff --git a/tools/browser-extension/offscreen.js b/tools/browser-extension/offscreen.js index 00705e12..8a64c687 100644 --- a/tools/browser-extension/offscreen.js +++ b/tools/browser-extension/offscreen.js @@ -9,51 +9,34 @@ const HIVE_WS_URL = "ws://127.0.0.1:9229/bridge"; let ws = null; -let reconnectAttempts = 0; -const MAX_RECONNECT_DELAY = 10000; // Max 10 seconds between attempts +const RETRY_INTERVAL = 2000; // Poll every 2s while disconnected function connect() { - // Exponential backoff with cap - const delay = Math.min(reconnectAttempts * 1000, MAX_RECONNECT_DELAY); + try { + ws = new WebSocket(HIVE_WS_URL); - if (reconnectAttempts > 0) { - console.log(`[Beeline] Reconnecting in ${delay}ms (attempt ${reconnectAttempts + 1})...`); + ws.onopen = () => { + console.log("[Beeline] WebSocket connected to Hive"); + chrome.runtime.sendMessage({ _beeline: true, type: "ws_open" }); + }; + + ws.onmessage = (event) => { + chrome.runtime.sendMessage({ _beeline: true, type: "ws_message", data: event.data }); + }; + + ws.onclose = (event) => { + console.log(`[Beeline] 
WebSocket closed: code=${event.code}, reason=${event.reason}`); + chrome.runtime.sendMessage({ _beeline: true, type: "ws_close" }); + setTimeout(connect, RETRY_INTERVAL); + }; + + ws.onerror = () => { + console.warn(`[Beeline] WebSocket connection failed (server may not be running)`); + }; + } catch (error) { + console.error("[Beeline] Failed to create WebSocket:", error.message); + setTimeout(connect, RETRY_INTERVAL); } - - setTimeout(() => { - try { - ws = new WebSocket(HIVE_WS_URL); - - ws.onopen = () => { - console.log("[Beeline] WebSocket connected to Hive"); - reconnectAttempts = 0; - chrome.runtime.sendMessage({ _beeline: true, type: "ws_open" }); - }; - - ws.onmessage = (event) => { - chrome.runtime.sendMessage({ _beeline: true, type: "ws_message", data: event.data }); - }; - - ws.onclose = (event) => { - console.log(`[Beeline] WebSocket closed: code=${event.code}, reason=${event.reason}`); - chrome.runtime.sendMessage({ _beeline: true, type: "ws_close" }); - reconnectAttempts++; - // Reconnect after delay - setTimeout(connect, 2000); - }; - - ws.onerror = () => { - // Don't log the full error object - it's usually just an Event - // The actual error will be reflected in onclose - console.warn(`[Beeline] WebSocket connection failed (server may not be running)`); - // Don't close here - let onclose handle cleanup - }; - } catch (error) { - console.error("[Beeline] Failed to create WebSocket:", error.message); - reconnectAttempts++; - setTimeout(connect, 2000); - } - }, delay); } // Forward outbound messages from the service worker onto the WebSocket. 
diff --git a/tools/coder_tools_server.py b/tools/coder_tools_server.py index 1aee0819..f5f480fb 100644 --- a/tools/coder_tools_server.py +++ b/tools/coder_tools_server.py @@ -456,8 +456,8 @@ def list_agent_tools( try: from pathlib import Path - from framework.runner.mcp_client import MCPClient, MCPServerConfig - from framework.runner.tool_registry import ToolRegistry + from framework.loader.mcp_client import MCPClient, MCPServerConfig + from framework.loader.tool_registry import ToolRegistry except ImportError: return json.dumps({"error": "Cannot import MCPClient"}) @@ -806,8 +806,8 @@ def _validate_agent_tools_impl(agent_path: str) -> dict: try: from pathlib import Path - from framework.runner.mcp_client import MCPClient, MCPServerConfig - from framework.runner.tool_registry import ToolRegistry + from framework.loader.mcp_client import MCPClient, MCPServerConfig + from framework.loader.tool_registry import ToolRegistry except ImportError: return {"error": "Cannot import MCPClient"} @@ -845,27 +845,50 @@ def _validate_agent_tools_impl(agent_path: str) -> dict: discovery_errors.append({"server": server_name, "error": str(e)}) # --- Load agent nodes and extract declared tools --- + agent_json_file = os.path.join(agent_dir, "agent.json") agent_py = os.path.join(agent_dir, "agent.py") - if not os.path.isfile(agent_py): - return {"error": f"No agent.py found in {agent_path}"} - import importlib - import importlib.util - import sys + nodes = None + if os.path.isfile(agent_json_file): + # Declarative JSON agent + try: + with open(agent_json_file, encoding="utf-8") as f: + data = json.load(f) + # Build lightweight node stubs with .tools and .id/.name + class _NodeStub: + def __init__(self, d): + self.id = d.get("id", "?") + self.name = d.get("name", self.id) + t = d.get("tools", {}) + if isinstance(t, dict): + self.tools = t.get("allowed", []) + elif isinstance(t, list): + self.tools = t + else: + self.tools = [] + nodes = [_NodeStub(n) for n in data.get("nodes", [])] + 
except Exception as e: + return {"error": f"Failed to parse agent.json: {e}"} + elif os.path.isfile(agent_py): + # Legacy Python agent + import importlib + import importlib.util + import sys - package_name = os.path.basename(agent_dir) - parent_dir = os.path.dirname(os.path.abspath(agent_dir)) - if parent_dir not in sys.path: - sys.path.insert(0, parent_dir) + package_name = os.path.basename(agent_dir) + parent_dir = os.path.dirname(os.path.abspath(agent_dir)) + if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + try: + agent_module = importlib.import_module(package_name) + except Exception as e: + return {"error": f"Failed to import agent: {e}"} + nodes = getattr(agent_module, "nodes", None) + else: + return {"error": f"No agent.json or agent.py found in {agent_path}"} - try: - agent_module = importlib.import_module(package_name) - except Exception as e: - return {"error": f"Failed to import agent: {e}"} - - nodes = getattr(agent_module, "nodes", None) if not nodes: - return {"error": "Agent module has no 'nodes' attribute"} + return {"error": "Agent has no nodes defined"} # --- Validate declared vs available --- missing_by_node: dict[str, list[str]] = {} @@ -951,37 +974,46 @@ def list_agents() -> str: if not os.path.isdir(agent_dir): continue - # Must have agent.py to be considered an agent package - if not os.path.isfile(os.path.join(agent_dir, "agent.py")): + # Must have agent.json (declarative) or agent.py (legacy) + has_json = os.path.isfile(os.path.join(agent_dir, "agent.json")) + has_py = os.path.isfile(os.path.join(agent_dir, "agent.py")) + if not has_json and not has_py: continue info = { "name": entry, "path": os.path.relpath(agent_dir, PROJECT_ROOT), "source": source, - "has_nodes": os.path.isdir(os.path.join(agent_dir, "nodes")), - "has_tests": os.path.isdir(os.path.join(agent_dir, "tests")), + "format": "json" if has_json else "python", "has_mcp_config": os.path.isfile(os.path.join(agent_dir, "mcp_servers.json")), } - # Read description 
from __init__.py docstring - init_path = os.path.join(agent_dir, "__init__.py") - if os.path.isfile(init_path): + # Read description from agent.json or __init__.py + if has_json: try: - with open(init_path, encoding="utf-8") as f: - content = f.read(2000) - # Extract module docstring - for quote in ['"""', "'''"]: - start = content.find(quote) - if start != -1: - end = content.find(quote, start + 3) - if end != -1: - info["description"] = ( - content[start + 3 : end].strip().split("\n")[0] - ) - break - except OSError: + with open(os.path.join(agent_dir, "agent.json"), encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict) and data.get("description"): + info["description"] = data["description"] + except Exception: pass + else: + init_path = os.path.join(agent_dir, "__init__.py") + if os.path.isfile(init_path): + try: + with open(init_path, encoding="utf-8") as f: + content = f.read(2000) + for quote in ['"""', "'''"]: + start = content.find(quote) + if start != -1: + end = content.find(quote, start + 3) + if end != -1: + info["description"] = ( + content[start + 3 : end].strip().split("\n")[0] + ) + break + except OSError: + pass # Check runtime data runtime_dir = hive_agents_dir / entry @@ -1266,8 +1298,8 @@ def _run_agent_tests_impl( if not tests_dir.exists(): return { - "error": f"No tests directory: exports/{agent_name}/tests/", - "hint": "Create test files in the tests/ directory first.", + "skipped": True, + "summary": "No tests directory (OK for declarative agents)", } # Parse test types @@ -1446,13 +1478,11 @@ def run_agent_tests( def validate_agent_package(agent_name: str) -> str: """Run structural validation checks on a built agent package in one call. - Executes 5 steps and reports all results (does not stop on first failure): - 1. Class validation — checks graph structure and entry_points contract - 2. 
Node completeness — every NodeSpec in nodes/ must be in the nodes list, - and GCU nodes must be referenced in a parent's sub_agents - 3. Graph validation — loads the agent graph without credential checks - 4. Tool validation — checks declared tools exist in MCP servers - 5. Tests — runs the agent's pytest suite + Executes validation steps and reports all results: + 1. Schema validation — loads agent.json via load_agent_config + 2. Graph validation — loads the agent graph via AgentLoader + 3. Tool validation — checks declared tools exist in MCP servers + 4. Tests — runs the agent's pytest suite (skipped if no tests/) Note: Credential validation is intentionally skipped here (building phase). Credentials are validated at run time by run_agent_with_input() preflight. @@ -1477,137 +1507,88 @@ def validate_agent_package(agent_name: str) -> str: path_parts.append(pythonpath) env["PYTHONPATH"] = os.pathsep.join(path_parts) - # Step 0: Module contract — __init__.py must expose goal, nodes, edges - try: - _contract_script = textwrap.dedent("""\ - import importlib, json - mod = importlib.import_module('{agent_name}') - missing = [a for a in ('goal', 'nodes', 'edges') if getattr(mod, a, None) is None] - if missing: + # Detect agent format + _is_json = os.path.isfile(os.path.join(PROJECT_ROOT, agent_path, "agent.json")) + + if _is_json: + # JSON agents: validate via load_agent_config (schema + round-trip) + try: + _json_script = textwrap.dedent("""\ + import json, pathlib + from framework.loader.agent_loader import load_agent_config + data = json.loads( + pathlib.Path('exports/{agent_name}/agent.json').read_text() + ) + g, goal = load_agent_config(data) print(json.dumps({{ - 'valid': False, - 'error': ( - "Module '{agent_name}' is missing module-level attributes: " - + ", ".join(missing) + ". " - "Fix: in {agent_name}/__init__.py, add " - "'from .agent import " + ", ".join(missing) + "' " - "so that 'import {agent_name}' exposes them at package level." 
- ) + 'valid': True, + 'nodes': len(g.nodes), + 'edges': len(g.edges), + 'entry': g.entry_node, + 'errors': errors, }})) + """).format(agent_name=agent_name) + proc = subprocess.run( + ["uv", "run", "python", "-c", _json_script], + capture_output=True, + text=True, + timeout=30, + env=env, + cwd=PROJECT_ROOT, + stdin=subprocess.DEVNULL, + ) + if proc.returncode == 0: + result = json.loads(proc.stdout.strip()) + steps["schema_validation"] = { + "passed": result["valid"], + "output": ( + f"{result['nodes']} nodes, {result['edges']} edges, " + f"entry={result['entry']}" + ), + } + if result.get("errors"): + steps["schema_validation"]["errors"] = result["errors"] else: - print(json.dumps({{'valid': True}})) - """).format(agent_name=agent_name) - proc = subprocess.run( - ["uv", "run", "python", "-c", _contract_script], - capture_output=True, - text=True, - timeout=30, - env=env, - cwd=PROJECT_ROOT, - stdin=subprocess.DEVNULL, - ) - if proc.returncode == 0: - result = json.loads(proc.stdout.strip()) - steps["module_contract"] = { - "passed": result["valid"], - "output": result.get("error", "goal, nodes, edges exported correctly"), - } - else: - steps["module_contract"] = { - "passed": False, - "error": ( - f"Failed to import '{agent_name}': {proc.stderr.strip()[:1000]}. " - f"Fix: ensure {agent_name}/__init__.py exists and can be imported " - f"without errors (check syntax, missing dependencies, relative imports)." 
- ), - } - except Exception as e: - steps["module_contract"] = {"passed": False, "error": str(e)} - - # Step A: Class validation (subprocess for import isolation) - try: - proc = subprocess.run( - [ - "uv", - "run", - "python", - "-c", - f"from {agent_name} import default_agent; print(default_agent.validate())", - ], - capture_output=True, - text=True, - timeout=30, - env=env, - cwd=PROJECT_ROOT, - stdin=subprocess.DEVNULL, - ) - passed = proc.returncode == 0 - steps["class_validation"] = { - "passed": passed, - "output": (proc.stdout.strip() or proc.stderr.strip())[:2000], - } - if not passed: - steps["class_validation"]["error"] = proc.stderr.strip()[:2000] - except Exception as e: - steps["class_validation"] = {"passed": False, "error": str(e)} - - # Step A2: Node completeness — every NodeSpec in nodes/ must be in the nodes list - try: - _check_template = textwrap.dedent("""\ - import importlib, json - agent = importlib.import_module('{agent_name}') - nodes_mod = importlib.import_module('{agent_name}.nodes') - graph_ids = {{n.id for n in agent.nodes}} - defined = {{}} - for attr in dir(nodes_mod): - obj = getattr(nodes_mod, attr) - if hasattr(obj, 'id') and hasattr(obj, 'node_type'): - defined[obj.id] = attr - orphaned = set(defined) - graph_ids - errors = [ - f"Node '{{nid}}' ({{defined[nid]}}) defined in nodes/ but not in nodes list" - for nid in sorted(orphaned) - ] - sub_refs = set() - for n in agent.nodes: - for sa in getattr(n, 'sub_agents', []) or []: - sub_refs.add(sa) - for n in agent.nodes: - if n.node_type == 'gcu' and n.id not in sub_refs: - errors.append( - f"GCU node '{{n.id}}' not referenced in any node's sub_agents list" - ) - print(json.dumps({{'valid': len(errors) == 0, 'errors': errors}})) - """) - check_script = _check_template.format(agent_name=agent_name) - proc = subprocess.run( - ["uv", "run", "python", "-c", check_script], - capture_output=True, - text=True, - timeout=30, - env=env, - cwd=PROJECT_ROOT, - stdin=subprocess.DEVNULL, - ) - 
if proc.returncode == 0: - result = json.loads(proc.stdout.strip()) - steps["node_completeness"] = { - "passed": result["valid"], - "output": ( - "; ".join(result["errors"]) - if result["errors"] - else "All defined nodes are in the graph" - ), - } - if not result["valid"]: - steps["node_completeness"]["errors"] = result["errors"] - else: - steps["node_completeness"] = { - "passed": False, - "error": proc.stderr.strip()[:2000], - } - except Exception as e: - steps["node_completeness"] = {"passed": False, "error": str(e)} + steps["schema_validation"] = { + "passed": False, + "error": proc.stderr.strip()[:2000], + } + except Exception as e: + steps["schema_validation"] = {"passed": False, "error": str(e)} + else: + # Legacy Python agents: module contract + class validation + try: + _contract_script = textwrap.dedent("""\ + import importlib, json + mod = importlib.import_module('{agent_name}') + missing = [ + a for a in ('goal', 'nodes', 'edges') + if getattr(mod, a, None) is None + ] + print(json.dumps({{'valid': len(missing) == 0, 'missing': missing}})) + """).format(agent_name=agent_name) + proc = subprocess.run( + ["uv", "run", "python", "-c", _contract_script], + capture_output=True, text=True, timeout=30, + env=env, cwd=PROJECT_ROOT, stdin=subprocess.DEVNULL, + ) + if proc.returncode == 0: + result = json.loads(proc.stdout.strip()) + steps["module_contract"] = { + "passed": result["valid"], + "output": ( + f"Missing: {result['missing']}" + if result.get("missing") + else "goal, nodes, edges exported correctly" + ), + } + else: + steps["module_contract"] = { + "passed": False, + "error": proc.stderr.strip()[:1000], + } + except Exception as e: + steps["module_contract"] = {"passed": False, "error": str(e)} # Step B: Graph validation (subprocess for import isolation) # Credentials are checked at run time (run_agent_with_input preflight), @@ -1619,10 +1600,10 @@ def validate_agent_package(agent_name: str) -> str: "run", "python", "-c", - f"from 
framework.runner.runner import AgentRunner; " - f'r = AgentRunner.load("exports/{agent_name}", ' + f"from framework.loader.agent_loader import AgentLoader; " + f'r = AgentLoader.load("exports/{agent_name}", ' f"skip_credential_validation=True); " - f'print("AgentRunner.load (graph-only): OK")', + f'print("AgentLoader.load (graph-only): OK")', ], capture_output=True, text=True, @@ -1659,7 +1640,9 @@ def validate_agent_package(agent_name: str) -> str: # Step D: Tests (direct call) try: test_result = _run_agent_tests_impl(agent_name) - if "error" in test_result: + if test_result.get("skipped"): + steps["tests"] = {"passed": True, "output": "No tests (skipped)"} + elif "error" in test_result: steps["tests"] = {"passed": False, "error": test_result["error"]} else: all_passed = test_result.get("failed", 0) == 0 and test_result.get("errors", 0) == 0 @@ -1697,703 +1680,6 @@ def validate_agent_package(agent_name: str) -> str: # ── Meta-agent: Package initialization ───────────────────────────────────── -def _snake_to_camel(name: str) -> str: - """Convert snake_case to CamelCase.""" - return "".join(word.capitalize() for word in name.split("_")) - - -def _node_var_name(node_id: str) -> str: - """Convert node id to a Python variable name.""" - return node_id.replace("-", "_") + "_node" - - -@mcp.tool() -def initialize_and_build_agent( - agent_name: str, - nodes: str | None = None, - _draft: dict | None = None, -) -> str: - """Scaffold a new agent package with placeholder files. - - Creates exports/{agent_name}/ with all files needed for a runnable agent: - config.py, nodes/__init__.py, agent.py, __init__.py, __main__.py, - mcp_servers.json, tests/conftest.py. - - After initialization, customize the generated files: - - System prompts and node logic in nodes/__init__.py - - Goal and edges in agent.py - - CLI options in __main__.py - - Args: - agent_name: Name for the agent package. Must be snake_case (e.g. 'my_agent'). 
- nodes: Comma-separated node names (snake_case or kebab-case). - If omitted, a single 'start' node is created. - Example: 'intake,process,review' - _draft: Internal. Draft graph metadata from planning phase, used to - pre-populate descriptions, goals, and node metadata. - - Returns: - JSON with files written and next steps. - """ - import re - - if not re.match(r"^[a-z][a-z0-9_]*$", agent_name): - return json.dumps( - { - "success": False, - "error": ( - f"Invalid agent_name '{agent_name}'. Must be snake_case: " - "lowercase letters, numbers, underscores, starting with a letter." - ), - } - ) - - node_list = [n.strip() for n in nodes.split(",") if n.strip()] if nodes else ["start"] - - # Build draft node lookup for pre-populating metadata from planning phase - _draft_nodes: dict[str, dict] = {} - if _draft and _draft.get("nodes"): - for dn in _draft["nodes"]: - _draft_nodes[dn.get("id", "")] = dn - - # Extract top-level draft metadata early so it's available for all templates - _draft_desc = (_draft.get("description") or "") if _draft else "" - - class_name = _snake_to_camel(agent_name) - human_name = agent_name.replace("_", " ").title() - entry_node = node_list[0] - - exports_dir = os.path.join(PROJECT_ROOT, "exports", agent_name) - nodes_dir = os.path.join(exports_dir, "nodes") - tests_dir = os.path.join(exports_dir, "tests") - os.makedirs(nodes_dir, exist_ok=True) - os.makedirs(tests_dir, exist_ok=True) - - files_written: dict[str, dict] = {} - - def _write(rel_path: str, content: str) -> None: - full = os.path.join(exports_dir, rel_path) - os.makedirs(os.path.dirname(full), exist_ok=True) - with open(full, "w", encoding="utf-8") as f: - f.write(content) - files_written[rel_path] = { - "path": f"exports/{agent_name}/{rel_path}", - "size_bytes": os.path.getsize(full), - } - - # -- config.py -- - _write( - "config.py", - f'''\ -"""Runtime configuration.""" - -import json -from dataclasses import dataclass, field -from pathlib import Path - - -def 
_load_preferred_model() -> str: - """Load preferred model from ~/.hive/configuration.json.""" - config_path = Path.home() / ".hive" / "configuration.json" - if config_path.exists(): - try: - with open(config_path) as f: - config = json.load(f) - llm = config.get("llm", {{}}) - if llm.get("provider") and llm.get("model"): - return f"{{llm[\'provider\']}}/{{llm[\'model\']}}" - except Exception: - pass - return "anthropic/claude-sonnet-4-20250514" - - -@dataclass -class RuntimeConfig: - model: str = field(default_factory=_load_preferred_model) - temperature: float = 0.7 - max_tokens: int = 40000 - api_key: str | None = None - api_base: str | None = None - - -default_config = RuntimeConfig() - - -@dataclass -class AgentMetadata: - name: str = "{human_name}" - version: str = "1.0.0" - description: str = "{_draft_desc or "TODO: Add agent description."}" - intro_message: str = "TODO: Add intro message." - - -metadata = AgentMetadata() -''', - ) - - # -- nodes/__init__.py -- - node_specs = [] - node_var_names = [] - for node_id in node_list: - var = _node_var_name(node_id) - node_var_names.append(var) - is_first = node_id == entry_node - - # Use draft metadata to pre-populate if available - dn = _draft_nodes.get(node_id, {}) - node_name = dn.get("name") or node_id.replace("_", " ").replace("-", " ").title() - node_desc = dn.get("description") or "TODO: Describe what this node does." - node_type = dn.get("node_type") or "event_loop" - node_tools = dn.get("tools") or [] - node_input_keys = dn.get("input_keys") or [] - node_output_keys = dn.get("output_keys") or [] - node_sc = dn.get("success_criteria") or "TODO: Define success criteria." 
- - node_specs.append(f'''\ -{var} = NodeSpec( - id="{node_id}", - name="{node_name}", - description="{node_desc}", - node_type="{node_type}", - client_facing={is_first}, - max_node_visits=0, - input_keys={node_input_keys!r}, - output_keys={node_output_keys!r}, - nullable_output_keys=[], - success_criteria="{node_sc}", - system_prompt="""\\ -TODO: Add system prompt for this node. -""", - tools={node_tools!r}, -)''') - - nodes_init = f'''\ -"""Node definitions for {human_name}.""" - -from framework.graph import NodeSpec - -{chr(10).join(node_specs)} - -__all__ = {node_var_names!r} -''' - _write("nodes/__init__.py", nodes_init) - - # -- agent.py -- - node_imports = ", ".join(node_var_names) - nodes_list = ", ".join(node_var_names) - - # Use draft edges if available, otherwise generate linear edges - _draft_edges = _draft.get("edges", []) if _draft else [] - edge_defs = [] - if _draft_edges: - for de in _draft_edges: - eid = de.get("id", f"{de.get('source', '')}-to-{de.get('target', '')}") - src = de.get("source", "") - tgt = de.get("target", "") - cond = de.get("condition", "on_success").upper() - desc = de.get("description", "") - desc_line = f'\n description="{desc}",' if desc else "" - edge_defs.append(f"""\ - EdgeSpec( - id="{eid}", - source="{src}", - target="{tgt}", - condition=EdgeCondition.{cond},{desc_line} - priority=1, - ),""") - else: - for i in range(len(node_list) - 1): - src, tgt = node_list[i], node_list[i + 1] - edge_defs.append(f"""\ - EdgeSpec( - id="{src}-to-{tgt}", - source="{src}", - target="{tgt}", - condition=EdgeCondition.ON_SUCCESS, - priority=1, - ),""") - edges_str = "\n".join(edge_defs) if edge_defs else " # TODO: Add edges" - - # Pre-populate goal from draft metadata - _draft_goal = ( - (_draft.get("goal") or "TODO: Describe the agent's goal.") - if _draft - else "TODO: Describe the agent's goal." 
- ) - _draft_sc = (_draft.get("success_criteria") or []) if _draft else [] - _draft_constraints = (_draft.get("constraints") or []) if _draft else [] - - # Build success criteria entries - if _draft_sc: - sc_entries = "\n".join( - f"""\ - SuccessCriterion( - id="sc-{i + 1}", - description="{sc}", - metric="TODO", - target="TODO", - weight=1.0, - ),""" - for i, sc in enumerate(_draft_sc) - ) - else: - sc_entries = """\ - SuccessCriterion( - id="sc-1", - description="TODO: Define success criterion.", - metric="TODO", - target="TODO", - weight=1.0, - ),""" - - # Build constraint entries - if _draft_constraints: - constraint_entries = "\n".join( - f"""\ - Constraint( - id="c-{i + 1}", - description="{c}", - constraint_type="hard", - category="functional", - ),""" - for i, c in enumerate(_draft_constraints) - ) - else: - constraint_entries = """\ - Constraint( - id="c-1", - description="TODO: Define constraint.", - constraint_type="hard", - category="functional", - ),""" - - _write( - "agent.py", - f'''\ -"""Agent graph construction for {human_name}.""" - -from pathlib import Path - -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig -from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec - -from .config import default_config, metadata -from .nodes import {node_imports} - -# Goal definition -goal = Goal( - id="{agent_name}-goal", - name="{human_name}", - description="{_draft_goal}", - success_criteria=[ -{sc_entries} - ], - constraints=[ -{constraint_entries} - ], -) - -# Node list -nodes = [{nodes_list}] - -# Edge definitions -edges = [ -{edges_str} -] - -# Graph configuration -entry_node = 
"{entry_node}" -entry_points = {{"start": "{entry_node}"}} -pause_nodes = [] -terminal_nodes = [] - -conversation_mode = "continuous" -identity_prompt = "TODO: Add identity prompt." -loop_config = {{ - "max_iterations": 100, - "max_tool_calls_per_turn": 30, - "max_history_tokens": 32000, -}} - - -class {class_name}: - def __init__(self, config=None): - self.config = config or default_config - self.goal = goal - self.nodes = nodes - self.edges = edges - self.entry_node = entry_node - self.entry_points = entry_points - self.pause_nodes = pause_nodes - self.terminal_nodes = terminal_nodes - self._graph = None - self._agent_runtime = None - self._tool_registry = None - self._storage_path = None - - def _build_graph(self): - return GraphSpec( - id="{agent_name}-graph", - goal_id=self.goal.id, - version="1.0.0", - entry_node=self.entry_node, - entry_points=self.entry_points, - terminal_nodes=self.terminal_nodes, - pause_nodes=self.pause_nodes, - nodes=self.nodes, - edges=self.edges, - default_model=self.config.model, - max_tokens=self.config.max_tokens, - loop_config=loop_config, - conversation_mode=conversation_mode, - identity_prompt=identity_prompt, - ) - - def _setup(self): - self._storage_path = Path.home() / ".hive" / "agents" / "{agent_name}" - self._storage_path.mkdir(parents=True, exist_ok=True) - self._tool_registry = ToolRegistry() - mcp_config = Path(__file__).parent / "mcp_servers.json" - if mcp_config.exists(): - self._tool_registry.load_mcp_config(mcp_config) - llm = LiteLLMProvider( - model=self.config.model, - api_key=self.config.api_key, - api_base=self.config.api_base, - ) - tools = list(self._tool_registry.get_tools().values()) - tool_executor = self._tool_registry.get_executor() - self._graph = self._build_graph() - self._agent_runtime = create_agent_runtime( - graph=self._graph, - goal=self.goal, - storage_path=self._storage_path, - entry_points=[ - EntryPointSpec( - id="default", - name="Default", - entry_node=self.entry_node, - 
trigger_type="manual", - isolation_level="shared", - ), - ], - llm=llm, - tools=tools, - tool_executor=tool_executor, - checkpoint_config=CheckpointConfig( - enabled=True, - checkpoint_on_node_complete=True, - checkpoint_max_age_days=7, - async_checkpoint=True, - ), - ) - - async def start(self): - if self._agent_runtime is None: - self._setup() - if not self._agent_runtime.is_running: - await self._agent_runtime.start() - - async def stop(self): - if self._agent_runtime and self._agent_runtime.is_running: - await self._agent_runtime.stop() - self._agent_runtime = None - - async def trigger_and_wait( - self, - entry_point="default", - input_data=None, - timeout=None, - session_state=None, - ): - if self._agent_runtime is None: - raise RuntimeError("Agent not started. Call start() first.") - return await self._agent_runtime.trigger_and_wait( - entry_point_id=entry_point, - input_data=input_data or {{}}, - session_state=session_state, - ) - - async def run(self, context, session_state=None): - await self.start() - try: - result = await self.trigger_and_wait( - "default", context, session_state=session_state - ) - return result or ExecutionResult(success=False, error="Execution timeout") - finally: - await self.stop() - - def info(self): - return {{ - "name": metadata.name, - "version": metadata.version, - "description": metadata.description, - "goal": {{ - "name": self.goal.name, - "description": self.goal.description, - }}, - "nodes": [n.id for n in self.nodes], - "edges": [e.id for e in self.edges], - "entry_node": self.entry_node, - "entry_points": self.entry_points, - "terminal_nodes": self.terminal_nodes, - "client_facing_nodes": [n.id for n in self.nodes if n.client_facing], - }} - - def validate(self): - errors, warnings = [], [] - node_ids = {{n.id for n in self.nodes}} - for e in self.edges: - if e.source not in node_ids: - errors.append(f"Edge {{e.id}}: source '{{e.source}}' not found") - if e.target not in node_ids: - errors.append(f"Edge {{e.id}}: target 
'{{e.target}}' not found") - if self.entry_node not in node_ids: - errors.append(f"Entry node '{{self.entry_node}}' not found") - for t in self.terminal_nodes: - if t not in node_ids: - errors.append(f"Terminal node '{{t}}' not found") - for ep_id, nid in self.entry_points.items(): - if nid not in node_ids: - errors.append(f"Entry point '{{ep_id}}' references unknown node '{{nid}}'") - - return {{"valid": len(errors) == 0, "errors": errors, "warnings": warnings}} - - -default_agent = {class_name}() -''', - ) - - # -- __init__.py -- - _write( - "__init__.py", - f'''\ -"""{human_name} — TODO: Add description.""" - -from .agent import ( - {class_name}, - default_agent, - goal, - nodes, - edges, - entry_node, - entry_points, - pause_nodes, - terminal_nodes, - conversation_mode, - identity_prompt, - loop_config, -) -from .config import default_config, metadata - -__all__ = [ - "{class_name}", - "default_agent", - "goal", - "nodes", - "edges", - "entry_node", - "entry_points", - "pause_nodes", - "terminal_nodes", - "conversation_mode", - "identity_prompt", - "loop_config", - "default_config", - "metadata", -] -''', - ) - - # -- __main__.py -- - _write( - "__main__.py", - f'''\ -"""CLI entry point for {human_name}.""" - -import asyncio -import json -import logging -import sys - -import click - -from .agent import default_agent, {class_name} - - -def setup_logging(verbose=False, debug=False): - if debug: - level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s" - elif verbose: - level, fmt = logging.INFO, "%(message)s" - else: - level, fmt = logging.WARNING, "%(levelname)s: %(message)s" - logging.basicConfig(level=level, format=fmt, stream=sys.stderr) - - -@click.group() -@click.version_option(version="1.0.0") -def cli(): - """{human_name}.""" - pass - - -@cli.command() -@click.option("--verbose", "-v", is_flag=True) -def run(verbose): - """Execute the agent.""" - setup_logging(verbose=verbose) - result = asyncio.run(default_agent.run({{}})) - click.echo( - 
json.dumps( - {{"success": result.success, "output": result.output}}, - indent=2, - default=str, - ) - ) - sys.exit(0 if result.success else 1) - - -@cli.command() -def info(): - """Show agent info.""" - data = default_agent.info() - click.echo( - f"Agent: {{data[\'name\']}}\n" - f"Version: {{data[\'version\']}}\n" - f"Description: {{data[\'description\']}}" - ) - click.echo(f"Nodes: {{', '.join(data[\'nodes\'])}}") - click.echo(f"Client-facing: {{', '.join(data[\'client_facing_nodes\'])}}") - - -@cli.command() -def validate(): - """Validate agent structure.""" - v = default_agent.validate() - if v["valid"]: - click.echo("Agent is valid") - else: - click.echo("Errors:") - for e in v["errors"]: - click.echo(f" {{e}}") - sys.exit(0 if v["valid"] else 1) - - -if __name__ == "__main__": - cli() -''', - ) - - # -- mcp_servers.json -- - mcp_config: dict = { - "hive-tools": { - "transport": "stdio", - "command": "uv", - "args": ["run", "python", "mcp_server.py", "--stdio"], - "cwd": "../../tools", - "description": "Hive tools MCP server", - }, - "gcu-tools": { - "transport": "stdio", - "command": "uv", - "args": ["run", "python", "-m", "gcu.server", "--stdio"], - "cwd": "../../tools", - "description": "GCU browser automation tools", - }, - } - - _write("mcp_servers.json", json.dumps(mcp_config, indent=2)) - - # -- tests/conftest.py -- - _write( - "tests/conftest.py", - '''\ -"""Test fixtures.""" - -import sys -from pathlib import Path - -import pytest - -_repo_root = Path(__file__).resolve().parents[3] -for _p in ["exports", "core"]: - _path = str(_repo_root / _p) - if _path not in sys.path: - sys.path.insert(0, _path) - -AGENT_PATH = str(Path(__file__).resolve().parents[1]) - - -@pytest.fixture(scope="session") -def agent_module(): - """Import the agent package for structural validation.""" - import importlib - - return importlib.import_module(Path(AGENT_PATH).name) - - -@pytest.fixture(scope="session") -def runner_loaded(): - """Load the agent through AgentRunner 
(structural only, no LLM needed).""" - from framework.runner.runner import AgentRunner - - return AgentRunner.load(AGENT_PATH) -''', - ) - - # Build list of all generated file paths for the caller. - all_file_paths = [info["path"] for info in files_written.values()] - - return json.dumps( - { - "success": True, - "agent_name": agent_name, - "class_name": class_name, - "entry_node": entry_node, - "nodes": node_list, - "files_written": files_written, - "file_count": len(files_written), - "files": all_file_paths, - "next_steps": [ - ( - "IMPORTANT: All generated files are structurally complete " - "with correct imports, class definition, validate() method, " - "and __init__.py exports. Use edit_file to customize TODO " - "placeholders — do NOT use write_file to rewrite entire files, " - "as this will break imports and structure." - ), - ( - f"Use edit_file to customize system prompts, tools, " - f"input_keys, output_keys, and success_criteria in " - f"exports/{agent_name}/nodes/__init__.py" - ), - ( - f"Use edit_file to customize goal description, " - f"success_criteria values, constraint values, edge " - f"definitions, and identity_prompt in " - f"exports/{agent_name}/agent.py" - ), - ( - "Do NOT modify: imports at top of agent.py, the class " - "definition, validate() method, _build_graph()/_setup()/" - "lifecycle methods, or __init__.py exports — they are " - "already correct." 
- ), - f'Run validate_agent_package("{agent_name}") to verify structure', - ], - }, - indent=2, - ) - - # ── Main ────────────────────────────────────────────────────────────────── diff --git a/tools/src/gcu/browser/bridge.py b/tools/src/gcu/browser/bridge.py index 3ef3e4d7..6d3afd7d 100644 --- a/tools/src/gcu/browser/bridge.py +++ b/tools/src/gcu/browser/bridge.py @@ -1026,6 +1026,9 @@ class BeelineBridge: await self.highlight_point(tab_id, x, y, label=f"{key} ({x},{y})") return {"ok": True, "action": "press_at", "x": x, "y": y, "key": key} + # Duration (ms) that injected highlights stay visible before fading out. + _HIGHLIGHT_DURATION_MS = 1500 + async def highlight_rect( self, tab_id: int, @@ -1036,61 +1039,112 @@ class BeelineBridge: label: str = "", color: dict | None = None, ) -> None: - """Draw a CDP Overlay highlight box in the live browser window. + """Inject a visible highlight overlay into the page DOM. - Visible in the next screenshot. Automatically cleared on the next - interaction or by calling clear_highlight(). + Creates a fixed-position div with border, background tint, and an + optional label tag. The element fades out after ``_HIGHLIGHT_DURATION_MS`` + and removes itself. Much more visible than the CDP Overlay API. 
""" - await self.cdp_attach(tab_id) - await self._try_enable_domain(tab_id, "Overlay") - fill = color or {"r": 59, "g": 130, "b": 246, "a": 0.35} # blue-500 @ 35% - outline = {"r": fill["r"], "g": fill["g"], "b": fill["b"], "a": 1.0} - await self._cdp( - tab_id, - "Overlay.highlightRect", - { - "x": int(x), - "y": int(y), - "width": max(1, int(w)), - "height": max(1, int(h)), - "color": fill, - "outlineColor": outline, - }, - ) + fill = color or {"r": 59, "g": 130, "b": 246, "a": 0.18} + border_rgb = f"rgb({fill['r']},{fill['g']},{fill['b']})" + bg_rgba = f"rgba({fill['r']},{fill['g']},{fill['b']},{fill.get('a', 0.18)})" + duration = self._HIGHLIGHT_DURATION_MS + + # Escape label for safe injection + safe_label = json.dumps(label[:60]) if label else '""' + + js = f""" + (function() {{ + // Remove any previous hive highlight + var old = document.getElementById('__hive_hl'); + if (old) old.remove(); + + var box = document.createElement('div'); + box.id = '__hive_hl'; + box.style.cssText = 'position:fixed;z-index:2147483647;pointer-events:none;' + + 'left:{int(x)}px;top:{int(y)}px;width:{max(1,int(w))}px;height:{max(1,int(h))}px;' + + 'border:2px solid {border_rgb};background:{bg_rgba};' + + 'border-radius:3px;transition:opacity 0.4s ease;opacity:1;' + + 'box-shadow:0 0 8px {bg_rgba};'; + + var lbl = {safe_label}; + if (lbl) {{ + var tag = document.createElement('span'); + tag.textContent = lbl; + tag.style.cssText = 'position:absolute;left:0;top:-20px;' + + 'background:{border_rgb};color:#fff;font:bold 11px/16px system-ui;' + + 'padding:1px 6px;border-radius:3px;white-space:nowrap;max-width:200px;' + + 'overflow:hidden;text-overflow:ellipsis;'; + box.appendChild(tag); + }} + + document.documentElement.appendChild(box); + setTimeout(function() {{ box.style.opacity = '0'; }}, {duration}); + setTimeout(function() {{ box.remove(); }}, {duration + 500}); + }})(); + """ + try: + await self.cdp_attach(tab_id) + await self.evaluate(tab_id, js) + except Exception: + pass # 
best-effort visual feedback + _interaction_highlights[tab_id] = { - "x": x, - "y": y, - "w": w, - "h": h, - "label": label, - "kind": "rect", + "x": x, "y": y, "w": w, "h": h, + "label": label, "kind": "rect", } async def highlight_point(self, tab_id: int, x: float, y: float, label: str = "") -> None: - """Highlight a coordinate as a small crosshair box in the browser.""" - r = 12 # half-size of the crosshair box in CSS px - await self.highlight_rect( - tab_id, - x - r, - y - r, - r * 2, - r * 2, - label=label, - color={"r": 239, "g": 68, "b": 68, "a": 0.45}, # red-500 @ 45% - ) + """Highlight a coordinate with a pulsing dot and crosshair.""" + duration = self._HIGHLIGHT_DURATION_MS + safe_label = json.dumps(label[:60]) if label else '""' + + js = f""" + (function() {{ + var old = document.getElementById('__hive_hl'); + if (old) old.remove(); + + var dot = document.createElement('div'); + dot.id = '__hive_hl'; + dot.style.cssText = 'position:fixed;z-index:2147483647;pointer-events:none;' + + 'left:{int(x)-8}px;top:{int(y)-8}px;width:16px;height:16px;' + + 'border-radius:50%;background:rgba(239,68,68,0.7);' + + 'box-shadow:0 0 0 4px rgba(239,68,68,0.25),0 0 12px rgba(239,68,68,0.4);' + + 'transition:opacity 0.4s ease;opacity:1;'; + + var lbl = {safe_label}; + if (lbl) {{ + var tag = document.createElement('span'); + tag.textContent = lbl; + tag.style.cssText = 'position:absolute;left:20px;top:-4px;' + + 'background:rgba(239,68,68,0.9);color:#fff;font:bold 11px/16px system-ui;' + + 'padding:1px 6px;border-radius:3px;white-space:nowrap;'; + dot.appendChild(tag); + }} + + document.documentElement.appendChild(dot); + setTimeout(function() {{ dot.style.opacity = '0'; }}, {duration}); + setTimeout(function() {{ dot.remove(); }}, {duration + 500}); + }})(); + """ + try: + await self.cdp_attach(tab_id) + await self.evaluate(tab_id, js) + except Exception: + pass + _interaction_highlights[tab_id] = { - "x": x, - "y": y, - "w": 0, - "h": 0, - "label": label, - "kind": 
"point", + "x": x, "y": y, "w": 0, "h": 0, + "label": label, "kind": "point", } async def clear_highlight(self, tab_id: int) -> None: - """Remove the CDP Overlay highlight from the browser.""" + """Remove the injected highlight from the page.""" try: - await self._cdp(tab_id, "Overlay.hideHighlight") + await self.evaluate(tab_id, """ + var el = document.getElementById('__hive_hl'); + if (el) el.remove(); + """) except Exception: pass _interaction_highlights.pop(tab_id, None) @@ -1199,6 +1253,20 @@ class BeelineBridge: }, ) + # Highlight the select element + rect_result = await self.evaluate( + tab_id, + f"(function(){{const el=document.querySelector(" + f"{json.dumps(selector)});if(!el)return null;" + f"const r=el.getBoundingClientRect();" + f"return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()", + ) + rect = (rect_result or {}).get("result") + if rect: + await self.highlight_rect( + tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector + ) + return {"ok": True, "action": "select", "selector": selector, "selected": values} # ── Inspection ───────────────────────────────────────────────────────────── diff --git a/tools/src/gcu/browser/tools/advanced.py b/tools/src/gcu/browser/tools/advanced.py index 80c4a3e6..d75860ec 100644 --- a/tools/src/gcu/browser/tools/advanced.py +++ b/tools/src/gcu/browser/tools/advanced.py @@ -113,6 +113,28 @@ def register_advanced_tools(mcp: FastMCP) -> None: return {"ok": False, "error": "No active tab"} try: + # Show a brief toast in the browser so the user sees JS executing + snippet = script.strip().replace("'", "\\'")[:80] + toast_js = f""" + (function(){{ + var old=document.getElementById('__hive_toast');if(old)old.remove(); + var t=document.createElement('div');t.id='__hive_toast'; + t.style.cssText='position:fixed;z-index:2147483647;top:12px;right:12px;' + +'background:rgba(30,30,30,0.9);color:#a5d6ff;font:12px/18px monospace;' + +'padding:8px 14px;border-radius:6px;max-width:420px;pointer-events:none;' + 
+'white-space:pre-wrap;word-break:break-all;transition:opacity 0.4s;opacity:1;' + +'border:1px solid rgba(59,130,246,0.4);box-shadow:0 4px 12px rgba(0,0,0,0.3);'; + t.textContent='\\u25b6 '+'{snippet}'; + document.documentElement.appendChild(t); + setTimeout(function(){{t.style.opacity='0';}},2000); + setTimeout(function(){{t.remove();}},2500); + }})(); + """ + try: + await bridge.evaluate(target_tab, toast_js) + except Exception: + pass + result = await bridge.evaluate(target_tab, script) return result except Exception as e: diff --git a/tools/src/gcu/browser/tools/lifecycle.py b/tools/src/gcu/browser/tools/lifecycle.py index add68502..e39e769a 100644 --- a/tools/src/gcu/browser/tools/lifecycle.py +++ b/tools/src/gcu/browser/tools/lifecycle.py @@ -245,6 +245,7 @@ def register_lifecycle_tools(mcp: FastMCP) -> None: _contexts[profile_name] = { "groupId": group_id, "activeTabId": tab_id, + "_seedTabId": tab_id, # reused by first browser_open call } logger.info( diff --git a/tools/src/gcu/browser/tools/tabs.py b/tools/src/gcu/browser/tools/tabs.py index 5555bd45..e487cfbc 100644 --- a/tools/src/gcu/browser/tools/tabs.py +++ b/tools/src/gcu/browser/tools/tabs.py @@ -128,9 +128,13 @@ def register_tab_tools(mcp: FastMCP) -> None: return result try: - # Create tab in the group - result = await bridge.create_tab(url=url, group_id=ctx.get("groupId")) - tab_id = result.get("tabId") + # Reuse the seed about:blank tab from context.create on first open + seed_tab = ctx.pop("_seedTabId", None) + if seed_tab is not None: + tab_id = seed_tab + else: + result = await bridge.create_tab(url=url, group_id=ctx.get("groupId")) + tab_id = result.get("tabId") # Update active tab if not background if not background and tab_id is not None: