Merge branch 'feature/hive-experimental-comp-pipeline' into feat/open-hive-colony
This commit is contained in:
@@ -70,6 +70,8 @@ tmp/
|
|||||||
temp/
|
temp/
|
||||||
|
|
||||||
exports/*
|
exports/*
|
||||||
|
exports.old*
|
||||||
|
artifacts/*
|
||||||
|
|
||||||
.claude/settings.local.json
|
.claude/settings.local.json
|
||||||
|
|
||||||
|
|||||||
+17
-65
@@ -1,71 +1,23 @@
|
|||||||
"""
|
"""Hive Agent Framework.
|
||||||
Aden Hive Framework: A goal-driven agent runtime optimized for Builder observability.
|
|
||||||
|
|
||||||
The runtime is designed around DECISIONS, not just actions. Every significant
|
Core classes:
|
||||||
choice the agent makes is captured with:
|
AgentHost -- hosts agents, manages entry points and pipeline
|
||||||
- What it was trying to do (intent)
|
Orchestrator -- routes between nodes in a graph
|
||||||
- What options it considered
|
AgentLoop -- the LLM + tool execution loop (one per node)
|
||||||
- What it chose and why
|
AgentLoader -- loads agent.json from disk, builds pipeline
|
||||||
- What happened as a result
|
DecisionTracker -- records decisions for post-hoc analysis
|
||||||
- Whether that was good or bad (evaluated post-hoc)
|
|
||||||
|
|
||||||
This gives the Builder LLM the information it needs to improve agent behavior.
|
|
||||||
|
|
||||||
## Testing Framework
|
|
||||||
|
|
||||||
The framework includes a Goal-Based Testing system (Goal → Agent → Eval):
|
|
||||||
- Generate tests from Goal success_criteria and constraints
|
|
||||||
- Mandatory user approval before tests are stored
|
|
||||||
- Parallel test execution with error categorization
|
|
||||||
- Debug tools with fix suggestions
|
|
||||||
|
|
||||||
See `framework.testing` for details.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from framework.llm import LLMProvider
|
from framework.agent_loop import AgentLoop
|
||||||
|
from framework.host import AgentHost
|
||||||
try:
|
from framework.loader import AgentLoader
|
||||||
from framework.llm import AnthropicProvider # noqa: F401
|
from framework.orchestrator import Orchestrator
|
||||||
except ImportError:
|
from framework.tracker import DecisionTracker
|
||||||
pass
|
|
||||||
from framework.runner import AgentRunner
|
|
||||||
from framework.runtime.core import Runtime
|
|
||||||
from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome
|
|
||||||
from framework.schemas.run import Problem, Run, RunSummary
|
|
||||||
|
|
||||||
# Testing framework
|
|
||||||
from framework.testing import (
|
|
||||||
ApprovalStatus,
|
|
||||||
DebugTool,
|
|
||||||
ErrorCategory,
|
|
||||||
Test,
|
|
||||||
TestResult,
|
|
||||||
TestStorage,
|
|
||||||
TestSuiteResult,
|
|
||||||
)
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
# Schemas
|
"AgentHost",
|
||||||
"Decision",
|
"AgentLoader",
|
||||||
"Option",
|
"AgentLoop",
|
||||||
"Outcome",
|
"DecisionTracker",
|
||||||
"DecisionEvaluation",
|
"Orchestrator",
|
||||||
"Run",
|
|
||||||
"RunSummary",
|
|
||||||
"Problem",
|
|
||||||
# Runtime
|
|
||||||
"Runtime",
|
|
||||||
# LLM
|
|
||||||
"LLMProvider",
|
|
||||||
"AnthropicProvider",
|
|
||||||
# Runner
|
|
||||||
"AgentRunner",
|
|
||||||
# Testing
|
|
||||||
"Test",
|
|
||||||
"TestResult",
|
|
||||||
"TestSuiteResult",
|
|
||||||
"TestStorage",
|
|
||||||
"ApprovalStatus",
|
|
||||||
"ErrorCategory",
|
|
||||||
"DebugTool",
|
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -0,0 +1,32 @@
|
|||||||
|
"""Agent loop -- the core agent execution primitive."""
|
||||||
|
|
||||||
|
from framework.agent_loop.conversation import ( # noqa: F401
|
||||||
|
ConversationStore,
|
||||||
|
Message,
|
||||||
|
NodeConversation,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Lazy import to avoid circular dependency with graph/event_loop/
|
||||||
|
# (graph/event_loop/* imports framework.graph.conversation which is a shim
|
||||||
|
# pointing here, which would trigger agent_loop.py loading, which imports
|
||||||
|
# graph/event_loop/* again)
|
||||||
|
|
||||||
|
|
||||||
|
def __getattr__(name: str):
|
||||||
|
if name in ("AgentLoop", "JudgeProtocol", "JudgeVerdict", "LoopConfig", "OutputAccumulator"):
|
||||||
|
from framework.agent_loop.agent_loop import (
|
||||||
|
AgentLoop,
|
||||||
|
JudgeProtocol,
|
||||||
|
JudgeVerdict,
|
||||||
|
LoopConfig,
|
||||||
|
OutputAccumulator,
|
||||||
|
)
|
||||||
|
_exports = {
|
||||||
|
"AgentLoop": AgentLoop,
|
||||||
|
"JudgeProtocol": JudgeProtocol,
|
||||||
|
"JudgeVerdict": JudgeVerdict,
|
||||||
|
"LoopConfig": LoopConfig,
|
||||||
|
"OutputAccumulator": OutputAccumulator,
|
||||||
|
}
|
||||||
|
return _exports[name]
|
||||||
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||||
+89
-425
@@ -21,16 +21,16 @@ from collections.abc import Awaitable, Callable
|
|||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.conversation import ConversationStore, NodeConversation
|
from framework.agent_loop.conversation import ConversationStore, NodeConversation
|
||||||
from framework.graph.event_loop import types as event_loop_types
|
from framework.agent_loop.internals import types as event_loop_types
|
||||||
from framework.graph.event_loop.compaction import (
|
from framework.agent_loop.internals.compaction import (
|
||||||
build_emergency_summary,
|
build_emergency_summary,
|
||||||
build_llm_compaction_prompt,
|
build_llm_compaction_prompt,
|
||||||
compact,
|
compact,
|
||||||
format_messages_for_summary,
|
format_messages_for_summary,
|
||||||
llm_compact,
|
llm_compact,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.cursor_persistence import (
|
from framework.agent_loop.internals.cursor_persistence import (
|
||||||
RestoredState,
|
RestoredState,
|
||||||
check_pause,
|
check_pause,
|
||||||
drain_injection_queue,
|
drain_injection_queue,
|
||||||
@@ -38,7 +38,7 @@ from framework.graph.event_loop.cursor_persistence import (
|
|||||||
restore,
|
restore,
|
||||||
write_cursor,
|
write_cursor,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.event_publishing import (
|
from framework.agent_loop.internals.event_publishing import (
|
||||||
generate_action_plan,
|
generate_action_plan,
|
||||||
log_skip_judge,
|
log_skip_judge,
|
||||||
publish_context_usage,
|
publish_context_usage,
|
||||||
@@ -54,27 +54,24 @@ from framework.graph.event_loop.event_publishing import (
|
|||||||
publish_tool_started,
|
publish_tool_started,
|
||||||
run_hooks,
|
run_hooks,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.judge_pipeline import (
|
from framework.agent_loop.internals.judge_pipeline import (
|
||||||
SubagentJudge as SharedSubagentJudge,
|
SubagentJudge as SharedSubagentJudge,
|
||||||
judge_turn,
|
judge_turn,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.stall_detector import (
|
from framework.agent_loop.internals.stall_detector import (
|
||||||
fingerprint_tool_calls,
|
fingerprint_tool_calls,
|
||||||
is_stalled,
|
is_stalled,
|
||||||
is_tool_doom_loop,
|
is_tool_doom_loop,
|
||||||
ngram_similarity,
|
ngram_similarity,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.subagent_executor import execute_subagent
|
from framework.agent_loop.internals.synthetic_tools import (
|
||||||
from framework.graph.event_loop.synthetic_tools import (
|
|
||||||
build_ask_user_multiple_tool,
|
build_ask_user_multiple_tool,
|
||||||
build_ask_user_tool,
|
build_ask_user_tool,
|
||||||
build_delegate_tool,
|
|
||||||
build_escalate_tool,
|
build_escalate_tool,
|
||||||
build_report_to_parent_tool,
|
|
||||||
build_set_output_tool,
|
build_set_output_tool,
|
||||||
handle_set_output,
|
handle_set_output,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.tool_result_handler import (
|
from framework.agent_loop.internals.tool_result_handler import (
|
||||||
build_json_preview,
|
build_json_preview,
|
||||||
execute_tool,
|
execute_tool,
|
||||||
extract_json_metadata,
|
extract_json_metadata,
|
||||||
@@ -82,12 +79,12 @@ from framework.graph.event_loop.tool_result_handler import (
|
|||||||
restore_spill_counter,
|
restore_spill_counter,
|
||||||
truncate_tool_result,
|
truncate_tool_result,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.types import (
|
from framework.agent_loop.internals.types import (
|
||||||
JudgeProtocol,
|
JudgeProtocol,
|
||||||
JudgeVerdict,
|
JudgeVerdict,
|
||||||
TriggerEvent,
|
TriggerEvent,
|
||||||
)
|
)
|
||||||
from framework.graph.node import NodeContext, NodeProtocol, NodeResult
|
from framework.orchestrator.node import NodeContext, NodeProtocol, NodeResult
|
||||||
from framework.llm.capabilities import supports_image_tool_results
|
from framework.llm.capabilities import supports_image_tool_results
|
||||||
from framework.llm.provider import Tool, ToolResult, ToolUse
|
from framework.llm.provider import Tool, ToolResult, ToolUse
|
||||||
from framework.llm.stream_events import (
|
from framework.llm.stream_events import (
|
||||||
@@ -96,8 +93,8 @@ from framework.llm.stream_events import (
|
|||||||
TextDeltaEvent,
|
TextDeltaEvent,
|
||||||
ToolCallEvent,
|
ToolCallEvent,
|
||||||
)
|
)
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
from framework.runtime.llm_debug_logger import log_llm_turn
|
from framework.tracker.llm_debug_logger import log_llm_turn
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -163,43 +160,9 @@ def _is_context_too_large_error(exc: BaseException) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Escalation receiver (temporary routing target for subagent → user input)
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
class _EscalationReceiver:
|
|
||||||
"""Temporary receiver registered in node_registry for subagent escalation routing.
|
|
||||||
|
|
||||||
When a subagent calls ``report_to_parent(wait_for_response=True)``, the callback
|
|
||||||
creates one of these, registers it under a unique escalation ID in the executor's
|
|
||||||
``node_registry``, and awaits ``wait()``. The TUI / runner calls
|
|
||||||
``inject_input(escalation_id, content)`` which the ``ExecutionStream`` routes here
|
|
||||||
via ``inject_event()`` — matching the same ``hasattr(node, "inject_event")`` check
|
|
||||||
used for regular ``EventLoopNode`` instances.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self._event = asyncio.Event()
|
|
||||||
self._response: str | None = None
|
|
||||||
self._awaiting_input = True # So inject_message() can prefer us
|
|
||||||
|
|
||||||
async def inject_event(
|
|
||||||
self,
|
|
||||||
content: str,
|
|
||||||
*,
|
|
||||||
is_client_input: bool = False,
|
|
||||||
image_content: list[dict] | None = None,
|
|
||||||
) -> None:
|
|
||||||
"""Called by ExecutionStream.inject_input() when the user responds."""
|
|
||||||
self._response = content
|
|
||||||
self._event.set()
|
|
||||||
|
|
||||||
async def wait(self) -> str | None:
|
|
||||||
"""Block until inject_event() delivers the user's response."""
|
|
||||||
await self._event.wait()
|
|
||||||
return self._response
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Judge protocol (simple 3-action interface for event loop evaluation)
|
# Judge protocol (simple 3-action interface for event loop evaluation)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -224,7 +187,7 @@ OutputAccumulator = event_loop_types.OutputAccumulator
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
class EventLoopNode(NodeProtocol):
|
class AgentLoop(NodeProtocol):
|
||||||
"""Multi-turn LLM streaming loop with tool execution and judge evaluation.
|
"""Multi-turn LLM streaming loop with tool execution and judge evaluation.
|
||||||
|
|
||||||
Lifecycle:
|
Lifecycle:
|
||||||
@@ -284,9 +247,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# Monotonic counter for spillover file naming (web_search_1.txt, etc.)
|
# Monotonic counter for spillover file naming (web_search_1.txt, etc.)
|
||||||
self._spill_counter: int = 0
|
self._spill_counter: int = 0
|
||||||
# Subagent mark_complete: when True, _evaluate returns ACCEPT immediately
|
# Subagent mark_complete: when True, _evaluate returns ACCEPT immediately
|
||||||
self._mark_complete_flag = False
|
|
||||||
# Counter for subagent instances (1, 2, 3, ...)
|
|
||||||
self._subagent_instance_counter: dict[str, int] = {}
|
|
||||||
|
|
||||||
def validate_input(self, ctx: NodeContext) -> list[str]:
|
def validate_input(self, ctx: NodeContext) -> list[str]:
|
||||||
"""Validate hard requirements only.
|
"""Validate hard requirements only.
|
||||||
@@ -307,7 +267,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
async def execute(self, ctx: NodeContext) -> NodeResult:
|
async def execute(self, ctx: NodeContext) -> NodeResult:
|
||||||
"""Run the event loop."""
|
"""Run the event loop."""
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] Starting execution for node=%s, stream=%s",
|
"[AgentLoop.execute] Starting execution for node=%s, stream=%s",
|
||||||
ctx.node_id,
|
ctx.node_id,
|
||||||
ctx.stream_id,
|
ctx.stream_id,
|
||||||
)
|
)
|
||||||
@@ -320,7 +280,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# Store skill dirs for AS-9 file-read interception in _execute_tool
|
# Store skill dirs for AS-9 file-read interception in _execute_tool
|
||||||
self._skill_dirs: list[str] = ctx.skill_dirs
|
self._skill_dirs: list[str] = ctx.skill_dirs
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] node_id=%s, execution_id=%s, max_iterations=%d",
|
"[AgentLoop.execute] node_id=%s, execution_id=%s, max_iterations=%d",
|
||||||
node_id,
|
node_id,
|
||||||
execution_id,
|
execution_id,
|
||||||
self._config.max_iterations,
|
self._config.max_iterations,
|
||||||
@@ -402,7 +362,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# execution preamble and node-type preamble. The stored
|
# execution preamble and node-type preamble. The stored
|
||||||
# prompt may be stale after code changes or when runtime-
|
# prompt may be stale after code changes or when runtime-
|
||||||
# injected context (e.g. worker identity) has changed.
|
# injected context (e.g. worker identity) has changed.
|
||||||
from framework.graph.prompting import build_system_prompt_for_node_context
|
from framework.orchestrator.prompting import build_system_prompt_for_node_context
|
||||||
|
|
||||||
_current_prompt = build_system_prompt_for_node_context(ctx)
|
_current_prompt = build_system_prompt_for_node_context(ctx)
|
||||||
if conversation.system_prompt != _current_prompt:
|
if conversation.system_prompt != _current_prompt:
|
||||||
@@ -425,7 +385,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
await self._conversation_store.clear()
|
await self._conversation_store.clear()
|
||||||
|
|
||||||
# Fresh conversation: either isolated mode or first node in continuous mode.
|
# Fresh conversation: either isolated mode or first node in continuous mode.
|
||||||
from framework.graph.prompting import build_system_prompt_for_node_context
|
from framework.orchestrator.prompting import build_system_prompt_for_node_context
|
||||||
|
|
||||||
system_prompt = build_system_prompt_for_node_context(ctx)
|
system_prompt = build_system_prompt_for_node_context(ctx)
|
||||||
|
|
||||||
@@ -484,7 +444,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# 2a. Guard: ensure at least one non-system message exists.
|
# 2a. Guard: ensure at least one non-system message exists.
|
||||||
# A restored conversation may have 0 messages if phase_id filtering
|
# A restored conversation may have 0 messages if phase_id filtering
|
||||||
# removes them all, or if a prior run stored metadata without messages
|
# removes them all, or if a prior run stored metadata without messages
|
||||||
# (e.g. subagent that failed before the first LLM call).
|
# (e.g. node that failed before the first LLM call).
|
||||||
if conversation.message_count == 0:
|
if conversation.message_count == 0:
|
||||||
initial_message = self._build_initial_message(ctx)
|
initial_message = self._build_initial_message(ctx)
|
||||||
if initial_message:
|
if initial_message:
|
||||||
@@ -502,37 +462,10 @@ class EventLoopNode(NodeProtocol):
|
|||||||
tools.append(self._build_ask_user_tool())
|
tools.append(self._build_ask_user_tool())
|
||||||
if stream_id == "queen":
|
if stream_id == "queen":
|
||||||
tools.append(self._build_ask_user_multiple_tool())
|
tools.append(self._build_ask_user_multiple_tool())
|
||||||
# Workers/subagents can escalate blockers to the queen.
|
# Workers can escalate blockers to the queen.
|
||||||
if stream_id not in ("queen", "judge"):
|
if stream_id not in ("queen", "judge"):
|
||||||
tools.append(self._build_escalate_tool())
|
tools.append(self._build_escalate_tool())
|
||||||
|
|
||||||
# Add delegate_to_sub_agent tool if:
|
|
||||||
# - Node has sub_agents defined
|
|
||||||
# - We are NOT in subagent mode (prevents nested delegation)
|
|
||||||
if not ctx.is_subagent_mode:
|
|
||||||
sub_agents = getattr(ctx.node_spec, "sub_agents", None) or []
|
|
||||||
if sub_agents:
|
|
||||||
delegate_tool = self._build_delegate_tool(sub_agents, ctx.node_registry)
|
|
||||||
if delegate_tool:
|
|
||||||
tools.append(delegate_tool)
|
|
||||||
logger.info(
|
|
||||||
"[%s] delegate_to_sub_agent injected (sub_agents=%s)",
|
|
||||||
node_id,
|
|
||||||
sub_agents,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.error(
|
|
||||||
"[%s] _build_delegate_tool returned None for sub_agents=%s",
|
|
||||||
node_id,
|
|
||||||
sub_agents,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.debug("[%s] Skipped delegate tool (is_subagent_mode=True)", node_id)
|
|
||||||
|
|
||||||
# Add report_to_parent tool for sub-agents with a report callback
|
|
||||||
if ctx.is_subagent_mode and ctx.report_callback is not None:
|
|
||||||
tools.append(self._build_report_to_parent_tool())
|
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s",
|
"[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s",
|
||||||
node_id,
|
node_id,
|
||||||
@@ -565,11 +498,11 @@ class EventLoopNode(NodeProtocol):
|
|||||||
|
|
||||||
# 6. Main loop
|
# 6. Main loop
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] Entering main loop, start_iteration=%d", start_iteration
|
"[AgentLoop.execute] Entering main loop, start_iteration=%d", start_iteration
|
||||||
)
|
)
|
||||||
for iteration in range(start_iteration, self._config.max_iterations):
|
for iteration in range(start_iteration, self._config.max_iterations):
|
||||||
iter_start = time.time()
|
iter_start = time.time()
|
||||||
logger.debug("[EventLoopNode.execute] iteration=%d starting", iteration)
|
logger.debug("[AgentLoop.execute] iteration=%d starting", iteration)
|
||||||
|
|
||||||
# 6a. Check pause (no current-iteration data yet — only log_node_complete needed)
|
# 6a. Check pause (no current-iteration data yet — only log_node_complete needed)
|
||||||
if await self._check_pause(ctx, conversation, iteration):
|
if await self._check_pause(ctx, conversation, iteration):
|
||||||
@@ -601,18 +534,18 @@ class EventLoopNode(NodeProtocol):
|
|||||||
|
|
||||||
# 6b. Drain injection queue
|
# 6b. Drain injection queue
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d: draining injection queue...", iteration
|
"[AgentLoop.execute] iteration=%d: draining injection queue...", iteration
|
||||||
)
|
)
|
||||||
drained_injections = await self._drain_injection_queue(conversation, ctx)
|
drained_injections = await self._drain_injection_queue(conversation, ctx)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d: drained %d injections",
|
"[AgentLoop.execute] iteration=%d: drained %d injections",
|
||||||
iteration,
|
iteration,
|
||||||
drained_injections,
|
drained_injections,
|
||||||
)
|
)
|
||||||
# 6b1. Drain trigger queue (framework-level signals)
|
# 6b1. Drain trigger queue (framework-level signals)
|
||||||
drained_triggers = await self._drain_trigger_queue(conversation)
|
drained_triggers = await self._drain_trigger_queue(conversation)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d: drained %d triggers",
|
"[AgentLoop.execute] iteration=%d: drained %d triggers",
|
||||||
iteration,
|
iteration,
|
||||||
drained_triggers,
|
drained_triggers,
|
||||||
)
|
)
|
||||||
@@ -685,8 +618,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
"ask_user",
|
"ask_user",
|
||||||
"ask_user_multiple",
|
"ask_user_multiple",
|
||||||
"escalate",
|
"escalate",
|
||||||
"delegate_to_sub_agent",
|
|
||||||
"report_to_parent",
|
|
||||||
}
|
}
|
||||||
synthetic = [t for t in tools if t.name in _synthetic_names]
|
synthetic = [t for t in tools if t.name in _synthetic_names]
|
||||||
tools.clear()
|
tools.clear()
|
||||||
@@ -696,11 +627,11 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# 6b3. Dynamic prompt refresh (phase switching / memory refresh)
|
# 6b3. Dynamic prompt refresh (phase switching / memory refresh)
|
||||||
if ctx.dynamic_prompt_provider is not None or ctx.dynamic_memory_provider is not None:
|
if ctx.dynamic_prompt_provider is not None or ctx.dynamic_memory_provider is not None:
|
||||||
if ctx.dynamic_prompt_provider is not None:
|
if ctx.dynamic_prompt_provider is not None:
|
||||||
from framework.graph.prompting import stamp_prompt_datetime
|
from framework.orchestrator.prompting import stamp_prompt_datetime
|
||||||
|
|
||||||
_new_prompt = stamp_prompt_datetime(ctx.dynamic_prompt_provider())
|
_new_prompt = stamp_prompt_datetime(ctx.dynamic_prompt_provider())
|
||||||
else:
|
else:
|
||||||
from framework.graph.prompting import build_system_prompt_for_node_context
|
from framework.orchestrator.prompting import build_system_prompt_for_node_context
|
||||||
|
|
||||||
_new_prompt = build_system_prompt_for_node_context(ctx)
|
_new_prompt = build_system_prompt_for_node_context(ctx)
|
||||||
if _new_prompt != conversation.system_prompt:
|
if _new_prompt != conversation.system_prompt:
|
||||||
@@ -743,7 +674,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
len(conversation.messages),
|
len(conversation.messages),
|
||||||
)
|
)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d: entering _run_single_turn loop", iteration
|
"[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration
|
||||||
)
|
)
|
||||||
_stream_retry_count = 0
|
_stream_retry_count = 0
|
||||||
_turn_cancelled = False
|
_turn_cancelled = False
|
||||||
@@ -752,7 +683,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d: calling _run_single_turn (retry=%d)",
|
"[AgentLoop.execute] iteration=%d: calling _run_single_turn (retry=%d)",
|
||||||
iteration,
|
iteration,
|
||||||
_stream_retry_count,
|
_stream_retry_count,
|
||||||
)
|
)
|
||||||
@@ -768,12 +699,12 @@ class EventLoopNode(NodeProtocol):
|
|||||||
queen_input_requested,
|
queen_input_requested,
|
||||||
request_system_prompt,
|
request_system_prompt,
|
||||||
request_messages,
|
request_messages,
|
||||||
reported_to_parent,
|
_,
|
||||||
) = await self._run_single_turn(
|
) = await self._run_single_turn(
|
||||||
ctx, conversation, tools, iteration, accumulator
|
ctx, conversation, tools, iteration, accumulator
|
||||||
)
|
)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d:"
|
"[AgentLoop.execute] iteration=%d:"
|
||||||
" _run_single_turn completed successfully",
|
" _run_single_turn completed successfully",
|
||||||
iteration,
|
iteration,
|
||||||
)
|
)
|
||||||
@@ -842,13 +773,13 @@ class EventLoopNode(NodeProtocol):
|
|||||||
break # success — exit retry loop
|
break # success — exit retry loop
|
||||||
|
|
||||||
except TurnCancelled:
|
except TurnCancelled:
|
||||||
logger.debug("[EventLoopNode.execute] iteration=%d: TurnCancelled", iteration)
|
logger.debug("[AgentLoop.execute] iteration=%d: TurnCancelled", iteration)
|
||||||
_turn_cancelled = True
|
_turn_cancelled = True
|
||||||
break
|
break
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d:"
|
"[AgentLoop.execute] iteration=%d:"
|
||||||
" Exception in _run_single_turn: %s (%s)",
|
" Exception in _run_single_turn: %s (%s)",
|
||||||
iteration,
|
iteration,
|
||||||
type(e).__name__,
|
type(e).__name__,
|
||||||
@@ -1024,7 +955,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
and not outputs_set
|
and not outputs_set
|
||||||
and not user_input_requested
|
and not user_input_requested
|
||||||
and not queen_input_requested
|
and not queen_input_requested
|
||||||
and not reported_to_parent
|
|
||||||
)
|
)
|
||||||
if truly_empty and accumulator is not None:
|
if truly_empty and accumulator is not None:
|
||||||
missing = self._get_missing_output_keys(
|
missing = self._get_missing_output_keys(
|
||||||
@@ -1276,14 +1207,14 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# blocking and resumption.
|
# blocking and resumption.
|
||||||
_is_worker = (
|
_is_worker = (
|
||||||
stream_id not in ("queen", "judge")
|
stream_id not in ("queen", "judge")
|
||||||
and not ctx.is_subagent_mode
|
and not False
|
||||||
and not ctx.supports_direct_user_io
|
and not ctx.supports_direct_user_io
|
||||||
and self._event_bus is not None
|
and self._event_bus is not None
|
||||||
)
|
)
|
||||||
_worker_no_tool_turn = (
|
_worker_no_tool_turn = (
|
||||||
not real_tool_results
|
not real_tool_results
|
||||||
and not outputs_set
|
and not outputs_set
|
||||||
and not reported_to_parent
|
|
||||||
and not queen_input_requested
|
and not queen_input_requested
|
||||||
and not user_input_requested
|
and not user_input_requested
|
||||||
)
|
)
|
||||||
@@ -1733,7 +1664,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
|
|
||||||
# 6i. Judge evaluation
|
# 6i. Judge evaluation
|
||||||
should_judge = (
|
should_judge = (
|
||||||
ctx.is_subagent_mode # Always evaluate subagents
|
False
|
||||||
or (iteration + 1) % self._config.judge_every_n_turns == 0
|
or (iteration + 1) % self._config.judge_every_n_turns == 0
|
||||||
or not real_tool_results # no real tool calls = natural stop
|
or not real_tool_results # no real tool calls = natural stop
|
||||||
)
|
)
|
||||||
@@ -1789,7 +1720,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
missing = self._get_missing_output_keys(
|
missing = self._get_missing_output_keys(
|
||||||
accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
|
accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
|
||||||
)
|
)
|
||||||
if missing and self._judge is not None and not self._mark_complete_flag:
|
if missing and self._judge is not None :
|
||||||
hint = (
|
hint = (
|
||||||
f"Task incomplete. Required outputs not yet produced: {missing}. "
|
f"Task incomplete. Required outputs not yet produced: {missing}. "
|
||||||
f"Follow your system prompt instructions to complete the work."
|
f"Follow your system prompt instructions to complete the work."
|
||||||
@@ -1988,7 +1919,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
image_content: Optional list of OpenAI-style image blocks to attach.
|
image_content: Optional list of OpenAI-style image blocks to attach.
|
||||||
"""
|
"""
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.inject_event] content_len=%d,"
|
"[AgentLoop.inject_event] content_len=%d,"
|
||||||
" is_client_input=%s, has_images=%s,"
|
" is_client_input=%s, has_images=%s,"
|
||||||
" queue_size_before=%d",
|
" queue_size_before=%d",
|
||||||
len(content) if content else 0,
|
len(content) if content else 0,
|
||||||
@@ -1998,15 +1929,15 @@ class EventLoopNode(NodeProtocol):
|
|||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await self._injection_queue.put((content, is_client_input, image_content))
|
await self._injection_queue.put((content, is_client_input, image_content))
|
||||||
logger.debug("[EventLoopNode.inject_event] Message queued successfully")
|
logger.debug("[AgentLoop.inject_event] Message queued successfully")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("[EventLoopNode.inject_event] Failed to queue message: %s", e)
|
logger.exception("[AgentLoop.inject_event] Failed to queue message: %s", e)
|
||||||
raise
|
raise
|
||||||
try:
|
try:
|
||||||
self._input_ready.set()
|
self._input_ready.set()
|
||||||
logger.debug("[EventLoopNode.inject_event] _input_ready.set() called")
|
logger.debug("[AgentLoop.inject_event] _input_ready.set() called")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("[EventLoopNode.inject_event] Failed to set _input_ready: %s", e)
|
logger.exception("[AgentLoop.inject_event] Failed to set _input_ready: %s", e)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def inject_trigger(self, trigger: TriggerEvent) -> None:
|
async def inject_trigger(self, trigger: TriggerEvent) -> None:
|
||||||
@@ -2157,7 +2088,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
ask_user_prompt = ""
|
ask_user_prompt = ""
|
||||||
ask_user_options: list[str] | None = None
|
ask_user_options: list[str] | None = None
|
||||||
queen_input_requested = False
|
queen_input_requested = False
|
||||||
reported_to_parent = False
|
|
||||||
# Accumulate ALL tool calls across inner iterations for L3 logging.
|
# Accumulate ALL tool calls across inner iterations for L3 logging.
|
||||||
# Unlike real_tool_results (reset each inner iteration), this persists.
|
# Unlike real_tool_results (reset each inner iteration), this persists.
|
||||||
logged_tool_calls: list[dict] = []
|
logged_tool_calls: list[dict] = []
|
||||||
@@ -2231,16 +2161,28 @@ class EventLoopNode(NodeProtocol):
|
|||||||
):
|
):
|
||||||
if isinstance(event, TextDeltaEvent):
|
if isinstance(event, TextDeltaEvent):
|
||||||
accumulated_text = event.snapshot
|
accumulated_text = event.snapshot
|
||||||
await self._publish_text_delta(
|
# Filter <think>...</think> blocks from client output.
|
||||||
stream_id,
|
# Content inside think tags is internal reasoning -- only
|
||||||
node_id,
|
# the text after </think> is shown to the user.
|
||||||
event.content,
|
_content = event.content
|
||||||
event.snapshot,
|
if "<think>" in event.snapshot and "</think>" not in event.snapshot:
|
||||||
ctx,
|
_content = "" # still inside think block
|
||||||
execution_id,
|
elif "</think>" in _content:
|
||||||
iteration=iteration,
|
# End of think block -- emit only text after the tag
|
||||||
inner_turn=inner_turn,
|
_content = _content.split("</think>", 1)[-1]
|
||||||
)
|
elif "<think>" in _content:
|
||||||
|
_content = "" # opening tag in this chunk
|
||||||
|
if _content:
|
||||||
|
await self._publish_text_delta(
|
||||||
|
stream_id,
|
||||||
|
node_id,
|
||||||
|
_content,
|
||||||
|
event.snapshot,
|
||||||
|
ctx,
|
||||||
|
execution_id,
|
||||||
|
iteration=iteration,
|
||||||
|
inner_turn=inner_turn,
|
||||||
|
)
|
||||||
|
|
||||||
elif isinstance(event, ToolCallEvent):
|
elif isinstance(event, ToolCallEvent):
|
||||||
_tc.append(event)
|
_tc.append(event)
|
||||||
@@ -2348,10 +2290,27 @@ class EventLoopNode(NodeProtocol):
|
|||||||
queen_input_requested,
|
queen_input_requested,
|
||||||
final_system_prompt,
|
final_system_prompt,
|
||||||
final_messages,
|
final_messages,
|
||||||
reported_to_parent,
|
False,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Execute tool calls — framework tools (set_output, ask_user)
|
# Priority drain: if user sent a message while the LLM was
|
||||||
|
# streaming, inject it into the conversation NOW -- before tool
|
||||||
|
# execution. The LLM will see it on the next inner turn.
|
||||||
|
if not self._injection_queue.empty():
|
||||||
|
while not self._injection_queue.empty():
|
||||||
|
_inj_content, _inj_client, _inj_images = (
|
||||||
|
self._injection_queue.get_nowait()
|
||||||
|
)
|
||||||
|
if _inj_client:
|
||||||
|
await conversation.add_user_message(_inj_content)
|
||||||
|
logger.info(
|
||||||
|
"[%s] Priority-injected user message mid-turn (%d chars)",
|
||||||
|
node_id, len(_inj_content),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
await conversation.add_user_message(_inj_content)
|
||||||
|
|
||||||
|
# Execute tool calls -- framework tools (set_output, ask_user)
|
||||||
# run inline; real MCP tools run in parallel.
|
# run inline; real MCP tools run in parallel.
|
||||||
real_tool_results: list[dict] = []
|
real_tool_results: list[dict] = []
|
||||||
limit_hit = False
|
limit_hit = False
|
||||||
@@ -2361,13 +2320,12 @@ class EventLoopNode(NodeProtocol):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Phase 1: triage — handle framework tools immediately,
|
# Phase 1: triage — handle framework tools immediately,
|
||||||
# queue real tools and subagents for parallel execution.
|
# queue real tools for parallel execution.
|
||||||
results_by_id: dict[str, ToolResult] = {}
|
results_by_id: dict[str, ToolResult] = {}
|
||||||
timing_by_id: dict[
|
timing_by_id: dict[
|
||||||
str, dict[str, Any]
|
str, dict[str, Any]
|
||||||
] = {} # tool_use_id -> {start_timestamp, duration_s}
|
] = {} # tool_use_id -> {start_timestamp, duration_s}
|
||||||
pending_real: list[ToolCallEvent] = []
|
pending_real: list[ToolCallEvent] = []
|
||||||
pending_subagent: list[ToolCallEvent] = []
|
|
||||||
|
|
||||||
for tc in tool_calls:
|
for tc in tool_calls:
|
||||||
tool_call_count += 1
|
tool_call_count += 1
|
||||||
@@ -2610,76 +2568,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
)
|
)
|
||||||
results_by_id[tc.tool_use_id] = result
|
results_by_id[tc.tool_use_id] = result
|
||||||
|
|
||||||
elif tc.tool_name == "delegate_to_sub_agent":
|
|
||||||
# Guard: in continuous mode the LLM may see delegate
|
|
||||||
# calls from a previous node's conversation history and
|
|
||||||
# attempt to re-use the tool on a node that doesn't own
|
|
||||||
# it. Only accept if the tool was actually offered.
|
|
||||||
if not any(t.name == "delegate_to_sub_agent" for t in tools):
|
|
||||||
logger.warning(
|
|
||||||
"[%s] LLM called delegate_to_sub_agent but tool "
|
|
||||||
"was not offered to this node — rejecting",
|
|
||||||
node_id,
|
|
||||||
)
|
|
||||||
result = ToolResult(
|
|
||||||
tool_use_id=tc.tool_use_id,
|
|
||||||
content=(
|
|
||||||
"ERROR: delegate_to_sub_agent is not available "
|
|
||||||
"on this node. This tool belongs to a different "
|
|
||||||
"node in the workflow."
|
|
||||||
),
|
|
||||||
is_error=True,
|
|
||||||
)
|
|
||||||
results_by_id[tc.tool_use_id] = result
|
|
||||||
continue
|
|
||||||
# --- Framework-level subagent delegation ---
|
|
||||||
# Queue for parallel execution in Phase 2
|
|
||||||
logger.info(
|
|
||||||
"🔄 LLM requesting subagent delegation: agent_id='%s', task='%s'",
|
|
||||||
tc.tool_input.get("agent_id", "?"),
|
|
||||||
(tc.tool_input.get("task", "")[:100] + "...")
|
|
||||||
if len(tc.tool_input.get("task", "")) > 100
|
|
||||||
else tc.tool_input.get("task", ""),
|
|
||||||
)
|
|
||||||
pending_subagent.append(tc)
|
|
||||||
|
|
||||||
elif tc.tool_name == "report_to_parent":
|
|
||||||
# --- Report from sub-agent to parent (optionally blocking) ---
|
|
||||||
reported_to_parent = True
|
|
||||||
msg = tc.tool_input.get("message", "")
|
|
||||||
data = tc.tool_input.get("data")
|
|
||||||
wait = tc.tool_input.get("wait_for_response", False)
|
|
||||||
mark_complete = tc.tool_input.get("mark_complete", False)
|
|
||||||
response = None
|
|
||||||
|
|
||||||
if ctx.report_callback:
|
|
||||||
try:
|
|
||||||
response = await ctx.report_callback(
|
|
||||||
msg,
|
|
||||||
data,
|
|
||||||
wait_for_response=wait,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
logger.warning(
|
|
||||||
"[%s] report_to_parent callback failed (swallowed)",
|
|
||||||
node_id,
|
|
||||||
exc_info=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
if mark_complete:
|
|
||||||
self._mark_complete_flag = True
|
|
||||||
logger.info(
|
|
||||||
"[%s] mark_complete=True — subagent will accept on this iteration",
|
|
||||||
node_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
result = ToolResult(
|
|
||||||
tool_use_id=tc.tool_use_id,
|
|
||||||
content=response if (wait and response) else "Report sent to parent.",
|
|
||||||
is_error=False,
|
|
||||||
)
|
|
||||||
results_by_id[tc.tool_use_id] = result
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# --- Real tool: check for truncated args, else queue ---
|
# --- Real tool: check for truncated args, else queue ---
|
||||||
if "_raw" in tc.tool_input:
|
if "_raw" in tc.tool_input:
|
||||||
@@ -2754,175 +2642,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
result = raw
|
result = raw
|
||||||
results_by_id[tc.tool_use_id] = self._truncate_tool_result(result, tc.tool_name)
|
results_by_id[tc.tool_use_id] = self._truncate_tool_result(result, tc.tool_name)
|
||||||
|
|
||||||
# Phase 2b: execute subagent delegations in parallel.
|
|
||||||
if pending_subagent:
|
|
||||||
_subagent_timeout = self._config.subagent_timeout_seconds
|
|
||||||
_inactivity_timeout = self._config.subagent_inactivity_timeout_seconds
|
|
||||||
|
|
||||||
async def _timed_subagent(
|
|
||||||
_ctx: NodeContext,
|
|
||||||
_tc: ToolCallEvent,
|
|
||||||
_acc: OutputAccumulator = accumulator,
|
|
||||||
_wall_timeout: float = _subagent_timeout,
|
|
||||||
_activity_timeout: float = _inactivity_timeout,
|
|
||||||
) -> tuple[ToolResult | BaseException, str, float]:
|
|
||||||
_s = time.time()
|
|
||||||
_iso = datetime.now(UTC).isoformat()
|
|
||||||
_last_activity = _s
|
|
||||||
_activity_event = asyncio.Event()
|
|
||||||
|
|
||||||
async def _watchdog() -> None:
|
|
||||||
"""Watchdog that times out only after inactivity period."""
|
|
||||||
nonlocal _last_activity
|
|
||||||
while True:
|
|
||||||
_now = time.time()
|
|
||||||
_inactive_for = _now - _last_activity
|
|
||||||
_remaining = _activity_timeout - _inactive_for
|
|
||||||
|
|
||||||
if _remaining <= 0:
|
|
||||||
# Inactivity timeout reached
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
await asyncio.wait_for(_activity_event.wait(), timeout=_remaining)
|
|
||||||
_activity_event.clear()
|
|
||||||
except TimeoutError:
|
|
||||||
# Check again in case activity happened during wait
|
|
||||||
continue
|
|
||||||
|
|
||||||
async def _run_with_activity_timeout(
|
|
||||||
_coro,
|
|
||||||
) -> ToolResult:
|
|
||||||
"""Run subagent with activity-based timeout."""
|
|
||||||
_watchdog_task = asyncio.create_task(_watchdog())
|
|
||||||
try:
|
|
||||||
_result = await _coro
|
|
||||||
return _result
|
|
||||||
finally:
|
|
||||||
_watchdog_task.cancel()
|
|
||||||
try:
|
|
||||||
await _watchdog_task
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Subscribe to subagent activity events to reset inactivity timer
|
|
||||||
async def _on_subagent_activity(event) -> None:
|
|
||||||
nonlocal _last_activity
|
|
||||||
_last_activity = time.time()
|
|
||||||
_activity_event.set()
|
|
||||||
|
|
||||||
_sub_id = None
|
|
||||||
if self._event_bus and _activity_timeout > 0:
|
|
||||||
from framework.runtime.event_bus import EventType
|
|
||||||
|
|
||||||
_sub_id = self._event_bus.subscribe(
|
|
||||||
event_types=[
|
|
||||||
EventType.TOOL_CALL_STARTED,
|
|
||||||
EventType.LLM_TEXT_DELTA,
|
|
||||||
EventType.EXECUTION_STARTED,
|
|
||||||
],
|
|
||||||
handler=_on_subagent_activity,
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
_coro = self._execute_subagent(
|
|
||||||
_ctx,
|
|
||||||
_tc.tool_input.get("agent_id", ""),
|
|
||||||
_tc.tool_input.get("task", ""),
|
|
||||||
accumulator=_acc,
|
|
||||||
)
|
|
||||||
|
|
||||||
if _activity_timeout > 0:
|
|
||||||
# Use activity-based timeout with wall-clock max
|
|
||||||
_result_coro = _run_with_activity_timeout(_coro)
|
|
||||||
if _wall_timeout > 0:
|
|
||||||
_r = await asyncio.wait_for(_result_coro, timeout=_wall_timeout)
|
|
||||||
else:
|
|
||||||
_r = await _result_coro
|
|
||||||
elif _wall_timeout > 0:
|
|
||||||
_r = await asyncio.wait_for(_coro, timeout=_wall_timeout)
|
|
||||||
else:
|
|
||||||
_r = await _coro
|
|
||||||
finally:
|
|
||||||
if _sub_id and self._event_bus:
|
|
||||||
self._event_bus.unsubscribe(_sub_id)
|
|
||||||
|
|
||||||
except TimeoutError:
|
|
||||||
_agent_id = _tc.tool_input.get("agent_id", "unknown")
|
|
||||||
_elapsed = time.time() - _s
|
|
||||||
logger.warning(
|
|
||||||
"Subagent '%s' timed out after %.0fs (inactivity threshold: %.0fs)",
|
|
||||||
_agent_id,
|
|
||||||
_elapsed,
|
|
||||||
_activity_timeout if _activity_timeout > 0 else _wall_timeout,
|
|
||||||
)
|
|
||||||
_r = ToolResult(
|
|
||||||
tool_use_id=_tc.tool_use_id,
|
|
||||||
content=(
|
|
||||||
f"Subagent '{_agent_id}' timed out after "
|
|
||||||
f"{_elapsed:.0f}s of inactivity. "
|
|
||||||
"The subagent was not making progress. "
|
|
||||||
"Try a simpler task or break it into smaller pieces."
|
|
||||||
),
|
|
||||||
is_error=True,
|
|
||||||
)
|
|
||||||
except BaseException as _exc:
|
|
||||||
_r = _exc
|
|
||||||
_dur = round(time.time() - _s, 3)
|
|
||||||
return _r, _iso, _dur
|
|
||||||
|
|
||||||
subagent_timed = await asyncio.gather(
|
|
||||||
*(_timed_subagent(ctx, tc) for tc in pending_subagent),
|
|
||||||
return_exceptions=True,
|
|
||||||
)
|
|
||||||
for tc, entry in zip(pending_subagent, subagent_timed, strict=True):
|
|
||||||
if isinstance(entry, BaseException):
|
|
||||||
raw = entry
|
|
||||||
_start_iso = datetime.now(UTC).isoformat()
|
|
||||||
_dur_s = 0
|
|
||||||
else:
|
|
||||||
raw, _start_iso, _dur_s = entry
|
|
||||||
_sa_timing = {
|
|
||||||
"start_timestamp": _start_iso,
|
|
||||||
"duration_s": _dur_s,
|
|
||||||
}
|
|
||||||
if isinstance(raw, BaseException):
|
|
||||||
result = ToolResult(
|
|
||||||
tool_use_id=tc.tool_use_id,
|
|
||||||
content=json.dumps(
|
|
||||||
{
|
|
||||||
"message": f"Sub-agent execution raised: {raw}",
|
|
||||||
"data": None,
|
|
||||||
"metadata": {"success": False, "error": str(raw)},
|
|
||||||
}
|
|
||||||
),
|
|
||||||
is_error=True,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Attach the tool_use_id to the result
|
|
||||||
result = ToolResult(
|
|
||||||
tool_use_id=tc.tool_use_id,
|
|
||||||
content=raw.content,
|
|
||||||
is_error=raw.is_error,
|
|
||||||
)
|
|
||||||
# Route through _truncate_tool_result so large
|
|
||||||
# subagent results are saved to spillover files
|
|
||||||
# and survive pruning (instead of being "cleared
|
|
||||||
# from context" with no recovery path).
|
|
||||||
result = self._truncate_tool_result(result, "delegate_to_sub_agent")
|
|
||||||
results_by_id[tc.tool_use_id] = result
|
|
||||||
logged_tool_calls.append(
|
|
||||||
{
|
|
||||||
"tool_use_id": tc.tool_use_id,
|
|
||||||
"tool_name": "delegate_to_sub_agent",
|
|
||||||
"tool_input": tc.tool_input,
|
|
||||||
"content": result.content,
|
|
||||||
"is_error": result.is_error,
|
|
||||||
**_sa_timing,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Phase 3: record results into conversation in original order,
|
# Phase 3: record results into conversation in original order,
|
||||||
# build logged/real lists, and publish completed events.
|
# build logged/real lists, and publish completed events.
|
||||||
for tc in tool_calls[:executed_in_batch]:
|
for tc in tool_calls[:executed_in_batch]:
|
||||||
@@ -2936,8 +2655,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
"ask_user",
|
"ask_user",
|
||||||
"ask_user_multiple",
|
"ask_user_multiple",
|
||||||
"escalate",
|
"escalate",
|
||||||
"delegate_to_sub_agent",
|
|
||||||
"report_to_parent",
|
|
||||||
):
|
):
|
||||||
tool_entry = {
|
tool_entry = {
|
||||||
"tool_use_id": tc.tool_use_id,
|
"tool_use_id": tc.tool_use_id,
|
||||||
@@ -3056,7 +2773,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
queen_input_requested,
|
queen_input_requested,
|
||||||
final_system_prompt,
|
final_system_prompt,
|
||||||
final_messages,
|
final_messages,
|
||||||
reported_to_parent,
|
False,
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- Mid-turn pruning: prevent context blowup within a single turn ---
|
# --- Mid-turn pruning: prevent context blowup within a single turn ---
|
||||||
@@ -3090,7 +2807,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
queen_input_requested,
|
queen_input_requested,
|
||||||
final_system_prompt,
|
final_system_prompt,
|
||||||
final_messages,
|
final_messages,
|
||||||
reported_to_parent,
|
False,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Tool calls processed -- loop back to stream with updated conversation
|
# Tool calls processed -- loop back to stream with updated conversation
|
||||||
@@ -3118,16 +2835,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
"""Build the synthetic escalate tool. Delegates to synthetic_tools module."""
|
"""Build the synthetic escalate tool. Delegates to synthetic_tools module."""
|
||||||
return build_escalate_tool()
|
return build_escalate_tool()
|
||||||
|
|
||||||
def _build_delegate_tool(
|
|
||||||
self, sub_agents: list[str], node_registry: dict[str, Any]
|
|
||||||
) -> Tool | None:
|
|
||||||
"""Build the synthetic delegate_to_sub_agent tool. Delegates to synthetic_tools module."""
|
|
||||||
return build_delegate_tool(sub_agents, node_registry)
|
|
||||||
|
|
||||||
def _build_report_to_parent_tool(self) -> Tool:
|
|
||||||
"""Build the synthetic report_to_parent tool. Delegates to synthetic_tools module."""
|
|
||||||
return build_report_to_parent_tool()
|
|
||||||
|
|
||||||
def _handle_set_output(
|
def _handle_set_output(
|
||||||
self,
|
self,
|
||||||
tool_input: dict[str, Any],
|
tool_input: dict[str, Any],
|
||||||
@@ -3151,7 +2858,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
) -> JudgeVerdict:
|
) -> JudgeVerdict:
|
||||||
"""Evaluate the current state. Delegates to judge_pipeline module."""
|
"""Evaluate the current state. Delegates to judge_pipeline module."""
|
||||||
return await judge_turn(
|
return await judge_turn(
|
||||||
mark_complete_flag=self._mark_complete_flag,
|
mark_complete_flag=False,
|
||||||
judge=self._judge,
|
judge=self._judge,
|
||||||
ctx=ctx,
|
ctx=ctx,
|
||||||
conversation=conversation,
|
conversation=conversation,
|
||||||
@@ -3176,7 +2883,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
|
|
||||||
Delegates to :func:`extract_tool_call_history` in conversation.py.
|
Delegates to :func:`extract_tool_call_history` in conversation.py.
|
||||||
"""
|
"""
|
||||||
from framework.graph.conversation import extract_tool_call_history
|
from framework.agent_loop.conversation import extract_tool_call_history
|
||||||
|
|
||||||
return extract_tool_call_history(conversation.messages, max_entries=max_entries)
|
return extract_tool_call_history(conversation.messages, max_entries=max_entries)
|
||||||
|
|
||||||
@@ -3781,46 +3488,3 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# Subagent Execution
|
# Subagent Execution
|
||||||
# -------------------------------------------------------------------
|
# -------------------------------------------------------------------
|
||||||
|
|
||||||
async def _execute_subagent(
|
|
||||||
self,
|
|
||||||
ctx: NodeContext,
|
|
||||||
agent_id: str,
|
|
||||||
task: str,
|
|
||||||
*,
|
|
||||||
accumulator: OutputAccumulator | None = None,
|
|
||||||
) -> ToolResult:
|
|
||||||
"""Execute a subagent and return the result as a ToolResult.
|
|
||||||
|
|
||||||
The subagent:
|
|
||||||
- Gets a fresh conversation with just the task
|
|
||||||
- Has read-only access to the parent's readable data buffer
|
|
||||||
- Cannot delegate to its own subagents (prevents recursion)
|
|
||||||
- Returns its output in structured JSON format
|
|
||||||
|
|
||||||
Args:
|
|
||||||
ctx: Parent node's context (for data buffer, tools, LLM access).
|
|
||||||
agent_id: The node ID of the subagent to invoke.
|
|
||||||
task: The task description to give the subagent.
|
|
||||||
accumulator: Parent's OutputAccumulator — provides outputs that
|
|
||||||
have been set via ``set_output`` but not yet written to
|
|
||||||
data buffer (which only happens after the node completes).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
ToolResult with structured JSON output containing:
|
|
||||||
- message: Human-readable summary
|
|
||||||
- data: Subagent's output (free-form JSON)
|
|
||||||
- metadata: Execution metadata (success, tokens, latency)
|
|
||||||
"""
|
|
||||||
return await execute_subagent(
|
|
||||||
ctx=ctx,
|
|
||||||
agent_id=agent_id,
|
|
||||||
task=task,
|
|
||||||
accumulator=accumulator,
|
|
||||||
event_bus=self._event_bus,
|
|
||||||
config=self._config,
|
|
||||||
tool_executor=self._tool_executor,
|
|
||||||
conversation_store=self._conversation_store,
|
|
||||||
subagent_instance_counter=self._subagent_instance_counter,
|
|
||||||
event_loop_node_cls=type(self),
|
|
||||||
escalation_receiver_cls=_EscalationReceiver,
|
|
||||||
)
|
|
||||||
@@ -324,7 +324,7 @@ def _try_extract_key(content: str, key: str) -> str | None:
|
|||||||
3. Colon format: ``key: value``.
|
3. Colon format: ``key: value``.
|
||||||
4. Equals format: ``key = value``.
|
4. Equals format: ``key = value``.
|
||||||
"""
|
"""
|
||||||
from framework.graph.node import find_json_object
|
from framework.orchestrator.node import find_json_object
|
||||||
|
|
||||||
# 1. Whole message is JSON
|
# 1. Whole message is JSON
|
||||||
try:
|
try:
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
"""Agent loop internals -- compaction, judge, tools, subagent execution.
|
||||||
|
|
||||||
|
Re-exports from legacy locations for the new import path.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from framework.agent_loop.internals.compaction import * # noqa: F401, F403
|
||||||
|
from framework.agent_loop.internals.synthetic_tools import * # noqa: F401, F403
|
||||||
+9
-9
@@ -19,11 +19,11 @@ from datetime import UTC, datetime
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.conversation import Message, NodeConversation
|
from framework.agent_loop.conversation import Message, NodeConversation
|
||||||
from framework.graph.event_loop.event_publishing import publish_context_usage
|
from framework.agent_loop.internals.event_publishing import publish_context_usage
|
||||||
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
|
from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator
|
||||||
from framework.graph.node import NodeContext
|
from framework.orchestrator.node import NodeContext
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -368,8 +368,8 @@ async def llm_compact(
|
|||||||
in half and each half is summarised independently. Tool history is
|
in half and each half is summarised independently. Tool history is
|
||||||
appended once at the top-level call (``_depth == 0``).
|
appended once at the top-level call (``_depth == 0``).
|
||||||
"""
|
"""
|
||||||
from framework.graph.conversation import extract_tool_call_history
|
from framework.agent_loop.conversation import extract_tool_call_history
|
||||||
from framework.graph.event_loop.tool_result_handler import is_context_too_large_error
|
from framework.agent_loop.internals.tool_result_handler import is_context_too_large_error
|
||||||
|
|
||||||
if _depth > max_depth:
|
if _depth > max_depth:
|
||||||
raise RuntimeError(f"LLM compaction recursion limit ({max_depth})")
|
raise RuntimeError(f"LLM compaction recursion limit ({max_depth})")
|
||||||
@@ -724,7 +724,7 @@ async def log_compaction(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if event_bus:
|
if event_bus:
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
event_data: dict[str, Any] = {
|
event_data: dict[str, Any] = {
|
||||||
"level": level,
|
"level": level,
|
||||||
@@ -861,6 +861,6 @@ def _extract_tool_call_history(conversation: NodeConversation) -> str:
|
|||||||
directly (vs. the module-level extract_tool_call_history in conversation.py
|
directly (vs. the module-level extract_tool_call_history in conversation.py
|
||||||
which works on raw message lists).
|
which works on raw message lists).
|
||||||
"""
|
"""
|
||||||
from framework.graph.conversation import extract_tool_call_history
|
from framework.agent_loop.conversation import extract_tool_call_history
|
||||||
|
|
||||||
return extract_tool_call_history(list(conversation.messages))
|
return extract_tool_call_history(list(conversation.messages))
|
||||||
+3
-3
@@ -14,9 +14,9 @@ from collections.abc import Awaitable, Callable
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.conversation import ConversationStore, NodeConversation
|
from framework.agent_loop.conversation import ConversationStore, NodeConversation
|
||||||
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator, TriggerEvent
|
from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator, TriggerEvent
|
||||||
from framework.graph.node import NodeContext
|
from framework.orchestrator.node import NodeContext
|
||||||
from framework.llm.capabilities import supports_image_tool_results
|
from framework.llm.capabilities import supports_image_tool_results
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
+5
-5
@@ -9,10 +9,10 @@ from __future__ import annotations
|
|||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from framework.graph.conversation import NodeConversation
|
from framework.agent_loop.conversation import NodeConversation
|
||||||
from framework.graph.event_loop.types import HookContext
|
from framework.agent_loop.internals.types import HookContext
|
||||||
from framework.graph.node import NodeContext
|
from framework.orchestrator.node import NodeContext
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -177,7 +177,7 @@ async def publish_context_usage(
|
|||||||
if not event_bus:
|
if not event_bus:
|
||||||
return
|
return
|
||||||
|
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
estimated = conversation.estimate_tokens()
|
estimated = conversation.estimate_tokens()
|
||||||
max_tokens = conversation._max_context_tokens
|
max_tokens = conversation._max_context_tokens
|
||||||
+4
-4
@@ -5,9 +5,9 @@ from __future__ import annotations
|
|||||||
import logging
|
import logging
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
|
|
||||||
from framework.graph.conversation import NodeConversation
|
from framework.agent_loop.conversation import NodeConversation
|
||||||
from framework.graph.event_loop.types import JudgeProtocol, JudgeVerdict, OutputAccumulator
|
from framework.agent_loop.internals.types import JudgeProtocol, JudgeVerdict, OutputAccumulator
|
||||||
from framework.graph.node import NodeContext
|
from framework.orchestrator.node import NodeContext
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -155,7 +155,7 @@ async def judge_turn(
|
|||||||
|
|
||||||
# Level 2b: conversation-aware quality check (if success_criteria set)
|
# Level 2b: conversation-aware quality check (if success_criteria set)
|
||||||
if ctx.node_spec.success_criteria and ctx.llm:
|
if ctx.node_spec.success_criteria and ctx.llm:
|
||||||
from framework.graph.conversation_judge import evaluate_phase_completion
|
from framework.orchestrator.conversation_judge import evaluate_phase_completion
|
||||||
|
|
||||||
verdict = await evaluate_phase_completion(
|
verdict = await evaluate_phase_completion(
|
||||||
llm=ctx.llm,
|
llm=ctx.llm,
|
||||||
-112
@@ -204,118 +204,6 @@ def build_escalate_tool() -> Tool:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def build_delegate_tool(sub_agents: list[str], node_registry: dict[str, Any]) -> Tool | None:
|
|
||||||
"""Build the synthetic delegate_to_sub_agent tool for subagent invocation.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
sub_agents: List of node IDs that can be invoked as subagents.
|
|
||||||
node_registry: Map of node_id -> NodeSpec for looking up subagent descriptions.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tool definition if sub_agents is non-empty, None otherwise.
|
|
||||||
"""
|
|
||||||
if not sub_agents:
|
|
||||||
return None
|
|
||||||
|
|
||||||
agent_descriptions = []
|
|
||||||
for agent_id in sub_agents:
|
|
||||||
spec = node_registry.get(agent_id)
|
|
||||||
if spec:
|
|
||||||
desc = getattr(spec, "description", "(no description)")
|
|
||||||
agent_descriptions.append(f"- {agent_id}: {desc}")
|
|
||||||
else:
|
|
||||||
agent_descriptions.append(f"- {agent_id}: (not found in registry)")
|
|
||||||
|
|
||||||
return Tool(
|
|
||||||
name="delegate_to_sub_agent",
|
|
||||||
description=(
|
|
||||||
"Delegate a task to a specialized sub-agent. The sub-agent runs "
|
|
||||||
"autonomously with read-only access to current memory and returns "
|
|
||||||
"its result. Use this to parallelize work or leverage specialized capabilities.\n\n"
|
|
||||||
"Available sub-agents:\n" + "\n".join(agent_descriptions)
|
|
||||||
),
|
|
||||||
parameters={
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"agent_id": {
|
|
||||||
"type": "string",
|
|
||||||
"description": f"The sub-agent to invoke. Must be one of: {sub_agents}",
|
|
||||||
"enum": sub_agents,
|
|
||||||
},
|
|
||||||
"task": {
|
|
||||||
"type": "string",
|
|
||||||
"description": (
|
|
||||||
"The task description for the sub-agent to execute. "
|
|
||||||
"Be specific about what you want the sub-agent to do and "
|
|
||||||
"what information to return."
|
|
||||||
),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["agent_id", "task"],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def build_report_to_parent_tool() -> Tool:
|
|
||||||
"""Build the synthetic report_to_parent tool for sub-agent progress reports.
|
|
||||||
|
|
||||||
Sub-agents call this to send one-way progress updates, partial findings,
|
|
||||||
or status reports to the parent node (and external observers via event bus)
|
|
||||||
without blocking execution.
|
|
||||||
|
|
||||||
When ``wait_for_response`` is True, the sub-agent blocks until the parent
|
|
||||||
relays the user's response — used for escalation (e.g. login pages, CAPTCHAs).
|
|
||||||
|
|
||||||
When ``mark_complete`` is True, the sub-agent terminates immediately after
|
|
||||||
sending the report — no need to call set_output for each output key.
|
|
||||||
"""
|
|
||||||
return Tool(
|
|
||||||
name="report_to_parent",
|
|
||||||
description=(
|
|
||||||
"Send a report to the parent agent. By default this is fire-and-forget: "
|
|
||||||
"the parent receives the report but does not respond. "
|
|
||||||
"Set wait_for_response=true to BLOCK until the user replies — use this "
|
|
||||||
"when you need human intervention (e.g. login pages, CAPTCHAs, "
|
|
||||||
"authentication walls). The user's response is returned as the tool result. "
|
|
||||||
"Set mark_complete=true to finish your task and terminate immediately "
|
|
||||||
"after sending the report — use this when your findings are in the "
|
|
||||||
"message/data fields and you don't need to call set_output."
|
|
||||||
),
|
|
||||||
parameters={
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"message": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "A human-readable status or progress message.",
|
|
||||||
},
|
|
||||||
"data": {
|
|
||||||
"type": "object",
|
|
||||||
"description": "Optional structured data to include with the report.",
|
|
||||||
},
|
|
||||||
"wait_for_response": {
|
|
||||||
"type": "boolean",
|
|
||||||
"description": (
|
|
||||||
"If true, block execution until the user responds. "
|
|
||||||
"Use for escalation scenarios requiring human intervention."
|
|
||||||
),
|
|
||||||
"default": False,
|
|
||||||
},
|
|
||||||
"mark_complete": {
|
|
||||||
"type": "boolean",
|
|
||||||
"description": (
|
|
||||||
"If true, terminate the sub-agent immediately after sending "
|
|
||||||
"this report. The report message and data are delivered to the "
|
|
||||||
"parent as the final result. No set_output calls are needed."
|
|
||||||
),
|
|
||||||
"default": False,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["message"],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def handle_set_output(
|
def handle_set_output(
|
||||||
tool_input: dict[str, Any],
|
tool_input: dict[str, Any],
|
||||||
output_keys: list[str] | None,
|
output_keys: list[str] | None,
|
||||||
+2
-2
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Literal, Protocol, runtime_checkable
|
from typing import Any, Literal, Protocol, runtime_checkable
|
||||||
|
|
||||||
from framework.graph.conversation import (
|
from framework.agent_loop.conversation import (
|
||||||
ConversationStore,
|
ConversationStore,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -68,7 +68,7 @@ class LoopConfig:
|
|||||||
max_output_value_chars: int = 2_000
|
max_output_value_chars: int = 2_000
|
||||||
|
|
||||||
# Stream retry.
|
# Stream retry.
|
||||||
max_stream_retries: int = 3
|
max_stream_retries: int = 5
|
||||||
stream_retry_backoff_base: float = 2.0
|
stream_retry_backoff_base: float = 2.0
|
||||||
stream_retry_max_delay: float = 60.0
|
stream_retry_max_delay: float = 60.0
|
||||||
|
|
||||||
@@ -8,6 +8,14 @@ FRAMEWORK_AGENTS_DIR = Path(__file__).parent
|
|||||||
def list_framework_agents() -> list[Path]:
|
def list_framework_agents() -> list[Path]:
|
||||||
"""List all framework agent directories."""
|
"""List all framework agent directories."""
|
||||||
return sorted(
|
return sorted(
|
||||||
[p for p in FRAMEWORK_AGENTS_DIR.iterdir() if p.is_dir() and (p / "agent.py").exists()],
|
[
|
||||||
|
p
|
||||||
|
for p in FRAMEWORK_AGENTS_DIR.iterdir()
|
||||||
|
if p.is_dir()
|
||||||
|
and (
|
||||||
|
(p / "agent.json").exists()
|
||||||
|
or (p / "agent.py").exists()
|
||||||
|
)
|
||||||
|
],
|
||||||
key=lambda p: p.name,
|
key=lambda p: p.name,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -21,15 +21,15 @@ from pathlib import Path
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from framework.config import get_max_context_tokens
|
from framework.config import get_max_context_tokens
|
||||||
from framework.graph import Goal, NodeSpec, SuccessCriterion
|
from framework.orchestrator import Goal, NodeSpec, SuccessCriterion
|
||||||
from framework.graph.checkpoint_config import CheckpointConfig
|
from framework.orchestrator.checkpoint_config import CheckpointConfig
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
from framework.graph.executor import ExecutionResult
|
from framework.orchestrator.orchestrator import ExecutionResult
|
||||||
from framework.llm import LiteLLMProvider
|
from framework.llm import LiteLLMProvider
|
||||||
from framework.runner.mcp_registry import MCPRegistry
|
from framework.loader.mcp_registry import MCPRegistry
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
from framework.loader.tool_registry import ToolRegistry
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
from framework.host.agent_host import AgentHost
|
||||||
from framework.runtime.execution_stream import EntryPointSpec
|
from framework.host.execution_manager import EntryPointSpec
|
||||||
|
|
||||||
from .config import default_config
|
from .config import default_config
|
||||||
from .nodes import build_tester_node
|
from .nodes import build_tester_node
|
||||||
@@ -37,7 +37,7 @@ from .nodes import build_tester_node
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -233,7 +233,7 @@ requires_account_selection = True
|
|||||||
"""Signal TUI to show account picker before starting the agent."""
|
"""Signal TUI to show account picker before starting the agent."""
|
||||||
|
|
||||||
|
|
||||||
def configure_for_account(runner: AgentRunner, account: dict) -> None:
|
def configure_for_account(runner: AgentLoader, account: dict) -> None:
|
||||||
"""Scope the tester node's tools to the selected provider.
|
"""Scope the tester node's tools to the selected provider.
|
||||||
|
|
||||||
Handles both Aden accounts (account= routing) and local accounts
|
Handles both Aden accounts (account= routing) and local accounts
|
||||||
@@ -325,7 +325,7 @@ def _activate_local_account(credential_id: str, alias: str) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def _configure_aden_node(
|
def _configure_aden_node(
|
||||||
runner: AgentRunner,
|
runner: AgentLoader,
|
||||||
provider: str,
|
provider: str,
|
||||||
alias: str,
|
alias: str,
|
||||||
detail: str,
|
detail: str,
|
||||||
@@ -368,7 +368,7 @@ or any other identifier — always use the alias exactly as shown.
|
|||||||
|
|
||||||
|
|
||||||
def _configure_local_node(
|
def _configure_local_node(
|
||||||
runner: AgentRunner,
|
runner: AgentLoader,
|
||||||
provider: str,
|
provider: str,
|
||||||
alias: str,
|
alias: str,
|
||||||
identity: dict,
|
identity: dict,
|
||||||
@@ -497,7 +497,7 @@ class CredentialTesterAgent:
|
|||||||
def __init__(self, config=None):
|
def __init__(self, config=None):
|
||||||
self.config = config or default_config
|
self.config = config or default_config
|
||||||
self._selected_account: dict | None = None
|
self._selected_account: dict | None = None
|
||||||
self._agent_runtime: AgentRuntime | None = None
|
self._agent_runtime: AgentHost | None = None
|
||||||
self._tool_registry: ToolRegistry | None = None
|
self._tool_registry: ToolRegistry | None = None
|
||||||
self._storage_path: Path | None = None
|
self._storage_path: Path | None = None
|
||||||
|
|
||||||
@@ -613,7 +613,7 @@ class CredentialTesterAgent:
|
|||||||
|
|
||||||
graph = self._build_graph()
|
graph = self._build_graph()
|
||||||
|
|
||||||
self._agent_runtime = create_agent_runtime(
|
self._agent_runtime = AgentHost(
|
||||||
graph=graph,
|
graph=graph,
|
||||||
goal=goal,
|
goal=goal,
|
||||||
storage_path=self._storage_path,
|
storage_path=self._storage_path,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"""Node definitions for Credential Tester agent."""
|
"""Node definitions for Credential Tester agent."""
|
||||||
|
|
||||||
from framework.graph import NodeSpec
|
from framework.orchestrator import NodeSpec
|
||||||
|
|
||||||
|
|
||||||
def build_tester_node(
|
def build_tester_node(
|
||||||
|
|||||||
@@ -27,8 +27,8 @@ def _get_last_active(agent_path: Path) -> str | None:
|
|||||||
"""Return the most recent updated_at timestamp across all sessions.
|
"""Return the most recent updated_at timestamp across all sessions.
|
||||||
|
|
||||||
Checks both worker sessions (``~/.hive/agents/{name}/sessions/``) and
|
Checks both worker sessions (``~/.hive/agents/{name}/sessions/``) and
|
||||||
queen sessions (``~/.hive/queen/session/``) whose ``meta.json`` references
|
queen sessions (``~/.hive/agents/queens/default/sessions/``) whose
|
||||||
the same *agent_path*.
|
``meta.json`` references the same *agent_path*.
|
||||||
"""
|
"""
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
@@ -53,7 +53,9 @@ def _get_last_active(agent_path: Path) -> str | None:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# 2. Queen sessions
|
# 2. Queen sessions
|
||||||
queen_sessions_dir = Path.home() / ".hive" / "queen" / "session"
|
from framework.config import QUEENS_DIR
|
||||||
|
|
||||||
|
queen_sessions_dir = QUEENS_DIR / "default" / "sessions"
|
||||||
if queen_sessions_dir.exists():
|
if queen_sessions_dir.exists():
|
||||||
resolved = agent_path.resolve()
|
resolved = agent_path.resolve()
|
||||||
for d in queen_sessions_dir.iterdir():
|
for d in queen_sessions_dir.iterdir():
|
||||||
@@ -112,13 +114,33 @@ def _count_runs(agent_name: str) -> int:
|
|||||||
def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
|
def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
|
||||||
"""Extract node count, tool count, and tags from an agent directory.
|
"""Extract node count, tool count, and tags from an agent directory.
|
||||||
|
|
||||||
Prefers agent.py (AST-parsed) over agent.json for node/tool counts
|
Checks agent.json (declarative) first, then agent.py (legacy).
|
||||||
since agent.json may be stale. Tags are only available from agent.json.
|
|
||||||
"""
|
"""
|
||||||
import ast
|
import ast
|
||||||
|
|
||||||
node_count, tool_count, tags = 0, 0, []
|
node_count, tool_count, tags = 0, 0, []
|
||||||
|
|
||||||
|
# Declarative JSON agents (preferred)
|
||||||
|
agent_json = agent_path / "agent.json"
|
||||||
|
if agent_json.exists():
|
||||||
|
try:
|
||||||
|
data = json.loads(agent_json.read_text(encoding="utf-8"))
|
||||||
|
if isinstance(data, dict):
|
||||||
|
json_nodes = data.get("nodes", [])
|
||||||
|
node_count = len(json_nodes)
|
||||||
|
tools: set[str] = set()
|
||||||
|
for n in json_nodes:
|
||||||
|
node_tools = n.get("tools", {})
|
||||||
|
if isinstance(node_tools, dict):
|
||||||
|
tools.update(node_tools.get("allowed", []))
|
||||||
|
elif isinstance(node_tools, list):
|
||||||
|
tools.update(node_tools)
|
||||||
|
tool_count = len(tools)
|
||||||
|
return node_count, tool_count, tags
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Legacy: agent.py (AST-parsed)
|
||||||
agent_py = agent_path / "agent.py"
|
agent_py = agent_path / "agent.py"
|
||||||
if agent_py.exists():
|
if agent_py.exists():
|
||||||
try:
|
try:
|
||||||
@@ -132,39 +154,31 @@ def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
agent_json = agent_path / "agent.json"
|
|
||||||
if agent_json.exists():
|
|
||||||
try:
|
|
||||||
data = json.loads(agent_json.read_text(encoding="utf-8"))
|
|
||||||
json_nodes = data.get("graph", {}).get("nodes", []) or data.get("nodes", [])
|
|
||||||
if node_count == 0:
|
|
||||||
node_count = len(json_nodes)
|
|
||||||
tools: set[str] = set()
|
|
||||||
for n in json_nodes:
|
|
||||||
tools.update(n.get("tools", []))
|
|
||||||
tool_count = len(tools)
|
|
||||||
tags = data.get("agent", {}).get("tags", [])
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return node_count, tool_count, tags
|
return node_count, tool_count, tags
|
||||||
|
|
||||||
|
|
||||||
def discover_agents() -> dict[str, list[AgentEntry]]:
|
def discover_agents() -> dict[str, list[AgentEntry]]:
|
||||||
"""Discover agents from all known sources grouped by category."""
|
"""Discover agents from all known sources grouped by category."""
|
||||||
from framework.runner.cli import (
|
from framework.loader.cli import (
|
||||||
_extract_python_agent_metadata,
|
_extract_python_agent_metadata,
|
||||||
_get_framework_agents_dir,
|
_get_framework_agents_dir,
|
||||||
_is_valid_agent_dir,
|
_is_valid_agent_dir,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from framework.config import COLONIES_DIR
|
||||||
|
|
||||||
groups: dict[str, list[AgentEntry]] = {}
|
groups: dict[str, list[AgentEntry]] = {}
|
||||||
sources = [
|
sources = [
|
||||||
("Your Agents", Path("exports")),
|
("Your Agents", COLONIES_DIR),
|
||||||
|
("Your Agents", Path("exports")), # compat fallback
|
||||||
("Framework", _get_framework_agents_dir()),
|
("Framework", _get_framework_agents_dir()),
|
||||||
("Examples", Path("examples/templates")),
|
("Examples", Path("examples/templates")),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Track seen agent directory names to avoid duplicates when the same
|
||||||
|
# agent exists in both colonies/ and exports/ (colonies takes priority).
|
||||||
|
_seen_agent_names: set[str] = set()
|
||||||
|
|
||||||
for category, base_dir in sources:
|
for category, base_dir in sources:
|
||||||
if not base_dir.exists():
|
if not base_dir.exists():
|
||||||
continue
|
continue
|
||||||
@@ -172,6 +186,9 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
|
|||||||
for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
|
for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
|
||||||
if not _is_valid_agent_dir(path):
|
if not _is_valid_agent_dir(path):
|
||||||
continue
|
continue
|
||||||
|
if path.name in _seen_agent_names:
|
||||||
|
continue
|
||||||
|
_seen_agent_names.add(path.name)
|
||||||
|
|
||||||
name, desc = _extract_python_agent_metadata(path)
|
name, desc = _extract_python_agent_metadata(path)
|
||||||
config_fallback_name = path.name.replace("_", " ").title()
|
config_fallback_name = path.name.replace("_", " ").title()
|
||||||
@@ -179,13 +196,19 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
|
|||||||
|
|
||||||
node_count, tool_count, tags = _extract_agent_stats(path)
|
node_count, tool_count, tags = _extract_agent_stats(path)
|
||||||
if not used_config:
|
if not used_config:
|
||||||
agent_json = path / "agent.json"
|
# Try agent.json (declarative) for metadata
|
||||||
if agent_json.exists():
|
agent_json_path = path / "agent.json"
|
||||||
|
if agent_json_path.exists():
|
||||||
try:
|
try:
|
||||||
data = json.loads(agent_json.read_text(encoding="utf-8"))
|
data = json.loads(
|
||||||
meta = data.get("agent", {})
|
agent_json_path.read_text(encoding="utf-8"),
|
||||||
name = meta.get("name", name)
|
)
|
||||||
desc = meta.get("description", desc)
|
if isinstance(data, dict):
|
||||||
|
raw_name = data.get("name", name)
|
||||||
|
if "-" in raw_name and " " not in raw_name:
|
||||||
|
raw_name = raw_name.replace("-", " ").title()
|
||||||
|
name = raw_name
|
||||||
|
desc = data.get("description", desc)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -204,6 +227,8 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
if entries:
|
if entries:
|
||||||
groups[category] = entries
|
existing = groups.get(category, [])
|
||||||
|
existing.extend(entries)
|
||||||
|
groups[category] = existing
|
||||||
|
|
||||||
return groups
|
return groups
|
||||||
|
|||||||
@@ -1,19 +1,13 @@
|
|||||||
"""
|
"""Queen -- the agent builder for the Hive framework."""
|
||||||
Queen — Native agent builder for the Hive framework.
|
|
||||||
|
|
||||||
Deeply understands the agent framework and produces complete Python packages
|
from .agent import queen_goal, queen_loop_config
|
||||||
with goals, nodes, edges, system prompts, MCP configuration, and tests
|
|
||||||
from natural language specifications.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .agent import queen_goal, queen_graph
|
|
||||||
from .config import AgentMetadata, RuntimeConfig, default_config, metadata
|
from .config import AgentMetadata, RuntimeConfig, default_config, metadata
|
||||||
|
|
||||||
__version__ = "1.0.0"
|
__version__ = "1.0.0"
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"queen_goal",
|
"queen_goal",
|
||||||
"queen_graph",
|
"queen_loop_config",
|
||||||
"RuntimeConfig",
|
"RuntimeConfig",
|
||||||
"AgentMetadata",
|
"AgentMetadata",
|
||||||
"default_config",
|
"default_config",
|
||||||
|
|||||||
@@ -1,38 +1,29 @@
|
|||||||
"""Queen graph definition."""
|
"""Queen agent definition.
|
||||||
|
|
||||||
from framework.graph import Goal
|
The queen is a single AgentLoop -- no graph, no orchestrator.
|
||||||
from framework.graph.edge import GraphSpec
|
Loaded by queen_orchestrator.create_queen().
|
||||||
|
"""
|
||||||
|
|
||||||
|
from framework.orchestrator.goal import Goal
|
||||||
|
|
||||||
from .nodes import queen_node
|
from .nodes import queen_node
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Queen graph — the primary persistent conversation.
|
|
||||||
# Loaded by queen_orchestrator.create_queen(), NOT by AgentRunner.
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
queen_goal = Goal(
|
queen_goal = Goal(
|
||||||
id="queen-manager",
|
id="queen-manager",
|
||||||
name="Queen Manager",
|
name="Queen Manager",
|
||||||
description=(
|
description=(
|
||||||
"Manage the worker agent lifecycle and serve as the user's primary interactive interface."
|
"Manage the worker agent lifecycle and serve as the "
|
||||||
|
"user's primary interactive interface."
|
||||||
),
|
),
|
||||||
success_criteria=[],
|
success_criteria=[],
|
||||||
constraints=[],
|
constraints=[],
|
||||||
)
|
)
|
||||||
|
|
||||||
queen_graph = GraphSpec(
|
# Loop config -- used by queen_orchestrator to build LoopConfig
|
||||||
id="queen-graph",
|
queen_loop_config = {
|
||||||
goal_id=queen_goal.id,
|
"max_iterations": 999_999,
|
||||||
version="1.0.0",
|
"max_tool_calls_per_turn": 30,
|
||||||
entry_node="queen",
|
"max_context_tokens": 180_000,
|
||||||
entry_points={"start": "queen"},
|
}
|
||||||
terminal_nodes=[],
|
|
||||||
pause_nodes=[],
|
__all__ = ["queen_goal", "queen_loop_config", "queen_node"]
|
||||||
nodes=[queen_node],
|
|
||||||
edges=[],
|
|
||||||
conversation_mode="continuous",
|
|
||||||
loop_config={
|
|
||||||
"max_iterations": 999_999,
|
|
||||||
"max_tool_calls_per_turn": 30,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"include": ["gcu-tools"]
|
||||||
|
}
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from framework.graph import NodeSpec
|
from framework.orchestrator import NodeSpec
|
||||||
|
|
||||||
# Load reference docs at import time so they're always in the system prompt.
|
# Load reference docs at import time so they're always in the system prompt.
|
||||||
# No voluntary read_file() calls needed — the LLM gets everything upfront.
|
# No voluntary read_file() calls needed — the LLM gets everything upfront.
|
||||||
@@ -37,7 +37,7 @@ _appendices = _build_appendices()
|
|||||||
|
|
||||||
# GCU guide — shared between planning and building via _shared_building_knowledge.
|
# GCU guide — shared between planning and building via _shared_building_knowledge.
|
||||||
_gcu_section = (
|
_gcu_section = (
|
||||||
("\n\n# GCU Nodes — Browser Automation\n\n" + _gcu_guide)
|
("\n\n# Browser Automation Nodes\n\n" + _gcu_guide)
|
||||||
if _is_gcu_enabled() and _gcu_guide
|
if _is_gcu_enabled() and _gcu_guide
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
@@ -81,7 +81,6 @@ _QUEEN_PLANNING_TOOLS = [
|
|||||||
"save_agent_draft",
|
"save_agent_draft",
|
||||||
"confirm_and_build",
|
"confirm_and_build",
|
||||||
# Scaffold + transition to building (requires confirm_and_build first)
|
# Scaffold + transition to building (requires confirm_and_build first)
|
||||||
"initialize_and_build_agent",
|
|
||||||
# Load existing agent (after user confirms)
|
# Load existing agent (after user confirms)
|
||||||
"load_built_agent",
|
"load_built_agent",
|
||||||
]
|
]
|
||||||
@@ -172,7 +171,7 @@ _shared_building_knowledge = (
|
|||||||
|
|
||||||
## Paths (MANDATORY)
|
## Paths (MANDATORY)
|
||||||
**Always use RELATIVE paths** \
|
**Always use RELATIVE paths** \
|
||||||
(e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`).
|
(e.g. `exports/agent_name/agent.json`).
|
||||||
**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail.
|
**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail.
|
||||||
The project root is implicit.
|
The project root is implicit.
|
||||||
|
|
||||||
@@ -182,14 +181,18 @@ When designing worker nodes or writing worker system prompts, reference these \
|
|||||||
tool names — NOT the coder-tools names (read_file, write_file, etc.).
|
tool names — NOT the coder-tools names (read_file, write_file, etc.).
|
||||||
|
|
||||||
Worker data tools (for large results and spillover):
|
Worker data tools (for large results and spillover):
|
||||||
- save_data(filename, data, data_dir) — save data to a file for later retrieval
|
Worker data tools (from files-tools MCP server):
|
||||||
- load_data(filename, data_dir, offset_bytes?, limit_bytes?) — load data \
|
- read_file(path) — read a file
|
||||||
with byte-based pagination
|
- write_file(path, content) — write/create a file
|
||||||
- list_data_files(data_dir) — list available data files
|
- list_files(path) — list directory contents
|
||||||
- append_data(filename, data, data_dir) — append to a file incrementally
|
- search_files(pattern, path) — regex search in files
|
||||||
- edit_data(filename, old_text, new_text, data_dir) — find-and-replace in a data file
|
|
||||||
- serve_file_to_user(filename, data_dir, label?, open_in_browser?) — \
|
Worker data tools (from hive-tools MCP server):
|
||||||
generate a clickable file URI for the user
|
- csv_read, csv_write, csv_append — CSV operations
|
||||||
|
- pdf_read — read PDF files
|
||||||
|
|
||||||
|
All tools are registered in the global MCP registry (~/.hive/mcp_registry/). \
|
||||||
|
Workers get tools from: hive-tools, gcu-tools, files-tools.
|
||||||
|
|
||||||
IMPORTANT: Do NOT tell workers to use read_file, write_file, edit_file, \
|
IMPORTANT: Do NOT tell workers to use read_file, write_file, edit_file, \
|
||||||
search_files, or list_directory — those are YOUR tools, not theirs.
|
search_files, or list_directory — those are YOUR tools, not theirs.
|
||||||
@@ -204,7 +207,7 @@ _planning_knowledge = """\
|
|||||||
# Core Mandates (Planning)
|
# Core Mandates (Planning)
|
||||||
- **DO NOT propose a complete goal on your own.** Instead, \
|
- **DO NOT propose a complete goal on your own.** Instead, \
|
||||||
collaborate with the user to define it.
|
collaborate with the user to define it.
|
||||||
- **NEVER call `initialize_and_build_agent` without explicit user approval.** \
|
- **NEVER call `confirm_and_build` without explicit user approval.** \
|
||||||
Present the full design first and wait for the user to confirm before building.
|
Present the full design first and wait for the user to confirm before building.
|
||||||
- **Discover tools dynamically.** NEVER reference tools from static \
|
- **Discover tools dynamically.** NEVER reference tools from static \
|
||||||
docs. Always run list_agent_tools() to see what actually exists.
|
docs. Always run list_agent_tools() to see what actually exists.
|
||||||
@@ -252,9 +255,9 @@ When the stakeholder describes what they want, mentally construct:
|
|||||||
|
|
||||||
**After the user responds, assess fit and gaps together.** Be honest and specific. \
|
**After the user responds, assess fit and gaps together.** Be honest and specific. \
|
||||||
Reference tools from list_agent_tools() AND built-in capabilities:
|
Reference tools from list_agent_tools() AND built-in capabilities:
|
||||||
- **GCU browser automation** (`node_type="gcu"`) provides full Playwright-based \
|
- **Browser automation provides full Playwright-based \
|
||||||
browser control (navigation, clicking, typing, scrolling, JS-rendered pages, \
|
browser control (navigation, clicking, typing, scrolling, JS-rendered pages, \
|
||||||
multi-tab). Do NOT list browser automation as missing — use GCU nodes.
|
multi-tab). Do NOT list browser automation as missing — use browser nodes with tools: {policy: "all"}.
|
||||||
|
|
||||||
Present a short **Framework Fit Assessment**:
|
Present a short **Framework Fit Assessment**:
|
||||||
- **Works well**: 2-4 strengths for this use case
|
- **Works well**: 2-4 strengths for this use case
|
||||||
@@ -306,14 +309,11 @@ explicitly on a node. Available types:
|
|||||||
- **io** (dusty purple, parallelogram): External data input/output
|
- **io** (dusty purple, parallelogram): External data input/output
|
||||||
- **document** (steel blue, wavy rect): Report or document generation
|
- **document** (steel blue, wavy rect): Report or document generation
|
||||||
- **database** (muted teal, cylinder): Database or data store
|
- **database** (muted teal, cylinder): Database or data store
|
||||||
- **subprocess** (dark cyan, subroutine): Delegated sub-agent / predefined process
|
- **browser** (deep blue, hexagon): Browser automation node (uses gcu-tools).
|
||||||
- **browser** (deep blue, hexagon): GCU browser automation / sub-agent \
|
|
||||||
delegation. At build time, browser nodes are dissolved into the parent \
|
|
||||||
node's sub_agents list. Use for any GCU or sub-agent leaf node.
|
|
||||||
|
|
||||||
Auto-detection works well for most cases: first node → start, nodes with \
|
Auto-detection works well for most cases: first node → start, nodes with \
|
||||||
no outgoing edges → terminal, nodes with multiple conditional outgoing \
|
no outgoing edges → terminal, nodes with multiple conditional outgoing \
|
||||||
edges → decision, GCU nodes → browser, nodes mentioning "database" → \
|
edges → decision, browser tool nodes → browser, nodes mentioning "database" → \
|
||||||
database, nodes mentioning "report/document" → document, I/O tools like \
|
database, nodes mentioning "report/document" → document, I/O tools like \
|
||||||
send_email → io. Everything else defaults to process. Set flowchart_type \
|
send_email → io. Everything else defaults to process. Set flowchart_type \
|
||||||
explicitly only when auto-detection would be wrong.
|
explicitly only when auto-detection would be wrong.
|
||||||
@@ -354,48 +354,19 @@ gather → [Valid data?] →Yes→ transform → deliver
|
|||||||
In the draft: the `[Valid data?]` node has `flowchart_type: "decision"`, \
|
In the draft: the `[Valid data?]` node has `flowchart_type: "decision"`, \
|
||||||
`decision_clause: "Data passes validation checks?"`, with labeled yes/no edges.
|
`decision_clause: "Data passes validation checks?"`, with labeled yes/no edges.
|
||||||
|
|
||||||
## Sub-Agent Nodes — Planning-Only Delegation
|
## Browser Automation Nodes
|
||||||
|
|
||||||
Sub-agent nodes (dark teal subroutines) are **planning-only** visual elements \
|
Browser nodes are regular `event_loop` nodes with browser tools \
|
||||||
that show which nodes delegate to sub-agents. At `confirm_and_build()`, \
|
(from the gcu-tools MCP server) in their tool list. They are wired \
|
||||||
sub-agent nodes are **dissolved** into their parent node:
|
into the graph with edges like any other node:
|
||||||
|
|
||||||
- The sub-agent node's ID is added to the predecessor's `sub_agents` list
|
|
||||||
- The sub-agent node and its connecting edge are removed
|
|
||||||
- At runtime, the parent node can invoke the sub-agent via `delegate_to_sub_agent`
|
|
||||||
|
|
||||||
**Rules for sub-agent nodes (INCLUDING GCU nodes):**
|
|
||||||
- GCU nodes are auto-detected as `flowchart_type: "browser"` (hexagon)
|
|
||||||
- Connect from the managing parent node to the sub-agent node
|
|
||||||
- Sub-agent nodes must be **leaf nodes** — NO outgoing edges to other nodes
|
|
||||||
- At build time, browser/GCU nodes are dissolved into the parent's \
|
|
||||||
`sub_agents` list, just like decision nodes are dissolved into criteria
|
|
||||||
|
|
||||||
**CRITICAL: GCU nodes (`node_type: "gcu"`) are ALWAYS sub-agents.** \
|
|
||||||
They MUST NOT appear in the linear flow. NEVER chain GCU nodes \
|
|
||||||
sequentially (A → gcu1 → gcu2 → B is WRONG). Instead, attach them \
|
|
||||||
as leaves to the parent that orchestrates them:
|
|
||||||
```
|
```
|
||||||
WRONG: intake → gcu_find_prospect → gcu_scan_mutuals → check_results
|
research → browser_scan → analyze_results
|
||||||
WRONG: decision_node → gcu_node (as a yes/no branch)
|
|
||||||
RIGHT: intake (sub_agents: [gcu_find, gcu_scan]) → check_results
|
|
||||||
```
|
```
|
||||||
The parent node delegates to its GCU sub-agents and collects results. \
|
Use `tools: {policy: "all"}` to give browser nodes access to all \
|
||||||
The main flow continues from the parent, not from the GCU node. \
|
browser tools, or list specific ones with `policy: "explicit"`.
|
||||||
GCU nodes MUST NOT be children of decision nodes — decision nodes \
|
|
||||||
dissolve at build time, which would leave the GCU as a dangling \
|
|
||||||
workflow step.
|
|
||||||
|
|
||||||
**How to show delegation in the flowchart:**
|
If the worker agent starts from some initial input it is okay. \
|
||||||
```
|
The queen(you) owns intake: you gather user requirements, then call \
|
||||||
research → (deep_searcher) ← browser/GCU node, leaf
|
|
||||||
research → [Enough results?] ← decision node
|
|
||||||
```
|
|
||||||
After dissolution: `research` node gets `sub_agents: ["deep_searcher"]` \
|
|
||||||
and `success_criteria: "Enough results?"`.
|
|
||||||
|
|
||||||
If the worker agent start from some initial input it is okay. \
|
|
||||||
The queen(you) owns intake: you gathers user requirements, then calls \
|
|
||||||
`run_agent_with_input(task)` with a structured task description. \
|
`run_agent_with_input(task)` with a structured task description. \
|
||||||
When building the agent, design the entry node's `input_keys` to \
|
When building the agent, design the entry node's `input_keys` to \
|
||||||
match what the queen will provide at run time. Worker nodes should \
|
match what the queen will provide at run time. Worker nodes should \
|
||||||
@@ -411,14 +382,14 @@ You MUST get explicit user approval before ANY code is generated.
|
|||||||
2. **WAIT for user response.** Do NOT proceed without it.
|
2. **WAIT for user response.** Do NOT proceed without it.
|
||||||
3. Handle the response:
|
3. Handle the response:
|
||||||
- If **Approve / Proceed**: Call confirm_and_build(), then \
|
- If **Approve / Proceed**: Call confirm_and_build(), then \
|
||||||
initialize_and_build_agent(agent_name, nodes)
|
confirm_and_build(agent_name)
|
||||||
- If **Adjust scope**: Discuss changes, update the draft with \
|
- If **Adjust scope**: Discuss changes, update the draft with \
|
||||||
save_agent_draft() again, and re-ask
|
save_agent_draft() again, and re-ask
|
||||||
- If **More questions**: Answer them honestly, then ask again
|
- If **More questions**: Answer them honestly, then ask again
|
||||||
- If **Reconsider**: Discuss alternatives. If they decide to proceed, \
|
- If **Reconsider**: Discuss alternatives. If they decide to proceed, \
|
||||||
that's their informed choice
|
that's their informed choice
|
||||||
|
|
||||||
**NEVER call initialize_and_build_agent without first calling \
|
**NEVER call confirm_and_build without first calling \
|
||||||
confirm_and_build().** The system will block the transition if you try.
|
confirm_and_build().** The system will block the transition if you try.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -477,53 +448,75 @@ When a user says "my agent is failing" or "debug this agent":
|
|||||||
## 5. Implement
|
## 5. Implement
|
||||||
|
|
||||||
**You should only reach this step after the user has approved the draft design \
|
**You should only reach this step after the user has approved the draft design \
|
||||||
in the planning phase. The draft metadata will pre-populate descriptions, \
|
and you have called `confirm_and_build(agent_name="my_agent")`.**
|
||||||
goals, success criteria, and node metadata in the generated files.**
|
|
||||||
|
|
||||||
Call `initialize_and_build_agent(agent_name, nodes)` to generate all package \
|
`confirm_and_build` created the agent directory (returned in agent_path). \
|
||||||
files. The agent_name must be snake_case (e.g., "my_agent"). Pass node names \
|
Now write the complete agent config directly:
|
||||||
as comma-separated string (e.g., "gather,process,review").
|
|
||||||
The tool creates: config.py, nodes/__init__.py, agent.py, \
|
|
||||||
__init__.py, __main__.py, mcp_servers.json, tests/conftest.py.
|
|
||||||
|
|
||||||
The generated files are **structurally complete** with correct imports, \
|
```
|
||||||
class definition, `validate()` method, `default_agent` export, and \
|
write_file("<colony_path>/agent.json", <complete JSON config>)
|
||||||
`__init__.py` re-exports. They pass validation as-is.
|
```
|
||||||
|
|
||||||
`mcp_servers.json` is auto-generated with hive-tools as the default. \
|
The agent.json must include ALL of these in one write:
|
||||||
Do NOT manually create or overwrite `mcp_servers.json`.
|
- `name`, `version`, `description`
|
||||||
|
- `goal` with `description`, `success_criteria`, `constraints`
|
||||||
|
- `identity_prompt` (agent-level behavior)
|
||||||
|
- `nodes` — each with `id`, `description`, `system_prompt`, `tools`, \
|
||||||
|
`input_keys`, `output_keys`, `success_criteria`
|
||||||
|
- `edges` — connecting all nodes with proper conditions
|
||||||
|
- `entry_node`, `terminal_nodes`
|
||||||
|
- `mcp_servers` — REQUIRED. Always include all three: \
|
||||||
|
`[{"name": "hive-tools"}, {"name": "gcu-tools"}, {"name": "files-tools"}]`
|
||||||
|
- `loop_config` — `max_iterations`, `max_context_tokens`
|
||||||
|
|
||||||
### Customizing generated files
|
**Write the COMPLETE config in one `write_file` call. No TODOs, no placeholders.** \
|
||||||
|
The queen writes final production-ready system prompts directly.
|
||||||
|
|
||||||
**CRITICAL: Use `edit_file` to customize TODO placeholders. \
|
**There are NO Python files.** The framework loads agent.json directly.
|
||||||
NEVER use `write_file` to rewrite generated files from scratch. \
|
|
||||||
Rewriting breaks imports, class structure, and causes validation failures.**
|
|
||||||
|
|
||||||
Safe to edit with `edit_file`:
|
MCP servers are loaded from the global registry by name. Available servers:
|
||||||
- System prompts, tools, input_keys, output_keys, success_criteria in \
|
- `hive-tools` — web search, email, CRM, calendar, 100+ integrations
|
||||||
nodes/__init__.py
|
- `gcu-tools` — browser automation (click, type, navigate, screenshot)
|
||||||
- Goal description, success criteria values, constraint values, edge \
|
- `files-tools` — file I/O (read, write, edit, search, list)
|
||||||
definitions, identity_prompt in agent.py
|
|
||||||
- CLI options in __main__.py
|
|
||||||
- For triggers (timers/webhooks), add entries to triggers.json in the \
|
|
||||||
agent's export directory
|
|
||||||
|
|
||||||
Do NOT modify or rewrite:
|
**Template variables:** Add a `variables:` section at the top of agent.json \
|
||||||
- Import statements at top of agent.py (they are correct)
|
and use `{{variable_name}}` in system prompts for config injection:
|
||||||
- The agent class definition, `validate()`, `_build_graph()`, `_setup()`, \
|
```yaml
|
||||||
or lifecycle methods (start/stop/run)
|
variables:
|
||||||
- `__init__.py` exports (all required variables are already re-exported)
|
spreadsheet_id: "1ZVx..."
|
||||||
- `default_agent = ClassName()` at bottom of agent.py
|
nodes:
|
||||||
|
- id: start
|
||||||
|
system_prompt: |
|
||||||
|
Use spreadsheet: {{spreadsheet_id}}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Tool access in nodes
|
||||||
|
|
||||||
|
Each node declares its tool access policy:
|
||||||
|
```yaml
|
||||||
|
# Explicit list (recommended)
|
||||||
|
tools:
|
||||||
|
policy: explicit
|
||||||
|
allowed: [web_search, write_file]
|
||||||
|
|
||||||
|
# All tools (for browser automation nodes)
|
||||||
|
tools:
|
||||||
|
policy: all
|
||||||
|
|
||||||
|
# No tools (for handoff/summary nodes)
|
||||||
|
tools:
|
||||||
|
policy: none
|
||||||
|
```
|
||||||
|
|
||||||
## 6. Verify and Load
|
## 6. Verify and Load
|
||||||
|
|
||||||
Call `validate_agent_package("{name}")` after initialization. \
|
Call `validate_agent_package("{name}")` after initialization. \
|
||||||
It runs structural checks (class validation, graph validation, tool \
|
It runs structural checks (class validation, graph validation, tool \
|
||||||
validation, tests) and returns a consolidated result. If anything \
|
validation, tests) and returns a consolidated result. If anything \
|
||||||
fails: read the error, fix with edit_file, re-validate. Up to 3x.
|
fails: read the error, fix with read_file+write_file, re-validate. Up to 3x.
|
||||||
|
|
||||||
When validation passes, immediately call \
|
When validation passes, immediately call \
|
||||||
`load_built_agent("exports/{name}")` to load the agent into the \
|
`load_built_agent("<agent_path>")` to load the agent into the \
|
||||||
session. This switches to STAGING phase and shows the graph in the \
|
session. This switches to STAGING phase and shows the graph in the \
|
||||||
visualizer. Do NOT wait for user input between validation and loading.
|
visualizer. Do NOT wait for user input between validation and loading.
|
||||||
"""
|
"""
|
||||||
@@ -625,13 +618,11 @@ document, database, subprocess, etc.) with unique shapes and colors. Set \
|
|||||||
flowchart_type on a node to override. Nodes need only an id. \
|
flowchart_type on a node to override. Nodes need only an id. \
|
||||||
Use decision nodes (flowchart_type: "decision", with decision_clause and \
|
Use decision nodes (flowchart_type: "decision", with decision_clause and \
|
||||||
labeled yes/no edges) to make conditional branching explicit. \
|
labeled yes/no edges) to make conditional branching explicit. \
|
||||||
GCU/sub-agent nodes (node_type: "gcu") are auto-detected as browser \
|
|
||||||
hexagons — connect them as leaf nodes to their parent.
|
hexagons — connect them as leaf nodes to their parent.
|
||||||
- confirm_and_build() — Record user confirmation of the draft. Dissolves \
|
- confirm_and_build() — Record user confirmation of the draft. Dissolves \
|
||||||
planning-only nodes (decision → predecessor criteria; browser/GCU → \
|
planning-only nodes (decision → predecessor criteria; browser/GCU → \
|
||||||
predecessor sub_agents list). Call this ONLY after the user explicitly \
|
|
||||||
approves via ask_user.
|
approves via ask_user.
|
||||||
- initialize_and_build_agent(agent_name?, nodes?) — Scaffold the agent package \
|
- confirm_and_build(agent_name) — Scaffold the agent package \
|
||||||
and transition to BUILDING phase. For new agents, this REQUIRES \
|
and transition to BUILDING phase. For new agents, this REQUIRES \
|
||||||
save_agent_draft() + confirm_and_build() first. The draft metadata is used to \
|
save_agent_draft() + confirm_and_build() first. The draft metadata is used to \
|
||||||
pre-populate the generated files. Without agent_name: transition to BUILDING \
|
pre-populate the generated files. Without agent_name: transition to BUILDING \
|
||||||
@@ -647,8 +638,8 @@ phase. Only use this when the user explicitly asks to work with an existing agen
|
|||||||
2. Call save_agent_draft() to create visual draft → present to user
|
2. Call save_agent_draft() to create visual draft → present to user
|
||||||
3. Call ask_user() to get explicit approval
|
3. Call ask_user() to get explicit approval
|
||||||
4. Call confirm_and_build() to record approval
|
4. Call confirm_and_build() to record approval
|
||||||
5. Call initialize_and_build_agent() to scaffold and start building
|
5. Call confirm_and_build() to scaffold and start building
|
||||||
For diagnosis of existing agents, call initialize_and_build_agent() \
|
For diagnosis of existing agents, call confirm_and_build() \
|
||||||
(no args) after agreeing on a fix plan with the user.
|
(no args) after agreeing on a fix plan with the user.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -884,7 +875,7 @@ that changes the structure, call save_agent_draft() again so they see the \
|
|||||||
update in real-time. The flowchart is a live collaboration tool.
|
update in real-time. The flowchart is a live collaboration tool.
|
||||||
8. When the design is stable, use ask_user to get explicit approval
|
8. When the design is stable, use ask_user to get explicit approval
|
||||||
9. Call confirm_and_build() after the user approves
|
9. Call confirm_and_build() after the user approves
|
||||||
10. Call initialize_and_build_agent(agent_name, nodes) to scaffold and start building
|
10. Call confirm_and_build(agent_name) to scaffold and start building
|
||||||
|
|
||||||
**The flowchart is your shared whiteboard.** Don't describe changes in text \
|
**The flowchart is your shared whiteboard.** Don't describe changes in text \
|
||||||
and then ask "should I update the draft?" — just update it. If the user says \
|
and then ask "should I update the draft?" — just update it. If the user says \
|
||||||
@@ -895,7 +886,7 @@ see every structural change reflected in the visualizer as you discuss it.
|
|||||||
**CRITICAL: Planning → Building boundary.** You MUST get explicit user \
|
**CRITICAL: Planning → Building boundary.** You MUST get explicit user \
|
||||||
confirmation before moving to building. The sequence is:
|
confirmation before moving to building. The sequence is:
|
||||||
save_agent_draft() → iterate with user → ask_user() → confirm_and_build() → \
|
save_agent_draft() → iterate with user → ask_user() → confirm_and_build() → \
|
||||||
initialize_and_build_agent()
|
confirm_and_build()
|
||||||
Skipping any of these steps will be blocked by the system.
|
Skipping any of these steps will be blocked by the system.
|
||||||
|
|
||||||
Remember: DO NOT write or edit any files yet. This is a read-only exploration \
|
Remember: DO NOT write or edit any files yet. This is a read-only exploration \
|
||||||
@@ -911,7 +902,7 @@ your priority is diagnosis, not new design:
|
|||||||
2. Summarize the root cause to the user
|
2. Summarize the root cause to the user
|
||||||
3. Propose a fix plan (what to change, what behavior to adjust)
|
3. Propose a fix plan (what to change, what behavior to adjust)
|
||||||
4. Get user approval via ask_user
|
4. Get user approval via ask_user
|
||||||
5. Call initialize_and_build_agent() (no args) to transition to building and implement the fix
|
5. Call confirm_and_build() (no args) to transition to building and implement the fix
|
||||||
|
|
||||||
Do NOT start the full discovery workflow (tool discovery, gap analysis) in \
|
Do NOT start the full discovery workflow (tool discovery, gap analysis) in \
|
||||||
diagnosis mode — you already have a built agent, you just need to fix it.
|
diagnosis mode — you already have a built agent, you just need to fix it.
|
||||||
@@ -947,7 +938,7 @@ delegate agent construction to the worker, even as a "research" subtask.
|
|||||||
## Keeping the flowchart in sync during building
|
## Keeping the flowchart in sync during building
|
||||||
|
|
||||||
When you make structural changes to the agent (add/remove/rename nodes, \
|
When you make structural changes to the agent (add/remove/rename nodes, \
|
||||||
change edges, modify sub-agent assignments), call save_agent_draft() to \
|
change edges, modify node connections), call save_agent_draft() to \
|
||||||
update the flowchart. During building, this auto-dissolves planning-only \
|
update the flowchart. During building, this auto-dissolves planning-only \
|
||||||
nodes without needing user re-confirmation. The user sees the updated \
|
nodes without needing user re-confirmation. The user sees the updated \
|
||||||
flowchart immediately.
|
flowchart immediately.
|
||||||
@@ -966,15 +957,15 @@ user says "replan", "go back", "let's redesign", "change the approach", \
|
|||||||
|
|
||||||
## CRITICAL — Graph topology errors require replanning, not code edits
|
## CRITICAL — Graph topology errors require replanning, not code edits
|
||||||
|
|
||||||
If you discover that the agent graph has structural problems — GCU nodes \
|
If you discover that the agent graph has structural problems — browser nodes \
|
||||||
in the linear flow, missing edges, wrong node connections, incorrect \
|
in the linear flow, missing edges, wrong node connections, incorrect \
|
||||||
sub-agent assignments — you MUST call replan_agent() and fix the draft. \
|
node connections — you MUST call replan_agent() and fix the draft. \
|
||||||
Do NOT attempt to fix topology by editing agent.py directly. The graph \
|
Do NOT attempt to fix topology by editing agent.json directly. The graph \
|
||||||
structure is defined by the draft → dissolution → code-gen pipeline. \
|
structure is defined by the draft → dissolution → code-gen pipeline. \
|
||||||
Editing code to rewire nodes bypasses the flowchart and creates drift \
|
Editing the config to rewire nodes bypasses the flowchart and creates drift \
|
||||||
between what the user sees and what the code does.
|
between what the user sees and what the config does.
|
||||||
|
|
||||||
**WRONG:** "Let me fix agent.py to remove GCU nodes from edges..."
|
**WRONG:** "Let me fix agent.json to remove browser nodes from edges..."
|
||||||
**RIGHT:** Call replan_agent(), fix the draft with save_agent_draft(), \
|
**RIGHT:** Call replan_agent(), fix the draft with save_agent_draft(), \
|
||||||
get user approval, then confirm_and_build() → the corrected code is \
|
get user approval, then confirm_and_build() → the corrected code is \
|
||||||
generated automatically.
|
generated automatically.
|
||||||
@@ -1100,18 +1091,15 @@ You wake up when:
|
|||||||
If the user asks for progress, call get_graph_status() ONCE and report. \
|
If the user asks for progress, call get_graph_status() ONCE and report. \
|
||||||
If the summary mentions issues, follow up with get_graph_status(focus="issues").
|
If the summary mentions issues, follow up with get_graph_status(focus="issues").
|
||||||
|
|
||||||
## Subagent delegations (browser automation, GCU)
|
## Browser automation nodes
|
||||||
|
|
||||||
When the worker delegates to a subagent (e.g., GCU browser automation), expect it \
|
Browser nodes may take 2-5 minutes for web scraping tasks. During this time:
|
||||||
to take 2-5 minutes. During this time:
|
- Progress will show 0% until the node calls set_output at the end.
|
||||||
- Progress will show 0% — this is NORMAL. The subagent only calls set_output at the end.
|
- Check get_graph_status(focus="full") for activity updates.
|
||||||
- Check get_graph_status(focus="full") for "subagent_activity" — this shows the \
|
- Do NOT conclude it is stuck just because you see repeated \
|
||||||
subagent's latest reasoning text and confirms it is making real progress.
|
browser_click/browser_snapshot calls — that is expected for web scraping.
|
||||||
- Do NOT conclude the subagent is stuck just because progress is 0% or because \
|
- Only intervene if: the node has been running for 5+ minutes with no new \
|
||||||
you see repeated browser_click/browser_snapshot calls — that is the expected \
|
activity updates, OR the judge escalates.
|
||||||
pattern for web scraping.
|
|
||||||
- Only intervene if: the subagent has been running for 5+ minutes with no new \
|
|
||||||
subagent_activity updates, OR the judge escalates.
|
|
||||||
|
|
||||||
## Handling worker termination ([WORKER_TERMINAL])
|
## Handling worker termination ([WORKER_TERMINAL])
|
||||||
|
|
||||||
@@ -1143,11 +1131,11 @@ escalations. If the user gave you instructions (e.g., "just retry on errors", \
|
|||||||
|
|
||||||
CRITICAL — escalation relay protocol:
|
CRITICAL — escalation relay protocol:
|
||||||
When an escalation requires user input (auth blocks, human review), the worker \
|
When an escalation requires user input (auth blocks, human review), the worker \
|
||||||
or its subagent is BLOCKED and waiting for your response. You MUST follow this \
|
or is BLOCKED and waiting for your response. You MUST follow this \
|
||||||
exact two-step sequence:
|
exact two-step sequence:
|
||||||
Step 1: call ask_user() to get the user's answer.
|
Step 1: call ask_user() to get the user's answer.
|
||||||
Step 2: call inject_message() with the user's answer IMMEDIATELY after.
|
Step 2: call inject_message() with the user's answer IMMEDIATELY after.
|
||||||
If you skip Step 2, the worker/subagent stays blocked FOREVER and the task hangs. \
|
If you skip Step 2, the worker stays blocked FOREVER and the task hangs. \
|
||||||
NEVER respond to the user without also calling inject_message() to unblock \
|
NEVER respond to the user without also calling inject_message() to unblock \
|
||||||
the worker. Even if the user says "skip" or "cancel", you must still relay that \
|
the worker. Even if the user says "skip" or "cancel", you must still relay that \
|
||||||
decision via inject_message() so the worker can clean up.
|
decision via inject_message() so the worker can clean up.
|
||||||
@@ -1233,7 +1221,7 @@ _queen_tools_docs = (
|
|||||||
+ "\n\n### Phase transitions\n"
|
+ "\n\n### Phase transitions\n"
|
||||||
"- save_agent_draft(...) → creates visual-only draft graph (stays in PLANNING)\n"
|
"- save_agent_draft(...) → creates visual-only draft graph (stays in PLANNING)\n"
|
||||||
"- confirm_and_build() → records user approval of draft (stays in PLANNING)\n"
|
"- confirm_and_build() → records user approval of draft (stays in PLANNING)\n"
|
||||||
"- initialize_and_build_agent(agent_name?, nodes?) → scaffolds package + switches to "
|
"- confirm_and_build(agent_name) → scaffolds package + switches to "
|
||||||
"BUILDING (requires draft + confirmation for new agents)\n"
|
"BUILDING (requires draft + confirmation for new agents)\n"
|
||||||
"- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n"
|
"- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n"
|
||||||
"- load_built_agent(path) → switches to STAGING phase\n"
|
"- load_built_agent(path) → switches to STAGING phase\n"
|
||||||
|
|||||||
@@ -1,9 +1,15 @@
|
|||||||
"""Queen global memory helpers.
|
"""Queen global memory helpers.
|
||||||
|
|
||||||
Global memory lives in ``~/.hive/queen/global_memory/`` and stores durable
|
Memory hierarchy::
|
||||||
cross-session knowledge about the user (profile, preferences, environment,
|
|
||||||
feedback). Each memory is an individual ``.md`` file with optional YAML
|
~/.hive/memories/
|
||||||
frontmatter (name, type, description).
|
global/ # shared across all queens and colonies
|
||||||
|
colonies/{name}/ # colony-scoped memories
|
||||||
|
agents/queens/{name}/ # queen-specific memories
|
||||||
|
agents/{name}/ # per-worker-agent memories
|
||||||
|
|
||||||
|
Each memory is an individual ``.md`` file with optional YAML frontmatter
|
||||||
|
(name, type, description).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -21,7 +27,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] = ("profile", "preference", "environment", "feedback")
|
GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] = ("profile", "preference", "environment", "feedback")
|
||||||
|
|
||||||
_HIVE_QUEEN_DIR = Path.home() / ".hive" / "queen"
|
from framework.config import MEMORIES_DIR
|
||||||
|
|
||||||
MAX_FILES: int = 200
|
MAX_FILES: int = 200
|
||||||
MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file
|
MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file
|
||||||
@@ -31,8 +37,23 @@ _HEADER_LINE_LIMIT: int = 30
|
|||||||
|
|
||||||
|
|
||||||
def global_memory_dir() -> Path:
|
def global_memory_dir() -> Path:
|
||||||
"""Return the queen-global memory directory."""
|
"""Return the global memory directory (shared across all queens/colonies)."""
|
||||||
return _HIVE_QUEEN_DIR / "global_memory"
|
return MEMORIES_DIR / "global"
|
||||||
|
|
||||||
|
|
||||||
|
def colony_memory_dir(colony_name: str) -> Path:
|
||||||
|
"""Return the memory directory for a named colony."""
|
||||||
|
return MEMORIES_DIR / "colonies" / colony_name
|
||||||
|
|
||||||
|
|
||||||
|
def queen_memory_dir(queen_name: str = "default") -> Path:
|
||||||
|
"""Return the memory directory for a named queen."""
|
||||||
|
return MEMORIES_DIR / "agents" / "queens" / queen_name
|
||||||
|
|
||||||
|
|
||||||
|
def agent_memory_dir(agent_name: str) -> Path:
|
||||||
|
"""Return the memory directory for a worker agent."""
|
||||||
|
return MEMORIES_DIR / "agents" / agent_name
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -91,7 +91,19 @@ async def select_memories(
|
|||||||
resp.stop_reason,
|
resp.stop_reason,
|
||||||
)
|
)
|
||||||
return []
|
return []
|
||||||
data = json.loads(raw)
|
# Some models wrap JSON in markdown fences or add preamble text.
|
||||||
|
# Try to extract the JSON object if raw parse fails.
|
||||||
|
try:
|
||||||
|
data = json.loads(raw)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
import re
|
||||||
|
|
||||||
|
m = re.search(r"\{.*\}", raw, re.DOTALL)
|
||||||
|
if m:
|
||||||
|
data = json.loads(m.group())
|
||||||
|
else:
|
||||||
|
logger.warning("recall: LLM returned non-JSON: %.200s", raw)
|
||||||
|
return []
|
||||||
selected = data.get("selected_memories", [])
|
selected = data.get("selected_memories", [])
|
||||||
valid_names = {f.filename for f in files}
|
valid_names = {f.filename for f in files}
|
||||||
result = [s for s in selected if s in valid_names][:max_results]
|
result = [s for s in selected if s in valid_names][:max_results]
|
||||||
|
|||||||
@@ -25,10 +25,7 @@
|
|||||||
14. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
|
14. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
|
||||||
|
|
||||||
## GCU Errors
|
## GCU Errors
|
||||||
15. **Manually wiring browser tools on event_loop nodes** — Use `node_type="gcu"` which auto-includes browser tools. Do NOT manually list browser tool names.
|
15. **Manually wiring browser tools on event_loop nodes** — Browser nodes use tools: {policy: "all"} to get all browser tools.
|
||||||
16. **Using GCU nodes as regular graph nodes** — GCU nodes are subagents only. They must ONLY appear in `sub_agents=["gcu-node-id"]` and be invoked via `delegate_to_sub_agent()`. Never connect via edges or use as entry/terminal nodes.
|
|
||||||
17. **Reusing the same GCU node ID for parallel tasks** — Each concurrent browser task needs a distinct GCU node ID (e.g. `gcu-site-a`, `gcu-site-b`). Two `delegate_to_sub_agent` calls with the same `agent_id` share a browser profile and will interfere with each other's pages.
|
|
||||||
18. **Passing `profile=` in GCU tool calls** — Profile isolation for parallel subagents is automatic. The framework injects a unique profile per subagent via an asyncio `ContextVar`. Hardcoding `profile="default"` in a GCU system prompt breaks this isolation.
|
|
||||||
|
|
||||||
## Worker Agent Errors
|
## Worker Agent Errors
|
||||||
19. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Route worker review/approval through queen escalation instead of direct worker HITL.
|
19. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Route worker review/approval through queen escalation instead of direct worker HITL.
|
||||||
|
|||||||
@@ -0,0 +1,227 @@
|
|||||||
|
# Declarative Agent File Templates
|
||||||
|
|
||||||
|
Agents are defined as a single `agent.yaml` file. No Python code needed.
|
||||||
|
The runner loads this file directly -- no `agent.py`, `config.py`, or
|
||||||
|
`nodes/__init__.py` required.
|
||||||
|
|
||||||
|
## agent.yaml -- Complete Agent Definition
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
name: my-agent
|
||||||
|
version: 1.0.0
|
||||||
|
description: What this agent does.
|
||||||
|
|
||||||
|
metadata:
|
||||||
|
intro_message: Welcome! What would you like me to do?
|
||||||
|
|
||||||
|
# Template variables -- substituted into system_prompt and identity_prompt
|
||||||
|
# via {{variable_name}} syntax. Use this for config values that appear
|
||||||
|
# in prompts (spreadsheet IDs, API endpoints, account names, etc.)
|
||||||
|
variables:
|
||||||
|
spreadsheet_id: "1ZVxWDL..."
|
||||||
|
sheet_name: "contacts"
|
||||||
|
|
||||||
|
goal:
|
||||||
|
description: What this agent achieves.
|
||||||
|
success_criteria:
|
||||||
|
- "First success criterion"
|
||||||
|
- "Second success criterion"
|
||||||
|
constraints:
|
||||||
|
- "Hard constraint the agent must respect"
|
||||||
|
|
||||||
|
identity_prompt: |
|
||||||
|
You are a helpful agent.
|
||||||
|
|
||||||
|
conversation_mode: continuous # always "continuous" for Hive agents
|
||||||
|
|
||||||
|
loop_config:
|
||||||
|
max_iterations: 100
|
||||||
|
max_tool_calls_per_turn: 30
|
||||||
|
max_context_tokens: 32000
|
||||||
|
|
||||||
|
# MCP servers to connect (resolved by name from ~/.hive/mcp_registry/)
|
||||||
|
mcp_servers:
|
||||||
|
- name: hive-tools
|
||||||
|
- name: gcu-tools
|
||||||
|
|
||||||
|
nodes:
|
||||||
|
# Node 1: Process (autonomous entry node)
|
||||||
|
# The queen handles intake and passes structured input via
|
||||||
|
# run_agent_with_input(task). NO client-facing intake node.
|
||||||
|
- id: process
|
||||||
|
name: Process
|
||||||
|
description: Execute the task using available tools
|
||||||
|
max_node_visits: 0 # 0 = unlimited (forever-alive agents)
|
||||||
|
input_keys: [user_request, feedback]
|
||||||
|
output_keys: [results]
|
||||||
|
nullable_output_keys: [feedback]
|
||||||
|
tools:
|
||||||
|
policy: explicit
|
||||||
|
allowed: [web_search, web_scrape, save_data, load_data, list_data_files]
|
||||||
|
success_criteria: Results are complete and accurate.
|
||||||
|
system_prompt: |
|
||||||
|
You are a processing agent. Your task is in memory under "user_request".
|
||||||
|
If "feedback" is present, this is a revision.
|
||||||
|
|
||||||
|
Work in phases:
|
||||||
|
1. Use tools to gather/process data
|
||||||
|
2. Analyze results
|
||||||
|
3. Call set_output in a SEPARATE turn:
|
||||||
|
- set_output("results", "structured results")
|
||||||
|
|
||||||
|
# Node 2: Handoff (autonomous)
|
||||||
|
- id: handoff
|
||||||
|
name: Handoff
|
||||||
|
description: Prepare worker results for queen review
|
||||||
|
max_node_visits: 0
|
||||||
|
input_keys: [results, user_request]
|
||||||
|
output_keys: [next_action, feedback, worker_summary]
|
||||||
|
nullable_output_keys: [feedback, worker_summary]
|
||||||
|
tools:
|
||||||
|
policy: none # handoff nodes don't need tools
|
||||||
|
success_criteria: Results are packaged for queen decision-making.
|
||||||
|
system_prompt: |
|
||||||
|
Do NOT talk to the user directly. The queen is the only user interface.
|
||||||
|
|
||||||
|
If blocked, call escalate(reason, context) then set:
|
||||||
|
- set_output("next_action", "escalated")
|
||||||
|
- set_output("feedback", "what help is needed")
|
||||||
|
|
||||||
|
Otherwise summarize and set:
|
||||||
|
- set_output("worker_summary", "short summary for queen")
|
||||||
|
- set_output("next_action", "done") or "revise"
|
||||||
|
- set_output("feedback", "what to revise") only when revising
|
||||||
|
|
||||||
|
edges:
|
||||||
|
- from_node: process
|
||||||
|
to_node: handoff
|
||||||
|
# Feedback loop
|
||||||
|
- from_node: handoff
|
||||||
|
to_node: process
|
||||||
|
condition: conditional
|
||||||
|
condition_expr: "str(next_action).lower() == 'revise'"
|
||||||
|
priority: 2
|
||||||
|
# Escalation loop
|
||||||
|
- from_node: handoff
|
||||||
|
to_node: process
|
||||||
|
condition: conditional
|
||||||
|
condition_expr: "str(next_action).lower() == 'escalated'"
|
||||||
|
priority: 3
|
||||||
|
# Loop back for next task
|
||||||
|
- from_node: handoff
|
||||||
|
to_node: process
|
||||||
|
condition: conditional
|
||||||
|
condition_expr: "str(next_action).lower() == 'done'"
|
||||||
|
|
||||||
|
entry_node: process
|
||||||
|
terminal_nodes: [] # [] = forever-alive
|
||||||
|
```
|
||||||
|
|
||||||
|
## Key differences from Python templates
|
||||||
|
|
||||||
|
| Before (Python) | After (YAML) |
|
||||||
|
|-------------------------------------|----------------------------------------|
|
||||||
|
| `agent.py` (250 lines boilerplate) | Not needed |
|
||||||
|
| `config.py` (dataclass + metadata) | `variables:` + `metadata:` in YAML |
|
||||||
|
| `nodes/__init__.py` (NodeSpec calls)| `nodes:` list in YAML |
|
||||||
|
| `__init__.py`, `__main__.py` | Not needed |
|
||||||
|
| f-string config injection | `{{variable_name}}` templates |
|
||||||
|
| `mcp_servers.json` (separate file) | `mcp_servers:` in YAML (or keep file) |
|
||||||
|
|
||||||
|
## Node types
|
||||||
|
|
||||||
|
| Type | Description | Tools |
|
||||||
|
|--------------|---------------------------------------|--------------------------|
|
||||||
|
| `event_loop` | LLM-driven orchestration (default) | Explicit list or `none` |
|
||||||
|
| `gcu` | Browser automation via GCU tools | `policy: all` (auto) |
|
||||||
|
|
||||||
|
## Tool access policies
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Explicit list (recommended for most nodes)
|
||||||
|
tools:
|
||||||
|
policy: explicit
|
||||||
|
allowed: [web_search, save_data]
|
||||||
|
|
||||||
|
# All tools (for browser automation nodes)
|
||||||
|
tools:
|
||||||
|
policy: all
|
||||||
|
|
||||||
|
# No tools (for handoff/summary nodes)
|
||||||
|
tools:
|
||||||
|
policy: none
|
||||||
|
```
|
||||||
|
|
||||||
|
## Edge conditions
|
||||||
|
|
||||||
|
| Condition | When to use |
|
||||||
|
|---------------|-------------------------------------------------------|
|
||||||
|
| `on_success` | Default. Next node after current succeeds. |
|
||||||
|
| `on_failure` | Fallback path when current node fails. |
|
||||||
|
| `always` | Always traverse regardless of outcome. |
|
||||||
|
| `conditional` | Evaluate `condition_expr` against shared memory keys. |
|
||||||
|
| `llm_decide` | Let the LLM decide at runtime. |
|
||||||
|
|
||||||
|
## Template variables
|
||||||
|
|
||||||
|
Use `{{variable_name}}` in `system_prompt` and `identity_prompt`.
|
||||||
|
Variables are defined in the top-level `variables:` map.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
variables:
|
||||||
|
spreadsheet_id: "1ZVxWDL..."
|
||||||
|
api_endpoint: "https://api.example.com"
|
||||||
|
|
||||||
|
nodes:
|
||||||
|
- id: start
|
||||||
|
system_prompt: |
|
||||||
|
Connect to spreadsheet: {{spreadsheet_id}}
|
||||||
|
API endpoint: {{api_endpoint}}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Entry points
|
||||||
|
|
||||||
|
Default is a single manual entry point. For timer/scheduled triggers:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
entry_points:
|
||||||
|
- id: default
|
||||||
|
trigger_type: manual
|
||||||
|
- id: daily-check
|
||||||
|
trigger_type: timer
|
||||||
|
trigger_config:
|
||||||
|
interval_minutes: 30
|
||||||
|
```
|
||||||
|
|
||||||
|
## mcp_servers.json -- Still Supported
|
||||||
|
|
||||||
|
The `mcp_servers.json` file is still loaded automatically if present alongside
|
||||||
|
`agent.yaml`. You can also inline servers in the YAML:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
mcp_servers:
|
||||||
|
- name: hive-tools
|
||||||
|
- name: gcu-tools
|
||||||
|
```
|
||||||
|
|
||||||
|
Both approaches work. The JSON file takes precedence for backward compatibility.
|
||||||
|
|
||||||
|
## Migration from Python agents
|
||||||
|
|
||||||
|
Run the migration tool to convert existing agents:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv run python -m framework.tools.migrate_agent exports/my_agent
|
||||||
|
```
|
||||||
|
|
||||||
|
This generates `agent.yaml` from the existing `agent.py` + `nodes/` + `config.py`.
|
||||||
|
The original files are left untouched. Once verified, you can delete the Python files.
|
||||||
|
|
||||||
|
## Files after migration
|
||||||
|
|
||||||
|
```
|
||||||
|
my_agent/
|
||||||
|
agent.yaml # The only required file
|
||||||
|
mcp_servers.json # Optional (can inline in YAML)
|
||||||
|
flowchart.json # Optional (auto-generated)
|
||||||
|
```
|
||||||
@@ -1,306 +1,193 @@
|
|||||||
# Hive Agent Framework — Condensed Reference
|
# Hive Agent Framework -- Condensed Reference
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
Agents are Python packages in `exports/`:
|
Agents are declarative JSON configs in `exports/`:
|
||||||
```
|
```
|
||||||
exports/my_agent/
|
exports/my_agent/
|
||||||
├── __init__.py # MUST re-export ALL module-level vars from agent.py
|
agent.json # The entire agent definition
|
||||||
├── __main__.py # CLI (run, tui, info, validate, shell)
|
mcp_servers.json # MCP tool server config (optional, prefer registry refs)
|
||||||
├── agent.py # Graph construction (goal, edges, agent class)
|
|
||||||
├── config.py # Runtime config
|
|
||||||
├── nodes/__init__.py # Node definitions (NodeSpec)
|
|
||||||
├── mcp_servers.json # MCP tool server config
|
|
||||||
└── tests/ # pytest tests
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Agent Loading Contract
|
No Python files. No `__init__.py`, `__main__.py`, `config.py`, or `nodes/`.
|
||||||
|
|
||||||
`AgentRunner.load()` imports the package (`__init__.py`) and reads these
|
## Agent Loading
|
||||||
module-level variables via `getattr()`:
|
|
||||||
|
|
||||||
| Variable | Required | Default if missing | Consequence |
|
`AgentLoader.load()` reads `agent.json` and builds the execution graph.
|
||||||
|----------|----------|--------------------|-------------|
|
If `agent.py` exists (legacy), it's loaded as a Python module instead.
|
||||||
| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" |
|
|
||||||
| `nodes` | YES | `None` | **FATAL** — same error |
|
|
||||||
| `edges` | YES | `None` | **FATAL** — same error |
|
|
||||||
| `entry_node` | no | `nodes[0].id` | Probably wrong node |
|
|
||||||
| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails |
|
|
||||||
| `terminal_nodes` | **YES** | `[]` | **FATAL** — graph must have at least one terminal node |
|
|
||||||
| `pause_nodes` | no | `[]` | OK |
|
|
||||||
| `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
|
|
||||||
| `identity_prompt` | no | not passed | No agent-level identity |
|
|
||||||
| `loop_config` | no | `{}` | No iteration limits |
|
|
||||||
| `triggers.json` (file) | no | not present | No triggers (timers, webhooks) |
|
|
||||||
|
|
||||||
**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from
|
## agent.json Schema
|
||||||
`agent.py`. Missing exports silently fall back to defaults, causing
|
|
||||||
hard-to-debug failures.
|
|
||||||
|
|
||||||
**Why `default_agent.validate()` is NOT sufficient:**
|
```json
|
||||||
`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges).
|
{
|
||||||
These are always correct because the constructor references agent.py's module
|
"name": "my-agent",
|
||||||
vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`),
|
"version": "1.0.0",
|
||||||
not the class. So `validate()` passes while `AgentRunner.load()` fails.
|
"description": "What this agent does",
|
||||||
Always test with `AgentRunner.load("exports/{name}")` — this is the same
|
"goal": {
|
||||||
code path the TUI and `hive run` use.
|
"description": "What to achieve",
|
||||||
|
"success_criteria": ["criterion 1", "criterion 2"],
|
||||||
## Goal
|
"constraints": ["constraint 1"]
|
||||||
|
},
|
||||||
Defines success criteria and constraints:
|
"identity_prompt": "You are a helpful agent.",
|
||||||
```python
|
"conversation_mode": "continuous",
|
||||||
goal = Goal(
|
"loop_config": {
|
||||||
id="kebab-case-id",
|
"max_iterations": 100,
|
||||||
name="Display Name",
|
"max_tool_calls_per_turn": 30,
|
||||||
description="What the agent does",
|
"max_context_tokens": 32000
|
||||||
success_criteria=[
|
},
|
||||||
SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25),
|
"mcp_servers": [
|
||||||
],
|
{"name": "hive-tools"},
|
||||||
constraints=[
|
{"name": "gcu-tools"}
|
||||||
Constraint(id="c-id", description="...", constraint_type="hard", category="quality"),
|
],
|
||||||
],
|
"variables": {
|
||||||
)
|
"spreadsheet_id": "1ZVx..."
|
||||||
|
},
|
||||||
|
"nodes": [...],
|
||||||
|
"edges": [...],
|
||||||
|
"entry_node": "process",
|
||||||
|
"terminal_nodes": []
|
||||||
|
}
|
||||||
```
|
```
|
||||||
- 3-5 success criteria, weights sum to 1.0
|
|
||||||
- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional)
|
|
||||||
|
|
||||||
## NodeSpec Fields
|
## Template Variables
|
||||||
|
|
||||||
|
Use `{{variable_name}}` in `system_prompt` and `identity_prompt`. Variables
|
||||||
|
are defined in the top-level `variables` object:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"variables": {"sheet_id": "1ZVx..."},
|
||||||
|
"nodes": [{
|
||||||
|
"id": "start",
|
||||||
|
"system_prompt": "Use sheet: {{sheet_id}}"
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Node Fields
|
||||||
|
|
||||||
| Field | Type | Default | Description |
|
| Field | Type | Default | Description |
|
||||||
|-------|------|---------|-------------|
|
|-------|------|---------|-------------|
|
||||||
| id | str | required | kebab-case identifier |
|
| id | str | required | kebab-case identifier |
|
||||||
| name | str | required | Display name |
|
| name | str | id | Display name |
|
||||||
| description | str | required | What the node does |
|
| description | str | required | What the node does |
|
||||||
| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) |
|
| node_type | str | "event_loop" | `"event_loop"` |
|
||||||
| input_keys | list[str] | required | Memory keys this node reads |
|
| input_keys | list | [] | Memory keys this node reads |
|
||||||
| output_keys | list[str] | required | Memory keys this node writes via set_output |
|
| output_keys | list | [] | Memory keys this node writes via set_output |
|
||||||
| system_prompt | str | "" | LLM instructions |
|
| system_prompt | str | "" | LLM instructions |
|
||||||
| tools | list[str] | [] | Tool names from MCP servers |
|
| tools | object | {} | Tool access policy (see below) |
|
||||||
| client_facing | bool | False | Deprecated compatibility field. Queen interactivity is implicit; workers should escalate instead |
|
| nullable_output_keys | list | [] | Keys that may remain unset |
|
||||||
| nullable_output_keys | list[str] | [] | Keys that may remain unset |
|
| max_node_visits | int | 1 | 0=unlimited (for forever-alive agents) |
|
||||||
| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops |
|
|
||||||
| max_retries | int | 3 | Retries on failure |
|
|
||||||
| success_criteria | str | "" | Natural language for judge evaluation |
|
| success_criteria | str | "" | Natural language for judge evaluation |
|
||||||
|
| client_facing | bool | false | Whether output is shown to user |
|
||||||
|
|
||||||
## EdgeSpec Fields
|
## Tool Access Policies
|
||||||
|
|
||||||
|
Each node declares its tools via a policy object:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"tools": {"policy": "explicit", "allowed": ["web_search", "save_data"]}}
|
||||||
|
{"tools": {"policy": "all"}}
|
||||||
|
{"tools": {"policy": "none"}}
|
||||||
|
```
|
||||||
|
|
||||||
|
- `explicit` (default): only named tools. Empty `allowed` = zero tools.
|
||||||
|
- `all`: all tools from registry (e.g. for browser automation nodes).
|
||||||
|
- `none`: no tools (for handoff/summary nodes).
|
||||||
|
|
||||||
|
## Edge Fields
|
||||||
|
|
||||||
| Field | Type | Description |
|
| Field | Type | Description |
|
||||||
|-------|------|-------------|
|
|-------|------|-------------|
|
||||||
| id | str | kebab-case identifier |
|
| from_node | str | Source node ID |
|
||||||
| source | str | Source node ID |
|
| to_node | str | Target node ID |
|
||||||
| target | str | Target node ID |
|
| condition | str | `on_success`, `on_failure`, `always`, `conditional` |
|
||||||
| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL |
|
| condition_expr | str | Python expression for conditional routing |
|
||||||
| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) |
|
| priority | int | Higher = evaluated first |
|
||||||
| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) |
|
|
||||||
|
condition_expr examples:
|
||||||
|
- `"needs_more_research == True"`
|
||||||
|
- `"str(next_action).lower() == 'revise'"`
|
||||||
|
|
||||||
## Key Patterns
|
## Key Patterns
|
||||||
|
|
||||||
### STEP 1/STEP 2 (Client-Facing Nodes)
|
|
||||||
```
|
|
||||||
**STEP 1 — Respond to the user (text only, NO tool calls):**
|
|
||||||
[Present information, ask questions]
|
|
||||||
|
|
||||||
**STEP 2 — After the user responds, call set_output:**
|
|
||||||
- set_output("key", "value based on user response")
|
|
||||||
```
|
|
||||||
This prevents premature set_output before user interaction.
|
|
||||||
|
|
||||||
### Fewer, Richer Nodes (CRITICAL)
|
### Fewer, Richer Nodes (CRITICAL)
|
||||||
|
|
||||||
**Hard limit: 3-6 nodes for most agents.** Never exceed 6 unless the user
|
**Hard limit: 3-6 nodes for most agents.** Each node boundary serializes
|
||||||
explicitly requests a complex multi-phase pipeline.
|
outputs and destroys in-context information. Merge unless:
|
||||||
|
1. Client-facing boundary (different interaction models)
|
||||||
|
2. Disjoint tool sets
|
||||||
|
3. Parallel execution (fan-out branches)
|
||||||
|
|
||||||
Each node boundary serializes outputs to the shared buffer and **destroys** all
|
**Typical structure (2 nodes):**
|
||||||
in-context information: tool call results, intermediate reasoning, conversation
|
|
||||||
history. A research node that searches, fetches, and analyzes in ONE node keeps
|
|
||||||
all source material in its conversation context. Split across 3 nodes, each
|
|
||||||
downstream node only sees the serialized summary string.
|
|
||||||
|
|
||||||
**Decision framework — merge unless ANY of these apply:**
|
|
||||||
1. **Client-facing boundary** — Autonomous and client-facing work MUST be
|
|
||||||
separate nodes (different interaction models)
|
|
||||||
2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web
|
|
||||||
search vs database), separate nodes make sense
|
|
||||||
3. **Parallel execution** — Fan-out branches must be separate nodes
|
|
||||||
|
|
||||||
**Red flags that you have too many nodes:**
|
|
||||||
- A node with 0 tools (pure LLM reasoning) → merge into predecessor/successor
|
|
||||||
- A node that sets only 1 trivial output → collapse into predecessor
|
|
||||||
- Multiple consecutive autonomous nodes → combine into one rich node
|
|
||||||
- A "report" node that presents analysis → merge into the client-facing node
|
|
||||||
- A "confirm" or "schedule" node that doesn't call any external service → remove
|
|
||||||
|
|
||||||
**Typical agent structure (2 nodes):**
|
|
||||||
```
|
```
|
||||||
process (autonomous) ←→ review (queen-mediated)
|
process (autonomous) <-> review (queen-mediated)
|
||||||
```
|
|
||||||
The queen owns intake — she gathers requirements from the user, then
|
|
||||||
passes structured input via `run_agent_with_input(task)`. When building
|
|
||||||
the agent, design the entry node's `input_keys` to match what the queen
|
|
||||||
will provide at run time. Worker agents should NOT have a client-facing
|
|
||||||
intake node. Mid-execution review/approval should happen through queen
|
|
||||||
escalation rather than direct worker HITL.
|
|
||||||
|
|
||||||
For simpler agents, just 1 autonomous node:
|
|
||||||
```
|
|
||||||
process (autonomous) — loops back to itself
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### nullable_output_keys
|
The queen owns intake. Worker agents should NOT have a client-facing intake
|
||||||
For inputs that only arrive on certain edges:
|
node. Mid-execution review should happen through queen escalation.
|
||||||
```python
|
|
||||||
research_node = NodeSpec(
|
|
||||||
input_keys=["brief", "feedback"],
|
|
||||||
nullable_output_keys=["feedback"], # Only present on feedback edge
|
|
||||||
max_node_visits=3,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Mutually Exclusive Outputs
|
|
||||||
For routing decisions:
|
|
||||||
```python
|
|
||||||
review_node = NodeSpec(
|
|
||||||
output_keys=["approved", "feedback"],
|
|
||||||
nullable_output_keys=["approved", "feedback"], # Node sets one or the other
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Continuous Loop Pattern
|
|
||||||
Mark the primary event_loop node as terminal: `terminal_nodes=["process"]`.
|
|
||||||
The node has `output_keys` and can complete when the agent finishes its work.
|
|
||||||
Use `conversation_mode="continuous"` to preserve context across transitions.
|
|
||||||
|
|
||||||
### set_output
|
### set_output
|
||||||
- Synthetic tool injected by framework
|
- Synthetic tool injected by framework
|
||||||
- Call separately from real tool calls (separate turn)
|
- Call separately from real tool calls (separate turn)
|
||||||
- `set_output("key", "value")` stores to the shared buffer
|
- `set_output("key", "value")` stores to the shared buffer
|
||||||
|
|
||||||
## Edge Conditions
|
### Graph Lifecycle
|
||||||
|
|
||||||
| Condition | When |
|
|
||||||
|-----------|------|
|
|
||||||
| ON_SUCCESS | Node completed successfully |
|
|
||||||
| ON_FAILURE | Node failed |
|
|
||||||
| ALWAYS | Unconditional |
|
|
||||||
| CONDITIONAL | condition_expr evaluates to True against memory |
|
|
||||||
|
|
||||||
condition_expr examples:
|
|
||||||
- `"needs_more_research == True"`
|
|
||||||
- `"str(next_action).lower() == 'new_agent'"`
|
|
||||||
- `"feedback is not None"`
|
|
||||||
|
|
||||||
## Graph Lifecycle
|
|
||||||
|
|
||||||
| Pattern | terminal_nodes | When |
|
| Pattern | terminal_nodes | When |
|
||||||
|---------|---------------|------|
|
|---------|---------------|------|
|
||||||
| **Continuous loop** | `["node-with-output-keys"]` | **DEFAULT for all agents** |
|
| Continuous loop | `["node-with-output-keys"]` | DEFAULT for all agents |
|
||||||
| Linear | `["last-node"]` | One-shot/batch agents |
|
| Linear | `["last-node"]` | One-shot/batch agents |
|
||||||
|
|
||||||
**Every graph must have at least one terminal node.** Terminal nodes
|
Every graph must have at least one terminal node.
|
||||||
define where execution ends. For interactive agents that loop continuously,
|
|
||||||
mark the primary event_loop node as terminal (it has `output_keys` and can
|
|
||||||
complete at any point). The framework default for `max_node_visits` is 0
|
|
||||||
(unbounded), so nodes work correctly in continuous loops without explicit
|
|
||||||
override. Only set `max_node_visits > 0` in one-shot agents with feedback loops.
|
|
||||||
Every node must have at least one outgoing edge — no dead ends.
|
|
||||||
|
|
||||||
## Continuous Conversation Mode
|
### Continuous Conversation Mode
|
||||||
|
|
||||||
`conversation_mode` has ONLY two valid states:
|
`conversation_mode` has ONLY two valid states:
|
||||||
- `"continuous"` — recommended for interactive agents
|
- `"continuous"` -- recommended (context carries across node transitions)
|
||||||
- Omit entirely — isolated per-node conversations (each node starts fresh)
|
- Omit entirely -- isolated per-node conversations
|
||||||
|
|
||||||
**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`,
|
**INVALID values:** `"client_facing"`, `"interactive"`, `"shared"`.
|
||||||
`"adaptive"`, `"shared"`. These do not exist in the framework.
|
|
||||||
|
|
||||||
When `conversation_mode="continuous"`:
|
|
||||||
- Same conversation thread carries across node transitions
|
|
||||||
- Layered system prompts: identity (agent-level) + narrative + focus (per-node)
|
|
||||||
- Transition markers inserted at boundaries
|
|
||||||
- Compaction happens opportunistically at phase transitions
|
|
||||||
|
|
||||||
## loop_config
|
## loop_config
|
||||||
|
|
||||||
Only three valid keys:
|
Only three valid keys:
|
||||||
```python
|
```json
|
||||||
loop_config = {
|
{
|
||||||
"max_iterations": 100, # Max LLM turns per node visit
|
"max_iterations": 100,
|
||||||
"max_tool_calls_per_turn": 20, # Max tool calls per LLM response
|
"max_tool_calls_per_turn": 20,
|
||||||
"max_context_tokens": 32000, # Triggers conversation compaction
|
"max_context_tokens": 32000
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`,
|
|
||||||
`"temperature"`. These are silently ignored or cause errors.
|
|
||||||
|
|
||||||
## Data Tools (Spillover)
|
## Data Tools (Spillover)
|
||||||
|
|
||||||
For large data that exceeds context:
|
For large data that exceeds context:
|
||||||
- `save_data(filename, data)` — Write to session data dir
|
- `save_data(filename, data)` -- write to session data dir
|
||||||
- `load_data(filename, offset, limit)` — Read with pagination
|
- `load_data(filename, offset, limit)` -- read with pagination
|
||||||
- `list_data_files()` — List files
|
- `list_data_files()` -- list files
|
||||||
- `serve_file_to_user(filename, label)` — Clickable file:// URI
|
- `serve_file_to_user(filename, label)` -- clickable file URI
|
||||||
|
|
||||||
`data_dir` is auto-injected by framework — LLM never sees it.
|
`data_dir` is auto-injected by framework.
|
||||||
|
|
||||||
## Fan-Out / Fan-In
|
## Fan-Out / Fan-In
|
||||||
|
|
||||||
Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather().
|
Multiple `on_success` edges from same source = parallel execution.
|
||||||
- Parallel nodes must have disjoint output_keys
|
Parallel nodes must have disjoint output_keys.
|
||||||
- Only one branch may have client_facing nodes
|
|
||||||
- Fan-in node gets all outputs in the shared buffer
|
|
||||||
|
|
||||||
## Judge System
|
## Judge System
|
||||||
|
|
||||||
- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
|
- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
|
||||||
- **SchemaJudge**: Validates against Pydantic model
|
- **SchemaJudge**: Validates against Pydantic model
|
||||||
- **Custom**: Implement `evaluate(context) -> JudgeVerdict`
|
|
||||||
|
|
||||||
Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.
|
|
||||||
|
|
||||||
## Triggers (Timers, Webhooks)
|
|
||||||
|
|
||||||
For agents that react to external events, create a `triggers.json` file
|
|
||||||
in the agent's export directory:
|
|
||||||
|
|
||||||
```json
|
|
||||||
[
|
|
||||||
{
|
|
||||||
"id": "daily-check",
|
|
||||||
"name": "Daily Check",
|
|
||||||
"trigger_type": "timer",
|
|
||||||
"trigger_config": {"cron": "0 9 * * *"},
|
|
||||||
"task": "Run the daily check process"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
```
|
|
||||||
|
|
||||||
### Key Fields
|
|
||||||
- `trigger_type`: `"timer"` or `"webhook"`
|
|
||||||
- `trigger_config`: `{"cron": "0 9 * * *"}` or `{"interval_minutes": 20}`
|
|
||||||
- `task`: describes what the worker should do when the trigger fires
|
|
||||||
- Triggers can also be created/removed at runtime via `set_trigger` / `remove_trigger` queen tools
|
|
||||||
|
|
||||||
## Tool Discovery
|
## Tool Discovery
|
||||||
|
|
||||||
Do NOT rely on a static tool list — it will be outdated. Always call
|
Always call `list_agent_tools()` first to see available tools.
|
||||||
`list_agent_tools()` with NO arguments first to see ALL available tools.
|
Do NOT rely on a static tool list.
|
||||||
Only use `group=` or `output_schema=` as follow-up calls after seeing the
|
|
||||||
full list.
|
|
||||||
|
|
||||||
```
|
```
|
||||||
list_agent_tools() # ALWAYS call this first
|
list_agent_tools() # full summary
|
||||||
list_agent_tools(group="gmail", output_schema="full") # then drill into a category
|
list_agent_tools(group="gmail", output_schema="full") # drill into category
|
||||||
list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools
|
|
||||||
```
|
```
|
||||||
|
|
||||||
After building, run `validate_agent_package("{name}")` to check everything at once.
|
After building, run `validate_agent_package("{name}")` to check everything.
|
||||||
|
|
||||||
Common tool categories (verify via list_agent_tools):
|
|
||||||
- **Web**: search, scrape, PDF
|
|
||||||
- **Data**: save/load/append/list data files, serve to user
|
|
||||||
- **File**: view, write, replace, diff, list, grep
|
|
||||||
- **Communication**: email, gmail, slack, telegram
|
|
||||||
- **CRM**: hubspot, apollo, calcom
|
|
||||||
- **GitHub**: stargazers, user profiles, repos
|
|
||||||
- **Vision**: image analysis
|
|
||||||
- **Time**: current time
|
|
||||||
|
|||||||
@@ -1,158 +1,53 @@
|
|||||||
# GCU Browser Automation Guide
|
# Browser Automation Guide
|
||||||
|
|
||||||
## When to Use GCU Nodes
|
## When to Use Browser Nodes
|
||||||
|
|
||||||
Use `node_type="gcu"` when:
|
Use browser nodes (with `tools: {policy: "all"}`) when:
|
||||||
- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs)
|
- The task requires interacting with web pages (clicking, typing, navigating)
|
||||||
- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content)
|
- No API is available for the target service
|
||||||
- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files
|
- The user is already logged in to the target site
|
||||||
|
|
||||||
Do NOT use GCU for:
|
## What Browser Nodes Are
|
||||||
- Static content that `web_scrape` handles fine
|
|
||||||
- API-accessible data (use the API directly)
|
|
||||||
- PDF/file processing
|
|
||||||
- Anything that doesn't require a browser UI
|
|
||||||
|
|
||||||
## What GCU Nodes Are
|
- Regular `event_loop` nodes with browser tools from gcu-tools MCP server
|
||||||
|
- Set `tools: {policy: "all"}` to give access to all browser tools
|
||||||
|
- Wire into the graph with edges like any other node
|
||||||
|
- No special node_type needed
|
||||||
|
|
||||||
- `node_type="gcu"` — a declarative enhancement over `event_loop`
|
## Available Browser Tools
|
||||||
- Framework auto-prepends browser best-practices system prompt
|
|
||||||
- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server
|
|
||||||
- Same underlying `EventLoopNode` class — no new imports needed
|
|
||||||
- `tools=[]` is correct — tools are auto-populated at runtime
|
|
||||||
|
|
||||||
## GCU Architecture Pattern
|
All tools are prefixed with `browser_`:
|
||||||
|
- `browser_start`, `browser_open` -- launch/navigate
|
||||||
|
- `browser_click`, `browser_fill`, `browser_type` -- interact
|
||||||
|
- `browser_snapshot` -- read page content (preferred over screenshot)
|
||||||
|
- `browser_screenshot` -- visual capture
|
||||||
|
- `browser_scroll`, `browser_wait` -- navigation helpers
|
||||||
|
- `browser_evaluate` -- run JavaScript
|
||||||
|
|
||||||
GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges.
|
## System Prompt Tips for Browser Nodes
|
||||||
|
|
||||||
- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work
|
```
|
||||||
- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")`
|
1. Use browser_snapshot() to read page content (NOT browser_get_text)
|
||||||
- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False`
|
2. Use browser_wait(seconds=2-3) after navigation for page load
|
||||||
- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)`
|
3. If you hit an auth wall, call set_output with an error and move on
|
||||||
|
4. Keep tool calls per turn <= 10 for reliability
|
||||||
## GCU Node Definition Template
|
|
||||||
|
|
||||||
```python
|
|
||||||
gcu_browser_node = NodeSpec(
|
|
||||||
id="gcu-browser-worker",
|
|
||||||
name="Browser Worker",
|
|
||||||
description="Browser subagent that does X.",
|
|
||||||
node_type="gcu",
|
|
||||||
client_facing=False,
|
|
||||||
max_node_visits=1,
|
|
||||||
input_keys=[],
|
|
||||||
output_keys=["result"],
|
|
||||||
tools=[], # Auto-populated with all browser tools
|
|
||||||
system_prompt="""\
|
|
||||||
You are a browser agent. Your job: [specific task].
|
|
||||||
|
|
||||||
## Workflow
|
|
||||||
1. browser_start (only if no browser is running yet)
|
|
||||||
2. browser_open(url=TARGET_URL) — note the returned targetId
|
|
||||||
3. browser_snapshot to read the page
|
|
||||||
4. [task-specific steps]
|
|
||||||
5. set_output("result", JSON)
|
|
||||||
|
|
||||||
## Output format
|
|
||||||
set_output("result", JSON) with:
|
|
||||||
- [field]: [type and description]
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Parent Node Template (orchestrating GCU subagents)
|
## Example
|
||||||
|
|
||||||
```python
|
|
||||||
orchestrator_node = NodeSpec(
|
|
||||||
id="orchestrator",
|
|
||||||
...
|
|
||||||
node_type="event_loop",
|
|
||||||
sub_agents=["gcu-browser-worker"],
|
|
||||||
system_prompt="""\
|
|
||||||
...
|
|
||||||
delegate_to_sub_agent(
|
|
||||||
agent_id="gcu-browser-worker",
|
|
||||||
task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]."
|
|
||||||
)
|
|
||||||
...
|
|
||||||
""",
|
|
||||||
tools=[], # Orchestrator doesn't need browser tools
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## mcp_servers.json with GCU
|
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"hive-tools": { ... },
|
"id": "scan-profiles",
|
||||||
"gcu-tools": {
|
"name": "Scan LinkedIn Profiles",
|
||||||
"transport": "stdio",
|
"description": "Navigate LinkedIn search results and collect profile data",
|
||||||
"command": "uv",
|
"tools": {"policy": "all"},
|
||||||
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
|
"input_keys": ["search_url"],
|
||||||
"cwd": "../../tools",
|
"output_keys": ["profiles"],
|
||||||
"description": "GCU tools for browser automation"
|
"system_prompt": "Navigate to the search URL, paginate through results..."
|
||||||
}
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine.
|
Connected via regular edges:
|
||||||
|
```
|
||||||
## GCU System Prompt Best Practices
|
search-setup -> scan-profiles -> process-results
|
||||||
|
|
||||||
Key rules to bake into GCU node prompts:
|
|
||||||
|
|
||||||
- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML
|
|
||||||
- Always `browser_wait` after navigation
|
|
||||||
- Use large scroll amounts (~2000-5000) for lazy-loaded content
|
|
||||||
- For spillover files, use `run_command` with grep, not `read_file`
|
|
||||||
- If auth wall detected, report immediately — don't attempt login
|
|
||||||
- Keep tool calls per turn ≤10
|
|
||||||
- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call
|
|
||||||
|
|
||||||
## Multiple Concurrent GCU Subagents
|
|
||||||
|
|
||||||
When a task can be parallelized across multiple sites or profiles, declare a distinct GCU
|
|
||||||
node for each and invoke them all in the same LLM turn. The framework batches all
|
|
||||||
`delegate_to_sub_agent` calls made in one turn and runs them with `asyncio.gather`, so
|
|
||||||
they execute concurrently — not sequentially.
|
|
||||||
|
|
||||||
**Each GCU subagent automatically gets its own isolated browser context** — no `profile=`
|
|
||||||
argument is needed in tool calls. The framework derives a unique profile from the subagent's
|
|
||||||
node ID and instance counter and injects it via an asyncio `ContextVar` before the subagent
|
|
||||||
runs.
|
|
||||||
|
|
||||||
### Example: three sites in parallel
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Three distinct GCU nodes
|
|
||||||
gcu_site_a = NodeSpec(id="gcu-site-a", node_type="gcu", ...)
|
|
||||||
gcu_site_b = NodeSpec(id="gcu-site-b", node_type="gcu", ...)
|
|
||||||
gcu_site_c = NodeSpec(id="gcu-site-c", node_type="gcu", ...)
|
|
||||||
|
|
||||||
orchestrator = NodeSpec(
|
|
||||||
id="orchestrator",
|
|
||||||
node_type="event_loop",
|
|
||||||
sub_agents=["gcu-site-a", "gcu-site-b", "gcu-site-c"],
|
|
||||||
system_prompt="""\
|
|
||||||
Call all three subagents in a single response to run them in parallel:
|
|
||||||
delegate_to_sub_agent(agent_id="gcu-site-a", task="Scrape prices from site A")
|
|
||||||
delegate_to_sub_agent(agent_id="gcu-site-b", task="Scrape prices from site B")
|
|
||||||
delegate_to_sub_agent(agent_id="gcu-site-c", task="Scrape prices from site C")
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**Rules:**
|
|
||||||
- Use distinct node IDs for each concurrent task — sharing an ID shares the browser context.
|
|
||||||
- The GCU node prompts do not need to mention `profile=`; isolation is automatic.
|
|
||||||
- Cleanup is automatic at session end, but GCU nodes can call `browser_stop()` explicitly
|
|
||||||
if they want to release resources mid-run.
|
|
||||||
|
|
||||||
## GCU Anti-Patterns
|
|
||||||
|
|
||||||
- Using `browser_screenshot` to read text (use `browser_snapshot` instead; screenshots are for visual context only)
|
|
||||||
- Re-navigating after scrolling (resets scroll position)
|
|
||||||
- Attempting login on auth walls
|
|
||||||
- Forgetting `target_id` in multi-tab scenarios
|
|
||||||
- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern
|
|
||||||
- Making GCU nodes `client_facing=True` (they should be autonomous subagents)
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
A lightweight side agent that runs after each queen LLM turn. It inspects
|
A lightweight side agent that runs after each queen LLM turn. It inspects
|
||||||
recent conversation messages and extracts durable user knowledge into
|
recent conversation messages and extracts durable user knowledge into
|
||||||
individual memory files in ``~/.hive/queen/global_memory/``.
|
individual memory files in ``~/.hive/memories/global/``.
|
||||||
|
|
||||||
Two reflection types:
|
Two reflection types:
|
||||||
- **Short reflection**: after conversational queen turns. Distills
|
- **Short reflection**: after conversational queen turns. Distills
|
||||||
@@ -493,7 +493,7 @@ async def subscribe_reflection_triggers(
|
|||||||
Call this once during queen setup. Returns a list of event-bus
|
Call this once during queen setup. Returns a list of event-bus
|
||||||
subscription IDs for cleanup during session teardown.
|
subscription IDs for cleanup during session teardown.
|
||||||
"""
|
"""
|
||||||
from framework.runtime.event_bus import EventType
|
from framework.host.event_bus import EventType
|
||||||
|
|
||||||
mem_dir = memory_dir or global_memory_dir()
|
mem_dir = memory_dir or global_memory_dir()
|
||||||
_lock = asyncio.Lock()
|
_lock = asyncio.Lock()
|
||||||
|
|||||||
@@ -22,10 +22,10 @@ def mock_mode():
|
|||||||
|
|
||||||
@pytest_asyncio.fixture(scope="session")
|
@pytest_asyncio.fixture(scope="session")
|
||||||
async def runner(tmp_path_factory, mock_mode):
|
async def runner(tmp_path_factory, mock_mode):
|
||||||
from framework.runner.runner import AgentRunner
|
from framework.loader.agent_loader import AgentLoader
|
||||||
|
|
||||||
storage = tmp_path_factory.mktemp("agent_storage")
|
storage = tmp_path_factory.mktemp("agent_storage")
|
||||||
r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
|
r = AgentLoader.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
|
||||||
r._setup()
|
r._setup()
|
||||||
yield r
|
yield r
|
||||||
await r.cleanup_async()
|
await r.cleanup_async()
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ def main():
|
|||||||
subparsers = parser.add_subparsers(dest="command", required=True)
|
subparsers = parser.add_subparsers(dest="command", required=True)
|
||||||
|
|
||||||
# Register runner commands (run, info, validate, list, shell)
|
# Register runner commands (run, info, validate, list, shell)
|
||||||
from framework.runner.cli import register_commands
|
from framework.loader.cli import register_commands
|
||||||
|
|
||||||
register_commands(subparsers)
|
register_commands(subparsers)
|
||||||
|
|
||||||
@@ -99,7 +99,7 @@ def main():
|
|||||||
register_debugger_commands(subparsers)
|
register_debugger_commands(subparsers)
|
||||||
|
|
||||||
# Register MCP registry commands (mcp install, mcp add, ...)
|
# Register MCP registry commands (mcp install, mcp add, ...)
|
||||||
from framework.runner.mcp_registry_cli import register_mcp_commands
|
from framework.loader.mcp_registry_cli import register_mcp_commands
|
||||||
|
|
||||||
register_mcp_commands(subparsers)
|
register_mcp_commands(subparsers)
|
||||||
|
|
||||||
|
|||||||
+67
-12
@@ -12,13 +12,47 @@ from dataclasses import dataclass, field
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.edge import DEFAULT_MAX_TOKENS
|
from framework.orchestrator.edge import DEFAULT_MAX_TOKENS
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Hive home directory structure
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
HIVE_HOME = Path.home() / ".hive"
|
||||||
|
QUEENS_DIR = HIVE_HOME / "agents" / "queens"
|
||||||
|
COLONIES_DIR = HIVE_HOME / "colonies"
|
||||||
|
MEMORIES_DIR = HIVE_HOME / "memories"
|
||||||
|
|
||||||
|
|
||||||
|
def queen_dir(queen_name: str = "default") -> Path:
|
||||||
|
"""Return the storage directory for a named queen agent."""
|
||||||
|
return QUEENS_DIR / queen_name
|
||||||
|
|
||||||
|
|
||||||
|
def colony_dir(colony_name: str) -> Path:
|
||||||
|
"""Return the directory for a named colony."""
|
||||||
|
return COLONIES_DIR / colony_name
|
||||||
|
|
||||||
|
|
||||||
|
def memory_dir(scope: str, name: str | None = None) -> Path:
|
||||||
|
"""Return memory dir for a scope.
|
||||||
|
|
||||||
|
Examples::
|
||||||
|
|
||||||
|
memory_dir("global") -> ~/.hive/memories/global
|
||||||
|
memory_dir("colonies", "my_agent") -> ~/.hive/memories/colonies/my_agent
|
||||||
|
memory_dir("agents/queens", "default")-> ~/.hive/memories/agents/queens/default
|
||||||
|
memory_dir("agents", "worker_name") -> ~/.hive/memories/agents/worker_name
|
||||||
|
"""
|
||||||
|
base = MEMORIES_DIR / scope
|
||||||
|
return base / name if name else base
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Low-level config file access
|
# Low-level config file access
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
|
HIVE_CONFIG_FILE = HIVE_HOME / "configuration.json"
|
||||||
|
|
||||||
# Hive LLM router endpoint (Anthropic-compatible).
|
# Hive LLM router endpoint (Anthropic-compatible).
|
||||||
# litellm's Anthropic handler appends /v1/messages, so this is just the base host.
|
# litellm's Anthropic handler appends /v1/messages, so this is just the base host.
|
||||||
@@ -130,7 +164,7 @@ def get_worker_api_key() -> str | None:
|
|||||||
# Worker-specific subscription / env var
|
# Worker-specific subscription / env var
|
||||||
if worker_llm.get("use_claude_code_subscription"):
|
if worker_llm.get("use_claude_code_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_claude_code_token
|
from framework.loader.agent_loader import get_claude_code_token
|
||||||
|
|
||||||
token = get_claude_code_token()
|
token = get_claude_code_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -140,7 +174,7 @@ def get_worker_api_key() -> str | None:
|
|||||||
|
|
||||||
if worker_llm.get("use_codex_subscription"):
|
if worker_llm.get("use_codex_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_codex_token
|
from framework.loader.agent_loader import get_codex_token
|
||||||
|
|
||||||
token = get_codex_token()
|
token = get_codex_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -150,7 +184,7 @@ def get_worker_api_key() -> str | None:
|
|||||||
|
|
||||||
if worker_llm.get("use_kimi_code_subscription"):
|
if worker_llm.get("use_kimi_code_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_kimi_code_token
|
from framework.loader.agent_loader import get_kimi_code_token
|
||||||
|
|
||||||
token = get_kimi_code_token()
|
token = get_kimi_code_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -160,7 +194,7 @@ def get_worker_api_key() -> str | None:
|
|||||||
|
|
||||||
if worker_llm.get("use_antigravity_subscription"):
|
if worker_llm.get("use_antigravity_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_antigravity_token
|
from framework.loader.agent_loader import get_antigravity_token
|
||||||
|
|
||||||
token = get_antigravity_token()
|
token = get_antigravity_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -216,7 +250,7 @@ def get_worker_llm_extra_kwargs() -> dict[str, Any]:
|
|||||||
"User-Agent": "CodexBar",
|
"User-Agent": "CodexBar",
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_codex_account_id
|
from framework.loader.agent_loader import get_codex_account_id
|
||||||
|
|
||||||
account_id = get_codex_account_id()
|
account_id = get_codex_account_id()
|
||||||
if account_id:
|
if account_id:
|
||||||
@@ -263,22 +297,43 @@ def get_max_context_tokens() -> int:
|
|||||||
return get_hive_config().get("llm", {}).get("max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS)
|
return get_hive_config().get("llm", {}).get("max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS)
|
||||||
|
|
||||||
|
|
||||||
|
def get_api_keys() -> list[str] | None:
|
||||||
|
"""Return a list of API keys if ``api_keys`` is configured, else ``None``.
|
||||||
|
|
||||||
|
This supports key-pool rotation: configure multiple keys in
|
||||||
|
``~/.hive/configuration.json`` under ``llm.api_keys`` and the
|
||||||
|
:class:`~framework.llm.key_pool.KeyPool` will rotate through them.
|
||||||
|
"""
|
||||||
|
llm = get_hive_config().get("llm", {})
|
||||||
|
keys = llm.get("api_keys")
|
||||||
|
if keys and isinstance(keys, list) and len(keys) > 0:
|
||||||
|
return [k for k in keys if k] # filter empties
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_api_key() -> str | None:
|
def get_api_key() -> str | None:
|
||||||
"""Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code.
|
"""Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code.
|
||||||
|
|
||||||
Priority:
|
Priority:
|
||||||
|
0. Explicit key pool (``api_keys`` list) -- returns first key for
|
||||||
|
single-key callers; full pool available via :func:`get_api_keys`.
|
||||||
1. Claude Code subscription (``use_claude_code_subscription: true``)
|
1. Claude Code subscription (``use_claude_code_subscription: true``)
|
||||||
reads the OAuth token from ``~/.claude/.credentials.json``.
|
reads the OAuth token from ``~/.claude/.credentials.json``.
|
||||||
2. Codex subscription (``use_codex_subscription: true``)
|
2. Codex subscription (``use_codex_subscription: true``)
|
||||||
reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``.
|
reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``.
|
||||||
3. Environment variable named in ``api_key_env_var``.
|
3. Environment variable named in ``api_key_env_var``.
|
||||||
"""
|
"""
|
||||||
|
# If an explicit key pool is configured, use the first key.
|
||||||
|
pool_keys = get_api_keys()
|
||||||
|
if pool_keys:
|
||||||
|
return pool_keys[0]
|
||||||
|
|
||||||
llm = get_hive_config().get("llm", {})
|
llm = get_hive_config().get("llm", {})
|
||||||
|
|
||||||
# Claude Code subscription: read OAuth token directly
|
# Claude Code subscription: read OAuth token directly
|
||||||
if llm.get("use_claude_code_subscription"):
|
if llm.get("use_claude_code_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_claude_code_token
|
from framework.loader.agent_loader import get_claude_code_token
|
||||||
|
|
||||||
token = get_claude_code_token()
|
token = get_claude_code_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -289,7 +344,7 @@ def get_api_key() -> str | None:
|
|||||||
# Codex subscription: read OAuth token from Keychain / auth.json
|
# Codex subscription: read OAuth token from Keychain / auth.json
|
||||||
if llm.get("use_codex_subscription"):
|
if llm.get("use_codex_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_codex_token
|
from framework.loader.agent_loader import get_codex_token
|
||||||
|
|
||||||
token = get_codex_token()
|
token = get_codex_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -300,7 +355,7 @@ def get_api_key() -> str | None:
|
|||||||
# Kimi Code subscription: read API key from ~/.kimi/config.toml
|
# Kimi Code subscription: read API key from ~/.kimi/config.toml
|
||||||
if llm.get("use_kimi_code_subscription"):
|
if llm.get("use_kimi_code_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_kimi_code_token
|
from framework.loader.agent_loader import get_kimi_code_token
|
||||||
|
|
||||||
token = get_kimi_code_token()
|
token = get_kimi_code_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -311,7 +366,7 @@ def get_api_key() -> str | None:
|
|||||||
# Antigravity subscription: read OAuth token from accounts JSON
|
# Antigravity subscription: read OAuth token from accounts JSON
|
||||||
if llm.get("use_antigravity_subscription"):
|
if llm.get("use_antigravity_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_antigravity_token
|
from framework.loader.agent_loader import get_antigravity_token
|
||||||
|
|
||||||
token = get_antigravity_token()
|
token = get_antigravity_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -468,7 +523,7 @@ def get_llm_extra_kwargs() -> dict[str, Any]:
|
|||||||
"User-Agent": "CodexBar",
|
"User-Agent": "CodexBar",
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_codex_account_id
|
from framework.loader.agent_loader import get_codex_account_id
|
||||||
|
|
||||||
account_id = get_codex_account_id()
|
account_id = get_codex_account_id()
|
||||||
if account_id:
|
if account_id:
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ from pathlib import Path
|
|||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph import NodeSpec
|
from framework.orchestrator import NodeSpec
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -533,7 +533,9 @@ class CredentialSetupSession:
|
|||||||
|
|
||||||
|
|
||||||
def load_agent_nodes(agent_path: str | Path) -> list:
|
def load_agent_nodes(agent_path: str | Path) -> list:
|
||||||
"""Load NodeSpec list from an agent's agent.py or agent.json.
|
"""Load NodeSpec list from an agent directory.
|
||||||
|
|
||||||
|
Checks agent.json (declarative) first, then agent.py (legacy).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
agent_path: Path to agent directory.
|
agent_path: Path to agent directory.
|
||||||
@@ -542,16 +544,28 @@ def load_agent_nodes(agent_path: str | Path) -> list:
|
|||||||
List of NodeSpec objects (empty list if agent can't be loaded).
|
List of NodeSpec objects (empty list if agent can't be loaded).
|
||||||
"""
|
"""
|
||||||
agent_path = Path(agent_path)
|
agent_path = Path(agent_path)
|
||||||
|
agent_json_file = agent_path / "agent.json"
|
||||||
agent_py = agent_path / "agent.py"
|
agent_py = agent_path / "agent.py"
|
||||||
agent_json = agent_path / "agent.json"
|
|
||||||
|
|
||||||
if agent_py.exists():
|
if agent_json_file.exists():
|
||||||
|
return _load_nodes_from_json_declarative(agent_json_file)
|
||||||
|
elif agent_py.exists():
|
||||||
return _load_nodes_from_python_agent(agent_path)
|
return _load_nodes_from_python_agent(agent_path)
|
||||||
elif agent_json.exists():
|
|
||||||
return _load_nodes_from_json_agent(agent_json)
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def _load_nodes_from_json_declarative(agent_json: Path) -> list:
|
||||||
|
"""Load nodes from a declarative JSON agent."""
|
||||||
|
try:
|
||||||
|
from framework.loader.agent_loader import load_agent_config
|
||||||
|
|
||||||
|
data = json.loads(agent_json.read_text(encoding="utf-8"))
|
||||||
|
graph, _ = load_agent_config(data)
|
||||||
|
return list(graph.nodes)
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
def _load_nodes_from_python_agent(agent_path: Path) -> list:
|
def _load_nodes_from_python_agent(agent_path: Path) -> list:
|
||||||
"""Load nodes from a Python-based agent."""
|
"""Load nodes from a Python-based agent."""
|
||||||
import importlib.util
|
import importlib.util
|
||||||
@@ -590,7 +604,7 @@ def _load_nodes_from_json_agent(agent_json: Path) -> list:
|
|||||||
with open(agent_json, encoding="utf-8-sig") as f:
|
with open(agent_json, encoding="utf-8-sig") as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
|
|
||||||
from framework.graph import NodeSpec
|
from framework.orchestrator import NodeSpec
|
||||||
|
|
||||||
nodes_data = data.get("graph", {}).get("nodes", [])
|
nodes_data = data.get("graph", {}).get("nodes", [])
|
||||||
nodes = []
|
nodes = []
|
||||||
|
|||||||
@@ -1,65 +0,0 @@
|
|||||||
"""Graph structures: Goals, Nodes, Edges, and Execution."""
|
|
||||||
|
|
||||||
from framework.graph.context import GraphContext
|
|
||||||
from framework.graph.context_handoff import ContextHandoff, HandoffContext
|
|
||||||
from framework.graph.conversation import ConversationStore, Message, NodeConversation
|
|
||||||
from framework.graph.edge import DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec
|
|
||||||
from framework.graph.event_loop_node import (
|
|
||||||
EventLoopNode,
|
|
||||||
JudgeProtocol,
|
|
||||||
JudgeVerdict,
|
|
||||||
LoopConfig,
|
|
||||||
OutputAccumulator,
|
|
||||||
)
|
|
||||||
from framework.graph.executor import GraphExecutor
|
|
||||||
from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
|
|
||||||
from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec
|
|
||||||
from framework.graph.worker_agent import (
|
|
||||||
Activation,
|
|
||||||
FanOutTag,
|
|
||||||
FanOutTracker,
|
|
||||||
WorkerAgent,
|
|
||||||
WorkerCompletion,
|
|
||||||
WorkerLifecycle,
|
|
||||||
)
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
# Goal
|
|
||||||
"Goal",
|
|
||||||
"SuccessCriterion",
|
|
||||||
"Constraint",
|
|
||||||
"GoalStatus",
|
|
||||||
# Node
|
|
||||||
"NodeSpec",
|
|
||||||
"NodeContext",
|
|
||||||
"NodeResult",
|
|
||||||
"NodeProtocol",
|
|
||||||
# Edge
|
|
||||||
"EdgeSpec",
|
|
||||||
"EdgeCondition",
|
|
||||||
"GraphSpec",
|
|
||||||
"DEFAULT_MAX_TOKENS",
|
|
||||||
# Executor
|
|
||||||
"GraphExecutor",
|
|
||||||
# Conversation
|
|
||||||
"NodeConversation",
|
|
||||||
"ConversationStore",
|
|
||||||
"Message",
|
|
||||||
# Event Loop
|
|
||||||
"EventLoopNode",
|
|
||||||
"LoopConfig",
|
|
||||||
"OutputAccumulator",
|
|
||||||
"JudgeProtocol",
|
|
||||||
"JudgeVerdict",
|
|
||||||
# Context Handoff
|
|
||||||
"ContextHandoff",
|
|
||||||
"HandoffContext",
|
|
||||||
# Worker Agent
|
|
||||||
"WorkerAgent",
|
|
||||||
"WorkerLifecycle",
|
|
||||||
"WorkerCompletion",
|
|
||||||
"Activation",
|
|
||||||
"FanOutTag",
|
|
||||||
"FanOutTracker",
|
|
||||||
"GraphContext",
|
|
||||||
]
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
"""EventLoopNode subpackage — modular components of the event loop orchestrator.
|
|
||||||
|
|
||||||
All public symbols are re-exported by the parent ``event_loop_node.py`` for
|
|
||||||
backward compatibility. Internal consumers may import directly from these
|
|
||||||
submodules for clarity.
|
|
||||||
"""
|
|
||||||
@@ -1,370 +0,0 @@
|
|||||||
"""Subagent execution for the event loop.
|
|
||||||
|
|
||||||
Handles the full subagent lifecycle: validation, context setup, tool filtering,
|
|
||||||
conversation store derivation, execution, and cleanup.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
from collections.abc import Awaitable, Callable
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import TYPE_CHECKING, Any
|
|
||||||
|
|
||||||
from framework.graph.conversation import ConversationStore
|
|
||||||
from framework.graph.event_loop.judge_pipeline import SubagentJudge
|
|
||||||
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
|
|
||||||
from framework.graph.node import DataBuffer, NodeContext
|
|
||||||
from framework.llm.provider import ToolResult, ToolUse
|
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
|
||||||
from framework.runtime.event_bus import EventBus
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from framework.graph.event_loop_node import EventLoopNode
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def execute_subagent(
|
|
||||||
ctx: NodeContext,
|
|
||||||
agent_id: str,
|
|
||||||
task: str,
|
|
||||||
*,
|
|
||||||
config: LoopConfig,
|
|
||||||
event_loop_node_cls: type[EventLoopNode],
|
|
||||||
escalation_receiver_cls: Callable[[], Any],
|
|
||||||
accumulator: OutputAccumulator | None = None,
|
|
||||||
event_bus: EventBus | None = None,
|
|
||||||
tool_executor: Callable[[ToolUse], ToolResult | Awaitable[ToolResult]] | None = None,
|
|
||||||
conversation_store: ConversationStore | None = None,
|
|
||||||
subagent_instance_counter: dict[str, int] | None = None,
|
|
||||||
) -> ToolResult:
|
|
||||||
"""Execute a subagent and return the result as a ToolResult.
|
|
||||||
|
|
||||||
The subagent:
|
|
||||||
- Gets a fresh conversation with just the task
|
|
||||||
- Has read-only access to the parent's readable memory
|
|
||||||
- Cannot delegate to its own subagents (prevents recursion)
|
|
||||||
- Returns its output in structured JSON format
|
|
||||||
|
|
||||||
Args:
|
|
||||||
ctx: Parent node's context (for memory, tools, LLM access).
|
|
||||||
agent_id: The node ID of the subagent to invoke.
|
|
||||||
task: The task description to give the subagent.
|
|
||||||
accumulator: Parent's OutputAccumulator.
|
|
||||||
event_bus: EventBus for lifecycle events.
|
|
||||||
config: LoopConfig for iteration/tool limits.
|
|
||||||
tool_executor: Tool executor callable.
|
|
||||||
conversation_store: Parent conversation store (for deriving subagent store).
|
|
||||||
subagent_instance_counter: Mutable counter dict for unique subagent paths.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
ToolResult with structured JSON output.
|
|
||||||
"""
|
|
||||||
# Log subagent invocation start
|
|
||||||
logger.info(
|
|
||||||
"\n" + "=" * 60 + "\n"
|
|
||||||
"🤖 SUBAGENT INVOCATION\n"
|
|
||||||
"=" * 60 + "\n"
|
|
||||||
"Parent Node: %s\n"
|
|
||||||
"Subagent ID: %s\n"
|
|
||||||
"Task: %s\n" + "=" * 60,
|
|
||||||
ctx.node_id,
|
|
||||||
agent_id,
|
|
||||||
task[:500] + "..." if len(task) > 500 else task,
|
|
||||||
)
|
|
||||||
|
|
||||||
# 1. Validate agent exists in registry
|
|
||||||
if agent_id not in ctx.node_registry:
|
|
||||||
return ToolResult(
|
|
||||||
tool_use_id="",
|
|
||||||
content=json.dumps(
|
|
||||||
{
|
|
||||||
"message": f"Sub-agent '{agent_id}' not found in registry",
|
|
||||||
"data": None,
|
|
||||||
"metadata": {"agent_id": agent_id, "success": False, "error": "not_found"},
|
|
||||||
}
|
|
||||||
),
|
|
||||||
is_error=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
subagent_spec = ctx.node_registry[agent_id]
|
|
||||||
|
|
||||||
# 2. Create read-only memory snapshot
|
|
||||||
parent_data = ctx.buffer.read_all()
|
|
||||||
|
|
||||||
# Merge in-flight outputs from the parent's accumulator.
|
|
||||||
if accumulator:
|
|
||||||
for key, value in accumulator.to_dict().items():
|
|
||||||
if key not in parent_data:
|
|
||||||
parent_data[key] = value
|
|
||||||
|
|
||||||
subagent_buffer = DataBuffer()
|
|
||||||
for key, value in parent_data.items():
|
|
||||||
subagent_buffer.write(key, value, validate=False)
|
|
||||||
|
|
||||||
read_keys = set(parent_data.keys()) | set(subagent_spec.input_keys or [])
|
|
||||||
scoped_buffer = subagent_buffer.with_permissions(
|
|
||||||
read_keys=list(read_keys),
|
|
||||||
write_keys=[], # Read-only!
|
|
||||||
)
|
|
||||||
|
|
||||||
# 2b. Compute instance counter early so the callback and child context
|
|
||||||
# share the same stable node_id for this subagent invocation.
|
|
||||||
if subagent_instance_counter is not None:
|
|
||||||
subagent_instance_counter.setdefault(agent_id, 0)
|
|
||||||
subagent_instance_counter[agent_id] += 1
|
|
||||||
subagent_instance = str(subagent_instance_counter[agent_id])
|
|
||||||
else:
|
|
||||||
subagent_instance = "1"
|
|
||||||
|
|
||||||
if subagent_instance == "1":
|
|
||||||
sa_node_id = f"{ctx.node_id}:subagent:{agent_id}"
|
|
||||||
else:
|
|
||||||
sa_node_id = f"{ctx.node_id}:subagent:{agent_id}:{subagent_instance}"
|
|
||||||
|
|
||||||
# 2c. Set up report callback (one-way channel to parent / event bus)
|
|
||||||
subagent_reports: list[dict] = []
|
|
||||||
|
|
||||||
async def _report_callback(
|
|
||||||
message: str,
|
|
||||||
data: dict | None = None,
|
|
||||||
*,
|
|
||||||
wait_for_response: bool = False,
|
|
||||||
) -> str | None:
|
|
||||||
subagent_reports.append({"message": message, "data": data, "timestamp": time.time()})
|
|
||||||
if event_bus:
|
|
||||||
await event_bus.emit_subagent_report(
|
|
||||||
stream_id=ctx.node_id,
|
|
||||||
node_id=sa_node_id,
|
|
||||||
subagent_id=agent_id,
|
|
||||||
message=message,
|
|
||||||
data=data,
|
|
||||||
execution_id=ctx.execution_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
if not wait_for_response:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not event_bus:
|
|
||||||
logger.warning(
|
|
||||||
"Subagent '%s' requested user response but no event_bus available",
|
|
||||||
agent_id,
|
|
||||||
)
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Create isolated receiver and register for input routing
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
escalation_id = f"{ctx.node_id}:escalation:{uuid.uuid4().hex[:8]}"
|
|
||||||
receiver = escalation_receiver_cls()
|
|
||||||
registry = ctx.shared_node_registry
|
|
||||||
|
|
||||||
registry[escalation_id] = receiver
|
|
||||||
try:
|
|
||||||
await event_bus.emit_escalation_requested(
|
|
||||||
stream_id=ctx.stream_id or ctx.node_id,
|
|
||||||
node_id=escalation_id,
|
|
||||||
reason=f"Subagent report (wait_for_response) from {agent_id}",
|
|
||||||
context=message,
|
|
||||||
execution_id=ctx.execution_id,
|
|
||||||
)
|
|
||||||
# Block until queen responds
|
|
||||||
return await receiver.wait()
|
|
||||||
finally:
|
|
||||||
registry.pop(escalation_id, None)
|
|
||||||
|
|
||||||
# 3. Filter tools for subagent
|
|
||||||
subagent_tool_names = set(subagent_spec.tools or [])
|
|
||||||
tool_source = ctx.all_tools if ctx.all_tools else ctx.available_tools
|
|
||||||
|
|
||||||
# GCU auto-population
|
|
||||||
if subagent_spec.node_type == "gcu" and not subagent_tool_names:
|
|
||||||
subagent_tools = [t for t in tool_source if t.name != "delegate_to_sub_agent"]
|
|
||||||
else:
|
|
||||||
subagent_tools = [
|
|
||||||
t
|
|
||||||
for t in tool_source
|
|
||||||
if t.name in subagent_tool_names and t.name != "delegate_to_sub_agent"
|
|
||||||
]
|
|
||||||
|
|
||||||
missing = subagent_tool_names - {t.name for t in subagent_tools}
|
|
||||||
if missing:
|
|
||||||
logger.warning(
|
|
||||||
"Subagent '%s' requested tools not found in catalog: %s",
|
|
||||||
agent_id,
|
|
||||||
sorted(missing),
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"📦 Subagent '%s' configuration:\n"
|
|
||||||
" - System prompt: %s\n"
|
|
||||||
" - Tools available (%d): %s\n"
|
|
||||||
" - Memory keys inherited: %s",
|
|
||||||
agent_id,
|
|
||||||
(subagent_spec.system_prompt[:200] + "...")
|
|
||||||
if subagent_spec.system_prompt and len(subagent_spec.system_prompt) > 200
|
|
||||||
else subagent_spec.system_prompt,
|
|
||||||
len(subagent_tools),
|
|
||||||
[t.name for t in subagent_tools],
|
|
||||||
list(parent_data.keys()),
|
|
||||||
)
|
|
||||||
|
|
||||||
# 4. Build subagent context
|
|
||||||
max_iter = min(config.max_iterations, 10)
|
|
||||||
subagent_ctx = NodeContext(
|
|
||||||
runtime=ctx.runtime,
|
|
||||||
node_id=sa_node_id,
|
|
||||||
node_spec=subagent_spec,
|
|
||||||
buffer=scoped_buffer,
|
|
||||||
input_data={"task": task, **parent_data},
|
|
||||||
llm=ctx.llm,
|
|
||||||
available_tools=subagent_tools,
|
|
||||||
goal_context=(
|
|
||||||
f"Your specific task: {task}\n\n"
|
|
||||||
f"COMPLETION REQUIREMENTS:\n"
|
|
||||||
f"When your task is done, you MUST call set_output() "
|
|
||||||
f"for each required key: {subagent_spec.output_keys}\n"
|
|
||||||
f"Alternatively, call report_to_parent(mark_complete=true) "
|
|
||||||
f"with your findings in message/data.\n"
|
|
||||||
+ (
|
|
||||||
"Before finishing, call browser_close_finished() to clean up your browser tabs.\n"
|
|
||||||
if subagent_spec.node_type == "gcu"
|
|
||||||
else ""
|
|
||||||
)
|
|
||||||
+ f"You have a maximum of {max_iter} turns to complete this task."
|
|
||||||
),
|
|
||||||
goal=ctx.goal,
|
|
||||||
max_tokens=ctx.max_tokens,
|
|
||||||
runtime_logger=ctx.runtime_logger,
|
|
||||||
is_subagent_mode=True, # Prevents nested delegation
|
|
||||||
report_callback=_report_callback,
|
|
||||||
node_registry={}, # Empty - no nested subagents
|
|
||||||
shared_node_registry=ctx.shared_node_registry, # For escalation routing
|
|
||||||
)
|
|
||||||
|
|
||||||
# 5. Create and execute subagent EventLoopNode
|
|
||||||
subagent_conv_store = None
|
|
||||||
if conversation_store is not None:
|
|
||||||
from framework.storage.conversation_store import FileConversationStore
|
|
||||||
|
|
||||||
parent_base = getattr(conversation_store, "_base", None)
|
|
||||||
if parent_base is not None:
|
|
||||||
conversations_dir = parent_base.parent
|
|
||||||
subagent_dir_name = f"{agent_id}-{subagent_instance}"
|
|
||||||
subagent_store_path = conversations_dir / subagent_dir_name
|
|
||||||
subagent_conv_store = FileConversationStore(base_path=subagent_store_path)
|
|
||||||
|
|
||||||
# Derive a subagent-scoped spillover dir
|
|
||||||
subagent_spillover = None
|
|
||||||
if config.spillover_dir:
|
|
||||||
subagent_spillover = str(Path(config.spillover_dir) / agent_id / subagent_instance)
|
|
||||||
|
|
||||||
subagent_node = event_loop_node_cls(
|
|
||||||
event_bus=event_bus,
|
|
||||||
judge=SubagentJudge(task=task, max_iterations=max_iter),
|
|
||||||
config=LoopConfig(
|
|
||||||
max_iterations=max_iter,
|
|
||||||
max_tool_calls_per_turn=config.max_tool_calls_per_turn,
|
|
||||||
tool_call_overflow_margin=config.tool_call_overflow_margin,
|
|
||||||
max_context_tokens=config.max_context_tokens,
|
|
||||||
stall_detection_threshold=config.stall_detection_threshold,
|
|
||||||
max_tool_result_chars=config.max_tool_result_chars,
|
|
||||||
spillover_dir=subagent_spillover,
|
|
||||||
),
|
|
||||||
tool_executor=tool_executor,
|
|
||||||
conversation_store=subagent_conv_store,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Each subagent instance gets its own unique browser profile so concurrent
|
|
||||||
# subagents don't share tab groups. The profile is set as execution context
|
|
||||||
# so the tool registry auto-injects it into every browser_* MCP tool call.
|
|
||||||
_gcu_profile = f"{agent_id}:{subagent_instance}"
|
|
||||||
_profile_token = ToolRegistry.set_execution_context(profile=_gcu_profile)
|
|
||||||
|
|
||||||
try:
|
|
||||||
logger.info("🚀 Starting subagent '%s' execution...", agent_id)
|
|
||||||
start_time = time.time()
|
|
||||||
result = await subagent_node.execute(subagent_ctx)
|
|
||||||
latency_ms = int((time.time() - start_time) * 1000)
|
|
||||||
|
|
||||||
separator = "-" * 60
|
|
||||||
logger.info(
|
|
||||||
"\n%s\n"
|
|
||||||
"✅ SUBAGENT '%s' COMPLETED\n"
|
|
||||||
"%s\n"
|
|
||||||
"Success: %s\n"
|
|
||||||
"Latency: %dms\n"
|
|
||||||
"Tokens used: %s\n"
|
|
||||||
"Output keys: %s\n"
|
|
||||||
"%s",
|
|
||||||
separator,
|
|
||||||
agent_id,
|
|
||||||
separator,
|
|
||||||
result.success,
|
|
||||||
latency_ms,
|
|
||||||
result.tokens_used,
|
|
||||||
list(result.output.keys()) if result.output else [],
|
|
||||||
separator,
|
|
||||||
)
|
|
||||||
|
|
||||||
result_json = {
|
|
||||||
"message": (
|
|
||||||
f"Sub-agent '{agent_id}' completed successfully"
|
|
||||||
if result.success
|
|
||||||
else f"Sub-agent '{agent_id}' failed: {result.error}"
|
|
||||||
),
|
|
||||||
"data": result.output,
|
|
||||||
"reports": subagent_reports if subagent_reports else None,
|
|
||||||
"metadata": {
|
|
||||||
"agent_id": agent_id,
|
|
||||||
"success": result.success,
|
|
||||||
"tokens_used": result.tokens_used,
|
|
||||||
"latency_ms": latency_ms,
|
|
||||||
"report_count": len(subagent_reports),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
return ToolResult(
|
|
||||||
tool_use_id="",
|
|
||||||
content=json.dumps(result_json, indent=2, default=str),
|
|
||||||
is_error=not result.success,
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.exception(
|
|
||||||
"\n" + "!" * 60 + "\n❌ SUBAGENT '%s' FAILED\nError: %s\n" + "!" * 60,
|
|
||||||
agent_id,
|
|
||||||
str(e),
|
|
||||||
)
|
|
||||||
result_json = {
|
|
||||||
"message": f"Sub-agent '{agent_id}' raised exception: {e}",
|
|
||||||
"data": None,
|
|
||||||
"metadata": {
|
|
||||||
"agent_id": agent_id,
|
|
||||||
"success": False,
|
|
||||||
"error": str(e),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
return ToolResult(
|
|
||||||
tool_use_id="",
|
|
||||||
content=json.dumps(result_json, indent=2),
|
|
||||||
is_error=True,
|
|
||||||
)
|
|
||||||
finally:
|
|
||||||
ToolRegistry.reset_execution_context(_profile_token)
|
|
||||||
# Close the tab group this subagent created, if any.
|
|
||||||
try:
|
|
||||||
from gcu.browser.bridge import get_bridge
|
|
||||||
from gcu.browser.tools.lifecycle import _contexts
|
|
||||||
|
|
||||||
bridge = get_bridge()
|
|
||||||
ctx_entry = _contexts.pop(_gcu_profile, None)
|
|
||||||
if bridge and bridge.is_connected and ctx_entry:
|
|
||||||
group_id = ctx_entry.get("groupId")
|
|
||||||
if group_id is not None:
|
|
||||||
await bridge.destroy_context(group_id)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
"""Host layer -- how agents are triggered and hosted."""
|
||||||
|
|
||||||
|
from framework.host.agent_host import ( # noqa: F401
|
||||||
|
AgentHost,
|
||||||
|
AgentRuntimeConfig,
|
||||||
|
)
|
||||||
|
from framework.host.event_bus import AgentEvent, EventBus, EventType # noqa: F401
|
||||||
|
from framework.host.execution_manager import ( # noqa: F401
|
||||||
|
EntryPointSpec,
|
||||||
|
ExecutionManager,
|
||||||
|
)
|
||||||
File diff suppressed because it is too large
Load Diff
+18
-23
@@ -18,18 +18,18 @@ from dataclasses import dataclass, field
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
from framework.graph.checkpoint_config import CheckpointConfig
|
from framework.orchestrator.checkpoint_config import CheckpointConfig
|
||||||
from framework.graph.executor import ExecutionResult, GraphExecutor
|
from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
|
from framework.host.shared_state import IsolationLevel, SharedBufferManager
|
||||||
from framework.runtime.stream_runtime import StreamRuntime, StreamRuntimeAdapter
|
from framework.host.stream_runtime import StreamDecisionTracker, StreamRuntimeAdapter
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
from framework.graph.goal import Goal
|
from framework.orchestrator.goal import Goal
|
||||||
from framework.llm.provider import LLMProvider, Tool
|
from framework.llm.provider import LLMProvider, Tool
|
||||||
from framework.runtime.event_bus import AgentEvent
|
from framework.host.event_bus import AgentEvent
|
||||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
from framework.host.outcome_aggregator import OutcomeAggregator
|
||||||
from framework.storage.concurrent import ConcurrentStorage
|
from framework.storage.concurrent import ConcurrentStorage
|
||||||
from framework.storage.session_store import SessionStore
|
from framework.storage.session_store import SessionStore
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ class ExecutionContext:
|
|||||||
status: str = "pending" # pending, running, completed, failed, paused
|
status: str = "pending" # pending, running, completed, failed, paused
|
||||||
|
|
||||||
|
|
||||||
class ExecutionStream:
|
class ExecutionManager:
|
||||||
"""
|
"""
|
||||||
Manages concurrent executions for a single entry point.
|
Manages concurrent executions for a single entry point.
|
||||||
|
|
||||||
@@ -262,7 +262,7 @@ class ExecutionStream:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create stream-scoped runtime
|
# Create stream-scoped runtime
|
||||||
self._runtime = StreamRuntime(
|
self._runtime = StreamDecisionTracker(
|
||||||
stream_id=stream_id,
|
stream_id=stream_id,
|
||||||
storage=storage,
|
storage=storage,
|
||||||
outcome_aggregator=outcome_aggregator,
|
outcome_aggregator=outcome_aggregator,
|
||||||
@@ -271,7 +271,7 @@ class ExecutionStream:
|
|||||||
# Execution tracking
|
# Execution tracking
|
||||||
self._active_executions: dict[str, ExecutionContext] = {}
|
self._active_executions: dict[str, ExecutionContext] = {}
|
||||||
self._execution_tasks: dict[str, asyncio.Task] = {}
|
self._execution_tasks: dict[str, asyncio.Task] = {}
|
||||||
self._active_executors: dict[str, GraphExecutor] = {}
|
self._active_executors: dict[str, Orchestrator] = {}
|
||||||
self._cancel_reasons: dict[str, str] = {}
|
self._cancel_reasons: dict[str, str] = {}
|
||||||
self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
|
self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
|
||||||
self._execution_result_times: dict[str, float] = {}
|
self._execution_result_times: dict[str, float] = {}
|
||||||
@@ -301,7 +301,7 @@ class ExecutionStream:
|
|||||||
|
|
||||||
# Emit stream started event
|
# Emit stream started event
|
||||||
if self._scoped_event_bus:
|
if self._scoped_event_bus:
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
await self._scoped_event_bus.publish(
|
await self._scoped_event_bus.publish(
|
||||||
AgentEvent(
|
AgentEvent(
|
||||||
@@ -426,7 +426,7 @@ class ExecutionStream:
|
|||||||
|
|
||||||
# Emit stream stopped event
|
# Emit stream stopped event
|
||||||
if self._scoped_event_bus:
|
if self._scoped_event_bus:
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
await self._scoped_event_bus.publish(
|
await self._scoped_event_bus.publish(
|
||||||
AgentEvent(
|
AgentEvent(
|
||||||
@@ -668,7 +668,7 @@ class ExecutionStream:
|
|||||||
# Create per-execution runtime logger
|
# Create per-execution runtime logger
|
||||||
runtime_logger = None
|
runtime_logger = None
|
||||||
if self._runtime_log_store:
|
if self._runtime_log_store:
|
||||||
from framework.runtime.runtime_logger import RuntimeLogger
|
from framework.tracker.runtime_logger import RuntimeLogger
|
||||||
|
|
||||||
runtime_logger = RuntimeLogger(
|
runtime_logger = RuntimeLogger(
|
||||||
store=self._runtime_log_store, agent_id=self.graph.id
|
store=self._runtime_log_store, agent_id=self.graph.id
|
||||||
@@ -697,12 +697,7 @@ class ExecutionStream:
|
|||||||
# forward so the next attempt resumes at the failed node.
|
# forward so the next attempt resumes at the failed node.
|
||||||
while True:
|
while True:
|
||||||
# Create executor for this execution.
|
# Create executor for this execution.
|
||||||
# Each execution gets its own storage under sessions/{exec_id}/
|
executor = Orchestrator(
|
||||||
# so conversations, spillover, and data files are all scoped
|
|
||||||
# to this execution. The executor sets data_dir via execution
|
|
||||||
# context (contextvars) so data tools and spillover share the
|
|
||||||
# same session-scoped directory.
|
|
||||||
executor = GraphExecutor(
|
|
||||||
runtime=runtime_adapter,
|
runtime=runtime_adapter,
|
||||||
llm=self._llm,
|
llm=self._llm,
|
||||||
tools=self._tools,
|
tools=self._tools,
|
||||||
@@ -763,7 +758,7 @@ class ExecutionStream:
|
|||||||
|
|
||||||
# Emit resurrection event
|
# Emit resurrection event
|
||||||
if self._scoped_event_bus:
|
if self._scoped_event_bus:
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
await self._scoped_event_bus.publish(
|
await self._scoped_event_bus.publish(
|
||||||
AgentEvent(
|
AgentEvent(
|
||||||
@@ -1119,7 +1114,7 @@ class ExecutionStream:
|
|||||||
Each stream only executes from its own entry_node, but the full
|
Each stream only executes from its own entry_node, but the full
|
||||||
graph must validate with all entry points accounted for.
|
graph must validate with all entry points accounted for.
|
||||||
"""
|
"""
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
|
|
||||||
# Merge entry points: this stream's entry + original graph's primary
|
# Merge entry points: this stream's entry + original graph's primary
|
||||||
# entry + any other entry points. This ensures all nodes are
|
# entry + any other entry points. This ensures all nodes are
|
||||||
+2
-2
@@ -14,8 +14,8 @@ from typing import TYPE_CHECKING, Any
|
|||||||
from framework.schemas.decision import Decision, Outcome
|
from framework.schemas.decision import Decision, Outcome
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.goal import Goal
|
from framework.orchestrator.goal import Goal
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -18,12 +18,12 @@ from framework.schemas.run import Run, RunStatus
|
|||||||
from framework.storage.concurrent import ConcurrentStorage
|
from framework.storage.concurrent import ConcurrentStorage
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
from framework.host.outcome_aggregator import OutcomeAggregator
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class StreamRuntime:
|
class StreamDecisionTracker:
|
||||||
"""
|
"""
|
||||||
Thread-safe runtime for a single execution stream.
|
Thread-safe runtime for a single execution stream.
|
||||||
|
|
||||||
@@ -431,7 +431,7 @@ class StreamRuntimeAdapter:
|
|||||||
by providing the same API as Runtime but routing to a specific execution.
|
by providing the same API as Runtime but routing to a specific execution.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, stream_runtime: StreamRuntime, execution_id: str):
|
def __init__(self, stream_runtime: StreamDecisionTracker, execution_id: str):
|
||||||
"""
|
"""
|
||||||
Create adapter for a specific execution.
|
Create adapter for a specific execution.
|
||||||
|
|
||||||
@@ -13,7 +13,7 @@ from dataclasses import dataclass
|
|||||||
|
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
|
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -0,0 +1,101 @@
|
|||||||
|
"""Thread-safe API key pool with round-robin rotation and health tracking.
|
||||||
|
|
||||||
|
When multiple API keys are configured, the pool rotates through them on each
|
||||||
|
request. Keys that hit rate limits are temporarily cooled-down so the next
|
||||||
|
call automatically uses a healthy key -- no sleep required.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class KeyHealth:
|
||||||
|
"""Per-key health counters."""
|
||||||
|
|
||||||
|
rate_limited_until: float = 0.0 # monotonic timestamp
|
||||||
|
consecutive_errors: int = 0
|
||||||
|
total_requests: int = 0
|
||||||
|
total_successes: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
class KeyPool:
|
||||||
|
"""Round-robin key pool with health tracking.
|
||||||
|
|
||||||
|
Thread-safe: all mutations protected by a lock so concurrent LLM calls
|
||||||
|
(e.g. parallel tool execution in EventLoopNode) don't race.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, keys: list[str]) -> None:
|
||||||
|
if not keys:
|
||||||
|
raise ValueError("KeyPool requires at least one key")
|
||||||
|
self._keys = list(keys)
|
||||||
|
self._index = 0
|
||||||
|
self._health: dict[str, KeyHealth] = {k: KeyHealth() for k in keys}
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def size(self) -> int:
|
||||||
|
return len(self._keys)
|
||||||
|
|
||||||
|
def get_key(self) -> str:
|
||||||
|
"""Return the next healthy key (round-robin).
|
||||||
|
|
||||||
|
If every key is currently rate-limited, returns the one whose cooldown
|
||||||
|
expires soonest so the caller can proceed with minimal delay.
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
now = time.monotonic()
|
||||||
|
for _ in range(len(self._keys)):
|
||||||
|
key = self._keys[self._index]
|
||||||
|
self._index = (self._index + 1) % len(self._keys)
|
||||||
|
health = self._health[key]
|
||||||
|
if health.rate_limited_until <= now:
|
||||||
|
health.total_requests += 1
|
||||||
|
return key
|
||||||
|
# All rate-limited -- pick the one that expires soonest.
|
||||||
|
soonest = min(self._keys, key=lambda k: self._health[k].rate_limited_until)
|
||||||
|
self._health[soonest].total_requests += 1
|
||||||
|
return soonest
|
||||||
|
|
||||||
|
def mark_rate_limited(self, key: str, retry_after: float = 60.0) -> None:
|
||||||
|
"""Mark *key* as rate-limited for *retry_after* seconds."""
|
||||||
|
with self._lock:
|
||||||
|
health = self._health.get(key)
|
||||||
|
if health:
|
||||||
|
health.rate_limited_until = time.monotonic() + retry_after
|
||||||
|
health.consecutive_errors += 1
|
||||||
|
logger.info(
|
||||||
|
"[key-pool] Key ...%s rate-limited for %.0fs (errors=%d)",
|
||||||
|
key[-6:],
|
||||||
|
retry_after,
|
||||||
|
health.consecutive_errors,
|
||||||
|
)
|
||||||
|
|
||||||
|
def mark_success(self, key: str) -> None:
|
||||||
|
"""Record a successful call on *key*."""
|
||||||
|
with self._lock:
|
||||||
|
health = self._health.get(key)
|
||||||
|
if health:
|
||||||
|
health.consecutive_errors = 0
|
||||||
|
health.total_successes += 1
|
||||||
|
|
||||||
|
def get_stats(self) -> dict[str, dict]:
|
||||||
|
"""Return health stats keyed by the last 6 chars of each key."""
|
||||||
|
with self._lock:
|
||||||
|
now = time.monotonic()
|
||||||
|
return {
|
||||||
|
f"...{k[-6:]}": {
|
||||||
|
"healthy": self._health[k].rate_limited_until <= now,
|
||||||
|
"requests": self._health[k].total_requests,
|
||||||
|
"successes": self._health[k].total_successes,
|
||||||
|
"consecutive_errors": self._health[k].consecutive_errors,
|
||||||
|
}
|
||||||
|
for k in self._keys
|
||||||
|
}
|
||||||
@@ -7,6 +7,8 @@ Groq, and local models.
|
|||||||
See: https://docs.litellm.ai/docs/providers
|
See: https://docs.litellm.ai/docs/providers
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import ast
|
import ast
|
||||||
import asyncio
|
import asyncio
|
||||||
import hashlib
|
import hashlib
|
||||||
@@ -18,7 +20,10 @@ import time
|
|||||||
from collections.abc import AsyncIterator
|
from collections.abc import AsyncIterator
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from framework.llm.key_pool import KeyPool
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import litellm
|
import litellm
|
||||||
@@ -561,6 +566,7 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
model: str = "gpt-4o-mini",
|
model: str = "gpt-4o-mini",
|
||||||
api_key: str | None = None,
|
api_key: str | None = None,
|
||||||
api_base: str | None = None,
|
api_base: str | None = None,
|
||||||
|
api_keys: list[str] | None = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -573,6 +579,9 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
look for the appropriate env var (OPENAI_API_KEY,
|
look for the appropriate env var (OPENAI_API_KEY,
|
||||||
ANTHROPIC_API_KEY, etc.)
|
ANTHROPIC_API_KEY, etc.)
|
||||||
api_base: Custom API base URL (for proxies or local deployments)
|
api_base: Custom API base URL (for proxies or local deployments)
|
||||||
|
api_keys: Optional list of API keys for key-pool rotation. When
|
||||||
|
provided with 2+ keys, a :class:`KeyPool` is created and
|
||||||
|
keys are rotated on rate-limit errors.
|
||||||
**kwargs: Additional arguments passed to litellm.completion()
|
**kwargs: Additional arguments passed to litellm.completion()
|
||||||
"""
|
"""
|
||||||
# Kimi For Coding exposes an Anthropic-compatible endpoint at
|
# Kimi For Coding exposes an Anthropic-compatible endpoint at
|
||||||
@@ -594,11 +603,24 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
if api_base and api_base.rstrip("/").endswith("/v1"):
|
if api_base and api_base.rstrip("/").endswith("/v1"):
|
||||||
api_base = api_base.rstrip("/")[:-3]
|
api_base = api_base.rstrip("/")[:-3]
|
||||||
self.model = model
|
self.model = model
|
||||||
self.api_key = api_key
|
# Key pool: when multiple keys are provided, enable rotation.
|
||||||
|
self._key_pool: KeyPool | None = None
|
||||||
|
if api_keys and len(api_keys) > 1:
|
||||||
|
from framework.llm.key_pool import KeyPool
|
||||||
|
|
||||||
|
self._key_pool = KeyPool(api_keys)
|
||||||
|
self.api_key = api_keys[0] # default for OAuth detection below
|
||||||
|
logger.info(
|
||||||
|
"[litellm] Key pool enabled with %d keys for model %s",
|
||||||
|
len(api_keys),
|
||||||
|
model,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.api_key = api_key or (api_keys[0] if api_keys else None)
|
||||||
self.api_base = api_base or self._default_api_base_for_model(_original_model)
|
self.api_base = api_base or self._default_api_base_for_model(_original_model)
|
||||||
self.extra_kwargs = kwargs
|
self.extra_kwargs = kwargs
|
||||||
# Detect Claude Code OAuth subscription by checking the api_key prefix.
|
# Detect Claude Code OAuth subscription by checking the api_key prefix.
|
||||||
self._claude_code_oauth = bool(api_key and api_key.startswith("sk-ant-oat"))
|
self._claude_code_oauth = bool(self.api_key and self.api_key.startswith("sk-ant-oat"))
|
||||||
if self._claude_code_oauth:
|
if self._claude_code_oauth:
|
||||||
# Anthropic requires a specific User-Agent for OAuth requests.
|
# Anthropic requires a specific User-Agent for OAuth requests.
|
||||||
eh = self.extra_kwargs.setdefault("extra_headers", {})
|
eh = self.extra_kwargs.setdefault("extra_headers", {})
|
||||||
@@ -669,10 +691,20 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
def _completion_with_rate_limit_retry(
|
def _completion_with_rate_limit_retry(
|
||||||
self, max_retries: int | None = None, **kwargs: Any
|
self, max_retries: int | None = None, **kwargs: Any
|
||||||
) -> Any:
|
) -> Any:
|
||||||
"""Call litellm.completion with retry on 429 rate limit errors and empty responses."""
|
"""Call litellm.completion with retry on 429 rate limit errors and empty responses.
|
||||||
|
|
||||||
|
When a :class:`KeyPool` is configured, rate-limited keys are rotated
|
||||||
|
automatically so the next attempt uses a different key -- no sleep
|
||||||
|
needed between attempts.
|
||||||
|
"""
|
||||||
model = kwargs.get("model", self.model)
|
model = kwargs.get("model", self.model)
|
||||||
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
|
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
|
||||||
for attempt in range(retries + 1):
|
for attempt in range(retries + 1):
|
||||||
|
# Rotate key from pool when available.
|
||||||
|
current_key: str | None = None
|
||||||
|
if self._key_pool:
|
||||||
|
current_key = self._key_pool.get_key()
|
||||||
|
kwargs["api_key"] = current_key
|
||||||
try:
|
try:
|
||||||
response = litellm.completion(**kwargs) # type: ignore[union-attr]
|
response = litellm.completion(**kwargs) # type: ignore[union-attr]
|
||||||
|
|
||||||
@@ -747,8 +779,22 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
time.sleep(wait)
|
time.sleep(wait)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if self._key_pool and current_key:
|
||||||
|
self._key_pool.mark_success(current_key)
|
||||||
return response
|
return response
|
||||||
except RateLimitError as e:
|
except RateLimitError as e:
|
||||||
|
# Key pool: mark the offending key and rotate immediately.
|
||||||
|
if self._key_pool and current_key:
|
||||||
|
self._key_pool.mark_rate_limited(current_key, retry_after=60.0)
|
||||||
|
# When we have other healthy keys, skip the sleep -- the
|
||||||
|
# next iteration will pick a different key automatically.
|
||||||
|
if attempt < retries:
|
||||||
|
logger.info(
|
||||||
|
"[retry] Key pool rotating away from ...%s on 429",
|
||||||
|
current_key[-6:],
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
# Dump full request to file for debugging
|
# Dump full request to file for debugging
|
||||||
messages = kwargs.get("messages", [])
|
messages = kwargs.get("messages", [])
|
||||||
token_count, token_method = _estimate_tokens(model, messages)
|
token_count, token_method = _estimate_tokens(model, messages)
|
||||||
@@ -761,7 +807,7 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
if attempt == retries:
|
if attempt == retries:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"[retry] GAVE UP on {model} after {retries + 1} "
|
f"[retry] GAVE UP on {model} after {retries + 1} "
|
||||||
f"attempts — rate limit error: {e!s}. "
|
f"attempts -- rate limit error: {e!s}. "
|
||||||
f"~{token_count} tokens ({token_method}). "
|
f"~{token_count} tokens ({token_method}). "
|
||||||
f"Full request dumped to: {dump_path}"
|
f"Full request dumped to: {dump_path}"
|
||||||
)
|
)
|
||||||
@@ -880,10 +926,16 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
"""Async version of _completion_with_rate_limit_retry.
|
"""Async version of _completion_with_rate_limit_retry.
|
||||||
|
|
||||||
Uses litellm.acompletion and asyncio.sleep instead of blocking calls.
|
Uses litellm.acompletion and asyncio.sleep instead of blocking calls.
|
||||||
|
When a :class:`KeyPool` is configured, rate-limited keys are rotated.
|
||||||
"""
|
"""
|
||||||
model = kwargs.get("model", self.model)
|
model = kwargs.get("model", self.model)
|
||||||
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
|
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
|
||||||
for attempt in range(retries + 1):
|
for attempt in range(retries + 1):
|
||||||
|
# Rotate key from pool when available.
|
||||||
|
current_key: str | None = None
|
||||||
|
if self._key_pool:
|
||||||
|
current_key = self._key_pool.get_key()
|
||||||
|
kwargs["api_key"] = current_key
|
||||||
try:
|
try:
|
||||||
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
|
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
|
||||||
|
|
||||||
@@ -952,8 +1004,20 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
await asyncio.sleep(wait)
|
await asyncio.sleep(wait)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if self._key_pool and current_key:
|
||||||
|
self._key_pool.mark_success(current_key)
|
||||||
return response
|
return response
|
||||||
except RateLimitError as e:
|
except RateLimitError as e:
|
||||||
|
# Key pool: mark the offending key and rotate immediately.
|
||||||
|
if self._key_pool and current_key:
|
||||||
|
self._key_pool.mark_rate_limited(current_key, retry_after=60.0)
|
||||||
|
if attempt < retries:
|
||||||
|
logger.info(
|
||||||
|
"[async-retry] Key pool rotating away from ...%s on 429",
|
||||||
|
current_key[-6:],
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
messages = kwargs.get("messages", [])
|
messages = kwargs.get("messages", [])
|
||||||
token_count, token_method = _estimate_tokens(model, messages)
|
token_count, token_method = _estimate_tokens(model, messages)
|
||||||
dump_path = _dump_failed_request(
|
dump_path = _dump_failed_request(
|
||||||
@@ -965,7 +1029,7 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
if attempt == retries:
|
if attempt == retries:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"[async-retry] GAVE UP on {model} after {retries + 1} "
|
f"[async-retry] GAVE UP on {model} after {retries + 1} "
|
||||||
f"attempts — rate limit error: {e!s}. "
|
f"attempts -- rate limit error: {e!s}. "
|
||||||
f"~{token_count} tokens ({token_method}). "
|
f"~{token_count} tokens ({token_method}). "
|
||||||
f"Full request dumped to: {dump_path}"
|
f"Full request dumped to: {dump_path}"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
"""Loader layer -- agent loading from disk (JSON config, MCP, credentials)."""
|
||||||
|
|
||||||
|
from framework.loader.agent_loader import AgentLoader # noqa: F401
|
||||||
|
from framework.loader.tool_registry import ToolRegistry # noqa: F401
|
||||||
@@ -13,21 +13,20 @@ from framework.config import get_hive_config, get_max_context_tokens, get_prefer
|
|||||||
from framework.credentials.validation import (
|
from framework.credentials.validation import (
|
||||||
ensure_credential_key_env as _ensure_credential_key_env,
|
ensure_credential_key_env as _ensure_credential_key_env,
|
||||||
)
|
)
|
||||||
from framework.graph import Goal
|
from framework.orchestrator import Goal
|
||||||
from framework.graph.edge import (
|
from framework.orchestrator.edge import (
|
||||||
DEFAULT_MAX_TOKENS,
|
DEFAULT_MAX_TOKENS,
|
||||||
EdgeCondition,
|
EdgeCondition,
|
||||||
EdgeSpec,
|
EdgeSpec,
|
||||||
GraphSpec,
|
GraphSpec,
|
||||||
)
|
)
|
||||||
from framework.graph.executor import ExecutionResult
|
from framework.orchestrator.orchestrator import ExecutionResult
|
||||||
from framework.graph.node import NodeSpec
|
from framework.orchestrator.node import NodeSpec
|
||||||
from framework.llm.provider import LLMProvider, Tool
|
from framework.llm.provider import LLMProvider, Tool
|
||||||
from framework.runner.preload_validation import run_preload_validation
|
from framework.loader.preload_validation import run_preload_validation
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
from framework.loader.tool_registry import ToolRegistry
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
|
from framework.host.agent_host import AgentHost, AgentRuntimeConfig
|
||||||
from framework.runtime.execution_stream import EntryPointSpec
|
from framework.host.execution_manager import EntryPointSpec
|
||||||
from framework.runtime.runtime_log_store import RuntimeLogStore
|
|
||||||
from framework.tools.flowchart_utils import generate_fallback_flowchart
|
from framework.tools.flowchart_utils import generate_fallback_flowchart
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -881,6 +880,172 @@ class ValidationResult:
|
|||||||
missing_credentials: list[str] = field(default_factory=list)
|
missing_credentials: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_template_vars(text: str | None, variables: dict[str, str]) -> str | None:
|
||||||
|
"""Resolve ``{{variable_name}}`` placeholders in *text*."""
|
||||||
|
if text is None or not variables:
|
||||||
|
return text
|
||||||
|
import re
|
||||||
|
|
||||||
|
def _replace(m: re.Match) -> str:
|
||||||
|
key = m.group(1).strip()
|
||||||
|
return variables.get(key, m.group(0))
|
||||||
|
|
||||||
|
return re.sub(r"\{\{(.+?)\}\}", _replace, text)
|
||||||
|
|
||||||
|
|
||||||
|
def load_agent_config(data: str | dict) -> tuple[GraphSpec, Goal]:
|
||||||
|
"""Load ``GraphSpec`` and ``Goal`` from a declarative :class:`AgentConfig`.
|
||||||
|
|
||||||
|
The declarative format uses a ``name`` key at the top level, unlike the
|
||||||
|
legacy export format which uses ``graph``/``goal`` keys. The runner
|
||||||
|
auto-detects the format in :meth:`AgentLoader.load`.
|
||||||
|
|
||||||
|
Template variables in ``config.variables`` are resolved in all
|
||||||
|
``system_prompt`` and ``identity_prompt`` fields via ``{{var_name}}``.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (GraphSpec, Goal)
|
||||||
|
"""
|
||||||
|
from framework.orchestrator.edge import EdgeCondition, EdgeSpec
|
||||||
|
from framework.orchestrator.goal import Constraint, Goal as GoalModel, SuccessCriterion
|
||||||
|
from framework.schemas.agent_config import AgentConfig
|
||||||
|
|
||||||
|
if isinstance(data, str):
|
||||||
|
data = json.loads(data)
|
||||||
|
|
||||||
|
config = AgentConfig.model_validate(data)
|
||||||
|
tvars = config.variables
|
||||||
|
|
||||||
|
# Build Goal
|
||||||
|
success_criteria = [
|
||||||
|
SuccessCriterion(
|
||||||
|
id=f"sc-{i}",
|
||||||
|
description=sc,
|
||||||
|
metric="llm_judge",
|
||||||
|
target="",
|
||||||
|
)
|
||||||
|
for i, sc in enumerate(config.goal.success_criteria)
|
||||||
|
]
|
||||||
|
constraints = [
|
||||||
|
Constraint(
|
||||||
|
id=f"c-{i}",
|
||||||
|
description=c,
|
||||||
|
constraint_type="hard",
|
||||||
|
category="general",
|
||||||
|
)
|
||||||
|
for i, c in enumerate(config.goal.constraints)
|
||||||
|
]
|
||||||
|
goal = GoalModel(
|
||||||
|
id=f"{config.name}-goal",
|
||||||
|
name=config.name,
|
||||||
|
description=config.goal.description,
|
||||||
|
success_criteria=success_criteria,
|
||||||
|
constraints=constraints,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build nodes
|
||||||
|
condition_map = {
|
||||||
|
"always": EdgeCondition.ALWAYS,
|
||||||
|
"on_success": EdgeCondition.ON_SUCCESS,
|
||||||
|
"on_failure": EdgeCondition.ON_FAILURE,
|
||||||
|
"conditional": EdgeCondition.CONDITIONAL,
|
||||||
|
"llm_decide": EdgeCondition.LLM_DECIDE,
|
||||||
|
}
|
||||||
|
|
||||||
|
nodes = []
|
||||||
|
for nc in config.nodes:
|
||||||
|
# Resolve tool access: node-level config -> agent-level fallback
|
||||||
|
if nc.tools.policy == "explicit" and nc.tools.allowed:
|
||||||
|
tools_list = nc.tools.allowed
|
||||||
|
tool_policy = "explicit"
|
||||||
|
elif nc.tools.policy == "none":
|
||||||
|
tools_list = []
|
||||||
|
tool_policy = "none"
|
||||||
|
elif nc.tools.policy == "all":
|
||||||
|
tools_list = []
|
||||||
|
tool_policy = "all"
|
||||||
|
else:
|
||||||
|
# Inherit agent-level tool config
|
||||||
|
if config.tools.policy == "explicit" and config.tools.allowed:
|
||||||
|
tools_list = config.tools.allowed
|
||||||
|
else:
|
||||||
|
tools_list = []
|
||||||
|
tool_policy = config.tools.policy
|
||||||
|
|
||||||
|
node_kwargs: dict = {
|
||||||
|
"id": nc.id,
|
||||||
|
"name": nc.name or nc.id,
|
||||||
|
"description": nc.description or "",
|
||||||
|
"node_type": nc.node_type,
|
||||||
|
"system_prompt": _resolve_template_vars(nc.system_prompt, tvars),
|
||||||
|
"tools": tools_list,
|
||||||
|
"tool_access_policy": tool_policy,
|
||||||
|
"model": nc.model,
|
||||||
|
"input_keys": nc.input_keys,
|
||||||
|
"output_keys": nc.output_keys,
|
||||||
|
"nullable_output_keys": nc.nullable_output_keys,
|
||||||
|
"max_iterations": nc.max_iterations,
|
||||||
|
"success_criteria": nc.success_criteria,
|
||||||
|
"skip_judge": nc.skip_judge,
|
||||||
|
}
|
||||||
|
# Optional fields -- only pass when set (avoids overriding defaults)
|
||||||
|
if nc.client_facing:
|
||||||
|
node_kwargs["client_facing"] = nc.client_facing
|
||||||
|
if nc.max_node_visits != 1:
|
||||||
|
node_kwargs["max_node_visits"] = nc.max_node_visits
|
||||||
|
if nc.failure_criteria:
|
||||||
|
node_kwargs["failure_criteria"] = nc.failure_criteria
|
||||||
|
if nc.max_retries is not None:
|
||||||
|
node_kwargs["max_retries"] = nc.max_retries
|
||||||
|
|
||||||
|
nodes.append(NodeSpec(**node_kwargs))
|
||||||
|
|
||||||
|
# Build edges
|
||||||
|
edges = []
|
||||||
|
for i, ec in enumerate(config.edges):
|
||||||
|
edges.append(
|
||||||
|
EdgeSpec(
|
||||||
|
id=f"e-{i}-{ec.from_node}-{ec.to_node}",
|
||||||
|
source=ec.from_node,
|
||||||
|
target=ec.to_node,
|
||||||
|
condition=condition_map.get(ec.condition, EdgeCondition.ON_SUCCESS),
|
||||||
|
condition_expr=ec.condition_expr,
|
||||||
|
priority=ec.priority,
|
||||||
|
input_mapping=ec.input_mapping,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build entry_points dict for GraphSpec
|
||||||
|
entry_points_dict: dict = {}
|
||||||
|
if config.entry_points:
|
||||||
|
for ep in config.entry_points:
|
||||||
|
entry_points_dict[ep.id] = ep.entry_node or config.entry_node
|
||||||
|
else:
|
||||||
|
entry_points_dict = {"default": config.entry_node}
|
||||||
|
|
||||||
|
# Build GraphSpec
|
||||||
|
graph_kwargs: dict = {
|
||||||
|
"id": f"{config.name}-graph",
|
||||||
|
"goal_id": goal.id,
|
||||||
|
"version": config.version,
|
||||||
|
"entry_node": config.entry_node,
|
||||||
|
"entry_points": entry_points_dict,
|
||||||
|
"terminal_nodes": config.terminal_nodes,
|
||||||
|
"pause_nodes": config.pause_nodes,
|
||||||
|
"nodes": nodes,
|
||||||
|
"edges": edges,
|
||||||
|
"max_tokens": config.max_tokens,
|
||||||
|
"loop_config": dict(config.loop_config),
|
||||||
|
"conversation_mode": config.conversation_mode,
|
||||||
|
"identity_prompt": _resolve_template_vars(
|
||||||
|
config.identity_prompt, tvars
|
||||||
|
) or "",
|
||||||
|
}
|
||||||
|
|
||||||
|
graph = GraphSpec(**graph_kwargs)
|
||||||
|
return graph, goal
|
||||||
|
|
||||||
|
|
||||||
def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
|
def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
|
||||||
"""
|
"""
|
||||||
Load GraphSpec and Goal from export_graph() output.
|
Load GraphSpec and Goal from export_graph() output.
|
||||||
@@ -942,7 +1107,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Build Goal
|
# Build Goal
|
||||||
from framework.graph.goal import Constraint, SuccessCriterion
|
from framework.orchestrator.goal import Constraint, SuccessCriterion
|
||||||
|
|
||||||
success_criteria = []
|
success_criteria = []
|
||||||
for sc_data in goal_data.get("success_criteria", []):
|
for sc_data in goal_data.get("success_criteria", []):
|
||||||
@@ -979,7 +1144,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
|
|||||||
return graph, goal
|
return graph, goal
|
||||||
|
|
||||||
|
|
||||||
class AgentRunner:
|
class AgentLoader:
|
||||||
"""
|
"""
|
||||||
Loads and runs exported agents with minimal boilerplate.
|
Loads and runs exported agents with minimal boilerplate.
|
||||||
|
|
||||||
@@ -991,15 +1156,15 @@ class AgentRunner:
|
|||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
# Simple usage
|
# Simple usage
|
||||||
runner = AgentRunner.load("exports/outbound-sales-agent")
|
runner = AgentLoader.load("exports/outbound-sales-agent")
|
||||||
result = await runner.run({"lead_id": "123"})
|
result = await runner.run({"lead_id": "123"})
|
||||||
|
|
||||||
# With context manager
|
# With context manager
|
||||||
async with AgentRunner.load("exports/outbound-sales-agent") as runner:
|
async with AgentLoader.load("exports/outbound-sales-agent") as runner:
|
||||||
result = await runner.run({"lead_id": "123"})
|
result = await runner.run({"lead_id": "123"})
|
||||||
|
|
||||||
# With custom tools
|
# With custom tools
|
||||||
runner = AgentRunner.load("exports/outbound-sales-agent")
|
runner = AgentLoader.load("exports/outbound-sales-agent")
|
||||||
runner.register_tool("my_tool", my_tool_func)
|
runner.register_tool("my_tool", my_tool_func)
|
||||||
result = await runner.run({"lead_id": "123"})
|
result = await runner.run({"lead_id": "123"})
|
||||||
"""
|
"""
|
||||||
@@ -1027,7 +1192,7 @@ class AgentRunner:
|
|||||||
credential_store: Any | None = None,
|
credential_store: Any | None = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize the runner (use AgentRunner.load() instead).
|
Initialize the runner (use AgentLoader.load() instead).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
agent_path: Path to agent folder
|
agent_path: Path to agent folder
|
||||||
@@ -1082,7 +1247,7 @@ class AgentRunner:
|
|||||||
self._approval_callback: Callable | None = None
|
self._approval_callback: Callable | None = None
|
||||||
|
|
||||||
# AgentRuntime — unified execution path for all agents
|
# AgentRuntime — unified execution path for all agents
|
||||||
self._agent_runtime: AgentRuntime | None = None
|
self._agent_runtime: AgentHost | None = None
|
||||||
# Pre-load validation: structural checks + credentials.
|
# Pre-load validation: structural checks + credentials.
|
||||||
# Fails fast with actionable guidance — no MCP noise on screen.
|
# Fails fast with actionable guidance — no MCP noise on screen.
|
||||||
run_preload_validation(
|
run_preload_validation(
|
||||||
@@ -1101,14 +1266,7 @@ class AgentRunner:
|
|||||||
os.environ["HIVE_AGENT_NAME"] = agent_path.name
|
os.environ["HIVE_AGENT_NAME"] = agent_path.name
|
||||||
os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path)
|
os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path)
|
||||||
|
|
||||||
# Auto-discover MCP servers from mcp_servers.json
|
# MCP tools are loaded by McpRegistryStage in the pipeline during AgentHost.start()
|
||||||
mcp_config_path = agent_path / "mcp_servers.json"
|
|
||||||
if mcp_config_path.exists():
|
|
||||||
self._load_mcp_servers_from_config(mcp_config_path)
|
|
||||||
|
|
||||||
# Auto-discover registry-selected MCP servers from mcp_registry.json
|
|
||||||
self._load_registry_mcp_servers(agent_path)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _import_agent_module(agent_path: Path):
|
def _import_agent_module(agent_path: Path):
|
||||||
"""Import an agent package from its directory path.
|
"""Import an agent package from its directory path.
|
||||||
@@ -1158,7 +1316,7 @@ class AgentRunner:
|
|||||||
interactive: bool = True,
|
interactive: bool = True,
|
||||||
skip_credential_validation: bool | None = None,
|
skip_credential_validation: bool | None = None,
|
||||||
credential_store: Any | None = None,
|
credential_store: Any | None = None,
|
||||||
) -> "AgentRunner":
|
) -> "AgentLoader":
|
||||||
"""
|
"""
|
||||||
Load an agent from an export folder.
|
Load an agent from an export folder.
|
||||||
|
|
||||||
@@ -1299,21 +1457,22 @@ class AgentRunner:
|
|||||||
runner._agent_skills = agent_skills
|
runner._agent_skills = agent_skills
|
||||||
return runner
|
return runner
|
||||||
|
|
||||||
# Fallback: load from agent.json (legacy JSON-based agents)
|
# Fallback: load from agent.json (declarative config)
|
||||||
agent_json_path = agent_path / "agent.json"
|
agent_json_path = agent_path / "agent.json"
|
||||||
|
|
||||||
if not agent_json_path.is_file():
|
if not agent_json_path.is_file():
|
||||||
raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")
|
raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")
|
||||||
|
|
||||||
with open(agent_json_path, encoding="utf-8") as f:
|
export_data = agent_json_path.read_text(encoding="utf-8")
|
||||||
export_data = f.read()
|
|
||||||
|
|
||||||
if not export_data.strip():
|
if not export_data.strip():
|
||||||
raise ValueError(f"Empty agent export file: {agent_json_path}")
|
raise ValueError(f"Empty agent.json: {agent_json_path}")
|
||||||
|
|
||||||
try:
|
parsed = json.loads(export_data)
|
||||||
graph, goal = load_agent_export(export_data)
|
graph, goal = load_agent_config(parsed)
|
||||||
except json.JSONDecodeError as exc:
|
logger.info(
|
||||||
raise ValueError(f"Invalid JSON in agent export file: {agent_json_path}") from exc
|
"Loaded declarative agent config from agent.json (name=%s)",
|
||||||
|
parsed.get("name"),
|
||||||
|
)
|
||||||
|
|
||||||
# Generate flowchart.json if missing (for legacy JSON-based agents)
|
# Generate flowchart.json if missing (for legacy JSON-based agents)
|
||||||
generate_fallback_flowchart(graph, goal, agent_path)
|
generate_fallback_flowchart(graph, goal, agent_path)
|
||||||
@@ -1396,60 +1555,6 @@ class AgentRunner:
|
|||||||
}
|
}
|
||||||
return self._tool_registry.register_mcp_server(server_config)
|
return self._tool_registry.register_mcp_server(server_config)
|
||||||
|
|
||||||
def _load_mcp_servers_from_config(self, config_path: Path) -> None:
|
|
||||||
"""Load and register MCP servers from a configuration file."""
|
|
||||||
self._tool_registry.load_mcp_config(config_path)
|
|
||||||
|
|
||||||
def _load_registry_mcp_servers(self, agent_path: Path) -> None:
|
|
||||||
"""Load and register MCP servers selected via ``mcp_registry.json``."""
|
|
||||||
registry_json = agent_path / "mcp_registry.json"
|
|
||||||
if registry_json.is_file():
|
|
||||||
self._tool_registry.set_mcp_registry_agent_path(agent_path)
|
|
||||||
else:
|
|
||||||
self._tool_registry.set_mcp_registry_agent_path(None)
|
|
||||||
|
|
||||||
from framework.runner.mcp_registry import MCPRegistry
|
|
||||||
|
|
||||||
try:
|
|
||||||
registry = MCPRegistry()
|
|
||||||
registry.initialize()
|
|
||||||
server_configs, selection_max_tools = registry.load_agent_selection(agent_path)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.warning(
|
|
||||||
"Failed to load MCP registry servers for '%s': %s",
|
|
||||||
agent_path.name,
|
|
||||||
exc,
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
if not server_configs:
|
|
||||||
return
|
|
||||||
|
|
||||||
results = self._tool_registry.load_registry_servers(
|
|
||||||
server_configs,
|
|
||||||
preserve_existing_tools=True,
|
|
||||||
log_collisions=True,
|
|
||||||
max_tools=selection_max_tools,
|
|
||||||
)
|
|
||||||
loaded = [result for result in results if result["status"] == "loaded"]
|
|
||||||
skipped = [result for result in results if result["status"] != "loaded"]
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"Loaded %d/%d MCP registry server(s) for agent '%s'",
|
|
||||||
len(loaded),
|
|
||||||
len(results),
|
|
||||||
agent_path.name,
|
|
||||||
)
|
|
||||||
if skipped:
|
|
||||||
logger.info(
|
|
||||||
"Skipped MCP registry servers for agent '%s': %s",
|
|
||||||
agent_path.name,
|
|
||||||
[
|
|
||||||
{"server": result["server"], "reason": result["skipped_reason"]}
|
|
||||||
for result in skipped
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
def set_approval_callback(self, callback: Callable) -> None:
|
def set_approval_callback(self, callback: Callable) -> None:
|
||||||
"""
|
"""
|
||||||
Set a callback for human-in-the-loop approval during execution.
|
Set a callback for human-in-the-loop approval during execution.
|
||||||
@@ -1460,272 +1565,119 @@ class AgentRunner:
|
|||||||
self._approval_callback = callback
|
self._approval_callback = callback
|
||||||
|
|
||||||
def _setup(self, event_bus=None) -> None:
|
def _setup(self, event_bus=None) -> None:
|
||||||
"""Set up runtime, LLM, and executor."""
|
"""Set up runtime via pipeline stages.
|
||||||
# Configure structured logging (auto-detects JSON vs human-readable)
|
|
||||||
|
Builds a pipeline with the default stages (LLM, credentials, MCP,
|
||||||
|
skills) and passes it to AgentHost. The stages initialize during
|
||||||
|
``AgentHost.start()`` and inject tools/LLM/credentials/skills.
|
||||||
|
"""
|
||||||
from framework.observability import configure_logging
|
from framework.observability import configure_logging
|
||||||
|
from framework.pipeline.stages.credential_resolver import CredentialResolverStage
|
||||||
|
from framework.pipeline.stages.llm_provider import LlmProviderStage
|
||||||
|
from framework.pipeline.stages.mcp_registry import McpRegistryStage
|
||||||
|
from framework.pipeline.stages.skill_registry import SkillRegistryStage
|
||||||
|
from framework.skills.config import SkillsConfig
|
||||||
|
|
||||||
configure_logging(level="INFO", format="auto")
|
configure_logging(level="INFO", format="auto")
|
||||||
|
|
||||||
# Set up session context for tools (agent_id)
|
# Set up session context for tools
|
||||||
agent_id = self.graph.id or "unknown"
|
agent_id = self.graph.id or "unknown"
|
||||||
|
self._tool_registry.set_session_context(agent_id=agent_id)
|
||||||
|
|
||||||
self._tool_registry.set_session_context(
|
# Read MCP server refs from agent.json
|
||||||
agent_id=agent_id,
|
mcp_refs = []
|
||||||
)
|
agent_json = self.agent_path / "agent.json"
|
||||||
|
if agent_json.exists():
|
||||||
|
try:
|
||||||
|
import json as _json
|
||||||
|
|
||||||
# Create LLM provider
|
data = _json.loads(agent_json.read_text(encoding="utf-8"))
|
||||||
# Uses LiteLLM which auto-detects the provider from model name
|
mcp_refs = data.get("mcp_servers", [])
|
||||||
# Skip if already injected (e.g. worker agents with a pre-built LLM)
|
except Exception:
|
||||||
if self._llm is not None:
|
pass
|
||||||
pass # LLM already configured externally
|
|
||||||
elif self.mock_mode:
|
|
||||||
# Use mock LLM for testing without real API calls
|
|
||||||
from framework.llm.mock import MockLLMProvider
|
|
||||||
|
|
||||||
self._llm = MockLLMProvider(model=self.model)
|
# Build default pipeline stages
|
||||||
else:
|
# Default infrastructure stages (always present)
|
||||||
from framework.llm.litellm import LiteLLMProvider
|
pipeline_stages = [
|
||||||
|
LlmProviderStage(
|
||||||
# Check if a subscription mode is configured
|
model=self.model,
|
||||||
config = get_hive_config()
|
mock_mode=self.mock_mode,
|
||||||
llm_config = config.get("llm", {})
|
llm=self._llm,
|
||||||
use_claude_code = llm_config.get("use_claude_code_subscription", False)
|
|
||||||
use_codex = llm_config.get("use_codex_subscription", False)
|
|
||||||
use_kimi_code = llm_config.get("use_kimi_code_subscription", False)
|
|
||||||
use_antigravity = llm_config.get("use_antigravity_subscription", False)
|
|
||||||
api_base = llm_config.get("api_base")
|
|
||||||
|
|
||||||
api_key = None
|
|
||||||
if use_claude_code:
|
|
||||||
# Get OAuth token from Claude Code subscription
|
|
||||||
api_key = get_claude_code_token()
|
|
||||||
if not api_key:
|
|
||||||
logger.warning(
|
|
||||||
"Claude Code subscription configured but no token found. "
|
|
||||||
"Run 'claude' to authenticate, then try again."
|
|
||||||
)
|
|
||||||
elif use_codex:
|
|
||||||
# Get OAuth token from Codex subscription
|
|
||||||
api_key = get_codex_token()
|
|
||||||
if not api_key:
|
|
||||||
logger.warning(
|
|
||||||
"Codex subscription configured but no token found. "
|
|
||||||
"Run 'codex' to authenticate, then try again."
|
|
||||||
)
|
|
||||||
elif use_kimi_code:
|
|
||||||
# Get API key from Kimi Code CLI config (~/.kimi/config.toml)
|
|
||||||
api_key = get_kimi_code_token()
|
|
||||||
if not api_key:
|
|
||||||
logger.warning(
|
|
||||||
"Kimi Code subscription configured but no key found. "
|
|
||||||
"Run 'kimi /login' to authenticate, then try again."
|
|
||||||
)
|
|
||||||
elif use_antigravity:
|
|
||||||
pass # AntigravityProvider handles credentials internally
|
|
||||||
|
|
||||||
if api_key and use_claude_code:
|
|
||||||
# Use litellm's built-in Anthropic OAuth support.
|
|
||||||
# The lowercase "authorization" key triggers OAuth detection which
|
|
||||||
# adds the required anthropic-beta and browser-access headers.
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model,
|
|
||||||
api_key=api_key,
|
|
||||||
api_base=api_base,
|
|
||||||
extra_headers={"authorization": f"Bearer {api_key}"},
|
|
||||||
)
|
|
||||||
elif api_key and use_codex:
|
|
||||||
# OpenAI Codex subscription routes through the ChatGPT backend
|
|
||||||
# (chatgpt.com/backend-api/codex/responses), NOT the standard
|
|
||||||
# OpenAI API. The consumer OAuth token lacks platform API scopes.
|
|
||||||
extra_headers: dict[str, str] = {
|
|
||||||
"Authorization": f"Bearer {api_key}",
|
|
||||||
"User-Agent": "CodexBar",
|
|
||||||
}
|
|
||||||
account_id = get_codex_account_id()
|
|
||||||
if account_id:
|
|
||||||
extra_headers["ChatGPT-Account-Id"] = account_id
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model,
|
|
||||||
api_key=api_key,
|
|
||||||
api_base="https://chatgpt.com/backend-api/codex",
|
|
||||||
extra_headers=extra_headers,
|
|
||||||
store=False,
|
|
||||||
allowed_openai_params=["store"],
|
|
||||||
)
|
|
||||||
elif api_key and use_kimi_code:
|
|
||||||
# Kimi Code subscription uses the Kimi coding API (OpenAI-compatible).
|
|
||||||
# The api_base is set automatically by LiteLLMProvider for kimi/ models.
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model,
|
|
||||||
api_key=api_key,
|
|
||||||
api_base=api_base,
|
|
||||||
)
|
|
||||||
elif use_antigravity:
|
|
||||||
# Direct OAuth to Google's internal Cloud Code Assist gateway.
|
|
||||||
# No local proxy required — AntigravityProvider handles token
|
|
||||||
# refresh and Gemini-format request/response conversion natively.
|
|
||||||
from framework.llm.antigravity import AntigravityProvider # noqa: PLC0415
|
|
||||||
|
|
||||||
provider = AntigravityProvider(model=self.model)
|
|
||||||
if not provider.has_credentials():
|
|
||||||
print(
|
|
||||||
"Warning: Antigravity credentials not found. "
|
|
||||||
"Run: uv run python core/antigravity_auth.py auth account add"
|
|
||||||
)
|
|
||||||
self._llm = provider
|
|
||||||
else:
|
|
||||||
# Local models (e.g. Ollama) don't need an API key
|
|
||||||
if self._is_local_model(self.model):
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model,
|
|
||||||
api_base=api_base,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Fall back to environment variable
|
|
||||||
# First check api_key_env_var from config (set by quickstart)
|
|
||||||
api_key_env = llm_config.get("api_key_env_var") or self._get_api_key_env_var(
|
|
||||||
self.model
|
|
||||||
)
|
|
||||||
if api_key_env and os.environ.get(api_key_env):
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model,
|
|
||||||
api_key=os.environ[api_key_env],
|
|
||||||
api_base=api_base,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Fall back to credential store
|
|
||||||
api_key = self._get_api_key_from_credential_store()
|
|
||||||
if api_key:
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model, api_key=api_key, api_base=api_base
|
|
||||||
)
|
|
||||||
# Set env var so downstream code (e.g. cleanup LLM in
|
|
||||||
# node._extract_json) can also find it
|
|
||||||
if api_key_env:
|
|
||||||
os.environ[api_key_env] = api_key
|
|
||||||
elif api_key_env:
|
|
||||||
logger.warning(
|
|
||||||
"%s not set. LLM calls will fail. "
|
|
||||||
"Set it with: export %s=your-api-key",
|
|
||||||
api_key_env,
|
|
||||||
api_key_env,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Fail fast if the agent needs an LLM but none was configured
|
|
||||||
if self._llm is None:
|
|
||||||
has_llm_nodes = any(
|
|
||||||
node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
|
|
||||||
)
|
|
||||||
if has_llm_nodes:
|
|
||||||
from framework.credentials.models import CredentialError
|
|
||||||
|
|
||||||
if self._is_local_model(self.model):
|
|
||||||
raise CredentialError(
|
|
||||||
f"Failed to initialize LLM for local model '{self.model}'. "
|
|
||||||
f"Ensure your local LLM server is running "
|
|
||||||
f"(e.g. 'ollama serve' for Ollama)."
|
|
||||||
)
|
|
||||||
api_key_env = self._get_api_key_env_var(self.model)
|
|
||||||
hint = (
|
|
||||||
f"Set it with: export {api_key_env}=your-api-key"
|
|
||||||
if api_key_env
|
|
||||||
else "Configure an API key for your LLM provider."
|
|
||||||
)
|
|
||||||
raise CredentialError(f"LLM API key not found for model '{self.model}'. {hint}")
|
|
||||||
|
|
||||||
# For GCU nodes: auto-register GCU MCP server if needed, then expand tool lists
|
|
||||||
has_gcu_nodes = any(node.node_type == "gcu" for node in self.graph.nodes)
|
|
||||||
if has_gcu_nodes:
|
|
||||||
from framework.graph.gcu import GCU_MCP_SERVER_CONFIG, GCU_SERVER_NAME
|
|
||||||
|
|
||||||
# Auto-register GCU MCP server if tools aren't loaded yet
|
|
||||||
gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
|
|
||||||
if not gcu_tool_names:
|
|
||||||
# Resolve cwd to repo-level tools/ (not relative to agent_path)
|
|
||||||
gcu_config = dict(GCU_MCP_SERVER_CONFIG)
|
|
||||||
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
|
|
||||||
gcu_config["cwd"] = str(_repo_root / "tools")
|
|
||||||
self._tool_registry.register_mcp_server(gcu_config)
|
|
||||||
gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
|
|
||||||
|
|
||||||
# Expand each GCU node's tools list to include all GCU server tools
|
|
||||||
if gcu_tool_names:
|
|
||||||
for node in self.graph.nodes:
|
|
||||||
if node.node_type == "gcu":
|
|
||||||
existing = set(node.tools)
|
|
||||||
for tool_name in sorted(gcu_tool_names):
|
|
||||||
if tool_name not in existing:
|
|
||||||
node.tools.append(tool_name)
|
|
||||||
|
|
||||||
# For event_loop/gcu nodes: auto-register file tools MCP server, then expand tool lists
|
|
||||||
has_loop_nodes = any(node.node_type in ("event_loop", "gcu") for node in self.graph.nodes)
|
|
||||||
if has_loop_nodes:
|
|
||||||
from framework.graph.files import FILES_MCP_SERVER_CONFIG, FILES_MCP_SERVER_NAME
|
|
||||||
|
|
||||||
files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
|
|
||||||
if not files_tool_names:
|
|
||||||
# Resolve cwd to repo-level tools/ (not relative to agent_path)
|
|
||||||
files_config = dict(FILES_MCP_SERVER_CONFIG)
|
|
||||||
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
|
|
||||||
files_config["cwd"] = str(_repo_root / "tools")
|
|
||||||
self._tool_registry.register_mcp_server(files_config)
|
|
||||||
files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
|
|
||||||
|
|
||||||
if files_tool_names:
|
|
||||||
for node in self.graph.nodes:
|
|
||||||
if node.node_type in ("event_loop", "gcu"):
|
|
||||||
existing = set(node.tools)
|
|
||||||
for tool_name in sorted(files_tool_names):
|
|
||||||
if tool_name not in existing:
|
|
||||||
node.tools.append(tool_name)
|
|
||||||
|
|
||||||
# Get tools for runtime
|
|
||||||
tools = list(self._tool_registry.get_tools().values())
|
|
||||||
tool_executor = self._tool_registry.get_executor()
|
|
||||||
|
|
||||||
# Collect connected account info for system prompt injection
|
|
||||||
accounts_prompt = ""
|
|
||||||
accounts_data: list[dict] | None = None
|
|
||||||
tool_provider_map: dict[str, str] | None = None
|
|
||||||
try:
|
|
||||||
from aden_tools.credentials.store_adapter import CredentialStoreAdapter
|
|
||||||
|
|
||||||
if self._credential_store is not None:
|
|
||||||
adapter = CredentialStoreAdapter(store=self._credential_store)
|
|
||||||
else:
|
|
||||||
adapter = CredentialStoreAdapter.default()
|
|
||||||
accounts_data = adapter.get_all_account_info()
|
|
||||||
tool_provider_map = adapter.get_tool_provider_map()
|
|
||||||
if accounts_data:
|
|
||||||
from framework.graph.prompting import build_accounts_prompt
|
|
||||||
|
|
||||||
accounts_prompt = build_accounts_prompt(accounts_data, tool_provider_map)
|
|
||||||
except Exception:
|
|
||||||
pass # Best-effort — agent works without account info
|
|
||||||
|
|
||||||
# Skill configuration — the runtime handles discovery, loading, trust-gating and
|
|
||||||
# prompt rasterization. The runner just builds the config.
|
|
||||||
from framework.skills.config import SkillsConfig
|
|
||||||
from framework.skills.manager import SkillsManagerConfig
|
|
||||||
|
|
||||||
skills_manager_config = SkillsManagerConfig(
|
|
||||||
skills_config=SkillsConfig.from_agent_vars(
|
|
||||||
default_skills=getattr(self, "_agent_default_skills", None),
|
|
||||||
skills=getattr(self, "_agent_skills", None),
|
|
||||||
),
|
),
|
||||||
project_root=self.agent_path,
|
CredentialResolverStage(
|
||||||
interactive=self._interactive,
|
credential_store=self._credential_store,
|
||||||
)
|
),
|
||||||
|
McpRegistryStage(
|
||||||
|
server_refs=mcp_refs,
|
||||||
|
agent_path=self.agent_path,
|
||||||
|
tool_registry=self._tool_registry,
|
||||||
|
),
|
||||||
|
SkillRegistryStage(
|
||||||
|
project_root=self.agent_path,
|
||||||
|
interactive=self._interactive,
|
||||||
|
skills_config=SkillsConfig.from_agent_vars(
|
||||||
|
default_skills=getattr(self, "_agent_default_skills", None),
|
||||||
|
skills=getattr(self, "_agent_skills", None),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
self._setup_agent_runtime(
|
# Merge user-configured stages from ~/.hive/configuration.json
|
||||||
tools,
|
from framework.config import get_hive_config
|
||||||
tool_executor,
|
from framework.pipeline.registry import build_pipeline_from_config
|
||||||
accounts_prompt=accounts_prompt,
|
|
||||||
accounts_data=accounts_data,
|
hive_config = get_hive_config()
|
||||||
tool_provider_map=tool_provider_map,
|
user_stages_config = hive_config.get("pipeline", {}).get("stages", [])
|
||||||
|
if user_stages_config:
|
||||||
|
user_pipeline = build_pipeline_from_config(user_stages_config)
|
||||||
|
pipeline_stages.extend(user_pipeline.stages)
|
||||||
|
|
||||||
|
# Merge agent-level overrides from agent.json pipeline field
|
||||||
|
if agent_json.exists():
|
||||||
|
try:
|
||||||
|
agent_pipeline = (
|
||||||
|
_json.loads(agent_json.read_text(encoding="utf-8"))
|
||||||
|
.get("pipeline", {})
|
||||||
|
.get("stages", [])
|
||||||
|
)
|
||||||
|
if agent_pipeline:
|
||||||
|
agent_stages = build_pipeline_from_config(agent_pipeline)
|
||||||
|
pipeline_stages.extend(agent_stages.stages)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Create AgentHost directly (no wrapper)
|
||||||
|
from framework.host.execution_manager import EntryPointSpec
|
||||||
|
from framework.orchestrator.checkpoint_config import CheckpointConfig
|
||||||
|
from framework.tracker.runtime_log_store import RuntimeLogStore
|
||||||
|
|
||||||
|
self._agent_runtime = AgentHost(
|
||||||
|
graph=self.graph,
|
||||||
|
goal=self.goal,
|
||||||
|
storage_path=self._storage_path,
|
||||||
|
runtime_log_store=RuntimeLogStore(
|
||||||
|
base_path=self._storage_path / "runtime_logs",
|
||||||
|
),
|
||||||
|
checkpoint_config=CheckpointConfig(
|
||||||
|
enabled=True,
|
||||||
|
checkpoint_on_node_complete=True,
|
||||||
|
checkpoint_max_age_days=7,
|
||||||
|
async_checkpoint=True,
|
||||||
|
),
|
||||||
|
graph_id=self.graph.id or self.agent_path.name,
|
||||||
event_bus=event_bus,
|
event_bus=event_bus,
|
||||||
skills_manager_config=skills_manager_config,
|
pipeline_stages=pipeline_stages,
|
||||||
)
|
)
|
||||||
|
self._agent_runtime.register_entry_point(
|
||||||
|
EntryPointSpec(
|
||||||
|
id="default",
|
||||||
|
name="Default",
|
||||||
|
entry_node=self.graph.entry_node,
|
||||||
|
trigger_type="manual",
|
||||||
|
isolation_level="shared",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
self._agent_runtime.intro_message = self.intro_message
|
||||||
|
|
||||||
def _get_api_key_env_var(self, model: str) -> str | None:
|
def _get_api_key_env_var(self, model: str) -> str | None:
|
||||||
"""Get the environment variable name for the API key based on model name."""
|
"""Get the environment variable name for the API key based on model name."""
|
||||||
@@ -1833,83 +1785,6 @@ class AgentRunner:
|
|||||||
)
|
)
|
||||||
return model.lower().startswith(LOCAL_PREFIXES)
|
return model.lower().startswith(LOCAL_PREFIXES)
|
||||||
|
|
||||||
def _setup_agent_runtime(
|
|
||||||
self,
|
|
||||||
tools: list,
|
|
||||||
tool_executor: Callable | None,
|
|
||||||
accounts_prompt: str = "",
|
|
||||||
accounts_data: list[dict] | None = None,
|
|
||||||
tool_provider_map: dict[str, str] | None = None,
|
|
||||||
event_bus=None,
|
|
||||||
skills_catalog_prompt: str = "",
|
|
||||||
protocols_prompt: str = "",
|
|
||||||
skill_dirs: list[str] | None = None,
|
|
||||||
skills_manager_config=None,
|
|
||||||
) -> None:
|
|
||||||
"""Set up multi-entry-point execution using AgentRuntime."""
|
|
||||||
entry_points = []
|
|
||||||
|
|
||||||
# Always create a primary entry point for the graph's entry node.
|
|
||||||
# For multi-entry-point agents this ensures the primary path (e.g.
|
|
||||||
# user-facing rule setup) is reachable alongside async entry points.
|
|
||||||
if self.graph.entry_node:
|
|
||||||
entry_points.insert(
|
|
||||||
0,
|
|
||||||
EntryPointSpec(
|
|
||||||
id="default",
|
|
||||||
name="Default",
|
|
||||||
entry_node=self.graph.entry_node,
|
|
||||||
trigger_type="manual",
|
|
||||||
isolation_level="shared",
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create AgentRuntime with all entry points
|
|
||||||
log_store = RuntimeLogStore(base_path=self._storage_path / "runtime_logs")
|
|
||||||
|
|
||||||
# Enable checkpointing by default for resumable sessions
|
|
||||||
from framework.graph.checkpoint_config import CheckpointConfig
|
|
||||||
|
|
||||||
checkpoint_config = CheckpointConfig(
|
|
||||||
enabled=True,
|
|
||||||
checkpoint_on_node_start=False, # Only checkpoint after nodes complete
|
|
||||||
checkpoint_on_node_complete=True,
|
|
||||||
checkpoint_max_age_days=7,
|
|
||||||
async_checkpoint=True, # Non-blocking
|
|
||||||
)
|
|
||||||
|
|
||||||
# Handle runtime_config - only pass through if it's actually an AgentRuntimeConfig.
|
|
||||||
# Agents may export a RuntimeConfig (LLM settings) or queen-generated custom classes
|
|
||||||
# that would crash AgentRuntime if passed through.
|
|
||||||
runtime_config = None
|
|
||||||
if self.runtime_config is not None:
|
|
||||||
from framework.runtime.agent_runtime import AgentRuntimeConfig
|
|
||||||
|
|
||||||
if isinstance(self.runtime_config, AgentRuntimeConfig):
|
|
||||||
runtime_config = self.runtime_config
|
|
||||||
|
|
||||||
self._agent_runtime = create_agent_runtime(
|
|
||||||
graph=self.graph,
|
|
||||||
goal=self.goal,
|
|
||||||
storage_path=self._storage_path,
|
|
||||||
entry_points=entry_points,
|
|
||||||
llm=self._llm,
|
|
||||||
tools=tools,
|
|
||||||
tool_executor=tool_executor,
|
|
||||||
runtime_log_store=log_store,
|
|
||||||
checkpoint_config=checkpoint_config,
|
|
||||||
config=runtime_config,
|
|
||||||
graph_id=self.graph.id or self.agent_path.name,
|
|
||||||
accounts_prompt=accounts_prompt,
|
|
||||||
accounts_data=accounts_data,
|
|
||||||
tool_provider_map=tool_provider_map,
|
|
||||||
event_bus=event_bus,
|
|
||||||
skills_manager_config=skills_manager_config,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Pass intro_message through for TUI display
|
|
||||||
self._agent_runtime.intro_message = self.intro_message
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Execution modes
|
# Execution modes
|
||||||
#
|
#
|
||||||
@@ -1990,7 +1865,7 @@ class AgentRunner:
|
|||||||
sub_ids: list[str] = []
|
sub_ids: list[str] = []
|
||||||
|
|
||||||
if has_queen and sys.stdin.isatty():
|
if has_queen and sys.stdin.isatty():
|
||||||
from framework.runtime.event_bus import EventType
|
from framework.host.event_bus import EventType
|
||||||
|
|
||||||
runtime = self._agent_runtime
|
runtime = self._agent_runtime
|
||||||
|
|
||||||
@@ -2246,7 +2121,7 @@ class AgentRunner:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
# aden_tools not installed - fall back to direct check
|
# aden_tools not installed - fall back to direct check
|
||||||
has_llm_nodes = any(
|
has_llm_nodes = any(
|
||||||
node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
|
node.node_type == "event_loop" for node in self.graph.nodes
|
||||||
)
|
)
|
||||||
if has_llm_nodes:
|
if has_llm_nodes:
|
||||||
api_key_env = self._get_api_key_env_var(self.model)
|
api_key_env = self._get_api_key_env_var(self.model)
|
||||||
@@ -2283,7 +2158,7 @@ class AgentRunner:
|
|||||||
# Run synchronous cleanup
|
# Run synchronous cleanup
|
||||||
self.cleanup()
|
self.cleanup()
|
||||||
|
|
||||||
async def __aenter__(self) -> "AgentRunner":
|
async def __aenter__(self) -> "AgentLoader":
|
||||||
"""Context manager entry."""
|
"""Context manager entry."""
|
||||||
self._setup()
|
self._setup()
|
||||||
if self._agent_runtime is not None:
|
if self._agent_runtime is not None:
|
||||||
@@ -19,7 +19,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
|||||||
run_parser.add_argument(
|
run_parser.add_argument(
|
||||||
"agent_path",
|
"agent_path",
|
||||||
type=str,
|
type=str,
|
||||||
help="Path to agent folder (containing agent.json)",
|
help="Path to agent folder (containing agent.json or agent.py)",
|
||||||
)
|
)
|
||||||
run_parser.add_argument(
|
run_parser.add_argument(
|
||||||
"--input",
|
"--input",
|
||||||
@@ -87,7 +87,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
|||||||
info_parser.add_argument(
|
info_parser.add_argument(
|
||||||
"agent_path",
|
"agent_path",
|
||||||
type=str,
|
type=str,
|
||||||
help="Path to agent folder (containing agent.json)",
|
help="Path to agent folder (containing agent.json or agent.py)",
|
||||||
)
|
)
|
||||||
info_parser.add_argument(
|
info_parser.add_argument(
|
||||||
"--json",
|
"--json",
|
||||||
@@ -105,7 +105,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
|||||||
validate_parser.add_argument(
|
validate_parser.add_argument(
|
||||||
"agent_path",
|
"agent_path",
|
||||||
type=str,
|
type=str,
|
||||||
help="Path to agent folder (containing agent.json)",
|
help="Path to agent folder (containing agent.json or agent.py)",
|
||||||
)
|
)
|
||||||
validate_parser.set_defaults(func=cmd_validate)
|
validate_parser.set_defaults(func=cmd_validate)
|
||||||
|
|
||||||
@@ -310,7 +310,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):
|
|||||||
Updated runner if user proceeds, None if user aborts.
|
Updated runner if user proceeds, None if user aborts.
|
||||||
"""
|
"""
|
||||||
from framework.credentials.setup import CredentialSetupSession
|
from framework.credentials.setup import CredentialSetupSession
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
print()
|
print()
|
||||||
@@ -328,7 +328,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):
|
|||||||
if result.success:
|
if result.success:
|
||||||
# Reload runner with updated credentials
|
# Reload runner with updated credentials
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(agent_path, model=model)
|
runner = AgentLoader.load(agent_path, model=model)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error reloading agent: {e}")
|
print(f"Error reloading agent: {e}")
|
||||||
return None
|
return None
|
||||||
@@ -342,7 +342,7 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|||||||
|
|
||||||
from framework.credentials.models import CredentialError
|
from framework.credentials.models import CredentialError
|
||||||
from framework.observability import configure_logging
|
from framework.observability import configure_logging
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
# Set logging level (quiet by default for cleaner output)
|
# Set logging level (quiet by default for cleaner output)
|
||||||
if args.quiet:
|
if args.quiet:
|
||||||
@@ -390,7 +390,7 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|||||||
# Standard execution
|
# Standard execution
|
||||||
# AgentRunner handles credential setup interactively when stdin is a TTY.
|
# AgentRunner handles credential setup interactively when stdin is a TTY.
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(
|
runner = AgentLoader.load(
|
||||||
args.agent_path,
|
args.agent_path,
|
||||||
model=args.model,
|
model=args.model,
|
||||||
)
|
)
|
||||||
@@ -528,10 +528,10 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|||||||
def cmd_info(args: argparse.Namespace) -> int:
|
def cmd_info(args: argparse.Namespace) -> int:
|
||||||
"""Show agent information."""
|
"""Show agent information."""
|
||||||
from framework.credentials.models import CredentialError
|
from framework.credentials.models import CredentialError
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(args.agent_path)
|
runner = AgentLoader.load(args.agent_path)
|
||||||
except CredentialError as e:
|
except CredentialError as e:
|
||||||
print(f"\n{e}", file=sys.stderr)
|
print(f"\n{e}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
@@ -595,10 +595,10 @@ def cmd_info(args: argparse.Namespace) -> int:
|
|||||||
def cmd_validate(args: argparse.Namespace) -> int:
|
def cmd_validate(args: argparse.Namespace) -> int:
|
||||||
"""Validate an exported agent."""
|
"""Validate an exported agent."""
|
||||||
from framework.credentials.models import CredentialError
|
from framework.credentials.models import CredentialError
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(args.agent_path)
|
runner = AgentLoader.load(args.agent_path)
|
||||||
except CredentialError as e:
|
except CredentialError as e:
|
||||||
print(f"\n{e}", file=sys.stderr)
|
print(f"\n{e}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
@@ -632,7 +632,7 @@ def cmd_validate(args: argparse.Namespace) -> int:
|
|||||||
|
|
||||||
def cmd_list(args: argparse.Namespace) -> int:
|
def cmd_list(args: argparse.Namespace) -> int:
|
||||||
"""List available agents."""
|
"""List available agents."""
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
directory = Path(args.directory)
|
directory = Path(args.directory)
|
||||||
if not directory.exists():
|
if not directory.exists():
|
||||||
@@ -644,7 +644,7 @@ def cmd_list(args: argparse.Namespace) -> int:
|
|||||||
for path in directory.iterdir():
|
for path in directory.iterdir():
|
||||||
if _is_valid_agent_dir(path):
|
if _is_valid_agent_dir(path):
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(path)
|
runner = AgentLoader.load(path)
|
||||||
info = runner.info()
|
info = runner.info()
|
||||||
agents.append(
|
agents.append(
|
||||||
{
|
{
|
||||||
@@ -686,7 +686,7 @@ def cmd_list(args: argparse.Namespace) -> int:
|
|||||||
|
|
||||||
def _interactive_approval(request):
|
def _interactive_approval(request):
|
||||||
"""Interactive approval callback for HITL mode."""
|
"""Interactive approval callback for HITL mode."""
|
||||||
from framework.graph import ApprovalDecision, ApprovalResult
|
from framework.orchestrator import ApprovalDecision, ApprovalResult
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
@@ -775,7 +775,7 @@ def cmd_shell(args: argparse.Namespace) -> int:
|
|||||||
|
|
||||||
from framework.credentials.models import CredentialError
|
from framework.credentials.models import CredentialError
|
||||||
from framework.observability import configure_logging
|
from framework.observability import configure_logging
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
configure_logging(level="INFO")
|
configure_logging(level="INFO")
|
||||||
|
|
||||||
@@ -789,7 +789,7 @@ def cmd_shell(args: argparse.Namespace) -> int:
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(agent_path)
|
runner = AgentLoader.load(agent_path)
|
||||||
except CredentialError as e:
|
except CredentialError as e:
|
||||||
print(f"\n{e}", file=sys.stderr)
|
print(f"\n{e}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
@@ -1004,17 +1004,35 @@ def _get_framework_agents_dir() -> Path:
|
|||||||
|
|
||||||
|
|
||||||
def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
|
def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
|
||||||
"""Extract name and description from a Python-based agent's config.py.
|
"""Extract name and description from an agent directory.
|
||||||
|
|
||||||
Uses AST parsing to safely extract values without executing code.
|
Checks agent.json first (declarative), then falls back to config.py
|
||||||
|
(legacy Python). Uses AST parsing for Python to avoid executing code.
|
||||||
Returns (name, description) tuple, with fallbacks if parsing fails.
|
Returns (name, description) tuple, with fallbacks if parsing fails.
|
||||||
"""
|
"""
|
||||||
import ast
|
import ast
|
||||||
|
|
||||||
config_path = agent_path / "config.py"
|
|
||||||
fallback_name = agent_path.name.replace("_", " ").title()
|
fallback_name = agent_path.name.replace("_", " ").title()
|
||||||
fallback_desc = "(Python-based agent)"
|
fallback_desc = "(Python-based agent)"
|
||||||
|
|
||||||
|
# Declarative agent: read from agent.json
|
||||||
|
agent_json = agent_path / "agent.json"
|
||||||
|
if agent_json.exists():
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
|
||||||
|
data = json.loads(agent_json.read_text(encoding="utf-8"))
|
||||||
|
if isinstance(data, dict):
|
||||||
|
name = data.get("name", fallback_name)
|
||||||
|
# Convert kebab-case to Title Case for display
|
||||||
|
if "-" in name and " " not in name:
|
||||||
|
name = name.replace("-", " ").title()
|
||||||
|
desc = data.get("description", fallback_desc)
|
||||||
|
return name, desc
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
config_path = agent_path / "config.py"
|
||||||
if not config_path.exists():
|
if not config_path.exists():
|
||||||
return fallback_name, fallback_desc
|
return fallback_name, fallback_desc
|
||||||
|
|
||||||
@@ -1083,7 +1101,7 @@ def _is_valid_agent_dir(path: Path) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
def _has_agents(directory: Path) -> bool:
|
def _has_agents(directory: Path) -> bool:
|
||||||
"""Check if a directory contains any valid agents (folders with agent.json or agent.py)."""
|
"""Check if a directory contains any valid agents."""
|
||||||
if not directory.exists():
|
if not directory.exists():
|
||||||
return False
|
return False
|
||||||
return any(_is_valid_agent_dir(p) for p in directory.iterdir())
|
return any(_is_valid_agent_dir(p) for p in directory.iterdir())
|
||||||
@@ -14,7 +14,7 @@ from typing import Any, Literal
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from framework.runner.mcp_errors import MCPToolNotFoundError
|
from framework.loader.mcp_errors import MCPToolNotFoundError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
+1
-1
@@ -5,7 +5,7 @@ import threading
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from framework.runner.mcp_client import MCPClient, MCPServerConfig
|
from framework.loader.mcp_client import MCPClient, MCPServerConfig
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -14,9 +14,9 @@ from typing import Any, Literal
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from framework.runner.mcp_client import MCPClient, MCPServerConfig
|
from framework.loader.mcp_client import MCPClient, MCPServerConfig
|
||||||
from framework.runner.mcp_connection_manager import MCPConnectionManager
|
from framework.loader.mcp_connection_manager import MCPConnectionManager
|
||||||
from framework.runner.mcp_errors import (
|
from framework.loader.mcp_errors import (
|
||||||
MCPError,
|
MCPError,
|
||||||
MCPErrorCode,
|
MCPErrorCode,
|
||||||
MCPInstallError,
|
MCPInstallError,
|
||||||
+1
-1
@@ -28,7 +28,7 @@ from typing import Any
|
|||||||
|
|
||||||
def _get_registry(base_path: Path | None = None):
|
def _get_registry(base_path: Path | None = None):
|
||||||
"""Initialize and return an MCPRegistry instance."""
|
"""Initialize and return an MCPRegistry instance."""
|
||||||
from framework.runner.mcp_registry import MCPRegistry
|
from framework.loader.mcp_registry import MCPRegistry
|
||||||
|
|
||||||
registry = MCPRegistry(base_path=base_path)
|
registry = MCPRegistry(base_path=base_path)
|
||||||
registry.initialize()
|
registry.initialize()
|
||||||
+2
-2
@@ -11,8 +11,8 @@ from dataclasses import dataclass, field
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
from framework.graph.node import NodeSpec
|
from framework.orchestrator.node import NodeSpec
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -262,15 +262,21 @@ class ToolRegistry:
|
|||||||
is_error=False,
|
is_error=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
registry_ref = self
|
||||||
|
|
||||||
def executor(tool_use: ToolUse) -> ToolResult:
|
def executor(tool_use: ToolUse) -> ToolResult:
|
||||||
if tool_use.name not in self._tools:
|
# Check if credential files changed (lightweight dir listing).
|
||||||
|
# If new OAuth tokens appeared, restarts MCP servers to pick them up.
|
||||||
|
registry_ref.resync_mcp_servers_if_needed()
|
||||||
|
|
||||||
|
if tool_use.name not in registry_ref._tools:
|
||||||
return ToolResult(
|
return ToolResult(
|
||||||
tool_use_id=tool_use.id,
|
tool_use_id=tool_use.id,
|
||||||
content=json.dumps({"error": f"Unknown tool: {tool_use.name}"}),
|
content=json.dumps({"error": f"Unknown tool: {tool_use.name}"}),
|
||||||
is_error=True,
|
is_error=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
registered = self._tools[tool_use.name]
|
registered = registry_ref._tools[tool_use.name]
|
||||||
try:
|
try:
|
||||||
result = registered.executor(tool_use.input)
|
result = registered.executor(tool_use.input)
|
||||||
|
|
||||||
@@ -635,8 +641,8 @@ class ToolRegistry:
|
|||||||
Number of tools registered from this server
|
Number of tools registered from this server
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from framework.runner.mcp_client import MCPClient, MCPServerConfig
|
from framework.loader.mcp_client import MCPClient, MCPServerConfig
|
||||||
from framework.runner.mcp_connection_manager import MCPConnectionManager
|
from framework.loader.mcp_connection_manager import MCPConnectionManager
|
||||||
|
|
||||||
# Build config object
|
# Build config object
|
||||||
config = MCPServerConfig(
|
config = MCPServerConfig(
|
||||||
@@ -883,7 +889,7 @@ class ToolRegistry:
|
|||||||
"""Re-run ``mcp_registry.json`` resolution and register servers (post-resync)."""
|
"""Re-run ``mcp_registry.json`` resolution and register servers (post-resync)."""
|
||||||
if self._mcp_registry_agent_path is None:
|
if self._mcp_registry_agent_path is None:
|
||||||
return
|
return
|
||||||
from framework.runner.mcp_registry import MCPRegistry
|
from framework.loader.mcp_registry import MCPRegistry
|
||||||
|
|
||||||
try:
|
try:
|
||||||
reg = MCPRegistry()
|
reg = MCPRegistry()
|
||||||
@@ -922,6 +928,11 @@ class ToolRegistry:
|
|||||||
clients and re-loads them so the new subprocess picks up the fresh
|
clients and re-loads them so the new subprocess picks up the fresh
|
||||||
credentials.
|
credentials.
|
||||||
|
|
||||||
|
Note: Individual credential TTL/refresh is handled by the MCP server
|
||||||
|
process internally -- it resolves tokens from the credential store
|
||||||
|
on every tool call, not at startup. This method only handles the case
|
||||||
|
where entirely new credential files appear.
|
||||||
|
|
||||||
Returns True if a resync was performed, False otherwise.
|
Returns True if a resync was performed, False otherwise.
|
||||||
"""
|
"""
|
||||||
if not self._mcp_clients or self._mcp_config_path is None:
|
if not self._mcp_clients or self._mcp_config_path is None:
|
||||||
@@ -975,7 +986,7 @@ class ToolRegistry:
|
|||||||
server_name = self._mcp_client_servers.get(client_id, client.config.name)
|
server_name = self._mcp_client_servers.get(client_id, client.config.name)
|
||||||
try:
|
try:
|
||||||
if client_id in self._mcp_managed_clients:
|
if client_id in self._mcp_managed_clients:
|
||||||
from framework.runner.mcp_connection_manager import MCPConnectionManager
|
from framework.loader.mcp_connection_manager import MCPConnectionManager
|
||||||
|
|
||||||
MCPConnectionManager.get_instance().release(server_name)
|
MCPConnectionManager.get_instance().release(server_name)
|
||||||
else:
|
else:
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
"""Orchestrator layer -- how agents are composed via graphs.
|
||||||
|
|
||||||
|
Lazy imports to avoid circular dependencies with graph/event_loop/*.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def __getattr__(name: str):
|
||||||
|
if name in ("GraphContext",):
|
||||||
|
from framework.orchestrator.context import GraphContext
|
||||||
|
return GraphContext
|
||||||
|
if name in ("DEFAULT_MAX_TOKENS", "EdgeCondition", "EdgeSpec", "GraphSpec"):
|
||||||
|
from framework.orchestrator import edge as _e
|
||||||
|
return getattr(_e, name)
|
||||||
|
if name in ("Orchestrator", "ExecutionResult"):
|
||||||
|
from framework.orchestrator import orchestrator as _o
|
||||||
|
return getattr(_o, name)
|
||||||
|
if name in ("Constraint", "Goal", "GoalStatus", "SuccessCriterion"):
|
||||||
|
from framework.orchestrator import goal as _g
|
||||||
|
return getattr(_g, name)
|
||||||
|
if name in ("DataBuffer", "NodeContext", "NodeProtocol", "NodeResult", "NodeSpec"):
|
||||||
|
from framework.orchestrator import node as _n
|
||||||
|
return getattr(_n, name)
|
||||||
|
if name in ("NodeWorker", "Activation", "FanOutTag", "FanOutTracker",
|
||||||
|
"WorkerCompletion", "WorkerLifecycle"):
|
||||||
|
from framework.orchestrator import node_worker as _nw
|
||||||
|
return getattr(_nw, name)
|
||||||
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||||
@@ -16,7 +16,7 @@ from collections.abc import AsyncIterator
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -13,10 +13,10 @@ import asyncio
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
from framework.graph.goal import Goal
|
from framework.orchestrator.goal import Goal
|
||||||
from framework.graph.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
|
from framework.orchestrator.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
|
||||||
from framework.runtime.core import Runtime
|
from framework.tracker.decision_tracker import DecisionTracker
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -26,7 +26,7 @@ class GraphContext:
|
|||||||
graph: GraphSpec
|
graph: GraphSpec
|
||||||
goal: Goal
|
goal: Goal
|
||||||
buffer: DataBuffer
|
buffer: DataBuffer
|
||||||
runtime: Runtime
|
runtime: DecisionTracker
|
||||||
llm: Any # LLMProvider
|
llm: Any # LLMProvider
|
||||||
tools: list[Any] # list[Tool]
|
tools: list[Any] # list[Tool]
|
||||||
tool_executor: Any # Callable
|
tool_executor: Any # Callable
|
||||||
@@ -106,7 +106,7 @@ def build_node_accounts_prompt(
|
|||||||
|
|
||||||
resolved = accounts_prompt
|
resolved = accounts_prompt
|
||||||
if accounts_data and tool_provider_map:
|
if accounts_data and tool_provider_map:
|
||||||
from framework.graph.prompting import build_accounts_prompt
|
from framework.orchestrator.prompting import build_accounts_prompt
|
||||||
|
|
||||||
filtered = build_accounts_prompt(
|
filtered = build_accounts_prompt(
|
||||||
accounts_data,
|
accounts_data,
|
||||||
@@ -125,11 +125,27 @@ def _resolve_available_tools(
|
|||||||
tools: list[Any],
|
tools: list[Any],
|
||||||
override_tools: list[Any] | None,
|
override_tools: list[Any] | None,
|
||||||
) -> list[Any]:
|
) -> list[Any]:
|
||||||
"""Select tools available to the current node."""
|
"""Select tools available to the current node.
|
||||||
|
|
||||||
|
Respects ``node_spec.tool_access_policy``:
|
||||||
|
- ``"all"`` -- all tools from the registry (no filtering).
|
||||||
|
- ``"explicit"`` -- only tools whose name appears in ``node_spec.tools``.
|
||||||
|
If the list is empty, **no tools** are given (default-deny).
|
||||||
|
- ``"none"`` -- no tools at all.
|
||||||
|
"""
|
||||||
|
|
||||||
if override_tools is not None:
|
if override_tools is not None:
|
||||||
return list(override_tools)
|
return list(override_tools)
|
||||||
|
|
||||||
|
policy = getattr(node_spec, "tool_access_policy", "explicit")
|
||||||
|
|
||||||
|
if policy == "none":
|
||||||
|
return []
|
||||||
|
|
||||||
|
if policy == "all":
|
||||||
|
return list(tools)
|
||||||
|
|
||||||
|
# "explicit" (default): only tools named in node_spec.tools.
|
||||||
if not node_spec.tools:
|
if not node_spec.tools:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@@ -149,7 +165,7 @@ def _derive_input_data(buffer: DataBuffer, input_keys: list[str]) -> dict[str, A
|
|||||||
|
|
||||||
def build_node_context(
|
def build_node_context(
|
||||||
*,
|
*,
|
||||||
runtime: Runtime,
|
runtime: DecisionTracker,
|
||||||
node_spec: NodeSpec,
|
node_spec: NodeSpec,
|
||||||
buffer: DataBuffer,
|
buffer: DataBuffer,
|
||||||
goal: Goal,
|
goal: Goal,
|
||||||
@@ -234,9 +250,6 @@ def build_node_context(
|
|||||||
execution_id=execution_id,
|
execution_id=execution_id,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
stream_id=stream_id,
|
stream_id=stream_id,
|
||||||
node_registry=node_registry or {},
|
|
||||||
all_tools=list(all_tools or tools),
|
|
||||||
shared_node_registry=shared_node_registry or {},
|
|
||||||
dynamic_tools_provider=dynamic_tools_provider,
|
dynamic_tools_provider=dynamic_tools_provider,
|
||||||
dynamic_prompt_provider=dynamic_prompt_provider,
|
dynamic_prompt_provider=dynamic_prompt_provider,
|
||||||
dynamic_memory_provider=dynamic_memory_provider,
|
dynamic_memory_provider=dynamic_memory_provider,
|
||||||
@@ -308,9 +321,6 @@ def build_node_context_from_graph_context(
|
|||||||
execution_id=gc.execution_id,
|
execution_id=gc.execution_id,
|
||||||
run_id=gc.run_id,
|
run_id=gc.run_id,
|
||||||
stream_id=gc.stream_id,
|
stream_id=gc.stream_id,
|
||||||
node_registry=node_registry or gc.node_spec_registry,
|
|
||||||
all_tools=gc.tools,
|
|
||||||
shared_node_registry=gc.node_registry,
|
|
||||||
dynamic_tools_provider=gc.dynamic_tools_provider,
|
dynamic_tools_provider=gc.dynamic_tools_provider,
|
||||||
dynamic_prompt_provider=gc.dynamic_prompt_provider,
|
dynamic_prompt_provider=gc.dynamic_prompt_provider,
|
||||||
dynamic_memory_provider=gc.dynamic_memory_provider,
|
dynamic_memory_provider=gc.dynamic_memory_provider,
|
||||||
+2
-2
@@ -6,10 +6,10 @@ import logging
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
from framework.graph.conversation import _try_extract_key
|
from framework.agent_loop.conversation import _try_extract_key
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.conversation import NodeConversation
|
from framework.agent_loop.conversation import NodeConversation
|
||||||
from framework.llm.provider import LLMProvider
|
from framework.llm.provider import LLMProvider
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
+1
-1
@@ -15,7 +15,7 @@ import logging
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.conversation import NodeConversation
|
from framework.agent_loop.conversation import NodeConversation
|
||||||
from framework.llm.provider import LLMProvider
|
from framework.llm.provider import LLMProvider
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -29,7 +29,7 @@ from typing import Any
|
|||||||
|
|
||||||
from pydantic import BaseModel, Field, model_validator
|
from pydantic import BaseModel, Field, model_validator
|
||||||
|
|
||||||
from framework.graph.safe_eval import safe_eval
|
from framework.orchestrator.safe_eval import safe_eval
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -538,13 +538,6 @@ class GraphSpec(BaseModel):
|
|||||||
for edge in self.get_outgoing_edges(current):
|
for edge in self.get_outgoing_edges(current):
|
||||||
to_visit.append(edge.target)
|
to_visit.append(edge.target)
|
||||||
|
|
||||||
# Also mark sub-agents as reachable (they're invoked via delegate_to_sub_agent, not edges)
|
|
||||||
for node in self.nodes:
|
|
||||||
if node.id in reachable:
|
|
||||||
sub_agents = getattr(node, "sub_agents", []) or []
|
|
||||||
for sub_agent_id in sub_agents:
|
|
||||||
reachable.add(sub_agent_id)
|
|
||||||
|
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
if node.id not in reachable:
|
if node.id not in reachable:
|
||||||
# Skip if node is a pause node or entry point target
|
# Skip if node is a pause node or entry point target
|
||||||
@@ -583,48 +576,4 @@ class GraphSpec(BaseModel):
|
|||||||
else:
|
else:
|
||||||
seen_keys[key] = node_id
|
seen_keys[key] = node_id
|
||||||
|
|
||||||
# GCU nodes must only be used as subagents
|
|
||||||
gcu_node_ids = {n.id for n in self.nodes if n.node_type == "gcu"}
|
|
||||||
if gcu_node_ids:
|
|
||||||
# GCU nodes must not be entry nodes
|
|
||||||
if self.entry_node in gcu_node_ids:
|
|
||||||
errors.append(
|
|
||||||
f"GCU node '{self.entry_node}' is used as entry node. "
|
|
||||||
"GCU nodes must only be used as subagents via delegate_to_sub_agent()."
|
|
||||||
)
|
|
||||||
|
|
||||||
# GCU nodes must not be terminal nodes
|
|
||||||
for term in self.terminal_nodes:
|
|
||||||
if term in gcu_node_ids:
|
|
||||||
errors.append(
|
|
||||||
f"GCU node '{term}' is used as terminal node. "
|
|
||||||
"GCU nodes must only be used as subagents."
|
|
||||||
)
|
|
||||||
|
|
||||||
# GCU nodes must not be connected via edges
|
|
||||||
for edge in self.edges:
|
|
||||||
if edge.source in gcu_node_ids:
|
|
||||||
errors.append(
|
|
||||||
f"GCU node '{edge.source}' is used as edge source (edge '{edge.id}'). "
|
|
||||||
"GCU nodes must only be used as subagents, not connected via edges."
|
|
||||||
)
|
|
||||||
if edge.target in gcu_node_ids:
|
|
||||||
errors.append(
|
|
||||||
f"GCU node '{edge.target}' is used as edge target (edge '{edge.id}'). "
|
|
||||||
"GCU nodes must only be used as subagents, not connected via edges."
|
|
||||||
)
|
|
||||||
|
|
||||||
# GCU nodes must be referenced in at least one parent's sub_agents
|
|
||||||
referenced_subagents = set()
|
|
||||||
for node in self.nodes:
|
|
||||||
for sa_id in node.sub_agents or []:
|
|
||||||
referenced_subagents.add(sa_id)
|
|
||||||
|
|
||||||
orphaned = gcu_node_ids - referenced_subagents
|
|
||||||
for nid in orphaned:
|
|
||||||
errors.append(
|
|
||||||
f"GCU node '{nid}' is not referenced in any node's sub_agents list. "
|
|
||||||
"GCU nodes must be declared as subagents of a parent node."
|
|
||||||
)
|
|
||||||
|
|
||||||
return {"errors": errors, "warnings": warnings}
|
return {"errors": errors, "warnings": warnings}
|
||||||
@@ -1,34 +1,14 @@
|
|||||||
"""GCU (browser automation) node type constants.
|
"""Browser automation best-practices prompt.
|
||||||
|
|
||||||
A ``gcu`` node is an ``event_loop`` node with two automatic enhancements:
|
This module provides ``GCU_BROWSER_SYSTEM_PROMPT`` -- a canonical set of
|
||||||
1. A canonical browser best-practices system prompt is prepended.
|
browser automation guidelines that can be included in any node's system
|
||||||
2. All tools from the GCU MCP server are auto-included.
|
prompt that uses browser tools from the gcu-tools MCP server.
|
||||||
|
|
||||||
No new ``NodeProtocol`` subclass — the ``gcu`` type is purely a declarative
|
Browser tools are registered via the global MCP registry (gcu-tools).
|
||||||
signal processed by the runner and executor at setup time.
|
Nodes that need browser access declare ``tools: {policy: "all"}`` in their
|
||||||
|
agent.json config.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# MCP server identity
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
GCU_SERVER_NAME = "gcu-tools"
|
|
||||||
"""Name used to identify the GCU MCP server in ``mcp_servers.json``."""
|
|
||||||
|
|
||||||
GCU_MCP_SERVER_CONFIG: dict = {
|
|
||||||
"name": GCU_SERVER_NAME,
|
|
||||||
"transport": "stdio",
|
|
||||||
"command": "uv",
|
|
||||||
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
|
|
||||||
"cwd": "../../tools",
|
|
||||||
"description": "GCU tools for browser automation",
|
|
||||||
}
|
|
||||||
"""Default stdio config for the GCU MCP server (relative to exports/<agent>/)."""
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Browser best-practices system prompt
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
GCU_BROWSER_SYSTEM_PROMPT = """\
|
GCU_BROWSER_SYSTEM_PROMPT = """\
|
||||||
# Browser Automation Best Practices
|
# Browser Automation Best Practices
|
||||||
|
|
||||||
@@ -25,7 +25,7 @@ from typing import Any
|
|||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from framework.llm.provider import LLMProvider, Tool
|
from framework.llm.provider import LLMProvider, Tool
|
||||||
from framework.runtime.core import Runtime
|
from framework.tracker.decision_tracker import DecisionTracker
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -144,15 +144,19 @@ class NodeSpec(BaseModel):
|
|||||||
# For LLM nodes
|
# For LLM nodes
|
||||||
system_prompt: str | None = Field(default=None, description="System prompt for LLM nodes")
|
system_prompt: str | None = Field(default=None, description="System prompt for LLM nodes")
|
||||||
tools: list[str] = Field(default_factory=list, description="Tool names this node can use")
|
tools: list[str] = Field(default_factory=list, description="Tool names this node can use")
|
||||||
|
tool_access_policy: str = Field(
|
||||||
|
default="explicit",
|
||||||
|
description=(
|
||||||
|
"Tool access policy for this node. "
|
||||||
|
"'all' = all tools from registry, "
|
||||||
|
"'explicit' = only tools listed in `tools` (default, recommended), "
|
||||||
|
"'none' = no tools at all."
|
||||||
|
),
|
||||||
|
)
|
||||||
model: str | None = Field(
|
model: str | None = Field(
|
||||||
default=None, description="Specific model to use (defaults to graph default)"
|
default=None, description="Specific model to use (defaults to graph default)"
|
||||||
)
|
)
|
||||||
|
|
||||||
# For subagent delegation
|
|
||||||
sub_agents: list[str] = Field(
|
|
||||||
default_factory=list,
|
|
||||||
description="Node IDs that can be invoked as subagents from this node",
|
|
||||||
)
|
|
||||||
# For function nodes
|
# For function nodes
|
||||||
function: str | None = Field(
|
function: str | None = Field(
|
||||||
default=None, description="Function name or path for function nodes"
|
default=None, description="Function name or path for function nodes"
|
||||||
@@ -459,7 +463,7 @@ class NodeContext:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Core runtime
|
# Core runtime
|
||||||
runtime: Runtime
|
runtime: DecisionTracker
|
||||||
|
|
||||||
# Node identity
|
# Node identity
|
||||||
node_id: str
|
node_id: str
|
||||||
@@ -526,20 +530,6 @@ class NodeContext:
|
|||||||
# Falls back to node_id when not set (legacy / standalone executor).
|
# Falls back to node_id when not set (legacy / standalone executor).
|
||||||
stream_id: str = ""
|
stream_id: str = ""
|
||||||
|
|
||||||
# Subagent mode
|
|
||||||
is_subagent_mode: bool = False # True when running as a subagent (prevents nested delegation)
|
|
||||||
report_callback: Any = None # async (message: str, data: dict | None) -> None
|
|
||||||
node_registry: dict[str, "NodeSpec"] = field(default_factory=dict) # For subagent lookup
|
|
||||||
|
|
||||||
# Full tool catalog (unfiltered) — used by _execute_subagent to resolve
|
|
||||||
# subagent tools that aren't in the parent node's filtered available_tools.
|
|
||||||
all_tools: list[Tool] = field(default_factory=list)
|
|
||||||
|
|
||||||
# Shared reference to the executor's node_registry — used by subagent
|
|
||||||
# escalation (_EscalationReceiver) to register temporary receivers that
|
|
||||||
# the inject_input() routing chain can find.
|
|
||||||
shared_node_registry: dict[str, Any] = field(default_factory=dict)
|
|
||||||
|
|
||||||
# Dynamic tool provider — when set, EventLoopNode rebuilds the tool
|
# Dynamic tool provider — when set, EventLoopNode rebuilds the tool
|
||||||
# list from this callback at the start of each iteration. Used by
|
# list from this callback at the start of each iteration. Used by
|
||||||
# the queen to switch between building-mode and running-mode tools.
|
# the queen to switch between building-mode and running-mode tools.
|
||||||
@@ -19,15 +19,15 @@ from dataclasses import dataclass, field
|
|||||||
from enum import StrEnum
|
from enum import StrEnum
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.context import GraphContext, build_node_context_from_graph_context
|
from framework.orchestrator.context import GraphContext, build_node_context_from_graph_context
|
||||||
from framework.graph.edge import EdgeCondition, EdgeSpec
|
from framework.orchestrator.edge import EdgeCondition, EdgeSpec
|
||||||
from framework.graph.node import (
|
from framework.orchestrator.node import (
|
||||||
NodeContext,
|
NodeContext,
|
||||||
NodeProtocol,
|
NodeProtocol,
|
||||||
NodeResult,
|
NodeResult,
|
||||||
NodeSpec,
|
NodeSpec,
|
||||||
)
|
)
|
||||||
from framework.graph.validator import OutputValidator
|
from framework.orchestrator.validator import OutputValidator
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -109,7 +109,7 @@ class RetryState:
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
class WorkerAgent:
|
class NodeWorker:
|
||||||
"""First-class autonomous worker for one node in the graph.
|
"""First-class autonomous worker for one node in the graph.
|
||||||
|
|
||||||
Lifecycle:
|
Lifecycle:
|
||||||
@@ -355,7 +355,7 @@ class WorkerAgent:
|
|||||||
# Only skip retries for actual EventLoopNode instances (they handle
|
# Only skip retries for actual EventLoopNode instances (they handle
|
||||||
# retries internally). Custom NodeProtocol impls registered via
|
# retries internally). Custom NodeProtocol impls registered via
|
||||||
# register_node should be retried by the executor.
|
# register_node should be retried by the executor.
|
||||||
from framework.graph.event_loop_node import EventLoopNode as _ELN
|
from framework.agent_loop.agent_loop import AgentLoop as _ELN
|
||||||
|
|
||||||
if isinstance(node_impl, _ELN):
|
if isinstance(node_impl, _ELN):
|
||||||
max_retries = 0
|
max_retries = 0
|
||||||
@@ -603,10 +603,10 @@ class WorkerAgent:
|
|||||||
return self._node_impl
|
return self._node_impl
|
||||||
|
|
||||||
# Auto-create EventLoopNode
|
# Auto-create EventLoopNode
|
||||||
if self.node_spec.node_type in ("event_loop", "gcu"):
|
if self.node_spec.node_type == "event_loop":
|
||||||
from framework.graph.event_loop.types import LoopConfig
|
from framework.agent_loop.internals.types import LoopConfig
|
||||||
from framework.graph.event_loop_node import EventLoopNode
|
from framework.agent_loop.agent_loop import AgentLoop
|
||||||
from framework.graph.node import warn_if_deprecated_client_facing
|
from framework.orchestrator.node import warn_if_deprecated_client_facing
|
||||||
|
|
||||||
conv_store = None
|
conv_store = None
|
||||||
if gc.storage_path:
|
if gc.storage_path:
|
||||||
@@ -619,7 +619,7 @@ class WorkerAgent:
|
|||||||
warn_if_deprecated_client_facing(self.node_spec)
|
warn_if_deprecated_client_facing(self.node_spec)
|
||||||
default_max_iter = 100 if self.node_spec.supports_direct_user_io() else 50
|
default_max_iter = 100 if self.node_spec.supports_direct_user_io() else 50
|
||||||
|
|
||||||
node = EventLoopNode(
|
node = AgentLoop(
|
||||||
event_bus=gc.event_bus,
|
event_bus=gc.event_bus,
|
||||||
judge=None,
|
judge=None,
|
||||||
config=LoopConfig(
|
config=LoopConfig(
|
||||||
@@ -734,7 +734,7 @@ class WorkerAgent:
|
|||||||
if not next_spec or next_spec.node_type != "event_loop":
|
if not next_spec or next_spec.node_type != "event_loop":
|
||||||
return
|
return
|
||||||
|
|
||||||
from framework.graph.prompting import (
|
from framework.orchestrator.prompting import (
|
||||||
TransitionSpec,
|
TransitionSpec,
|
||||||
build_narrative,
|
build_narrative,
|
||||||
build_system_prompt_for_node_context,
|
build_system_prompt_for_node_context,
|
||||||
@@ -16,21 +16,21 @@ from dataclasses import dataclass, field
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.checkpoint_config import CheckpointConfig
|
from framework.orchestrator.checkpoint_config import CheckpointConfig
|
||||||
from framework.graph.context import GraphContext, build_node_context
|
from framework.orchestrator.context import GraphContext, build_node_context
|
||||||
from framework.graph.conversation import LEGACY_RUN_ID
|
from framework.agent_loop.conversation import LEGACY_RUN_ID
|
||||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
from framework.orchestrator.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||||
from framework.graph.goal import Goal
|
from framework.orchestrator.goal import Goal
|
||||||
from framework.graph.node import (
|
from framework.orchestrator.node import (
|
||||||
DataBuffer,
|
DataBuffer,
|
||||||
NodeProtocol,
|
NodeProtocol,
|
||||||
NodeResult,
|
NodeResult,
|
||||||
NodeSpec,
|
NodeSpec,
|
||||||
)
|
)
|
||||||
from framework.graph.validator import OutputValidator
|
from framework.orchestrator.validator import OutputValidator
|
||||||
from framework.llm.provider import LLMProvider, Tool
|
from framework.llm.provider import LLMProvider, Tool
|
||||||
from framework.observability import set_trace_context
|
from framework.observability import set_trace_context
|
||||||
from framework.runtime.core import Runtime
|
from framework.tracker.decision_tracker import DecisionTracker
|
||||||
from framework.schemas.checkpoint import Checkpoint
|
from framework.schemas.checkpoint import Checkpoint
|
||||||
from framework.storage.checkpoint_store import CheckpointStore
|
from framework.storage.checkpoint_store import CheckpointStore
|
||||||
from framework.utils.io import atomic_write
|
from framework.utils.io import atomic_write
|
||||||
@@ -112,7 +112,7 @@ class ParallelExecutionConfig:
|
|||||||
branch_timeout_seconds: float = 300.0
|
branch_timeout_seconds: float = 300.0
|
||||||
|
|
||||||
|
|
||||||
class GraphExecutor:
|
class Orchestrator:
|
||||||
"""
|
"""
|
||||||
Executes agent graphs.
|
Executes agent graphs.
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ class GraphExecutor:
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
runtime: Runtime,
|
runtime: DecisionTracker,
|
||||||
llm: LLMProvider | None = None,
|
llm: LLMProvider | None = None,
|
||||||
tools: list[Tool] | None = None,
|
tools: list[Tool] | None = None,
|
||||||
tool_executor: Callable | None = None,
|
tool_executor: Callable | None = None,
|
||||||
@@ -165,7 +165,7 @@ class GraphExecutor:
|
|||||||
Initialize the executor.
|
Initialize the executor.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
runtime: Runtime for decision logging
|
runtime: DecisionTracker for decision logging
|
||||||
llm: LLM provider for LLM nodes
|
llm: LLM provider for LLM nodes
|
||||||
tools: Available tools
|
tools: Available tools
|
||||||
tool_executor: Function to execute tools
|
tool_executor: Function to execute tools
|
||||||
@@ -202,7 +202,7 @@ class GraphExecutor:
|
|||||||
self.validator = OutputValidator()
|
self.validator = OutputValidator()
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
"[GraphExecutor.__init__] Created with"
|
"[Orchestrator.__init__] Created with"
|
||||||
" stream_id=%s, execution_id=%s,"
|
" stream_id=%s, execution_id=%s,"
|
||||||
" initial node_registry keys: %s",
|
" initial node_registry keys: %s",
|
||||||
stream_id,
|
stream_id,
|
||||||
@@ -361,8 +361,8 @@ class GraphExecutor:
|
|||||||
|
|
||||||
Uses the same recursive binary-search splitting as EventLoopNode.
|
Uses the same recursive binary-search splitting as EventLoopNode.
|
||||||
"""
|
"""
|
||||||
from framework.graph.conversation import extract_tool_call_history
|
from framework.agent_loop.conversation import extract_tool_call_history
|
||||||
from framework.graph.event_loop_node import _is_context_too_large_error
|
from framework.agent_loop.agent_loop import _is_context_too_large_error
|
||||||
|
|
||||||
if _depth > self._PHASE_LLM_MAX_DEPTH:
|
if _depth > self._PHASE_LLM_MAX_DEPTH:
|
||||||
raise RuntimeError("Phase LLM compaction recursion limit")
|
raise RuntimeError("Phase LLM compaction recursion limit")
|
||||||
@@ -690,7 +690,7 @@ class GraphExecutor:
|
|||||||
# and spillover files share the same session-scoped directory.
|
# and spillover files share the same session-scoped directory.
|
||||||
_ctx_token = None
|
_ctx_token = None
|
||||||
if self._storage_path:
|
if self._storage_path:
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
from framework.loader.tool_registry import ToolRegistry
|
||||||
|
|
||||||
_ctx_token = ToolRegistry.set_execution_context(
|
_ctx_token = ToolRegistry.set_execution_context(
|
||||||
data_dir=str(self._storage_path / "data"),
|
data_dir=str(self._storage_path / "data"),
|
||||||
@@ -712,13 +712,12 @@ class GraphExecutor:
|
|||||||
|
|
||||||
finally:
|
finally:
|
||||||
if _ctx_token is not None:
|
if _ctx_token is not None:
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
from framework.loader.tool_registry import ToolRegistry
|
||||||
|
|
||||||
ToolRegistry.reset_execution_context(_ctx_token)
|
ToolRegistry.reset_execution_context(_ctx_token)
|
||||||
|
|
||||||
VALID_NODE_TYPES = {
|
VALID_NODE_TYPES = {
|
||||||
"event_loop",
|
"event_loop",
|
||||||
"gcu",
|
|
||||||
}
|
}
|
||||||
# Node types removed in v0.5 — provide migration guidance
|
# Node types removed in v0.5 — provide migration guidance
|
||||||
REMOVED_NODE_TYPES = {
|
REMOVED_NODE_TYPES = {
|
||||||
@@ -736,11 +735,11 @@ class GraphExecutor:
|
|||||||
# Check registry first
|
# Check registry first
|
||||||
if node_spec.id in self.node_registry:
|
if node_spec.id in self.node_registry:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[GraphExecutor._get_node_implementation] Found node '%s' in registry", node_spec.id
|
"[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id
|
||||||
)
|
)
|
||||||
return self.node_registry[node_spec.id]
|
return self.node_registry[node_spec.id]
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[GraphExecutor._get_node_implementation]"
|
"[Orchestrator._get_node_implementation]"
|
||||||
" Node '%s' not in registry (keys: %s),"
|
" Node '%s' not in registry (keys: %s),"
|
||||||
" creating new",
|
" creating new",
|
||||||
node_spec.id,
|
node_spec.id,
|
||||||
@@ -764,10 +763,10 @@ class GraphExecutor:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create based on type
|
# Create based on type
|
||||||
if node_spec.node_type in ("event_loop", "gcu"):
|
if node_spec.node_type == "event_loop":
|
||||||
# Auto-create EventLoopNode with sensible defaults.
|
# Auto-create EventLoopNode with sensible defaults.
|
||||||
# Custom configs can still be pre-registered via node_registry.
|
# Custom configs can still be pre-registered via node_registry.
|
||||||
from framework.graph.event_loop_node import EventLoopNode, LoopConfig
|
from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
|
||||||
|
|
||||||
# Create a FileConversationStore if a storage path is available
|
# Create a FileConversationStore if a storage path is available
|
||||||
conv_store = None
|
conv_store = None
|
||||||
@@ -787,13 +786,13 @@ class GraphExecutor:
|
|||||||
if self._storage_path:
|
if self._storage_path:
|
||||||
spillover = str(self._storage_path / "data")
|
spillover = str(self._storage_path / "data")
|
||||||
|
|
||||||
from framework.graph.node import warn_if_deprecated_client_facing
|
from framework.orchestrator.node import warn_if_deprecated_client_facing
|
||||||
|
|
||||||
warn_if_deprecated_client_facing(node_spec)
|
warn_if_deprecated_client_facing(node_spec)
|
||||||
|
|
||||||
lc = self._loop_config
|
lc = self._loop_config
|
||||||
default_max_iter = 100 if node_spec.supports_direct_user_io() else 50
|
default_max_iter = 100 if node_spec.supports_direct_user_io() else 50
|
||||||
node = EventLoopNode(
|
node = AgentLoop(
|
||||||
event_bus=self._event_bus,
|
event_bus=self._event_bus,
|
||||||
judge=None, # implicit judge: accept when output_keys are filled
|
judge=None, # implicit judge: accept when output_keys are filled
|
||||||
config=LoopConfig(
|
config=LoopConfig(
|
||||||
@@ -812,7 +811,7 @@ class GraphExecutor:
|
|||||||
# Cache so inject_event() is reachable for queen interaction and escalation routing
|
# Cache so inject_event() is reachable for queen interaction and escalation routing
|
||||||
self.node_registry[node_spec.id] = node
|
self.node_registry[node_spec.id] = node
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[GraphExecutor._get_node_implementation]"
|
"[Orchestrator._get_node_implementation]"
|
||||||
" Cached node '%s' in node_registry,"
|
" Cached node '%s' in node_registry,"
|
||||||
" registry now has keys: %s",
|
" registry now has keys: %s",
|
||||||
node_spec.id,
|
node_spec.id,
|
||||||
@@ -998,10 +997,10 @@ class GraphExecutor:
|
|||||||
branch_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
|
branch_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
|
||||||
|
|
||||||
effective_max_retries = node_spec.max_retries
|
effective_max_retries = node_spec.max_retries
|
||||||
# Only override for actual EventLoopNode instances, not custom NodeProtocol impls
|
# Only override for actual AgentLoop instances, not custom NodeProtocol impls
|
||||||
from framework.graph.event_loop_node import EventLoopNode
|
from framework.agent_loop.agent_loop import AgentLoop as _AgentLoop # noqa: F811
|
||||||
|
|
||||||
if isinstance(branch_impl, EventLoopNode) and effective_max_retries > 1:
|
if isinstance(branch_impl, _AgentLoop) and effective_max_retries > 1:
|
||||||
self.logger.warning(
|
self.logger.warning(
|
||||||
f"EventLoopNode '{node_spec.id}' has "
|
f"EventLoopNode '{node_spec.id}' has "
|
||||||
f"max_retries={effective_max_retries}. Overriding "
|
f"max_retries={effective_max_retries}. Overriding "
|
||||||
@@ -1042,9 +1041,6 @@ class GraphExecutor:
|
|||||||
execution_id=self._execution_id,
|
execution_id=self._execution_id,
|
||||||
run_id=self._run_id,
|
run_id=self._run_id,
|
||||||
stream_id=self._stream_id,
|
stream_id=self._stream_id,
|
||||||
node_registry=node_registry,
|
|
||||||
all_tools=self.tools,
|
|
||||||
shared_node_registry=self.node_registry,
|
|
||||||
dynamic_tools_provider=self.dynamic_tools_provider,
|
dynamic_tools_provider=self.dynamic_tools_provider,
|
||||||
dynamic_prompt_provider=self.dynamic_prompt_provider,
|
dynamic_prompt_provider=self.dynamic_prompt_provider,
|
||||||
dynamic_memory_provider=self.dynamic_memory_provider,
|
dynamic_memory_provider=self.dynamic_memory_provider,
|
||||||
@@ -1293,14 +1289,14 @@ class GraphExecutor:
|
|||||||
Replaces the imperative while-loop with autonomous workers that
|
Replaces the imperative while-loop with autonomous workers that
|
||||||
self-activate based on edge conditions and fan-out tracking.
|
self-activate based on edge conditions and fan-out tracking.
|
||||||
"""
|
"""
|
||||||
from framework.graph.worker_agent import (
|
from framework.orchestrator.node_worker import (
|
||||||
Activation,
|
Activation,
|
||||||
FanOutTag,
|
FanOutTag,
|
||||||
WorkerAgent,
|
NodeWorker,
|
||||||
WorkerCompletion,
|
WorkerCompletion,
|
||||||
WorkerLifecycle,
|
WorkerLifecycle,
|
||||||
)
|
)
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
# Build shared graph context
|
# Build shared graph context
|
||||||
gc = GraphContext(
|
gc = GraphContext(
|
||||||
@@ -1339,9 +1335,9 @@ class GraphExecutor:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create one WorkerAgent per node
|
# Create one WorkerAgent per node
|
||||||
workers: dict[str, WorkerAgent] = {}
|
workers: dict[str, NodeWorker] = {}
|
||||||
for node_spec in graph.nodes:
|
for node_spec in graph.nodes:
|
||||||
workers[node_spec.id] = WorkerAgent(node_spec=node_spec, graph_context=gc)
|
workers[node_spec.id] = NodeWorker(node_spec=node_spec, graph_context=gc)
|
||||||
|
|
||||||
# Identify entry workers (graph entry node, not based on edge count)
|
# Identify entry workers (graph entry node, not based on edge count)
|
||||||
# A node can be the entry point AND have incoming feedback edges.
|
# A node can be the entry point AND have incoming feedback edges.
|
||||||
@@ -1442,7 +1438,7 @@ class GraphExecutor:
|
|||||||
|
|
||||||
def _route_activation(
|
def _route_activation(
|
||||||
activation: Activation,
|
activation: Activation,
|
||||||
workers_map: dict[str, WorkerAgent],
|
workers_map: dict[str, NodeWorker],
|
||||||
pending_tasks_map: dict[str, asyncio.Task],
|
pending_tasks_map: dict[str, asyncio.Task],
|
||||||
*,
|
*,
|
||||||
has_event_subscription: bool,
|
has_event_subscription: bool,
|
||||||
+4
-5
@@ -9,7 +9,7 @@ import json
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from framework.graph.prompting import (
|
from framework.orchestrator.prompting import (
|
||||||
EXECUTION_SCOPE_PREAMBLE,
|
EXECUTION_SCOPE_PREAMBLE,
|
||||||
TransitionSpec,
|
TransitionSpec,
|
||||||
build_accounts_prompt,
|
build_accounts_prompt,
|
||||||
@@ -19,7 +19,7 @@ from framework.graph.prompting import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.node import DataBuffer, NodeSpec
|
from framework.orchestrator.node import DataBuffer, NodeSpec
|
||||||
|
|
||||||
|
|
||||||
_with_datetime = stamp_prompt_datetime
|
_with_datetime = stamp_prompt_datetime
|
||||||
@@ -36,7 +36,7 @@ def compose_system_prompt(
|
|||||||
node_type_preamble: str | None = None,
|
node_type_preamble: str | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Compatibility wrapper for the legacy function signature."""
|
"""Compatibility wrapper for the legacy function signature."""
|
||||||
from framework.graph.prompting import NodePromptSpec
|
from framework.orchestrator.prompting import NodePromptSpec
|
||||||
|
|
||||||
spec = NodePromptSpec(
|
spec = NodePromptSpec(
|
||||||
identity_prompt=identity_prompt or "",
|
identity_prompt=identity_prompt or "",
|
||||||
@@ -66,7 +66,6 @@ def compose_system_prompt(
|
|||||||
protocols_prompt=spec.protocols_prompt,
|
protocols_prompt=spec.protocols_prompt,
|
||||||
node_type=spec.node_type,
|
node_type=spec.node_type,
|
||||||
output_keys=spec.output_keys,
|
output_keys=spec.output_keys,
|
||||||
is_subagent_mode=spec.is_subagent_mode,
|
|
||||||
)
|
)
|
||||||
return build_system_prompt(spec)
|
return build_system_prompt(spec)
|
||||||
|
|
||||||
@@ -135,7 +134,7 @@ def build_transition_marker(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
from framework.graph.prompting import build_transition_message # noqa: E402
|
from framework.orchestrator.prompting import build_transition_message # noqa: E402
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"EXECUTION_SCOPE_PREAMBLE",
|
"EXECUTION_SCOPE_PREAMBLE",
|
||||||
@@ -12,8 +12,8 @@ from datetime import datetime
|
|||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
from framework.graph.node import DataBuffer
|
from framework.orchestrator.node import DataBuffer
|
||||||
|
|
||||||
|
|
||||||
# Injected into every worker node's system prompt so the LLM understands
|
# Injected into every worker node's system prompt so the LLM understands
|
||||||
@@ -40,7 +40,6 @@ class NodePromptSpec:
|
|||||||
memory_prompt: str = ""
|
memory_prompt: str = ""
|
||||||
node_type: str = "event_loop"
|
node_type: str = "event_loop"
|
||||||
output_keys: tuple[str, ...] = ()
|
output_keys: tuple[str, ...] = ()
|
||||||
is_subagent_mode: bool = False
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -165,7 +164,6 @@ def build_prompt_spec_from_node_context(
|
|||||||
memory_prompt=resolved_memory_prompt,
|
memory_prompt=resolved_memory_prompt,
|
||||||
node_type=ctx.node_spec.node_type,
|
node_type=ctx.node_spec.node_type,
|
||||||
output_keys=tuple(ctx.node_spec.output_keys or ()),
|
output_keys=tuple(ctx.node_spec.output_keys or ()),
|
||||||
is_subagent_mode=bool(getattr(ctx, "is_subagent_mode", False)),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -195,13 +193,10 @@ def build_system_prompt(spec: NodePromptSpec) -> str:
|
|||||||
if spec.narrative:
|
if spec.narrative:
|
||||||
parts.append(f"\n--- Context (what has happened so far) ---\n{spec.narrative}")
|
parts.append(f"\n--- Context (what has happened so far) ---\n{spec.narrative}")
|
||||||
|
|
||||||
if not spec.is_subagent_mode and spec.node_type in ("event_loop", "gcu") and spec.output_keys:
|
if not False and spec.node_type == "event_loop" and spec.output_keys:
|
||||||
parts.append(f"\n{EXECUTION_SCOPE_PREAMBLE}")
|
parts.append(f"\n{EXECUTION_SCOPE_PREAMBLE}")
|
||||||
|
|
||||||
if spec.node_type == "gcu":
|
|
||||||
from framework.graph.gcu import GCU_BROWSER_SYSTEM_PROMPT
|
|
||||||
|
|
||||||
parts.append(f"\n{GCU_BROWSER_SYSTEM_PROMPT}")
|
|
||||||
|
|
||||||
if spec.focus_prompt:
|
if spec.focus_prompt:
|
||||||
parts.append(f"\n--- Current Focus ---\n{spec.focus_prompt}")
|
parts.append(f"\n--- Current Focus ---\n{spec.focus_prompt}")
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
"""Pipeline middleware for the agent runtime.
|
||||||
|
|
||||||
|
Stages run in order when :meth:`AgentRuntime.trigger` receives a request.
|
||||||
|
Each stage can pass the context through, transform the input data, or reject
|
||||||
|
the request entirely. This is the runtime-level analogue of AstrBot's
|
||||||
|
pipeline architecture and lets operators compose rate limiting, validation,
|
||||||
|
cost guards, and custom pre/post-processing without patching core code.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from framework.pipeline.registry import (
|
||||||
|
build_pipeline_from_config,
|
||||||
|
build_stage,
|
||||||
|
register,
|
||||||
|
)
|
||||||
|
from framework.pipeline.runner import PipelineRunner
|
||||||
|
from framework.pipeline.stage import (
|
||||||
|
PipelineContext,
|
||||||
|
PipelineRejectedError,
|
||||||
|
PipelineResult,
|
||||||
|
PipelineStage,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"PipelineContext",
|
||||||
|
"PipelineRejectedError",
|
||||||
|
"PipelineResult",
|
||||||
|
"PipelineRunner",
|
||||||
|
"PipelineStage",
|
||||||
|
"build_pipeline_from_config",
|
||||||
|
"build_stage",
|
||||||
|
"register",
|
||||||
|
]
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
"""Execution-level middleware protocol.
|
||||||
|
|
||||||
|
Unlike :class:`PipelineStage` (which gates ``AgentHost.trigger()`` at the
|
||||||
|
request level), execution middleware runs at the start of **every** execution
|
||||||
|
attempt inside ``ExecutionManager._run_execution()`` -- including resurrection
|
||||||
|
retries.
|
||||||
|
|
||||||
|
Use this for concerns that must re-evaluate per attempt:
|
||||||
|
- Cost tracking (charge per attempt, not per trigger)
|
||||||
|
- Tool scoping (different tools on retry)
|
||||||
|
- Checkpoint config overrides
|
||||||
|
- Per-execution logging/tracing setup
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ExecutionContext:
|
||||||
|
"""Context passed to execution middleware."""
|
||||||
|
|
||||||
|
execution_id: str
|
||||||
|
stream_id: str
|
||||||
|
run_id: str
|
||||||
|
input_data: dict[str, Any]
|
||||||
|
session_state: dict[str, Any] | None = None
|
||||||
|
attempt: int = 1
|
||||||
|
metadata: dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
class ExecutionMiddleware(ABC):
|
||||||
|
"""Base class for per-execution middleware."""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def on_execution_start(self, ctx: ExecutionContext) -> ExecutionContext:
|
||||||
|
"""Called before each execution attempt (including resurrections).
|
||||||
|
|
||||||
|
Modify and return *ctx* to transform execution parameters.
|
||||||
|
Raise to abort the execution.
|
||||||
|
"""
|
||||||
@@ -0,0 +1,107 @@
|
|||||||
|
"""Pipeline stage registry -- maps type names to stage classes.
|
||||||
|
|
||||||
|
Stages self-register via the ``@register`` decorator. The
|
||||||
|
``build_pipeline_from_config`` function reads a declarative config
|
||||||
|
(from ``~/.hive/configuration.json`` or ``agent.json``) and
|
||||||
|
instantiates the corresponding stage objects.
|
||||||
|
|
||||||
|
Example config::
|
||||||
|
|
||||||
|
{
|
||||||
|
"pipeline": {
|
||||||
|
"stages": [
|
||||||
|
{"type": "rate_limit", "order": 200, "config": {"max_requests_per_minute": 60}},
|
||||||
|
{"type": "cost_guard", "order": 300, "config": {"max_cost_per_request": 0.50}}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.runner import PipelineRunner
|
||||||
|
from framework.pipeline.stage import PipelineStage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_STAGE_REGISTRY: dict[str, type[PipelineStage]] = {}
|
||||||
|
|
||||||
|
|
||||||
|
def register(name: str):
|
||||||
|
"""Decorator to register a pipeline stage class by type name.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
@register("rate_limit")
|
||||||
|
class RateLimitStage(PipelineStage):
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
def decorator(cls: type[PipelineStage]) -> type[PipelineStage]:
|
||||||
|
_STAGE_REGISTRY[name] = cls
|
||||||
|
return cls
|
||||||
|
|
||||||
|
return decorator
|
||||||
|
|
||||||
|
|
||||||
|
def get_registered_stages() -> dict[str, type[PipelineStage]]:
|
||||||
|
"""Return a copy of the stage registry."""
|
||||||
|
return dict(_STAGE_REGISTRY)
|
||||||
|
|
||||||
|
|
||||||
|
def build_stage(spec: dict[str, Any]) -> PipelineStage:
|
||||||
|
"""Instantiate a single stage from a config spec.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
spec: Dict with ``type`` (required), ``order`` (optional),
|
||||||
|
and ``config`` (optional kwargs dict).
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
KeyError: If the stage type is not registered.
|
||||||
|
"""
|
||||||
|
stage_type = spec["type"]
|
||||||
|
if stage_type not in _STAGE_REGISTRY:
|
||||||
|
available = ", ".join(sorted(_STAGE_REGISTRY)) or "(none)"
|
||||||
|
raise KeyError(
|
||||||
|
f"Unknown pipeline stage type '{stage_type}'. "
|
||||||
|
f"Available: {available}"
|
||||||
|
)
|
||||||
|
cls = _STAGE_REGISTRY[stage_type]
|
||||||
|
config = spec.get("config", {})
|
||||||
|
stage = cls(**config)
|
||||||
|
if "order" in spec:
|
||||||
|
stage.order = spec["order"]
|
||||||
|
return stage
|
||||||
|
|
||||||
|
|
||||||
|
def build_pipeline_from_config(
|
||||||
|
stages_config: list[dict[str, Any]],
|
||||||
|
) -> PipelineRunner:
|
||||||
|
"""Build a ``PipelineRunner`` from a declarative stages list.
|
||||||
|
|
||||||
|
Each entry is ``{"type": "...", "order": N, "config": {...}}``.
|
||||||
|
"""
|
||||||
|
# Import built-in stages so they self-register
|
||||||
|
_ensure_builtins_registered()
|
||||||
|
|
||||||
|
stages = [build_stage(s) for s in stages_config]
|
||||||
|
return PipelineRunner(stages)
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_builtins_registered() -> None:
|
||||||
|
"""Import built-in stage modules so their ``@register`` decorators fire."""
|
||||||
|
if _STAGE_REGISTRY:
|
||||||
|
return # already populated
|
||||||
|
try:
|
||||||
|
import framework.pipeline.stages.cost_guard # noqa: F401
|
||||||
|
import framework.pipeline.stages.credential_resolver # noqa: F401
|
||||||
|
import framework.pipeline.stages.input_validation # noqa: F401
|
||||||
|
import framework.pipeline.stages.llm_provider # noqa: F401
|
||||||
|
import framework.pipeline.stages.mcp_registry # noqa: F401
|
||||||
|
import framework.pipeline.stages.rate_limit # noqa: F401
|
||||||
|
import framework.pipeline.stages.skill_registry # noqa: F401
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
@@ -0,0 +1,111 @@
|
|||||||
|
"""Pipeline runner -- executes registered stages in order."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.stage import (
|
||||||
|
PipelineContext,
|
||||||
|
PipelineRejectedError,
|
||||||
|
PipelineStage,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineRunner:
|
||||||
|
"""Executes a list of :class:`PipelineStage` instances in ``order``.
|
||||||
|
|
||||||
|
The runner is the orchestration layer that :class:`AgentRuntime` calls
|
||||||
|
on every trigger. Stages execute in ascending ``order`` (ties broken by
|
||||||
|
registration order). A stage returning ``reject`` short-circuits the
|
||||||
|
pipeline and causes the trigger to raise :class:`PipelineRejectedError`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, stages: list[PipelineStage] | None = None) -> None:
|
||||||
|
self._stages: list[PipelineStage] = sorted(stages or [], key=lambda s: s.order)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def stages(self) -> list[PipelineStage]:
|
||||||
|
return list(self._stages)
|
||||||
|
|
||||||
|
def add_stage(self, stage: PipelineStage) -> None:
|
||||||
|
"""Add a stage after construction (for dynamic registration)."""
|
||||||
|
self._stages.append(stage)
|
||||||
|
self._stages.sort(key=lambda s: s.order)
|
||||||
|
|
||||||
|
async def initialize_all(self) -> None:
|
||||||
|
"""Call ``initialize`` on every registered stage."""
|
||||||
|
for stage in self._stages:
|
||||||
|
name = stage.__class__.__name__
|
||||||
|
logger.info("[pipeline] Initializing %s (order=%d)", name, stage.order)
|
||||||
|
await stage.initialize()
|
||||||
|
logger.info("[pipeline] %s initialized", name)
|
||||||
|
if self._stages:
|
||||||
|
logger.info(
|
||||||
|
"[pipeline] Ready: %d stages [%s]",
|
||||||
|
len(self._stages),
|
||||||
|
" -> ".join(s.__class__.__name__ for s in self._stages),
|
||||||
|
)
|
||||||
|
|
||||||
|
async def run(self, ctx: PipelineContext) -> PipelineContext:
|
||||||
|
"""Run all stages. Raises ``PipelineRejectedError`` on rejection.
|
||||||
|
|
||||||
|
Returns the (possibly transformed) context.
|
||||||
|
"""
|
||||||
|
if not self._stages:
|
||||||
|
return ctx
|
||||||
|
import time
|
||||||
|
|
||||||
|
pipeline_start = time.perf_counter()
|
||||||
|
logger.info(
|
||||||
|
"[pipeline] Running %d stages for entry_point=%s",
|
||||||
|
len(self._stages),
|
||||||
|
ctx.entry_point_id,
|
||||||
|
)
|
||||||
|
for stage in self._stages:
|
||||||
|
stage_name = stage.__class__.__name__
|
||||||
|
t0 = time.perf_counter()
|
||||||
|
result = await stage.process(ctx)
|
||||||
|
elapsed_ms = (time.perf_counter() - t0) * 1000
|
||||||
|
if result.action == "reject":
|
||||||
|
reason = result.rejection_reason or "(no reason given)"
|
||||||
|
logger.warning(
|
||||||
|
"[pipeline] REJECTED by %s (%.1fms): %s",
|
||||||
|
stage_name, elapsed_ms, reason,
|
||||||
|
)
|
||||||
|
raise PipelineRejectedError(stage_name, reason)
|
||||||
|
if result.action == "transform":
|
||||||
|
logger.info(
|
||||||
|
"[pipeline] %s TRANSFORMED input (%.1fms)",
|
||||||
|
stage_name, elapsed_ms,
|
||||||
|
)
|
||||||
|
if result.input_data is not None:
|
||||||
|
ctx.input_data = result.input_data
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"[pipeline] %s passed (%.1fms)",
|
||||||
|
stage_name, elapsed_ms,
|
||||||
|
)
|
||||||
|
total_ms = (time.perf_counter() - pipeline_start) * 1000
|
||||||
|
logger.info("[pipeline] Complete (%.1fms total)", total_ms)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
async def run_post(self, ctx: PipelineContext, result: Any) -> Any:
    """Feed *result* through every stage's ``post_process`` hook in order.

    Each stage may transform the value; the final value is returned.
    A raising hook is logged and skipped -- post-processing must never
    break a successful execution.
    """
    transformed = result
    for stage in self._stages:
        try:
            transformed = await stage.post_process(ctx, transformed)
        except Exception:
            logger.exception(
                "Pipeline post_process raised in %s; continuing with previous result",
                stage.__class__.__name__,
            )
    return transformed
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
"""Pipeline stage base class and request/response types."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Literal
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineRejectedError(Exception):
    """Raised by ``AgentHost.trigger`` when a stage rejects the request.

    Attributes:
        stage_name: name of the rejecting stage.
        reason: human-readable rejection reason.
    """

    def __init__(self, stage_name: str, reason: str) -> None:
        self.stage_name = stage_name
        self.reason = reason
        super().__init__(f"Pipeline rejected by {stage_name}: {reason}")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PipelineContext:
    """Carries request data through the pipeline."""

    # Which entry point received the request.
    entry_point_id: str
    # Raw request payload; stages may replace it via a "transform" result.
    input_data: dict[str, Any]
    # Optional ID used to correlate related events and requests.
    correlation_id: str | None = None
    # Per-session state, when available.
    session_state: dict[str, Any] | None = None
    # Scratch space for stages to share derived values
    # (e.g. ``estimated_cost`` read by CostGuardStage).
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PipelineResult:
    """Outcome of a stage's ``process`` call."""

    # "continue" passes the request through, "reject" aborts it,
    # "transform" swaps in a new ``input_data`` payload.
    action: Literal["continue", "reject", "transform"] = "continue"
    # Replacement payload; only meaningful when ``action == "transform"``.
    input_data: dict[str, Any] | None = None
    # Human-readable reason; only meaningful when ``action == "reject"``.
    rejection_reason: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineStage(ABC):
    """Base class for all middleware stages.

    Infrastructure stages (LLM, MCP, credentials, skills) set typed
    attributes during ``initialize()`` that the host reads after all
    stages have initialized. Request-level stages (rate limit, input
    validation, cost guard) implement ``process()``.

    Attributes set by infrastructure stages:
        llm: LLM provider instance (set by LlmProviderStage)
        tool_registry: ToolRegistry with discovered MCP tools (set by McpRegistryStage)
        accounts_prompt: Connected accounts system prompt block (set by CredentialResolverStage)
        accounts_data: Raw account info list (set by CredentialResolverStage)
        tool_provider_map: Tool name -> provider mapping (set by CredentialResolverStage)
        skills_manager: SkillsManager instance (set by SkillRegistryStage)
    """

    # Stages run in ascending ``order``; 100 is the default middle slot.
    order: int = 100

    # Infrastructure stage outputs -- typed so _apply_pipeline_results
    # doesn't need hasattr() sniffing.
    llm: Any = None
    tool_registry: Any = None
    accounts_prompt: str = ""
    accounts_data: list[dict] | None = None
    tool_provider_map: dict[str, str] | None = None
    skills_manager: Any = None

    async def initialize(self) -> None:
        """Called once when the runtime starts."""
        return None

    @abstractmethod
    async def process(self, ctx: PipelineContext) -> PipelineResult:
        """Process the incoming request."""

    async def post_process(self, ctx: PipelineContext, result: Any) -> Any:
        """Optional post-execution hook. Default: pass-through."""
        return result
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
"""Built-in pipeline stages."""
|
||||||
|
|
||||||
|
from framework.pipeline.stages.cost_guard import CostGuardStage
|
||||||
|
from framework.pipeline.stages.credential_resolver import CredentialResolverStage
|
||||||
|
from framework.pipeline.stages.input_validation import InputValidationStage
|
||||||
|
from framework.pipeline.stages.llm_provider import LlmProviderStage
|
||||||
|
from framework.pipeline.stages.mcp_registry import McpRegistryStage
|
||||||
|
from framework.pipeline.stages.rate_limit import RateLimitStage
|
||||||
|
from framework.pipeline.stages.skill_registry import SkillRegistryStage
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"CostGuardStage",
|
||||||
|
"CredentialResolverStage",
|
||||||
|
"InputValidationStage",
|
||||||
|
"LlmProviderStage",
|
||||||
|
"McpRegistryStage",
|
||||||
|
"RateLimitStage",
|
||||||
|
"SkillRegistryStage",
|
||||||
|
]
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
"""Cost guard stage -- reject requests over a pre-flight budget."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
|
||||||
|
@register("cost_guard")
class CostGuardStage(PipelineStage):
    """Reject requests whose estimated cost exceeds the per-request budget.

    The cost estimate must be populated in ``ctx.metadata["estimated_cost"]``
    by an earlier stage (or by the caller). When no estimate is present,
    the stage passes through.
    """

    order = 300

    def __init__(self, max_cost_per_request: float = 1.0) -> None:
        # Maximum allowed estimated cost for a single request.
        self._budget = max_cost_per_request

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        """Reject when an estimate is present and strictly over budget."""
        estimate = ctx.metadata.get("estimated_cost")
        # No estimate means there is nothing to enforce; the strict ``>``
        # comparison is preserved so a borderline estimate still passes.
        if estimate is not None and estimate > self._budget:
            return PipelineResult(
                action="reject",
                rejection_reason=(
                    f"Estimated cost ${estimate:.4f} exceeds budget "
                    f"${self._budget:.4f}"
                ),
            )
        return PipelineResult(action="continue")
|
||||||
@@ -0,0 +1,58 @@
|
|||||||
|
"""Credential resolver pipeline stage.
|
||||||
|
|
||||||
|
Resolves connected accounts at startup. Individual credential TTL/refresh
|
||||||
|
is handled by MCP server processes internally -- they resolve tokens from
|
||||||
|
the credential store on every tool call.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@register("credential_resolver")
class CredentialResolverStage(PipelineStage):
    """Resolve connected accounts for system prompt injection."""

    order = 40

    def __init__(self, credential_store: Any = None, **kwargs: Any) -> None:
        # Optional pre-built store; when None the default adapter is used.
        self._credential_store = credential_store
        self.accounts_prompt = ""
        self.accounts_data: list[dict] | None = None
        self.tool_provider_map: dict[str, str] | None = None

    async def initialize(self) -> None:
        """Resolve accounts once at startup; any failure is non-fatal."""
        try:
            from aden_tools.credentials.store_adapter import (
                CredentialStoreAdapter,
            )
            from framework.orchestrator.prompting import build_accounts_prompt

            if self._credential_store is None:
                adapter = CredentialStoreAdapter.default()
            else:
                adapter = CredentialStoreAdapter(store=self._credential_store)
            self.accounts_data = adapter.get_all_account_info()
            self.tool_provider_map = adapter.get_tool_provider_map()
            # Only build the prompt block when there is at least one account.
            if self.accounts_data:
                self.accounts_prompt = build_accounts_prompt(
                    self.accounts_data, self.tool_provider_map,
                )
            logger.info(
                "[pipeline] CredentialResolverStage: %d accounts",
                len(self.accounts_data or []),
            )
        except Exception:
            # Deliberate best-effort: missing credentials must not block startup.
            logger.debug(
                "Credential resolution failed (non-fatal)", exc_info=True,
            )

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        # Infrastructure stage: all work happens in initialize().
        return PipelineResult(action="continue")
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
"""Input validation stage.
|
||||||
|
|
||||||
|
Rejects requests whose ``input_data`` does not match the entry point's
|
||||||
|
declared input schema. Uses a user-provided schema map:
|
||||||
|
``{entry_point_id: {required_key: expected_type, ...}}``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
|
||||||
|
@register("input_validation")
class InputValidationStage(PipelineStage):
    """Validate ``input_data`` against per-entry-point schemas.

    The schema is a simple dict mapping key -> expected Python type.
    For richer validation, substitute a Pydantic-based stage.
    """

    order = 100

    def __init__(self, schemas: dict[str, dict[str, type]] | None = None) -> None:
        # {entry_point_id: {required_key: expected_type, ...}}
        self._schemas = schemas or {}

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        """Reject on the first missing or mistyped key; otherwise continue."""
        schema = self._schemas.get(ctx.entry_point_id)
        # Entry points without a declared schema are not validated.
        if not schema:
            return PipelineResult(action="continue")

        for key, expected_type in schema.items():
            try:
                value = ctx.input_data[key]
            except KeyError:
                return PipelineResult(
                    action="reject",
                    rejection_reason=f"Missing required input key: '{key}'",
                )
            # NOTE(review): isinstance(True, int) is True, so bools satisfy
            # an ``int`` schema entry -- presumably acceptable; confirm.
            if not isinstance(value, expected_type):
                return PipelineResult(
                    action="reject",
                    rejection_reason=(
                        f"Input key '{key}' has type {type(value).__name__}, "
                        f"expected {expected_type.__name__}"
                    ),
                )
        return PipelineResult(action="continue")
|
||||||
@@ -0,0 +1,95 @@
|
|||||||
|
"""LLM provider pipeline stage.
|
||||||
|
|
||||||
|
Resolves the LLM provider from global config. This is the ONLY place
|
||||||
|
the LLM gets created for worker agents.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@register("llm_provider")
class LlmProviderStage(PipelineStage):
    """Resolve LLM provider and make it available."""

    order = 10

    def __init__(
        self,
        model: str | None = None,
        mock_mode: bool = False,
        llm: Any = None,
        **kwargs: Any,
    ) -> None:
        self._model = model
        self._mock_mode = mock_mode
        # Pre-injected LLM (e.g. from session); skips resolution entirely.
        self.llm = llm

    async def initialize(self) -> None:
        """Pick a provider: injected > mock > Antigravity > LiteLLM."""
        if self.llm is not None:
            return  # Already injected

        from framework.config import (
            get_api_key,
            get_api_keys,
            get_hive_config,
            get_preferred_model,
        )

        chosen_model = self._model or get_preferred_model()

        if self._mock_mode:
            from framework.llm.mock import MockLLMProvider

            self.llm = MockLLMProvider(model=chosen_model)
            return

        settings = get_hive_config().get("llm", {})
        base_url = settings.get("api_base")

        # Check for Antigravity (special provider); fall through silently
        # when it is unavailable or has no credentials.
        if settings.get("use_antigravity_subscription"):
            try:
                from framework.llm.antigravity import AntigravityProvider

                candidate = AntigravityProvider(model=chosen_model)
                if candidate.has_credentials():
                    self.llm = candidate
                    logger.info("[pipeline] LlmProviderStage: Antigravity")
                    return
            except Exception:
                pass

        from framework.llm.litellm import LiteLLMProvider

        single_key = get_api_key()
        key_pool = get_api_keys()

        if key_pool and len(key_pool) > 1:
            # Multiple keys: hand the whole pool to the provider.
            self.llm = LiteLLMProvider(
                model=chosen_model, api_keys=key_pool, api_base=base_url,
            )
        elif single_key:
            overrides = {}
            # sk-ant-oat tokens are sent as a bearer authorization header.
            if single_key.startswith("sk-ant-oat"):
                overrides["extra_headers"] = {
                    "authorization": f"Bearer {single_key}"
                }
            self.llm = LiteLLMProvider(
                model=chosen_model,
                api_key=single_key,
                api_base=base_url,
                **overrides,
            )
        else:
            # No key configured; rely on provider/environment defaults.
            self.llm = LiteLLMProvider(model=chosen_model, api_base=base_url)

        logger.info("[pipeline] LlmProviderStage: %s", chosen_model)

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        # Infrastructure stage: nothing to do per-request.
        return PipelineResult(action="continue")
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
"""MCP registry pipeline stage.
|
||||||
|
|
||||||
|
Resolves MCP server references from the agent config against the global
|
||||||
|
registry and registers tools. This is the ONLY place MCP tools get loaded.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import asdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@register("mcp_registry")
class McpRegistryStage(PipelineStage):
    """Resolve MCP tools from the global registry."""

    order = 50

    def __init__(
        self,
        server_refs: list[dict[str, Any]] | None = None,
        agent_path: str | Path | None = None,
        tool_registry: Any = None,
        **kwargs: Any,
    ) -> None:
        # MCP server references from agent config (each a {"name": ...} dict).
        self._server_refs = server_refs or []
        self._agent_path = Path(agent_path) if agent_path else None
        self._tool_registry = tool_registry

    async def initialize(self) -> None:
        """Connect to MCP servers and discover tools."""
        if self._tool_registry is None:
            from framework.loader.tool_registry import ToolRegistry

            self._tool_registry = ToolRegistry()

        from framework.loader.mcp_registry import MCPRegistry

        global_registry = MCPRegistry()
        loaded = False

        # 1. From agent.json mcp_servers refs
        wanted = [ref["name"] for ref in self._server_refs if ref.get("name")]
        if wanted:
            resolved = global_registry.resolve_for_agent(include=wanted)
            if resolved:
                self._tool_registry.load_registry_servers(
                    [asdict(c) for c in resolved]
                )
                loaded = True
                logger.info(
                    "[pipeline] McpRegistryStage: loaded %d servers: %s",
                    len(resolved),
                    wanted,
                )

        # 2. Legacy: mcp_servers.json
        if not loaded and self._agent_path:
            legacy_config = self._agent_path / "mcp_servers.json"
            if legacy_config.exists():
                self._tool_registry.load_mcp_config(legacy_config)
                loaded = True

        # 3. Fallback: all servers from global registry
        if not loaded:
            resolved = global_registry.resolve_for_agent(profile="all")
            if resolved:
                self._tool_registry.load_registry_servers(
                    [asdict(c) for c in resolved]
                )
                logger.info(
                    "[pipeline] McpRegistryStage: loaded %d servers (fallback)",
                    len(resolved),
                )

        logger.info(
            "[pipeline] McpRegistryStage: %d tools available",
            len(self._tool_registry.get_tools()),
        )

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        # Infrastructure stage: discovery happens in initialize().
        return PipelineResult(action="continue")

    @property
    def tool_registry(self):
        # Exposed for the host to read after initialize().
        return self._tool_registry
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
"""Per-(entry-point, session) rate limiting stage."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
|
||||||
|
@register("rate_limit")
class RateLimitStage(PipelineStage):
    """Reject requests that exceed ``max_requests_per_minute`` per session.

    The key is ``<entry_point_id>:<session_id>``. When no session_id is
    present in ``session_state``, a single shared "default" bucket is used.
    """

    order = 200

    def __init__(self, max_requests_per_minute: int = 60) -> None:
        # Allowed requests per rolling 60-second window, per bucket.
        self._max_rpm = max_requests_per_minute
        # bucket key -> monotonic timestamps of requests inside the window
        self._timestamps: dict[str, list[float]] = defaultdict(list)

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        """Check (and on success record) one request against its bucket."""
        session_id = "default"
        if ctx.session_state:
            session_id = str(ctx.session_state.get("session_id", "default"))
        key = f"{ctx.entry_point_id}:{session_id}"

        now = time.monotonic()
        # Prune entries older than 60s.
        bucket = [t for t in self._timestamps[key] if now - t < 60.0]
        if len(bucket) >= self._max_rpm:
            self._timestamps[key] = bucket
            return PipelineResult(
                action="reject",
                rejection_reason=(
                    f"Rate limit exceeded: {self._max_rpm} req/min "
                    f"for session '{session_id}'"
                ),
            )
        bucket.append(now)
        self._timestamps[key] = bucket
        # BUGFIX: previously only the bucket being touched was ever pruned,
        # so one entry per dead session was retained forever. Sweep buckets
        # whose newest timestamp fell out of the window.
        self._drop_stale_buckets(now)
        return PipelineResult(action="continue")

    def _drop_stale_buckets(self, now: float) -> None:
        """Delete buckets whose newest entry is outside the 60s window."""
        stale = [
            k
            for k, ts in self._timestamps.items()
            if not ts or now - ts[-1] >= 60.0
        ]
        for k in stale:
            del self._timestamps[k]
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
"""Skill registry pipeline stage.
|
||||||
|
|
||||||
|
Discovers and loads skills. This is the ONLY place skills get loaded.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@register("skill_registry")
class SkillRegistryStage(PipelineStage):
    """Discover skills and provide prompts."""

    order = 60

    def __init__(
        self,
        project_root: str | Path | None = None,
        interactive: bool = True,
        skills_config: Any = None,
        **kwargs: Any,
    ) -> None:
        self._project_root = Path(project_root) if project_root else None
        self._interactive = interactive
        self._skills_config = skills_config
        # Populated during initialize(); read by the host afterwards.
        self.skills_manager: Any = None

    async def initialize(self) -> None:
        """Build the manager, load skills, and start watching for changes."""
        from framework.skills.config import SkillsConfig
        from framework.skills.manager import SkillsManager, SkillsManagerConfig

        manager_config = SkillsManagerConfig(
            skills_config=self._skills_config or SkillsConfig(),
            project_root=self._project_root,
            interactive=self._interactive,
        )
        self.skills_manager = SkillsManager(manager_config)
        self.skills_manager.load()
        await self.skills_manager.start_watching()
        logger.info(
            "[pipeline] SkillRegistryStage: catalog=%d chars, protocols=%d chars",
            len(self.skills_manager.skills_catalog_prompt),
            len(self.skills_manager.protocols_prompt),
        )

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        # Infrastructure stage: loading happens in initialize().
        return PipelineResult(action="continue")
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
"""Agent Runner - load and run exported agents."""
|
|
||||||
|
|
||||||
from framework.runner.mcp_registry import MCPRegistry
|
|
||||||
from framework.runner.protocol import (
|
|
||||||
AgentMessage,
|
|
||||||
CapabilityLevel,
|
|
||||||
CapabilityResponse,
|
|
||||||
MessageType,
|
|
||||||
OrchestratorResult,
|
|
||||||
)
|
|
||||||
from framework.runner.runner import AgentInfo, AgentRunner, ValidationResult
|
|
||||||
from framework.runner.tool_registry import ToolRegistry, tool
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
# Single agent
|
|
||||||
"AgentRunner",
|
|
||||||
"AgentInfo",
|
|
||||||
"ValidationResult",
|
|
||||||
"ToolRegistry",
|
|
||||||
"MCPRegistry",
|
|
||||||
"tool",
|
|
||||||
"AgentMessage",
|
|
||||||
"MessageType",
|
|
||||||
"CapabilityLevel",
|
|
||||||
"CapabilityResponse",
|
|
||||||
"OrchestratorResult",
|
|
||||||
]
|
|
||||||
@@ -1,493 +0,0 @@
|
|||||||
# Event Types and Schema Reference
|
|
||||||
|
|
||||||
The Hive runtime uses a pub/sub `EventBus` for inter-component communication and observability. Every event is an `AgentEvent` dataclass published through `EventBus.publish()`.
|
|
||||||
|
|
||||||
## Event Envelope (`AgentEvent`)
|
|
||||||
|
|
||||||
Every event shares a common envelope:
|
|
||||||
|
|
||||||
| Field | Type | Description |
|
|
||||||
| ---------------- | ----------------- | ------------------------------------------------------------ |
|
|
||||||
| `type` | `EventType` (str) | Event type identifier (see below) |
|
|
||||||
| `stream_id` | `str` | Entry point / pipeline that emitted the event |
|
|
||||||
| `node_id` | `str \| None` | Graph node that emitted the event |
|
|
||||||
| `execution_id` | `str \| None` | Unique execution run ID (UUID, set by `ExecutionStream`) |
|
|
||||||
| `graph_id` | `str \| None` | Graph that emitted the event (set by `GraphScopedEventBus`) |
|
|
||||||
| `data` | `dict` | Event-type-specific payload (see individual schemas below) |
|
|
||||||
| `timestamp` | `datetime` | When the event was created |
|
|
||||||
| `correlation_id` | `str \| None` | Optional ID for tracking related events across streams |
|
|
||||||
|
|
||||||
### Identity Fields
|
|
||||||
|
|
||||||
The identity tuple `(graph_id, stream_id, node_id, execution_id)` uniquely locates any event:
|
|
||||||
|
|
||||||
- **`graph_id`** — Which graph produced the event. Set automatically by `GraphScopedEventBus` (a subclass that stamps `graph_id` on every `publish()` call). Values: `"worker"`, `"judge"`, `"queen"`, or the graph spec ID.
|
|
||||||
- **`stream_id`** — Which entry point / pipeline. Corresponds to `EntryPointSpec.id` in the graph definition. For single-entry-point graphs, this equals the entry point name (e.g. `"default"`, `"health_check"`).
|
|
||||||
- **`node_id`** — Which specific node emitted the event. For `EventLoopNode` events, this is the node spec ID.
|
|
||||||
- **`execution_id`** — UUID identifying a specific execution run. Multiple concurrent executions of the same entry point each get a unique `execution_id`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Execution Lifecycle
|
|
||||||
|
|
||||||
### `execution_started`
|
|
||||||
|
|
||||||
A new graph execution has begun.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ------ | ------------------------------- |
|
|
||||||
| `input` | `dict` | Input data passed to the graph |
|
|
||||||
|
|
||||||
**Emitted by:** `ExecutionStream._run_execution()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `execution_completed`
|
|
||||||
|
|
||||||
A graph execution finished successfully.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ------ | ----------------- |
|
|
||||||
| `output` | `dict` | Final output data |
|
|
||||||
|
|
||||||
**Emitted by:** `ExecutionStream._run_execution()`
|
|
||||||
|
|
||||||
**Queen notification:** When a worker execution completes, the session manager
injects a `[WORKER_TERMINAL]` notification into the queen with the output summary.
The queen reports to the user and asks what to do next.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `execution_failed`
|
|
||||||
|
|
||||||
A graph execution failed with an error.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ------------- |
|
|
||||||
| `error` | `str` | Error message |
|
|
||||||
|
|
||||||
**Emitted by:** `ExecutionStream._run_execution()`
|
|
||||||
|
|
||||||
**Queen notification:** When a worker execution fails, the session manager
injects a `[WORKER_TERMINAL]` notification into the queen with the error.
The queen reports to the user and helps troubleshoot.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `execution_paused`
|
|
||||||
|
|
||||||
Execution has been paused (Ctrl+Z or HITL approval).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ----------------- |
|
|
||||||
| `reason` | `str` | Why it was paused |
|
|
||||||
|
|
||||||
**Emitted by:** `GraphExecutor.execute()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `execution_resumed`
|
|
||||||
|
|
||||||
Execution has resumed from a paused state.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ---- | ----------- |
|
|
||||||
| *(none)* | | |
|
|
||||||
|
|
||||||
**Emitted by:** `GraphExecutor.execute()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Node Event-Loop Lifecycle
|
|
||||||
|
|
||||||
These events track the inner loop of `EventLoopNode` — the multi-turn LLM streaming loop that powers most agent nodes.
|
|
||||||
|
|
||||||
### `node_loop_started`
|
|
||||||
|
|
||||||
An EventLoopNode has begun its execution loop.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------------- | ---------- | ------------------------------- |
|
|
||||||
| `max_iterations` | `int\|null`| Maximum iterations configured |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_loop_started()`, `GraphExecutor` (for function nodes in parallel branches)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `node_loop_iteration`
|
|
||||||
|
|
||||||
An EventLoopNode has started a new iteration (one LLM turn).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ----------- | ----- | ------------------------- |
|
|
||||||
| `iteration` | `int` | Zero-based iteration index |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_iteration()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `node_loop_completed`
|
|
||||||
|
|
||||||
An EventLoopNode has finished its execution loop.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------ | ----- | -------------------------------------- |
|
|
||||||
| `iterations` | `int` | Total number of iterations completed |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_loop_completed()`, `GraphExecutor` (for function nodes in parallel branches)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## LLM Streaming
|
|
||||||
|
|
||||||
### `llm_text_delta`
|
|
||||||
|
|
||||||
Incremental text output from the LLM (non-client-facing nodes only).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ---------------------------------------- |
|
|
||||||
| `content` | `str` | New text chunk (delta) |
|
|
||||||
| `snapshot` | `str` | Full accumulated text so far |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_text_delta()` when `client_facing=False`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `llm_reasoning_delta`
|
|
||||||
|
|
||||||
Incremental reasoning/thinking output from the LLM.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ------------------- |
|
|
||||||
| `content` | `str` | New reasoning chunk |
|
|
||||||
|
|
||||||
**Emitted by:** Not currently wired in `EventLoopNode` (reserved for extended thinking models).
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Tool Lifecycle
|
|
||||||
|
|
||||||
### `tool_call_started`
|
|
||||||
|
|
||||||
The LLM has requested a tool call and execution is about to begin.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------ | ------ | ------------------------------------ |
|
|
||||||
| `tool_use_id`| `str` | Unique ID for this tool invocation |
|
|
||||||
| `tool_name` | `str` | Name of the tool being called |
|
|
||||||
| `tool_input` | `dict` | Arguments passed to the tool |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_tool_started()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `tool_call_completed`
|
|
||||||
|
|
||||||
A tool call has finished executing.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------ | ------ | -------------------------------------- |
|
|
||||||
| `tool_use_id`| `str` | Same ID from `tool_call_started` |
|
|
||||||
| `tool_name` | `str` | Name of the tool |
|
|
||||||
| `result` | `str` | Tool execution result (may be truncated)|
|
|
||||||
| `is_error` | `bool` | Whether the tool returned an error |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_tool_completed()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Client I/O
|
|
||||||
|
|
||||||
These events are emitted by the queen's interactive turns. They drive the TUI's chat interface.
|
|
||||||
|
|
||||||
### `client_output_delta`
|
|
||||||
|
|
||||||
Incremental text output meant for the human operator.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ---------------------------- |
|
|
||||||
| `content` | `str` | New text chunk (delta) |
|
|
||||||
| `snapshot` | `str` | Full accumulated text so far |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_text_delta()` for queen/user-facing output
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `client_input_requested`
|
|
||||||
|
|
||||||
The node is waiting for human input (via `ask_user` tool or auto-block on text-only turns).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ------------------------------------------------- |
|
|
||||||
| `prompt` | `str` | Optional prompt/question shown to the user |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._await_user_input()`, doom loop handler
|
|
||||||
|
|
||||||
The TUI subscribes to this event to show the input prompt and focus the chat input. After the user types, `inject_event()` is called on the node to unblock it.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Internal Node Observability
|
|
||||||
|
|
||||||
### `node_internal_output`
|
|
||||||
|
|
||||||
Output from a non-client-facing node (for debugging/monitoring).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ---------------- |
|
|
||||||
| `content` | `str` | Output text |
|
|
||||||
|
|
||||||
**Emitted by:** Available via `emit_node_internal_output()` — not currently wired in the default `EventLoopNode`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `node_input_blocked`
|
|
||||||
|
|
||||||
A non-client-facing node is blocked waiting for input.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | --------------- |
|
|
||||||
| `prompt` | `str` | Block reason |
|
|
||||||
|
|
||||||
**Emitted by:** Available via `emit_node_input_blocked()` — reserved for future use.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `node_stalled`
|
|
||||||
|
|
||||||
The node's LLM has produced identical responses for several consecutive turns (stall detection).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ------------------------------------------------- |
|
|
||||||
| `reason` | `str` | Always `"Consecutive identical responses detected"`|
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_stalled()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `node_tool_doom_loop`
|
|
||||||
|
|
||||||
The LLM is calling the same tool(s) with identical arguments repeatedly (doom loop detection).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------- | ----- | ------------------------------------ |
|
|
||||||
| `description` | `str` | Human-readable doom loop description |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode` doom loop handler
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Judge Decisions
|
|
||||||
|
|
||||||
### `judge_verdict`
|
|
||||||
|
|
||||||
The judge (custom or implicit) has evaluated the current iteration.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------ | ----- | ---------------------------------------------------- |
|
|
||||||
| `action` | `str` | `"ACCEPT"`, `"RETRY"`, `"ESCALATE"`, or `"CONTINUE"` |
|
|
||||||
| `feedback` | `str` | Judge feedback (empty for ACCEPT/CONTINUE) |
|
|
||||||
| `judge_type` | `str` | `"custom"` (explicit JudgeProtocol) or `"implicit"` (stop-reason heuristic) |
|
|
||||||
| `iteration` | `int` | Which iteration this verdict applies to |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_judge_verdict()`
|
|
||||||
|
|
||||||
**Verdict meanings:**
|
|
||||||
- **ACCEPT** — Output meets requirements; node exits successfully.
|
|
||||||
- **RETRY** — Output needs improvement; loop continues with feedback injected.
|
|
||||||
- **ESCALATE** — Problem cannot be solved at this level; triggers escalation.
|
|
||||||
- **CONTINUE** — Implicit verdict: LLM called tools, so it's making progress — let it keep going.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Output Tracking
|
|
||||||
|
|
||||||
### `output_key_set`
|
|
||||||
|
|
||||||
A node has set an output key via the `set_output` synthetic tool.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ----------------- |
|
|
||||||
| `key` | `str` | Output key name |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_output_key_set()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Retry & Edge Tracking
|
|
||||||
|
|
||||||
### `node_retry`
|
|
||||||
|
|
||||||
A transient error occurred during an LLM call and the node is retrying.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------- | ----- | ---------------------------------- |
|
|
||||||
| `retry_count` | `int` | Current retry attempt number |
|
|
||||||
| `max_retries` | `int` | Maximum retries configured |
|
|
||||||
| `error` | `str` | Error message (truncated to 500ch) |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode` (stream retry handler), `GraphExecutor` (node-level retry)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `edge_traversed`
|
|
||||||
|
|
||||||
The executor has traversed an edge from one node to another.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------------- | ----- | ---------------------------------------------- |
|
|
||||||
| `source_node` | `str` | Node ID the edge starts from |
|
|
||||||
| `target_node` | `str` | Node ID the edge goes to |
|
|
||||||
| `edge_condition` | `str` | Edge condition: `"router"`, `"on_success"`, etc. |
|
|
||||||
|
|
||||||
**Emitted by:** `GraphExecutor.execute()` — after router decisions, condition-based edges, and fallback edges.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Context Management
|
|
||||||
|
|
||||||
### `context_compacted`
|
|
||||||
|
|
||||||
Not currently emitted — reserved for future use when `NodeConversation` compacts history.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## State Changes
|
|
||||||
|
|
||||||
### `state_changed`
|
|
||||||
|
|
||||||
A shared buffer key has been modified.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ----------- | ----- | ---------------------------------- |
|
|
||||||
| `key` | `str` | Buffer key that changed |
|
|
||||||
| `old_value` | `Any` | Previous value |
|
|
||||||
| `new_value` | `Any` | New value |
|
|
||||||
| `scope` | `str` | Scope of the change |
|
|
||||||
|
|
||||||
**Emitted by:** Available via `emit_state_changed()` — not currently wired in default execution.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `state_conflict`
|
|
||||||
|
|
||||||
Not currently emitted — reserved for concurrent write conflict detection.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Goal Tracking
|
|
||||||
|
|
||||||
### `goal_progress`
|
|
||||||
|
|
||||||
Goal completion progress update.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ----------------- | ------- | ------------------------------------ |
|
|
||||||
| `progress` | `float` | 0.0–1.0 completion fraction |
|
|
||||||
| `criteria_status` | `dict` | Per-criterion status |
|
|
||||||
|
|
||||||
**Emitted by:** Available via `emit_goal_progress()` — not currently wired in default execution.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `goal_achieved`
|
|
||||||
|
|
||||||
Not currently emitted — reserved for explicit goal completion signals.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `constraint_violation`
|
|
||||||
|
|
||||||
A goal constraint has been violated.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| --------------- | ----- | ------------------------ |
|
|
||||||
| `constraint_id` | `str` | Which constraint failed |
|
|
||||||
| `description` | `str` | What went wrong |
|
|
||||||
|
|
||||||
**Emitted by:** Available via `emit_constraint_violation()`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Stream Lifecycle
|
|
||||||
|
|
||||||
### `stream_started` / `stream_stopped`
|
|
||||||
|
|
||||||
Not currently emitted — reserved for `ExecutionStream` lifecycle tracking.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## External Triggers
|
|
||||||
|
|
||||||
### `webhook_received`
|
|
||||||
|
|
||||||
An external webhook has been received.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| -------------- | ------ | ---------------------------- |
|
|
||||||
| `path` | `str` | Webhook URL path |
|
|
||||||
| `method` | `str` | HTTP method |
|
|
||||||
| `headers` | `dict` | HTTP headers |
|
|
||||||
| `payload` | `dict` | Request body |
|
|
||||||
| `query_params` | `dict` | URL query parameters |
|
|
||||||
|
|
||||||
**Emitted by:** Webhook server integration.
|
|
||||||
|
|
||||||
Note: `node_id` is not set on this event; `stream_id` is the webhook source ID.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Escalation
|
|
||||||
|
|
||||||
### `escalation_requested`
|
|
||||||
|
|
||||||
An agent has requested handoff to the Hive Coder (via the `escalate` synthetic tool).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ------------------------------- |
|
|
||||||
| `reason` | `str` | Why escalation is needed |
|
|
||||||
| `context` | `str` | Additional context for the coder|
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode` when the LLM calls `escalate`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Custom Events
|
|
||||||
|
|
||||||
### `custom`
|
|
||||||
|
|
||||||
User-defined events with arbitrary payloads. No schema enforced.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Subscription & Filtering
|
|
||||||
|
|
||||||
Events can be filtered when subscribing:
|
|
||||||
|
|
||||||
```python
|
|
||||||
bus.subscribe(
|
|
||||||
event_types=[EventType.TOOL_CALL_STARTED, EventType.TOOL_CALL_COMPLETED],
|
|
||||||
handler=my_handler,
|
|
||||||
filter_stream="default", # Only events from this stream
|
|
||||||
filter_node="planner", # Only events from this node
|
|
||||||
filter_execution="exec-uuid", # Only events from this execution
|
|
||||||
filter_graph="worker", # Only events from this graph
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Debug Event Logging
|
|
||||||
|
|
||||||
Set `HIVE_DEBUG_EVENTS=1` to write every published event to a JSONL file at `~/.hive/event_logs/<timestamp>.jsonl`. Each line is the full JSON serialization of an `AgentEvent`:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"type": "tool_call_started",
|
|
||||||
"stream_id": "default",
|
|
||||||
"node_id": "planner",
|
|
||||||
"execution_id": "a1b2c3d4-...",
|
|
||||||
"graph_id": "worker",
|
|
||||||
"data": {"tool_use_id": "tu_1", "tool_name": "web_search", "tool_input": {"query": "..."}},
|
|
||||||
"timestamp": "2026-02-24T12:00:00.000000",
|
|
||||||
"correlation_id": null
|
|
||||||
}
|
|
||||||
```
|
|
||||||
@@ -1,171 +0,0 @@
|
|||||||
# Agent Runtime
|
|
||||||
|
|
||||||
Unified execution system for all Hive agents. Every agent — single-entry or multi-entry, headless or dashboard — runs through the same runtime stack.
|
|
||||||
|
|
||||||
## Topology
|
|
||||||
|
|
||||||
```
|
|
||||||
AgentRunner.load(agent_path)
|
|
||||||
|
|
|
||||||
AgentRunner
|
|
||||||
(factory + public API)
|
|
||||||
|
|
|
||||||
_setup_agent_runtime()
|
|
||||||
|
|
|
||||||
AgentRuntime
|
|
||||||
(lifecycle + orchestration)
|
|
||||||
/ | \
|
|
||||||
Stream A Stream B Stream C ← one per entry point
|
|
||||||
| | |
|
|
||||||
GraphExecutor GraphExecutor GraphExecutor
|
|
||||||
| | |
|
|
||||||
Node → Node → Node (graph traversal)
|
|
||||||
```
|
|
||||||
|
|
||||||
Single-entry agents get a `"default"` entry point automatically. There is no separate code path.
|
|
||||||
|
|
||||||
## Components
|
|
||||||
|
|
||||||
| Component | File | Role |
|
|
||||||
|---|---|---|
|
|
||||||
| `AgentRunner` | `runner/runner.py` | Load agents, configure tools/LLM, expose high-level API |
|
|
||||||
| `AgentRuntime` | `runtime/agent_runtime.py` | Lifecycle management, entry point routing, event bus |
|
|
||||||
| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
|
|
||||||
| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
|
|
||||||
| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
|
|
||||||
| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
|
|
||||||
| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
|
|
||||||
| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |
|
|
||||||
|
|
||||||
## Programming Interface
|
|
||||||
|
|
||||||
### AgentRunner (high-level)
|
|
||||||
|
|
||||||
```python
|
|
||||||
from framework.runner import AgentRunner
|
|
||||||
|
|
||||||
# Load and run
|
|
||||||
runner = AgentRunner.load("exports/my_agent", model="anthropic/claude-sonnet-4-20250514")
|
|
||||||
result = await runner.run({"query": "hello"})
|
|
||||||
|
|
||||||
# Resume from paused session
|
|
||||||
result = await runner.run({"query": "continue"}, session_state=saved_state)
|
|
||||||
|
|
||||||
# Lifecycle
|
|
||||||
await runner.start() # Start the runtime
|
|
||||||
await runner.stop() # Stop the runtime
|
|
||||||
exec_id = await runner.trigger("default", {}) # Non-blocking trigger
|
|
||||||
entry_points = runner.get_entry_points() # List entry points
|
|
||||||
|
|
||||||
# Context manager
|
|
||||||
async with AgentRunner.load("exports/my_agent") as runner:
|
|
||||||
result = await runner.run({"query": "hello"})
|
|
||||||
|
|
||||||
# Cleanup
|
|
||||||
runner.cleanup() # Synchronous
|
|
||||||
await runner.cleanup_async() # Asynchronous
|
|
||||||
```
|
|
||||||
|
|
||||||
### AgentRuntime (lower-level)
|
|
||||||
|
|
||||||
```python
|
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
|
||||||
from framework.runtime.execution_stream import EntryPointSpec
|
|
||||||
|
|
||||||
# Create runtime with entry points
|
|
||||||
runtime = create_agent_runtime(
|
|
||||||
graph=graph,
|
|
||||||
goal=goal,
|
|
||||||
storage_path=Path("~/.hive/agents/my_agent"),
|
|
||||||
entry_points=[
|
|
||||||
EntryPointSpec(id="default", name="Default", entry_node="start", trigger_type="manual"),
|
|
||||||
],
|
|
||||||
llm=llm,
|
|
||||||
tools=tools,
|
|
||||||
tool_executor=tool_executor,
|
|
||||||
checkpoint_config=checkpoint_config,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Lifecycle
|
|
||||||
await runtime.start()
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
# Execution
|
|
||||||
exec_id = await runtime.trigger("default", {"query": "hello"}) # Non-blocking
|
|
||||||
result = await runtime.trigger_and_wait("default", {"query": "hello"}) # Blocking
|
|
||||||
result = await runtime.trigger_and_wait("default", {}, session_state=state) # Resume
|
|
||||||
|
|
||||||
# Client-facing node I/O
|
|
||||||
await runtime.inject_input(node_id="chat", content="user response")
|
|
||||||
|
|
||||||
# Events
|
|
||||||
sub_id = runtime.subscribe_to_events(
|
|
||||||
event_types=[EventType.CLIENT_OUTPUT_DELTA],
|
|
||||||
handler=my_handler,
|
|
||||||
)
|
|
||||||
runtime.unsubscribe_from_events(sub_id)
|
|
||||||
|
|
||||||
# Inspection
|
|
||||||
runtime.is_running # bool
|
|
||||||
runtime.event_bus # EventBus
|
|
||||||
runtime.state_manager # SharedBufferManager
|
|
||||||
runtime.get_stats() # Runtime statistics
|
|
||||||
```
|
|
||||||
|
|
||||||
## Execution Flow
|
|
||||||
|
|
||||||
1. `AgentRunner.run()` calls `AgentRuntime.trigger_and_wait()`
|
|
||||||
2. `AgentRuntime` routes to the `ExecutionStream` for the entry point
|
|
||||||
3. `ExecutionStream` creates a `GraphExecutor` and calls `execute()`
|
|
||||||
4. `GraphExecutor` traverses nodes, dispatches tools, manages checkpoints
|
|
||||||
5. `ExecutionResult` flows back up through the stack
|
|
||||||
6. `ExecutionStream` writes session state to disk
|
|
||||||
|
|
||||||
## Session Resume
|
|
||||||
|
|
||||||
All execution paths support session resume:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# First run (agent pauses at a client-facing node)
|
|
||||||
result = await runner.run({"query": "start task"})
|
|
||||||
# result.paused_at = "review-node"
|
|
||||||
# result.session_state = {"memory": {...}, "paused_at": "review-node", ...}
|
|
||||||
|
|
||||||
# Resume
|
|
||||||
result = await runner.run({"input": "approved"}, session_state=result.session_state)
|
|
||||||
```
|
|
||||||
|
|
||||||
Session state flows: `AgentRunner.run()` → `AgentRuntime.trigger_and_wait()` → `ExecutionStream.execute()` → `GraphExecutor.execute()`.
|
|
||||||
|
|
||||||
Checkpoints are saved at node boundaries (`sessions/{id}/checkpoints/`) for crash recovery.
|
|
||||||
|
|
||||||
## Event Bus
|
|
||||||
|
|
||||||
The `EventBus` provides real-time execution visibility:
|
|
||||||
|
|
||||||
| Event | When |
|
|
||||||
|---|---|
|
|
||||||
| `NODE_STARTED` | Node begins execution |
|
|
||||||
| `NODE_COMPLETED` | Node finishes |
|
|
||||||
| `TOOL_CALL_STARTED` | Tool invocation begins |
|
|
||||||
| `TOOL_CALL_COMPLETED` | Tool invocation finishes |
|
|
||||||
| `CLIENT_OUTPUT_DELTA` | Agent streams text to user |
|
|
||||||
| `CLIENT_INPUT_REQUESTED` | Agent needs user input |
|
|
||||||
| `EXECUTION_COMPLETED` | Full execution finishes |
|
|
||||||
|
|
||||||
In headless mode, `AgentRunner` subscribes to `CLIENT_OUTPUT_DELTA` and `CLIENT_INPUT_REQUESTED` to print output and read stdin. The web dashboard subscribes to route events to the frontend.
|
|
||||||
|
|
||||||
## Storage Layout
|
|
||||||
|
|
||||||
```
|
|
||||||
~/.hive/agents/{agent_name}/
|
|
||||||
sessions/
|
|
||||||
session_YYYYMMDD_HHMMSS_{uuid}/
|
|
||||||
state.json # Session state (status, memory, progress)
|
|
||||||
checkpoints/ # Node-boundary snapshots
|
|
||||||
logs/
|
|
||||||
summary.json # Execution summary
|
|
||||||
details.jsonl # Detailed event log
|
|
||||||
tool_logs.jsonl # Tool call log
|
|
||||||
runtime_logs/ # Cross-session runtime logs
|
|
||||||
```
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
"""Runtime core for agent execution."""

from framework.runtime.core import Runtime

# Explicit public API of the runtime package: only the Runtime facade is exported.
__all__ = ["Runtime"]
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
"""Tests for runtime components."""
|
|
||||||
@@ -1,869 +0,0 @@
|
|||||||
"""
|
|
||||||
Tests for AgentRuntime and multi-entry-point execution.
|
|
||||||
|
|
||||||
Tests:
|
|
||||||
1. AgentRuntime creation and lifecycle
|
|
||||||
2. Entry point registration
|
|
||||||
3. Concurrent executions across streams
|
|
||||||
4. SharedBufferManager isolation levels
|
|
||||||
5. OutcomeAggregator goal evaluation
|
|
||||||
6. EventBus pub/sub
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from framework.graph import Goal
|
|
||||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
|
||||||
from framework.graph.goal import Constraint, SuccessCriterion
|
|
||||||
from framework.graph.node import NodeSpec
|
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
|
||||||
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
|
|
||||||
from framework.runtime.execution_stream import EntryPointSpec
|
|
||||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
|
||||||
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
|
|
||||||
from framework.schemas.session_state import SessionState, SessionTimestamps
|
|
||||||
|
|
||||||
# === Test Fixtures ===
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def sample_goal():
    """Build a minimal Goal with one success criterion and one hard constraint.

    Returns:
        Goal: a goal usable by runtime/aggregator tests.
    """
    criterion = SuccessCriterion(
        id="sc-1",
        description="Process all requests",
        metric="requests_processed",
        target="100%",
        weight=1.0,
    )
    rate_limit_constraint = Constraint(
        id="c-1",
        description="Must not exceed rate limits",
        constraint_type="hard",
        category="operational",
    )
    return Goal(
        id="test-goal",
        name="Test Goal",
        description="A goal for testing multi-entry-point execution",
        success_criteria=[criterion],
        constraints=[rate_limit_constraint],
    )
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def sample_graph():
    """Build a small GraphSpec: two event-loop nodes feeding one terminal node.

    Returns:
        GraphSpec: graph with entry node ``process-webhook`` and terminal
        node ``complete``; ``process-api`` is a second processing path.
    """
    webhook_node = NodeSpec(
        id="process-webhook",
        name="Process Webhook",
        description="Process incoming webhook",
        node_type="event_loop",
        input_keys=["webhook_data"],
        output_keys=["result"],
    )
    api_node = NodeSpec(
        id="process-api",
        name="Process API Request",
        description="Process API request",
        node_type="event_loop",
        input_keys=["request_data"],
        output_keys=["result"],
    )
    terminal_node = NodeSpec(
        id="complete",
        name="Complete",
        description="Execution complete",
        node_type="terminal",
        input_keys=["result"],
        output_keys=["final_result"],
    )

    # Both processing nodes route to the terminal node on success.
    edges = [
        EdgeSpec(
            id="webhook-to-complete",
            source="process-webhook",
            target="complete",
            condition=EdgeCondition.ON_SUCCESS,
        ),
        EdgeSpec(
            id="api-to-complete",
            source="process-api",
            target="complete",
            condition=EdgeCondition.ON_SUCCESS,
        ),
    ]

    return GraphSpec(
        id="test-graph",
        goal_id="test-goal",
        version="1.0.0",
        entry_node="process-webhook",
        entry_points={"start": "process-webhook"},
        terminal_nodes=["complete"],
        pause_nodes=[],
        nodes=[webhook_node, api_node, terminal_node],
        edges=edges,
    )
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def temp_storage():
    """Create a temporary storage directory.

    Yields:
        Path: root of a scratch directory, removed automatically when the
        test finishes (the context manager cleans up on teardown).
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        yield Path(tmpdir)
|
|
||||||
|
|
||||||
|
|
||||||
# === SharedBufferManager Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestSharedBufferManager:
    """Tests for SharedBufferManager."""

    def test_create_buffer(self):
        """An execution-scoped buffer records its execution and stream IDs."""
        mgr = SharedBufferManager()
        buf = mgr.create_buffer(
            execution_id="exec-1",
            stream_id="webhook",
            isolation=IsolationLevel.SHARED,
        )
        assert buf is not None
        assert buf._execution_id == "exec-1"
        assert buf._stream_id == "webhook"

    @pytest.mark.asyncio
    async def test_isolated_state(self):
        """Writes to ISOLATED buffers must not leak between executions."""
        mgr = SharedBufferManager()
        first = mgr.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
        second = mgr.create_buffer("exec-2", "stream-1", IsolationLevel.ISOLATED)

        await first.write("key", "value1")
        await second.write("key", "value2")

        # Each buffer sees only its own write for the same key.
        assert await first.read("key") == "value1"
        assert await second.read("key") == "value2"

    @pytest.mark.asyncio
    async def test_shared_state(self):
        """A global-scope write under SHARED isolation is visible to every execution."""
        mgr = SharedBufferManager()
        mgr.create_buffer("exec-1", "stream-1", IsolationLevel.SHARED)
        mgr.create_buffer("exec-2", "stream-1", IsolationLevel.SHARED)

        await mgr.write(
            key="global_key",
            value="global_value",
            execution_id="exec-1",
            stream_id="stream-1",
            isolation=IsolationLevel.SHARED,
            scope="global",
        )

        # Both executions read the value written by exec-1.
        seen_by_first = await mgr.read("global_key", "exec-1", "stream-1", IsolationLevel.SHARED)
        seen_by_second = await mgr.read("global_key", "exec-2", "stream-1", IsolationLevel.SHARED)
        assert seen_by_first == "global_value"
        assert seen_by_second == "global_value"

    def test_cleanup_execution(self):
        """cleanup_execution drops the per-execution state entry."""
        mgr = SharedBufferManager()
        mgr.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
        assert "exec-1" in mgr._execution_state

        mgr.cleanup_execution("exec-1")
        assert "exec-1" not in mgr._execution_state
|
|
||||||
|
|
||||||
|
|
||||||
class TestSessionState:
    """Tests for session state data-buffer compatibility."""

    def test_legacy_memory_alias_populates_data_buffer(self):
        """Legacy `memory` payloads should still hydrate the session buffer."""
        legacy_payload = {"rules": "keep starred mail"}
        session = SessionState(
            session_id="session-1",
            goal_id="goal-1",
            timestamps=SessionTimestamps(
                started_at="2026-01-01T00:00:00",
                updated_at="2026-01-01T00:00:00",
            ),
            memory=legacy_payload,
        )

        # The legacy alias must populate both attribute views and the
        # serialized session-state dict.
        assert session.data_buffer == legacy_payload
        assert session.memory == legacy_payload
        assert session.to_session_state_dict()["data_buffer"] == legacy_payload
|
|
||||||
|
|
||||||
|
|
||||||
# === EventBus Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestEventBus:
    """Tests for EventBus pub/sub."""

    @pytest.mark.asyncio
    async def test_publish_subscribe(self):
        """A subscribed handler receives a matching published event."""
        bus = EventBus()
        seen = []

        async def collect(event: AgentEvent):
            seen.append(event)

        bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=collect,
        )

        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_STARTED,
                stream_id="webhook",
                execution_id="exec-1",
                data={"test": "data"},
            )
        )

        # Give the async handler a chance to run before asserting.
        await asyncio.sleep(0.1)

        assert len(seen) == 1
        assert seen[0].type == EventType.EXECUTION_STARTED
        assert seen[0].stream_id == "webhook"

    @pytest.mark.asyncio
    async def test_stream_filter(self):
        """filter_stream restricts delivery to events from one stream."""
        bus = EventBus()
        seen = []

        async def collect(event: AgentEvent):
            seen.append(event)

        bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=collect,
            filter_stream="webhook",
        )

        # Matching stream: should be delivered.
        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_STARTED,
                stream_id="webhook",
            )
        )
        # Non-matching stream: should be dropped by the filter.
        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_STARTED,
                stream_id="api",
            )
        )

        await asyncio.sleep(0.1)

        assert len(seen) == 1
        assert seen[0].stream_id == "webhook"

    def test_unsubscribe(self):
        """unsubscribe removes the subscription and reports success."""
        bus = EventBus()

        async def noop(event: AgentEvent):
            pass

        sub_id = bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=noop,
        )
        assert sub_id in bus._subscriptions

        removed = bus.unsubscribe(sub_id)

        assert removed is True
        assert sub_id not in bus._subscriptions

    @pytest.mark.asyncio
    async def test_wait_for(self):
        """wait_for resolves once a matching event is published."""
        bus = EventBus()

        async def waiter():
            return await bus.wait_for(
                event_type=EventType.EXECUTION_COMPLETED,
                timeout=1.0,
            )

        # Start waiting in the background, then publish the event it expects.
        pending = asyncio.create_task(waiter())

        await asyncio.sleep(0.1)
        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_COMPLETED,
                stream_id="webhook",
                execution_id="exec-1",
            )
        )

        event = await pending

        assert event is not None
        assert event.type == EventType.EXECUTION_COMPLETED
|
|
||||||
|
|
||||||
|
|
||||||
# === OutcomeAggregator Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestOutcomeAggregator:
    """Tests for OutcomeAggregator."""

    def test_record_decision(self, sample_goal):
        """Recording a decision bumps the counter and stores the record."""
        from framework.schemas.decision import Decision, DecisionType

        tracker = OutcomeAggregator(sample_goal)
        decision = Decision(
            id="dec-1",
            node_id="process-webhook",
            intent="Process incoming webhook",
            decision_type=DecisionType.PATH_CHOICE,
            options=[],
            chosen_option_id="opt-1",
            reasoning="Standard processing path",
        )

        tracker.record_decision("webhook", "exec-1", decision)

        assert tracker._total_decisions == 1
        assert len(tracker._decisions) == 1

    @pytest.mark.asyncio
    async def test_evaluate_goal_progress(self, sample_goal):
        """The progress report exposes all expected top-level keys."""
        tracker = OutcomeAggregator(sample_goal)

        report = await tracker.evaluate_goal_progress()

        expected_keys = (
            "overall_progress",
            "criteria_status",
            "constraint_violations",
            "recommendation",
        )
        for key in expected_keys:
            assert key in report

    def test_record_constraint_violation(self, sample_goal):
        """A recorded violation is stored with its constraint ID."""
        tracker = OutcomeAggregator(sample_goal)

        tracker.record_constraint_violation(
            constraint_id="c-1",
            description="Rate limit exceeded",
            violation_details="More than 100 requests/minute",
            stream_id="webhook",
            execution_id="exec-1",
        )

        violations = tracker._constraint_violations
        assert len(violations) == 1
        assert violations[0].constraint_id == "c-1"
|
|
||||||
|
|
||||||
|
|
||||||
# === AgentRuntime Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestAgentRuntime:
|
|
||||||
"""Tests for AgentRuntime orchestration."""
|
|
||||||
|
|
||||||
def test_register_entry_point(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test registering entry points."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="manual",
|
|
||||||
name="Manual Trigger",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="manual",
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
assert "manual" in runtime._entry_points
|
|
||||||
assert len(runtime.get_entry_points()) == 1
|
|
||||||
|
|
||||||
def test_register_duplicate_entry_point_fails(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that duplicate entry point IDs fail."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook Handler",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="already registered"):
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
def test_register_invalid_entry_node_fails(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that invalid entry nodes fail."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="invalid",
|
|
||||||
name="Invalid Entry",
|
|
||||||
entry_node="nonexistent-node",
|
|
||||||
trigger_type="manual",
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="not found in graph"):
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_start_stop_lifecycle(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test runtime start/stop lifecycle."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook Handler",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
assert not runtime.is_running
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
assert runtime.is_running
|
|
||||||
assert "webhook" in runtime._streams
|
|
||||||
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
assert not runtime.is_running
|
|
||||||
assert len(runtime._streams) == 0
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_trigger_requires_running(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that trigger fails if runtime not running."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook Handler",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
with pytest.raises(RuntimeError, match="not running"):
|
|
||||||
await runtime.trigger("webhook", {"test": "data"})
|
|
||||||
|
|
||||||
|
|
||||||
# === GraphSpec Validation Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
# === Integration Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestCreateAgentRuntime:
|
|
||||||
"""Tests for the create_agent_runtime factory."""
|
|
||||||
|
|
||||||
def test_create_with_entry_points(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test factory creates runtime with entry points."""
|
|
||||||
entry_points = [
|
|
||||||
EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
),
|
|
||||||
EntryPointSpec(
|
|
||||||
id="api",
|
|
||||||
name="API",
|
|
||||||
entry_node="process-api",
|
|
||||||
trigger_type="api",
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
runtime = create_agent_runtime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
entry_points=entry_points,
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(runtime.get_entry_points()) == 2
|
|
||||||
assert "webhook" in runtime._entry_points
|
|
||||||
assert "api" in runtime._entry_points
|
|
||||||
|
|
||||||
|
|
||||||
# === Timer Entry Point Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestTimerEntryPoints:
|
|
||||||
"""Tests for timer-driven entry points (interval and cron)."""
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_interval_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that interval_minutes timer creates an async task."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-interval",
|
|
||||||
name="Interval Timer",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={"interval_minutes": 60},
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 1
|
|
||||||
assert not runtime._timer_tasks[0].done()
|
|
||||||
# Give the async task a moment to set next_fire
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert "timer-interval" in runtime._timer_next_fire
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
assert len(runtime._timer_tasks) == 0
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cron_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that cron expression timer creates an async task."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-cron",
|
|
||||||
name="Cron Timer",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={"cron": "*/5 * * * *"}, # Every 5 minutes
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 1
|
|
||||||
assert not runtime._timer_tasks[0].done()
|
|
||||||
# Give the async task a moment to set next_fire
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert "timer-cron" in runtime._timer_next_fire
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_invalid_cron_expression_skipped(
|
|
||||||
self, sample_graph, sample_goal, temp_storage, caplog
|
|
||||||
):
|
|
||||||
"""Test that an invalid cron expression logs a warning and skips."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-bad-cron",
|
|
||||||
name="Bad Cron Timer",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={"cron": "not a cron expression"},
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 0
|
|
||||||
assert "invalid cron" in caplog.text.lower() or "Invalid cron" in caplog.text
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cron_takes_priority_over_interval(
|
|
||||||
self, sample_graph, sample_goal, temp_storage, caplog
|
|
||||||
):
|
|
||||||
"""Test that when both cron and interval_minutes are set, cron wins."""
|
|
||||||
import logging
|
|
||||||
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-both",
|
|
||||||
name="Both Timer",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={"cron": "0 9 * * *", "interval_minutes": 30},
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
with caplog.at_level(logging.INFO):
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 1
|
|
||||||
# Should log cron, not interval
|
|
||||||
assert any("cron" in r.message.lower() for r in caplog.records)
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_no_interval_or_cron_warns(self, sample_graph, sample_goal, temp_storage, caplog):
|
|
||||||
"""Test that timer with neither cron nor interval_minutes logs a warning."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-empty",
|
|
||||||
name="Empty Timer",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={},
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 0
|
|
||||||
assert "no 'cron' or valid 'interval_minutes'" in caplog.text
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cron_immediate_fires_first(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that run_immediately=True with cron doesn't set next_fire before first run."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-cron-immediate",
|
|
||||||
name="Cron Immediate",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={"cron": "0 0 * * *", "run_immediately": True},
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 1
|
|
||||||
# With run_immediately, the task enters the while loop directly,
|
|
||||||
# so _timer_next_fire is NOT set before the first trigger attempt
|
|
||||||
# (it pops it at the top of the loop)
|
|
||||||
# Give it a moment to start executing
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
# Task should still be running (it will try to trigger and likely fail
|
|
||||||
# since there's no LLM, but the task itself continues)
|
|
||||||
assert not runtime._timer_tasks[0].done()
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
|
|
||||||
# === Cancel All Tasks Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestCancelAllTasks:
|
|
||||||
"""Tests for cancel_all_tasks and cancel_all_tasks_async."""
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cancel_all_tasks_async_returns_false_when_no_tasks(
|
|
||||||
self, sample_graph, sample_goal, temp_storage
|
|
||||||
):
|
|
||||||
"""Test that cancel_all_tasks_async returns False with no running tasks."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
result = await runtime.cancel_all_tasks_async()
|
|
||||||
assert result is False
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cancel_all_tasks_async_cancels_running_task(
|
|
||||||
self, sample_graph, sample_goal, temp_storage
|
|
||||||
):
|
|
||||||
"""Test that cancel_all_tasks_async cancels a running task and returns True."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Inject a fake running task into the stream
|
|
||||||
stream = runtime._streams["webhook"]
|
|
||||||
|
|
||||||
async def hang_forever():
|
|
||||||
await asyncio.get_event_loop().create_future()
|
|
||||||
|
|
||||||
fake_task = asyncio.ensure_future(hang_forever())
|
|
||||||
stream._execution_tasks["fake-exec"] = fake_task
|
|
||||||
|
|
||||||
result = await runtime.cancel_all_tasks_async()
|
|
||||||
assert result is True
|
|
||||||
|
|
||||||
# Let the CancelledError propagate
|
|
||||||
try:
|
|
||||||
await fake_task
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
pass
|
|
||||||
assert fake_task.cancelled()
|
|
||||||
|
|
||||||
# Clean up
|
|
||||||
del stream._execution_tasks["fake-exec"]
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cancel_all_tasks_async_cancels_multiple_tasks_across_streams(
|
|
||||||
self, sample_graph, sample_goal, temp_storage
|
|
||||||
):
|
|
||||||
"""Test that cancel_all_tasks_async cancels tasks across multiple streams."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Register two entry points so we get two streams
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="stream-a",
|
|
||||||
name="Stream A",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="stream-b",
|
|
||||||
name="Stream B",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
|
|
||||||
async def hang_forever():
|
|
||||||
await asyncio.get_event_loop().create_future()
|
|
||||||
|
|
||||||
stream_a = runtime._streams["stream-a"]
|
|
||||||
stream_b = runtime._streams["stream-b"]
|
|
||||||
|
|
||||||
# Two tasks in stream A, one task in stream B
|
|
||||||
task_a1 = asyncio.ensure_future(hang_forever())
|
|
||||||
task_a2 = asyncio.ensure_future(hang_forever())
|
|
||||||
task_b1 = asyncio.ensure_future(hang_forever())
|
|
||||||
|
|
||||||
stream_a._execution_tasks["exec-a1"] = task_a1
|
|
||||||
stream_a._execution_tasks["exec-a2"] = task_a2
|
|
||||||
stream_b._execution_tasks["exec-b1"] = task_b1
|
|
||||||
|
|
||||||
result = await runtime.cancel_all_tasks_async()
|
|
||||||
assert result is True
|
|
||||||
|
|
||||||
# Let CancelledErrors propagate
|
|
||||||
for task in [task_a1, task_a2, task_b1]:
|
|
||||||
try:
|
|
||||||
await task
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
pass
|
|
||||||
assert task.cancelled()
|
|
||||||
|
|
||||||
# Clean up
|
|
||||||
del stream_a._execution_tasks["exec-a1"]
|
|
||||||
del stream_a._execution_tasks["exec-a2"]
|
|
||||||
del stream_b._execution_tasks["exec-b1"]
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
pytest.main([__file__, "-v"])
|
|
||||||
@@ -1,268 +0,0 @@
|
|||||||
"""Tests for webhook idempotency key support in AgentRuntime.trigger()."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import time
|
|
||||||
from collections import OrderedDict
|
|
||||||
from unittest.mock import AsyncMock, MagicMock
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
|
|
||||||
|
|
||||||
|
|
||||||
def _make_runtime(ttl=300.0, max_keys=10000):
|
|
||||||
"""Create a minimal AgentRuntime with idempotency cache attributes.
|
|
||||||
|
|
||||||
Uses ``object.__new__`` to skip ``__init__`` and its heavy dependencies
|
|
||||||
(storage, LLM, skills) — we only need the cache and config for these tests.
|
|
||||||
"""
|
|
||||||
runtime = object.__new__(AgentRuntime)
|
|
||||||
runtime._config = AgentRuntimeConfig(idempotency_ttl_seconds=ttl, idempotency_max_keys=max_keys)
|
|
||||||
runtime._running = True
|
|
||||||
runtime._lock = asyncio.Lock()
|
|
||||||
runtime._idempotency_keys = OrderedDict()
|
|
||||||
runtime._idempotency_times = {}
|
|
||||||
runtime._graphs = {}
|
|
||||||
runtime._active_graph_id = "primary"
|
|
||||||
runtime._graph_id = "primary"
|
|
||||||
runtime._streams = {}
|
|
||||||
runtime._entry_points = {}
|
|
||||||
return runtime
|
|
||||||
|
|
||||||
|
|
||||||
def _make_runtime_with_stream(ttl=300.0, max_keys=10000):
|
|
||||||
"""Create a mock runtime whose stream.execute() returns unique IDs."""
|
|
||||||
runtime = _make_runtime(ttl=ttl, max_keys=max_keys)
|
|
||||||
|
|
||||||
call_count = 0
|
|
||||||
|
|
||||||
async def _fake_execute(*args, **kwargs):
|
|
||||||
nonlocal call_count
|
|
||||||
call_count += 1
|
|
||||||
return f"session-{call_count:04d}"
|
|
||||||
|
|
||||||
stream = MagicMock()
|
|
||||||
stream.execute = _fake_execute
|
|
||||||
runtime._streams = {"webhook": stream}
|
|
||||||
runtime._entry_points = {"webhook": MagicMock()}
|
|
||||||
return runtime
|
|
||||||
|
|
||||||
|
|
||||||
class TestIdempotencyConfig:
|
|
||||||
"""Verify idempotency configuration defaults."""
|
|
||||||
|
|
||||||
def test_default_ttl(self):
|
|
||||||
config = AgentRuntimeConfig()
|
|
||||||
assert config.idempotency_ttl_seconds == 300.0
|
|
||||||
|
|
||||||
def test_default_max_keys(self):
|
|
||||||
config = AgentRuntimeConfig()
|
|
||||||
assert config.idempotency_max_keys == 10000
|
|
||||||
|
|
||||||
def test_custom_config(self):
|
|
||||||
config = AgentRuntimeConfig(idempotency_ttl_seconds=60.0, idempotency_max_keys=100)
|
|
||||||
assert config.idempotency_ttl_seconds == 60.0
|
|
||||||
assert config.idempotency_max_keys == 100
|
|
||||||
|
|
||||||
|
|
||||||
class TestIdempotencyCache:
|
|
||||||
"""Test the idempotency cache and pruning logic directly."""
|
|
||||||
|
|
||||||
def test_cache_stores_and_retrieves_key(self):
|
|
||||||
runtime = _make_runtime()
|
|
||||||
runtime._idempotency_keys["stripe-evt-123"] = "exec-001"
|
|
||||||
runtime._idempotency_times["stripe-evt-123"] = time.time()
|
|
||||||
|
|
||||||
assert runtime._idempotency_keys.get("stripe-evt-123") == "exec-001"
|
|
||||||
|
|
||||||
def test_cache_returns_none_for_unknown_key(self):
|
|
||||||
runtime = _make_runtime()
|
|
||||||
assert runtime._idempotency_keys.get("unknown") is None
|
|
||||||
|
|
||||||
def test_prune_removes_expired_keys(self):
|
|
||||||
runtime = _make_runtime(ttl=0.1)
|
|
||||||
|
|
||||||
runtime._idempotency_keys["old-key"] = "exec-old"
|
|
||||||
runtime._idempotency_times["old-key"] = time.time() - 1.0 # expired
|
|
||||||
|
|
||||||
runtime._prune_idempotency_keys()
|
|
||||||
|
|
||||||
assert "old-key" not in runtime._idempotency_keys
|
|
||||||
assert "old-key" not in runtime._idempotency_times
|
|
||||||
|
|
||||||
def test_prune_keeps_fresh_keys(self):
|
|
||||||
runtime = _make_runtime(ttl=300.0)
|
|
||||||
|
|
||||||
runtime._idempotency_keys["fresh-key"] = "exec-fresh"
|
|
||||||
runtime._idempotency_times["fresh-key"] = time.time()
|
|
||||||
|
|
||||||
runtime._prune_idempotency_keys()
|
|
||||||
|
|
||||||
assert "fresh-key" in runtime._idempotency_keys
|
|
||||||
|
|
||||||
def test_prune_respects_max_keys(self):
|
|
||||||
runtime = _make_runtime(max_keys=2)
|
|
||||||
|
|
||||||
for i in range(3):
|
|
||||||
key = f"key-{i}"
|
|
||||||
runtime._idempotency_keys[key] = f"exec-{i}"
|
|
||||||
runtime._idempotency_times[key] = time.time()
|
|
||||||
|
|
||||||
runtime._prune_idempotency_keys()
|
|
||||||
|
|
||||||
assert len(runtime._idempotency_keys) == 2
|
|
||||||
# Oldest (key-0) should be evicted
|
|
||||||
assert "key-0" not in runtime._idempotency_keys
|
|
||||||
assert "key-1" in runtime._idempotency_keys
|
|
||||||
assert "key-2" in runtime._idempotency_keys
|
|
||||||
|
|
||||||
def test_prune_evicts_fifo(self):
|
|
||||||
runtime = _make_runtime(max_keys=1)
|
|
||||||
|
|
||||||
runtime._idempotency_keys["first"] = "exec-1"
|
|
||||||
runtime._idempotency_times["first"] = time.time()
|
|
||||||
runtime._idempotency_keys["second"] = "exec-2"
|
|
||||||
runtime._idempotency_times["second"] = time.time()
|
|
||||||
|
|
||||||
runtime._prune_idempotency_keys()
|
|
||||||
|
|
||||||
assert len(runtime._idempotency_keys) == 1
|
|
||||||
assert "second" in runtime._idempotency_keys
|
|
||||||
assert "first" not in runtime._idempotency_keys
|
|
||||||
|
|
||||||
def test_mixed_expired_and_max_size(self):
|
|
||||||
runtime = _make_runtime(ttl=0.1, max_keys=2)
|
|
||||||
|
|
||||||
# Add expired key
|
|
||||||
runtime._idempotency_keys["expired"] = "exec-e"
|
|
||||||
runtime._idempotency_times["expired"] = time.time() - 1.0
|
|
||||||
|
|
||||||
# Add fresh keys
|
|
||||||
runtime._idempotency_keys["fresh-1"] = "exec-f1"
|
|
||||||
runtime._idempotency_times["fresh-1"] = time.time()
|
|
||||||
runtime._idempotency_keys["fresh-2"] = "exec-f2"
|
|
||||||
runtime._idempotency_times["fresh-2"] = time.time()
|
|
||||||
|
|
||||||
runtime._prune_idempotency_keys()
|
|
||||||
|
|
||||||
assert "expired" not in runtime._idempotency_keys
|
|
||||||
assert "fresh-1" in runtime._idempotency_keys
|
|
||||||
assert "fresh-2" in runtime._idempotency_keys
|
|
||||||
|
|
||||||
|
|
||||||
class TestTriggerIdempotency:
|
|
||||||
"""Tests for trigger() idempotency deduplication."""
|
|
||||||
|
|
||||||
def test_trigger_accepts_idempotency_key(self):
|
|
||||||
"""trigger() accepts idempotency_key as a keyword argument."""
|
|
||||||
import inspect
|
|
||||||
|
|
||||||
sig = inspect.signature(AgentRuntime.trigger)
|
|
||||||
assert "idempotency_key" in sig.parameters
|
|
||||||
|
|
||||||
def test_idempotency_key_defaults_to_none(self):
|
|
||||||
"""idempotency_key defaults to None (backward compatible)."""
|
|
||||||
import inspect
|
|
||||||
|
|
||||||
sig = inspect.signature(AgentRuntime.trigger)
|
|
||||||
assert sig.parameters["idempotency_key"].default is None
|
|
||||||
|
|
||||||
def test_trigger_and_wait_accepts_idempotency_key(self):
|
|
||||||
"""trigger_and_wait() also accepts idempotency_key."""
|
|
||||||
import inspect
|
|
||||||
|
|
||||||
sig = inspect.signature(AgentRuntime.trigger_and_wait)
|
|
||||||
assert "idempotency_key" in sig.parameters
|
|
||||||
|
|
||||||
def test_trigger_and_wait_idempotency_key_defaults_to_none(self):
|
|
||||||
"""trigger_and_wait() idempotency_key defaults to None."""
|
|
||||||
import inspect
|
|
||||||
|
|
||||||
sig = inspect.signature(AgentRuntime.trigger_and_wait)
|
|
||||||
assert sig.parameters["idempotency_key"].default is None
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_duplicate_key_returns_cached_id(self):
|
|
||||||
"""Same idempotency key within TTL returns the cached execution ID."""
|
|
||||||
runtime = _make_runtime_with_stream()
|
|
||||||
|
|
||||||
first = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001")
|
|
||||||
second = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001")
|
|
||||||
|
|
||||||
assert first == second
|
|
||||||
assert first == "session-0001"
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_different_keys_produce_different_ids(self):
|
|
||||||
"""Different idempotency keys start separate executions."""
|
|
||||||
runtime = _make_runtime_with_stream()
|
|
||||||
|
|
||||||
id_a = await runtime.trigger("webhook", {}, idempotency_key="evt-aaa")
|
|
||||||
id_b = await runtime.trigger("webhook", {}, idempotency_key="evt-bbb")
|
|
||||||
|
|
||||||
assert id_a != id_b
|
|
||||||
assert id_a == "session-0001"
|
|
||||||
assert id_b == "session-0002"
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_none_key_always_starts_new_execution(self):
|
|
||||||
"""key=None (default) skips dedup — every call starts fresh."""
|
|
||||||
runtime = _make_runtime_with_stream()
|
|
||||||
|
|
||||||
id_1 = await runtime.trigger("webhook", {})
|
|
||||||
id_2 = await runtime.trigger("webhook", {})
|
|
||||||
|
|
||||||
assert id_1 != id_2
|
|
||||||
assert len(runtime._idempotency_keys) == 0 # nothing cached
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_expired_key_allows_new_execution(self):
|
|
||||||
"""After TTL expires, the same key starts a new execution."""
|
|
||||||
runtime = _make_runtime_with_stream(ttl=0.1)
|
|
||||||
|
|
||||||
first = await runtime.trigger("webhook", {}, idempotency_key="evt-expire")
|
|
||||||
|
|
||||||
# Backdate the cached timestamp so the key looks expired
|
|
||||||
runtime._idempotency_times["evt-expire"] = time.time() - 1.0
|
|
||||||
|
|
||||||
second = await runtime.trigger("webhook", {}, idempotency_key="evt-expire")
|
|
||||||
|
|
||||||
assert first != second
|
|
||||||
assert first == "session-0001"
|
|
||||||
assert second == "session-0002"
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_stream_not_found_does_not_cache(self):
|
|
||||||
"""If entry point doesn't exist, nothing is cached."""
|
|
||||||
runtime = _make_runtime_with_stream()
|
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="not found"):
|
|
||||||
await runtime.trigger("nonexistent", {}, idempotency_key="evt-orphan")
|
|
||||||
|
|
||||||
assert "evt-orphan" not in runtime._idempotency_keys
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_execute_error_does_not_cache(self):
|
|
||||||
"""If stream.execute() raises, nothing is cached so retries can go through."""
|
|
||||||
runtime = _make_runtime()
|
|
||||||
|
|
||||||
failing_stream = MagicMock()
|
|
||||||
failing_stream.execute = AsyncMock(side_effect=RuntimeError("stream not running"))
|
|
||||||
runtime._streams = {"webhook": failing_stream}
|
|
||||||
runtime._entry_points = {"webhook": MagicMock()}
|
|
||||||
|
|
||||||
with pytest.raises(RuntimeError, match="stream not running"):
|
|
||||||
await runtime.trigger("webhook", {}, idempotency_key="evt-123")
|
|
||||||
|
|
||||||
assert "evt-123" not in runtime._idempotency_keys
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cache_holds_real_execution_id(self):
|
|
||||||
"""Cached value matches the actual execution ID from execute()."""
|
|
||||||
runtime = _make_runtime_with_stream()
|
|
||||||
|
|
||||||
exec_id = await runtime.trigger("webhook", {}, idempotency_key="evt-real")
|
|
||||||
|
|
||||||
cached = runtime._idempotency_keys.get("evt-real")
|
|
||||||
assert cached == exec_id
|
|
||||||
assert cached == "session-0001"
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
"""Tests for custom session-backed runtime logging paths."""
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
|
|
||||||
from framework.graph.executor import GraphExecutor
|
|
||||||
from framework.runtime.runtime_log_store import RuntimeLogStore
|
|
||||||
from framework.runtime.runtime_logger import RuntimeLogger
|
|
||||||
|
|
||||||
|
|
||||||
def test_graph_executor_uses_custom_session_dir_name_for_runtime_logs():
|
|
||||||
executor = GraphExecutor(
|
|
||||||
runtime=MagicMock(),
|
|
||||||
storage_path=Path("/tmp/test-agent/sessions/my-custom-session"),
|
|
||||||
)
|
|
||||||
|
|
||||||
assert executor._get_runtime_log_session_id() == "my-custom-session"
|
|
||||||
|
|
||||||
|
|
||||||
def test_runtime_logger_creates_session_log_dir_for_custom_session_id(tmp_path):
|
|
||||||
base = tmp_path / ".hive" / "agents" / "test_agent"
|
|
||||||
base.mkdir(parents=True)
|
|
||||||
store = RuntimeLogStore(base)
|
|
||||||
logger = RuntimeLogger(store=store, agent_id="test-agent")
|
|
||||||
|
|
||||||
run_id = logger.start_run(goal_id="goal-1", session_id="my-custom-session")
|
|
||||||
|
|
||||||
assert run_id == "my-custom-session"
|
|
||||||
assert (base / "sessions" / "my-custom-session" / "logs").is_dir()
|
|
||||||
@@ -1,716 +0,0 @@
|
|||||||
"""
|
|
||||||
Tests for WebhookServer and event-driven entry points.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import hashlib
|
|
||||||
import hmac as hmac_mod
|
|
||||||
import json
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
|
|
||||||
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
|
|
||||||
from framework.runtime.execution_stream import EntryPointSpec
|
|
||||||
from framework.runtime.webhook_server import (
|
|
||||||
WebhookRoute,
|
|
||||||
WebhookServer,
|
|
||||||
WebhookServerConfig,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _make_server(event_bus: EventBus, routes: list[WebhookRoute] | None = None):
|
|
||||||
"""Helper to create a WebhookServer with port=0 for OS-assigned port."""
|
|
||||||
config = WebhookServerConfig(host="127.0.0.1", port=0)
|
|
||||||
server = WebhookServer(event_bus, config)
|
|
||||||
for route in routes or []:
|
|
||||||
server.add_route(route)
|
|
||||||
return server
|
|
||||||
|
|
||||||
|
|
||||||
def _base_url(server: WebhookServer) -> str:
|
|
||||||
"""Get the base URL for a running server."""
|
|
||||||
return f"http://127.0.0.1:{server.port}"
|
|
||||||
|
|
||||||
|
|
||||||
class TestWebhookServerLifecycle:
|
|
||||||
"""Tests for server start/stop."""
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_start_stop(self):
|
|
||||||
bus = EventBus()
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="test", path="/webhooks/test", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
await server.start()
|
|
||||||
assert server.is_running
|
|
||||||
assert server.port is not None
|
|
||||||
|
|
||||||
await server.stop()
|
|
||||||
assert not server.is_running
|
|
||||||
assert server.port is None
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_no_routes_skips_start(self):
|
|
||||||
bus = EventBus()
|
|
||||||
server = _make_server(bus) # no routes
|
|
||||||
|
|
||||||
await server.start()
|
|
||||||
assert not server.is_running
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_stop_when_not_started(self):
|
|
||||||
bus = EventBus()
|
|
||||||
server = _make_server(bus)
|
|
||||||
|
|
||||||
# Should be a no-op, not raise
|
|
||||||
await server.stop()
|
|
||||||
assert not server.is_running
|
|
||||||
|
|
||||||
|
|
||||||
class TestWebhookEventPublishing:
|
|
||||||
"""Tests for HTTP request -> EventBus event publishing."""
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_post_publishes_webhook_received(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="gh", path="/webhooks/github", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/github",
|
|
||||||
json={"action": "opened", "number": 42},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
body = await resp.json()
|
|
||||||
assert body["status"] == "accepted"
|
|
||||||
|
|
||||||
# Give event bus time to dispatch
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(received) == 1
|
|
||||||
event = received[0]
|
|
||||||
assert event.type == EventType.WEBHOOK_RECEIVED
|
|
||||||
assert event.stream_id == "gh"
|
|
||||||
assert event.data["path"] == "/webhooks/github"
|
|
||||||
assert event.data["method"] == "POST"
|
|
||||||
assert event.data["payload"] == {"action": "opened", "number": 42}
|
|
||||||
assert isinstance(event.data["headers"], dict)
|
|
||||||
assert event.data["query_params"] == {}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_query_params_included(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="hook", path="/webhooks/hook", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/hook?source=test&v=2",
|
|
||||||
json={"data": "hello"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(received) == 1
|
|
||||||
assert received[0].data["query_params"] == {"source": "test", "v": "2"}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_non_json_body(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="raw", path="/webhooks/raw", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/raw",
|
|
||||||
data=b"plain text body",
|
|
||||||
headers={"Content-Type": "text/plain"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(received) == 1
|
|
||||||
assert received[0].data["payload"] == {"raw_body": "plain text body"}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_empty_body(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="empty", path="/webhooks/empty", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(f"{_base_url(server)}/webhooks/empty") as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(received) == 1
|
|
||||||
assert received[0].data["payload"] == {}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_multiple_routes(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]),
|
|
||||||
WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/a", json={"from": "a"}
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/b", json={"from": "b"}
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(received) == 2
|
|
||||||
stream_ids = {e.stream_id for e in received}
|
|
||||||
assert stream_ids == {"a", "b"}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_filter_stream_subscription(self):
|
|
||||||
"""Subscribers can filter by stream_id (source_id)."""
|
|
||||||
bus = EventBus()
|
|
||||||
a_events = []
|
|
||||||
b_events = []
|
|
||||||
|
|
||||||
async def handle_a(event):
|
|
||||||
a_events.append(event)
|
|
||||||
|
|
||||||
async def handle_b(event):
|
|
||||||
b_events.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_a, filter_stream="a")
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_b, filter_stream="b")
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]),
|
|
||||||
WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
await session.post(f"{_base_url(server)}/webhooks/a", json={"x": 1})
|
|
||||||
await session.post(f"{_base_url(server)}/webhooks/b", json={"x": 2})
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(a_events) == 1
|
|
||||||
assert a_events[0].data["payload"] == {"x": 1}
|
|
||||||
assert len(b_events) == 1
|
|
||||||
assert b_events[0].data["payload"] == {"x": 2}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
|
|
||||||
class TestHMACVerification:
|
|
||||||
"""Tests for HMAC-SHA256 signature verification."""
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_valid_signature_accepted(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
secret = "test-secret-key"
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(
|
|
||||||
source_id="secure",
|
|
||||||
path="/webhooks/secure",
|
|
||||||
methods=["POST"],
|
|
||||||
secret=secret,
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
body = json.dumps({"event": "push"}).encode()
|
|
||||||
sig = hmac_mod.new(secret.encode(), body, hashlib.sha256).hexdigest()
|
|
||||||
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/secure",
|
|
||||||
data=body,
|
|
||||||
headers={
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
"X-Hub-Signature-256": f"sha256={sig}",
|
|
||||||
},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert len(received) == 1
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_invalid_signature_rejected(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(
|
|
||||||
source_id="secure",
|
|
||||||
path="/webhooks/secure",
|
|
||||||
methods=["POST"],
|
|
||||||
secret="real-secret",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/secure",
|
|
||||||
json={"event": "push"},
|
|
||||||
headers={"X-Hub-Signature-256": "sha256=invalidsignature"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 401
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert len(received) == 0 # No event published
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_missing_signature_rejected(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(
|
|
||||||
source_id="secure",
|
|
||||||
path="/webhooks/secure",
|
|
||||||
methods=["POST"],
|
|
||||||
secret="my-secret",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
# No X-Hub-Signature-256 header
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/secure",
|
|
||||||
json={"event": "push"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 401
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert len(received) == 0
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_no_secret_skips_verification(self):
|
|
||||||
"""Routes without a secret accept any request."""
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(
|
|
||||||
source_id="open",
|
|
||||||
path="/webhooks/open",
|
|
||||||
methods=["POST"],
|
|
||||||
secret=None,
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/open",
|
|
||||||
json={"data": "test"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert len(received) == 1
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
|
|
||||||
class TestEventDrivenEntryPoints:
|
|
||||||
"""Tests for event-driven entry points wired through AgentRuntime."""
|
|
||||||
|
|
||||||
def _make_graph_and_goal(self):
|
|
||||||
"""Minimal graph + goal for testing entry point triggering."""
|
|
||||||
from framework.graph import Goal
|
|
||||||
from framework.graph.edge import GraphSpec
|
|
||||||
from framework.graph.goal import SuccessCriterion
|
|
||||||
from framework.graph.node import NodeSpec
|
|
||||||
|
|
||||||
nodes = [
|
|
||||||
NodeSpec(
|
|
||||||
id="process-event",
|
|
||||||
name="Process Event",
|
|
||||||
description="Process incoming event",
|
|
||||||
node_type="event_loop",
|
|
||||||
input_keys=["event"],
|
|
||||||
output_keys=["result"],
|
|
||||||
),
|
|
||||||
]
|
|
||||||
graph = GraphSpec(
|
|
||||||
id="test-graph",
|
|
||||||
goal_id="test-goal",
|
|
||||||
version="1.0.0",
|
|
||||||
entry_node="process-event",
|
|
||||||
entry_points={"start": "process-event"},
|
|
||||||
terminal_nodes=[],
|
|
||||||
pause_nodes=[],
|
|
||||||
nodes=nodes,
|
|
||||||
edges=[],
|
|
||||||
)
|
|
||||||
goal = Goal(
|
|
||||||
id="test-goal",
|
|
||||||
name="Test Goal",
|
|
||||||
description="Test",
|
|
||||||
success_criteria=[
|
|
||||||
SuccessCriterion(
|
|
||||||
id="sc-1",
|
|
||||||
description="Done",
|
|
||||||
metric="done",
|
|
||||||
target="yes",
|
|
||||||
weight=1.0,
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
return graph, goal
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_event_entry_point_subscribes_to_bus(self):
|
|
||||||
"""Entry point with trigger_type='event' subscribes and triggers on matching events."""
|
|
||||||
graph, goal = self._make_graph_and_goal()
|
|
||||||
|
|
||||||
config = AgentRuntimeConfig(
|
|
||||||
webhook_host="127.0.0.1",
|
|
||||||
webhook_port=0,
|
|
||||||
webhook_routes=[
|
|
||||||
{"source_id": "gh", "path": "/webhooks/github"},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=graph,
|
|
||||||
goal=goal,
|
|
||||||
storage_path=Path(tmpdir),
|
|
||||||
config=config,
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="gh-handler",
|
|
||||||
name="GitHub Handler",
|
|
||||||
entry_node="process-event",
|
|
||||||
trigger_type="event",
|
|
||||||
trigger_config={
|
|
||||||
"event_types": ["webhook_received"],
|
|
||||||
"filter_stream": "gh",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
trigger_calls = []
|
|
||||||
|
|
||||||
async def mock_trigger(ep_id, data, **kwargs):
|
|
||||||
trigger_calls.append((ep_id, data))
|
|
||||||
|
|
||||||
with patch.object(runtime, "trigger", side_effect=mock_trigger):
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
assert runtime.webhook_server is not None
|
|
||||||
assert runtime.webhook_server.is_running
|
|
||||||
|
|
||||||
port = runtime.webhook_server.port
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"http://127.0.0.1:{port}/webhooks/github",
|
|
||||||
json={"action": "push", "ref": "main"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.1)
|
|
||||||
|
|
||||||
assert len(trigger_calls) == 1
|
|
||||||
ep_id, data = trigger_calls[0]
|
|
||||||
assert ep_id == "gh-handler"
|
|
||||||
assert "event" in data
|
|
||||||
assert data["event"]["type"] == "webhook_received"
|
|
||||||
assert data["event"]["stream_id"] == "gh"
|
|
||||||
assert data["event"]["data"]["payload"] == {
|
|
||||||
"action": "push",
|
|
||||||
"ref": "main",
|
|
||||||
}
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
assert runtime.webhook_server is None
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_event_entry_point_filter_stream(self):
|
|
||||||
"""Entry point only triggers for matching stream_id (source_id)."""
|
|
||||||
graph, goal = self._make_graph_and_goal()
|
|
||||||
|
|
||||||
config = AgentRuntimeConfig(
|
|
||||||
webhook_routes=[
|
|
||||||
{"source_id": "github", "path": "/webhooks/github"},
|
|
||||||
{"source_id": "stripe", "path": "/webhooks/stripe"},
|
|
||||||
],
|
|
||||||
webhook_port=0,
|
|
||||||
)
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=graph,
|
|
||||||
goal=goal,
|
|
||||||
storage_path=Path(tmpdir),
|
|
||||||
config=config,
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="gh-only",
|
|
||||||
name="GitHub Only",
|
|
||||||
entry_node="process-event",
|
|
||||||
trigger_type="event",
|
|
||||||
trigger_config={
|
|
||||||
"event_types": ["webhook_received"],
|
|
||||||
"filter_stream": "github",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
trigger_calls = []
|
|
||||||
|
|
||||||
async def mock_trigger(ep_id, data, **kwargs):
|
|
||||||
trigger_calls.append((ep_id, data))
|
|
||||||
|
|
||||||
with patch.object(runtime, "trigger", side_effect=mock_trigger):
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
port = runtime.webhook_server.port
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
# POST to stripe — should NOT trigger
|
|
||||||
await session.post(
|
|
||||||
f"http://127.0.0.1:{port}/webhooks/stripe",
|
|
||||||
json={"type": "payment"},
|
|
||||||
)
|
|
||||||
# POST to github — should trigger
|
|
||||||
await session.post(
|
|
||||||
f"http://127.0.0.1:{port}/webhooks/github",
|
|
||||||
json={"action": "opened"},
|
|
||||||
)
|
|
||||||
|
|
||||||
await asyncio.sleep(0.1)
|
|
||||||
|
|
||||||
assert len(trigger_calls) == 1
|
|
||||||
assert trigger_calls[0][0] == "gh-only"
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_no_webhook_routes_skips_server(self):
|
|
||||||
"""Runtime without webhook_routes does not start a webhook server."""
|
|
||||||
graph, goal = self._make_graph_and_goal()
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=graph,
|
|
||||||
goal=goal,
|
|
||||||
storage_path=Path(tmpdir),
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="manual",
|
|
||||||
name="Manual",
|
|
||||||
entry_node="process-event",
|
|
||||||
trigger_type="manual",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert runtime.webhook_server is None
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_event_entry_point_custom_event(self):
|
|
||||||
"""Entry point can subscribe to CUSTOM events, not just webhooks."""
|
|
||||||
graph, goal = self._make_graph_and_goal()
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=graph,
|
|
||||||
goal=goal,
|
|
||||||
storage_path=Path(tmpdir),
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="custom-handler",
|
|
||||||
name="Custom Handler",
|
|
||||||
entry_node="process-event",
|
|
||||||
trigger_type="event",
|
|
||||||
trigger_config={
|
|
||||||
"event_types": ["custom"],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
trigger_calls = []
|
|
||||||
|
|
||||||
async def mock_trigger(ep_id, data, **kwargs):
|
|
||||||
trigger_calls.append((ep_id, data))
|
|
||||||
|
|
||||||
with patch.object(runtime, "trigger", side_effect=mock_trigger):
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
await runtime.event_bus.publish(
|
|
||||||
AgentEvent(
|
|
||||||
type=EventType.CUSTOM,
|
|
||||||
stream_id="some-source",
|
|
||||||
data={"key": "value"},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
await asyncio.sleep(0.1)
|
|
||||||
|
|
||||||
assert len(trigger_calls) == 1
|
|
||||||
assert trigger_calls[0][0] == "custom-handler"
|
|
||||||
assert trigger_calls[0][1]["event"]["type"] == "custom"
|
|
||||||
assert trigger_calls[0][1]["event"]["data"]["key"] == "value"
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
@@ -0,0 +1,192 @@
|
|||||||
|
"""Declarative agent configuration schema.
|
||||||
|
|
||||||
|
Allows defining agents via JSON/YAML config files instead of Python modules.
|
||||||
|
The ``AgentConfig`` model is the top-level schema loaded from ``agent.json``.
|
||||||
|
The runner detects this format by checking for a ``name`` key at the top level.
|
||||||
|
|
||||||
|
Template variables
|
||||||
|
------------------
|
||||||
|
System prompts and identity_prompt support ``{{variable_name}}`` placeholders.
|
||||||
|
These are resolved at load time from ``AgentConfig.variables``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class ToolAccessConfig(BaseModel):
|
||||||
|
"""Declarative tool access policy.
|
||||||
|
|
||||||
|
Controls which tools a node/agent has access to.
|
||||||
|
|
||||||
|
* ``all`` -- every tool from the registry.
|
||||||
|
* ``explicit`` -- only tools listed in ``allowed`` (default; empty = zero tools).
|
||||||
|
* ``none`` -- no tools at all.
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: str = Field(
|
||||||
|
default="explicit",
|
||||||
|
description="One of: 'all', 'explicit', 'none'.",
|
||||||
|
)
|
||||||
|
allowed: list[str] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="Tool names when policy='explicit'.",
|
||||||
|
)
|
||||||
|
denied: list[str] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="Tool names to deny (applied after allowed).",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NodeConfig(BaseModel):
|
||||||
|
"""Declarative node definition."""
|
||||||
|
|
||||||
|
id: str
|
||||||
|
name: str | None = None
|
||||||
|
description: str | None = None
|
||||||
|
node_type: str = Field(
|
||||||
|
default="event_loop",
|
||||||
|
description="event_loop",
|
||||||
|
)
|
||||||
|
system_prompt: str | None = None
|
||||||
|
tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig)
|
||||||
|
model: str | None = None
|
||||||
|
input_keys: list[str] = Field(default_factory=list)
|
||||||
|
output_keys: list[str] = Field(default_factory=list)
|
||||||
|
nullable_output_keys: list[str] = Field(default_factory=list)
|
||||||
|
max_iterations: int = 30
|
||||||
|
max_node_visits: int = 1
|
||||||
|
client_facing: bool = False
|
||||||
|
success_criteria: str | None = None
|
||||||
|
failure_criteria: str | None = None
|
||||||
|
skip_judge: bool = False
|
||||||
|
max_retries: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class EdgeConfig(BaseModel):
|
||||||
|
"""Declarative edge definition."""
|
||||||
|
|
||||||
|
from_node: str = Field(description="Source node ID.")
|
||||||
|
to_node: str = Field(description="Target node ID.")
|
||||||
|
condition: str = Field(
|
||||||
|
default="on_success",
|
||||||
|
description="always | on_success | on_failure | conditional | llm_decide",
|
||||||
|
)
|
||||||
|
condition_expr: str | None = None
|
||||||
|
input_mapping: dict[str, str] = Field(default_factory=dict)
|
||||||
|
priority: int = 1
|
||||||
|
|
||||||
|
|
||||||
|
class GoalConfig(BaseModel):
|
||||||
|
"""Simplified goal definition for declarative config."""
|
||||||
|
|
||||||
|
description: str
|
||||||
|
success_criteria: list[str] = Field(default_factory=list)
|
||||||
|
constraints: list[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class EntryPointConfig(BaseModel):
|
||||||
|
"""Entry point configuration."""
|
||||||
|
|
||||||
|
id: str = "default"
|
||||||
|
name: str = "Default"
|
||||||
|
entry_node: str | None = None # defaults to AgentConfig.entry_node
|
||||||
|
trigger_type: str = Field(
|
||||||
|
default="manual",
|
||||||
|
description="manual | scheduled | timer",
|
||||||
|
)
|
||||||
|
trigger_config: dict = Field(default_factory=dict)
|
||||||
|
isolation_level: str = "shared"
|
||||||
|
max_concurrent: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class MCPServerRef(BaseModel):
|
||||||
|
"""Reference to an MCP server to connect for this agent."""
|
||||||
|
|
||||||
|
name: str
|
||||||
|
config: dict | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class MetadataConfig(BaseModel):
|
||||||
|
"""Agent metadata for display / intro messages."""
|
||||||
|
|
||||||
|
intro_message: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class AgentConfig(BaseModel):
|
||||||
|
"""Top-level declarative agent configuration.
|
||||||
|
|
||||||
|
Load from ``agent.json`` and pass to
|
||||||
|
:func:`framework.runner.runner.load_agent_config` to build the
|
||||||
|
``GraphSpec`` + ``Goal`` pair.
|
||||||
|
|
||||||
|
Example (YAML)::
|
||||||
|
|
||||||
|
name: lead-enrichment-agent
|
||||||
|
version: 1.0.0
|
||||||
|
variables:
|
||||||
|
spreadsheet_id: "1ZVx..."
|
||||||
|
sheet_name: "contacts"
|
||||||
|
goal:
|
||||||
|
description: "Enrich leads in Google Sheets"
|
||||||
|
success_criteria:
|
||||||
|
- "All unprocessed leads enriched"
|
||||||
|
constraints:
|
||||||
|
- "Browser-only research"
|
||||||
|
identity_prompt: |
|
||||||
|
You are the Lead Enrichment Agent...
|
||||||
|
nodes:
|
||||||
|
- id: start
|
||||||
|
tools: {policy: explicit, allowed: [google_sheets_get_values]}
|
||||||
|
system_prompt: |
|
||||||
|
Spreadsheet ID: {{spreadsheet_id}}
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str
|
||||||
|
version: str = "1.0.0"
|
||||||
|
description: str | None = None
|
||||||
|
metadata: MetadataConfig = Field(default_factory=MetadataConfig)
|
||||||
|
|
||||||
|
# Template variables -- substituted into prompts via {{var_name}}
|
||||||
|
variables: dict[str, str] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
# Goal
|
||||||
|
goal: GoalConfig
|
||||||
|
|
||||||
|
# Graph structure
|
||||||
|
nodes: list[NodeConfig]
|
||||||
|
edges: list[EdgeConfig]
|
||||||
|
entry_node: str
|
||||||
|
terminal_nodes: list[str] = Field(default_factory=list)
|
||||||
|
pause_nodes: list[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
# Entry points (if omitted, a single "default" manual entry is created)
|
||||||
|
entry_points: list[EntryPointConfig] = Field(default_factory=list)
|
||||||
|
|
||||||
|
# Agent-level tool defaults (nodes inherit unless they override)
|
||||||
|
tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig)
|
||||||
|
mcp_servers: list[MCPServerRef] = Field(default_factory=list)
|
||||||
|
|
||||||
|
# LLM / execution
|
||||||
|
model: str | None = None
|
||||||
|
max_tokens: int = 4096
|
||||||
|
conversation_mode: str = "continuous"
|
||||||
|
identity_prompt: str = ""
|
||||||
|
loop_config: dict = Field(
|
||||||
|
default_factory=lambda: {
|
||||||
|
"max_iterations": 100,
|
||||||
|
"max_tool_calls_per_turn": 30,
|
||||||
|
"max_context_tokens": 32000,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Pipeline overrides (per-agent, merged with global config)
|
||||||
|
pipeline: dict = Field(
|
||||||
|
default_factory=dict,
|
||||||
|
description="Per-agent pipeline stage overrides. Same format as global pipeline config.",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Resource limits
|
||||||
|
max_cost_per_run: float | None = None
|
||||||
@@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Any
|
|||||||
from pydantic import AliasChoices, BaseModel, Field, computed_field
|
from pydantic import AliasChoices, BaseModel, Field, computed_field
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.executor import ExecutionResult
|
from framework.orchestrator.orchestrator import ExecutionResult
|
||||||
from framework.schemas.run import Run
|
from framework.schemas.run import Run
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -28,8 +28,11 @@ def _get_allowed_agent_roots() -> tuple[Path, ...]:
|
|||||||
"""
|
"""
|
||||||
global _ALLOWED_AGENT_ROOTS
|
global _ALLOWED_AGENT_ROOTS
|
||||||
if _ALLOWED_AGENT_ROOTS is None:
|
if _ALLOWED_AGENT_ROOTS is None:
|
||||||
|
from framework.config import COLONIES_DIR
|
||||||
|
|
||||||
_ALLOWED_AGENT_ROOTS = (
|
_ALLOWED_AGENT_ROOTS = (
|
||||||
(_REPO_ROOT / "exports").resolve(),
|
COLONIES_DIR.resolve(), # ~/.hive/colonies/
|
||||||
|
(_REPO_ROOT / "exports").resolve(), # compat fallback
|
||||||
(_REPO_ROOT / "examples").resolve(),
|
(_REPO_ROOT / "examples").resolve(),
|
||||||
(Path.home() / ".hive" / "agents").resolve(),
|
(Path.home() / ".hive" / "agents").resolve(),
|
||||||
)
|
)
|
||||||
@@ -53,7 +56,8 @@ def validate_agent_path(agent_path: str | Path) -> Path:
|
|||||||
if resolved.is_relative_to(root) and resolved != root:
|
if resolved.is_relative_to(root) and resolved != root:
|
||||||
return resolved
|
return resolved
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"agent_path must be inside an allowed directory (exports/, examples/, or ~/.hive/agents/)"
|
"agent_path must be inside an allowed directory "
|
||||||
|
"(~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user