diff --git a/.gitignore b/.gitignore
index 54798a34..dccd6d8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,6 +70,8 @@ tmp/
temp/
exports/*
+exports.old*
+artifacts/*
.claude/settings.local.json
diff --git a/core/framework/__init__.py b/core/framework/__init__.py
index 27909db6..438323c5 100644
--- a/core/framework/__init__.py
+++ b/core/framework/__init__.py
@@ -1,71 +1,23 @@
-"""
-Aden Hive Framework: A goal-driven agent runtime optimized for Builder observability.
+"""Hive Agent Framework.
-The runtime is designed around DECISIONS, not just actions. Every significant
-choice the agent makes is captured with:
-- What it was trying to do (intent)
-- What options it considered
-- What it chose and why
-- What happened as a result
-- Whether that was good or bad (evaluated post-hoc)
-
-This gives the Builder LLM the information it needs to improve agent behavior.
-
-## Testing Framework
-
-The framework includes a Goal-Based Testing system (Goal → Agent → Eval):
-- Generate tests from Goal success_criteria and constraints
-- Mandatory user approval before tests are stored
-- Parallel test execution with error categorization
-- Debug tools with fix suggestions
-
-See `framework.testing` for details.
+Core classes:
+ AgentHost -- hosts agents, manages entry points and pipeline
+ Orchestrator -- routes between nodes in a graph
+ AgentLoop -- the LLM + tool execution loop (one per node)
+ AgentLoader -- loads agent.json from disk, builds pipeline
+ DecisionTracker -- records decisions for post-hoc analysis
"""
-from framework.llm import LLMProvider
-
-try:
- from framework.llm import AnthropicProvider # noqa: F401
-except ImportError:
- pass
-from framework.runner import AgentRunner
-from framework.runtime.core import Runtime
-from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome
-from framework.schemas.run import Problem, Run, RunSummary
-
-# Testing framework
-from framework.testing import (
- ApprovalStatus,
- DebugTool,
- ErrorCategory,
- Test,
- TestResult,
- TestStorage,
- TestSuiteResult,
-)
+from framework.agent_loop import AgentLoop
+from framework.host import AgentHost
+from framework.loader import AgentLoader
+from framework.orchestrator import Orchestrator
+from framework.tracker import DecisionTracker
__all__ = [
- # Schemas
- "Decision",
- "Option",
- "Outcome",
- "DecisionEvaluation",
- "Run",
- "RunSummary",
- "Problem",
- # Runtime
- "Runtime",
- # LLM
- "LLMProvider",
- "AnthropicProvider",
- # Runner
- "AgentRunner",
- # Testing
- "Test",
- "TestResult",
- "TestSuiteResult",
- "TestStorage",
- "ApprovalStatus",
- "ErrorCategory",
- "DebugTool",
+ "AgentHost",
+ "AgentLoader",
+ "AgentLoop",
+ "DecisionTracker",
+ "Orchestrator",
]
diff --git a/core/framework/agent_loop/__init__.py b/core/framework/agent_loop/__init__.py
new file mode 100644
index 00000000..845428b1
--- /dev/null
+++ b/core/framework/agent_loop/__init__.py
@@ -0,0 +1,32 @@
+"""Agent loop -- the core agent execution primitive."""
+
+from framework.agent_loop.conversation import ( # noqa: F401
+ ConversationStore,
+ Message,
+ NodeConversation,
+)
+
+# Lazy import to avoid circular dependency with graph/event_loop/
+# (graph/event_loop/* imports framework.graph.conversation which is a shim
+# pointing here, which would trigger agent_loop.py loading, which imports
+# graph/event_loop/* again)
+
+
+def __getattr__(name: str):
+ if name in ("AgentLoop", "JudgeProtocol", "JudgeVerdict", "LoopConfig", "OutputAccumulator"):
+ from framework.agent_loop.agent_loop import (
+ AgentLoop,
+ JudgeProtocol,
+ JudgeVerdict,
+ LoopConfig,
+ OutputAccumulator,
+ )
+ _exports = {
+ "AgentLoop": AgentLoop,
+ "JudgeProtocol": JudgeProtocol,
+ "JudgeVerdict": JudgeVerdict,
+ "LoopConfig": LoopConfig,
+ "OutputAccumulator": OutputAccumulator,
+ }
+ return _exports[name]
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/core/framework/graph/event_loop_node.py b/core/framework/agent_loop/agent_loop.py
similarity index 87%
rename from core/framework/graph/event_loop_node.py
rename to core/framework/agent_loop/agent_loop.py
index 61f447af..39f47dd4 100644
--- a/core/framework/graph/event_loop_node.py
+++ b/core/framework/agent_loop/agent_loop.py
@@ -21,16 +21,16 @@ from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from typing import Any
-from framework.graph.conversation import ConversationStore, NodeConversation
-from framework.graph.event_loop import types as event_loop_types
-from framework.graph.event_loop.compaction import (
+from framework.agent_loop.conversation import ConversationStore, NodeConversation
+from framework.agent_loop.internals import types as event_loop_types
+from framework.agent_loop.internals.compaction import (
build_emergency_summary,
build_llm_compaction_prompt,
compact,
format_messages_for_summary,
llm_compact,
)
-from framework.graph.event_loop.cursor_persistence import (
+from framework.agent_loop.internals.cursor_persistence import (
RestoredState,
check_pause,
drain_injection_queue,
@@ -38,7 +38,7 @@ from framework.graph.event_loop.cursor_persistence import (
restore,
write_cursor,
)
-from framework.graph.event_loop.event_publishing import (
+from framework.agent_loop.internals.event_publishing import (
generate_action_plan,
log_skip_judge,
publish_context_usage,
@@ -54,27 +54,24 @@ from framework.graph.event_loop.event_publishing import (
publish_tool_started,
run_hooks,
)
-from framework.graph.event_loop.judge_pipeline import (
+from framework.agent_loop.internals.judge_pipeline import (
SubagentJudge as SharedSubagentJudge,
judge_turn,
)
-from framework.graph.event_loop.stall_detector import (
+from framework.agent_loop.internals.stall_detector import (
fingerprint_tool_calls,
is_stalled,
is_tool_doom_loop,
ngram_similarity,
)
-from framework.graph.event_loop.subagent_executor import execute_subagent
-from framework.graph.event_loop.synthetic_tools import (
+from framework.agent_loop.internals.synthetic_tools import (
build_ask_user_multiple_tool,
build_ask_user_tool,
- build_delegate_tool,
build_escalate_tool,
- build_report_to_parent_tool,
build_set_output_tool,
handle_set_output,
)
-from framework.graph.event_loop.tool_result_handler import (
+from framework.agent_loop.internals.tool_result_handler import (
build_json_preview,
execute_tool,
extract_json_metadata,
@@ -82,12 +79,12 @@ from framework.graph.event_loop.tool_result_handler import (
restore_spill_counter,
truncate_tool_result,
)
-from framework.graph.event_loop.types import (
+from framework.agent_loop.internals.types import (
JudgeProtocol,
JudgeVerdict,
TriggerEvent,
)
-from framework.graph.node import NodeContext, NodeProtocol, NodeResult
+from framework.orchestrator.node import NodeContext, NodeProtocol, NodeResult
from framework.llm.capabilities import supports_image_tool_results
from framework.llm.provider import Tool, ToolResult, ToolUse
from framework.llm.stream_events import (
@@ -96,8 +93,8 @@ from framework.llm.stream_events import (
TextDeltaEvent,
ToolCallEvent,
)
-from framework.runtime.event_bus import EventBus
-from framework.runtime.llm_debug_logger import log_llm_turn
+from framework.host.event_bus import EventBus
+from framework.tracker.llm_debug_logger import log_llm_turn
logger = logging.getLogger(__name__)
@@ -163,43 +160,9 @@ def _is_context_too_large_error(exc: BaseException) -> bool:
# ---------------------------------------------------------------------------
-# Escalation receiver (temporary routing target for subagent → user input)
# ---------------------------------------------------------------------------
-class _EscalationReceiver:
- """Temporary receiver registered in node_registry for subagent escalation routing.
-
- When a subagent calls ``report_to_parent(wait_for_response=True)``, the callback
- creates one of these, registers it under a unique escalation ID in the executor's
- ``node_registry``, and awaits ``wait()``. The TUI / runner calls
- ``inject_input(escalation_id, content)`` which the ``ExecutionStream`` routes here
- via ``inject_event()`` — matching the same ``hasattr(node, "inject_event")`` check
- used for regular ``EventLoopNode`` instances.
- """
-
- def __init__(self) -> None:
- self._event = asyncio.Event()
- self._response: str | None = None
- self._awaiting_input = True # So inject_message() can prefer us
-
- async def inject_event(
- self,
- content: str,
- *,
- is_client_input: bool = False,
- image_content: list[dict] | None = None,
- ) -> None:
- """Called by ExecutionStream.inject_input() when the user responds."""
- self._response = content
- self._event.set()
-
- async def wait(self) -> str | None:
- """Block until inject_event() delivers the user's response."""
- await self._event.wait()
- return self._response
-
-
# ---------------------------------------------------------------------------
# Judge protocol (simple 3-action interface for event loop evaluation)
# ---------------------------------------------------------------------------
@@ -224,7 +187,7 @@ OutputAccumulator = event_loop_types.OutputAccumulator
# ---------------------------------------------------------------------------
-class EventLoopNode(NodeProtocol):
+class AgentLoop(NodeProtocol):
"""Multi-turn LLM streaming loop with tool execution and judge evaluation.
Lifecycle:
@@ -284,9 +247,6 @@ class EventLoopNode(NodeProtocol):
# Monotonic counter for spillover file naming (web_search_1.txt, etc.)
self._spill_counter: int = 0
# Subagent mark_complete: when True, _evaluate returns ACCEPT immediately
- self._mark_complete_flag = False
- # Counter for subagent instances (1, 2, 3, ...)
- self._subagent_instance_counter: dict[str, int] = {}
def validate_input(self, ctx: NodeContext) -> list[str]:
"""Validate hard requirements only.
@@ -307,7 +267,7 @@ class EventLoopNode(NodeProtocol):
async def execute(self, ctx: NodeContext) -> NodeResult:
"""Run the event loop."""
logger.debug(
- "[EventLoopNode.execute] Starting execution for node=%s, stream=%s",
+ "[AgentLoop.execute] Starting execution for node=%s, stream=%s",
ctx.node_id,
ctx.stream_id,
)
@@ -320,7 +280,7 @@ class EventLoopNode(NodeProtocol):
# Store skill dirs for AS-9 file-read interception in _execute_tool
self._skill_dirs: list[str] = ctx.skill_dirs
logger.debug(
- "[EventLoopNode.execute] node_id=%s, execution_id=%s, max_iterations=%d",
+ "[AgentLoop.execute] node_id=%s, execution_id=%s, max_iterations=%d",
node_id,
execution_id,
self._config.max_iterations,
@@ -402,7 +362,7 @@ class EventLoopNode(NodeProtocol):
# execution preamble and node-type preamble. The stored
# prompt may be stale after code changes or when runtime-
# injected context (e.g. worker identity) has changed.
- from framework.graph.prompting import build_system_prompt_for_node_context
+ from framework.orchestrator.prompting import build_system_prompt_for_node_context
_current_prompt = build_system_prompt_for_node_context(ctx)
if conversation.system_prompt != _current_prompt:
@@ -425,7 +385,7 @@ class EventLoopNode(NodeProtocol):
await self._conversation_store.clear()
# Fresh conversation: either isolated mode or first node in continuous mode.
- from framework.graph.prompting import build_system_prompt_for_node_context
+ from framework.orchestrator.prompting import build_system_prompt_for_node_context
system_prompt = build_system_prompt_for_node_context(ctx)
@@ -484,7 +444,7 @@ class EventLoopNode(NodeProtocol):
# 2a. Guard: ensure at least one non-system message exists.
# A restored conversation may have 0 messages if phase_id filtering
# removes them all, or if a prior run stored metadata without messages
- # (e.g. subagent that failed before the first LLM call).
+ # (e.g. node that failed before the first LLM call).
if conversation.message_count == 0:
initial_message = self._build_initial_message(ctx)
if initial_message:
@@ -502,37 +462,10 @@ class EventLoopNode(NodeProtocol):
tools.append(self._build_ask_user_tool())
if stream_id == "queen":
tools.append(self._build_ask_user_multiple_tool())
- # Workers/subagents can escalate blockers to the queen.
+ # Workers can escalate blockers to the queen.
if stream_id not in ("queen", "judge"):
tools.append(self._build_escalate_tool())
- # Add delegate_to_sub_agent tool if:
- # - Node has sub_agents defined
- # - We are NOT in subagent mode (prevents nested delegation)
- if not ctx.is_subagent_mode:
- sub_agents = getattr(ctx.node_spec, "sub_agents", None) or []
- if sub_agents:
- delegate_tool = self._build_delegate_tool(sub_agents, ctx.node_registry)
- if delegate_tool:
- tools.append(delegate_tool)
- logger.info(
- "[%s] delegate_to_sub_agent injected (sub_agents=%s)",
- node_id,
- sub_agents,
- )
- else:
- logger.error(
- "[%s] _build_delegate_tool returned None for sub_agents=%s",
- node_id,
- sub_agents,
- )
- else:
- logger.debug("[%s] Skipped delegate tool (is_subagent_mode=True)", node_id)
-
- # Add report_to_parent tool for sub-agents with a report callback
- if ctx.is_subagent_mode and ctx.report_callback is not None:
- tools.append(self._build_report_to_parent_tool())
-
logger.info(
"[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s",
node_id,
@@ -565,11 +498,11 @@ class EventLoopNode(NodeProtocol):
# 6. Main loop
logger.debug(
- "[EventLoopNode.execute] Entering main loop, start_iteration=%d", start_iteration
+ "[AgentLoop.execute] Entering main loop, start_iteration=%d", start_iteration
)
for iteration in range(start_iteration, self._config.max_iterations):
iter_start = time.time()
- logger.debug("[EventLoopNode.execute] iteration=%d starting", iteration)
+ logger.debug("[AgentLoop.execute] iteration=%d starting", iteration)
# 6a. Check pause (no current-iteration data yet — only log_node_complete needed)
if await self._check_pause(ctx, conversation, iteration):
@@ -601,18 +534,18 @@ class EventLoopNode(NodeProtocol):
# 6b. Drain injection queue
logger.debug(
- "[EventLoopNode.execute] iteration=%d: draining injection queue...", iteration
+ "[AgentLoop.execute] iteration=%d: draining injection queue...", iteration
)
drained_injections = await self._drain_injection_queue(conversation, ctx)
logger.debug(
- "[EventLoopNode.execute] iteration=%d: drained %d injections",
+ "[AgentLoop.execute] iteration=%d: drained %d injections",
iteration,
drained_injections,
)
# 6b1. Drain trigger queue (framework-level signals)
drained_triggers = await self._drain_trigger_queue(conversation)
logger.debug(
- "[EventLoopNode.execute] iteration=%d: drained %d triggers",
+ "[AgentLoop.execute] iteration=%d: drained %d triggers",
iteration,
drained_triggers,
)
@@ -685,8 +618,6 @@ class EventLoopNode(NodeProtocol):
"ask_user",
"ask_user_multiple",
"escalate",
- "delegate_to_sub_agent",
- "report_to_parent",
}
synthetic = [t for t in tools if t.name in _synthetic_names]
tools.clear()
@@ -696,11 +627,11 @@ class EventLoopNode(NodeProtocol):
# 6b3. Dynamic prompt refresh (phase switching / memory refresh)
if ctx.dynamic_prompt_provider is not None or ctx.dynamic_memory_provider is not None:
if ctx.dynamic_prompt_provider is not None:
- from framework.graph.prompting import stamp_prompt_datetime
+ from framework.orchestrator.prompting import stamp_prompt_datetime
_new_prompt = stamp_prompt_datetime(ctx.dynamic_prompt_provider())
else:
- from framework.graph.prompting import build_system_prompt_for_node_context
+ from framework.orchestrator.prompting import build_system_prompt_for_node_context
_new_prompt = build_system_prompt_for_node_context(ctx)
if _new_prompt != conversation.system_prompt:
@@ -743,7 +674,7 @@ class EventLoopNode(NodeProtocol):
len(conversation.messages),
)
logger.debug(
- "[EventLoopNode.execute] iteration=%d: entering _run_single_turn loop", iteration
+ "[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration
)
_stream_retry_count = 0
_turn_cancelled = False
@@ -752,7 +683,7 @@ class EventLoopNode(NodeProtocol):
while True:
try:
logger.debug(
- "[EventLoopNode.execute] iteration=%d: calling _run_single_turn (retry=%d)",
+ "[AgentLoop.execute] iteration=%d: calling _run_single_turn (retry=%d)",
iteration,
_stream_retry_count,
)
@@ -768,12 +699,12 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
request_system_prompt,
request_messages,
- reported_to_parent,
+ _,
) = await self._run_single_turn(
ctx, conversation, tools, iteration, accumulator
)
logger.debug(
- "[EventLoopNode.execute] iteration=%d:"
+ "[AgentLoop.execute] iteration=%d:"
" _run_single_turn completed successfully",
iteration,
)
@@ -842,13 +773,13 @@ class EventLoopNode(NodeProtocol):
break # success — exit retry loop
except TurnCancelled:
- logger.debug("[EventLoopNode.execute] iteration=%d: TurnCancelled", iteration)
+ logger.debug("[AgentLoop.execute] iteration=%d: TurnCancelled", iteration)
_turn_cancelled = True
break
except Exception as e:
logger.debug(
- "[EventLoopNode.execute] iteration=%d:"
+ "[AgentLoop.execute] iteration=%d:"
" Exception in _run_single_turn: %s (%s)",
iteration,
type(e).__name__,
@@ -1024,7 +955,7 @@ class EventLoopNode(NodeProtocol):
and not outputs_set
and not user_input_requested
and not queen_input_requested
- and not reported_to_parent
+                        # (reported_to_parent check removed -- subagent reporting no longer exists)
)
if truly_empty and accumulator is not None:
missing = self._get_missing_output_keys(
@@ -1276,14 +1207,14 @@ class EventLoopNode(NodeProtocol):
# blocking and resumption.
_is_worker = (
stream_id not in ("queen", "judge")
- and not ctx.is_subagent_mode
+ # (is_subagent_mode check removed -- loops no longer run as subagents)
and not ctx.supports_direct_user_io
and self._event_bus is not None
)
_worker_no_tool_turn = (
not real_tool_results
and not outputs_set
- and not reported_to_parent
+                        # (reported_to_parent check removed -- subagent reporting no longer exists)
and not queen_input_requested
and not user_input_requested
)
@@ -1733,7 +1664,7 @@ class EventLoopNode(NodeProtocol):
# 6i. Judge evaluation
should_judge = (
- ctx.is_subagent_mode # Always evaluate subagents
+ False
or (iteration + 1) % self._config.judge_every_n_turns == 0
or not real_tool_results # no real tool calls = natural stop
)
@@ -1789,7 +1720,7 @@ class EventLoopNode(NodeProtocol):
missing = self._get_missing_output_keys(
accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
)
- if missing and self._judge is not None and not self._mark_complete_flag:
+ if missing and self._judge is not None:
hint = (
f"Task incomplete. Required outputs not yet produced: {missing}. "
f"Follow your system prompt instructions to complete the work."
@@ -1988,7 +1919,7 @@ class EventLoopNode(NodeProtocol):
image_content: Optional list of OpenAI-style image blocks to attach.
"""
logger.debug(
- "[EventLoopNode.inject_event] content_len=%d,"
+ "[AgentLoop.inject_event] content_len=%d,"
" is_client_input=%s, has_images=%s,"
" queue_size_before=%d",
len(content) if content else 0,
@@ -1998,15 +1929,15 @@ class EventLoopNode(NodeProtocol):
)
try:
await self._injection_queue.put((content, is_client_input, image_content))
- logger.debug("[EventLoopNode.inject_event] Message queued successfully")
+ logger.debug("[AgentLoop.inject_event] Message queued successfully")
except Exception as e:
- logger.exception("[EventLoopNode.inject_event] Failed to queue message: %s", e)
+ logger.exception("[AgentLoop.inject_event] Failed to queue message: %s", e)
raise
try:
self._input_ready.set()
- logger.debug("[EventLoopNode.inject_event] _input_ready.set() called")
+ logger.debug("[AgentLoop.inject_event] _input_ready.set() called")
except Exception as e:
- logger.exception("[EventLoopNode.inject_event] Failed to set _input_ready: %s", e)
+ logger.exception("[AgentLoop.inject_event] Failed to set _input_ready: %s", e)
raise
async def inject_trigger(self, trigger: TriggerEvent) -> None:
@@ -2157,7 +2088,6 @@ class EventLoopNode(NodeProtocol):
ask_user_prompt = ""
ask_user_options: list[str] | None = None
queen_input_requested = False
- reported_to_parent = False
# Accumulate ALL tool calls across inner iterations for L3 logging.
# Unlike real_tool_results (reset each inner iteration), this persists.
logged_tool_calls: list[dict] = []
@@ -2231,16 +2161,28 @@ class EventLoopNode(NodeProtocol):
):
if isinstance(event, TextDeltaEvent):
accumulated_text = event.snapshot
- await self._publish_text_delta(
- stream_id,
- node_id,
- event.content,
- event.snapshot,
- ctx,
- execution_id,
- iteration=iteration,
- inner_turn=inner_turn,
- )
+ # Filter <think>...</think> blocks from client output.
+ # Content inside think tags is internal reasoning -- only
+ # the text after </think> is shown to the user.
+ _content = event.content
+ if "<think>" in event.snapshot and "</think>" not in event.snapshot:
+ _content = "" # still inside think block
+ elif "</think>" in _content:
+ # End of think block -- emit only text after the tag
+ _content = _content.split("</think>", 1)[-1]
+ elif "<think>" in _content:
+ _content = "" # opening tag in this chunk
+ if _content:
+ await self._publish_text_delta(
+ stream_id,
+ node_id,
+ _content,
+ event.snapshot,
+ ctx,
+ execution_id,
+ iteration=iteration,
+ inner_turn=inner_turn,
+ )
elif isinstance(event, ToolCallEvent):
_tc.append(event)
@@ -2348,10 +2290,27 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
final_system_prompt,
final_messages,
- reported_to_parent,
+ False,
)
- # Execute tool calls — framework tools (set_output, ask_user)
+ # Priority drain: if user sent a message while the LLM was
+ # streaming, inject it into the conversation NOW -- before tool
+ # execution. The LLM will see it on the next inner turn.
+ if not self._injection_queue.empty():
+ while not self._injection_queue.empty():
+ _inj_content, _inj_client, _inj_images = (
+ self._injection_queue.get_nowait()
+ )
+ if _inj_client:
+ await conversation.add_user_message(_inj_content)
+ logger.info(
+ "[%s] Priority-injected user message mid-turn (%d chars)",
+ node_id, len(_inj_content),
+ )
+ else:
+ await conversation.add_user_message(_inj_content)
+
+ # Execute tool calls -- framework tools (set_output, ask_user)
# run inline; real MCP tools run in parallel.
real_tool_results: list[dict] = []
limit_hit = False
@@ -2361,13 +2320,12 @@ class EventLoopNode(NodeProtocol):
)
# Phase 1: triage — handle framework tools immediately,
- # queue real tools and subagents for parallel execution.
+ # queue real tools for parallel execution.
results_by_id: dict[str, ToolResult] = {}
timing_by_id: dict[
str, dict[str, Any]
] = {} # tool_use_id -> {start_timestamp, duration_s}
pending_real: list[ToolCallEvent] = []
- pending_subagent: list[ToolCallEvent] = []
for tc in tool_calls:
tool_call_count += 1
@@ -2610,76 +2568,6 @@ class EventLoopNode(NodeProtocol):
)
results_by_id[tc.tool_use_id] = result
- elif tc.tool_name == "delegate_to_sub_agent":
- # Guard: in continuous mode the LLM may see delegate
- # calls from a previous node's conversation history and
- # attempt to re-use the tool on a node that doesn't own
- # it. Only accept if the tool was actually offered.
- if not any(t.name == "delegate_to_sub_agent" for t in tools):
- logger.warning(
- "[%s] LLM called delegate_to_sub_agent but tool "
- "was not offered to this node — rejecting",
- node_id,
- )
- result = ToolResult(
- tool_use_id=tc.tool_use_id,
- content=(
- "ERROR: delegate_to_sub_agent is not available "
- "on this node. This tool belongs to a different "
- "node in the workflow."
- ),
- is_error=True,
- )
- results_by_id[tc.tool_use_id] = result
- continue
- # --- Framework-level subagent delegation ---
- # Queue for parallel execution in Phase 2
- logger.info(
- "🔄 LLM requesting subagent delegation: agent_id='%s', task='%s'",
- tc.tool_input.get("agent_id", "?"),
- (tc.tool_input.get("task", "")[:100] + "...")
- if len(tc.tool_input.get("task", "")) > 100
- else tc.tool_input.get("task", ""),
- )
- pending_subagent.append(tc)
-
- elif tc.tool_name == "report_to_parent":
- # --- Report from sub-agent to parent (optionally blocking) ---
- reported_to_parent = True
- msg = tc.tool_input.get("message", "")
- data = tc.tool_input.get("data")
- wait = tc.tool_input.get("wait_for_response", False)
- mark_complete = tc.tool_input.get("mark_complete", False)
- response = None
-
- if ctx.report_callback:
- try:
- response = await ctx.report_callback(
- msg,
- data,
- wait_for_response=wait,
- )
- except Exception:
- logger.warning(
- "[%s] report_to_parent callback failed (swallowed)",
- node_id,
- exc_info=True,
- )
-
- if mark_complete:
- self._mark_complete_flag = True
- logger.info(
- "[%s] mark_complete=True — subagent will accept on this iteration",
- node_id,
- )
-
- result = ToolResult(
- tool_use_id=tc.tool_use_id,
- content=response if (wait and response) else "Report sent to parent.",
- is_error=False,
- )
- results_by_id[tc.tool_use_id] = result
-
else:
# --- Real tool: check for truncated args, else queue ---
if "_raw" in tc.tool_input:
@@ -2754,175 +2642,6 @@ class EventLoopNode(NodeProtocol):
result = raw
results_by_id[tc.tool_use_id] = self._truncate_tool_result(result, tc.tool_name)
- # Phase 2b: execute subagent delegations in parallel.
- if pending_subagent:
- _subagent_timeout = self._config.subagent_timeout_seconds
- _inactivity_timeout = self._config.subagent_inactivity_timeout_seconds
-
- async def _timed_subagent(
- _ctx: NodeContext,
- _tc: ToolCallEvent,
- _acc: OutputAccumulator = accumulator,
- _wall_timeout: float = _subagent_timeout,
- _activity_timeout: float = _inactivity_timeout,
- ) -> tuple[ToolResult | BaseException, str, float]:
- _s = time.time()
- _iso = datetime.now(UTC).isoformat()
- _last_activity = _s
- _activity_event = asyncio.Event()
-
- async def _watchdog() -> None:
- """Watchdog that times out only after inactivity period."""
- nonlocal _last_activity
- while True:
- _now = time.time()
- _inactive_for = _now - _last_activity
- _remaining = _activity_timeout - _inactive_for
-
- if _remaining <= 0:
- # Inactivity timeout reached
- return
-
- try:
- await asyncio.wait_for(_activity_event.wait(), timeout=_remaining)
- _activity_event.clear()
- except TimeoutError:
- # Check again in case activity happened during wait
- continue
-
- async def _run_with_activity_timeout(
- _coro,
- ) -> ToolResult:
- """Run subagent with activity-based timeout."""
- _watchdog_task = asyncio.create_task(_watchdog())
- try:
- _result = await _coro
- return _result
- finally:
- _watchdog_task.cancel()
- try:
- await _watchdog_task
- except asyncio.CancelledError:
- pass
-
- try:
- # Subscribe to subagent activity events to reset inactivity timer
- async def _on_subagent_activity(event) -> None:
- nonlocal _last_activity
- _last_activity = time.time()
- _activity_event.set()
-
- _sub_id = None
- if self._event_bus and _activity_timeout > 0:
- from framework.runtime.event_bus import EventType
-
- _sub_id = self._event_bus.subscribe(
- event_types=[
- EventType.TOOL_CALL_STARTED,
- EventType.LLM_TEXT_DELTA,
- EventType.EXECUTION_STARTED,
- ],
- handler=_on_subagent_activity,
- )
-
- try:
- _coro = self._execute_subagent(
- _ctx,
- _tc.tool_input.get("agent_id", ""),
- _tc.tool_input.get("task", ""),
- accumulator=_acc,
- )
-
- if _activity_timeout > 0:
- # Use activity-based timeout with wall-clock max
- _result_coro = _run_with_activity_timeout(_coro)
- if _wall_timeout > 0:
- _r = await asyncio.wait_for(_result_coro, timeout=_wall_timeout)
- else:
- _r = await _result_coro
- elif _wall_timeout > 0:
- _r = await asyncio.wait_for(_coro, timeout=_wall_timeout)
- else:
- _r = await _coro
- finally:
- if _sub_id and self._event_bus:
- self._event_bus.unsubscribe(_sub_id)
-
- except TimeoutError:
- _agent_id = _tc.tool_input.get("agent_id", "unknown")
- _elapsed = time.time() - _s
- logger.warning(
- "Subagent '%s' timed out after %.0fs (inactivity threshold: %.0fs)",
- _agent_id,
- _elapsed,
- _activity_timeout if _activity_timeout > 0 else _wall_timeout,
- )
- _r = ToolResult(
- tool_use_id=_tc.tool_use_id,
- content=(
- f"Subagent '{_agent_id}' timed out after "
- f"{_elapsed:.0f}s of inactivity. "
- "The subagent was not making progress. "
- "Try a simpler task or break it into smaller pieces."
- ),
- is_error=True,
- )
- except BaseException as _exc:
- _r = _exc
- _dur = round(time.time() - _s, 3)
- return _r, _iso, _dur
-
- subagent_timed = await asyncio.gather(
- *(_timed_subagent(ctx, tc) for tc in pending_subagent),
- return_exceptions=True,
- )
- for tc, entry in zip(pending_subagent, subagent_timed, strict=True):
- if isinstance(entry, BaseException):
- raw = entry
- _start_iso = datetime.now(UTC).isoformat()
- _dur_s = 0
- else:
- raw, _start_iso, _dur_s = entry
- _sa_timing = {
- "start_timestamp": _start_iso,
- "duration_s": _dur_s,
- }
- if isinstance(raw, BaseException):
- result = ToolResult(
- tool_use_id=tc.tool_use_id,
- content=json.dumps(
- {
- "message": f"Sub-agent execution raised: {raw}",
- "data": None,
- "metadata": {"success": False, "error": str(raw)},
- }
- ),
- is_error=True,
- )
- else:
- # Attach the tool_use_id to the result
- result = ToolResult(
- tool_use_id=tc.tool_use_id,
- content=raw.content,
- is_error=raw.is_error,
- )
- # Route through _truncate_tool_result so large
- # subagent results are saved to spillover files
- # and survive pruning (instead of being "cleared
- # from context" with no recovery path).
- result = self._truncate_tool_result(result, "delegate_to_sub_agent")
- results_by_id[tc.tool_use_id] = result
- logged_tool_calls.append(
- {
- "tool_use_id": tc.tool_use_id,
- "tool_name": "delegate_to_sub_agent",
- "tool_input": tc.tool_input,
- "content": result.content,
- "is_error": result.is_error,
- **_sa_timing,
- }
- )
-
# Phase 3: record results into conversation in original order,
# build logged/real lists, and publish completed events.
for tc in tool_calls[:executed_in_batch]:
@@ -2936,8 +2655,6 @@ class EventLoopNode(NodeProtocol):
"ask_user",
"ask_user_multiple",
"escalate",
- "delegate_to_sub_agent",
- "report_to_parent",
):
tool_entry = {
"tool_use_id": tc.tool_use_id,
@@ -3056,7 +2773,7 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
final_system_prompt,
final_messages,
- reported_to_parent,
+ False,
)
# --- Mid-turn pruning: prevent context blowup within a single turn ---
@@ -3090,7 +2807,7 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
final_system_prompt,
final_messages,
- reported_to_parent,
+ False,
)
# Tool calls processed -- loop back to stream with updated conversation
@@ -3118,16 +2835,6 @@ class EventLoopNode(NodeProtocol):
"""Build the synthetic escalate tool. Delegates to synthetic_tools module."""
return build_escalate_tool()
- def _build_delegate_tool(
- self, sub_agents: list[str], node_registry: dict[str, Any]
- ) -> Tool | None:
- """Build the synthetic delegate_to_sub_agent tool. Delegates to synthetic_tools module."""
- return build_delegate_tool(sub_agents, node_registry)
-
- def _build_report_to_parent_tool(self) -> Tool:
- """Build the synthetic report_to_parent tool. Delegates to synthetic_tools module."""
- return build_report_to_parent_tool()
-
def _handle_set_output(
self,
tool_input: dict[str, Any],
@@ -3151,7 +2858,7 @@ class EventLoopNode(NodeProtocol):
) -> JudgeVerdict:
"""Evaluate the current state. Delegates to judge_pipeline module."""
return await judge_turn(
- mark_complete_flag=self._mark_complete_flag,
+ mark_complete_flag=False,
judge=self._judge,
ctx=ctx,
conversation=conversation,
@@ -3176,7 +2883,7 @@ class EventLoopNode(NodeProtocol):
Delegates to :func:`extract_tool_call_history` in conversation.py.
"""
- from framework.graph.conversation import extract_tool_call_history
+ from framework.agent_loop.conversation import extract_tool_call_history
return extract_tool_call_history(conversation.messages, max_entries=max_entries)
@@ -3781,46 +3488,3 @@ class EventLoopNode(NodeProtocol):
# Subagent Execution
# -------------------------------------------------------------------
- async def _execute_subagent(
- self,
- ctx: NodeContext,
- agent_id: str,
- task: str,
- *,
- accumulator: OutputAccumulator | None = None,
- ) -> ToolResult:
- """Execute a subagent and return the result as a ToolResult.
-
- The subagent:
- - Gets a fresh conversation with just the task
- - Has read-only access to the parent's readable data buffer
- - Cannot delegate to its own subagents (prevents recursion)
- - Returns its output in structured JSON format
-
- Args:
- ctx: Parent node's context (for data buffer, tools, LLM access).
- agent_id: The node ID of the subagent to invoke.
- task: The task description to give the subagent.
- accumulator: Parent's OutputAccumulator — provides outputs that
- have been set via ``set_output`` but not yet written to
- data buffer (which only happens after the node completes).
-
- Returns:
- ToolResult with structured JSON output containing:
- - message: Human-readable summary
- - data: Subagent's output (free-form JSON)
- - metadata: Execution metadata (success, tokens, latency)
- """
- return await execute_subagent(
- ctx=ctx,
- agent_id=agent_id,
- task=task,
- accumulator=accumulator,
- event_bus=self._event_bus,
- config=self._config,
- tool_executor=self._tool_executor,
- conversation_store=self._conversation_store,
- subagent_instance_counter=self._subagent_instance_counter,
- event_loop_node_cls=type(self),
- escalation_receiver_cls=_EscalationReceiver,
- )
diff --git a/core/framework/graph/conversation.py b/core/framework/agent_loop/conversation.py
similarity index 99%
rename from core/framework/graph/conversation.py
rename to core/framework/agent_loop/conversation.py
index 40b720fc..dcd9da70 100644
--- a/core/framework/graph/conversation.py
+++ b/core/framework/agent_loop/conversation.py
@@ -324,7 +324,7 @@ def _try_extract_key(content: str, key: str) -> str | None:
3. Colon format: ``key: value``.
4. Equals format: ``key = value``.
"""
- from framework.graph.node import find_json_object
+ from framework.orchestrator.node import find_json_object
# 1. Whole message is JSON
try:
diff --git a/core/framework/agent_loop/internals/__init__.py b/core/framework/agent_loop/internals/__init__.py
new file mode 100644
index 00000000..45601913
--- /dev/null
+++ b/core/framework/agent_loop/internals/__init__.py
@@ -0,0 +1,7 @@
+"""Agent loop internals -- compaction, judge, tools, subagent execution.
+
+Re-exports submodule contents at the package root for convenient access.
+"""
+
+from framework.agent_loop.internals.compaction import * # noqa: F401, F403
+from framework.agent_loop.internals.synthetic_tools import * # noqa: F401, F403
diff --git a/core/framework/graph/event_loop/compaction.py b/core/framework/agent_loop/internals/compaction.py
similarity index 97%
rename from core/framework/graph/event_loop/compaction.py
rename to core/framework/agent_loop/internals/compaction.py
index a22da05a..1b54be9a 100644
--- a/core/framework/graph/event_loop/compaction.py
+++ b/core/framework/agent_loop/internals/compaction.py
@@ -19,11 +19,11 @@ from datetime import UTC, datetime
from pathlib import Path
from typing import Any
-from framework.graph.conversation import Message, NodeConversation
-from framework.graph.event_loop.event_publishing import publish_context_usage
-from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
-from framework.graph.node import NodeContext
-from framework.runtime.event_bus import EventBus
+from framework.agent_loop.conversation import Message, NodeConversation
+from framework.agent_loop.internals.event_publishing import publish_context_usage
+from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator
+from framework.orchestrator.node import NodeContext
+from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -368,8 +368,8 @@ async def llm_compact(
in half and each half is summarised independently. Tool history is
appended once at the top-level call (``_depth == 0``).
"""
- from framework.graph.conversation import extract_tool_call_history
- from framework.graph.event_loop.tool_result_handler import is_context_too_large_error
+ from framework.agent_loop.conversation import extract_tool_call_history
+ from framework.agent_loop.internals.tool_result_handler import is_context_too_large_error
if _depth > max_depth:
raise RuntimeError(f"LLM compaction recursion limit ({max_depth})")
@@ -724,7 +724,7 @@ async def log_compaction(
)
if event_bus:
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
event_data: dict[str, Any] = {
"level": level,
@@ -861,6 +861,6 @@ def _extract_tool_call_history(conversation: NodeConversation) -> str:
directly (vs. the module-level extract_tool_call_history in conversation.py
which works on raw message lists).
"""
- from framework.graph.conversation import extract_tool_call_history
+ from framework.agent_loop.conversation import extract_tool_call_history
return extract_tool_call_history(list(conversation.messages))
diff --git a/core/framework/graph/event_loop/cursor_persistence.py b/core/framework/agent_loop/internals/cursor_persistence.py
similarity index 97%
rename from core/framework/graph/event_loop/cursor_persistence.py
rename to core/framework/agent_loop/internals/cursor_persistence.py
index 65f7d5fc..627fee8c 100644
--- a/core/framework/graph/event_loop/cursor_persistence.py
+++ b/core/framework/agent_loop/internals/cursor_persistence.py
@@ -14,9 +14,9 @@ from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
-from framework.graph.conversation import ConversationStore, NodeConversation
-from framework.graph.event_loop.types import LoopConfig, OutputAccumulator, TriggerEvent
-from framework.graph.node import NodeContext
+from framework.agent_loop.conversation import ConversationStore, NodeConversation
+from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator, TriggerEvent
+from framework.orchestrator.node import NodeContext
from framework.llm.capabilities import supports_image_tool_results
logger = logging.getLogger(__name__)
diff --git a/core/framework/graph/event_loop/event_publishing.py b/core/framework/agent_loop/internals/event_publishing.py
similarity index 97%
rename from core/framework/graph/event_loop/event_publishing.py
rename to core/framework/agent_loop/internals/event_publishing.py
index 85846620..69e487ab 100644
--- a/core/framework/graph/event_loop/event_publishing.py
+++ b/core/framework/agent_loop/internals/event_publishing.py
@@ -9,10 +9,10 @@ from __future__ import annotations
import logging
import time
-from framework.graph.conversation import NodeConversation
-from framework.graph.event_loop.types import HookContext
-from framework.graph.node import NodeContext
-from framework.runtime.event_bus import EventBus
+from framework.agent_loop.conversation import NodeConversation
+from framework.agent_loop.internals.types import HookContext
+from framework.orchestrator.node import NodeContext
+from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -177,7 +177,7 @@ async def publish_context_usage(
if not event_bus:
return
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
estimated = conversation.estimate_tokens()
max_tokens = conversation._max_context_tokens
diff --git a/core/framework/graph/event_loop/judge_pipeline.py b/core/framework/agent_loop/internals/judge_pipeline.py
similarity index 95%
rename from core/framework/graph/event_loop/judge_pipeline.py
rename to core/framework/agent_loop/internals/judge_pipeline.py
index 281d3991..2bebe4a7 100644
--- a/core/framework/graph/event_loop/judge_pipeline.py
+++ b/core/framework/agent_loop/internals/judge_pipeline.py
@@ -5,9 +5,9 @@ from __future__ import annotations
import logging
from collections.abc import Callable
-from framework.graph.conversation import NodeConversation
-from framework.graph.event_loop.types import JudgeProtocol, JudgeVerdict, OutputAccumulator
-from framework.graph.node import NodeContext
+from framework.agent_loop.conversation import NodeConversation
+from framework.agent_loop.internals.types import JudgeProtocol, JudgeVerdict, OutputAccumulator
+from framework.orchestrator.node import NodeContext
logger = logging.getLogger(__name__)
@@ -155,7 +155,7 @@ async def judge_turn(
# Level 2b: conversation-aware quality check (if success_criteria set)
if ctx.node_spec.success_criteria and ctx.llm:
- from framework.graph.conversation_judge import evaluate_phase_completion
+ from framework.orchestrator.conversation_judge import evaluate_phase_completion
verdict = await evaluate_phase_completion(
llm=ctx.llm,
diff --git a/core/framework/graph/event_loop/stall_detector.py b/core/framework/agent_loop/internals/stall_detector.py
similarity index 100%
rename from core/framework/graph/event_loop/stall_detector.py
rename to core/framework/agent_loop/internals/stall_detector.py
diff --git a/core/framework/graph/event_loop/synthetic_tools.py b/core/framework/agent_loop/internals/synthetic_tools.py
similarity index 69%
rename from core/framework/graph/event_loop/synthetic_tools.py
rename to core/framework/agent_loop/internals/synthetic_tools.py
index fa9d19d5..5a5bf3c3 100644
--- a/core/framework/graph/event_loop/synthetic_tools.py
+++ b/core/framework/agent_loop/internals/synthetic_tools.py
@@ -204,118 +204,6 @@ def build_escalate_tool() -> Tool:
},
)
-
-def build_delegate_tool(sub_agents: list[str], node_registry: dict[str, Any]) -> Tool | None:
- """Build the synthetic delegate_to_sub_agent tool for subagent invocation.
-
- Args:
- sub_agents: List of node IDs that can be invoked as subagents.
- node_registry: Map of node_id -> NodeSpec for looking up subagent descriptions.
-
- Returns:
- Tool definition if sub_agents is non-empty, None otherwise.
- """
- if not sub_agents:
- return None
-
- agent_descriptions = []
- for agent_id in sub_agents:
- spec = node_registry.get(agent_id)
- if spec:
- desc = getattr(spec, "description", "(no description)")
- agent_descriptions.append(f"- {agent_id}: {desc}")
- else:
- agent_descriptions.append(f"- {agent_id}: (not found in registry)")
-
- return Tool(
- name="delegate_to_sub_agent",
- description=(
- "Delegate a task to a specialized sub-agent. The sub-agent runs "
- "autonomously with read-only access to current memory and returns "
- "its result. Use this to parallelize work or leverage specialized capabilities.\n\n"
- "Available sub-agents:\n" + "\n".join(agent_descriptions)
- ),
- parameters={
- "type": "object",
- "properties": {
- "agent_id": {
- "type": "string",
- "description": f"The sub-agent to invoke. Must be one of: {sub_agents}",
- "enum": sub_agents,
- },
- "task": {
- "type": "string",
- "description": (
- "The task description for the sub-agent to execute. "
- "Be specific about what you want the sub-agent to do and "
- "what information to return."
- ),
- },
- },
- "required": ["agent_id", "task"],
- },
- )
-
-
-def build_report_to_parent_tool() -> Tool:
- """Build the synthetic report_to_parent tool for sub-agent progress reports.
-
- Sub-agents call this to send one-way progress updates, partial findings,
- or status reports to the parent node (and external observers via event bus)
- without blocking execution.
-
- When ``wait_for_response`` is True, the sub-agent blocks until the parent
- relays the user's response — used for escalation (e.g. login pages, CAPTCHAs).
-
- When ``mark_complete`` is True, the sub-agent terminates immediately after
- sending the report — no need to call set_output for each output key.
- """
- return Tool(
- name="report_to_parent",
- description=(
- "Send a report to the parent agent. By default this is fire-and-forget: "
- "the parent receives the report but does not respond. "
- "Set wait_for_response=true to BLOCK until the user replies — use this "
- "when you need human intervention (e.g. login pages, CAPTCHAs, "
- "authentication walls). The user's response is returned as the tool result. "
- "Set mark_complete=true to finish your task and terminate immediately "
- "after sending the report — use this when your findings are in the "
- "message/data fields and you don't need to call set_output."
- ),
- parameters={
- "type": "object",
- "properties": {
- "message": {
- "type": "string",
- "description": "A human-readable status or progress message.",
- },
- "data": {
- "type": "object",
- "description": "Optional structured data to include with the report.",
- },
- "wait_for_response": {
- "type": "boolean",
- "description": (
- "If true, block execution until the user responds. "
- "Use for escalation scenarios requiring human intervention."
- ),
- "default": False,
- },
- "mark_complete": {
- "type": "boolean",
- "description": (
- "If true, terminate the sub-agent immediately after sending "
- "this report. The report message and data are delivered to the "
- "parent as the final result. No set_output calls are needed."
- ),
- "default": False,
- },
- },
- "required": ["message"],
- },
- )
-
-
def handle_set_output(
tool_input: dict[str, Any],
output_keys: list[str] | None,
diff --git a/core/framework/graph/event_loop/tool_result_handler.py b/core/framework/agent_loop/internals/tool_result_handler.py
similarity index 100%
rename from core/framework/graph/event_loop/tool_result_handler.py
rename to core/framework/agent_loop/internals/tool_result_handler.py
diff --git a/core/framework/graph/event_loop/types.py b/core/framework/agent_loop/internals/types.py
similarity index 98%
rename from core/framework/graph/event_loop/types.py
rename to core/framework/agent_loop/internals/types.py
index 69357fce..3a100f65 100644
--- a/core/framework/graph/event_loop/types.py
+++ b/core/framework/agent_loop/internals/types.py
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Literal, Protocol, runtime_checkable
-from framework.graph.conversation import (
+from framework.agent_loop.conversation import (
ConversationStore,
)
@@ -68,7 +68,7 @@ class LoopConfig:
max_output_value_chars: int = 2_000
# Stream retry.
- max_stream_retries: int = 3
+ max_stream_retries: int = 5
stream_retry_backoff_base: float = 2.0
stream_retry_max_delay: float = 60.0
diff --git a/core/framework/agents/__init__.py b/core/framework/agents/__init__.py
index 561d96a0..46c0a5f8 100644
--- a/core/framework/agents/__init__.py
+++ b/core/framework/agents/__init__.py
@@ -8,6 +8,14 @@ FRAMEWORK_AGENTS_DIR = Path(__file__).parent
def list_framework_agents() -> list[Path]:
"""List all framework agent directories."""
return sorted(
- [p for p in FRAMEWORK_AGENTS_DIR.iterdir() if p.is_dir() and (p / "agent.py").exists()],
+ [
+ p
+ for p in FRAMEWORK_AGENTS_DIR.iterdir()
+ if p.is_dir()
+ and (
+ (p / "agent.json").exists()
+ or (p / "agent.py").exists()
+ )
+ ],
key=lambda p: p.name,
)
diff --git a/core/framework/agents/credential_tester/agent.py b/core/framework/agents/credential_tester/agent.py
index c78823f2..32336a72 100644
--- a/core/framework/agents/credential_tester/agent.py
+++ b/core/framework/agents/credential_tester/agent.py
@@ -21,15 +21,15 @@ from pathlib import Path
from typing import TYPE_CHECKING
from framework.config import get_max_context_tokens
-from framework.graph import Goal, NodeSpec, SuccessCriterion
-from framework.graph.checkpoint_config import CheckpointConfig
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult
+from framework.orchestrator import Goal, NodeSpec, SuccessCriterion
+from framework.orchestrator.checkpoint_config import CheckpointConfig
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult
from framework.llm import LiteLLMProvider
-from framework.runner.mcp_registry import MCPRegistry
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
+from framework.loader.mcp_registry import MCPRegistry
+from framework.loader.tool_registry import ToolRegistry
+from framework.host.agent_host import AgentHost
+from framework.host.execution_manager import EntryPointSpec
from .config import default_config
from .nodes import build_tester_node
@@ -37,7 +37,7 @@ from .nodes import build_tester_node
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
- from framework.runner import AgentRunner
+ from framework.loader import AgentLoader
logger = logging.getLogger(__name__)
@@ -233,7 +233,7 @@ requires_account_selection = True
"""Signal TUI to show account picker before starting the agent."""
-def configure_for_account(runner: AgentRunner, account: dict) -> None:
+def configure_for_account(runner: AgentLoader, account: dict) -> None:
"""Scope the tester node's tools to the selected provider.
Handles both Aden accounts (account= routing) and local accounts
@@ -325,7 +325,7 @@ def _activate_local_account(credential_id: str, alias: str) -> None:
def _configure_aden_node(
- runner: AgentRunner,
+ runner: AgentLoader,
provider: str,
alias: str,
detail: str,
@@ -368,7 +368,7 @@ or any other identifier — always use the alias exactly as shown.
def _configure_local_node(
- runner: AgentRunner,
+ runner: AgentLoader,
provider: str,
alias: str,
identity: dict,
@@ -497,7 +497,7 @@ class CredentialTesterAgent:
def __init__(self, config=None):
self.config = config or default_config
self._selected_account: dict | None = None
- self._agent_runtime: AgentRuntime | None = None
+ self._agent_runtime: AgentHost | None = None
self._tool_registry: ToolRegistry | None = None
self._storage_path: Path | None = None
@@ -613,7 +613,7 @@ class CredentialTesterAgent:
graph = self._build_graph()
- self._agent_runtime = create_agent_runtime(
+ self._agent_runtime = AgentHost(
graph=graph,
goal=goal,
storage_path=self._storage_path,
diff --git a/core/framework/agents/credential_tester/nodes/__init__.py b/core/framework/agents/credential_tester/nodes/__init__.py
index 31b1ac7e..682ca08e 100644
--- a/core/framework/agents/credential_tester/nodes/__init__.py
+++ b/core/framework/agents/credential_tester/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Credential Tester agent."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
def build_tester_node(
diff --git a/core/framework/agents/discovery.py b/core/framework/agents/discovery.py
index 2cbf712b..8f978636 100644
--- a/core/framework/agents/discovery.py
+++ b/core/framework/agents/discovery.py
@@ -27,8 +27,8 @@ def _get_last_active(agent_path: Path) -> str | None:
"""Return the most recent updated_at timestamp across all sessions.
Checks both worker sessions (``~/.hive/agents/{name}/sessions/``) and
- queen sessions (``~/.hive/queen/session/``) whose ``meta.json`` references
- the same *agent_path*.
+ queen sessions (``~/.hive/agents/queens/default/sessions/``) whose
+ ``meta.json`` references the same *agent_path*.
"""
from datetime import datetime
@@ -53,7 +53,9 @@ def _get_last_active(agent_path: Path) -> str | None:
continue
# 2. Queen sessions
- queen_sessions_dir = Path.home() / ".hive" / "queen" / "session"
+ from framework.config import QUEENS_DIR
+
+ queen_sessions_dir = QUEENS_DIR / "default" / "sessions"
if queen_sessions_dir.exists():
resolved = agent_path.resolve()
for d in queen_sessions_dir.iterdir():
@@ -112,13 +114,33 @@ def _count_runs(agent_name: str) -> int:
def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
"""Extract node count, tool count, and tags from an agent directory.
- Prefers agent.py (AST-parsed) over agent.json for node/tool counts
- since agent.json may be stale. Tags are only available from agent.json.
+ Checks agent.json (declarative) first, then agent.py (legacy).
"""
import ast
node_count, tool_count, tags = 0, 0, []
+ # Declarative JSON agents (preferred)
+ agent_json = agent_path / "agent.json"
+ if agent_json.exists():
+ try:
+ data = json.loads(agent_json.read_text(encoding="utf-8"))
+ if isinstance(data, dict):
+ json_nodes = data.get("nodes", [])
+ node_count = len(json_nodes)
+ tools: set[str] = set()
+ for n in json_nodes:
+ node_tools = n.get("tools", {})
+ if isinstance(node_tools, dict):
+ tools.update(node_tools.get("allowed", []))
+ elif isinstance(node_tools, list):
+ tools.update(node_tools)
+ tool_count = len(tools)
+ return node_count, tool_count, tags
+ except Exception:
+ pass
+
+ # Legacy: agent.py (AST-parsed)
agent_py = agent_path / "agent.py"
if agent_py.exists():
try:
@@ -132,39 +154,31 @@ def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
except Exception:
pass
- agent_json = agent_path / "agent.json"
- if agent_json.exists():
- try:
- data = json.loads(agent_json.read_text(encoding="utf-8"))
- json_nodes = data.get("graph", {}).get("nodes", []) or data.get("nodes", [])
- if node_count == 0:
- node_count = len(json_nodes)
- tools: set[str] = set()
- for n in json_nodes:
- tools.update(n.get("tools", []))
- tool_count = len(tools)
- tags = data.get("agent", {}).get("tags", [])
- except Exception:
- pass
-
return node_count, tool_count, tags
def discover_agents() -> dict[str, list[AgentEntry]]:
"""Discover agents from all known sources grouped by category."""
- from framework.runner.cli import (
+ from framework.loader.cli import (
_extract_python_agent_metadata,
_get_framework_agents_dir,
_is_valid_agent_dir,
)
+ from framework.config import COLONIES_DIR
+
groups: dict[str, list[AgentEntry]] = {}
sources = [
- ("Your Agents", Path("exports")),
+ ("Your Agents", COLONIES_DIR),
+ ("Your Agents", Path("exports")), # compat fallback
("Framework", _get_framework_agents_dir()),
("Examples", Path("examples/templates")),
]
+ # Track seen agent directory names to avoid duplicates when the same
+ # agent exists in both colonies/ and exports/ (colonies takes priority).
+ _seen_agent_names: set[str] = set()
+
for category, base_dir in sources:
if not base_dir.exists():
continue
@@ -172,6 +186,9 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
if not _is_valid_agent_dir(path):
continue
+ if path.name in _seen_agent_names:
+ continue
+ _seen_agent_names.add(path.name)
name, desc = _extract_python_agent_metadata(path)
config_fallback_name = path.name.replace("_", " ").title()
@@ -179,13 +196,19 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
node_count, tool_count, tags = _extract_agent_stats(path)
if not used_config:
- agent_json = path / "agent.json"
- if agent_json.exists():
+ # Try agent.json (declarative) for metadata
+ agent_json_path = path / "agent.json"
+ if agent_json_path.exists():
try:
- data = json.loads(agent_json.read_text(encoding="utf-8"))
- meta = data.get("agent", {})
- name = meta.get("name", name)
- desc = meta.get("description", desc)
+ data = json.loads(
+ agent_json_path.read_text(encoding="utf-8"),
+ )
+ if isinstance(data, dict):
+ raw_name = data.get("name", name)
+ if "-" in raw_name and " " not in raw_name:
+ raw_name = raw_name.replace("-", " ").title()
+ name = raw_name
+ desc = data.get("description", desc)
except Exception:
pass
@@ -204,6 +227,8 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
)
)
if entries:
- groups[category] = entries
+ existing = groups.get(category, [])
+ existing.extend(entries)
+ groups[category] = existing
return groups
diff --git a/core/framework/agents/queen/__init__.py b/core/framework/agents/queen/__init__.py
index caff6298..f86488f6 100644
--- a/core/framework/agents/queen/__init__.py
+++ b/core/framework/agents/queen/__init__.py
@@ -1,19 +1,13 @@
-"""
-Queen — Native agent builder for the Hive framework.
+"""Queen -- the agent builder for the Hive framework."""
-Deeply understands the agent framework and produces complete Python packages
-with goals, nodes, edges, system prompts, MCP configuration, and tests
-from natural language specifications.
-"""
-
-from .agent import queen_goal, queen_graph
+from .agent import queen_goal, queen_loop_config
from .config import AgentMetadata, RuntimeConfig, default_config, metadata
__version__ = "1.0.0"
__all__ = [
"queen_goal",
- "queen_graph",
+ "queen_loop_config",
"RuntimeConfig",
"AgentMetadata",
"default_config",
diff --git a/core/framework/agents/queen/agent.py b/core/framework/agents/queen/agent.py
index e6583354..ba59f963 100644
--- a/core/framework/agents/queen/agent.py
+++ b/core/framework/agents/queen/agent.py
@@ -1,38 +1,29 @@
-"""Queen graph definition."""
+"""Queen agent definition.
-from framework.graph import Goal
-from framework.graph.edge import GraphSpec
+The queen is a single AgentLoop -- no graph, no orchestrator.
+Loaded by queen_orchestrator.create_queen().
+"""
+
+from framework.orchestrator.goal import Goal
from .nodes import queen_node
-# ---------------------------------------------------------------------------
-# Queen graph — the primary persistent conversation.
-# Loaded by queen_orchestrator.create_queen(), NOT by AgentRunner.
-# ---------------------------------------------------------------------------
-
queen_goal = Goal(
id="queen-manager",
name="Queen Manager",
description=(
- "Manage the worker agent lifecycle and serve as the user's primary interactive interface."
+ "Manage the worker agent lifecycle and serve as the "
+ "user's primary interactive interface."
),
success_criteria=[],
constraints=[],
)
-queen_graph = GraphSpec(
- id="queen-graph",
- goal_id=queen_goal.id,
- version="1.0.0",
- entry_node="queen",
- entry_points={"start": "queen"},
- terminal_nodes=[],
- pause_nodes=[],
- nodes=[queen_node],
- edges=[],
- conversation_mode="continuous",
- loop_config={
- "max_iterations": 999_999,
- "max_tool_calls_per_turn": 30,
- },
-)
+# Loop config -- used by queen_orchestrator to build LoopConfig
+queen_loop_config = {
+ "max_iterations": 999_999,
+ "max_tool_calls_per_turn": 30,
+ "max_context_tokens": 180_000,
+}
+
+__all__ = ["queen_goal", "queen_loop_config", "queen_node"]
diff --git a/core/framework/agents/queen/mcp_registry.json b/core/framework/agents/queen/mcp_registry.json
new file mode 100644
index 00000000..80e62804
--- /dev/null
+++ b/core/framework/agents/queen/mcp_registry.json
@@ -0,0 +1,3 @@
+{
+ "include": ["gcu-tools"]
+}
diff --git a/core/framework/agents/queen/nodes/__init__.py b/core/framework/agents/queen/nodes/__init__.py
index 3f447541..e078afa4 100644
--- a/core/framework/agents/queen/nodes/__init__.py
+++ b/core/framework/agents/queen/nodes/__init__.py
@@ -2,7 +2,7 @@
from pathlib import Path
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Load reference docs at import time so they're always in the system prompt.
# No voluntary read_file() calls needed — the LLM gets everything upfront.
@@ -37,7 +37,7 @@ _appendices = _build_appendices()
# GCU guide — shared between planning and building via _shared_building_knowledge.
_gcu_section = (
- ("\n\n# GCU Nodes — Browser Automation\n\n" + _gcu_guide)
+ ("\n\n# Browser Automation Nodes\n\n" + _gcu_guide)
if _is_gcu_enabled() and _gcu_guide
else ""
)
@@ -81,7 +81,6 @@ _QUEEN_PLANNING_TOOLS = [
"save_agent_draft",
"confirm_and_build",
# Scaffold + transition to building (requires confirm_and_build first)
- "initialize_and_build_agent",
# Load existing agent (after user confirms)
"load_built_agent",
]
@@ -172,7 +171,7 @@ _shared_building_knowledge = (
## Paths (MANDATORY)
**Always use RELATIVE paths** \
-(e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`).
+(e.g. `exports/agent_name/agent.json`).
**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail.
The project root is implicit.
@@ -182,14 +181,18 @@ When designing worker nodes or writing worker system prompts, reference these \
tool names — NOT the coder-tools names (read_file, write_file, etc.).
Worker data tools (for large results and spillover):
-- save_data(filename, data, data_dir) — save data to a file for later retrieval
-- load_data(filename, data_dir, offset_bytes?, limit_bytes?) — load data \
-with byte-based pagination
-- list_data_files(data_dir) — list available data files
-- append_data(filename, data, data_dir) — append to a file incrementally
-- edit_data(filename, old_text, new_text, data_dir) — find-and-replace in a data file
-- serve_file_to_user(filename, data_dir, label?, open_in_browser?) — \
-generate a clickable file URI for the user
+Worker data tools (from files-tools MCP server):
+- read_file(path) — read a file
+- write_file(path, content) — write/create a file
+- list_files(path) — list directory contents
+- search_files(pattern, path) — regex search in files
+
+Worker data tools (from hive-tools MCP server):
+- csv_read, csv_write, csv_append — CSV operations
+- pdf_read — read PDF files
+
+All tools are registered in the global MCP registry (~/.hive/mcp_registry/). \
+Workers get tools from: hive-tools, gcu-tools, files-tools.
IMPORTANT: Do NOT tell workers to use read_file, write_file, edit_file, \
search_files, or list_directory — those are YOUR tools, not theirs.
@@ -204,7 +207,7 @@ _planning_knowledge = """\
# Core Mandates (Planning)
- **DO NOT propose a complete goal on your own.** Instead, \
collaborate with the user to define it.
-- **NEVER call `initialize_and_build_agent` without explicit user approval.** \
+- **NEVER call `confirm_and_build` without explicit user approval.** \
Present the full design first and wait for the user to confirm before building.
- **Discover tools dynamically.** NEVER reference tools from static \
docs. Always run list_agent_tools() to see what actually exists.
@@ -252,9 +255,9 @@ When the stakeholder describes what they want, mentally construct:
**After the user responds, assess fit and gaps together.** Be honest and specific. \
Reference tools from list_agent_tools() AND built-in capabilities:
-- **GCU browser automation** (`node_type="gcu"`) provides full Playwright-based \
+- **Browser automation** provides full Playwright-based \
browser control (navigation, clicking, typing, scrolling, JS-rendered pages, \
-multi-tab). Do NOT list browser automation as missing — use GCU nodes.
+multi-tab). Do NOT list browser automation as missing — use browser nodes with tools: {policy: "all"}.
Present a short **Framework Fit Assessment**:
- **Works well**: 2-4 strengths for this use case
@@ -306,14 +309,11 @@ explicitly on a node. Available types:
- **io** (dusty purple, parallelogram): External data input/output
- **document** (steel blue, wavy rect): Report or document generation
- **database** (muted teal, cylinder): Database or data store
-- **subprocess** (dark cyan, subroutine): Delegated sub-agent / predefined process
-- **browser** (deep blue, hexagon): GCU browser automation / sub-agent \
-delegation. At build time, browser nodes are dissolved into the parent \
-node's sub_agents list. Use for any GCU or sub-agent leaf node.
+- **browser** (deep blue, hexagon): Browser automation node (uses gcu-tools).
Auto-detection works well for most cases: first node → start, nodes with \
no outgoing edges → terminal, nodes with multiple conditional outgoing \
-edges → decision, GCU nodes → browser, nodes mentioning "database" → \
+edges → decision, browser tool nodes → browser, nodes mentioning "database" → \
database, nodes mentioning "report/document" → document, I/O tools like \
send_email → io. Everything else defaults to process. Set flowchart_type \
explicitly only when auto-detection would be wrong.
@@ -354,48 +354,19 @@ gather → [Valid data?] →Yes→ transform → deliver
In the draft: the `[Valid data?]` node has `flowchart_type: "decision"`, \
`decision_clause: "Data passes validation checks?"`, with labeled yes/no edges.
-## Sub-Agent Nodes — Planning-Only Delegation
+## Browser Automation Nodes
-Sub-agent nodes (dark teal subroutines) are **planning-only** visual elements \
-that show which nodes delegate to sub-agents. At `confirm_and_build()`, \
-sub-agent nodes are **dissolved** into their parent node:
-
-- The sub-agent node's ID is added to the predecessor's `sub_agents` list
-- The sub-agent node and its connecting edge are removed
-- At runtime, the parent node can invoke the sub-agent via `delegate_to_sub_agent`
-
-**Rules for sub-agent nodes (INCLUDING GCU nodes):**
-- GCU nodes are auto-detected as `flowchart_type: "browser"` (hexagon)
-- Connect from the managing parent node to the sub-agent node
-- Sub-agent nodes must be **leaf nodes** — NO outgoing edges to other nodes
-- At build time, browser/GCU nodes are dissolved into the parent's \
-`sub_agents` list, just like decision nodes are dissolved into criteria
-
-**CRITICAL: GCU nodes (`node_type: "gcu"`) are ALWAYS sub-agents.** \
-They MUST NOT appear in the linear flow. NEVER chain GCU nodes \
-sequentially (A → gcu1 → gcu2 → B is WRONG). Instead, attach them \
-as leaves to the parent that orchestrates them:
+Browser nodes are regular `event_loop` nodes with browser tools \
+(from the gcu-tools MCP server) in their tool list. They are wired \
+into the graph with edges like any other node:
```
-WRONG: intake → gcu_find_prospect → gcu_scan_mutuals → check_results
-WRONG: decision_node → gcu_node (as a yes/no branch)
-RIGHT: intake (sub_agents: [gcu_find, gcu_scan]) → check_results
+research → browser_scan → analyze_results
```
-The parent node delegates to its GCU sub-agents and collects results. \
-The main flow continues from the parent, not from the GCU node. \
-GCU nodes MUST NOT be children of decision nodes — decision nodes \
-dissolve at build time, which would leave the GCU as a dangling \
-workflow step.
+Use `tools: {policy: "all"}` to give browser nodes access to all \
+browser tools, or list specific ones with `policy: "explicit"`.
-**How to show delegation in the flowchart:**
-```
-research → (deep_searcher) ← browser/GCU node, leaf
-research → [Enough results?] ← decision node
-```
-After dissolution: `research` node gets `sub_agents: ["deep_searcher"]` \
-and `success_criteria: "Enough results?"`.
-
-If the worker agent start from some initial input it is okay. \
-The queen(you) owns intake: you gathers user requirements, then calls \
+If the worker agent starts from some initial input it is okay. \
+The queen (you) owns intake: you gather user requirements, then call \
`run_agent_with_input(task)` with a structured task description. \
When building the agent, design the entry node's `input_keys` to \
match what the queen will provide at run time. Worker nodes should \
@@ -411,14 +382,14 @@ You MUST get explicit user approval before ANY code is generated.
2. **WAIT for user response.** Do NOT proceed without it.
3. Handle the response:
- If **Approve / Proceed**: Call confirm_and_build(), then \
- initialize_and_build_agent(agent_name, nodes)
+ confirm_and_build(agent_name)
- If **Adjust scope**: Discuss changes, update the draft with \
save_agent_draft() again, and re-ask
- If **More questions**: Answer them honestly, then ask again
- If **Reconsider**: Discuss alternatives. If they decide to proceed, \
that's their informed choice
-**NEVER call initialize_and_build_agent without first calling \
+**NEVER call confirm_and_build(agent_name) without first calling \
confirm_and_build().** The system will block the transition if you try.
"""
@@ -477,53 +448,75 @@ When a user says "my agent is failing" or "debug this agent":
## 5. Implement
**You should only reach this step after the user has approved the draft design \
-in the planning phase. The draft metadata will pre-populate descriptions, \
-goals, success criteria, and node metadata in the generated files.**
+and you have called `confirm_and_build(agent_name="my_agent")`.**
-Call `initialize_and_build_agent(agent_name, nodes)` to generate all package \
-files. The agent_name must be snake_case (e.g., "my_agent"). Pass node names \
-as comma-separated string (e.g., "gather,process,review").
-The tool creates: config.py, nodes/__init__.py, agent.py, \
-__init__.py, __main__.py, mcp_servers.json, tests/conftest.py.
+`confirm_and_build` created the agent directory (returned in agent_path). \
+Now write the complete agent config directly:
-The generated files are **structurally complete** with correct imports, \
-class definition, `validate()` method, `default_agent` export, and \
-`__init__.py` re-exports. They pass validation as-is.
+```
+write_file("<agent_path>/agent.json", <complete agent JSON config>)
+```
-`mcp_servers.json` is auto-generated with hive-tools as the default. \
-Do NOT manually create or overwrite `mcp_servers.json`.
+The agent.json must include ALL of these in one write:
+- `name`, `version`, `description`
+- `goal` with `description`, `success_criteria`, `constraints`
+- `identity_prompt` (agent-level behavior)
+- `nodes` — each with `id`, `description`, `system_prompt`, `tools`, \
+`input_keys`, `output_keys`, `success_criteria`
+- `edges` — connecting all nodes with proper conditions
+- `entry_node`, `terminal_nodes`
+- `mcp_servers` — REQUIRED. Always include all three: \
+`[{"name": "hive-tools"}, {"name": "gcu-tools"}, {"name": "files-tools"}]`
+- `loop_config` — `max_iterations`, `max_context_tokens`
-### Customizing generated files
+**Write the COMPLETE config in one `write_file` call. No TODOs, no placeholders.** \
+The queen writes final production-ready system prompts directly.
-**CRITICAL: Use `edit_file` to customize TODO placeholders. \
-NEVER use `write_file` to rewrite generated files from scratch. \
-Rewriting breaks imports, class structure, and causes validation failures.**
+**There are NO Python files.** The framework loads agent.json directly.
-Safe to edit with `edit_file`:
-- System prompts, tools, input_keys, output_keys, success_criteria in \
-nodes/__init__.py
-- Goal description, success criteria values, constraint values, edge \
-definitions, identity_prompt in agent.py
-- CLI options in __main__.py
-- For triggers (timers/webhooks), add entries to triggers.json in the \
-agent's export directory
+MCP servers are loaded from the global registry by name. Available servers:
+- `hive-tools` — web search, email, CRM, calendar, 100+ integrations
+- `gcu-tools` — browser automation (click, type, navigate, screenshot)
+- `files-tools` — file I/O (read, write, edit, search, list)
-Do NOT modify or rewrite:
-- Import statements at top of agent.py (they are correct)
-- The agent class definition, `validate()`, `_build_graph()`, `_setup()`, \
-or lifecycle methods (start/stop/run)
-- `__init__.py` exports (all required variables are already re-exported)
-- `default_agent = ClassName()` at bottom of agent.py
+**Template variables:** Add a `variables:` section at the top of agent.json \
+and use `{{variable_name}}` in system prompts for config injection:
+```yaml
+variables:
+ spreadsheet_id: "1ZVx..."
+nodes:
+ - id: start
+ system_prompt: |
+ Use spreadsheet: {{spreadsheet_id}}
+```
+
+### Tool access in nodes
+
+Each node declares its tool access policy:
+```yaml
+# Explicit list (recommended)
+tools:
+ policy: explicit
+ allowed: [web_search, write_file]
+
+# All tools (for browser automation nodes)
+tools:
+ policy: all
+
+# No tools (for handoff/summary nodes)
+tools:
+ policy: none
+```
## 6. Verify and Load
Call `validate_agent_package("{name}")` after initialization. \
It runs structural checks (class validation, graph validation, tool \
validation, tests) and returns a consolidated result. If anything \
-fails: read the error, fix with edit_file, re-validate. Up to 3x.
+fails: read the error, fix with read_file+write_file, re-validate. Up to 3x.
When validation passes, immediately call \
-`load_built_agent("exports/{name}")` to load the agent into the \
+`load_built_agent("<agent_path>")` to load the agent into the \
session. This switches to STAGING phase and shows the graph in the \
visualizer. Do NOT wait for user input between validation and loading.
"""
@@ -625,13 +618,11 @@ document, database, subprocess, etc.) with unique shapes and colors. Set \
flowchart_type on a node to override. Nodes need only an id. \
Use decision nodes (flowchart_type: "decision", with decision_clause and \
labeled yes/no edges) to make conditional branching explicit. \
-GCU/sub-agent nodes (node_type: "gcu") are auto-detected as browser \
hexagons — connect them as leaf nodes to their parent.
- confirm_and_build() — Record user confirmation of the draft. Dissolves \
planning-only nodes (decision → predecessor criteria; browser/GCU → \
-predecessor sub_agents list). Call this ONLY after the user explicitly \
approves via ask_user.
-- initialize_and_build_agent(agent_name?, nodes?) — Scaffold the agent package \
+- confirm_and_build(agent_name) — Scaffold the agent package \
and transition to BUILDING phase. For new agents, this REQUIRES \
save_agent_draft() + confirm_and_build() first. The draft metadata is used to \
pre-populate the generated files. Without agent_name: transition to BUILDING \
@@ -647,8 +638,8 @@ phase. Only use this when the user explicitly asks to work with an existing agen
2. Call save_agent_draft() to create visual draft → present to user
3. Call ask_user() to get explicit approval
4. Call confirm_and_build() to record approval
-5. Call initialize_and_build_agent() to scaffold and start building
-For diagnosis of existing agents, call initialize_and_build_agent() \
+5. Call confirm_and_build() to scaffold and start building
+For diagnosis of existing agents, call confirm_and_build() \
(no args) after agreeing on a fix plan with the user.
"""
@@ -884,7 +875,7 @@ that changes the structure, call save_agent_draft() again so they see the \
update in real-time. The flowchart is a live collaboration tool.
8. When the design is stable, use ask_user to get explicit approval
9. Call confirm_and_build() after the user approves
-10. Call initialize_and_build_agent(agent_name, nodes) to scaffold and start building
+10. Call confirm_and_build(agent_name) to scaffold and start building
**The flowchart is your shared whiteboard.** Don't describe changes in text \
and then ask "should I update the draft?" — just update it. If the user says \
@@ -895,7 +886,7 @@ see every structural change reflected in the visualizer as you discuss it.
**CRITICAL: Planning → Building boundary.** You MUST get explicit user \
confirmation before moving to building. The sequence is:
save_agent_draft() → iterate with user → ask_user() → confirm_and_build() → \
- initialize_and_build_agent()
+ confirm_and_build()
Skipping any of these steps will be blocked by the system.
Remember: DO NOT write or edit any files yet. This is a read-only exploration \
@@ -911,7 +902,7 @@ your priority is diagnosis, not new design:
2. Summarize the root cause to the user
3. Propose a fix plan (what to change, what behavior to adjust)
4. Get user approval via ask_user
-5. Call initialize_and_build_agent() (no args) to transition to building and implement the fix
+5. Call confirm_and_build() (no args) to transition to building and implement the fix
Do NOT start the full discovery workflow (tool discovery, gap analysis) in \
diagnosis mode — you already have a built agent, you just need to fix it.
@@ -947,7 +938,7 @@ delegate agent construction to the worker, even as a "research" subtask.
## Keeping the flowchart in sync during building
When you make structural changes to the agent (add/remove/rename nodes, \
-change edges, modify sub-agent assignments), call save_agent_draft() to \
+change edges, modify node connections), call save_agent_draft() to \
update the flowchart. During building, this auto-dissolves planning-only \
nodes without needing user re-confirmation. The user sees the updated \
flowchart immediately.
@@ -966,15 +957,15 @@ user says "replan", "go back", "let's redesign", "change the approach", \
## CRITICAL — Graph topology errors require replanning, not code edits
-If you discover that the agent graph has structural problems — GCU nodes \
+If you discover that the agent graph has structural problems — browser nodes \
in the linear flow, missing edges, wrong node connections, incorrect \
-sub-agent assignments — you MUST call replan_agent() and fix the draft. \
-Do NOT attempt to fix topology by editing agent.py directly. The graph \
+node connections — you MUST call replan_agent() and fix the draft. \
+Do NOT attempt to fix topology by editing agent.json directly. The graph \
structure is defined by the draft → dissolution → code-gen pipeline. \
-Editing code to rewire nodes bypasses the flowchart and creates drift \
-between what the user sees and what the code does.
+Editing the config to rewire nodes bypasses the flowchart and creates drift \
+between what the user sees and what the config does.
-**WRONG:** "Let me fix agent.py to remove GCU nodes from edges..."
+**WRONG:** "Let me fix agent.json to remove browser nodes from edges..."
**RIGHT:** Call replan_agent(), fix the draft with save_agent_draft(), \
get user approval, then confirm_and_build() → the corrected code is \
generated automatically.
@@ -1100,18 +1091,15 @@ You wake up when:
If the user asks for progress, call get_graph_status() ONCE and report. \
If the summary mentions issues, follow up with get_graph_status(focus="issues").
-## Subagent delegations (browser automation, GCU)
+## Browser automation nodes
-When the worker delegates to a subagent (e.g., GCU browser automation), expect it \
-to take 2-5 minutes. During this time:
-- Progress will show 0% — this is NORMAL. The subagent only calls set_output at the end.
-- Check get_graph_status(focus="full") for "subagent_activity" — this shows the \
-subagent's latest reasoning text and confirms it is making real progress.
-- Do NOT conclude the subagent is stuck just because progress is 0% or because \
-you see repeated browser_click/browser_snapshot calls — that is the expected \
-pattern for web scraping.
-- Only intervene if: the subagent has been running for 5+ minutes with no new \
-subagent_activity updates, OR the judge escalates.
+Browser nodes may take 2-5 minutes for web scraping tasks. During this time:
+- Progress will show 0% until the node calls set_output at the end.
+- Check get_graph_status(focus="full") for activity updates.
+- Do NOT conclude it is stuck just because you see repeated \
+browser_click/browser_snapshot calls — that is expected for web scraping.
+- Only intervene if: the node has been running for 5+ minutes with no new \
+activity updates, OR the judge escalates.
## Handling worker termination ([WORKER_TERMINAL])
@@ -1143,11 +1131,11 @@ escalations. If the user gave you instructions (e.g., "just retry on errors", \
CRITICAL — escalation relay protocol:
When an escalation requires user input (auth blocks, human review), the worker \
-or its subagent is BLOCKED and waiting for your response. You MUST follow this \
+is BLOCKED and waiting for your response. You MUST follow this \
exact two-step sequence:
Step 1: call ask_user() to get the user's answer.
Step 2: call inject_message() with the user's answer IMMEDIATELY after.
-If you skip Step 2, the worker/subagent stays blocked FOREVER and the task hangs. \
+If you skip Step 2, the worker stays blocked FOREVER and the task hangs. \
NEVER respond to the user without also calling inject_message() to unblock \
the worker. Even if the user says "skip" or "cancel", you must still relay that \
decision via inject_message() so the worker can clean up.
@@ -1233,7 +1221,7 @@ _queen_tools_docs = (
+ "\n\n### Phase transitions\n"
"- save_agent_draft(...) → creates visual-only draft graph (stays in PLANNING)\n"
"- confirm_and_build() → records user approval of draft (stays in PLANNING)\n"
- "- initialize_and_build_agent(agent_name?, nodes?) → scaffolds package + switches to "
+ "- confirm_and_build(agent_name) → scaffolds package + switches to "
"BUILDING (requires draft + confirmation for new agents)\n"
"- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n"
"- load_built_agent(path) → switches to STAGING phase\n"
diff --git a/core/framework/agents/queen/queen_memory_v2.py b/core/framework/agents/queen/queen_memory_v2.py
index bf41a5a6..d2e61b6f 100644
--- a/core/framework/agents/queen/queen_memory_v2.py
+++ b/core/framework/agents/queen/queen_memory_v2.py
@@ -1,9 +1,15 @@
"""Queen global memory helpers.
-Global memory lives in ``~/.hive/queen/global_memory/`` and stores durable
-cross-session knowledge about the user (profile, preferences, environment,
-feedback). Each memory is an individual ``.md`` file with optional YAML
-frontmatter (name, type, description).
+Memory hierarchy::
+
+ ~/.hive/memories/
+ global/ # shared across all queens and colonies
+ colonies/{name}/ # colony-scoped memories
+ agents/queens/{name}/ # queen-specific memories
+ agents/{name}/ # per-worker-agent memories
+
+Each memory is an individual ``.md`` file with optional YAML frontmatter
+(name, type, description).
"""
from __future__ import annotations
@@ -21,7 +27,7 @@ logger = logging.getLogger(__name__)
GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] = ("profile", "preference", "environment", "feedback")
-_HIVE_QUEEN_DIR = Path.home() / ".hive" / "queen"
+from framework.config import MEMORIES_DIR
MAX_FILES: int = 200
MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file
@@ -31,8 +37,23 @@ _HEADER_LINE_LIMIT: int = 30
def global_memory_dir() -> Path:
- """Return the queen-global memory directory."""
- return _HIVE_QUEEN_DIR / "global_memory"
+ """Return the global memory directory (shared across all queens/colonies)."""
+ return MEMORIES_DIR / "global"
+
+
+def colony_memory_dir(colony_name: str) -> Path:
+ """Return the memory directory for a named colony."""
+ return MEMORIES_DIR / "colonies" / colony_name
+
+
+def queen_memory_dir(queen_name: str = "default") -> Path:
+ """Return the memory directory for a named queen."""
+ return MEMORIES_DIR / "agents" / "queens" / queen_name
+
+
+def agent_memory_dir(agent_name: str) -> Path:
+ """Return the memory directory for a worker agent."""
+ return MEMORIES_DIR / "agents" / agent_name
# ---------------------------------------------------------------------------
diff --git a/core/framework/agents/queen/recall_selector.py b/core/framework/agents/queen/recall_selector.py
index ad1676eb..f2ec6d21 100644
--- a/core/framework/agents/queen/recall_selector.py
+++ b/core/framework/agents/queen/recall_selector.py
@@ -91,7 +91,19 @@ async def select_memories(
resp.stop_reason,
)
return []
- data = json.loads(raw)
+ # Some models wrap JSON in markdown fences or add preamble text.
+ # Try to extract the JSON object if raw parse fails.
+ try:
+ data = json.loads(raw)
+ except json.JSONDecodeError:
+ import re
+
+ m = re.search(r"\{.*\}", raw, re.DOTALL)
+ if m:
+ data = json.loads(m.group())
+ else:
+ logger.warning("recall: LLM returned non-JSON: %.200s", raw)
+ return []
selected = data.get("selected_memories", [])
valid_names = {f.filename for f in files}
result = [s for s in selected if s in valid_names][:max_results]
diff --git a/core/framework/agents/queen/reference/anti_patterns.md b/core/framework/agents/queen/reference/anti_patterns.md
index 4e6bf085..1fa10218 100644
--- a/core/framework/agents/queen/reference/anti_patterns.md
+++ b/core/framework/agents/queen/reference/anti_patterns.md
@@ -25,10 +25,7 @@
14. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
## GCU Errors
-15. **Manually wiring browser tools on event_loop nodes** — Use `node_type="gcu"` which auto-includes browser tools. Do NOT manually list browser tool names.
-16. **Using GCU nodes as regular graph nodes** — GCU nodes are subagents only. They must ONLY appear in `sub_agents=["gcu-node-id"]` and be invoked via `delegate_to_sub_agent()`. Never connect via edges or use as entry/terminal nodes.
-17. **Reusing the same GCU node ID for parallel tasks** — Each concurrent browser task needs a distinct GCU node ID (e.g. `gcu-site-a`, `gcu-site-b`). Two `delegate_to_sub_agent` calls with the same `agent_id` share a browser profile and will interfere with each other's pages.
-18. **Passing `profile=` in GCU tool calls** — Profile isolation for parallel subagents is automatic. The framework injects a unique profile per subagent via an asyncio `ContextVar`. Hardcoding `profile="default"` in a GCU system prompt breaks this isolation.
+15. **Manually wiring browser tools on event_loop nodes** — Do NOT enumerate browser tool names by hand; browser nodes use tools: {policy: "all"} to get all browser tools.
## Worker Agent Errors
19. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Route worker review/approval through queen escalation instead of direct worker HITL.
diff --git a/core/framework/agents/queen/reference/file_templates_declarative.md b/core/framework/agents/queen/reference/file_templates_declarative.md
new file mode 100644
index 00000000..97afbb81
--- /dev/null
+++ b/core/framework/agents/queen/reference/file_templates_declarative.md
@@ -0,0 +1,227 @@
+# Declarative Agent File Templates
+
+Agents are defined as a single `agent.yaml` file. No Python code needed.
+The runner loads this file directly -- no `agent.py`, `config.py`, or
+`nodes/__init__.py` required.
+
+## agent.yaml -- Complete Agent Definition
+
+```yaml
+name: my-agent
+version: 1.0.0
+description: What this agent does.
+
+metadata:
+ intro_message: Welcome! What would you like me to do?
+
+# Template variables -- substituted into system_prompt and identity_prompt
+# via {{variable_name}} syntax. Use this for config values that appear
+# in prompts (spreadsheet IDs, API endpoints, account names, etc.)
+variables:
+ spreadsheet_id: "1ZVxWDL..."
+ sheet_name: "contacts"
+
+goal:
+ description: What this agent achieves.
+ success_criteria:
+ - "First success criterion"
+ - "Second success criterion"
+ constraints:
+ - "Hard constraint the agent must respect"
+
+identity_prompt: |
+ You are a helpful agent.
+
+conversation_mode: continuous # always "continuous" for Hive agents
+
+loop_config:
+ max_iterations: 100
+ max_tool_calls_per_turn: 30
+ max_context_tokens: 32000
+
+# MCP servers to connect (resolved by name from ~/.hive/mcp_registry/)
+mcp_servers:
+ - name: hive-tools
+ - name: gcu-tools
+
+nodes:
+ # Node 1: Process (autonomous entry node)
+ # The queen handles intake and passes structured input via
+ # run_agent_with_input(task). NO client-facing intake node.
+ - id: process
+ name: Process
+ description: Execute the task using available tools
+ max_node_visits: 0 # 0 = unlimited (forever-alive agents)
+ input_keys: [user_request, feedback]
+ output_keys: [results]
+ nullable_output_keys: [feedback]
+ tools:
+ policy: explicit
+ allowed: [web_search, web_scrape, save_data, load_data, list_data_files]
+ success_criteria: Results are complete and accurate.
+ system_prompt: |
+ You are a processing agent. Your task is in memory under "user_request".
+ If "feedback" is present, this is a revision.
+
+ Work in phases:
+ 1. Use tools to gather/process data
+ 2. Analyze results
+ 3. Call set_output in a SEPARATE turn:
+ - set_output("results", "structured results")
+
+ # Node 2: Handoff (autonomous)
+ - id: handoff
+ name: Handoff
+ description: Prepare worker results for queen review
+ max_node_visits: 0
+ input_keys: [results, user_request]
+ output_keys: [next_action, feedback, worker_summary]
+ nullable_output_keys: [feedback, worker_summary]
+ tools:
+ policy: none # handoff nodes don't need tools
+ success_criteria: Results are packaged for queen decision-making.
+ system_prompt: |
+ Do NOT talk to the user directly. The queen is the only user interface.
+
+ If blocked, call escalate(reason, context) then set:
+ - set_output("next_action", "escalated")
+ - set_output("feedback", "what help is needed")
+
+ Otherwise summarize and set:
+ - set_output("worker_summary", "short summary for queen")
+ - set_output("next_action", "done") or "revise"
+ - set_output("feedback", "what to revise") only when revising
+
+edges:
+ - from_node: process
+ to_node: handoff
+ # Feedback loop
+ - from_node: handoff
+ to_node: process
+ condition: conditional
+ condition_expr: "str(next_action).lower() == 'revise'"
+ priority: 2
+ # Escalation loop
+ - from_node: handoff
+ to_node: process
+ condition: conditional
+ condition_expr: "str(next_action).lower() == 'escalated'"
+ priority: 3
+ # Loop back for next task
+ - from_node: handoff
+ to_node: process
+ condition: conditional
+ condition_expr: "str(next_action).lower() == 'done'"
+
+entry_node: process
+terminal_nodes: [] # [] = forever-alive
+```
+
+## Key differences from Python templates
+
+| Before (Python) | After (YAML) |
+|-------------------------------------|----------------------------------------|
+| `agent.py` (250 lines boilerplate) | Not needed |
+| `config.py` (dataclass + metadata) | `variables:` + `metadata:` in YAML |
+| `nodes/__init__.py` (NodeSpec calls)| `nodes:` list in YAML |
+| `__init__.py`, `__main__.py` | Not needed |
+| f-string config injection | `{{variable_name}}` templates |
+| `mcp_servers.json` (separate file) | `mcp_servers:` in YAML (or keep file) |
+
+## Node types
+
+| Type | Description | Tools |
+|--------------|---------------------------------------|--------------------------|
+| `event_loop` | LLM-driven orchestration (default) | Explicit list or `none` |
+| `gcu` | Browser automation via GCU tools | `policy: all` (auto) |
+
+## Tool access policies
+
+```yaml
+# Explicit list (recommended for most nodes)
+tools:
+ policy: explicit
+ allowed: [web_search, save_data]
+
+# All tools (for browser automation nodes)
+tools:
+ policy: all
+
+# No tools (for handoff/summary nodes)
+tools:
+ policy: none
+```
+
+## Edge conditions
+
+| Condition | When to use |
+|---------------|-------------------------------------------------------|
+| `on_success` | Default. Next node after current succeeds. |
+| `on_failure` | Fallback path when current node fails. |
+| `always` | Always traverse regardless of outcome. |
+| `conditional` | Evaluate `condition_expr` against shared memory keys. |
+| `llm_decide` | Let the LLM decide at runtime. |
+
+## Template variables
+
+Use `{{variable_name}}` in `system_prompt` and `identity_prompt`.
+Variables are defined in the top-level `variables:` map.
+
+```yaml
+variables:
+ spreadsheet_id: "1ZVxWDL..."
+ api_endpoint: "https://api.example.com"
+
+nodes:
+ - id: start
+ system_prompt: |
+ Connect to spreadsheet: {{spreadsheet_id}}
+ API endpoint: {{api_endpoint}}
+```
+
+## Entry points
+
+Default is a single manual entry point. For timer/scheduled triggers:
+
+```yaml
+entry_points:
+ - id: default
+ trigger_type: manual
+ - id: daily-check
+ trigger_type: timer
+ trigger_config:
+ interval_minutes: 30
+```
+
+## mcp_servers.json -- Still Supported
+
+The `mcp_servers.json` file is still loaded automatically if present alongside
+`agent.yaml`. You can also inline servers in the YAML:
+
+```yaml
+mcp_servers:
+ - name: hive-tools
+ - name: gcu-tools
+```
+
+Both approaches work. The JSON file takes precedence for backward compatibility.
+
+## Migration from Python agents
+
+Run the migration tool to convert existing agents:
+
+```bash
+uv run python -m framework.tools.migrate_agent exports/my_agent
+```
+
+This generates `agent.yaml` from the existing `agent.py` + `nodes/` + `config.py`.
+The original files are left untouched. Once verified, you can delete the Python files.
+
+## Files after migration
+
+```
+my_agent/
+ agent.yaml # The only required file
+ mcp_servers.json # Optional (can inline in YAML)
+ flowchart.json # Optional (auto-generated)
+```
diff --git a/core/framework/agents/queen/reference/framework_guide.md b/core/framework/agents/queen/reference/framework_guide.md
index 5f93511a..1a0f7e4f 100644
--- a/core/framework/agents/queen/reference/framework_guide.md
+++ b/core/framework/agents/queen/reference/framework_guide.md
@@ -1,306 +1,193 @@
-# Hive Agent Framework — Condensed Reference
+# Hive Agent Framework -- Condensed Reference
## Architecture
-Agents are Python packages in `exports/`:
+Agents are declarative JSON configs in `exports/`:
```
exports/my_agent/
-├── __init__.py # MUST re-export ALL module-level vars from agent.py
-├── __main__.py # CLI (run, tui, info, validate, shell)
-├── agent.py # Graph construction (goal, edges, agent class)
-├── config.py # Runtime config
-├── nodes/__init__.py # Node definitions (NodeSpec)
-├── mcp_servers.json # MCP tool server config
-└── tests/ # pytest tests
+ agent.json # The entire agent definition
+ mcp_servers.json # MCP tool server config (optional, prefer registry refs)
```
-## Agent Loading Contract
+No Python files. No `__init__.py`, `__main__.py`, `config.py`, or `nodes/`.
-`AgentRunner.load()` imports the package (`__init__.py`) and reads these
-module-level variables via `getattr()`:
+## Agent Loading
-| Variable | Required | Default if missing | Consequence |
-|----------|----------|--------------------|-------------|
-| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" |
-| `nodes` | YES | `None` | **FATAL** — same error |
-| `edges` | YES | `None` | **FATAL** — same error |
-| `entry_node` | no | `nodes[0].id` | Probably wrong node |
-| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails |
-| `terminal_nodes` | **YES** | `[]` | **FATAL** — graph must have at least one terminal node |
-| `pause_nodes` | no | `[]` | OK |
-| `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
-| `identity_prompt` | no | not passed | No agent-level identity |
-| `loop_config` | no | `{}` | No iteration limits |
-| `triggers.json` (file) | no | not present | No triggers (timers, webhooks) |
+`AgentLoader.load()` reads `agent.json` and builds the execution graph.
+If `agent.py` exists (legacy), it's loaded as a Python module instead.
-**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from
-`agent.py`. Missing exports silently fall back to defaults, causing
-hard-to-debug failures.
+## agent.json Schema
-**Why `default_agent.validate()` is NOT sufficient:**
-`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges).
-These are always correct because the constructor references agent.py's module
-vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`),
-not the class. So `validate()` passes while `AgentRunner.load()` fails.
-Always test with `AgentRunner.load("exports/{name}")` — this is the same
-code path the TUI and `hive run` use.
-
-## Goal
-
-Defines success criteria and constraints:
-```python
-goal = Goal(
- id="kebab-case-id",
- name="Display Name",
- description="What the agent does",
- success_criteria=[
- SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25),
- ],
- constraints=[
- Constraint(id="c-id", description="...", constraint_type="hard", category="quality"),
- ],
-)
+```json
+{
+ "name": "my-agent",
+ "version": "1.0.0",
+ "description": "What this agent does",
+ "goal": {
+ "description": "What to achieve",
+ "success_criteria": ["criterion 1", "criterion 2"],
+ "constraints": ["constraint 1"]
+ },
+ "identity_prompt": "You are a helpful agent.",
+ "conversation_mode": "continuous",
+ "loop_config": {
+ "max_iterations": 100,
+ "max_tool_calls_per_turn": 30,
+ "max_context_tokens": 32000
+ },
+ "mcp_servers": [
+ {"name": "hive-tools"},
+ {"name": "gcu-tools"}
+ ],
+ "variables": {
+ "spreadsheet_id": "1ZVx..."
+ },
+ "nodes": [...],
+ "edges": [...],
+ "entry_node": "process",
+  "terminal_nodes": ["process"]
+}
```
-- 3-5 success criteria, weights sum to 1.0
-- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional)
-## NodeSpec Fields
+## Template Variables
+
+Use `{{variable_name}}` in `system_prompt` and `identity_prompt`. Variables
+are defined in the top-level `variables` object:
+
+```json
+{
+ "variables": {"sheet_id": "1ZVx..."},
+ "nodes": [{
+ "id": "start",
+ "system_prompt": "Use sheet: {{sheet_id}}"
+ }]
+}
+```
+
+## Node Fields
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| id | str | required | kebab-case identifier |
-| name | str | required | Display name |
+| name | str | id | Display name |
| description | str | required | What the node does |
-| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) |
-| input_keys | list[str] | required | Memory keys this node reads |
-| output_keys | list[str] | required | Memory keys this node writes via set_output |
+| node_type | str | "event_loop" | Only `"event_loop"` is supported |
+| input_keys | list | [] | Memory keys this node reads |
+| output_keys | list | [] | Memory keys this node writes via set_output |
| system_prompt | str | "" | LLM instructions |
-| tools | list[str] | [] | Tool names from MCP servers |
-| client_facing | bool | False | Deprecated compatibility field. Queen interactivity is implicit; workers should escalate instead |
-| nullable_output_keys | list[str] | [] | Keys that may remain unset |
-| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops |
-| max_retries | int | 3 | Retries on failure |
+| tools | object | {} | Tool access policy (see below) |
+| nullable_output_keys | list | [] | Keys that may remain unset |
+| max_node_visits | int | 1 | 0=unlimited (for forever-alive agents) |
| success_criteria | str | "" | Natural language for judge evaluation |
+| client_facing | bool | false | Whether output is shown to user |
-## EdgeSpec Fields
+## Tool Access Policies
+
+Each node declares its tools via a policy object:
+
+```json
+{"tools": {"policy": "explicit", "allowed": ["web_search", "save_data"]}}
+{"tools": {"policy": "all"}}
+{"tools": {"policy": "none"}}
+```
+
+- `explicit` (default): only named tools. Empty `allowed` = zero tools.
+- `all`: all tools from registry (e.g. for browser automation nodes).
+- `none`: no tools (for handoff/summary nodes).
+
+## Edge Fields
| Field | Type | Description |
|-------|------|-------------|
-| id | str | kebab-case identifier |
-| source | str | Source node ID |
-| target | str | Target node ID |
-| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL |
-| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) |
-| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) |
+| from_node | str | Source node ID |
+| to_node | str | Target node ID |
+| condition | str | `on_success`, `on_failure`, `always`, `conditional` |
+| condition_expr | str | Python expression for conditional routing |
+| priority | int | Higher = evaluated first |
+
+condition_expr examples:
+- `"needs_more_research == True"`
+- `"str(next_action).lower() == 'revise'"`
## Key Patterns
-### STEP 1/STEP 2 (Client-Facing Nodes)
-```
-**STEP 1 — Respond to the user (text only, NO tool calls):**
-[Present information, ask questions]
-
-**STEP 2 — After the user responds, call set_output:**
-- set_output("key", "value based on user response")
-```
-This prevents premature set_output before user interaction.
-
### Fewer, Richer Nodes (CRITICAL)
-**Hard limit: 3-6 nodes for most agents.** Never exceed 6 unless the user
-explicitly requests a complex multi-phase pipeline.
+**Hard limit: 3-6 nodes for most agents.** Each node boundary serializes
+outputs and destroys in-context information. Merge unless:
+1. Client-facing boundary (different interaction models)
+2. Disjoint tool sets
+3. Parallel execution (fan-out branches)
-Each node boundary serializes outputs to the shared buffer and **destroys** all
-in-context information: tool call results, intermediate reasoning, conversation
-history. A research node that searches, fetches, and analyzes in ONE node keeps
-all source material in its conversation context. Split across 3 nodes, each
-downstream node only sees the serialized summary string.
-
-**Decision framework — merge unless ANY of these apply:**
-1. **Client-facing boundary** — Autonomous and client-facing work MUST be
- separate nodes (different interaction models)
-2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web
- search vs database), separate nodes make sense
-3. **Parallel execution** — Fan-out branches must be separate nodes
-
-**Red flags that you have too many nodes:**
-- A node with 0 tools (pure LLM reasoning) → merge into predecessor/successor
-- A node that sets only 1 trivial output → collapse into predecessor
-- Multiple consecutive autonomous nodes → combine into one rich node
-- A "report" node that presents analysis → merge into the client-facing node
-- A "confirm" or "schedule" node that doesn't call any external service → remove
-
-**Typical agent structure (2 nodes):**
+**Typical structure (2 nodes):**
```
-process (autonomous) ←→ review (queen-mediated)
-```
-The queen owns intake — she gathers requirements from the user, then
-passes structured input via `run_agent_with_input(task)`. When building
-the agent, design the entry node's `input_keys` to match what the queen
-will provide at run time. Worker agents should NOT have a client-facing
-intake node. Mid-execution review/approval should happen through queen
-escalation rather than direct worker HITL.
-
-For simpler agents, just 1 autonomous node:
-```
-process (autonomous) — loops back to itself
+process (autonomous) <-> review (queen-mediated)
```
-### nullable_output_keys
-For inputs that only arrive on certain edges:
-```python
-research_node = NodeSpec(
- input_keys=["brief", "feedback"],
- nullable_output_keys=["feedback"], # Only present on feedback edge
- max_node_visits=3,
-)
-```
-
-### Mutually Exclusive Outputs
-For routing decisions:
-```python
-review_node = NodeSpec(
- output_keys=["approved", "feedback"],
- nullable_output_keys=["approved", "feedback"], # Node sets one or the other
-)
-```
-
-### Continuous Loop Pattern
-Mark the primary event_loop node as terminal: `terminal_nodes=["process"]`.
-The node has `output_keys` and can complete when the agent finishes its work.
-Use `conversation_mode="continuous"` to preserve context across transitions.
+The queen owns intake. Worker agents should NOT have a client-facing intake
+node. Mid-execution review should happen through queen escalation.
### set_output
- Synthetic tool injected by framework
- Call separately from real tool calls (separate turn)
- `set_output("key", "value")` stores to the shared buffer
-## Edge Conditions
-
-| Condition | When |
-|-----------|------|
-| ON_SUCCESS | Node completed successfully |
-| ON_FAILURE | Node failed |
-| ALWAYS | Unconditional |
-| CONDITIONAL | condition_expr evaluates to True against memory |
-
-condition_expr examples:
-- `"needs_more_research == True"`
-- `"str(next_action).lower() == 'new_agent'"`
-- `"feedback is not None"`
-
-## Graph Lifecycle
+### Graph Lifecycle
| Pattern | terminal_nodes | When |
|---------|---------------|------|
-| **Continuous loop** | `["node-with-output-keys"]` | **DEFAULT for all agents** |
+| Continuous loop | `["node-with-output-keys"]` | DEFAULT for all agents |
| Linear | `["last-node"]` | One-shot/batch agents |
-**Every graph must have at least one terminal node.** Terminal nodes
-define where execution ends. For interactive agents that loop continuously,
-mark the primary event_loop node as terminal (it has `output_keys` and can
-complete at any point). The framework default for `max_node_visits` is 0
-(unbounded), so nodes work correctly in continuous loops without explicit
-override. Only set `max_node_visits > 0` in one-shot agents with feedback loops.
-Every node must have at least one outgoing edge — no dead ends.
+Every graph must have at least one terminal node.
-## Continuous Conversation Mode
+### Continuous Conversation Mode
`conversation_mode` has ONLY two valid states:
-- `"continuous"` — recommended for interactive agents
-- Omit entirely — isolated per-node conversations (each node starts fresh)
+- `"continuous"` -- recommended (context carries across node transitions)
+- Omit entirely -- isolated per-node conversations
-**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`,
-`"adaptive"`, `"shared"`. These do not exist in the framework.
-
-When `conversation_mode="continuous"`:
-- Same conversation thread carries across node transitions
-- Layered system prompts: identity (agent-level) + narrative + focus (per-node)
-- Transition markers inserted at boundaries
-- Compaction happens opportunistically at phase transitions
+**INVALID values:** `"client_facing"`, `"interactive"`, `"shared"`.
## loop_config
Only three valid keys:
-```python
-loop_config = {
- "max_iterations": 100, # Max LLM turns per node visit
- "max_tool_calls_per_turn": 20, # Max tool calls per LLM response
- "max_context_tokens": 32000, # Triggers conversation compaction
+```json
+{
+ "max_iterations": 100,
+ "max_tool_calls_per_turn": 20,
+ "max_context_tokens": 32000
}
```
-**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`,
-`"temperature"`. These are silently ignored or cause errors.
## Data Tools (Spillover)
For large data that exceeds context:
-- `save_data(filename, data)` — Write to session data dir
-- `load_data(filename, offset, limit)` — Read with pagination
-- `list_data_files()` — List files
-- `serve_file_to_user(filename, label)` — Clickable file:// URI
+- `save_data(filename, data)` -- write to session data dir
+- `load_data(filename, offset, limit)` -- read with pagination
+- `list_data_files()` -- list files
+- `serve_file_to_user(filename, label)` -- clickable file URI
-`data_dir` is auto-injected by framework — LLM never sees it.
+`data_dir` is auto-injected by framework.
## Fan-Out / Fan-In
-Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather().
-- Parallel nodes must have disjoint output_keys
-- Only one branch may have client_facing nodes
-- Fan-in node gets all outputs in the shared buffer
+Multiple `on_success` edges from same source = parallel execution.
+Parallel nodes must have disjoint output_keys.
## Judge System
- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
- **SchemaJudge**: Validates against Pydantic model
-- **Custom**: Implement `evaluate(context) -> JudgeVerdict`
-
-Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.
-
-## Triggers (Timers, Webhooks)
-
-For agents that react to external events, create a `triggers.json` file
-in the agent's export directory:
-
-```json
-[
- {
- "id": "daily-check",
- "name": "Daily Check",
- "trigger_type": "timer",
- "trigger_config": {"cron": "0 9 * * *"},
- "task": "Run the daily check process"
- }
-]
-```
-
-### Key Fields
-- `trigger_type`: `"timer"` or `"webhook"`
-- `trigger_config`: `{"cron": "0 9 * * *"}` or `{"interval_minutes": 20}`
-- `task`: describes what the worker should do when the trigger fires
-- Triggers can also be created/removed at runtime via `set_trigger` / `remove_trigger` queen tools
## Tool Discovery
-Do NOT rely on a static tool list — it will be outdated. Always call
-`list_agent_tools()` with NO arguments first to see ALL available tools.
-Only use `group=` or `output_schema=` as follow-up calls after seeing the
-full list.
+Always call `list_agent_tools()` first to see available tools.
+Do NOT rely on a static tool list.
```
-list_agent_tools() # ALWAYS call this first
-list_agent_tools(group="gmail", output_schema="full") # then drill into a category
-list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools
+list_agent_tools() # full summary
+list_agent_tools(group="gmail", output_schema="full") # drill into category
```
-After building, run `validate_agent_package("{name}")` to check everything at once.
-
-Common tool categories (verify via list_agent_tools):
-- **Web**: search, scrape, PDF
-- **Data**: save/load/append/list data files, serve to user
-- **File**: view, write, replace, diff, list, grep
-- **Communication**: email, gmail, slack, telegram
-- **CRM**: hubspot, apollo, calcom
-- **GitHub**: stargazers, user profiles, repos
-- **Vision**: image analysis
-- **Time**: current time
+After building, run `validate_agent_package("{name}")` to check everything.
diff --git a/core/framework/agents/queen/reference/gcu_guide.md b/core/framework/agents/queen/reference/gcu_guide.md
index c27db24d..cf254637 100644
--- a/core/framework/agents/queen/reference/gcu_guide.md
+++ b/core/framework/agents/queen/reference/gcu_guide.md
@@ -1,158 +1,53 @@
-# GCU Browser Automation Guide
+# Browser Automation Guide
-## When to Use GCU Nodes
+## When to Use Browser Nodes
-Use `node_type="gcu"` when:
-- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs)
-- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content)
-- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files
+Use browser nodes (with `tools: {policy: "all"}`) when:
+- The task requires interacting with web pages (clicking, typing, navigating)
+- No API is available for the target service
+- The user is already logged in to the target site
-Do NOT use GCU for:
-- Static content that `web_scrape` handles fine
-- API-accessible data (use the API directly)
-- PDF/file processing
-- Anything that doesn't require a browser UI
+## What Browser Nodes Are
-## What GCU Nodes Are
+- Regular `event_loop` nodes with browser tools from gcu-tools MCP server
+- Set `tools: {policy: "all"}` to give access to all browser tools
+- Wire into the graph with edges like any other node
+- No special node_type needed
-- `node_type="gcu"` — a declarative enhancement over `event_loop`
-- Framework auto-prepends browser best-practices system prompt
-- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server
-- Same underlying `EventLoopNode` class — no new imports needed
-- `tools=[]` is correct — tools are auto-populated at runtime
+## Available Browser Tools
-## GCU Architecture Pattern
+All tools are prefixed with `browser_`:
+- `browser_start`, `browser_open` -- launch/navigate
+- `browser_click`, `browser_fill`, `browser_type` -- interact
+- `browser_snapshot` -- read page content (preferred over screenshot)
+- `browser_screenshot` -- visual capture
+- `browser_scroll`, `browser_wait` -- navigation helpers
+- `browser_evaluate` -- run JavaScript
-GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges.
+## System Prompt Tips for Browser Nodes
-- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work
-- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")`
-- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False`
-- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)`
-
-## GCU Node Definition Template
-
-```python
-gcu_browser_node = NodeSpec(
- id="gcu-browser-worker",
- name="Browser Worker",
- description="Browser subagent that does X.",
- node_type="gcu",
- client_facing=False,
- max_node_visits=1,
- input_keys=[],
- output_keys=["result"],
- tools=[], # Auto-populated with all browser tools
- system_prompt="""\
-You are a browser agent. Your job: [specific task].
-
-## Workflow
-1. browser_start (only if no browser is running yet)
-2. browser_open(url=TARGET_URL) — note the returned targetId
-3. browser_snapshot to read the page
-4. [task-specific steps]
-5. set_output("result", JSON)
-
-## Output format
-set_output("result", JSON) with:
-- [field]: [type and description]
-""",
-)
+```
+1. Use browser_snapshot() to read page content (NOT browser_get_text)
+2. Use browser_wait(seconds=2-3) after navigation for page load
+3. If you hit an auth wall, call set_output with an error and move on
+4. Keep tool calls per turn <= 10 for reliability
```
-## Parent Node Template (orchestrating GCU subagents)
-
-```python
-orchestrator_node = NodeSpec(
- id="orchestrator",
- ...
- node_type="event_loop",
- sub_agents=["gcu-browser-worker"],
- system_prompt="""\
-...
-delegate_to_sub_agent(
- agent_id="gcu-browser-worker",
- task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]."
-)
-...
-""",
- tools=[], # Orchestrator doesn't need browser tools
-)
-```
-
-## mcp_servers.json with GCU
+## Example
```json
{
- "hive-tools": { ... },
- "gcu-tools": {
- "transport": "stdio",
- "command": "uv",
- "args": ["run", "python", "-m", "gcu.server", "--stdio"],
- "cwd": "../../tools",
- "description": "GCU tools for browser automation"
- }
+ "id": "scan-profiles",
+ "name": "Scan LinkedIn Profiles",
+ "description": "Navigate LinkedIn search results and collect profile data",
+ "tools": {"policy": "all"},
+ "input_keys": ["search_url"],
+ "output_keys": ["profiles"],
+ "system_prompt": "Navigate to the search URL, paginate through results..."
}
```
-Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine.
-
-## GCU System Prompt Best Practices
-
-Key rules to bake into GCU node prompts:
-
-- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML
-- Always `browser_wait` after navigation
-- Use large scroll amounts (~2000-5000) for lazy-loaded content
-- For spillover files, use `run_command` with grep, not `read_file`
-- If auth wall detected, report immediately — don't attempt login
-- Keep tool calls per turn ≤10
-- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call
-
-## Multiple Concurrent GCU Subagents
-
-When a task can be parallelized across multiple sites or profiles, declare a distinct GCU
-node for each and invoke them all in the same LLM turn. The framework batches all
-`delegate_to_sub_agent` calls made in one turn and runs them with `asyncio.gather`, so
-they execute concurrently — not sequentially.
-
-**Each GCU subagent automatically gets its own isolated browser context** — no `profile=`
-argument is needed in tool calls. The framework derives a unique profile from the subagent's
-node ID and instance counter and injects it via an asyncio `ContextVar` before the subagent
-runs.
-
-### Example: three sites in parallel
-
-```python
-# Three distinct GCU nodes
-gcu_site_a = NodeSpec(id="gcu-site-a", node_type="gcu", ...)
-gcu_site_b = NodeSpec(id="gcu-site-b", node_type="gcu", ...)
-gcu_site_c = NodeSpec(id="gcu-site-c", node_type="gcu", ...)
-
-orchestrator = NodeSpec(
- id="orchestrator",
- node_type="event_loop",
- sub_agents=["gcu-site-a", "gcu-site-b", "gcu-site-c"],
- system_prompt="""\
-Call all three subagents in a single response to run them in parallel:
- delegate_to_sub_agent(agent_id="gcu-site-a", task="Scrape prices from site A")
- delegate_to_sub_agent(agent_id="gcu-site-b", task="Scrape prices from site B")
- delegate_to_sub_agent(agent_id="gcu-site-c", task="Scrape prices from site C")
-""",
-)
+Connected via regular edges:
+```
+search-setup -> scan-profiles -> process-results
```
-
-**Rules:**
-- Use distinct node IDs for each concurrent task — sharing an ID shares the browser context.
-- The GCU node prompts do not need to mention `profile=`; isolation is automatic.
-- Cleanup is automatic at session end, but GCU nodes can call `browser_stop()` explicitly
- if they want to release resources mid-run.
-
-## GCU Anti-Patterns
-
-- Using `browser_screenshot` to read text (use `browser_snapshot` instead; screenshots are for visual context only)
-- Re-navigating after scrolling (resets scroll position)
-- Attempting login on auth walls
-- Forgetting `target_id` in multi-tab scenarios
-- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern
-- Making GCU nodes `client_facing=True` (they should be autonomous subagents)
diff --git a/core/framework/agents/queen/reflection_agent.py b/core/framework/agents/queen/reflection_agent.py
index 4bb20d64..d2033717 100644
--- a/core/framework/agents/queen/reflection_agent.py
+++ b/core/framework/agents/queen/reflection_agent.py
@@ -2,7 +2,7 @@
A lightweight side agent that runs after each queen LLM turn. It inspects
recent conversation messages and extracts durable user knowledge into
-individual memory files in ``~/.hive/queen/global_memory/``.
+individual memory files in ``~/.hive/memories/global/``.
Two reflection types:
- **Short reflection**: after conversational queen turns. Distills
@@ -493,7 +493,7 @@ async def subscribe_reflection_triggers(
Call this once during queen setup. Returns a list of event-bus
subscription IDs for cleanup during session teardown.
"""
- from framework.runtime.event_bus import EventType
+ from framework.host.event_bus import EventType
mem_dir = memory_dir or global_memory_dir()
_lock = asyncio.Lock()
diff --git a/core/framework/agents/queen/tests/conftest.py b/core/framework/agents/queen/tests/conftest.py
index de518df2..d34d2bf2 100644
--- a/core/framework/agents/queen/tests/conftest.py
+++ b/core/framework/agents/queen/tests/conftest.py
@@ -22,10 +22,10 @@ def mock_mode():
@pytest_asyncio.fixture(scope="session")
async def runner(tmp_path_factory, mock_mode):
- from framework.runner.runner import AgentRunner
+ from framework.loader.agent_loader import AgentLoader
storage = tmp_path_factory.mktemp("agent_storage")
- r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
+ r = AgentLoader.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
r._setup()
yield r
await r.cleanup_async()
diff --git a/core/framework/cli.py b/core/framework/cli.py
index e7752922..86c51cd1 100644
--- a/core/framework/cli.py
+++ b/core/framework/cli.py
@@ -79,7 +79,7 @@ def main():
subparsers = parser.add_subparsers(dest="command", required=True)
# Register runner commands (run, info, validate, list, shell)
- from framework.runner.cli import register_commands
+ from framework.loader.cli import register_commands
register_commands(subparsers)
@@ -99,7 +99,7 @@ def main():
register_debugger_commands(subparsers)
# Register MCP registry commands (mcp install, mcp add, ...)
- from framework.runner.mcp_registry_cli import register_mcp_commands
+ from framework.loader.mcp_registry_cli import register_mcp_commands
register_mcp_commands(subparsers)
diff --git a/core/framework/config.py b/core/framework/config.py
index 095f49fc..341f0ed4 100644
--- a/core/framework/config.py
+++ b/core/framework/config.py
@@ -12,13 +12,47 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
-from framework.graph.edge import DEFAULT_MAX_TOKENS
+from framework.orchestrator.edge import DEFAULT_MAX_TOKENS
+
+# ---------------------------------------------------------------------------
+# Hive home directory structure
+# ---------------------------------------------------------------------------
+
+HIVE_HOME = Path.home() / ".hive"
+QUEENS_DIR = HIVE_HOME / "agents" / "queens"
+COLONIES_DIR = HIVE_HOME / "colonies"
+MEMORIES_DIR = HIVE_HOME / "memories"
+
+
+def queen_dir(queen_name: str = "default") -> Path:
+ """Return the storage directory for a named queen agent."""
+ return QUEENS_DIR / queen_name
+
+
+def colony_dir(colony_name: str) -> Path:
+ """Return the directory for a named colony."""
+ return COLONIES_DIR / colony_name
+
+
+def memory_dir(scope: str, name: str | None = None) -> Path:
+ """Return memory dir for a scope.
+
+ Examples::
+
+ memory_dir("global") -> ~/.hive/memories/global
+ memory_dir("colonies", "my_agent") -> ~/.hive/memories/colonies/my_agent
+ memory_dir("agents/queens", "default")-> ~/.hive/memories/agents/queens/default
+ memory_dir("agents", "worker_name") -> ~/.hive/memories/agents/worker_name
+ """
+ base = MEMORIES_DIR / scope
+ return base / name if name else base
+
# ---------------------------------------------------------------------------
# Low-level config file access
# ---------------------------------------------------------------------------
-HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
+HIVE_CONFIG_FILE = HIVE_HOME / "configuration.json"
# Hive LLM router endpoint (Anthropic-compatible).
# litellm's Anthropic handler appends /v1/messages, so this is just the base host.
@@ -130,7 +164,7 @@ def get_worker_api_key() -> str | None:
# Worker-specific subscription / env var
if worker_llm.get("use_claude_code_subscription"):
try:
- from framework.runner.runner import get_claude_code_token
+ from framework.loader.agent_loader import get_claude_code_token
token = get_claude_code_token()
if token:
@@ -140,7 +174,7 @@ def get_worker_api_key() -> str | None:
if worker_llm.get("use_codex_subscription"):
try:
- from framework.runner.runner import get_codex_token
+ from framework.loader.agent_loader import get_codex_token
token = get_codex_token()
if token:
@@ -150,7 +184,7 @@ def get_worker_api_key() -> str | None:
if worker_llm.get("use_kimi_code_subscription"):
try:
- from framework.runner.runner import get_kimi_code_token
+ from framework.loader.agent_loader import get_kimi_code_token
token = get_kimi_code_token()
if token:
@@ -160,7 +194,7 @@ def get_worker_api_key() -> str | None:
if worker_llm.get("use_antigravity_subscription"):
try:
- from framework.runner.runner import get_antigravity_token
+ from framework.loader.agent_loader import get_antigravity_token
token = get_antigravity_token()
if token:
@@ -216,7 +250,7 @@ def get_worker_llm_extra_kwargs() -> dict[str, Any]:
"User-Agent": "CodexBar",
}
try:
- from framework.runner.runner import get_codex_account_id
+ from framework.loader.agent_loader import get_codex_account_id
account_id = get_codex_account_id()
if account_id:
@@ -263,22 +297,43 @@ def get_max_context_tokens() -> int:
return get_hive_config().get("llm", {}).get("max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS)
+def get_api_keys() -> list[str] | None:
+ """Return a list of API keys if ``api_keys`` is configured, else ``None``.
+
+ This supports key-pool rotation: configure multiple keys in
+ ``~/.hive/configuration.json`` under ``llm.api_keys`` and the
+ :class:`~framework.llm.key_pool.KeyPool` will rotate through them.
+ """
+ llm = get_hive_config().get("llm", {})
+ keys = llm.get("api_keys")
+ if keys and isinstance(keys, list) and len(keys) > 0:
+ return [k for k in keys if k] # filter empties
+ return None
+
+
def get_api_key() -> str | None:
"""Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code.
Priority:
+ 0. Explicit key pool (``api_keys`` list) -- returns first key for
+ single-key callers; full pool available via :func:`get_api_keys`.
1. Claude Code subscription (``use_claude_code_subscription: true``)
reads the OAuth token from ``~/.claude/.credentials.json``.
2. Codex subscription (``use_codex_subscription: true``)
reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``.
3. Environment variable named in ``api_key_env_var``.
"""
+ # If an explicit key pool is configured, use the first key.
+ pool_keys = get_api_keys()
+ if pool_keys:
+ return pool_keys[0]
+
llm = get_hive_config().get("llm", {})
# Claude Code subscription: read OAuth token directly
if llm.get("use_claude_code_subscription"):
try:
- from framework.runner.runner import get_claude_code_token
+ from framework.loader.agent_loader import get_claude_code_token
token = get_claude_code_token()
if token:
@@ -289,7 +344,7 @@ def get_api_key() -> str | None:
# Codex subscription: read OAuth token from Keychain / auth.json
if llm.get("use_codex_subscription"):
try:
- from framework.runner.runner import get_codex_token
+ from framework.loader.agent_loader import get_codex_token
token = get_codex_token()
if token:
@@ -300,7 +355,7 @@ def get_api_key() -> str | None:
# Kimi Code subscription: read API key from ~/.kimi/config.toml
if llm.get("use_kimi_code_subscription"):
try:
- from framework.runner.runner import get_kimi_code_token
+ from framework.loader.agent_loader import get_kimi_code_token
token = get_kimi_code_token()
if token:
@@ -311,7 +366,7 @@ def get_api_key() -> str | None:
# Antigravity subscription: read OAuth token from accounts JSON
if llm.get("use_antigravity_subscription"):
try:
- from framework.runner.runner import get_antigravity_token
+ from framework.loader.agent_loader import get_antigravity_token
token = get_antigravity_token()
if token:
@@ -468,7 +523,7 @@ def get_llm_extra_kwargs() -> dict[str, Any]:
"User-Agent": "CodexBar",
}
try:
- from framework.runner.runner import get_codex_account_id
+ from framework.loader.agent_loader import get_codex_account_id
account_id = get_codex_account_id()
if account_id:
diff --git a/core/framework/credentials/setup.py b/core/framework/credentials/setup.py
index dfee6bf4..1bb4e90d 100644
--- a/core/framework/credentials/setup.py
+++ b/core/framework/credentials/setup.py
@@ -36,7 +36,7 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
- from framework.graph import NodeSpec
+ from framework.orchestrator import NodeSpec
logger = logging.getLogger(__name__)
@@ -533,7 +533,9 @@ class CredentialSetupSession:
def load_agent_nodes(agent_path: str | Path) -> list:
- """Load NodeSpec list from an agent's agent.py or agent.json.
+ """Load NodeSpec list from an agent directory.
+
+ Checks agent.json (declarative) first, then agent.py (legacy).
Args:
agent_path: Path to agent directory.
@@ -542,16 +544,28 @@ def load_agent_nodes(agent_path: str | Path) -> list:
List of NodeSpec objects (empty list if agent can't be loaded).
"""
agent_path = Path(agent_path)
+ agent_json_file = agent_path / "agent.json"
agent_py = agent_path / "agent.py"
- agent_json = agent_path / "agent.json"
- if agent_py.exists():
+ if agent_json_file.exists():
+ return _load_nodes_from_json_declarative(agent_json_file)
+ elif agent_py.exists():
return _load_nodes_from_python_agent(agent_path)
- elif agent_json.exists():
- return _load_nodes_from_json_agent(agent_json)
return []
+def _load_nodes_from_json_declarative(agent_json: Path) -> list:
+ """Load nodes from a declarative JSON agent."""
+ try:
+ from framework.loader.agent_loader import load_agent_config
+
+ data = json.loads(agent_json.read_text(encoding="utf-8"))
+ graph, _ = load_agent_config(data)
+ return list(graph.nodes)
+ except Exception:
+ return []
+
+
def _load_nodes_from_python_agent(agent_path: Path) -> list:
"""Load nodes from a Python-based agent."""
import importlib.util
@@ -590,7 +604,7 @@ def _load_nodes_from_json_agent(agent_json: Path) -> list:
with open(agent_json, encoding="utf-8-sig") as f:
data = json.load(f)
- from framework.graph import NodeSpec
+ from framework.orchestrator import NodeSpec
nodes_data = data.get("graph", {}).get("nodes", [])
nodes = []
diff --git a/core/framework/graph/__init__.py b/core/framework/graph/__init__.py
deleted file mode 100644
index a6751ddf..00000000
--- a/core/framework/graph/__init__.py
+++ /dev/null
@@ -1,65 +0,0 @@
-"""Graph structures: Goals, Nodes, Edges, and Execution."""
-
-from framework.graph.context import GraphContext
-from framework.graph.context_handoff import ContextHandoff, HandoffContext
-from framework.graph.conversation import ConversationStore, Message, NodeConversation
-from framework.graph.edge import DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec
-from framework.graph.event_loop_node import (
- EventLoopNode,
- JudgeProtocol,
- JudgeVerdict,
- LoopConfig,
- OutputAccumulator,
-)
-from framework.graph.executor import GraphExecutor
-from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
-from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec
-from framework.graph.worker_agent import (
- Activation,
- FanOutTag,
- FanOutTracker,
- WorkerAgent,
- WorkerCompletion,
- WorkerLifecycle,
-)
-
-__all__ = [
- # Goal
- "Goal",
- "SuccessCriterion",
- "Constraint",
- "GoalStatus",
- # Node
- "NodeSpec",
- "NodeContext",
- "NodeResult",
- "NodeProtocol",
- # Edge
- "EdgeSpec",
- "EdgeCondition",
- "GraphSpec",
- "DEFAULT_MAX_TOKENS",
- # Executor
- "GraphExecutor",
- # Conversation
- "NodeConversation",
- "ConversationStore",
- "Message",
- # Event Loop
- "EventLoopNode",
- "LoopConfig",
- "OutputAccumulator",
- "JudgeProtocol",
- "JudgeVerdict",
- # Context Handoff
- "ContextHandoff",
- "HandoffContext",
- # Worker Agent
- "WorkerAgent",
- "WorkerLifecycle",
- "WorkerCompletion",
- "Activation",
- "FanOutTag",
- "FanOutTracker",
- "GraphContext",
-]
diff --git a/core/framework/graph/event_loop/__init__.py b/core/framework/graph/event_loop/__init__.py
deleted file mode 100644
index 1ec8a803..00000000
--- a/core/framework/graph/event_loop/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-"""EventLoopNode subpackage — modular components of the event loop orchestrator.
-
-All public symbols are re-exported by the parent ``event_loop_node.py`` for
-backward compatibility. Internal consumers may import directly from these
-submodules for clarity.
-"""
diff --git a/core/framework/graph/event_loop/subagent_executor.py b/core/framework/graph/event_loop/subagent_executor.py
deleted file mode 100644
index cd5c207c..00000000
--- a/core/framework/graph/event_loop/subagent_executor.py
+++ /dev/null
@@ -1,370 +0,0 @@
-"""Subagent execution for the event loop.
-
-Handles the full subagent lifecycle: validation, context setup, tool filtering,
-conversation store derivation, execution, and cleanup.
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import time
-from collections.abc import Awaitable, Callable
-from pathlib import Path
-from typing import TYPE_CHECKING, Any
-
-from framework.graph.conversation import ConversationStore
-from framework.graph.event_loop.judge_pipeline import SubagentJudge
-from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
-from framework.graph.node import DataBuffer, NodeContext
-from framework.llm.provider import ToolResult, ToolUse
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.event_bus import EventBus
-
-if TYPE_CHECKING:
- from framework.graph.event_loop_node import EventLoopNode
-
-logger = logging.getLogger(__name__)
-
-
-async def execute_subagent(
- ctx: NodeContext,
- agent_id: str,
- task: str,
- *,
- config: LoopConfig,
- event_loop_node_cls: type[EventLoopNode],
- escalation_receiver_cls: Callable[[], Any],
- accumulator: OutputAccumulator | None = None,
- event_bus: EventBus | None = None,
- tool_executor: Callable[[ToolUse], ToolResult | Awaitable[ToolResult]] | None = None,
- conversation_store: ConversationStore | None = None,
- subagent_instance_counter: dict[str, int] | None = None,
-) -> ToolResult:
- """Execute a subagent and return the result as a ToolResult.
-
- The subagent:
- - Gets a fresh conversation with just the task
- - Has read-only access to the parent's readable memory
- - Cannot delegate to its own subagents (prevents recursion)
- - Returns its output in structured JSON format
-
- Args:
- ctx: Parent node's context (for memory, tools, LLM access).
- agent_id: The node ID of the subagent to invoke.
- task: The task description to give the subagent.
- accumulator: Parent's OutputAccumulator.
- event_bus: EventBus for lifecycle events.
- config: LoopConfig for iteration/tool limits.
- tool_executor: Tool executor callable.
- conversation_store: Parent conversation store (for deriving subagent store).
- subagent_instance_counter: Mutable counter dict for unique subagent paths.
-
- Returns:
- ToolResult with structured JSON output.
- """
- # Log subagent invocation start
- logger.info(
- "\n" + "=" * 60 + "\n"
- "🤖 SUBAGENT INVOCATION\n"
- "=" * 60 + "\n"
- "Parent Node: %s\n"
- "Subagent ID: %s\n"
- "Task: %s\n" + "=" * 60,
- ctx.node_id,
- agent_id,
- task[:500] + "..." if len(task) > 500 else task,
- )
-
- # 1. Validate agent exists in registry
- if agent_id not in ctx.node_registry:
- return ToolResult(
- tool_use_id="",
- content=json.dumps(
- {
- "message": f"Sub-agent '{agent_id}' not found in registry",
- "data": None,
- "metadata": {"agent_id": agent_id, "success": False, "error": "not_found"},
- }
- ),
- is_error=True,
- )
-
- subagent_spec = ctx.node_registry[agent_id]
-
- # 2. Create read-only memory snapshot
- parent_data = ctx.buffer.read_all()
-
- # Merge in-flight outputs from the parent's accumulator.
- if accumulator:
- for key, value in accumulator.to_dict().items():
- if key not in parent_data:
- parent_data[key] = value
-
- subagent_buffer = DataBuffer()
- for key, value in parent_data.items():
- subagent_buffer.write(key, value, validate=False)
-
- read_keys = set(parent_data.keys()) | set(subagent_spec.input_keys or [])
- scoped_buffer = subagent_buffer.with_permissions(
- read_keys=list(read_keys),
- write_keys=[], # Read-only!
- )
-
- # 2b. Compute instance counter early so the callback and child context
- # share the same stable node_id for this subagent invocation.
- if subagent_instance_counter is not None:
- subagent_instance_counter.setdefault(agent_id, 0)
- subagent_instance_counter[agent_id] += 1
- subagent_instance = str(subagent_instance_counter[agent_id])
- else:
- subagent_instance = "1"
-
- if subagent_instance == "1":
- sa_node_id = f"{ctx.node_id}:subagent:{agent_id}"
- else:
- sa_node_id = f"{ctx.node_id}:subagent:{agent_id}:{subagent_instance}"
-
- # 2c. Set up report callback (one-way channel to parent / event bus)
- subagent_reports: list[dict] = []
-
- async def _report_callback(
- message: str,
- data: dict | None = None,
- *,
- wait_for_response: bool = False,
- ) -> str | None:
- subagent_reports.append({"message": message, "data": data, "timestamp": time.time()})
- if event_bus:
- await event_bus.emit_subagent_report(
- stream_id=ctx.node_id,
- node_id=sa_node_id,
- subagent_id=agent_id,
- message=message,
- data=data,
- execution_id=ctx.execution_id,
- )
-
- if not wait_for_response:
- return None
-
- if not event_bus:
- logger.warning(
- "Subagent '%s' requested user response but no event_bus available",
- agent_id,
- )
- return None
-
- # Create isolated receiver and register for input routing
- import uuid
-
- escalation_id = f"{ctx.node_id}:escalation:{uuid.uuid4().hex[:8]}"
- receiver = escalation_receiver_cls()
- registry = ctx.shared_node_registry
-
- registry[escalation_id] = receiver
- try:
- await event_bus.emit_escalation_requested(
- stream_id=ctx.stream_id or ctx.node_id,
- node_id=escalation_id,
- reason=f"Subagent report (wait_for_response) from {agent_id}",
- context=message,
- execution_id=ctx.execution_id,
- )
- # Block until queen responds
- return await receiver.wait()
- finally:
- registry.pop(escalation_id, None)
-
- # 3. Filter tools for subagent
- subagent_tool_names = set(subagent_spec.tools or [])
- tool_source = ctx.all_tools if ctx.all_tools else ctx.available_tools
-
- # GCU auto-population
- if subagent_spec.node_type == "gcu" and not subagent_tool_names:
- subagent_tools = [t for t in tool_source if t.name != "delegate_to_sub_agent"]
- else:
- subagent_tools = [
- t
- for t in tool_source
- if t.name in subagent_tool_names and t.name != "delegate_to_sub_agent"
- ]
-
- missing = subagent_tool_names - {t.name for t in subagent_tools}
- if missing:
- logger.warning(
- "Subagent '%s' requested tools not found in catalog: %s",
- agent_id,
- sorted(missing),
- )
-
- logger.info(
- "📦 Subagent '%s' configuration:\n"
- " - System prompt: %s\n"
- " - Tools available (%d): %s\n"
- " - Memory keys inherited: %s",
- agent_id,
- (subagent_spec.system_prompt[:200] + "...")
- if subagent_spec.system_prompt and len(subagent_spec.system_prompt) > 200
- else subagent_spec.system_prompt,
- len(subagent_tools),
- [t.name for t in subagent_tools],
- list(parent_data.keys()),
- )
-
- # 4. Build subagent context
- max_iter = min(config.max_iterations, 10)
- subagent_ctx = NodeContext(
- runtime=ctx.runtime,
- node_id=sa_node_id,
- node_spec=subagent_spec,
- buffer=scoped_buffer,
- input_data={"task": task, **parent_data},
- llm=ctx.llm,
- available_tools=subagent_tools,
- goal_context=(
- f"Your specific task: {task}\n\n"
- f"COMPLETION REQUIREMENTS:\n"
- f"When your task is done, you MUST call set_output() "
- f"for each required key: {subagent_spec.output_keys}\n"
- f"Alternatively, call report_to_parent(mark_complete=true) "
- f"with your findings in message/data.\n"
- + (
- "Before finishing, call browser_close_finished() to clean up your browser tabs.\n"
- if subagent_spec.node_type == "gcu"
- else ""
- )
- + f"You have a maximum of {max_iter} turns to complete this task."
- ),
- goal=ctx.goal,
- max_tokens=ctx.max_tokens,
- runtime_logger=ctx.runtime_logger,
- is_subagent_mode=True, # Prevents nested delegation
- report_callback=_report_callback,
- node_registry={}, # Empty - no nested subagents
- shared_node_registry=ctx.shared_node_registry, # For escalation routing
- )
-
- # 5. Create and execute subagent EventLoopNode
- subagent_conv_store = None
- if conversation_store is not None:
- from framework.storage.conversation_store import FileConversationStore
-
- parent_base = getattr(conversation_store, "_base", None)
- if parent_base is not None:
- conversations_dir = parent_base.parent
- subagent_dir_name = f"{agent_id}-{subagent_instance}"
- subagent_store_path = conversations_dir / subagent_dir_name
- subagent_conv_store = FileConversationStore(base_path=subagent_store_path)
-
- # Derive a subagent-scoped spillover dir
- subagent_spillover = None
- if config.spillover_dir:
- subagent_spillover = str(Path(config.spillover_dir) / agent_id / subagent_instance)
-
- subagent_node = event_loop_node_cls(
- event_bus=event_bus,
- judge=SubagentJudge(task=task, max_iterations=max_iter),
- config=LoopConfig(
- max_iterations=max_iter,
- max_tool_calls_per_turn=config.max_tool_calls_per_turn,
- tool_call_overflow_margin=config.tool_call_overflow_margin,
- max_context_tokens=config.max_context_tokens,
- stall_detection_threshold=config.stall_detection_threshold,
- max_tool_result_chars=config.max_tool_result_chars,
- spillover_dir=subagent_spillover,
- ),
- tool_executor=tool_executor,
- conversation_store=subagent_conv_store,
- )
-
- # Each subagent instance gets its own unique browser profile so concurrent
- # subagents don't share tab groups. The profile is set as execution context
- # so the tool registry auto-injects it into every browser_* MCP tool call.
- _gcu_profile = f"{agent_id}:{subagent_instance}"
- _profile_token = ToolRegistry.set_execution_context(profile=_gcu_profile)
-
- try:
- logger.info("🚀 Starting subagent '%s' execution...", agent_id)
- start_time = time.time()
- result = await subagent_node.execute(subagent_ctx)
- latency_ms = int((time.time() - start_time) * 1000)
-
- separator = "-" * 60
- logger.info(
- "\n%s\n"
- "✅ SUBAGENT '%s' COMPLETED\n"
- "%s\n"
- "Success: %s\n"
- "Latency: %dms\n"
- "Tokens used: %s\n"
- "Output keys: %s\n"
- "%s",
- separator,
- agent_id,
- separator,
- result.success,
- latency_ms,
- result.tokens_used,
- list(result.output.keys()) if result.output else [],
- separator,
- )
-
- result_json = {
- "message": (
- f"Sub-agent '{agent_id}' completed successfully"
- if result.success
- else f"Sub-agent '{agent_id}' failed: {result.error}"
- ),
- "data": result.output,
- "reports": subagent_reports if subagent_reports else None,
- "metadata": {
- "agent_id": agent_id,
- "success": result.success,
- "tokens_used": result.tokens_used,
- "latency_ms": latency_ms,
- "report_count": len(subagent_reports),
- },
- }
-
- return ToolResult(
- tool_use_id="",
- content=json.dumps(result_json, indent=2, default=str),
- is_error=not result.success,
- )
-
- except Exception as e:
- logger.exception(
- "\n" + "!" * 60 + "\n❌ SUBAGENT '%s' FAILED\nError: %s\n" + "!" * 60,
- agent_id,
- str(e),
- )
- result_json = {
- "message": f"Sub-agent '{agent_id}' raised exception: {e}",
- "data": None,
- "metadata": {
- "agent_id": agent_id,
- "success": False,
- "error": str(e),
- },
- }
- return ToolResult(
- tool_use_id="",
- content=json.dumps(result_json, indent=2),
- is_error=True,
- )
- finally:
- ToolRegistry.reset_execution_context(_profile_token)
- # Close the tab group this subagent created, if any.
- try:
- from gcu.browser.bridge import get_bridge
- from gcu.browser.tools.lifecycle import _contexts
-
- bridge = get_bridge()
- ctx_entry = _contexts.pop(_gcu_profile, None)
- if bridge and bridge.is_connected and ctx_entry:
- group_id = ctx_entry.get("groupId")
- if group_id is not None:
- await bridge.destroy_context(group_id)
- except Exception:
- pass
diff --git a/core/framework/host/__init__.py b/core/framework/host/__init__.py
new file mode 100644
index 00000000..c432b680
--- /dev/null
+++ b/core/framework/host/__init__.py
@@ -0,0 +1,11 @@
+"""Host layer -- how agents are triggered and hosted."""
+
+from framework.host.agent_host import ( # noqa: F401
+ AgentHost,
+ AgentRuntimeConfig,
+)
+from framework.host.event_bus import AgentEvent, EventBus, EventType # noqa: F401
+from framework.host.execution_manager import ( # noqa: F401
+ EntryPointSpec,
+ ExecutionManager,
+)
diff --git a/core/framework/runtime/agent_runtime.py b/core/framework/host/agent_host.py
similarity index 74%
rename from core/framework/runtime/agent_runtime.py
rename to core/framework/host/agent_host.py
index 320e5371..8cb367b5 100644
--- a/core/framework/runtime/agent_runtime.py
+++ b/core/framework/host/agent_host.py
@@ -16,20 +16,21 @@ from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, Any
-from framework.graph.checkpoint_config import CheckpointConfig
-from framework.graph.executor import ExecutionResult
-from framework.runtime.event_bus import EventBus
-from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream
-from framework.runtime.outcome_aggregator import OutcomeAggregator
-from framework.runtime.runtime_log_store import RuntimeLogStore
-from framework.runtime.shared_state import SharedBufferManager
+from framework.orchestrator.checkpoint_config import CheckpointConfig
+from framework.orchestrator.orchestrator import ExecutionResult
+from framework.host.event_bus import EventBus
+from framework.host.execution_manager import EntryPointSpec, ExecutionManager
+from framework.host.outcome_aggregator import OutcomeAggregator
+from framework.tracker.runtime_log_store import RuntimeLogStore
+from framework.host.shared_state import SharedBufferManager
from framework.storage.concurrent import ConcurrentStorage
from framework.storage.session_store import SessionStore
if TYPE_CHECKING:
- from framework.graph.edge import GraphSpec
- from framework.graph.goal import Goal
+ from framework.orchestrator.edge import GraphSpec
+ from framework.orchestrator.goal import Goal
from framework.llm.provider import LLMProvider, Tool
+ from framework.pipeline.stage import PipelineStage
from framework.skills.manager import SkillsManagerConfig
logger = logging.getLogger(__name__)
@@ -37,7 +38,7 @@ logger = logging.getLogger(__name__)
@dataclass
class AgentRuntimeConfig:
- """Configuration for AgentRuntime."""
+ """Configuration for AgentHost."""
max_concurrent_executions: int = 100
cache_ttl: float = 60.0
@@ -62,14 +63,14 @@ class _GraphRegistration:
graph: "GraphSpec"
goal: "Goal"
entry_points: dict[str, EntryPointSpec]
- streams: dict[str, ExecutionStream] # ep_id -> stream (NOT namespaced)
+ streams: dict[str, ExecutionManager] # ep_id -> stream (NOT namespaced)
storage_subpath: str # relative to session root, e.g. "graphs/email_agent"
event_subscriptions: list[str] = field(default_factory=list)
timer_tasks: list[asyncio.Task] = field(default_factory=list)
timer_next_fire: dict[str, float] = field(default_factory=dict)
-class AgentRuntime:
+class AgentHost:
"""
Top-level runtime that manages agent lifecycle and concurrent executions.
@@ -142,6 +143,7 @@ class AgentRuntime:
skills_catalog_prompt: str = "",
protocols_prompt: str = "",
skill_dirs: list[str] | None = None,
+ pipeline_stages: "list[PipelineStage] | None" = None,
):
"""
Initialize agent runtime.
@@ -171,6 +173,7 @@ class AgentRuntime:
skills_catalog_prompt: Deprecated. Pre-rendered skills catalog.
protocols_prompt: Deprecated. Pre-rendered operational protocols.
"""
+ from framework.pipeline.runner import PipelineRunner
from framework.skills.manager import SkillsManager
self.graph = graph
@@ -180,6 +183,14 @@ class AgentRuntime:
self._checkpoint_config = checkpoint_config
self.accounts_prompt = accounts_prompt
+ # Pipeline middleware: runs before every trigger() dispatch.
+ # Accepts either pre-built stage objects or loads from config.
+ if pipeline_stages:
+ self._pipeline = PipelineRunner(pipeline_stages)
+ else:
+ self._pipeline = self._load_pipeline_from_config()
+
+
# --- Skill lifecycle: runtime owns the SkillsManager ---
if skills_manager_config is not None:
# New path: config-driven, runtime handles loading
@@ -246,7 +257,7 @@ class AgentRuntime:
# Entry points and streams (primary graph)
self._entry_points: dict[str, EntryPointSpec] = {}
- self._streams: dict[str, ExecutionStream] = {}
+ self._streams: dict[str, ExecutionManager] = {}
# Webhook server (created on start if webhook_routes configured)
self._webhook_server: Any = None
@@ -270,7 +281,7 @@ class AgentRuntime:
self.intro_message: str = ""
# ------------------------------------------------------------------
- # Skill prompt accessors (read by ExecutionStream constructors)
+ # Skill prompt accessors (read by ExecutionManager constructors)
# ------------------------------------------------------------------
@property
@@ -335,9 +346,14 @@ class AgentRuntime:
# Start storage
await self._storage.start()
- # Create streams for each entry point
+ # Initialize pipeline stages FIRST -- they inject LLM, tools,
+ # credentials, and skills into the host before streams are created.
+ await self._pipeline.initialize_all()
+ self._apply_pipeline_results()
+
+ # Create streams for each entry point (uses pipeline results)
for ep_id, spec in self._entry_points.items():
- stream = ExecutionStream(
+ stream = ExecutionManager(
stream_id=ep_id,
entry_spec=spec,
graph=self.graph,
@@ -370,7 +386,7 @@ class AgentRuntime:
# Start webhook server if routes are configured
if self._config.webhook_routes:
- from framework.runtime.webhook_server import (
+ from framework.host.webhook_server import (
WebhookRoute,
WebhookServer,
WebhookServerConfig,
@@ -394,7 +410,7 @@ class AgentRuntime:
await self._webhook_server.start()
# Subscribe event-driven entry points to EventBus
- from framework.runtime.event_bus import EventType as _ET
+ from framework.host.event_bus import EventType as _ET
for ep_id, spec in self._entry_points.items():
if spec.trigger_type != "event":
@@ -458,332 +474,345 @@ class AgentRuntime:
self._event_subscriptions.append(sub_id)
# Start timer-driven entry points
- for ep_id, spec in self._entry_points.items():
- if spec.trigger_type != "timer":
- continue
+ await self._start_timers()
- tc = spec.trigger_config
- cron_expr = tc.get("cron")
- _raw_interval = tc.get("interval_minutes")
- interval = float(_raw_interval) if _raw_interval is not None else None
- run_immediately = tc.get("run_immediately", False)
-
- if cron_expr:
- # Cron expression mode — takes priority over interval_minutes
- try:
- from croniter import croniter
- except ImportError as e:
- raise RuntimeError(
- "croniter is required for cron-based entry points. "
- "Install it with: uv pip install croniter"
- ) from e
-
- try:
- if not croniter.is_valid(cron_expr):
- raise ValueError(f"Invalid cron expression: {cron_expr}")
- except ValueError as e:
- logger.warning(
- "Entry point '%s' has invalid cron config: %s",
- ep_id,
- e,
- )
- continue
-
- def _make_cron_timer(
- entry_point_id: str,
- expr: str,
- immediate: bool,
- idle_timeout: float = 300,
- ):
- async def _cron_loop():
- from croniter import croniter
-
- _persistent_session_id: str | None = None
- if not immediate:
- cron = croniter(expr, datetime.now())
- next_dt = cron.get_next(datetime)
- sleep_secs = (next_dt - datetime.now()).total_seconds()
- self._timer_next_fire[entry_point_id] = (
- time.monotonic() + sleep_secs
- )
- await asyncio.sleep(max(0, sleep_secs))
- while self._running:
- # Calculate next fire time upfront (used by skip paths too)
- cron = croniter(expr, datetime.now())
- next_dt = cron.get_next(datetime)
- sleep_secs = (next_dt - datetime.now()).total_seconds()
-
- # Gate: skip tick if timers are explicitly paused
- if self._timers_paused:
- logger.debug(
- "Cron '%s': paused, skipping tick",
- entry_point_id,
- )
- self._timer_next_fire[entry_point_id] = (
- time.monotonic() + sleep_secs
- )
- await asyncio.sleep(max(0, sleep_secs))
- continue
-
- # Gate: skip tick if ANY stream is actively working.
- # If the execution is idle (no LLM/tool activity
- # beyond idle_timeout) let the timer proceed —
- # execute() will cancel the stale execution.
- _any_active = False
- _min_idle = float("inf")
- for _s in self._streams.values():
- if _s.active_execution_ids:
- _any_active = True
- _idle = _s.agent_idle_seconds
- if _idle < _min_idle:
- _min_idle = _idle
- logger.info(
- "Cron '%s': gate — active=%s, idle=%.1fs, timeout=%ds",
- entry_point_id,
- _any_active,
- _min_idle,
- idle_timeout,
- )
- if _any_active and _min_idle < idle_timeout:
- logger.info(
- "Cron '%s': agent actively working, skipping tick",
- entry_point_id,
- )
- self._timer_next_fire[entry_point_id] = (
- time.monotonic() + sleep_secs
- )
- await asyncio.sleep(max(0, sleep_secs))
- continue
-
- self._timer_next_fire.pop(entry_point_id, None)
- try:
- ep_spec = self._entry_points.get(entry_point_id)
- is_isolated = ep_spec and ep_spec.isolation_level == "isolated"
- if is_isolated:
- if _persistent_session_id:
- session_state = {
- "resume_session_id": _persistent_session_id
- }
- else:
- session_state = None
- else:
- session_state = self._get_primary_session_state(
- exclude_entry_point=entry_point_id
- )
- # Gate: skip tick if no active session
- if session_state is None:
- logger.debug(
- "Cron '%s': no active session, skipping",
- entry_point_id,
- )
- self._timer_next_fire[entry_point_id] = (
- time.monotonic() + sleep_secs
- )
- await asyncio.sleep(max(0, sleep_secs))
- continue
-
- exec_id = await self.trigger(
- entry_point_id,
- {
- "event": {
- "source": "timer",
- "reason": "scheduled",
- }
- },
- session_state=session_state,
- )
- if not _persistent_session_id and is_isolated:
- _persistent_session_id = exec_id
- logger.info(
- "Cron fired for entry point '%s' (expr: %s)",
- entry_point_id,
- expr,
- )
- except Exception:
- logger.error(
- "Cron trigger failed for '%s'",
- entry_point_id,
- exc_info=True,
- )
- # Calculate next fire from now
- cron = croniter(expr, datetime.now())
- next_dt = cron.get_next(datetime)
- sleep_secs = (next_dt - datetime.now()).total_seconds()
- self._timer_next_fire[entry_point_id] = (
- time.monotonic() + sleep_secs
- )
- await asyncio.sleep(max(0, sleep_secs))
-
- return _cron_loop
-
- task = asyncio.create_task(
- _make_cron_timer(
- ep_id,
- cron_expr,
- run_immediately,
- idle_timeout=float(tc.get("idle_timeout_seconds", 300)),
- )()
- )
- self._timer_tasks.append(task)
- logger.info(
- "Started cron timer for entry point '%s' with expression '%s'%s",
- ep_id,
- cron_expr,
- " (immediate first run)" if run_immediately else "",
- )
-
- elif interval and interval > 0:
- # Fixed interval mode (original behavior)
- def _make_timer(
- entry_point_id: str,
- mins: float,
- immediate: bool,
- idle_timeout: float = 300,
- ):
- async def _timer_loop():
- interval_secs = mins * 60
- _persistent_session_id: str | None = None
- if not immediate:
- self._timer_next_fire[entry_point_id] = (
- time.monotonic() + interval_secs
- )
- await asyncio.sleep(interval_secs)
- while self._running:
- # Gate: skip tick if timers are explicitly paused
- if self._timers_paused:
- logger.debug(
- "Timer '%s': paused, skipping tick",
- entry_point_id,
- )
- self._timer_next_fire[entry_point_id] = (
- time.monotonic() + interval_secs
- )
- await asyncio.sleep(interval_secs)
- continue
-
- # Gate: skip tick if agent is actively working.
- # Gate: skip tick if ANY stream is actively working.
- _any_active = False
- _min_idle = float("inf")
- for _s in self._streams.values():
- if _s.active_execution_ids:
- _any_active = True
- _idle = _s.agent_idle_seconds
- if _idle < _min_idle:
- _min_idle = _idle
- logger.info(
- "Timer '%s': gate — active=%s, idle=%.1fs, timeout=%ds",
- entry_point_id,
- _any_active,
- _min_idle,
- idle_timeout,
- )
- if _any_active and _min_idle < idle_timeout:
- logger.info(
- "Timer '%s': agent actively working, skipping tick",
- entry_point_id,
- )
- self._timer_next_fire[entry_point_id] = (
- time.monotonic() + interval_secs
- )
- await asyncio.sleep(interval_secs)
- continue
-
- self._timer_next_fire.pop(entry_point_id, None)
- try:
- ep_spec = self._entry_points.get(entry_point_id)
- is_isolated = ep_spec and ep_spec.isolation_level == "isolated"
- if is_isolated:
- if _persistent_session_id:
- session_state = {
- "resume_session_id": _persistent_session_id
- }
- else:
- session_state = None
- else:
- session_state = self._get_primary_session_state(
- exclude_entry_point=entry_point_id
- )
- # Gate: skip tick if no active session
- if session_state is None:
- logger.debug(
- "Timer '%s': no active session, skipping",
- entry_point_id,
- )
- self._timer_next_fire[entry_point_id] = (
- time.monotonic() + interval_secs
- )
- await asyncio.sleep(interval_secs)
- continue
-
- exec_id = await self.trigger(
- entry_point_id,
- {
- "event": {
- "source": "timer",
- "reason": "scheduled",
- }
- },
- session_state=session_state,
- )
- if not _persistent_session_id and is_isolated:
- _persistent_session_id = exec_id
- logger.info(
- "Timer fired for entry point '%s' (next in %s min)",
- entry_point_id,
- mins,
- )
- except Exception:
- logger.error(
- "Timer trigger failed for '%s'",
- entry_point_id,
- exc_info=True,
- )
- self._timer_next_fire[entry_point_id] = (
- time.monotonic() + interval_secs
- )
- await asyncio.sleep(interval_secs)
-
- return _timer_loop
-
- task = asyncio.create_task(
- _make_timer(
- ep_id,
- interval,
- run_immediately,
- idle_timeout=float(tc.get("idle_timeout_seconds", 300)),
- )()
- )
- self._timer_tasks.append(task)
- logger.info(
- "Started timer for entry point '%s' every %s min%s",
- ep_id,
- interval,
- " (immediate first run)" if run_immediately else "",
- )
-
- else:
- logger.warning(
- "Entry point '%s' has trigger_type='timer' "
- "but no 'cron' or valid 'interval_minutes' in trigger_config",
- ep_id,
- )
-
- # Register primary graph
- self._graphs[self._graph_id] = _GraphRegistration(
- graph=self.graph,
- goal=self.goal,
- entry_points=dict(self._entry_points),
- streams=dict(self._streams),
- storage_subpath="",
- event_subscriptions=list(self._event_subscriptions),
- timer_tasks=list(self._timer_tasks),
- timer_next_fire=self._timer_next_fire,
- )
+ # Start skill hot-reload watcher (no-op if watchfiles not installed)
+ await self._skills_manager.start_watching()
self._running = True
self._timers_paused = False
- logger.info(f"AgentRuntime started with {len(self._streams)} streams")
+ n_stages = len(self._pipeline.stages)
+ logger.info(
+ "AgentHost started with %d streams, %d pipeline stages",
+ len(self._streams),
+ n_stages,
+ )
+
+ async def _start_timers(self) -> None:
+ """Start timer-driven entry points (extracted from start())."""
+ for ep_id, spec in self._entry_points.items():
+ if spec.trigger_type != "timer":
+ continue
+
+ tc = spec.trigger_config
+ cron_expr = tc.get("cron")
+ _raw_interval = tc.get("interval_minutes")
+ interval = float(_raw_interval) if _raw_interval is not None else None
+ run_immediately = tc.get("run_immediately", False)
+
+ if cron_expr:
+ # Cron expression mode — takes priority over interval_minutes
+ try:
+ from croniter import croniter
+ except ImportError as e:
+ raise RuntimeError(
+ "croniter is required for cron-based entry points. "
+ "Install it with: uv pip install croniter"
+ ) from e
+
+ try:
+ if not croniter.is_valid(cron_expr):
+ raise ValueError(f"Invalid cron expression: {cron_expr}")
+ except ValueError as e:
+ logger.warning(
+ "Entry point '%s' has invalid cron config: %s",
+ ep_id,
+ e,
+ )
+ continue
+
+ def _make_cron_timer(
+ entry_point_id: str,
+ expr: str,
+ immediate: bool,
+ idle_timeout: float = 300,
+ ):
+ async def _cron_loop():
+ from croniter import croniter
+
+ _persistent_session_id: str | None = None
+ if not immediate:
+ cron = croniter(expr, datetime.now())
+ next_dt = cron.get_next(datetime)
+ sleep_secs = (next_dt - datetime.now()).total_seconds()
+ self._timer_next_fire[entry_point_id] = (
+ time.monotonic() + sleep_secs
+ )
+ await asyncio.sleep(max(0, sleep_secs))
+ while self._running:
+ # Calculate next fire time upfront (used by skip paths too)
+ cron = croniter(expr, datetime.now())
+ next_dt = cron.get_next(datetime)
+ sleep_secs = (next_dt - datetime.now()).total_seconds()
+
+ # Gate: skip tick if timers are explicitly paused
+ if self._timers_paused:
+ logger.debug(
+ "Cron '%s': paused, skipping tick",
+ entry_point_id,
+ )
+ self._timer_next_fire[entry_point_id] = (
+ time.monotonic() + sleep_secs
+ )
+ await asyncio.sleep(max(0, sleep_secs))
+ continue
+
+ # Gate: skip tick if ANY stream is actively working.
+ # If the execution is idle (no LLM/tool activity
+ # beyond idle_timeout) let the timer proceed —
+ # execute() will cancel the stale execution.
+ _any_active = False
+ _min_idle = float("inf")
+ for _s in self._streams.values():
+ if _s.active_execution_ids:
+ _any_active = True
+ _idle = _s.agent_idle_seconds
+ if _idle < _min_idle:
+ _min_idle = _idle
+ logger.info(
+ "Cron '%s': gate — active=%s, idle=%.1fs, timeout=%ds",
+ entry_point_id,
+ _any_active,
+ _min_idle,
+ idle_timeout,
+ )
+ if _any_active and _min_idle < idle_timeout:
+ logger.info(
+ "Cron '%s': agent actively working, skipping tick",
+ entry_point_id,
+ )
+ self._timer_next_fire[entry_point_id] = (
+ time.monotonic() + sleep_secs
+ )
+ await asyncio.sleep(max(0, sleep_secs))
+ continue
+
+ self._timer_next_fire.pop(entry_point_id, None)
+ try:
+ ep_spec = self._entry_points.get(entry_point_id)
+ is_isolated = ep_spec and ep_spec.isolation_level == "isolated"
+ if is_isolated:
+ if _persistent_session_id:
+ session_state = {
+ "resume_session_id": _persistent_session_id
+ }
+ else:
+ session_state = None
+ else:
+ session_state = self._get_primary_session_state(
+ exclude_entry_point=entry_point_id
+ )
+ # Gate: skip tick if no active session
+ if session_state is None:
+ logger.debug(
+ "Cron '%s': no active session, skipping",
+ entry_point_id,
+ )
+ self._timer_next_fire[entry_point_id] = (
+ time.monotonic() + sleep_secs
+ )
+ await asyncio.sleep(max(0, sleep_secs))
+ continue
+
+ exec_id = await self.trigger(
+ entry_point_id,
+ {
+ "event": {
+ "source": "timer",
+ "reason": "scheduled",
+ }
+ },
+ session_state=session_state,
+ )
+ if not _persistent_session_id and is_isolated:
+ _persistent_session_id = exec_id
+ logger.info(
+ "Cron fired for entry point '%s' (expr: %s)",
+ entry_point_id,
+ expr,
+ )
+ except Exception:
+ logger.error(
+ "Cron trigger failed for '%s'",
+ entry_point_id,
+ exc_info=True,
+ )
+ # Calculate next fire from now
+ cron = croniter(expr, datetime.now())
+ next_dt = cron.get_next(datetime)
+ sleep_secs = (next_dt - datetime.now()).total_seconds()
+ self._timer_next_fire[entry_point_id] = (
+ time.monotonic() + sleep_secs
+ )
+ await asyncio.sleep(max(0, sleep_secs))
+
+ return _cron_loop
+
+ task = asyncio.create_task(
+ _make_cron_timer(
+ ep_id,
+ cron_expr,
+ run_immediately,
+ idle_timeout=float(tc.get("idle_timeout_seconds", 300)),
+ )()
+ )
+ self._timer_tasks.append(task)
+ logger.info(
+ "Started cron timer for entry point '%s' with expression '%s'%s",
+ ep_id,
+ cron_expr,
+ " (immediate first run)" if run_immediately else "",
+ )
+
+ elif interval and interval > 0:
+ # Fixed interval mode (original behavior)
+ def _make_timer(
+ entry_point_id: str,
+ mins: float,
+ immediate: bool,
+ idle_timeout: float = 300,
+ ):
+ async def _timer_loop():
+ interval_secs = mins * 60
+ _persistent_session_id: str | None = None
+ if not immediate:
+ self._timer_next_fire[entry_point_id] = (
+ time.monotonic() + interval_secs
+ )
+ await asyncio.sleep(interval_secs)
+ while self._running:
+ # Gate: skip tick if timers are explicitly paused
+ if self._timers_paused:
+ logger.debug(
+ "Timer '%s': paused, skipping tick",
+ entry_point_id,
+ )
+ self._timer_next_fire[entry_point_id] = (
+ time.monotonic() + interval_secs
+ )
+ await asyncio.sleep(interval_secs)
+ continue
+
+                            # Gate: skip tick if ANY stream is actively working.
+                            # If idle beyond idle_timeout, let the timer proceed.
+ _any_active = False
+ _min_idle = float("inf")
+ for _s in self._streams.values():
+ if _s.active_execution_ids:
+ _any_active = True
+ _idle = _s.agent_idle_seconds
+ if _idle < _min_idle:
+ _min_idle = _idle
+ logger.info(
+ "Timer '%s': gate — active=%s, idle=%.1fs, timeout=%ds",
+ entry_point_id,
+ _any_active,
+ _min_idle,
+ idle_timeout,
+ )
+ if _any_active and _min_idle < idle_timeout:
+ logger.info(
+ "Timer '%s': agent actively working, skipping tick",
+ entry_point_id,
+ )
+ self._timer_next_fire[entry_point_id] = (
+ time.monotonic() + interval_secs
+ )
+ await asyncio.sleep(interval_secs)
+ continue
+
+ self._timer_next_fire.pop(entry_point_id, None)
+ try:
+ ep_spec = self._entry_points.get(entry_point_id)
+ is_isolated = ep_spec and ep_spec.isolation_level == "isolated"
+ if is_isolated:
+ if _persistent_session_id:
+ session_state = {
+ "resume_session_id": _persistent_session_id
+ }
+ else:
+ session_state = None
+ else:
+ session_state = self._get_primary_session_state(
+ exclude_entry_point=entry_point_id
+ )
+ # Gate: skip tick if no active session
+ if session_state is None:
+ logger.debug(
+ "Timer '%s': no active session, skipping",
+ entry_point_id,
+ )
+ self._timer_next_fire[entry_point_id] = (
+ time.monotonic() + interval_secs
+ )
+ await asyncio.sleep(interval_secs)
+ continue
+
+ exec_id = await self.trigger(
+ entry_point_id,
+ {
+ "event": {
+ "source": "timer",
+ "reason": "scheduled",
+ }
+ },
+ session_state=session_state,
+ )
+ if not _persistent_session_id and is_isolated:
+ _persistent_session_id = exec_id
+ logger.info(
+ "Timer fired for entry point '%s' (next in %s min)",
+ entry_point_id,
+ mins,
+ )
+ except Exception:
+ logger.error(
+ "Timer trigger failed for '%s'",
+ entry_point_id,
+ exc_info=True,
+ )
+ self._timer_next_fire[entry_point_id] = (
+ time.monotonic() + interval_secs
+ )
+ await asyncio.sleep(interval_secs)
+
+ return _timer_loop
+
+ task = asyncio.create_task(
+ _make_timer(
+ ep_id,
+ interval,
+ run_immediately,
+ idle_timeout=float(tc.get("idle_timeout_seconds", 300)),
+ )()
+ )
+ self._timer_tasks.append(task)
+ logger.info(
+ "Started timer for entry point '%s' every %s min%s",
+ ep_id,
+ interval,
+ " (immediate first run)" if run_immediately else "",
+ )
+
+ else:
+ logger.warning(
+ "Entry point '%s' has trigger_type='timer' "
+ "but no 'cron' or valid 'interval_minutes' in trigger_config",
+ ep_id,
+ )
+
+ # Register primary graph
+ self._graphs[self._graph_id] = _GraphRegistration(
+ graph=self.graph,
+ goal=self.goal,
+ entry_points=dict(self._entry_points),
+ streams=dict(self._streams),
+ storage_subpath="",
+ event_subscriptions=list(self._event_subscriptions),
+ timer_tasks=list(self._timer_tasks),
+ timer_next_fire=self._timer_next_fire,
+ )
+
async def stop(self) -> None:
"""Stop the agent runtime and all streams."""
@@ -818,11 +847,14 @@ class AgentRuntime:
self._streams.clear()
self._graphs.clear()
+ # Stop skill hot-reload watcher
+ await self._skills_manager.stop_watching()
+
# Stop storage
await self._storage.stop()
self._running = False
- logger.info("AgentRuntime stopped")
+ logger.info("AgentHost stopped")
def pause_timers(self) -> None:
"""Pause all timer-driven entry points.
@@ -841,7 +873,7 @@ class AgentRuntime:
self,
entry_point_id: str,
graph_id: str | None = None,
- ) -> ExecutionStream | None:
+ ) -> ExecutionManager | None:
"""Find the stream for an entry point, searching the active graph first.
Lookup order:
@@ -865,6 +897,57 @@ class AgentRuntime:
# Primary graph (also stored in self._streams)
return self._streams.get(entry_point_id)
+ def _apply_pipeline_results(self) -> None:
+ """Read typed attributes from pipeline stages after initialization."""
+ for stage in self._pipeline.stages:
+ name = stage.__class__.__name__
+
+ if stage.tool_registry is not None:
+ tools = list(stage.tool_registry.get_tools().values())
+ if tools:
+ self._tools = tools
+ self._tool_executor = stage.tool_registry.get_executor()
+ logger.info("Pipeline: %d tools from %s", len(tools), name)
+
+ if stage.llm is not None and self._llm is None:
+ self._llm = stage.llm
+ logger.info("Pipeline: LLM from %s", name)
+
+ if stage.accounts_prompt:
+ self._accounts_prompt = stage.accounts_prompt
+ self._accounts_data = stage.accounts_data
+ self._tool_provider_map = stage.tool_provider_map
+
+ if stage.skills_manager is not None:
+ self._skills_manager = stage.skills_manager
+
+
+ @staticmethod
+ def _load_pipeline_from_config():
+ """Build pipeline from ``~/.hive/configuration.json`` ``pipeline`` key.
+
+ Returns an empty pipeline if no config is set.
+ """
+ from framework.config import get_hive_config
+ from framework.pipeline.registry import build_pipeline_from_config
+ from framework.pipeline.runner import PipelineRunner
+
+ config = get_hive_config()
+ stages_config = config.get("pipeline", {}).get("stages", [])
+ if not stages_config:
+ return PipelineRunner([])
+ return build_pipeline_from_config(stages_config)
+
+ async def _reload_pipeline(self) -> None:
+ """Hot-reload pipeline from config. Atomic swap."""
+ new_pipeline = self._load_pipeline_from_config()
+ await new_pipeline.initialize_all()
+ self._pipeline = new_pipeline
+ logger.info(
+ "Pipeline reloaded: %d stages",
+ len(new_pipeline.stages),
+ )
+
def _prune_idempotency_keys(self) -> None:
"""Prune expired idempotency keys based on TTL and max size."""
ttl = self._config.idempotency_ttl_seconds
@@ -915,7 +998,7 @@ class AgentRuntime:
RuntimeError: If runtime not running
"""
if not self._running:
- raise RuntimeError("AgentRuntime is not running")
+ raise RuntimeError("AgentHost is not running")
# Idempotency check: return cached execution_id for duplicate keys.
if idempotency_key is not None:
@@ -929,6 +1012,21 @@ class AgentRuntime:
)
return cached
+ # Run pipeline middleware (rate limiting, validation, cost guards, ...)
+ # Raises PipelineRejectedError if any stage rejects.
+ if self._pipeline.stages:
+ from framework.pipeline.stage import PipelineContext
+
+ pipeline_ctx = PipelineContext(
+ entry_point_id=entry_point_id,
+ input_data=input_data,
+ correlation_id=correlation_id,
+ session_state=session_state,
+ )
+ pipeline_ctx = await self._pipeline.run(pipeline_ctx)
+ # Stages may have transformed the input_data.
+ input_data = pipeline_ctx.input_data
+
stream = self._resolve_stream(entry_point_id, graph_id)
if stream is None:
raise ValueError(f"Entry point '{entry_point_id}' not found")
@@ -1023,9 +1121,9 @@ class AgentRuntime:
graph_log_store = RuntimeLogStore(graph_base / "runtime_logs")
# Create streams for each entry point
- streams: dict[str, ExecutionStream] = {}
+ streams: dict[str, ExecutionManager] = {}
for ep_id, spec in entry_points.items():
- stream = ExecutionStream(
+ stream = ExecutionManager(
stream_id=f"{graph_id}::{ep_id}",
entry_spec=spec,
graph=graph,
@@ -1055,7 +1153,7 @@ class AgentRuntime:
streams[ep_id] = stream
# Set up event-driven subscriptions
- from framework.runtime.event_bus import EventType as _ET
+ from framework.host.event_bus import EventType as _ET
event_subs: list[str] = []
for ep_id, spec in entry_points.items():
@@ -1492,7 +1590,7 @@ class AgentRuntime:
# Search primary graph's streams for an active session.
# Skip isolated streams — they have their own session directories
# and must never be used as a shared session.
- all_streams: list[tuple[str, ExecutionStream]] = []
+ all_streams: list[tuple[str, ExecutionManager]] = []
for _gid, reg in self._graphs.items():
for ep_id, stream in reg.streams.items():
# Skip isolated entry points — they run in their own namespace
@@ -1653,7 +1751,7 @@ class AgentRuntime:
return max(0.0, mono - time.monotonic())
return None
- def get_stream(self, entry_point_id: str) -> ExecutionStream | None:
+ def get_stream(self, entry_point_id: str) -> ExecutionManager | None:
"""Get a specific execution stream."""
return self._streams.get(entry_point_id)
@@ -1820,94 +1918,3 @@ class AgentRuntime:
# === CONVENIENCE FACTORY ===
-def create_agent_runtime(
- graph: "GraphSpec",
- goal: "Goal",
- storage_path: str | Path,
- entry_points: list[EntryPointSpec],
- llm: "LLMProvider | None" = None,
- tools: list["Tool"] | None = None,
- tool_executor: Callable | None = None,
- config: AgentRuntimeConfig | None = None,
- runtime_log_store: Any = None,
- enable_logging: bool = True,
- checkpoint_config: CheckpointConfig | None = None,
- graph_id: str | None = None,
- accounts_prompt: str = "",
- accounts_data: list[dict] | None = None,
- tool_provider_map: dict[str, str] | None = None,
- event_bus: "EventBus | None" = None,
- skills_manager_config: "SkillsManagerConfig | None" = None,
- # Deprecated — pass skills_manager_config instead.
- skills_catalog_prompt: str = "",
- protocols_prompt: str = "",
- skill_dirs: list[str] | None = None,
-) -> AgentRuntime:
- """
- Create and configure an AgentRuntime with entry points.
-
- Convenience factory that creates runtime and registers entry points.
- Runtime logging is enabled by default for observability.
-
- Args:
- graph: Graph specification
- goal: Goal driving execution
- storage_path: Path for persistent storage
- entry_points: Entry point specifications
- llm: LLM provider
- tools: Available tools
- tool_executor: Tool executor function
- config: Runtime configuration
- runtime_log_store: Optional RuntimeLogStore for per-execution logging.
- If None and enable_logging=True, creates one automatically.
- enable_logging: Whether to enable runtime logging (default: True).
- Set to False to disable logging entirely.
- checkpoint_config: Optional checkpoint configuration for resumable sessions.
- If None, uses default checkpointing behavior.
- graph_id: Optional identifier for the primary graph (defaults to "primary").
- accounts_data: Raw account data for per-node prompt generation.
- tool_provider_map: Tool name to provider name mapping for account routing.
- event_bus: Optional external EventBus to share with other components.
- skills_catalog_prompt: Available skills catalog for system prompt.
- protocols_prompt: Default skill operational protocols for system prompt.
- skill_dirs: Skill base directories for Tier 3 resource access.
- skills_manager_config: Skill configuration — the runtime owns
- discovery, loading, and prompt renderation internally.
- skills_catalog_prompt: Deprecated. Pre-rendered skills catalog.
- protocols_prompt: Deprecated. Pre-rendered operational protocols.
-
- Returns:
- Configured AgentRuntime (not yet started)
- """
- # Auto-create runtime log store if logging is enabled and not provided
- if enable_logging and runtime_log_store is None:
- from framework.runtime.runtime_log_store import RuntimeLogStore
-
- storage_path_obj = Path(storage_path) if isinstance(storage_path, str) else storage_path
- runtime_log_store = RuntimeLogStore(storage_path_obj / "runtime_logs")
-
- runtime = AgentRuntime(
- graph=graph,
- goal=goal,
- storage_path=storage_path,
- llm=llm,
- tools=tools,
- tool_executor=tool_executor,
- config=config,
- runtime_log_store=runtime_log_store,
- checkpoint_config=checkpoint_config,
- graph_id=graph_id,
- accounts_prompt=accounts_prompt,
- accounts_data=accounts_data,
- tool_provider_map=tool_provider_map,
- event_bus=event_bus,
- skills_manager_config=skills_manager_config,
- skills_catalog_prompt=skills_catalog_prompt,
- protocols_prompt=protocols_prompt,
- skill_dirs=skill_dirs,
- )
-
- for spec in entry_points:
- runtime.register_entry_point(spec)
-
- return runtime
diff --git a/core/framework/runtime/event_bus.py b/core/framework/host/event_bus.py
similarity index 100%
rename from core/framework/runtime/event_bus.py
rename to core/framework/host/event_bus.py
diff --git a/core/framework/runtime/execution_stream.py b/core/framework/host/execution_manager.py
similarity index 97%
rename from core/framework/runtime/execution_stream.py
rename to core/framework/host/execution_manager.py
index 6ff2f89b..39b9aadf 100644
--- a/core/framework/runtime/execution_stream.py
+++ b/core/framework/host/execution_manager.py
@@ -18,18 +18,18 @@ from dataclasses import dataclass, field
from datetime import datetime
from typing import TYPE_CHECKING, Any
-from framework.graph.checkpoint_config import CheckpointConfig
-from framework.graph.executor import ExecutionResult, GraphExecutor
-from framework.runtime.event_bus import EventBus
-from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
-from framework.runtime.stream_runtime import StreamRuntime, StreamRuntimeAdapter
+from framework.orchestrator.checkpoint_config import CheckpointConfig
+from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator
+from framework.host.event_bus import EventBus
+from framework.host.shared_state import IsolationLevel, SharedBufferManager
+from framework.host.stream_runtime import StreamDecisionTracker, StreamRuntimeAdapter
if TYPE_CHECKING:
- from framework.graph.edge import GraphSpec
- from framework.graph.goal import Goal
+ from framework.orchestrator.edge import GraphSpec
+ from framework.orchestrator.goal import Goal
from framework.llm.provider import LLMProvider, Tool
- from framework.runtime.event_bus import AgentEvent
- from framework.runtime.outcome_aggregator import OutcomeAggregator
+ from framework.host.event_bus import AgentEvent
+ from framework.host.outcome_aggregator import OutcomeAggregator
from framework.storage.concurrent import ConcurrentStorage
from framework.storage.session_store import SessionStore
@@ -133,7 +133,7 @@ class ExecutionContext:
status: str = "pending" # pending, running, completed, failed, paused
-class ExecutionStream:
+class ExecutionManager:
"""
Manages concurrent executions for a single entry point.
@@ -262,7 +262,7 @@ class ExecutionStream:
)
# Create stream-scoped runtime
- self._runtime = StreamRuntime(
+ self._runtime = StreamDecisionTracker(
stream_id=stream_id,
storage=storage,
outcome_aggregator=outcome_aggregator,
@@ -271,7 +271,7 @@ class ExecutionStream:
# Execution tracking
self._active_executions: dict[str, ExecutionContext] = {}
self._execution_tasks: dict[str, asyncio.Task] = {}
- self._active_executors: dict[str, GraphExecutor] = {}
+ self._active_executors: dict[str, Orchestrator] = {}
self._cancel_reasons: dict[str, str] = {}
self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
self._execution_result_times: dict[str, float] = {}
@@ -301,7 +301,7 @@ class ExecutionStream:
# Emit stream started event
if self._scoped_event_bus:
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
@@ -426,7 +426,7 @@ class ExecutionStream:
# Emit stream stopped event
if self._scoped_event_bus:
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
@@ -668,7 +668,7 @@ class ExecutionStream:
# Create per-execution runtime logger
runtime_logger = None
if self._runtime_log_store:
- from framework.runtime.runtime_logger import RuntimeLogger
+ from framework.tracker.runtime_logger import RuntimeLogger
runtime_logger = RuntimeLogger(
store=self._runtime_log_store, agent_id=self.graph.id
@@ -697,12 +697,7 @@ class ExecutionStream:
# forward so the next attempt resumes at the failed node.
while True:
# Create executor for this execution.
- # Each execution gets its own storage under sessions/{exec_id}/
- # so conversations, spillover, and data files are all scoped
- # to this execution. The executor sets data_dir via execution
- # context (contextvars) so data tools and spillover share the
- # same session-scoped directory.
- executor = GraphExecutor(
+ executor = Orchestrator(
runtime=runtime_adapter,
llm=self._llm,
tools=self._tools,
@@ -763,7 +758,7 @@ class ExecutionStream:
# Emit resurrection event
if self._scoped_event_bus:
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
@@ -1119,7 +1114,7 @@ class ExecutionStream:
Each stream only executes from its own entry_node, but the full
graph must validate with all entry points accounted for.
"""
- from framework.graph.edge import GraphSpec
+ from framework.orchestrator.edge import GraphSpec
# Merge entry points: this stream's entry + original graph's primary
# entry + any other entry points. This ensures all nodes are
diff --git a/core/framework/runtime/outcome_aggregator.py b/core/framework/host/outcome_aggregator.py
similarity index 99%
rename from core/framework/runtime/outcome_aggregator.py
rename to core/framework/host/outcome_aggregator.py
index 9bf8c596..164a8ceb 100644
--- a/core/framework/runtime/outcome_aggregator.py
+++ b/core/framework/host/outcome_aggregator.py
@@ -14,8 +14,8 @@ from typing import TYPE_CHECKING, Any
from framework.schemas.decision import Decision, Outcome
if TYPE_CHECKING:
- from framework.graph.goal import Goal
- from framework.runtime.event_bus import EventBus
+ from framework.orchestrator.goal import Goal
+ from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
diff --git a/core/framework/runtime/shared_state.py b/core/framework/host/shared_state.py
similarity index 100%
rename from core/framework/runtime/shared_state.py
rename to core/framework/host/shared_state.py
diff --git a/core/framework/runtime/stream_runtime.py b/core/framework/host/stream_runtime.py
similarity index 98%
rename from core/framework/runtime/stream_runtime.py
rename to core/framework/host/stream_runtime.py
index 9c1a5131..eeecce0c 100644
--- a/core/framework/runtime/stream_runtime.py
+++ b/core/framework/host/stream_runtime.py
@@ -18,12 +18,12 @@ from framework.schemas.run import Run, RunStatus
from framework.storage.concurrent import ConcurrentStorage
if TYPE_CHECKING:
- from framework.runtime.outcome_aggregator import OutcomeAggregator
+ from framework.host.outcome_aggregator import OutcomeAggregator
logger = logging.getLogger(__name__)
-class StreamRuntime:
+class StreamDecisionTracker:
"""
Thread-safe runtime for a single execution stream.
@@ -431,7 +431,7 @@ class StreamRuntimeAdapter:
by providing the same API as Runtime but routing to a specific execution.
"""
- def __init__(self, stream_runtime: StreamRuntime, execution_id: str):
+ def __init__(self, stream_runtime: StreamDecisionTracker, execution_id: str):
"""
Create adapter for a specific execution.
diff --git a/core/framework/runtime/triggers.py b/core/framework/host/triggers.py
similarity index 100%
rename from core/framework/runtime/triggers.py
rename to core/framework/host/triggers.py
diff --git a/core/framework/runtime/webhook_server.py b/core/framework/host/webhook_server.py
similarity index 99%
rename from core/framework/runtime/webhook_server.py
rename to core/framework/host/webhook_server.py
index 3d8a5754..b33dcaba 100644
--- a/core/framework/runtime/webhook_server.py
+++ b/core/framework/host/webhook_server.py
@@ -13,7 +13,7 @@ from dataclasses import dataclass
from aiohttp import web
-from framework.runtime.event_bus import EventBus
+from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
diff --git a/core/framework/llm/key_pool.py b/core/framework/llm/key_pool.py
new file mode 100644
index 00000000..9790e1ba
--- /dev/null
+++ b/core/framework/llm/key_pool.py
@@ -0,0 +1,101 @@
+"""Thread-safe API key pool with round-robin rotation and health tracking.
+
+When multiple API keys are configured, the pool rotates through them on each
+request. Keys that hit rate limits are temporarily cooled-down so the next
+call automatically uses a healthy key -- no sleep required.
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+import time
+from dataclasses import dataclass
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class KeyHealth:
+ """Per-key health counters."""
+
+ rate_limited_until: float = 0.0 # monotonic timestamp
+ consecutive_errors: int = 0
+ total_requests: int = 0
+ total_successes: int = 0
+
+
+class KeyPool:
+ """Round-robin key pool with health tracking.
+
+ Thread-safe: all mutations protected by a lock so concurrent LLM calls
+ (e.g. parallel tool execution in EventLoopNode) don't race.
+ """
+
+ def __init__(self, keys: list[str]) -> None:
+ if not keys:
+ raise ValueError("KeyPool requires at least one key")
+ self._keys = list(keys)
+ self._index = 0
+ self._health: dict[str, KeyHealth] = {k: KeyHealth() for k in keys}
+ self._lock = threading.Lock()
+
+ @property
+ def size(self) -> int:
+ return len(self._keys)
+
+ def get_key(self) -> str:
+ """Return the next healthy key (round-robin).
+
+ If every key is currently rate-limited, returns the one whose cooldown
+ expires soonest so the caller can proceed with minimal delay.
+ """
+ with self._lock:
+ now = time.monotonic()
+ for _ in range(len(self._keys)):
+ key = self._keys[self._index]
+ self._index = (self._index + 1) % len(self._keys)
+ health = self._health[key]
+ if health.rate_limited_until <= now:
+ health.total_requests += 1
+ return key
+ # All rate-limited -- pick the one that expires soonest.
+ soonest = min(self._keys, key=lambda k: self._health[k].rate_limited_until)
+ self._health[soonest].total_requests += 1
+ return soonest
+
+ def mark_rate_limited(self, key: str, retry_after: float = 60.0) -> None:
+ """Mark *key* as rate-limited for *retry_after* seconds."""
+ with self._lock:
+ health = self._health.get(key)
+ if health:
+ health.rate_limited_until = time.monotonic() + retry_after
+ health.consecutive_errors += 1
+ logger.info(
+ "[key-pool] Key ...%s rate-limited for %.0fs (errors=%d)",
+ key[-6:],
+ retry_after,
+ health.consecutive_errors,
+ )
+
+ def mark_success(self, key: str) -> None:
+ """Record a successful call on *key*."""
+ with self._lock:
+ health = self._health.get(key)
+ if health:
+ health.consecutive_errors = 0
+ health.total_successes += 1
+
+ def get_stats(self) -> dict[str, dict]:
+ """Return health stats keyed by the last 6 chars of each key."""
+ with self._lock:
+ now = time.monotonic()
+ return {
+ f"...{k[-6:]}": {
+ "healthy": self._health[k].rate_limited_until <= now,
+ "requests": self._health[k].total_requests,
+ "successes": self._health[k].total_successes,
+ "consecutive_errors": self._health[k].consecutive_errors,
+ }
+ for k in self._keys
+ }
diff --git a/core/framework/llm/litellm.py b/core/framework/llm/litellm.py
index 89e01b14..d4e2def6 100644
--- a/core/framework/llm/litellm.py
+++ b/core/framework/llm/litellm.py
@@ -7,6 +7,8 @@ Groq, and local models.
See: https://docs.litellm.ai/docs/providers
"""
+from __future__ import annotations
+
import ast
import asyncio
import hashlib
@@ -18,7 +20,10 @@ import time
from collections.abc import AsyncIterator
from datetime import datetime
from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+ from framework.llm.key_pool import KeyPool
try:
import litellm
@@ -561,6 +566,7 @@ class LiteLLMProvider(LLMProvider):
model: str = "gpt-4o-mini",
api_key: str | None = None,
api_base: str | None = None,
+ api_keys: list[str] | None = None,
**kwargs: Any,
):
"""
@@ -573,6 +579,9 @@ class LiteLLMProvider(LLMProvider):
look for the appropriate env var (OPENAI_API_KEY,
ANTHROPIC_API_KEY, etc.)
api_base: Custom API base URL (for proxies or local deployments)
+ api_keys: Optional list of API keys for key-pool rotation. When
+ provided with 2+ keys, a :class:`KeyPool` is created and
+ keys are rotated on rate-limit errors.
**kwargs: Additional arguments passed to litellm.completion()
"""
# Kimi For Coding exposes an Anthropic-compatible endpoint at
@@ -594,11 +603,24 @@ class LiteLLMProvider(LLMProvider):
if api_base and api_base.rstrip("/").endswith("/v1"):
api_base = api_base.rstrip("/")[:-3]
self.model = model
- self.api_key = api_key
+ # Key pool: when multiple keys are provided, enable rotation.
+ self._key_pool: KeyPool | None = None
+ if api_keys and len(api_keys) > 1:
+ from framework.llm.key_pool import KeyPool
+
+ self._key_pool = KeyPool(api_keys)
+ self.api_key = api_keys[0] # default for OAuth detection below
+ logger.info(
+ "[litellm] Key pool enabled with %d keys for model %s",
+ len(api_keys),
+ model,
+ )
+ else:
+ self.api_key = api_key or (api_keys[0] if api_keys else None)
self.api_base = api_base or self._default_api_base_for_model(_original_model)
self.extra_kwargs = kwargs
# Detect Claude Code OAuth subscription by checking the api_key prefix.
- self._claude_code_oauth = bool(api_key and api_key.startswith("sk-ant-oat"))
+ self._claude_code_oauth = bool(self.api_key and self.api_key.startswith("sk-ant-oat"))
if self._claude_code_oauth:
# Anthropic requires a specific User-Agent for OAuth requests.
eh = self.extra_kwargs.setdefault("extra_headers", {})
@@ -669,10 +691,20 @@ class LiteLLMProvider(LLMProvider):
def _completion_with_rate_limit_retry(
self, max_retries: int | None = None, **kwargs: Any
) -> Any:
- """Call litellm.completion with retry on 429 rate limit errors and empty responses."""
+ """Call litellm.completion with retry on 429 rate limit errors and empty responses.
+
+ When a :class:`KeyPool` is configured, rate-limited keys are rotated
+ automatically so the next attempt uses a different key -- no sleep
+ needed between attempts.
+ """
model = kwargs.get("model", self.model)
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
for attempt in range(retries + 1):
+ # Rotate key from pool when available.
+ current_key: str | None = None
+ if self._key_pool:
+ current_key = self._key_pool.get_key()
+ kwargs["api_key"] = current_key
try:
response = litellm.completion(**kwargs) # type: ignore[union-attr]
@@ -747,8 +779,22 @@ class LiteLLMProvider(LLMProvider):
time.sleep(wait)
continue
+ if self._key_pool and current_key:
+ self._key_pool.mark_success(current_key)
return response
except RateLimitError as e:
+ # Key pool: mark the offending key and rotate immediately.
+ if self._key_pool and current_key:
+ self._key_pool.mark_rate_limited(current_key, retry_after=60.0)
+ # When we have other healthy keys, skip the sleep -- the
+ # next iteration will pick a different key automatically.
+ if attempt < retries:
+ logger.info(
+ "[retry] Key pool rotating away from ...%s on 429",
+ current_key[-6:],
+ )
+ continue
+
# Dump full request to file for debugging
messages = kwargs.get("messages", [])
token_count, token_method = _estimate_tokens(model, messages)
@@ -761,7 +807,7 @@ class LiteLLMProvider(LLMProvider):
if attempt == retries:
logger.error(
f"[retry] GAVE UP on {model} after {retries + 1} "
- f"attempts — rate limit error: {e!s}. "
+ f"attempts -- rate limit error: {e!s}. "
f"~{token_count} tokens ({token_method}). "
f"Full request dumped to: {dump_path}"
)
@@ -880,10 +926,16 @@ class LiteLLMProvider(LLMProvider):
"""Async version of _completion_with_rate_limit_retry.
Uses litellm.acompletion and asyncio.sleep instead of blocking calls.
+ When a :class:`KeyPool` is configured, rate-limited keys are rotated.
"""
model = kwargs.get("model", self.model)
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
for attempt in range(retries + 1):
+ # Rotate key from pool when available.
+ current_key: str | None = None
+ if self._key_pool:
+ current_key = self._key_pool.get_key()
+ kwargs["api_key"] = current_key
try:
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
@@ -952,8 +1004,20 @@ class LiteLLMProvider(LLMProvider):
await asyncio.sleep(wait)
continue
+ if self._key_pool and current_key:
+ self._key_pool.mark_success(current_key)
return response
except RateLimitError as e:
+ # Key pool: mark the offending key and rotate immediately.
+ if self._key_pool and current_key:
+ self._key_pool.mark_rate_limited(current_key, retry_after=60.0)
+ if attempt < retries:
+ logger.info(
+ "[async-retry] Key pool rotating away from ...%s on 429",
+ current_key[-6:],
+ )
+ continue
+
messages = kwargs.get("messages", [])
token_count, token_method = _estimate_tokens(model, messages)
dump_path = _dump_failed_request(
@@ -965,7 +1029,7 @@ class LiteLLMProvider(LLMProvider):
if attempt == retries:
logger.error(
f"[async-retry] GAVE UP on {model} after {retries + 1} "
- f"attempts — rate limit error: {e!s}. "
+ f"attempts -- rate limit error: {e!s}. "
f"~{token_count} tokens ({token_method}). "
f"Full request dumped to: {dump_path}"
)
diff --git a/core/framework/loader/__init__.py b/core/framework/loader/__init__.py
new file mode 100644
index 00000000..1b00d9b4
--- /dev/null
+++ b/core/framework/loader/__init__.py
@@ -0,0 +1,4 @@
+"""Loader layer -- agent loading from disk (JSON config, MCP, credentials)."""
+
+from framework.loader.agent_loader import AgentLoader # noqa: F401
+from framework.loader.tool_registry import ToolRegistry # noqa: F401
diff --git a/core/framework/runner/runner.py b/core/framework/loader/agent_loader.py
similarity index 77%
rename from core/framework/runner/runner.py
rename to core/framework/loader/agent_loader.py
index e8735250..5adca172 100644
--- a/core/framework/runner/runner.py
+++ b/core/framework/loader/agent_loader.py
@@ -13,21 +13,20 @@ from framework.config import get_hive_config, get_max_context_tokens, get_prefer
from framework.credentials.validation import (
ensure_credential_key_env as _ensure_credential_key_env,
)
-from framework.graph import Goal
-from framework.graph.edge import (
+from framework.orchestrator import Goal
+from framework.orchestrator.edge import (
DEFAULT_MAX_TOKENS,
EdgeCondition,
EdgeSpec,
GraphSpec,
)
-from framework.graph.executor import ExecutionResult
-from framework.graph.node import NodeSpec
+from framework.orchestrator.orchestrator import ExecutionResult
+from framework.orchestrator.node import NodeSpec
from framework.llm.provider import LLMProvider, Tool
-from framework.runner.preload_validation import run_preload_validation
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
-from framework.runtime.runtime_log_store import RuntimeLogStore
+from framework.loader.preload_validation import run_preload_validation
+from framework.loader.tool_registry import ToolRegistry
+from framework.host.agent_host import AgentHost, AgentRuntimeConfig
+from framework.host.execution_manager import EntryPointSpec
from framework.tools.flowchart_utils import generate_fallback_flowchart
logger = logging.getLogger(__name__)
@@ -881,6 +880,172 @@ class ValidationResult:
missing_credentials: list[str] = field(default_factory=list)
+def _resolve_template_vars(text: str | None, variables: dict[str, str]) -> str | None:
+ """Resolve ``{{variable_name}}`` placeholders in *text*."""
+ if text is None or not variables:
+ return text
+ import re
+
+ def _replace(m: re.Match) -> str:
+ key = m.group(1).strip()
+ return variables.get(key, m.group(0))
+
+ return re.sub(r"\{\{(.+?)\}\}", _replace, text)
+
+
+def load_agent_config(data: str | dict) -> tuple[GraphSpec, Goal]:
+ """Load ``GraphSpec`` and ``Goal`` from a declarative :class:`AgentConfig`.
+
+ The declarative format uses a ``name`` key at the top level, unlike the
+ legacy export format which uses ``graph``/``goal`` keys (handled by
+ :func:`load_agent_export`); :meth:`AgentLoader.load` parses ``agent.json`` with this.
+
+ Template variables in ``config.variables`` are resolved in all
+ ``system_prompt`` and ``identity_prompt`` fields via ``{{var_name}}``.
+
+ Returns:
+ Tuple of (GraphSpec, Goal)
+ """
+ from framework.orchestrator.edge import EdgeCondition, EdgeSpec
+ from framework.orchestrator.goal import Constraint, Goal as GoalModel, SuccessCriterion
+ from framework.schemas.agent_config import AgentConfig
+
+ if isinstance(data, str):
+ data = json.loads(data)
+
+ config = AgentConfig.model_validate(data)
+ tvars = config.variables
+
+ # Build Goal
+ success_criteria = [
+ SuccessCriterion(
+ id=f"sc-{i}",
+ description=sc,
+ metric="llm_judge",
+ target="",
+ )
+ for i, sc in enumerate(config.goal.success_criteria)
+ ]
+ constraints = [
+ Constraint(
+ id=f"c-{i}",
+ description=c,
+ constraint_type="hard",
+ category="general",
+ )
+ for i, c in enumerate(config.goal.constraints)
+ ]
+ goal = GoalModel(
+ id=f"{config.name}-goal",
+ name=config.name,
+ description=config.goal.description,
+ success_criteria=success_criteria,
+ constraints=constraints,
+ )
+
+ # Build nodes
+ condition_map = {
+ "always": EdgeCondition.ALWAYS,
+ "on_success": EdgeCondition.ON_SUCCESS,
+ "on_failure": EdgeCondition.ON_FAILURE,
+ "conditional": EdgeCondition.CONDITIONAL,
+ "llm_decide": EdgeCondition.LLM_DECIDE,
+ }
+
+ nodes = []
+ for nc in config.nodes:
+ # Resolve tool access: node-level config -> agent-level fallback
+ if nc.tools.policy == "explicit" and nc.tools.allowed:
+ tools_list = nc.tools.allowed
+ tool_policy = "explicit"
+ elif nc.tools.policy == "none":
+ tools_list = []
+ tool_policy = "none"
+ elif nc.tools.policy == "all":
+ tools_list = []
+ tool_policy = "all"
+ else:
+ # Inherit agent-level tool config
+ if config.tools.policy == "explicit" and config.tools.allowed:
+ tools_list = config.tools.allowed
+ else:
+ tools_list = []
+ tool_policy = config.tools.policy
+
+ node_kwargs: dict = {
+ "id": nc.id,
+ "name": nc.name or nc.id,
+ "description": nc.description or "",
+ "node_type": nc.node_type,
+ "system_prompt": _resolve_template_vars(nc.system_prompt, tvars),
+ "tools": tools_list,
+ "tool_access_policy": tool_policy,
+ "model": nc.model,
+ "input_keys": nc.input_keys,
+ "output_keys": nc.output_keys,
+ "nullable_output_keys": nc.nullable_output_keys,
+ "max_iterations": nc.max_iterations,
+ "success_criteria": nc.success_criteria,
+ "skip_judge": nc.skip_judge,
+ }
+ # Optional fields -- only pass when set (avoids overriding defaults)
+ if nc.client_facing:
+ node_kwargs["client_facing"] = nc.client_facing
+ if nc.max_node_visits != 1:
+ node_kwargs["max_node_visits"] = nc.max_node_visits
+ if nc.failure_criteria:
+ node_kwargs["failure_criteria"] = nc.failure_criteria
+ if nc.max_retries is not None:
+ node_kwargs["max_retries"] = nc.max_retries
+
+ nodes.append(NodeSpec(**node_kwargs))
+
+ # Build edges
+ edges = []
+ for i, ec in enumerate(config.edges):
+ edges.append(
+ EdgeSpec(
+ id=f"e-{i}-{ec.from_node}-{ec.to_node}",
+ source=ec.from_node,
+ target=ec.to_node,
+ condition=condition_map.get(ec.condition, EdgeCondition.ON_SUCCESS),
+ condition_expr=ec.condition_expr,
+ priority=ec.priority,
+ input_mapping=ec.input_mapping,
+ )
+ )
+
+ # Build entry_points dict for GraphSpec
+ entry_points_dict: dict = {}
+ if config.entry_points:
+ for ep in config.entry_points:
+ entry_points_dict[ep.id] = ep.entry_node or config.entry_node
+ else:
+ entry_points_dict = {"default": config.entry_node}
+
+ # Build GraphSpec
+ graph_kwargs: dict = {
+ "id": f"{config.name}-graph",
+ "goal_id": goal.id,
+ "version": config.version,
+ "entry_node": config.entry_node,
+ "entry_points": entry_points_dict,
+ "terminal_nodes": config.terminal_nodes,
+ "pause_nodes": config.pause_nodes,
+ "nodes": nodes,
+ "edges": edges,
+ "max_tokens": config.max_tokens,
+ "loop_config": dict(config.loop_config),
+ "conversation_mode": config.conversation_mode,
+ "identity_prompt": _resolve_template_vars(
+ config.identity_prompt, tvars
+ ) or "",
+ }
+
+ graph = GraphSpec(**graph_kwargs)
+ return graph, goal
+
+
def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
"""
Load GraphSpec and Goal from export_graph() output.
@@ -942,7 +1107,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
)
# Build Goal
- from framework.graph.goal import Constraint, SuccessCriterion
+ from framework.orchestrator.goal import Constraint, SuccessCriterion
success_criteria = []
for sc_data in goal_data.get("success_criteria", []):
@@ -979,7 +1144,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
return graph, goal
-class AgentRunner:
+class AgentLoader:
"""
Loads and runs exported agents with minimal boilerplate.
@@ -991,15 +1156,15 @@ class AgentRunner:
Usage:
# Simple usage
- runner = AgentRunner.load("exports/outbound-sales-agent")
+ runner = AgentLoader.load("exports/outbound-sales-agent")
result = await runner.run({"lead_id": "123"})
# With context manager
- async with AgentRunner.load("exports/outbound-sales-agent") as runner:
+ async with AgentLoader.load("exports/outbound-sales-agent") as runner:
result = await runner.run({"lead_id": "123"})
# With custom tools
- runner = AgentRunner.load("exports/outbound-sales-agent")
+ runner = AgentLoader.load("exports/outbound-sales-agent")
runner.register_tool("my_tool", my_tool_func)
result = await runner.run({"lead_id": "123"})
"""
@@ -1027,7 +1192,7 @@ class AgentRunner:
credential_store: Any | None = None,
):
"""
- Initialize the runner (use AgentRunner.load() instead).
+ Initialize the runner (use AgentLoader.load() instead).
Args:
agent_path: Path to agent folder
@@ -1082,7 +1247,7 @@ class AgentRunner:
self._approval_callback: Callable | None = None
# AgentRuntime — unified execution path for all agents
- self._agent_runtime: AgentRuntime | None = None
+ self._agent_runtime: AgentHost | None = None
# Pre-load validation: structural checks + credentials.
# Fails fast with actionable guidance — no MCP noise on screen.
run_preload_validation(
@@ -1101,14 +1266,7 @@ class AgentRunner:
os.environ["HIVE_AGENT_NAME"] = agent_path.name
os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path)
- # Auto-discover MCP servers from mcp_servers.json
- mcp_config_path = agent_path / "mcp_servers.json"
- if mcp_config_path.exists():
- self._load_mcp_servers_from_config(mcp_config_path)
-
- # Auto-discover registry-selected MCP servers from mcp_registry.json
- self._load_registry_mcp_servers(agent_path)
-
+ # MCP tools are loaded by McpRegistryStage in the pipeline during AgentHost.start()
@staticmethod
def _import_agent_module(agent_path: Path):
"""Import an agent package from its directory path.
@@ -1158,7 +1316,7 @@ class AgentRunner:
interactive: bool = True,
skip_credential_validation: bool | None = None,
credential_store: Any | None = None,
- ) -> "AgentRunner":
+ ) -> "AgentLoader":
"""
Load an agent from an export folder.
@@ -1299,21 +1457,22 @@ class AgentRunner:
runner._agent_skills = agent_skills
return runner
- # Fallback: load from agent.json (legacy JSON-based agents)
+ # Fallback: load from agent.json (declarative config)
agent_json_path = agent_path / "agent.json"
+
if not agent_json_path.is_file():
raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")
- with open(agent_json_path, encoding="utf-8") as f:
- export_data = f.read()
-
+ export_data = agent_json_path.read_text(encoding="utf-8")
if not export_data.strip():
- raise ValueError(f"Empty agent export file: {agent_json_path}")
+ raise ValueError(f"Empty agent.json: {agent_json_path}")
- try:
- graph, goal = load_agent_export(export_data)
- except json.JSONDecodeError as exc:
- raise ValueError(f"Invalid JSON in agent export file: {agent_json_path}") from exc
+ parsed = json.loads(export_data)
+ graph, goal = load_agent_config(parsed)
+ logger.info(
+ "Loaded declarative agent config from agent.json (name=%s)",
+ parsed.get("name"),
+ )
# Generate flowchart.json if missing (for legacy JSON-based agents)
generate_fallback_flowchart(graph, goal, agent_path)
@@ -1396,60 +1555,6 @@ class AgentRunner:
}
return self._tool_registry.register_mcp_server(server_config)
- def _load_mcp_servers_from_config(self, config_path: Path) -> None:
- """Load and register MCP servers from a configuration file."""
- self._tool_registry.load_mcp_config(config_path)
-
- def _load_registry_mcp_servers(self, agent_path: Path) -> None:
- """Load and register MCP servers selected via ``mcp_registry.json``."""
- registry_json = agent_path / "mcp_registry.json"
- if registry_json.is_file():
- self._tool_registry.set_mcp_registry_agent_path(agent_path)
- else:
- self._tool_registry.set_mcp_registry_agent_path(None)
-
- from framework.runner.mcp_registry import MCPRegistry
-
- try:
- registry = MCPRegistry()
- registry.initialize()
- server_configs, selection_max_tools = registry.load_agent_selection(agent_path)
- except Exception as exc:
- logger.warning(
- "Failed to load MCP registry servers for '%s': %s",
- agent_path.name,
- exc,
- )
- return
-
- if not server_configs:
- return
-
- results = self._tool_registry.load_registry_servers(
- server_configs,
- preserve_existing_tools=True,
- log_collisions=True,
- max_tools=selection_max_tools,
- )
- loaded = [result for result in results if result["status"] == "loaded"]
- skipped = [result for result in results if result["status"] != "loaded"]
-
- logger.info(
- "Loaded %d/%d MCP registry server(s) for agent '%s'",
- len(loaded),
- len(results),
- agent_path.name,
- )
- if skipped:
- logger.info(
- "Skipped MCP registry servers for agent '%s': %s",
- agent_path.name,
- [
- {"server": result["server"], "reason": result["skipped_reason"]}
- for result in skipped
- ],
- )
-
def set_approval_callback(self, callback: Callable) -> None:
"""
Set a callback for human-in-the-loop approval during execution.
@@ -1460,272 +1565,119 @@ class AgentRunner:
self._approval_callback = callback
def _setup(self, event_bus=None) -> None:
- """Set up runtime, LLM, and executor."""
- # Configure structured logging (auto-detects JSON vs human-readable)
+ """Set up runtime via pipeline stages.
+
+ Builds a pipeline with the default stages (LLM, credentials, MCP,
+ skills) and passes it to AgentHost. The stages initialize during
+ ``AgentHost.start()`` and inject tools/LLM/credentials/skills.
+ """
from framework.observability import configure_logging
+ from framework.pipeline.stages.credential_resolver import CredentialResolverStage
+ from framework.pipeline.stages.llm_provider import LlmProviderStage
+ from framework.pipeline.stages.mcp_registry import McpRegistryStage
+ from framework.pipeline.stages.skill_registry import SkillRegistryStage
+ from framework.skills.config import SkillsConfig
configure_logging(level="INFO", format="auto")
- # Set up session context for tools (agent_id)
+ # Set up session context for tools
agent_id = self.graph.id or "unknown"
+ self._tool_registry.set_session_context(agent_id=agent_id)
- self._tool_registry.set_session_context(
- agent_id=agent_id,
- )
+ # Read MCP server refs from agent.json
+ mcp_refs = []
+ agent_json = self.agent_path / "agent.json"
+ if agent_json.exists():
+ try:
+ import json as _json
- # Create LLM provider
- # Uses LiteLLM which auto-detects the provider from model name
- # Skip if already injected (e.g. worker agents with a pre-built LLM)
- if self._llm is not None:
- pass # LLM already configured externally
- elif self.mock_mode:
- # Use mock LLM for testing without real API calls
- from framework.llm.mock import MockLLMProvider
+ data = _json.loads(agent_json.read_text(encoding="utf-8"))
+ mcp_refs = data.get("mcp_servers", [])
+ except Exception:
+ pass
- self._llm = MockLLMProvider(model=self.model)
- else:
- from framework.llm.litellm import LiteLLMProvider
-
- # Check if a subscription mode is configured
- config = get_hive_config()
- llm_config = config.get("llm", {})
- use_claude_code = llm_config.get("use_claude_code_subscription", False)
- use_codex = llm_config.get("use_codex_subscription", False)
- use_kimi_code = llm_config.get("use_kimi_code_subscription", False)
- use_antigravity = llm_config.get("use_antigravity_subscription", False)
- api_base = llm_config.get("api_base")
-
- api_key = None
- if use_claude_code:
- # Get OAuth token from Claude Code subscription
- api_key = get_claude_code_token()
- if not api_key:
- logger.warning(
- "Claude Code subscription configured but no token found. "
- "Run 'claude' to authenticate, then try again."
- )
- elif use_codex:
- # Get OAuth token from Codex subscription
- api_key = get_codex_token()
- if not api_key:
- logger.warning(
- "Codex subscription configured but no token found. "
- "Run 'codex' to authenticate, then try again."
- )
- elif use_kimi_code:
- # Get API key from Kimi Code CLI config (~/.kimi/config.toml)
- api_key = get_kimi_code_token()
- if not api_key:
- logger.warning(
- "Kimi Code subscription configured but no key found. "
- "Run 'kimi /login' to authenticate, then try again."
- )
- elif use_antigravity:
- pass # AntigravityProvider handles credentials internally
-
- if api_key and use_claude_code:
- # Use litellm's built-in Anthropic OAuth support.
- # The lowercase "authorization" key triggers OAuth detection which
- # adds the required anthropic-beta and browser-access headers.
- self._llm = LiteLLMProvider(
- model=self.model,
- api_key=api_key,
- api_base=api_base,
- extra_headers={"authorization": f"Bearer {api_key}"},
- )
- elif api_key and use_codex:
- # OpenAI Codex subscription routes through the ChatGPT backend
- # (chatgpt.com/backend-api/codex/responses), NOT the standard
- # OpenAI API. The consumer OAuth token lacks platform API scopes.
- extra_headers: dict[str, str] = {
- "Authorization": f"Bearer {api_key}",
- "User-Agent": "CodexBar",
- }
- account_id = get_codex_account_id()
- if account_id:
- extra_headers["ChatGPT-Account-Id"] = account_id
- self._llm = LiteLLMProvider(
- model=self.model,
- api_key=api_key,
- api_base="https://chatgpt.com/backend-api/codex",
- extra_headers=extra_headers,
- store=False,
- allowed_openai_params=["store"],
- )
- elif api_key and use_kimi_code:
- # Kimi Code subscription uses the Kimi coding API (OpenAI-compatible).
- # The api_base is set automatically by LiteLLMProvider for kimi/ models.
- self._llm = LiteLLMProvider(
- model=self.model,
- api_key=api_key,
- api_base=api_base,
- )
- elif use_antigravity:
- # Direct OAuth to Google's internal Cloud Code Assist gateway.
- # No local proxy required — AntigravityProvider handles token
- # refresh and Gemini-format request/response conversion natively.
- from framework.llm.antigravity import AntigravityProvider # noqa: PLC0415
-
- provider = AntigravityProvider(model=self.model)
- if not provider.has_credentials():
- print(
- "Warning: Antigravity credentials not found. "
- "Run: uv run python core/antigravity_auth.py auth account add"
- )
- self._llm = provider
- else:
- # Local models (e.g. Ollama) don't need an API key
- if self._is_local_model(self.model):
- self._llm = LiteLLMProvider(
- model=self.model,
- api_base=api_base,
- )
- else:
- # Fall back to environment variable
- # First check api_key_env_var from config (set by quickstart)
- api_key_env = llm_config.get("api_key_env_var") or self._get_api_key_env_var(
- self.model
- )
- if api_key_env and os.environ.get(api_key_env):
- self._llm = LiteLLMProvider(
- model=self.model,
- api_key=os.environ[api_key_env],
- api_base=api_base,
- )
- else:
- # Fall back to credential store
- api_key = self._get_api_key_from_credential_store()
- if api_key:
- self._llm = LiteLLMProvider(
- model=self.model, api_key=api_key, api_base=api_base
- )
- # Set env var so downstream code (e.g. cleanup LLM in
- # node._extract_json) can also find it
- if api_key_env:
- os.environ[api_key_env] = api_key
- elif api_key_env:
- logger.warning(
- "%s not set. LLM calls will fail. "
- "Set it with: export %s=your-api-key",
- api_key_env,
- api_key_env,
- )
-
- # Fail fast if the agent needs an LLM but none was configured
- if self._llm is None:
- has_llm_nodes = any(
- node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
- )
- if has_llm_nodes:
- from framework.credentials.models import CredentialError
-
- if self._is_local_model(self.model):
- raise CredentialError(
- f"Failed to initialize LLM for local model '{self.model}'. "
- f"Ensure your local LLM server is running "
- f"(e.g. 'ollama serve' for Ollama)."
- )
- api_key_env = self._get_api_key_env_var(self.model)
- hint = (
- f"Set it with: export {api_key_env}=your-api-key"
- if api_key_env
- else "Configure an API key for your LLM provider."
- )
- raise CredentialError(f"LLM API key not found for model '{self.model}'. {hint}")
-
- # For GCU nodes: auto-register GCU MCP server if needed, then expand tool lists
- has_gcu_nodes = any(node.node_type == "gcu" for node in self.graph.nodes)
- if has_gcu_nodes:
- from framework.graph.gcu import GCU_MCP_SERVER_CONFIG, GCU_SERVER_NAME
-
- # Auto-register GCU MCP server if tools aren't loaded yet
- gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
- if not gcu_tool_names:
- # Resolve cwd to repo-level tools/ (not relative to agent_path)
- gcu_config = dict(GCU_MCP_SERVER_CONFIG)
- _repo_root = Path(__file__).resolve().parent.parent.parent.parent
- gcu_config["cwd"] = str(_repo_root / "tools")
- self._tool_registry.register_mcp_server(gcu_config)
- gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
-
- # Expand each GCU node's tools list to include all GCU server tools
- if gcu_tool_names:
- for node in self.graph.nodes:
- if node.node_type == "gcu":
- existing = set(node.tools)
- for tool_name in sorted(gcu_tool_names):
- if tool_name not in existing:
- node.tools.append(tool_name)
-
- # For event_loop/gcu nodes: auto-register file tools MCP server, then expand tool lists
- has_loop_nodes = any(node.node_type in ("event_loop", "gcu") for node in self.graph.nodes)
- if has_loop_nodes:
- from framework.graph.files import FILES_MCP_SERVER_CONFIG, FILES_MCP_SERVER_NAME
-
- files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
- if not files_tool_names:
- # Resolve cwd to repo-level tools/ (not relative to agent_path)
- files_config = dict(FILES_MCP_SERVER_CONFIG)
- _repo_root = Path(__file__).resolve().parent.parent.parent.parent
- files_config["cwd"] = str(_repo_root / "tools")
- self._tool_registry.register_mcp_server(files_config)
- files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
-
- if files_tool_names:
- for node in self.graph.nodes:
- if node.node_type in ("event_loop", "gcu"):
- existing = set(node.tools)
- for tool_name in sorted(files_tool_names):
- if tool_name not in existing:
- node.tools.append(tool_name)
-
- # Get tools for runtime
- tools = list(self._tool_registry.get_tools().values())
- tool_executor = self._tool_registry.get_executor()
-
- # Collect connected account info for system prompt injection
- accounts_prompt = ""
- accounts_data: list[dict] | None = None
- tool_provider_map: dict[str, str] | None = None
- try:
- from aden_tools.credentials.store_adapter import CredentialStoreAdapter
-
- if self._credential_store is not None:
- adapter = CredentialStoreAdapter(store=self._credential_store)
- else:
- adapter = CredentialStoreAdapter.default()
- accounts_data = adapter.get_all_account_info()
- tool_provider_map = adapter.get_tool_provider_map()
- if accounts_data:
- from framework.graph.prompting import build_accounts_prompt
-
- accounts_prompt = build_accounts_prompt(accounts_data, tool_provider_map)
- except Exception:
- pass # Best-effort — agent works without account info
-
- # Skill configuration — the runtime handles discovery, loading, trust-gating and
- # prompt rasterization. The runner just builds the config.
- from framework.skills.config import SkillsConfig
- from framework.skills.manager import SkillsManagerConfig
-
- skills_manager_config = SkillsManagerConfig(
- skills_config=SkillsConfig.from_agent_vars(
- default_skills=getattr(self, "_agent_default_skills", None),
- skills=getattr(self, "_agent_skills", None),
+ # Build default pipeline stages
+ # Default infrastructure stages (always present)
+ pipeline_stages = [
+ LlmProviderStage(
+ model=self.model,
+ mock_mode=self.mock_mode,
+ llm=self._llm,
),
- project_root=self.agent_path,
- interactive=self._interactive,
- )
+ CredentialResolverStage(
+ credential_store=self._credential_store,
+ ),
+ McpRegistryStage(
+ server_refs=mcp_refs,
+ agent_path=self.agent_path,
+ tool_registry=self._tool_registry,
+ ),
+ SkillRegistryStage(
+ project_root=self.agent_path,
+ interactive=self._interactive,
+ skills_config=SkillsConfig.from_agent_vars(
+ default_skills=getattr(self, "_agent_default_skills", None),
+ skills=getattr(self, "_agent_skills", None),
+ ),
+ ),
+ ]
- self._setup_agent_runtime(
- tools,
- tool_executor,
- accounts_prompt=accounts_prompt,
- accounts_data=accounts_data,
- tool_provider_map=tool_provider_map,
+ # Merge user-configured stages from ~/.hive/configuration.json
+ from framework.config import get_hive_config
+ from framework.pipeline.registry import build_pipeline_from_config
+
+ hive_config = get_hive_config()
+ user_stages_config = hive_config.get("pipeline", {}).get("stages", [])
+ if user_stages_config:
+ user_pipeline = build_pipeline_from_config(user_stages_config)
+ pipeline_stages.extend(user_pipeline.stages)
+
+ # Merge agent-level overrides from agent.json pipeline field
+ if agent_json.exists():
+ try:
+ agent_pipeline = (
+ _json.loads(agent_json.read_text(encoding="utf-8"))
+ .get("pipeline", {})
+ .get("stages", [])
+ )
+ if agent_pipeline:
+ agent_stages = build_pipeline_from_config(agent_pipeline)
+ pipeline_stages.extend(agent_stages.stages)
+ except Exception:
+ pass
+
+ # Create AgentHost directly (no wrapper)
+ from framework.host.execution_manager import EntryPointSpec
+ from framework.orchestrator.checkpoint_config import CheckpointConfig
+ from framework.tracker.runtime_log_store import RuntimeLogStore
+
+ self._agent_runtime = AgentHost(
+ graph=self.graph,
+ goal=self.goal,
+ storage_path=self._storage_path,
+ runtime_log_store=RuntimeLogStore(
+ base_path=self._storage_path / "runtime_logs",
+ ),
+ checkpoint_config=CheckpointConfig(
+ enabled=True,
+ checkpoint_on_node_complete=True,
+ checkpoint_max_age_days=7,
+ async_checkpoint=True,
+ ),
+ graph_id=self.graph.id or self.agent_path.name,
event_bus=event_bus,
- skills_manager_config=skills_manager_config,
+ pipeline_stages=pipeline_stages,
)
+ self._agent_runtime.register_entry_point(
+ EntryPointSpec(
+ id="default",
+ name="Default",
+ entry_node=self.graph.entry_node,
+ trigger_type="manual",
+ isolation_level="shared",
+ ),
+ )
+ self._agent_runtime.intro_message = self.intro_message
def _get_api_key_env_var(self, model: str) -> str | None:
"""Get the environment variable name for the API key based on model name."""
@@ -1833,83 +1785,6 @@ class AgentRunner:
)
return model.lower().startswith(LOCAL_PREFIXES)
- def _setup_agent_runtime(
- self,
- tools: list,
- tool_executor: Callable | None,
- accounts_prompt: str = "",
- accounts_data: list[dict] | None = None,
- tool_provider_map: dict[str, str] | None = None,
- event_bus=None,
- skills_catalog_prompt: str = "",
- protocols_prompt: str = "",
- skill_dirs: list[str] | None = None,
- skills_manager_config=None,
- ) -> None:
- """Set up multi-entry-point execution using AgentRuntime."""
- entry_points = []
-
- # Always create a primary entry point for the graph's entry node.
- # For multi-entry-point agents this ensures the primary path (e.g.
- # user-facing rule setup) is reachable alongside async entry points.
- if self.graph.entry_node:
- entry_points.insert(
- 0,
- EntryPointSpec(
- id="default",
- name="Default",
- entry_node=self.graph.entry_node,
- trigger_type="manual",
- isolation_level="shared",
- ),
- )
-
- # Create AgentRuntime with all entry points
- log_store = RuntimeLogStore(base_path=self._storage_path / "runtime_logs")
-
- # Enable checkpointing by default for resumable sessions
- from framework.graph.checkpoint_config import CheckpointConfig
-
- checkpoint_config = CheckpointConfig(
- enabled=True,
- checkpoint_on_node_start=False, # Only checkpoint after nodes complete
- checkpoint_on_node_complete=True,
- checkpoint_max_age_days=7,
- async_checkpoint=True, # Non-blocking
- )
-
- # Handle runtime_config - only pass through if it's actually an AgentRuntimeConfig.
- # Agents may export a RuntimeConfig (LLM settings) or queen-generated custom classes
- # that would crash AgentRuntime if passed through.
- runtime_config = None
- if self.runtime_config is not None:
- from framework.runtime.agent_runtime import AgentRuntimeConfig
-
- if isinstance(self.runtime_config, AgentRuntimeConfig):
- runtime_config = self.runtime_config
-
- self._agent_runtime = create_agent_runtime(
- graph=self.graph,
- goal=self.goal,
- storage_path=self._storage_path,
- entry_points=entry_points,
- llm=self._llm,
- tools=tools,
- tool_executor=tool_executor,
- runtime_log_store=log_store,
- checkpoint_config=checkpoint_config,
- config=runtime_config,
- graph_id=self.graph.id or self.agent_path.name,
- accounts_prompt=accounts_prompt,
- accounts_data=accounts_data,
- tool_provider_map=tool_provider_map,
- event_bus=event_bus,
- skills_manager_config=skills_manager_config,
- )
-
- # Pass intro_message through for TUI display
- self._agent_runtime.intro_message = self.intro_message
-
# ------------------------------------------------------------------
# Execution modes
#
@@ -1990,7 +1865,7 @@ class AgentRunner:
sub_ids: list[str] = []
if has_queen and sys.stdin.isatty():
- from framework.runtime.event_bus import EventType
+ from framework.host.event_bus import EventType
runtime = self._agent_runtime
@@ -2246,7 +2121,7 @@ class AgentRunner:
except ImportError:
# aden_tools not installed - fall back to direct check
has_llm_nodes = any(
- node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
+ node.node_type == "event_loop" for node in self.graph.nodes
)
if has_llm_nodes:
api_key_env = self._get_api_key_env_var(self.model)
@@ -2283,7 +2158,7 @@ class AgentRunner:
# Run synchronous cleanup
self.cleanup()
- async def __aenter__(self) -> "AgentRunner":
+ async def __aenter__(self) -> "AgentLoader":
"""Context manager entry."""
self._setup()
if self._agent_runtime is not None:
diff --git a/core/framework/runner/cli.py b/core/framework/loader/cli.py
similarity index 96%
rename from core/framework/runner/cli.py
rename to core/framework/loader/cli.py
index 3e94afb9..bd83fa8e 100644
--- a/core/framework/runner/cli.py
+++ b/core/framework/loader/cli.py
@@ -19,7 +19,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
run_parser.add_argument(
"agent_path",
type=str,
- help="Path to agent folder (containing agent.json)",
+ help="Path to agent folder (containing agent.json or agent.py)",
)
run_parser.add_argument(
"--input",
@@ -87,7 +87,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
info_parser.add_argument(
"agent_path",
type=str,
- help="Path to agent folder (containing agent.json)",
+ help="Path to agent folder (containing agent.json or agent.py)",
)
info_parser.add_argument(
"--json",
@@ -105,7 +105,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
validate_parser.add_argument(
"agent_path",
type=str,
- help="Path to agent folder (containing agent.json)",
+ help="Path to agent folder (containing agent.json or agent.py)",
)
validate_parser.set_defaults(func=cmd_validate)
@@ -310,7 +310,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):
Updated runner if user proceeds, None if user aborts.
"""
from framework.credentials.setup import CredentialSetupSession
- from framework.runner import AgentRunner
+ from framework.loader import AgentLoader
while True:
print()
@@ -328,7 +328,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):
if result.success:
# Reload runner with updated credentials
try:
- runner = AgentRunner.load(agent_path, model=model)
+ runner = AgentLoader.load(agent_path, model=model)
except Exception as e:
print(f"Error reloading agent: {e}")
return None
@@ -342,7 +342,7 @@ def cmd_run(args: argparse.Namespace) -> int:
from framework.credentials.models import CredentialError
from framework.observability import configure_logging
- from framework.runner import AgentRunner
+ from framework.loader import AgentLoader
# Set logging level (quiet by default for cleaner output)
if args.quiet:
@@ -390,7 +390,7 @@ def cmd_run(args: argparse.Namespace) -> int:
# Standard execution
# AgentRunner handles credential setup interactively when stdin is a TTY.
try:
- runner = AgentRunner.load(
+ runner = AgentLoader.load(
args.agent_path,
model=args.model,
)
@@ -528,10 +528,10 @@ def cmd_run(args: argparse.Namespace) -> int:
def cmd_info(args: argparse.Namespace) -> int:
"""Show agent information."""
from framework.credentials.models import CredentialError
- from framework.runner import AgentRunner
+ from framework.loader import AgentLoader
try:
- runner = AgentRunner.load(args.agent_path)
+ runner = AgentLoader.load(args.agent_path)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return 1
@@ -595,10 +595,10 @@ def cmd_info(args: argparse.Namespace) -> int:
def cmd_validate(args: argparse.Namespace) -> int:
"""Validate an exported agent."""
from framework.credentials.models import CredentialError
- from framework.runner import AgentRunner
+ from framework.loader import AgentLoader
try:
- runner = AgentRunner.load(args.agent_path)
+ runner = AgentLoader.load(args.agent_path)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return 1
@@ -632,7 +632,7 @@ def cmd_validate(args: argparse.Namespace) -> int:
def cmd_list(args: argparse.Namespace) -> int:
"""List available agents."""
- from framework.runner import AgentRunner
+ from framework.loader import AgentLoader
directory = Path(args.directory)
if not directory.exists():
@@ -644,7 +644,7 @@ def cmd_list(args: argparse.Namespace) -> int:
for path in directory.iterdir():
if _is_valid_agent_dir(path):
try:
- runner = AgentRunner.load(path)
+ runner = AgentLoader.load(path)
info = runner.info()
agents.append(
{
@@ -686,7 +686,7 @@ def cmd_list(args: argparse.Namespace) -> int:
def _interactive_approval(request):
"""Interactive approval callback for HITL mode."""
- from framework.graph import ApprovalDecision, ApprovalResult
+ from framework.orchestrator import ApprovalDecision, ApprovalResult
print()
print("=" * 60)
@@ -775,7 +775,7 @@ def cmd_shell(args: argparse.Namespace) -> int:
from framework.credentials.models import CredentialError
from framework.observability import configure_logging
- from framework.runner import AgentRunner
+ from framework.loader import AgentLoader
configure_logging(level="INFO")
@@ -789,7 +789,7 @@ def cmd_shell(args: argparse.Namespace) -> int:
return 1
try:
- runner = AgentRunner.load(agent_path)
+ runner = AgentLoader.load(agent_path)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return 1
@@ -1004,17 +1004,35 @@ def _get_framework_agents_dir() -> Path:
def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
- """Extract name and description from a Python-based agent's config.py.
+ """Extract name and description from an agent directory.
- Uses AST parsing to safely extract values without executing code.
+ Checks agent.json first (declarative), then falls back to config.py
+ (legacy Python). Uses AST parsing for Python to avoid executing code.
Returns (name, description) tuple, with fallbacks if parsing fails.
"""
import ast
- config_path = agent_path / "config.py"
fallback_name = agent_path.name.replace("_", " ").title()
fallback_desc = "(Python-based agent)"
+ # Declarative agent: read from agent.json
+ agent_json = agent_path / "agent.json"
+ if agent_json.exists():
+ try:
+ import json
+
+ data = json.loads(agent_json.read_text(encoding="utf-8"))
+ if isinstance(data, dict):
+ name = data.get("name", fallback_name)
+ # Convert kebab-case to Title Case for display
+ if "-" in name and " " not in name:
+ name = name.replace("-", " ").title()
+ desc = data.get("description", fallback_desc)
+ return name, desc
+ except Exception:
+ pass
+
+ config_path = agent_path / "config.py"
if not config_path.exists():
return fallback_name, fallback_desc
@@ -1083,7 +1101,7 @@ def _is_valid_agent_dir(path: Path) -> bool:
def _has_agents(directory: Path) -> bool:
- """Check if a directory contains any valid agents (folders with agent.json or agent.py)."""
+ """Check if a directory contains any valid agents."""
if not directory.exists():
return False
return any(_is_valid_agent_dir(p) for p in directory.iterdir())
diff --git a/core/framework/runner/mcp_client.py b/core/framework/loader/mcp_client.py
similarity index 99%
rename from core/framework/runner/mcp_client.py
rename to core/framework/loader/mcp_client.py
index df665571..d2e36273 100644
--- a/core/framework/runner/mcp_client.py
+++ b/core/framework/loader/mcp_client.py
@@ -14,7 +14,7 @@ from typing import Any, Literal
import httpx
-from framework.runner.mcp_errors import MCPToolNotFoundError
+from framework.loader.mcp_errors import MCPToolNotFoundError
logger = logging.getLogger(__name__)
diff --git a/core/framework/runner/mcp_connection_manager.py b/core/framework/loader/mcp_connection_manager.py
similarity index 99%
rename from core/framework/runner/mcp_connection_manager.py
rename to core/framework/loader/mcp_connection_manager.py
index 98bb9a24..f5118d94 100644
--- a/core/framework/runner/mcp_connection_manager.py
+++ b/core/framework/loader/mcp_connection_manager.py
@@ -5,7 +5,7 @@ import threading
import httpx
-from framework.runner.mcp_client import MCPClient, MCPServerConfig
+from framework.loader.mcp_client import MCPClient, MCPServerConfig
logger = logging.getLogger(__name__)
diff --git a/core/framework/runner/mcp_errors.py b/core/framework/loader/mcp_errors.py
similarity index 100%
rename from core/framework/runner/mcp_errors.py
rename to core/framework/loader/mcp_errors.py
diff --git a/core/framework/runner/mcp_registry.py b/core/framework/loader/mcp_registry.py
similarity index 99%
rename from core/framework/runner/mcp_registry.py
rename to core/framework/loader/mcp_registry.py
index 4de4bb93..adaaebaa 100644
--- a/core/framework/runner/mcp_registry.py
+++ b/core/framework/loader/mcp_registry.py
@@ -14,9 +14,9 @@ from typing import Any, Literal
import httpx
-from framework.runner.mcp_client import MCPClient, MCPServerConfig
-from framework.runner.mcp_connection_manager import MCPConnectionManager
-from framework.runner.mcp_errors import (
+from framework.loader.mcp_client import MCPClient, MCPServerConfig
+from framework.loader.mcp_connection_manager import MCPConnectionManager
+from framework.loader.mcp_errors import (
MCPError,
MCPErrorCode,
MCPInstallError,
diff --git a/core/framework/runner/mcp_registry_cli.py b/core/framework/loader/mcp_registry_cli.py
similarity index 99%
rename from core/framework/runner/mcp_registry_cli.py
rename to core/framework/loader/mcp_registry_cli.py
index b84b59dc..ccaa4861 100644
--- a/core/framework/runner/mcp_registry_cli.py
+++ b/core/framework/loader/mcp_registry_cli.py
@@ -28,7 +28,7 @@ from typing import Any
def _get_registry(base_path: Path | None = None):
"""Initialize and return an MCPRegistry instance."""
- from framework.runner.mcp_registry import MCPRegistry
+ from framework.loader.mcp_registry import MCPRegistry
registry = MCPRegistry(base_path=base_path)
registry.initialize()
diff --git a/core/framework/runner/preload_validation.py b/core/framework/loader/preload_validation.py
similarity index 98%
rename from core/framework/runner/preload_validation.py
rename to core/framework/loader/preload_validation.py
index c04ceabf..9e76e3d4 100644
--- a/core/framework/runner/preload_validation.py
+++ b/core/framework/loader/preload_validation.py
@@ -11,8 +11,8 @@ from dataclasses import dataclass, field
from typing import TYPE_CHECKING
if TYPE_CHECKING:
- from framework.graph.edge import GraphSpec
- from framework.graph.node import NodeSpec
+ from framework.orchestrator.edge import GraphSpec
+ from framework.orchestrator.node import NodeSpec
logger = logging.getLogger(__name__)
diff --git a/core/framework/runner/protocol.py b/core/framework/loader/protocol.py
similarity index 100%
rename from core/framework/runner/protocol.py
rename to core/framework/loader/protocol.py
diff --git a/core/framework/runner/tool_registry.py b/core/framework/loader/tool_registry.py
similarity index 97%
rename from core/framework/runner/tool_registry.py
rename to core/framework/loader/tool_registry.py
index 5ea8154f..4c862e44 100644
--- a/core/framework/runner/tool_registry.py
+++ b/core/framework/loader/tool_registry.py
@@ -262,15 +262,21 @@ class ToolRegistry:
is_error=False,
)
+ registry_ref = self
+
def executor(tool_use: ToolUse) -> ToolResult:
- if tool_use.name not in self._tools:
+ # Check if credential files changed (lightweight dir listing).
+ # If new OAuth tokens appeared, restarts MCP servers to pick them up.
+ registry_ref.resync_mcp_servers_if_needed()
+
+ if tool_use.name not in registry_ref._tools:
return ToolResult(
tool_use_id=tool_use.id,
content=json.dumps({"error": f"Unknown tool: {tool_use.name}"}),
is_error=True,
)
- registered = self._tools[tool_use.name]
+ registered = registry_ref._tools[tool_use.name]
try:
result = registered.executor(tool_use.input)
@@ -635,8 +641,8 @@ class ToolRegistry:
Number of tools registered from this server
"""
try:
- from framework.runner.mcp_client import MCPClient, MCPServerConfig
- from framework.runner.mcp_connection_manager import MCPConnectionManager
+ from framework.loader.mcp_client import MCPClient, MCPServerConfig
+ from framework.loader.mcp_connection_manager import MCPConnectionManager
# Build config object
config = MCPServerConfig(
@@ -883,7 +889,7 @@ class ToolRegistry:
"""Re-run ``mcp_registry.json`` resolution and register servers (post-resync)."""
if self._mcp_registry_agent_path is None:
return
- from framework.runner.mcp_registry import MCPRegistry
+ from framework.loader.mcp_registry import MCPRegistry
try:
reg = MCPRegistry()
@@ -922,6 +928,11 @@ class ToolRegistry:
clients and re-loads them so the new subprocess picks up the fresh
credentials.
+ Note: Individual credential TTL/refresh is handled by the MCP server
+ process internally -- it resolves tokens from the credential store
+ on every tool call, not at startup. This method only handles the case
+ where entirely new credential files appear.
+
Returns True if a resync was performed, False otherwise.
"""
if not self._mcp_clients or self._mcp_config_path is None:
@@ -975,7 +986,7 @@ class ToolRegistry:
server_name = self._mcp_client_servers.get(client_id, client.config.name)
try:
if client_id in self._mcp_managed_clients:
- from framework.runner.mcp_connection_manager import MCPConnectionManager
+ from framework.loader.mcp_connection_manager import MCPConnectionManager
MCPConnectionManager.get_instance().release(server_name)
else:
diff --git a/core/framework/orchestrator/__init__.py b/core/framework/orchestrator/__init__.py
new file mode 100644
index 00000000..6ffc277a
--- /dev/null
+++ b/core/framework/orchestrator/__init__.py
@@ -0,0 +1,27 @@
+"""Orchestrator layer -- how agents are composed via graphs.
+
+Lazy imports to avoid circular dependencies with graph/event_loop/*.
+"""
+
+
+def __getattr__(name: str):
+ if name in ("GraphContext",):
+ from framework.orchestrator.context import GraphContext
+ return GraphContext
+ if name in ("DEFAULT_MAX_TOKENS", "EdgeCondition", "EdgeSpec", "GraphSpec"):
+ from framework.orchestrator import edge as _e
+ return getattr(_e, name)
+ if name in ("Orchestrator", "ExecutionResult"):
+ from framework.orchestrator import orchestrator as _o
+ return getattr(_o, name)
+ if name in ("Constraint", "Goal", "GoalStatus", "SuccessCriterion"):
+ from framework.orchestrator import goal as _g
+ return getattr(_g, name)
+ if name in ("DataBuffer", "NodeContext", "NodeProtocol", "NodeResult", "NodeSpec"):
+ from framework.orchestrator import node as _n
+ return getattr(_n, name)
+ if name in ("NodeWorker", "Activation", "FanOutTag", "FanOutTracker",
+ "WorkerCompletion", "WorkerLifecycle"):
+ from framework.orchestrator import node_worker as _nw
+ return getattr(_nw, name)
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/core/framework/graph/checkpoint_config.py b/core/framework/orchestrator/checkpoint_config.py
similarity index 100%
rename from core/framework/graph/checkpoint_config.py
rename to core/framework/orchestrator/checkpoint_config.py
diff --git a/core/framework/graph/client_io.py b/core/framework/orchestrator/client_io.py
similarity index 99%
rename from core/framework/graph/client_io.py
rename to core/framework/orchestrator/client_io.py
index 992b5818..1fbc66c9 100644
--- a/core/framework/graph/client_io.py
+++ b/core/framework/orchestrator/client_io.py
@@ -16,7 +16,7 @@ from collections.abc import AsyncIterator
from typing import TYPE_CHECKING
if TYPE_CHECKING:
- from framework.runtime.event_bus import EventBus
+ from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
diff --git a/core/framework/graph/context.py b/core/framework/orchestrator/context.py
similarity index 91%
rename from core/framework/graph/context.py
rename to core/framework/orchestrator/context.py
index 5b4fba4d..381c0474 100644
--- a/core/framework/graph/context.py
+++ b/core/framework/orchestrator/context.py
@@ -13,10 +13,10 @@ import asyncio
from dataclasses import dataclass, field
from typing import Any
-from framework.graph.edge import GraphSpec
-from framework.graph.goal import Goal
-from framework.graph.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
-from framework.runtime.core import Runtime
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.goal import Goal
+from framework.orchestrator.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
+from framework.tracker.decision_tracker import DecisionTracker
@dataclass
@@ -26,7 +26,7 @@ class GraphContext:
graph: GraphSpec
goal: Goal
buffer: DataBuffer
- runtime: Runtime
+ runtime: DecisionTracker
llm: Any # LLMProvider
tools: list[Any] # list[Tool]
tool_executor: Any # Callable
@@ -106,7 +106,7 @@ def build_node_accounts_prompt(
resolved = accounts_prompt
if accounts_data and tool_provider_map:
- from framework.graph.prompting import build_accounts_prompt
+ from framework.orchestrator.prompting import build_accounts_prompt
filtered = build_accounts_prompt(
accounts_data,
@@ -125,11 +125,27 @@ def _resolve_available_tools(
tools: list[Any],
override_tools: list[Any] | None,
) -> list[Any]:
- """Select tools available to the current node."""
+ """Select tools available to the current node.
+
+ Respects ``node_spec.tool_access_policy``:
+ - ``"all"`` -- all tools from the registry (no filtering).
+ - ``"explicit"`` -- only tools whose name appears in ``node_spec.tools``.
+ If the list is empty, **no tools** are given (default-deny).
+ - ``"none"`` -- no tools at all.
+ """
if override_tools is not None:
return list(override_tools)
+ policy = getattr(node_spec, "tool_access_policy", "explicit")
+
+ if policy == "none":
+ return []
+
+ if policy == "all":
+ return list(tools)
+
+ # "explicit" (default): only tools named in node_spec.tools.
if not node_spec.tools:
return []
@@ -149,7 +165,7 @@ def _derive_input_data(buffer: DataBuffer, input_keys: list[str]) -> dict[str, A
def build_node_context(
*,
- runtime: Runtime,
+ runtime: DecisionTracker,
node_spec: NodeSpec,
buffer: DataBuffer,
goal: Goal,
@@ -234,9 +250,6 @@ def build_node_context(
execution_id=execution_id,
run_id=run_id,
stream_id=stream_id,
- node_registry=node_registry or {},
- all_tools=list(all_tools or tools),
- shared_node_registry=shared_node_registry or {},
dynamic_tools_provider=dynamic_tools_provider,
dynamic_prompt_provider=dynamic_prompt_provider,
dynamic_memory_provider=dynamic_memory_provider,
@@ -308,9 +321,6 @@ def build_node_context_from_graph_context(
execution_id=gc.execution_id,
run_id=gc.run_id,
stream_id=gc.stream_id,
- node_registry=node_registry or gc.node_spec_registry,
- all_tools=gc.tools,
- shared_node_registry=gc.node_registry,
dynamic_tools_provider=gc.dynamic_tools_provider,
dynamic_prompt_provider=gc.dynamic_prompt_provider,
dynamic_memory_provider=gc.dynamic_memory_provider,
diff --git a/core/framework/graph/context_handoff.py b/core/framework/orchestrator/context_handoff.py
similarity index 98%
rename from core/framework/graph/context_handoff.py
rename to core/framework/orchestrator/context_handoff.py
index 69831506..0d9a7e54 100644
--- a/core/framework/graph/context_handoff.py
+++ b/core/framework/orchestrator/context_handoff.py
@@ -6,10 +6,10 @@ import logging
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
-from framework.graph.conversation import _try_extract_key
+from framework.agent_loop.conversation import _try_extract_key
if TYPE_CHECKING:
- from framework.graph.conversation import NodeConversation
+ from framework.agent_loop.conversation import NodeConversation
from framework.llm.provider import LLMProvider
logger = logging.getLogger(__name__)
diff --git a/core/framework/graph/conversation_judge.py b/core/framework/orchestrator/conversation_judge.py
similarity index 99%
rename from core/framework/graph/conversation_judge.py
rename to core/framework/orchestrator/conversation_judge.py
index 298776b4..e5a57a06 100644
--- a/core/framework/graph/conversation_judge.py
+++ b/core/framework/orchestrator/conversation_judge.py
@@ -15,7 +15,7 @@ import logging
from dataclasses import dataclass
from typing import Any
-from framework.graph.conversation import NodeConversation
+from framework.agent_loop.conversation import NodeConversation
from framework.llm.provider import LLMProvider
logger = logging.getLogger(__name__)
diff --git a/core/framework/graph/edge.py b/core/framework/orchestrator/edge.py
similarity index 89%
rename from core/framework/graph/edge.py
rename to core/framework/orchestrator/edge.py
index 284f66f8..a617edb9 100644
--- a/core/framework/graph/edge.py
+++ b/core/framework/orchestrator/edge.py
@@ -29,7 +29,7 @@ from typing import Any
from pydantic import BaseModel, Field, model_validator
-from framework.graph.safe_eval import safe_eval
+from framework.orchestrator.safe_eval import safe_eval
logger = logging.getLogger(__name__)
@@ -538,13 +538,6 @@ class GraphSpec(BaseModel):
for edge in self.get_outgoing_edges(current):
to_visit.append(edge.target)
- # Also mark sub-agents as reachable (they're invoked via delegate_to_sub_agent, not edges)
- for node in self.nodes:
- if node.id in reachable:
- sub_agents = getattr(node, "sub_agents", []) or []
- for sub_agent_id in sub_agents:
- reachable.add(sub_agent_id)
-
for node in self.nodes:
if node.id not in reachable:
# Skip if node is a pause node or entry point target
@@ -583,48 +576,4 @@ class GraphSpec(BaseModel):
else:
seen_keys[key] = node_id
- # GCU nodes must only be used as subagents
- gcu_node_ids = {n.id for n in self.nodes if n.node_type == "gcu"}
- if gcu_node_ids:
- # GCU nodes must not be entry nodes
- if self.entry_node in gcu_node_ids:
- errors.append(
- f"GCU node '{self.entry_node}' is used as entry node. "
- "GCU nodes must only be used as subagents via delegate_to_sub_agent()."
- )
-
- # GCU nodes must not be terminal nodes
- for term in self.terminal_nodes:
- if term in gcu_node_ids:
- errors.append(
- f"GCU node '{term}' is used as terminal node. "
- "GCU nodes must only be used as subagents."
- )
-
- # GCU nodes must not be connected via edges
- for edge in self.edges:
- if edge.source in gcu_node_ids:
- errors.append(
- f"GCU node '{edge.source}' is used as edge source (edge '{edge.id}'). "
- "GCU nodes must only be used as subagents, not connected via edges."
- )
- if edge.target in gcu_node_ids:
- errors.append(
- f"GCU node '{edge.target}' is used as edge target (edge '{edge.id}'). "
- "GCU nodes must only be used as subagents, not connected via edges."
- )
-
- # GCU nodes must be referenced in at least one parent's sub_agents
- referenced_subagents = set()
- for node in self.nodes:
- for sa_id in node.sub_agents or []:
- referenced_subagents.add(sa_id)
-
- orphaned = gcu_node_ids - referenced_subagents
- for nid in orphaned:
- errors.append(
- f"GCU node '{nid}' is not referenced in any node's sub_agents list. "
- "GCU nodes must be declared as subagents of a parent node."
- )
-
return {"errors": errors, "warnings": warnings}
diff --git a/core/framework/graph/files.py b/core/framework/orchestrator/files.py
similarity index 100%
rename from core/framework/graph/files.py
rename to core/framework/orchestrator/files.py
diff --git a/core/framework/graph/gcu.py b/core/framework/orchestrator/gcu.py
similarity index 86%
rename from core/framework/graph/gcu.py
rename to core/framework/orchestrator/gcu.py
index c336faf4..a68d2d11 100644
--- a/core/framework/graph/gcu.py
+++ b/core/framework/orchestrator/gcu.py
@@ -1,34 +1,14 @@
-"""GCU (browser automation) node type constants.
+"""Browser automation best-practices prompt.
-A ``gcu`` node is an ``event_loop`` node with two automatic enhancements:
-1. A canonical browser best-practices system prompt is prepended.
-2. All tools from the GCU MCP server are auto-included.
+This module provides ``GCU_BROWSER_SYSTEM_PROMPT`` -- a canonical set of
+browser automation guidelines that can be included in any node's system
+prompt that uses browser tools from the gcu-tools MCP server.
-No new ``NodeProtocol`` subclass — the ``gcu`` type is purely a declarative
-signal processed by the runner and executor at setup time.
+Browser tools are registered via the global MCP registry (gcu-tools).
+Nodes that need browser access declare ``tools: {policy: "all"}`` in their
+agent.json config.
"""
-# ---------------------------------------------------------------------------
-# MCP server identity
-# ---------------------------------------------------------------------------
-
-GCU_SERVER_NAME = "gcu-tools"
-"""Name used to identify the GCU MCP server in ``mcp_servers.json``."""
-
-GCU_MCP_SERVER_CONFIG: dict = {
- "name": GCU_SERVER_NAME,
- "transport": "stdio",
- "command": "uv",
- "args": ["run", "python", "-m", "gcu.server", "--stdio"],
- "cwd": "../../tools",
- "description": "GCU tools for browser automation",
-}
-"""Default stdio config for the GCU MCP server (relative to exports//)."""
-
-# ---------------------------------------------------------------------------
-# Browser best-practices system prompt
-# ---------------------------------------------------------------------------
-
GCU_BROWSER_SYSTEM_PROMPT = """\
# Browser Automation Best Practices
diff --git a/core/framework/graph/goal.py b/core/framework/orchestrator/goal.py
similarity index 100%
rename from core/framework/graph/goal.py
rename to core/framework/orchestrator/goal.py
diff --git a/core/framework/graph/node.py b/core/framework/orchestrator/node.py
similarity index 95%
rename from core/framework/graph/node.py
rename to core/framework/orchestrator/node.py
index a430f8d2..6c474817 100644
--- a/core/framework/graph/node.py
+++ b/core/framework/orchestrator/node.py
@@ -25,7 +25,7 @@ from typing import Any
from pydantic import BaseModel, Field
from framework.llm.provider import LLMProvider, Tool
-from framework.runtime.core import Runtime
+from framework.tracker.decision_tracker import DecisionTracker
logger = logging.getLogger(__name__)
@@ -144,15 +144,19 @@ class NodeSpec(BaseModel):
# For LLM nodes
system_prompt: str | None = Field(default=None, description="System prompt for LLM nodes")
tools: list[str] = Field(default_factory=list, description="Tool names this node can use")
+ tool_access_policy: str = Field(
+ default="explicit",
+ description=(
+ "Tool access policy for this node. "
+ "'all' = all tools from registry, "
+ "'explicit' = only tools listed in `tools` (default, recommended), "
+ "'none' = no tools at all."
+ ),
+ )
model: str | None = Field(
default=None, description="Specific model to use (defaults to graph default)"
)
- # For subagent delegation
- sub_agents: list[str] = Field(
- default_factory=list,
- description="Node IDs that can be invoked as subagents from this node",
- )
# For function nodes
function: str | None = Field(
default=None, description="Function name or path for function nodes"
@@ -459,7 +463,7 @@ class NodeContext:
"""
# Core runtime
- runtime: Runtime
+ runtime: DecisionTracker
# Node identity
node_id: str
@@ -526,20 +530,6 @@ class NodeContext:
# Falls back to node_id when not set (legacy / standalone executor).
stream_id: str = ""
- # Subagent mode
- is_subagent_mode: bool = False # True when running as a subagent (prevents nested delegation)
- report_callback: Any = None # async (message: str, data: dict | None) -> None
- node_registry: dict[str, "NodeSpec"] = field(default_factory=dict) # For subagent lookup
-
- # Full tool catalog (unfiltered) — used by _execute_subagent to resolve
- # subagent tools that aren't in the parent node's filtered available_tools.
- all_tools: list[Tool] = field(default_factory=list)
-
- # Shared reference to the executor's node_registry — used by subagent
- # escalation (_EscalationReceiver) to register temporary receivers that
- # the inject_input() routing chain can find.
- shared_node_registry: dict[str, Any] = field(default_factory=dict)
-
# Dynamic tool provider — when set, EventLoopNode rebuilds the tool
# list from this callback at the start of each iteration. Used by
# the queen to switch between building-mode and running-mode tools.
diff --git a/core/framework/graph/worker_agent.py b/core/framework/orchestrator/node_worker.py
similarity index 97%
rename from core/framework/graph/worker_agent.py
rename to core/framework/orchestrator/node_worker.py
index ce9d8b4e..436096f9 100644
--- a/core/framework/graph/worker_agent.py
+++ b/core/framework/orchestrator/node_worker.py
@@ -19,15 +19,15 @@ from dataclasses import dataclass, field
from enum import StrEnum
from typing import Any
-from framework.graph.context import GraphContext, build_node_context_from_graph_context
-from framework.graph.edge import EdgeCondition, EdgeSpec
-from framework.graph.node import (
+from framework.orchestrator.context import GraphContext, build_node_context_from_graph_context
+from framework.orchestrator.edge import EdgeCondition, EdgeSpec
+from framework.orchestrator.node import (
NodeContext,
NodeProtocol,
NodeResult,
NodeSpec,
)
-from framework.graph.validator import OutputValidator
+from framework.orchestrator.validator import OutputValidator
logger = logging.getLogger(__name__)
@@ -109,7 +109,7 @@ class RetryState:
# ---------------------------------------------------------------------------
-class WorkerAgent:
+class NodeWorker:
"""First-class autonomous worker for one node in the graph.
Lifecycle:
@@ -355,7 +355,7 @@ class WorkerAgent:
# Only skip retries for actual EventLoopNode instances (they handle
# retries internally). Custom NodeProtocol impls registered via
# register_node should be retried by the executor.
- from framework.graph.event_loop_node import EventLoopNode as _ELN
+ from framework.agent_loop.agent_loop import AgentLoop as _ELN
if isinstance(node_impl, _ELN):
max_retries = 0
@@ -603,10 +603,10 @@ class WorkerAgent:
return self._node_impl
# Auto-create EventLoopNode
- if self.node_spec.node_type in ("event_loop", "gcu"):
- from framework.graph.event_loop.types import LoopConfig
- from framework.graph.event_loop_node import EventLoopNode
- from framework.graph.node import warn_if_deprecated_client_facing
+ if self.node_spec.node_type == "event_loop":
+ from framework.agent_loop.internals.types import LoopConfig
+ from framework.agent_loop.agent_loop import AgentLoop
+ from framework.orchestrator.node import warn_if_deprecated_client_facing
conv_store = None
if gc.storage_path:
@@ -619,7 +619,7 @@ class WorkerAgent:
warn_if_deprecated_client_facing(self.node_spec)
default_max_iter = 100 if self.node_spec.supports_direct_user_io() else 50
- node = EventLoopNode(
+ node = AgentLoop(
event_bus=gc.event_bus,
judge=None,
config=LoopConfig(
@@ -734,7 +734,7 @@ class WorkerAgent:
if not next_spec or next_spec.node_type != "event_loop":
return
- from framework.graph.prompting import (
+ from framework.orchestrator.prompting import (
TransitionSpec,
build_narrative,
build_system_prompt_for_node_context,
diff --git a/core/framework/graph/executor.py b/core/framework/orchestrator/orchestrator.py
similarity index 97%
rename from core/framework/graph/executor.py
rename to core/framework/orchestrator/orchestrator.py
index c2015744..666b021a 100644
--- a/core/framework/graph/executor.py
+++ b/core/framework/orchestrator/orchestrator.py
@@ -16,21 +16,21 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
-from framework.graph.checkpoint_config import CheckpointConfig
-from framework.graph.context import GraphContext, build_node_context
-from framework.graph.conversation import LEGACY_RUN_ID
-from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
-from framework.graph.goal import Goal
-from framework.graph.node import (
+from framework.orchestrator.checkpoint_config import CheckpointConfig
+from framework.orchestrator.context import GraphContext, build_node_context
+from framework.agent_loop.conversation import LEGACY_RUN_ID
+from framework.orchestrator.edge import EdgeCondition, EdgeSpec, GraphSpec
+from framework.orchestrator.goal import Goal
+from framework.orchestrator.node import (
DataBuffer,
NodeProtocol,
NodeResult,
NodeSpec,
)
-from framework.graph.validator import OutputValidator
+from framework.orchestrator.validator import OutputValidator
from framework.llm.provider import LLMProvider, Tool
from framework.observability import set_trace_context
-from framework.runtime.core import Runtime
+from framework.tracker.decision_tracker import DecisionTracker
from framework.schemas.checkpoint import Checkpoint
from framework.storage.checkpoint_store import CheckpointStore
from framework.utils.io import atomic_write
@@ -112,7 +112,7 @@ class ParallelExecutionConfig:
branch_timeout_seconds: float = 300.0
-class GraphExecutor:
+class Orchestrator:
"""
Executes agent graphs.
@@ -133,7 +133,7 @@ class GraphExecutor:
def __init__(
self,
- runtime: Runtime,
+ runtime: DecisionTracker,
llm: LLMProvider | None = None,
tools: list[Tool] | None = None,
tool_executor: Callable | None = None,
@@ -165,7 +165,7 @@ class GraphExecutor:
Initialize the executor.
Args:
- runtime: Runtime for decision logging
+ runtime: DecisionTracker for decision logging
llm: LLM provider for LLM nodes
tools: Available tools
tool_executor: Function to execute tools
@@ -202,7 +202,7 @@ class GraphExecutor:
self.validator = OutputValidator()
self.logger = logging.getLogger(__name__)
self.logger.debug(
- "[GraphExecutor.__init__] Created with"
+ "[Orchestrator.__init__] Created with"
" stream_id=%s, execution_id=%s,"
" initial node_registry keys: %s",
stream_id,
@@ -361,8 +361,8 @@ class GraphExecutor:
Uses the same recursive binary-search splitting as EventLoopNode.
"""
- from framework.graph.conversation import extract_tool_call_history
- from framework.graph.event_loop_node import _is_context_too_large_error
+ from framework.agent_loop.conversation import extract_tool_call_history
+ from framework.agent_loop.agent_loop import _is_context_too_large_error
if _depth > self._PHASE_LLM_MAX_DEPTH:
raise RuntimeError("Phase LLM compaction recursion limit")
@@ -690,7 +690,7 @@ class GraphExecutor:
# and spillover files share the same session-scoped directory.
_ctx_token = None
if self._storage_path:
- from framework.runner.tool_registry import ToolRegistry
+ from framework.loader.tool_registry import ToolRegistry
_ctx_token = ToolRegistry.set_execution_context(
data_dir=str(self._storage_path / "data"),
@@ -712,13 +712,12 @@ class GraphExecutor:
finally:
if _ctx_token is not None:
- from framework.runner.tool_registry import ToolRegistry
+ from framework.loader.tool_registry import ToolRegistry
ToolRegistry.reset_execution_context(_ctx_token)
VALID_NODE_TYPES = {
"event_loop",
- "gcu",
}
# Node types removed in v0.5 — provide migration guidance
REMOVED_NODE_TYPES = {
@@ -736,11 +735,11 @@ class GraphExecutor:
# Check registry first
if node_spec.id in self.node_registry:
logger.debug(
- "[GraphExecutor._get_node_implementation] Found node '%s' in registry", node_spec.id
+ "[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id
)
return self.node_registry[node_spec.id]
logger.debug(
- "[GraphExecutor._get_node_implementation]"
+ "[Orchestrator._get_node_implementation]"
" Node '%s' not in registry (keys: %s),"
" creating new",
node_spec.id,
@@ -764,10 +763,10 @@ class GraphExecutor:
)
# Create based on type
- if node_spec.node_type in ("event_loop", "gcu"):
+ if node_spec.node_type == "event_loop":
# Auto-create EventLoopNode with sensible defaults.
# Custom configs can still be pre-registered via node_registry.
- from framework.graph.event_loop_node import EventLoopNode, LoopConfig
+ from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
# Create a FileConversationStore if a storage path is available
conv_store = None
@@ -787,13 +786,13 @@ class GraphExecutor:
if self._storage_path:
spillover = str(self._storage_path / "data")
- from framework.graph.node import warn_if_deprecated_client_facing
+ from framework.orchestrator.node import warn_if_deprecated_client_facing
warn_if_deprecated_client_facing(node_spec)
lc = self._loop_config
default_max_iter = 100 if node_spec.supports_direct_user_io() else 50
- node = EventLoopNode(
+ node = AgentLoop(
event_bus=self._event_bus,
judge=None, # implicit judge: accept when output_keys are filled
config=LoopConfig(
@@ -812,7 +811,7 @@ class GraphExecutor:
# Cache so inject_event() is reachable for queen interaction and escalation routing
self.node_registry[node_spec.id] = node
logger.debug(
- "[GraphExecutor._get_node_implementation]"
+ "[Orchestrator._get_node_implementation]"
" Cached node '%s' in node_registry,"
" registry now has keys: %s",
node_spec.id,
@@ -998,10 +997,10 @@ class GraphExecutor:
branch_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
effective_max_retries = node_spec.max_retries
- # Only override for actual EventLoopNode instances, not custom NodeProtocol impls
- from framework.graph.event_loop_node import EventLoopNode
+ # Only override for actual AgentLoop instances, not custom NodeProtocol impls
+ from framework.agent_loop.agent_loop import AgentLoop as _AgentLoop # noqa: F811
- if isinstance(branch_impl, EventLoopNode) and effective_max_retries > 1:
+ if isinstance(branch_impl, _AgentLoop) and effective_max_retries > 1:
self.logger.warning(
f"EventLoopNode '{node_spec.id}' has "
f"max_retries={effective_max_retries}. Overriding "
@@ -1042,9 +1041,6 @@ class GraphExecutor:
execution_id=self._execution_id,
run_id=self._run_id,
stream_id=self._stream_id,
- node_registry=node_registry,
- all_tools=self.tools,
- shared_node_registry=self.node_registry,
dynamic_tools_provider=self.dynamic_tools_provider,
dynamic_prompt_provider=self.dynamic_prompt_provider,
dynamic_memory_provider=self.dynamic_memory_provider,
@@ -1293,14 +1289,14 @@ class GraphExecutor:
Replaces the imperative while-loop with autonomous workers that
self-activate based on edge conditions and fan-out tracking.
"""
- from framework.graph.worker_agent import (
+ from framework.orchestrator.node_worker import (
Activation,
FanOutTag,
- WorkerAgent,
+ NodeWorker,
WorkerCompletion,
WorkerLifecycle,
)
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
# Build shared graph context
gc = GraphContext(
@@ -1339,9 +1335,9 @@ class GraphExecutor:
)
# Create one WorkerAgent per node
- workers: dict[str, WorkerAgent] = {}
+ workers: dict[str, NodeWorker] = {}
for node_spec in graph.nodes:
- workers[node_spec.id] = WorkerAgent(node_spec=node_spec, graph_context=gc)
+ workers[node_spec.id] = NodeWorker(node_spec=node_spec, graph_context=gc)
# Identify entry workers (graph entry node, not based on edge count)
# A node can be the entry point AND have incoming feedback edges.
@@ -1442,7 +1438,7 @@ class GraphExecutor:
def _route_activation(
activation: Activation,
- workers_map: dict[str, WorkerAgent],
+ workers_map: dict[str, NodeWorker],
pending_tasks_map: dict[str, asyncio.Task],
*,
has_event_subscription: bool,
diff --git a/core/framework/graph/prompt_composer.py b/core/framework/orchestrator/prompt_composer.py
similarity index 94%
rename from core/framework/graph/prompt_composer.py
rename to core/framework/orchestrator/prompt_composer.py
index b83c047e..92bbaca0 100644
--- a/core/framework/graph/prompt_composer.py
+++ b/core/framework/orchestrator/prompt_composer.py
@@ -9,7 +9,7 @@ import json
from pathlib import Path
from typing import TYPE_CHECKING
-from framework.graph.prompting import (
+from framework.orchestrator.prompting import (
EXECUTION_SCOPE_PREAMBLE,
TransitionSpec,
build_accounts_prompt,
@@ -19,7 +19,7 @@ from framework.graph.prompting import (
)
if TYPE_CHECKING:
- from framework.graph.node import DataBuffer, NodeSpec
+ from framework.orchestrator.node import DataBuffer, NodeSpec
_with_datetime = stamp_prompt_datetime
@@ -36,7 +36,7 @@ def compose_system_prompt(
node_type_preamble: str | None = None,
) -> str:
"""Compatibility wrapper for the legacy function signature."""
- from framework.graph.prompting import NodePromptSpec
+ from framework.orchestrator.prompting import NodePromptSpec
spec = NodePromptSpec(
identity_prompt=identity_prompt or "",
@@ -66,7 +66,6 @@ def compose_system_prompt(
protocols_prompt=spec.protocols_prompt,
node_type=spec.node_type,
output_keys=spec.output_keys,
- is_subagent_mode=spec.is_subagent_mode,
)
return build_system_prompt(spec)
@@ -135,7 +134,7 @@ def build_transition_marker(
)
-from framework.graph.prompting import build_transition_message # noqa: E402
+from framework.orchestrator.prompting import build_transition_message # noqa: E402
__all__ = [
"EXECUTION_SCOPE_PREAMBLE",
diff --git a/core/framework/graph/prompting.py b/core/framework/orchestrator/prompting.py
similarity index 95%
rename from core/framework/graph/prompting.py
rename to core/framework/orchestrator/prompting.py
index 072abf83..b76faa9b 100644
--- a/core/framework/graph/prompting.py
+++ b/core/framework/orchestrator/prompting.py
@@ -12,8 +12,8 @@ from datetime import datetime
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
- from framework.graph.edge import GraphSpec
- from framework.graph.node import DataBuffer
+ from framework.orchestrator.edge import GraphSpec
+ from framework.orchestrator.node import DataBuffer
# Injected into every worker node's system prompt so the LLM understands
@@ -40,7 +40,6 @@ class NodePromptSpec:
memory_prompt: str = ""
node_type: str = "event_loop"
output_keys: tuple[str, ...] = ()
- is_subagent_mode: bool = False
@dataclass(frozen=True)
@@ -165,7 +164,6 @@ def build_prompt_spec_from_node_context(
memory_prompt=resolved_memory_prompt,
node_type=ctx.node_spec.node_type,
output_keys=tuple(ctx.node_spec.output_keys or ()),
- is_subagent_mode=bool(getattr(ctx, "is_subagent_mode", False)),
)
@@ -195,13 +193,10 @@ def build_system_prompt(spec: NodePromptSpec) -> str:
if spec.narrative:
parts.append(f"\n--- Context (what has happened so far) ---\n{spec.narrative}")
- if not spec.is_subagent_mode and spec.node_type in ("event_loop", "gcu") and spec.output_keys:
+        if spec.node_type == "event_loop" and spec.output_keys:
parts.append(f"\n{EXECUTION_SCOPE_PREAMBLE}")
- if spec.node_type == "gcu":
- from framework.graph.gcu import GCU_BROWSER_SYSTEM_PROMPT
- parts.append(f"\n{GCU_BROWSER_SYSTEM_PROMPT}")
if spec.focus_prompt:
parts.append(f"\n--- Current Focus ---\n{spec.focus_prompt}")
diff --git a/core/framework/graph/safe_eval.py b/core/framework/orchestrator/safe_eval.py
similarity index 100%
rename from core/framework/graph/safe_eval.py
rename to core/framework/orchestrator/safe_eval.py
diff --git a/core/framework/graph/validator.py b/core/framework/orchestrator/validator.py
similarity index 100%
rename from core/framework/graph/validator.py
rename to core/framework/orchestrator/validator.py
diff --git a/core/framework/pipeline/__init__.py b/core/framework/pipeline/__init__.py
new file mode 100644
index 00000000..da2793a7
--- /dev/null
+++ b/core/framework/pipeline/__init__.py
@@ -0,0 +1,32 @@
+"""Pipeline middleware for the agent runtime.
+
+Stages run in order when :meth:`AgentHost.trigger` receives a request.
+Each stage can pass the context through, transform the input data, or reject
+the request entirely. This is the runtime-level analogue of AstrBot's
+pipeline architecture and lets operators compose rate limiting, validation,
+cost guards, and custom pre/post-processing without patching core code.
+"""
+
+from framework.pipeline.registry import (
+ build_pipeline_from_config,
+ build_stage,
+ register,
+)
+from framework.pipeline.runner import PipelineRunner
+from framework.pipeline.stage import (
+ PipelineContext,
+ PipelineRejectedError,
+ PipelineResult,
+ PipelineStage,
+)
+
+__all__ = [
+ "PipelineContext",
+ "PipelineRejectedError",
+ "PipelineResult",
+ "PipelineRunner",
+ "PipelineStage",
+ "build_pipeline_from_config",
+ "build_stage",
+ "register",
+]
diff --git a/core/framework/pipeline/execution_middleware.py b/core/framework/pipeline/execution_middleware.py
new file mode 100644
index 00000000..cdebfc99
--- /dev/null
+++ b/core/framework/pipeline/execution_middleware.py
@@ -0,0 +1,44 @@
+"""Execution-level middleware protocol.
+
+Unlike :class:`PipelineStage` (which gates ``AgentHost.trigger()`` at the
+request level), execution middleware runs at the start of **every** execution
+attempt inside ``ExecutionManager._run_execution()`` -- including resurrection
+retries.
+
+Use this for concerns that must re-evaluate per attempt:
+- Cost tracking (charge per attempt, not per trigger)
+- Tool scoping (different tools on retry)
+- Checkpoint config overrides
+- Per-execution logging/tracing setup
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class ExecutionContext:
+ """Context passed to execution middleware."""
+
+ execution_id: str
+ stream_id: str
+ run_id: str
+ input_data: dict[str, Any]
+ session_state: dict[str, Any] | None = None
+ attempt: int = 1
+ metadata: dict[str, Any] = field(default_factory=dict)
+
+
+class ExecutionMiddleware(ABC):
+ """Base class for per-execution middleware."""
+
+ @abstractmethod
+ async def on_execution_start(self, ctx: ExecutionContext) -> ExecutionContext:
+ """Called before each execution attempt (including resurrections).
+
+ Modify and return *ctx* to transform execution parameters.
+ Raise to abort the execution.
+ """
diff --git a/core/framework/pipeline/registry.py b/core/framework/pipeline/registry.py
new file mode 100644
index 00000000..f46f32c2
--- /dev/null
+++ b/core/framework/pipeline/registry.py
@@ -0,0 +1,107 @@
+"""Pipeline stage registry -- maps type names to stage classes.
+
+Stages self-register via the ``@register`` decorator. The
+``build_pipeline_from_config`` function reads a declarative config
+(from ``~/.hive/configuration.json`` or ``agent.json``) and
+instantiates the corresponding stage objects.
+
+Example config::
+
+ {
+ "pipeline": {
+ "stages": [
+ {"type": "rate_limit", "order": 200, "config": {"max_requests_per_minute": 60}},
+ {"type": "cost_guard", "order": 300, "config": {"max_cost_per_request": 0.50}}
+ ]
+ }
+ }
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from framework.pipeline.runner import PipelineRunner
+from framework.pipeline.stage import PipelineStage
+
+logger = logging.getLogger(__name__)
+
+_STAGE_REGISTRY: dict[str, type[PipelineStage]] = {}
+
+
+def register(name: str):
+ """Decorator to register a pipeline stage class by type name.
+
+ Usage::
+
+ @register("rate_limit")
+ class RateLimitStage(PipelineStage):
+ ...
+ """
+
+ def decorator(cls: type[PipelineStage]) -> type[PipelineStage]:
+ _STAGE_REGISTRY[name] = cls
+ return cls
+
+ return decorator
+
+
+def get_registered_stages() -> dict[str, type[PipelineStage]]:
+ """Return a copy of the stage registry."""
+ return dict(_STAGE_REGISTRY)
+
+
+def build_stage(spec: dict[str, Any]) -> PipelineStage:
+ """Instantiate a single stage from a config spec.
+
+ Args:
+ spec: Dict with ``type`` (required), ``order`` (optional),
+ and ``config`` (optional kwargs dict).
+
+ Raises:
+ KeyError: If the stage type is not registered.
+ """
+ stage_type = spec["type"]
+ if stage_type not in _STAGE_REGISTRY:
+ available = ", ".join(sorted(_STAGE_REGISTRY)) or "(none)"
+ raise KeyError(
+ f"Unknown pipeline stage type '{stage_type}'. "
+ f"Available: {available}"
+ )
+ cls = _STAGE_REGISTRY[stage_type]
+ config = spec.get("config", {})
+ stage = cls(**config)
+ if "order" in spec:
+ stage.order = spec["order"]
+ return stage
+
+
+def build_pipeline_from_config(
+ stages_config: list[dict[str, Any]],
+) -> PipelineRunner:
+ """Build a ``PipelineRunner`` from a declarative stages list.
+
+ Each entry is ``{"type": "...", "order": N, "config": {...}}``.
+ """
+ # Import built-in stages so they self-register
+ _ensure_builtins_registered()
+
+ stages = [build_stage(s) for s in stages_config]
+ return PipelineRunner(stages)
+
+
+def _ensure_builtins_registered() -> None:
+ """Import built-in stage modules so their ``@register`` decorators fire."""
+ if _STAGE_REGISTRY:
+ return # already populated
+ try:
+ import framework.pipeline.stages.cost_guard # noqa: F401
+ import framework.pipeline.stages.credential_resolver # noqa: F401
+ import framework.pipeline.stages.input_validation # noqa: F401
+ import framework.pipeline.stages.llm_provider # noqa: F401
+ import framework.pipeline.stages.mcp_registry # noqa: F401
+ import framework.pipeline.stages.rate_limit # noqa: F401
+ import framework.pipeline.stages.skill_registry # noqa: F401
+ except ImportError:
+ pass
diff --git a/core/framework/pipeline/runner.py b/core/framework/pipeline/runner.py
new file mode 100644
index 00000000..7d05deb9
--- /dev/null
+++ b/core/framework/pipeline/runner.py
@@ -0,0 +1,111 @@
+"""Pipeline runner -- executes registered stages in order."""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from framework.pipeline.stage import (
+ PipelineContext,
+ PipelineRejectedError,
+ PipelineStage,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class PipelineRunner:
+ """Executes a list of :class:`PipelineStage` instances in ``order``.
+
+    The runner is the orchestration layer that :class:`AgentHost` calls
+ on every trigger. Stages execute in ascending ``order`` (ties broken by
+ registration order). A stage returning ``reject`` short-circuits the
+ pipeline and causes the trigger to raise :class:`PipelineRejectedError`.
+ """
+
+ def __init__(self, stages: list[PipelineStage] | None = None) -> None:
+ self._stages: list[PipelineStage] = sorted(stages or [], key=lambda s: s.order)
+
+ @property
+ def stages(self) -> list[PipelineStage]:
+ return list(self._stages)
+
+ def add_stage(self, stage: PipelineStage) -> None:
+ """Add a stage after construction (for dynamic registration)."""
+ self._stages.append(stage)
+ self._stages.sort(key=lambda s: s.order)
+
+ async def initialize_all(self) -> None:
+ """Call ``initialize`` on every registered stage."""
+ for stage in self._stages:
+ name = stage.__class__.__name__
+ logger.info("[pipeline] Initializing %s (order=%d)", name, stage.order)
+ await stage.initialize()
+ logger.info("[pipeline] %s initialized", name)
+ if self._stages:
+ logger.info(
+ "[pipeline] Ready: %d stages [%s]",
+ len(self._stages),
+ " -> ".join(s.__class__.__name__ for s in self._stages),
+ )
+
+ async def run(self, ctx: PipelineContext) -> PipelineContext:
+ """Run all stages. Raises ``PipelineRejectedError`` on rejection.
+
+ Returns the (possibly transformed) context.
+ """
+ if not self._stages:
+ return ctx
+ import time
+
+ pipeline_start = time.perf_counter()
+ logger.info(
+ "[pipeline] Running %d stages for entry_point=%s",
+ len(self._stages),
+ ctx.entry_point_id,
+ )
+ for stage in self._stages:
+ stage_name = stage.__class__.__name__
+ t0 = time.perf_counter()
+ result = await stage.process(ctx)
+ elapsed_ms = (time.perf_counter() - t0) * 1000
+ if result.action == "reject":
+ reason = result.rejection_reason or "(no reason given)"
+ logger.warning(
+ "[pipeline] REJECTED by %s (%.1fms): %s",
+ stage_name, elapsed_ms, reason,
+ )
+ raise PipelineRejectedError(stage_name, reason)
+ if result.action == "transform":
+ logger.info(
+ "[pipeline] %s TRANSFORMED input (%.1fms)",
+ stage_name, elapsed_ms,
+ )
+ if result.input_data is not None:
+ ctx.input_data = result.input_data
+ else:
+ logger.info(
+ "[pipeline] %s passed (%.1fms)",
+ stage_name, elapsed_ms,
+ )
+ total_ms = (time.perf_counter() - pipeline_start) * 1000
+ logger.info("[pipeline] Complete (%.1fms total)", total_ms)
+ return ctx
+
+ async def run_post(self, ctx: PipelineContext, result: Any) -> Any:
+ """Run all stages' ``post_process`` hooks in order.
+
+ Each stage can transform the result; the final value is returned.
+ Exceptions are logged and swallowed -- post-processing must not
+ break a successful execution.
+ """
+ current = result
+ for stage in self._stages:
+ try:
+ current = await stage.post_process(ctx, current)
+ except Exception:
+ logger.exception(
+ "Pipeline post_process raised in %s; continuing with previous result",
+ stage.__class__.__name__,
+ )
+ return current
diff --git a/core/framework/pipeline/stage.py b/core/framework/pipeline/stage.py
new file mode 100644
index 00000000..e250189c
--- /dev/null
+++ b/core/framework/pipeline/stage.py
@@ -0,0 +1,77 @@
+"""Pipeline stage base class and request/response types."""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any, Literal
+
+
+class PipelineRejectedError(Exception):
+ """Raised by ``AgentHost.trigger`` when a stage rejects the request."""
+
+ def __init__(self, stage_name: str, reason: str) -> None:
+ super().__init__(f"Pipeline rejected by {stage_name}: {reason}")
+ self.stage_name = stage_name
+ self.reason = reason
+
+
+@dataclass
+class PipelineContext:
+ """Carries request data through the pipeline."""
+
+ entry_point_id: str
+ input_data: dict[str, Any]
+ correlation_id: str | None = None
+ session_state: dict[str, Any] | None = None
+ metadata: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class PipelineResult:
+ """Outcome of a stage's ``process`` call."""
+
+ action: Literal["continue", "reject", "transform"] = "continue"
+ input_data: dict[str, Any] | None = None
+ rejection_reason: str | None = None
+
+
+class PipelineStage(ABC):
+ """Base class for all middleware stages.
+
+ Infrastructure stages (LLM, MCP, credentials, skills) set typed
+ attributes during ``initialize()`` that the host reads after all
+ stages have initialized. Request-level stages (rate limit, input
+ validation, cost guard) implement ``process()``.
+
+ Attributes set by infrastructure stages:
+ llm: LLM provider instance (set by LlmProviderStage)
+ tool_registry: ToolRegistry with discovered MCP tools (set by McpRegistryStage)
+ accounts_prompt: Connected accounts system prompt block (set by CredentialResolverStage)
+ accounts_data: Raw account info list (set by CredentialResolverStage)
+ tool_provider_map: Tool name -> provider mapping (set by CredentialResolverStage)
+ skills_manager: SkillsManager instance (set by SkillRegistryStage)
+ """
+
+ order: int = 100
+
+ # Infrastructure stage outputs -- typed so _apply_pipeline_results
+ # doesn't need hasattr() sniffing.
+ llm: Any = None
+ tool_registry: Any = None
+ accounts_prompt: str = ""
+ accounts_data: list[dict] | None = None
+ tool_provider_map: dict[str, str] | None = None
+ skills_manager: Any = None
+
+ async def initialize(self) -> None:
+ """Called once when the runtime starts."""
+ return None
+
+ @abstractmethod
+ async def process(self, ctx: PipelineContext) -> PipelineResult:
+ """Process the incoming request."""
+
+ async def post_process(self, ctx: PipelineContext, result: Any) -> Any:
+ """Optional post-execution hook. Default: pass-through."""
+ return result
diff --git a/core/framework/pipeline/stages/__init__.py b/core/framework/pipeline/stages/__init__.py
new file mode 100644
index 00000000..6a9105be
--- /dev/null
+++ b/core/framework/pipeline/stages/__init__.py
@@ -0,0 +1,19 @@
+"""Built-in pipeline stages."""
+
+from framework.pipeline.stages.cost_guard import CostGuardStage
+from framework.pipeline.stages.credential_resolver import CredentialResolverStage
+from framework.pipeline.stages.input_validation import InputValidationStage
+from framework.pipeline.stages.llm_provider import LlmProviderStage
+from framework.pipeline.stages.mcp_registry import McpRegistryStage
+from framework.pipeline.stages.rate_limit import RateLimitStage
+from framework.pipeline.stages.skill_registry import SkillRegistryStage
+
+__all__ = [
+ "CostGuardStage",
+ "CredentialResolverStage",
+ "InputValidationStage",
+ "LlmProviderStage",
+ "McpRegistryStage",
+ "RateLimitStage",
+ "SkillRegistryStage",
+]
diff --git a/core/framework/pipeline/stages/cost_guard.py b/core/framework/pipeline/stages/cost_guard.py
new file mode 100644
index 00000000..4850fe3b
--- /dev/null
+++ b/core/framework/pipeline/stages/cost_guard.py
@@ -0,0 +1,35 @@
+"""Cost guard stage -- reject requests over a pre-flight budget."""
+
+from __future__ import annotations
+
+from framework.pipeline.registry import register
+from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
+
+
+@register("cost_guard")
+class CostGuardStage(PipelineStage):
+ """Reject requests whose estimated cost exceeds the per-request budget.
+
+ The cost estimate must be populated in ``ctx.metadata["estimated_cost"]``
+ by an earlier stage (or by the caller). When no estimate is present,
+ the stage passes through.
+ """
+
+ order = 300
+
+ def __init__(self, max_cost_per_request: float = 1.0) -> None:
+ self._budget = max_cost_per_request
+
+ async def process(self, ctx: PipelineContext) -> PipelineResult:
+ estimated = ctx.metadata.get("estimated_cost")
+ if estimated is None:
+ return PipelineResult(action="continue")
+ if estimated > self._budget:
+ return PipelineResult(
+ action="reject",
+ rejection_reason=(
+ f"Estimated cost ${estimated:.4f} exceeds budget "
+ f"${self._budget:.4f}"
+ ),
+ )
+ return PipelineResult(action="continue")
diff --git a/core/framework/pipeline/stages/credential_resolver.py b/core/framework/pipeline/stages/credential_resolver.py
new file mode 100644
index 00000000..b76df37f
--- /dev/null
+++ b/core/framework/pipeline/stages/credential_resolver.py
@@ -0,0 +1,58 @@
+"""Credential resolver pipeline stage.
+
+Resolves connected accounts at startup. Individual credential TTL/refresh
+is handled by MCP server processes internally -- they resolve tokens from
+the credential store on every tool call.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from framework.pipeline.registry import register
+from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
+
+logger = logging.getLogger(__name__)
+
+
+@register("credential_resolver")
+class CredentialResolverStage(PipelineStage):
+ """Resolve connected accounts for system prompt injection."""
+
+ order = 40
+
+ def __init__(self, credential_store: Any = None, **kwargs: Any) -> None:
+ self._credential_store = credential_store
+ self.accounts_prompt = ""
+ self.accounts_data: list[dict] | None = None
+ self.tool_provider_map: dict[str, str] | None = None
+
+ async def initialize(self) -> None:
+ try:
+ from aden_tools.credentials.store_adapter import (
+ CredentialStoreAdapter,
+ )
+ from framework.orchestrator.prompting import build_accounts_prompt
+
+ if self._credential_store is not None:
+ adapter = CredentialStoreAdapter(store=self._credential_store)
+ else:
+ adapter = CredentialStoreAdapter.default()
+ self.accounts_data = adapter.get_all_account_info()
+ self.tool_provider_map = adapter.get_tool_provider_map()
+ if self.accounts_data:
+ self.accounts_prompt = build_accounts_prompt(
+ self.accounts_data, self.tool_provider_map,
+ )
+ logger.info(
+ "[pipeline] CredentialResolverStage: %d accounts",
+ len(self.accounts_data or []),
+ )
+ except Exception:
+ logger.debug(
+ "Credential resolution failed (non-fatal)", exc_info=True,
+ )
+
+ async def process(self, ctx: PipelineContext) -> PipelineResult:
+ return PipelineResult(action="continue")
diff --git a/core/framework/pipeline/stages/input_validation.py b/core/framework/pipeline/stages/input_validation.py
new file mode 100644
index 00000000..3a025c0e
--- /dev/null
+++ b/core/framework/pipeline/stages/input_validation.py
@@ -0,0 +1,47 @@
+"""Input validation stage.
+
+Rejects requests whose ``input_data`` does not match the entry point's
+declared input schema. Uses a user-provided schema map:
+``{entry_point_id: {required_key: expected_type, ...}}``.
+"""
+
+from __future__ import annotations
+
+from framework.pipeline.registry import register
+from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
+
+
+@register("input_validation")
+class InputValidationStage(PipelineStage):
+ """Validate ``input_data`` against per-entry-point schemas.
+
+ The schema is a simple dict mapping key -> expected Python type.
+ For richer validation, substitute a Pydantic-based stage.
+ """
+
+ order = 100
+
+ def __init__(self, schemas: dict[str, dict[str, type]] | None = None) -> None:
+ self._schemas = schemas or {}
+
+ async def process(self, ctx: PipelineContext) -> PipelineResult:
+ schema = self._schemas.get(ctx.entry_point_id)
+ if not schema:
+ return PipelineResult(action="continue")
+
+ for key, expected_type in schema.items():
+ if key not in ctx.input_data:
+ return PipelineResult(
+ action="reject",
+ rejection_reason=f"Missing required input key: '{key}'",
+ )
+ value = ctx.input_data[key]
+ if not isinstance(value, expected_type):
+ return PipelineResult(
+ action="reject",
+ rejection_reason=(
+ f"Input key '{key}' has type {type(value).__name__}, "
+ f"expected {expected_type.__name__}"
+ ),
+ )
+ return PipelineResult(action="continue")
diff --git a/core/framework/pipeline/stages/llm_provider.py b/core/framework/pipeline/stages/llm_provider.py
new file mode 100644
index 00000000..899342f2
--- /dev/null
+++ b/core/framework/pipeline/stages/llm_provider.py
@@ -0,0 +1,95 @@
+"""LLM provider pipeline stage.
+
+Resolves the LLM provider from global config. This is the ONLY place
+the LLM gets created for worker agents.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from framework.pipeline.registry import register
+from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
+
+logger = logging.getLogger(__name__)
+
+
+@register("llm_provider")
+class LlmProviderStage(PipelineStage):
+ """Resolve LLM provider and make it available."""
+
+ order = 10
+
+ def __init__(
+ self,
+ model: str | None = None,
+ mock_mode: bool = False,
+ llm: Any = None,
+ **kwargs: Any,
+ ) -> None:
+ self._model = model
+ self._mock_mode = mock_mode
+ self.llm = llm # Pre-injected LLM (e.g. from session)
+
+ async def initialize(self) -> None:
+ if self.llm is not None:
+ return # Already injected
+
+ from framework.config import (
+ get_api_key,
+ get_api_keys,
+ get_hive_config,
+ get_preferred_model,
+ )
+
+ model = self._model or get_preferred_model()
+
+ if self._mock_mode:
+ from framework.llm.mock import MockLLMProvider
+
+ self.llm = MockLLMProvider(model=model)
+ return
+
+ config = get_hive_config()
+ llm_config = config.get("llm", {})
+ api_base = llm_config.get("api_base")
+
+ # Check for Antigravity (special provider)
+ if llm_config.get("use_antigravity_subscription"):
+ try:
+ from framework.llm.antigravity import AntigravityProvider
+
+ provider = AntigravityProvider(model=model)
+ if provider.has_credentials():
+ self.llm = provider
+ logger.info("[pipeline] LlmProviderStage: Antigravity")
+ return
+ except Exception:
+ pass
+
+ from framework.llm.litellm import LiteLLMProvider
+
+ api_key = get_api_key()
+ api_keys = get_api_keys()
+
+ if api_keys and len(api_keys) > 1:
+ self.llm = LiteLLMProvider(
+ model=model, api_keys=api_keys, api_base=api_base,
+ )
+ elif api_key:
+ extra = {}
+ if api_key.startswith("sk-ant-oat"):
+ extra["extra_headers"] = {
+ "authorization": f"Bearer {api_key}"
+ }
+ self.llm = LiteLLMProvider(
+ model=model, api_key=api_key, api_base=api_base, **extra,
+ )
+ else:
+ self.llm = LiteLLMProvider(model=model, api_base=api_base)
+
+ logger.info("[pipeline] LlmProviderStage: %s", model)
+
+ async def process(self, ctx: PipelineContext) -> PipelineResult:
+ return PipelineResult(action="continue")
diff --git a/core/framework/pipeline/stages/mcp_registry.py b/core/framework/pipeline/stages/mcp_registry.py
new file mode 100644
index 00000000..989cfd98
--- /dev/null
+++ b/core/framework/pipeline/stages/mcp_registry.py
@@ -0,0 +1,92 @@
+"""MCP registry pipeline stage.
+
+Resolves MCP server references from the agent config against the global
+registry and registers tools. This is the ONLY place MCP tools get loaded.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import asdict
+from pathlib import Path
+from typing import Any
+
+from framework.pipeline.registry import register
+from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
+
+logger = logging.getLogger(__name__)
+
+
+@register("mcp_registry")
+class McpRegistryStage(PipelineStage):
+ """Resolve MCP tools from the global registry."""
+
+ order = 50
+
+ def __init__(
+ self,
+ server_refs: list[dict[str, Any]] | None = None,
+ agent_path: str | Path | None = None,
+ tool_registry: Any = None,
+ **kwargs: Any,
+ ) -> None:
+ self._server_refs = server_refs or []
+ self._agent_path = Path(agent_path) if agent_path else None
+ self._tool_registry = tool_registry
+
+ async def initialize(self) -> None:
+ """Connect to MCP servers and discover tools."""
+ if self._tool_registry is None:
+ from framework.loader.tool_registry import ToolRegistry
+
+ self._tool_registry = ToolRegistry()
+
+ from framework.loader.mcp_registry import MCPRegistry
+
+ registry = MCPRegistry()
+ mcp_loaded = False
+
+ # 1. From agent.json mcp_servers refs
+ if self._server_refs:
+ names = [ref["name"] for ref in self._server_refs if ref.get("name")]
+ if names:
+ configs = registry.resolve_for_agent(include=names)
+ if configs:
+ self._tool_registry.load_registry_servers(
+ [asdict(c) for c in configs]
+ )
+ mcp_loaded = True
+ logger.info(
+ "[pipeline] McpRegistryStage: loaded %d servers: %s",
+ len(configs),
+ names,
+ )
+
+ # 2. Legacy: mcp_servers.json
+ if not mcp_loaded and self._agent_path:
+ mcp_json = self._agent_path / "mcp_servers.json"
+ if mcp_json.exists():
+ self._tool_registry.load_mcp_config(mcp_json)
+ mcp_loaded = True
+
+ # 3. Fallback: all servers from global registry
+ if not mcp_loaded:
+ configs = registry.resolve_for_agent(profile="all")
+ if configs:
+ self._tool_registry.load_registry_servers(
+ [asdict(c) for c in configs]
+ )
+ logger.info(
+ "[pipeline] McpRegistryStage: loaded %d servers (fallback)",
+ len(configs),
+ )
+
+ total = len(self._tool_registry.get_tools())
+ logger.info("[pipeline] McpRegistryStage: %d tools available", total)
+
+ async def process(self, ctx: PipelineContext) -> PipelineResult:
+ return PipelineResult(action="continue")
+
+ @property
+ def tool_registry(self):
+ return self._tool_registry
diff --git a/core/framework/pipeline/stages/rate_limit.py b/core/framework/pipeline/stages/rate_limit.py
new file mode 100644
index 00000000..364c10fa
--- /dev/null
+++ b/core/framework/pipeline/stages/rate_limit.py
@@ -0,0 +1,44 @@
+"""Per-(entry-point, session) rate limiting stage."""
+
+from __future__ import annotations
+
+import time
+from collections import defaultdict
+
+from framework.pipeline.registry import register
+from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
+
+
+@register("rate_limit")
+class RateLimitStage(PipelineStage):
+ """Reject requests that exceed ``max_requests_per_minute`` per session.
+
+    The key is ``entry_point_id:session_id``. When no session_id is
+ present in ``session_state``, a single shared "default" bucket is used.
+ """
+
+ order = 200
+
+ def __init__(self, max_requests_per_minute: int = 60) -> None:
+ self._max_rpm = max_requests_per_minute
+ self._timestamps: dict[str, list[float]] = defaultdict(list)
+
+ async def process(self, ctx: PipelineContext) -> PipelineResult:
+ session_id = "default"
+ if ctx.session_state:
+ session_id = str(ctx.session_state.get("session_id", "default"))
+ key = f"{ctx.entry_point_id}:{session_id}"
+
+ now = time.monotonic()
+ # Prune entries older than 60s.
+ self._timestamps[key] = [t for t in self._timestamps[key] if now - t < 60.0]
+ if len(self._timestamps[key]) >= self._max_rpm:
+ return PipelineResult(
+ action="reject",
+ rejection_reason=(
+ f"Rate limit exceeded: {self._max_rpm} req/min "
+ f"for session '{session_id}'"
+ ),
+ )
+ self._timestamps[key].append(now)
+ return PipelineResult(action="continue")
diff --git a/core/framework/pipeline/stages/skill_registry.py b/core/framework/pipeline/stages/skill_registry.py
new file mode 100644
index 00000000..71a73a69
--- /dev/null
+++ b/core/framework/pipeline/stages/skill_registry.py
@@ -0,0 +1,55 @@
+"""Skill registry pipeline stage.
+
+Discovers and loads skills. This is the ONLY place skills get loaded.
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any
+
+from framework.pipeline.registry import register
+from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
+
+logger = logging.getLogger(__name__)
+
+
+@register("skill_registry")
+class SkillRegistryStage(PipelineStage):
+ """Discover skills and provide prompts."""
+
+ order = 60
+
+ def __init__(
+ self,
+ project_root: str | Path | None = None,
+ interactive: bool = True,
+ skills_config: Any = None,
+ **kwargs: Any,
+ ) -> None:
+ self._project_root = Path(project_root) if project_root else None
+ self._interactive = interactive
+ self._skills_config = skills_config
+ self.skills_manager: Any = None
+
+ async def initialize(self) -> None:
+ from framework.skills.config import SkillsConfig
+ from framework.skills.manager import SkillsManager, SkillsManagerConfig
+
+ config = SkillsManagerConfig(
+ skills_config=self._skills_config or SkillsConfig(),
+ project_root=self._project_root,
+ interactive=self._interactive,
+ )
+ self.skills_manager = SkillsManager(config)
+ self.skills_manager.load()
+ await self.skills_manager.start_watching()
+ logger.info(
+ "[pipeline] SkillRegistryStage: catalog=%d chars, protocols=%d chars",
+ len(self.skills_manager.skills_catalog_prompt),
+ len(self.skills_manager.protocols_prompt),
+ )
+
+ async def process(self, ctx: PipelineContext) -> PipelineResult:
+ return PipelineResult(action="continue")
diff --git a/core/framework/runner/__init__.py b/core/framework/runner/__init__.py
deleted file mode 100644
index 376866a7..00000000
--- a/core/framework/runner/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-"""Agent Runner - load and run exported agents."""
-
-from framework.runner.mcp_registry import MCPRegistry
-from framework.runner.protocol import (
- AgentMessage,
- CapabilityLevel,
- CapabilityResponse,
- MessageType,
- OrchestratorResult,
-)
-from framework.runner.runner import AgentInfo, AgentRunner, ValidationResult
-from framework.runner.tool_registry import ToolRegistry, tool
-
-__all__ = [
- # Single agent
- "AgentRunner",
- "AgentInfo",
- "ValidationResult",
- "ToolRegistry",
- "MCPRegistry",
- "tool",
- "AgentMessage",
- "MessageType",
- "CapabilityLevel",
- "CapabilityResponse",
- "OrchestratorResult",
-]
diff --git a/core/framework/runtime/EVENT_TYPES.md b/core/framework/runtime/EVENT_TYPES.md
deleted file mode 100644
index 22d3cc6a..00000000
--- a/core/framework/runtime/EVENT_TYPES.md
+++ /dev/null
@@ -1,493 +0,0 @@
-# Event Types and Schema Reference
-
-The Hive runtime uses a pub/sub `EventBus` for inter-component communication and observability. Every event is an `AgentEvent` dataclass published through `EventBus.publish()`.
-
-## Event Envelope (`AgentEvent`)
-
-Every event shares a common envelope:
-
-| Field | Type | Description |
-| ---------------- | ----------------- | ------------------------------------------------------------ |
-| `type` | `EventType` (str) | Event type identifier (see below) |
-| `stream_id` | `str` | Entry point / pipeline that emitted the event |
-| `node_id` | `str \| None` | Graph node that emitted the event |
-| `execution_id` | `str \| None` | Unique execution run ID (UUID, set by `ExecutionStream`) |
-| `graph_id` | `str \| None` | Graph that emitted the event (set by `GraphScopedEventBus`) |
-| `data` | `dict` | Event-type-specific payload (see individual schemas below) |
-| `timestamp` | `datetime` | When the event was created |
-| `correlation_id` | `str \| None` | Optional ID for tracking related events across streams |
-
-### Identity Fields
-
-The identity tuple `(graph_id, stream_id, node_id, execution_id)` uniquely locates any event:
-
-- **`graph_id`** — Which graph produced the event. Set automatically by `GraphScopedEventBus` (a subclass that stamps `graph_id` on every `publish()` call). Values: `"worker"`, `"judge"`, `"queen"`, or the graph spec ID.
-- **`stream_id`** — Which entry point / pipeline. Corresponds to `EntryPointSpec.id` in the graph definition. For single-entry-point graphs, this equals the entry point name (e.g. `"default"`, `"health_check"`).
-- **`node_id`** — Which specific node emitted the event. For `EventLoopNode` events, this is the node spec ID.
-- **`execution_id`** — UUID identifying a specific execution run. Multiple concurrent executions of the same entry point each get a unique `execution_id`.
-
----
-
-## Execution Lifecycle
-
-### `execution_started`
-
-A new graph execution has begun.
-
-| Data Field | Type | Description |
-| ---------- | ------ | ------------------------------- |
-| `input` | `dict` | Input data passed to the graph |
-
-**Emitted by:** `ExecutionStream._run_execution()`
-
----
-
-### `execution_completed`
-
-A graph execution finished successfully.
-
-| Data Field | Type | Description |
-| ---------- | ------ | ----------------- |
-| `output` | `dict` | Final output data |
-
-**Emitted by:** `ExecutionStream._run_execution()`
-
-**Queen notification:** When a worker execution completes, the session manager \
-injects a `[WORKER_TERMINAL]` notification into the queen with the output summary. \
-The queen reports to the user and asks what to do next.
-
----
-
-### `execution_failed`
-
-A graph execution failed with an error.
-
-| Data Field | Type | Description |
-| ---------- | ----- | ------------- |
-| `error` | `str` | Error message |
-
-**Emitted by:** `ExecutionStream._run_execution()`
-
-**Queen notification:** When a worker execution fails, the session manager \
-injects a `[WORKER_TERMINAL]` notification into the queen with the error. \
-The queen reports to the user and helps troubleshoot.
-
----
-
-### `execution_paused`
-
-Execution has been paused (Ctrl+Z or HITL approval).
-
-| Data Field | Type | Description |
-| ---------- | ----- | ----------------- |
-| `reason` | `str` | Why it was paused |
-
-**Emitted by:** `GraphExecutor.execute()`
-
----
-
-### `execution_resumed`
-
-Execution has resumed from a paused state.
-
-| Data Field | Type | Description |
-| ---------- | ---- | ----------- |
-| *(none)* | | |
-
-**Emitted by:** `GraphExecutor.execute()`
-
----
-
-## Node Event-Loop Lifecycle
-
-These events track the inner loop of `EventLoopNode` — the multi-turn LLM streaming loop that powers most agent nodes.
-
-### `node_loop_started`
-
-An EventLoopNode has begun its execution loop.
-
-| Data Field | Type | Description |
-| ---------------- | ---------- | ------------------------------- |
-| `max_iterations` | `int\|null`| Maximum iterations configured |
-
-**Emitted by:** `EventLoopNode._publish_loop_started()`, `GraphExecutor` (for function nodes in parallel branches)
-
----
-
-### `node_loop_iteration`
-
-An EventLoopNode has started a new iteration (one LLM turn).
-
-| Data Field | Type | Description |
-| ----------- | ----- | ------------------------- |
-| `iteration` | `int` | Zero-based iteration index |
-
-**Emitted by:** `EventLoopNode._publish_iteration()`
-
----
-
-### `node_loop_completed`
-
-An EventLoopNode has finished its execution loop.
-
-| Data Field | Type | Description |
-| ------------ | ----- | -------------------------------------- |
-| `iterations` | `int` | Total number of iterations completed |
-
-**Emitted by:** `EventLoopNode._publish_loop_completed()`, `GraphExecutor` (for function nodes in parallel branches)
-
----
-
-## LLM Streaming
-
-### `llm_text_delta`
-
-Incremental text output from the LLM (non-client-facing nodes only).
-
-| Data Field | Type | Description |
-| ---------- | ----- | ---------------------------------------- |
-| `content` | `str` | New text chunk (delta) |
-| `snapshot` | `str` | Full accumulated text so far |
-
-**Emitted by:** `EventLoopNode._publish_text_delta()` when `client_facing=False`
-
----
-
-### `llm_reasoning_delta`
-
-Incremental reasoning/thinking output from the LLM.
-
-| Data Field | Type | Description |
-| ---------- | ----- | ------------------- |
-| `content` | `str` | New reasoning chunk |
-
-**Emitted by:** Not currently wired in `EventLoopNode` (reserved for extended thinking models).
-
----
-
-## Tool Lifecycle
-
-### `tool_call_started`
-
-The LLM has requested a tool call and execution is about to begin.
-
-| Data Field | Type | Description |
-| ------------ | ------ | ------------------------------------ |
-| `tool_use_id`| `str` | Unique ID for this tool invocation |
-| `tool_name` | `str` | Name of the tool being called |
-| `tool_input` | `dict` | Arguments passed to the tool |
-
-**Emitted by:** `EventLoopNode._publish_tool_started()`
-
----
-
-### `tool_call_completed`
-
-A tool call has finished executing.
-
-| Data Field | Type | Description |
-| ------------ | ------ | -------------------------------------- |
-| `tool_use_id`| `str` | Same ID from `tool_call_started` |
-| `tool_name` | `str` | Name of the tool |
-| `result` | `str` | Tool execution result (may be truncated)|
-| `is_error` | `bool` | Whether the tool returned an error |
-
-**Emitted by:** `EventLoopNode._publish_tool_completed()`
-
----
-
-## Client I/O
-
-These events are emitted by the queen's interactive turns. They drive the TUI's chat interface.
-
-### `client_output_delta`
-
-Incremental text output meant for the human operator.
-
-| Data Field | Type | Description |
-| ---------- | ----- | ---------------------------- |
-| `content` | `str` | New text chunk (delta) |
-| `snapshot` | `str` | Full accumulated text so far |
-
-**Emitted by:** `EventLoopNode._publish_text_delta()` for queen/user-facing output
-
----
-
-### `client_input_requested`
-
-The node is waiting for human input (via `ask_user` tool or auto-block on text-only turns).
-
-| Data Field | Type | Description |
-| ---------- | ----- | ------------------------------------------------- |
-| `prompt` | `str` | Optional prompt/question shown to the user |
-
-**Emitted by:** `EventLoopNode._await_user_input()`, doom loop handler
-
-The TUI subscribes to this event to show the input prompt and focus the chat input. After the user types, `inject_event()` is called on the node to unblock it.
-
----
-
-## Internal Node Observability
-
-### `node_internal_output`
-
-Output from a non-client-facing node (for debugging/monitoring).
-
-| Data Field | Type | Description |
-| ---------- | ----- | ---------------- |
-| `content` | `str` | Output text |
-
-**Emitted by:** Available via `emit_node_internal_output()` — not currently wired in the default `EventLoopNode`.
-
----
-
-### `node_input_blocked`
-
-A non-client-facing node is blocked waiting for input.
-
-| Data Field | Type | Description |
-| ---------- | ----- | --------------- |
-| `prompt` | `str` | Block reason |
-
-**Emitted by:** Available via `emit_node_input_blocked()` — reserved for future use.
-
----
-
-### `node_stalled`
-
-The node's LLM has produced identical responses for several consecutive turns (stall detection).
-
-| Data Field | Type | Description |
-| ---------- | ----- | ------------------------------------------------- |
-| `reason` | `str` | Always `"Consecutive identical responses detected"`|
-
-**Emitted by:** `EventLoopNode._publish_stalled()`
-
----
-
-### `node_tool_doom_loop`
-
-The LLM is calling the same tool(s) with identical arguments repeatedly (doom loop detection).
-
-| Data Field | Type | Description |
-| ------------- | ----- | ------------------------------------ |
-| `description` | `str` | Human-readable doom loop description |
-
-**Emitted by:** `EventLoopNode` doom loop handler
-
----
-
-## Judge Decisions
-
-### `judge_verdict`
-
-The judge (custom or implicit) has evaluated the current iteration.
-
-| Data Field | Type | Description |
-| ------------ | ----- | ---------------------------------------------------- |
-| `action` | `str` | `"ACCEPT"`, `"RETRY"`, `"ESCALATE"`, or `"CONTINUE"` |
-| `feedback` | `str` | Judge feedback (empty for ACCEPT/CONTINUE) |
-| `judge_type` | `str` | `"custom"` (explicit JudgeProtocol) or `"implicit"` (stop-reason heuristic) |
-| `iteration` | `int` | Which iteration this verdict applies to |
-
-**Emitted by:** `EventLoopNode._publish_judge_verdict()`
-
-**Verdict meanings:**
-- **ACCEPT** — Output meets requirements; node exits successfully.
-- **RETRY** — Output needs improvement; loop continues with feedback injected.
-- **ESCALATE** — Problem cannot be solved at this level; triggers escalation.
-- **CONTINUE** — Implicit verdict: LLM called tools, so it's making progress — let it keep going.
-
----
-
-## Output Tracking
-
-### `output_key_set`
-
-A node has set an output key via the `set_output` synthetic tool.
-
-| Data Field | Type | Description |
-| ---------- | ----- | ----------------- |
-| `key` | `str` | Output key name |
-
-**Emitted by:** `EventLoopNode._publish_output_key_set()`
-
----
-
-## Retry & Edge Tracking
-
-### `node_retry`
-
-A transient error occurred during an LLM call and the node is retrying.
-
-| Data Field | Type | Description |
-| ------------- | ----- | ---------------------------------- |
-| `retry_count` | `int` | Current retry attempt number |
-| `max_retries` | `int` | Maximum retries configured |
-| `error` | `str` | Error message (truncated to 500ch) |
-
-**Emitted by:** `EventLoopNode` (stream retry handler), `GraphExecutor` (node-level retry)
-
----
-
-### `edge_traversed`
-
-The executor has traversed an edge from one node to another.
-
-| Data Field | Type | Description |
-| ---------------- | ----- | ---------------------------------------------- |
-| `source_node` | `str` | Node ID the edge starts from |
-| `target_node` | `str` | Node ID the edge goes to |
-| `edge_condition` | `str` | Edge condition: `"router"`, `"on_success"`, etc. |
-
-**Emitted by:** `GraphExecutor.execute()` — after router decisions, condition-based edges, and fallback edges.
-
----
-
-## Context Management
-
-### `context_compacted`
-
-Not currently emitted — reserved for future use when `NodeConversation` compacts history.
-
----
-
-## State Changes
-
-### `state_changed`
-
-A shared buffer key has been modified.
-
-| Data Field | Type | Description |
-| ----------- | ----- | ---------------------------------- |
-| `key` | `str` | Buffer key that changed |
-| `old_value` | `Any` | Previous value |
-| `new_value` | `Any` | New value |
-| `scope` | `str` | Scope of the change |
-
-**Emitted by:** Available via `emit_state_changed()` — not currently wired in default execution.
-
----
-
-### `state_conflict`
-
-Not currently emitted — reserved for concurrent write conflict detection.
-
----
-
-## Goal Tracking
-
-### `goal_progress`
-
-Goal completion progress update.
-
-| Data Field | Type | Description |
-| ----------------- | ------- | ------------------------------------ |
-| `progress` | `float` | 0.0–1.0 completion fraction |
-| `criteria_status` | `dict` | Per-criterion status |
-
-**Emitted by:** Available via `emit_goal_progress()` — not currently wired in default execution.
-
----
-
-### `goal_achieved`
-
-Not currently emitted — reserved for explicit goal completion signals.
-
----
-
-### `constraint_violation`
-
-A goal constraint has been violated.
-
-| Data Field | Type | Description |
-| --------------- | ----- | ------------------------ |
-| `constraint_id` | `str` | Which constraint failed |
-| `description` | `str` | What went wrong |
-
-**Emitted by:** Available via `emit_constraint_violation()`.
-
----
-
-## Stream Lifecycle
-
-### `stream_started` / `stream_stopped`
-
-Not currently emitted — reserved for `ExecutionStream` lifecycle tracking.
-
----
-
-## External Triggers
-
-### `webhook_received`
-
-An external webhook has been received.
-
-| Data Field | Type | Description |
-| -------------- | ------ | ---------------------------- |
-| `path` | `str` | Webhook URL path |
-| `method` | `str` | HTTP method |
-| `headers` | `dict` | HTTP headers |
-| `payload` | `dict` | Request body |
-| `query_params` | `dict` | URL query parameters |
-
-**Emitted by:** Webhook server integration.
-
-Note: `node_id` is not set on this event; `stream_id` is the webhook source ID.
-
----
-
-## Escalation
-
-### `escalation_requested`
-
-An agent has requested handoff to the Hive Coder (via the `escalate` synthetic tool).
-
-| Data Field | Type | Description |
-| ---------- | ----- | ------------------------------- |
-| `reason` | `str` | Why escalation is needed |
-| `context` | `str` | Additional context for the coder|
-
-**Emitted by:** `EventLoopNode` when the LLM calls `escalate`.
-
----
-
-## Custom Events
-
-### `custom`
-
-User-defined events with arbitrary payloads. No schema enforced.
-
----
-
-## Subscription & Filtering
-
-Events can be filtered when subscribing:
-
-```python
-bus.subscribe(
- event_types=[EventType.TOOL_CALL_STARTED, EventType.TOOL_CALL_COMPLETED],
- handler=my_handler,
- filter_stream="default", # Only events from this stream
- filter_node="planner", # Only events from this node
- filter_execution="exec-uuid", # Only events from this execution
- filter_graph="worker", # Only events from this graph
-)
-```
-
-## Debug Event Logging
-
-Set `HIVE_DEBUG_EVENTS=1` to write every published event to a JSONL file at `~/.hive/event_logs/.jsonl`. Each line is the full JSON serialization of an `AgentEvent`:
-
-```json
-{
- "type": "tool_call_started",
- "stream_id": "default",
- "node_id": "planner",
- "execution_id": "a1b2c3d4-...",
- "graph_id": "worker",
- "data": {"tool_use_id": "tu_1", "tool_name": "web_search", "tool_input": {"query": "..."}},
- "timestamp": "2026-02-24T12:00:00.000000",
- "correlation_id": null
-}
-```
diff --git a/core/framework/runtime/README.md b/core/framework/runtime/README.md
deleted file mode 100644
index 0159c2e1..00000000
--- a/core/framework/runtime/README.md
+++ /dev/null
@@ -1,171 +0,0 @@
-# Agent Runtime
-
-Unified execution system for all Hive agents. Every agent — single-entry or multi-entry, headless or dashboard — runs through the same runtime stack.
-
-## Topology
-
-```
- AgentRunner.load(agent_path)
- |
- AgentRunner
- (factory + public API)
- |
- _setup_agent_runtime()
- |
- AgentRuntime
- (lifecycle + orchestration)
- / | \
- Stream A Stream B Stream C ← one per entry point
- | | |
- GraphExecutor GraphExecutor GraphExecutor
- | | |
- Node → Node → Node (graph traversal)
-```
-
-Single-entry agents get a `"default"` entry point automatically. There is no separate code path.
-
-## Components
-
-| Component | File | Role |
-|---|---|---|
-| `AgentRunner` | `runner/runner.py` | Load agents, configure tools/LLM, expose high-level API |
-| `AgentRuntime` | `runtime/agent_runtime.py` | Lifecycle management, entry point routing, event bus |
-| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
-| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
-| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
-| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
-| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
-| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |
-
-## Programming Interface
-
-### AgentRunner (high-level)
-
-```python
-from framework.runner import AgentRunner
-
-# Load and run
-runner = AgentRunner.load("exports/my_agent", model="anthropic/claude-sonnet-4-20250514")
-result = await runner.run({"query": "hello"})
-
-# Resume from paused session
-result = await runner.run({"query": "continue"}, session_state=saved_state)
-
-# Lifecycle
-await runner.start() # Start the runtime
-await runner.stop() # Stop the runtime
-exec_id = await runner.trigger("default", {}) # Non-blocking trigger
-entry_points = runner.get_entry_points() # List entry points
-
-# Context manager
-async with AgentRunner.load("exports/my_agent") as runner:
- result = await runner.run({"query": "hello"})
-
-# Cleanup
-runner.cleanup() # Synchronous
-await runner.cleanup_async() # Asynchronous
-```
-
-### AgentRuntime (lower-level)
-
-```python
-from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
-
-# Create runtime with entry points
-runtime = create_agent_runtime(
- graph=graph,
- goal=goal,
- storage_path=Path("~/.hive/agents/my_agent"),
- entry_points=[
- EntryPointSpec(id="default", name="Default", entry_node="start", trigger_type="manual"),
- ],
- llm=llm,
- tools=tools,
- tool_executor=tool_executor,
- checkpoint_config=checkpoint_config,
-)
-
-# Lifecycle
-await runtime.start()
-await runtime.stop()
-
-# Execution
-exec_id = await runtime.trigger("default", {"query": "hello"}) # Non-blocking
-result = await runtime.trigger_and_wait("default", {"query": "hello"}) # Blocking
-result = await runtime.trigger_and_wait("default", {}, session_state=state) # Resume
-
-# Client-facing node I/O
-await runtime.inject_input(node_id="chat", content="user response")
-
-# Events
-sub_id = runtime.subscribe_to_events(
- event_types=[EventType.CLIENT_OUTPUT_DELTA],
- handler=my_handler,
-)
-runtime.unsubscribe_from_events(sub_id)
-
-# Inspection
-runtime.is_running # bool
-runtime.event_bus # EventBus
-runtime.state_manager # SharedBufferManager
-runtime.get_stats() # Runtime statistics
-```
-
-## Execution Flow
-
-1. `AgentRunner.run()` calls `AgentRuntime.trigger_and_wait()`
-2. `AgentRuntime` routes to the `ExecutionStream` for the entry point
-3. `ExecutionStream` creates a `GraphExecutor` and calls `execute()`
-4. `GraphExecutor` traverses nodes, dispatches tools, manages checkpoints
-5. `ExecutionResult` flows back up through the stack
-6. `ExecutionStream` writes session state to disk
-
-## Session Resume
-
-All execution paths support session resume:
-
-```python
-# First run (agent pauses at a client-facing node)
-result = await runner.run({"query": "start task"})
-# result.paused_at = "review-node"
-# result.session_state = {"memory": {...}, "paused_at": "review-node", ...}
-
-# Resume
-result = await runner.run({"input": "approved"}, session_state=result.session_state)
-```
-
-Session state flows: `AgentRunner.run()` → `AgentRuntime.trigger_and_wait()` → `ExecutionStream.execute()` → `GraphExecutor.execute()`.
-
-Checkpoints are saved at node boundaries (`sessions/{id}/checkpoints/`) for crash recovery.
-
-## Event Bus
-
-The `EventBus` provides real-time execution visibility:
-
-| Event | When |
-|---|---|
-| `NODE_STARTED` | Node begins execution |
-| `NODE_COMPLETED` | Node finishes |
-| `TOOL_CALL_STARTED` | Tool invocation begins |
-| `TOOL_CALL_COMPLETED` | Tool invocation finishes |
-| `CLIENT_OUTPUT_DELTA` | Agent streams text to user |
-| `CLIENT_INPUT_REQUESTED` | Agent needs user input |
-| `EXECUTION_COMPLETED` | Full execution finishes |
-
-In headless mode, `AgentRunner` subscribes to `CLIENT_OUTPUT_DELTA` and `CLIENT_INPUT_REQUESTED` to print output and read stdin. The web dashboard subscribes to route events to the frontend.
-
-## Storage Layout
-
-```
-~/.hive/agents/{agent_name}/
- sessions/
- session_YYYYMMDD_HHMMSS_{uuid}/
- state.json # Session state (status, memory, progress)
- checkpoints/ # Node-boundary snapshots
- logs/
- summary.json # Execution summary
- details.jsonl # Detailed event log
- tool_logs.jsonl # Tool call log
- runtime_logs/ # Cross-session runtime logs
-```
diff --git a/core/framework/runtime/__init__.py b/core/framework/runtime/__init__.py
deleted file mode 100644
index 26441d41..00000000
--- a/core/framework/runtime/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""Runtime core for agent execution."""
-
-from framework.runtime.core import Runtime
-
-__all__ = ["Runtime"]
diff --git a/core/framework/runtime/tests/__init__.py b/core/framework/runtime/tests/__init__.py
deleted file mode 100644
index 2e79aec4..00000000
--- a/core/framework/runtime/tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Tests for runtime components."""
diff --git a/core/framework/runtime/tests/test_agent_runtime.py b/core/framework/runtime/tests/test_agent_runtime.py
deleted file mode 100644
index 561aba1b..00000000
--- a/core/framework/runtime/tests/test_agent_runtime.py
+++ /dev/null
@@ -1,869 +0,0 @@
-"""
-Tests for AgentRuntime and multi-entry-point execution.
-
-Tests:
-1. AgentRuntime creation and lifecycle
-2. Entry point registration
-3. Concurrent executions across streams
-4. SharedBufferManager isolation levels
-5. OutcomeAggregator goal evaluation
-6. EventBus pub/sub
-"""
-
-import asyncio
-import tempfile
-from pathlib import Path
-
-import pytest
-
-from framework.graph import Goal
-from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
-from framework.graph.goal import Constraint, SuccessCriterion
-from framework.graph.node import NodeSpec
-from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
-from framework.runtime.event_bus import AgentEvent, EventBus, EventType
-from framework.runtime.execution_stream import EntryPointSpec
-from framework.runtime.outcome_aggregator import OutcomeAggregator
-from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
-from framework.schemas.session_state import SessionState, SessionTimestamps
-
-# === Test Fixtures ===
-
-
-@pytest.fixture
-def sample_goal():
- """Create a sample goal for testing."""
- return Goal(
- id="test-goal",
- name="Test Goal",
- description="A goal for testing multi-entry-point execution",
- success_criteria=[
- SuccessCriterion(
- id="sc-1",
- description="Process all requests",
- metric="requests_processed",
- target="100%",
- weight=1.0,
- ),
- ],
- constraints=[
- Constraint(
- id="c-1",
- description="Must not exceed rate limits",
- constraint_type="hard",
- category="operational",
- ),
- ],
- )
-
-
-@pytest.fixture
-def sample_graph():
- """Create a sample graph with multiple entry points."""
- nodes = [
- NodeSpec(
- id="process-webhook",
- name="Process Webhook",
- description="Process incoming webhook",
- node_type="event_loop",
- input_keys=["webhook_data"],
- output_keys=["result"],
- ),
- NodeSpec(
- id="process-api",
- name="Process API Request",
- description="Process API request",
- node_type="event_loop",
- input_keys=["request_data"],
- output_keys=["result"],
- ),
- NodeSpec(
- id="complete",
- name="Complete",
- description="Execution complete",
- node_type="terminal",
- input_keys=["result"],
- output_keys=["final_result"],
- ),
- ]
-
- edges = [
- EdgeSpec(
- id="webhook-to-complete",
- source="process-webhook",
- target="complete",
- condition=EdgeCondition.ON_SUCCESS,
- ),
- EdgeSpec(
- id="api-to-complete",
- source="process-api",
- target="complete",
- condition=EdgeCondition.ON_SUCCESS,
- ),
- ]
-
- return GraphSpec(
- id="test-graph",
- goal_id="test-goal",
- version="1.0.0",
- entry_node="process-webhook",
- entry_points={"start": "process-webhook"},
- terminal_nodes=["complete"],
- pause_nodes=[],
- nodes=nodes,
- edges=edges,
- )
-
-
-@pytest.fixture
-def temp_storage():
- """Create a temporary storage directory."""
- with tempfile.TemporaryDirectory() as tmpdir:
- yield Path(tmpdir)
-
-
-# === SharedBufferManager Tests ===
-
-
-class TestSharedBufferManager:
- """Tests for SharedBufferManager."""
-
- def test_create_buffer(self):
- """Test creating execution-scoped buffer."""
- manager = SharedBufferManager()
- buffer = manager.create_buffer(
- execution_id="exec-1",
- stream_id="webhook",
- isolation=IsolationLevel.SHARED,
- )
- assert buffer is not None
- assert buffer._execution_id == "exec-1"
- assert buffer._stream_id == "webhook"
-
- @pytest.mark.asyncio
- async def test_isolated_state(self):
- """Test isolated state doesn't leak between executions."""
- manager = SharedBufferManager()
-
- buf1 = manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
- buf2 = manager.create_buffer("exec-2", "stream-1", IsolationLevel.ISOLATED)
-
- await buf1.write("key", "value1")
- await buf2.write("key", "value2")
-
- assert await buf1.read("key") == "value1"
- assert await buf2.read("key") == "value2"
-
- @pytest.mark.asyncio
- async def test_shared_state(self):
- """Test shared state is visible across executions."""
- manager = SharedBufferManager()
-
- manager.create_buffer("exec-1", "stream-1", IsolationLevel.SHARED)
- manager.create_buffer("exec-2", "stream-1", IsolationLevel.SHARED)
-
- # Write to global scope
- await manager.write(
- key="global_key",
- value="global_value",
- execution_id="exec-1",
- stream_id="stream-1",
- isolation=IsolationLevel.SHARED,
- scope="global",
- )
-
- # Both should see it
- value1 = await manager.read("global_key", "exec-1", "stream-1", IsolationLevel.SHARED)
- value2 = await manager.read("global_key", "exec-2", "stream-1", IsolationLevel.SHARED)
-
- assert value1 == "global_value"
- assert value2 == "global_value"
-
- def test_cleanup_execution(self):
- """Test execution cleanup removes state."""
- manager = SharedBufferManager()
- manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
-
- assert "exec-1" in manager._execution_state
-
- manager.cleanup_execution("exec-1")
-
- assert "exec-1" not in manager._execution_state
-
-
-class TestSessionState:
- """Tests for session state data-buffer compatibility."""
-
- def test_legacy_memory_alias_populates_data_buffer(self):
- """Legacy `memory` payloads should still hydrate the session buffer."""
- state = SessionState(
- session_id="session-1",
- goal_id="goal-1",
- timestamps=SessionTimestamps(
- started_at="2026-01-01T00:00:00",
- updated_at="2026-01-01T00:00:00",
- ),
- memory={"rules": "keep starred mail"},
- )
-
- assert state.data_buffer == {"rules": "keep starred mail"}
- assert state.memory == {"rules": "keep starred mail"}
- assert state.to_session_state_dict()["data_buffer"] == {"rules": "keep starred mail"}
-
-
-# === EventBus Tests ===
-
-
-class TestEventBus:
- """Tests for EventBus pub/sub."""
-
- @pytest.mark.asyncio
- async def test_publish_subscribe(self):
- """Test basic publish/subscribe."""
- bus = EventBus()
- received_events = []
-
- async def handler(event: AgentEvent):
- received_events.append(event)
-
- bus.subscribe(
- event_types=[EventType.EXECUTION_STARTED],
- handler=handler,
- )
-
- await bus.publish(
- AgentEvent(
- type=EventType.EXECUTION_STARTED,
- stream_id="webhook",
- execution_id="exec-1",
- data={"test": "data"},
- )
- )
-
- # Allow handler to run
- await asyncio.sleep(0.1)
-
- assert len(received_events) == 1
- assert received_events[0].type == EventType.EXECUTION_STARTED
- assert received_events[0].stream_id == "webhook"
-
- @pytest.mark.asyncio
- async def test_stream_filter(self):
- """Test filtering by stream ID."""
- bus = EventBus()
- received_events = []
-
- async def handler(event: AgentEvent):
- received_events.append(event)
-
- bus.subscribe(
- event_types=[EventType.EXECUTION_STARTED],
- handler=handler,
- filter_stream="webhook",
- )
-
- # Publish to webhook stream (should be received)
- await bus.publish(
- AgentEvent(
- type=EventType.EXECUTION_STARTED,
- stream_id="webhook",
- )
- )
-
- # Publish to api stream (should NOT be received)
- await bus.publish(
- AgentEvent(
- type=EventType.EXECUTION_STARTED,
- stream_id="api",
- )
- )
-
- await asyncio.sleep(0.1)
-
- assert len(received_events) == 1
- assert received_events[0].stream_id == "webhook"
-
- def test_unsubscribe(self):
- """Test unsubscribing from events."""
- bus = EventBus()
-
- async def handler(event: AgentEvent):
- pass
-
- sub_id = bus.subscribe(
- event_types=[EventType.EXECUTION_STARTED],
- handler=handler,
- )
-
- assert sub_id in bus._subscriptions
-
- result = bus.unsubscribe(sub_id)
-
- assert result is True
- assert sub_id not in bus._subscriptions
-
- @pytest.mark.asyncio
- async def test_wait_for(self):
- """Test waiting for a specific event."""
- bus = EventBus()
-
- # Start waiting in background
- async def wait_and_check():
- event = await bus.wait_for(
- event_type=EventType.EXECUTION_COMPLETED,
- timeout=1.0,
- )
- return event
-
- wait_task = asyncio.create_task(wait_and_check())
-
- # Publish the event
- await asyncio.sleep(0.1)
- await bus.publish(
- AgentEvent(
- type=EventType.EXECUTION_COMPLETED,
- stream_id="webhook",
- execution_id="exec-1",
- )
- )
-
- event = await wait_task
-
- assert event is not None
- assert event.type == EventType.EXECUTION_COMPLETED
-
-
-# === OutcomeAggregator Tests ===
-
-
-class TestOutcomeAggregator:
- """Tests for OutcomeAggregator."""
-
- def test_record_decision(self, sample_goal):
- """Test recording decisions."""
- aggregator = OutcomeAggregator(sample_goal)
-
- from framework.schemas.decision import Decision, DecisionType
-
- decision = Decision(
- id="dec-1",
- node_id="process-webhook",
- intent="Process incoming webhook",
- decision_type=DecisionType.PATH_CHOICE,
- options=[],
- chosen_option_id="opt-1",
- reasoning="Standard processing path",
- )
-
- aggregator.record_decision("webhook", "exec-1", decision)
-
- assert aggregator._total_decisions == 1
- assert len(aggregator._decisions) == 1
-
- @pytest.mark.asyncio
- async def test_evaluate_goal_progress(self, sample_goal):
- """Test goal progress evaluation."""
- aggregator = OutcomeAggregator(sample_goal)
-
- progress = await aggregator.evaluate_goal_progress()
-
- assert "overall_progress" in progress
- assert "criteria_status" in progress
- assert "constraint_violations" in progress
- assert "recommendation" in progress
-
- def test_record_constraint_violation(self, sample_goal):
- """Test recording constraint violations."""
- aggregator = OutcomeAggregator(sample_goal)
-
- aggregator.record_constraint_violation(
- constraint_id="c-1",
- description="Rate limit exceeded",
- violation_details="More than 100 requests/minute",
- stream_id="webhook",
- execution_id="exec-1",
- )
-
- assert len(aggregator._constraint_violations) == 1
- assert aggregator._constraint_violations[0].constraint_id == "c-1"
-
-
-# === AgentRuntime Tests ===
-
-
-class TestAgentRuntime:
- """Tests for AgentRuntime orchestration."""
-
- def test_register_entry_point(self, sample_graph, sample_goal, temp_storage):
- """Test registering entry points."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="manual",
- name="Manual Trigger",
- entry_node="process-webhook",
- trigger_type="manual",
- )
-
- runtime.register_entry_point(entry_spec)
-
- assert "manual" in runtime._entry_points
- assert len(runtime.get_entry_points()) == 1
-
- def test_register_duplicate_entry_point_fails(self, sample_graph, sample_goal, temp_storage):
- """Test that duplicate entry point IDs fail."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="webhook",
- name="Webhook Handler",
- entry_node="process-webhook",
- trigger_type="webhook",
- )
-
- runtime.register_entry_point(entry_spec)
-
- with pytest.raises(ValueError, match="already registered"):
- runtime.register_entry_point(entry_spec)
-
- def test_register_invalid_entry_node_fails(self, sample_graph, sample_goal, temp_storage):
- """Test that invalid entry nodes fail."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="invalid",
- name="Invalid Entry",
- entry_node="nonexistent-node",
- trigger_type="manual",
- )
-
- with pytest.raises(ValueError, match="not found in graph"):
- runtime.register_entry_point(entry_spec)
-
- @pytest.mark.asyncio
- async def test_start_stop_lifecycle(self, sample_graph, sample_goal, temp_storage):
- """Test runtime start/stop lifecycle."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="webhook",
- name="Webhook Handler",
- entry_node="process-webhook",
- trigger_type="webhook",
- )
-
- runtime.register_entry_point(entry_spec)
-
- assert not runtime.is_running
-
- await runtime.start()
-
- assert runtime.is_running
- assert "webhook" in runtime._streams
-
- await runtime.stop()
-
- assert not runtime.is_running
- assert len(runtime._streams) == 0
-
- @pytest.mark.asyncio
- async def test_trigger_requires_running(self, sample_graph, sample_goal, temp_storage):
- """Test that trigger fails if runtime not running."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="webhook",
- name="Webhook Handler",
- entry_node="process-webhook",
- trigger_type="webhook",
- )
-
- runtime.register_entry_point(entry_spec)
-
- with pytest.raises(RuntimeError, match="not running"):
- await runtime.trigger("webhook", {"test": "data"})
-
-
-# === GraphSpec Validation Tests ===
-
-
-# === Integration Tests ===
-
-
-class TestCreateAgentRuntime:
- """Tests for the create_agent_runtime factory."""
-
- def test_create_with_entry_points(self, sample_graph, sample_goal, temp_storage):
- """Test factory creates runtime with entry points."""
- entry_points = [
- EntryPointSpec(
- id="webhook",
- name="Webhook",
- entry_node="process-webhook",
- trigger_type="webhook",
- ),
- EntryPointSpec(
- id="api",
- name="API",
- entry_node="process-api",
- trigger_type="api",
- ),
- ]
-
- runtime = create_agent_runtime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- entry_points=entry_points,
- )
-
- assert len(runtime.get_entry_points()) == 2
- assert "webhook" in runtime._entry_points
- assert "api" in runtime._entry_points
-
-
-# === Timer Entry Point Tests ===
-
-
-class TestTimerEntryPoints:
- """Tests for timer-driven entry points (interval and cron)."""
-
- @pytest.mark.asyncio
- async def test_interval_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
- """Test that interval_minutes timer creates an async task."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="timer-interval",
- name="Interval Timer",
- entry_node="process-webhook",
- trigger_type="timer",
- trigger_config={"interval_minutes": 60},
- )
- runtime.register_entry_point(entry_spec)
-
- await runtime.start()
- try:
- assert len(runtime._timer_tasks) == 1
- assert not runtime._timer_tasks[0].done()
- # Give the async task a moment to set next_fire
- await asyncio.sleep(0.05)
- assert "timer-interval" in runtime._timer_next_fire
- finally:
- await runtime.stop()
-
- assert len(runtime._timer_tasks) == 0
-
- @pytest.mark.asyncio
- async def test_cron_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
- """Test that cron expression timer creates an async task."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="timer-cron",
- name="Cron Timer",
- entry_node="process-webhook",
- trigger_type="timer",
- trigger_config={"cron": "*/5 * * * *"}, # Every 5 minutes
- )
- runtime.register_entry_point(entry_spec)
-
- await runtime.start()
- try:
- assert len(runtime._timer_tasks) == 1
- assert not runtime._timer_tasks[0].done()
- # Give the async task a moment to set next_fire
- await asyncio.sleep(0.05)
- assert "timer-cron" in runtime._timer_next_fire
- finally:
- await runtime.stop()
-
- @pytest.mark.asyncio
- async def test_invalid_cron_expression_skipped(
- self, sample_graph, sample_goal, temp_storage, caplog
- ):
- """Test that an invalid cron expression logs a warning and skips."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="timer-bad-cron",
- name="Bad Cron Timer",
- entry_node="process-webhook",
- trigger_type="timer",
- trigger_config={"cron": "not a cron expression"},
- )
- runtime.register_entry_point(entry_spec)
-
- await runtime.start()
- try:
- assert len(runtime._timer_tasks) == 0
- assert "invalid cron" in caplog.text.lower() or "Invalid cron" in caplog.text
- finally:
- await runtime.stop()
-
- @pytest.mark.asyncio
- async def test_cron_takes_priority_over_interval(
- self, sample_graph, sample_goal, temp_storage, caplog
- ):
- """Test that when both cron and interval_minutes are set, cron wins."""
- import logging
-
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="timer-both",
- name="Both Timer",
- entry_node="process-webhook",
- trigger_type="timer",
- trigger_config={"cron": "0 9 * * *", "interval_minutes": 30},
- )
- runtime.register_entry_point(entry_spec)
-
- with caplog.at_level(logging.INFO):
- await runtime.start()
- try:
- assert len(runtime._timer_tasks) == 1
- # Should log cron, not interval
- assert any("cron" in r.message.lower() for r in caplog.records)
- finally:
- await runtime.stop()
-
- @pytest.mark.asyncio
- async def test_no_interval_or_cron_warns(self, sample_graph, sample_goal, temp_storage, caplog):
- """Test that timer with neither cron nor interval_minutes logs a warning."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="timer-empty",
- name="Empty Timer",
- entry_node="process-webhook",
- trigger_type="timer",
- trigger_config={},
- )
- runtime.register_entry_point(entry_spec)
-
- await runtime.start()
- try:
- assert len(runtime._timer_tasks) == 0
- assert "no 'cron' or valid 'interval_minutes'" in caplog.text
- finally:
- await runtime.stop()
-
- @pytest.mark.asyncio
- async def test_cron_immediate_fires_first(self, sample_graph, sample_goal, temp_storage):
- """Test that run_immediately=True with cron doesn't set next_fire before first run."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="timer-cron-immediate",
- name="Cron Immediate",
- entry_node="process-webhook",
- trigger_type="timer",
- trigger_config={"cron": "0 0 * * *", "run_immediately": True},
- )
- runtime.register_entry_point(entry_spec)
-
- await runtime.start()
- try:
- assert len(runtime._timer_tasks) == 1
- # With run_immediately, the task enters the while loop directly,
- # so _timer_next_fire is NOT set before the first trigger attempt
- # (it pops it at the top of the loop)
- # Give it a moment to start executing
- await asyncio.sleep(0.05)
- # Task should still be running (it will try to trigger and likely fail
- # since there's no LLM, but the task itself continues)
- assert not runtime._timer_tasks[0].done()
- finally:
- await runtime.stop()
-
-
-# === Cancel All Tasks Tests ===
-
-
-class TestCancelAllTasks:
- """Tests for cancel_all_tasks and cancel_all_tasks_async."""
-
- @pytest.mark.asyncio
- async def test_cancel_all_tasks_async_returns_false_when_no_tasks(
- self, sample_graph, sample_goal, temp_storage
- ):
- """Test that cancel_all_tasks_async returns False with no running tasks."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="webhook",
- name="Webhook",
- entry_node="process-webhook",
- trigger_type="webhook",
- )
- runtime.register_entry_point(entry_spec)
- await runtime.start()
-
- try:
- result = await runtime.cancel_all_tasks_async()
- assert result is False
- finally:
- await runtime.stop()
-
- @pytest.mark.asyncio
- async def test_cancel_all_tasks_async_cancels_running_task(
- self, sample_graph, sample_goal, temp_storage
- ):
- """Test that cancel_all_tasks_async cancels a running task and returns True."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- entry_spec = EntryPointSpec(
- id="webhook",
- name="Webhook",
- entry_node="process-webhook",
- trigger_type="webhook",
- )
- runtime.register_entry_point(entry_spec)
- await runtime.start()
-
- try:
- # Inject a fake running task into the stream
- stream = runtime._streams["webhook"]
-
- async def hang_forever():
- await asyncio.get_event_loop().create_future()
-
- fake_task = asyncio.ensure_future(hang_forever())
- stream._execution_tasks["fake-exec"] = fake_task
-
- result = await runtime.cancel_all_tasks_async()
- assert result is True
-
- # Let the CancelledError propagate
- try:
- await fake_task
- except asyncio.CancelledError:
- pass
- assert fake_task.cancelled()
-
- # Clean up
- del stream._execution_tasks["fake-exec"]
- finally:
- await runtime.stop()
-
- @pytest.mark.asyncio
- async def test_cancel_all_tasks_async_cancels_multiple_tasks_across_streams(
- self, sample_graph, sample_goal, temp_storage
- ):
- """Test that cancel_all_tasks_async cancels tasks across multiple streams."""
- runtime = AgentRuntime(
- graph=sample_graph,
- goal=sample_goal,
- storage_path=temp_storage,
- )
-
- # Register two entry points so we get two streams
- runtime.register_entry_point(
- EntryPointSpec(
- id="stream-a",
- name="Stream A",
- entry_node="process-webhook",
- trigger_type="webhook",
- )
- )
- runtime.register_entry_point(
- EntryPointSpec(
- id="stream-b",
- name="Stream B",
- entry_node="process-webhook",
- trigger_type="webhook",
- )
- )
- await runtime.start()
-
- try:
-
- async def hang_forever():
- await asyncio.get_event_loop().create_future()
-
- stream_a = runtime._streams["stream-a"]
- stream_b = runtime._streams["stream-b"]
-
- # Two tasks in stream A, one task in stream B
- task_a1 = asyncio.ensure_future(hang_forever())
- task_a2 = asyncio.ensure_future(hang_forever())
- task_b1 = asyncio.ensure_future(hang_forever())
-
- stream_a._execution_tasks["exec-a1"] = task_a1
- stream_a._execution_tasks["exec-a2"] = task_a2
- stream_b._execution_tasks["exec-b1"] = task_b1
-
- result = await runtime.cancel_all_tasks_async()
- assert result is True
-
- # Let CancelledErrors propagate
- for task in [task_a1, task_a2, task_b1]:
- try:
- await task
- except asyncio.CancelledError:
- pass
- assert task.cancelled()
-
- # Clean up
- del stream_a._execution_tasks["exec-a1"]
- del stream_a._execution_tasks["exec-a2"]
- del stream_b._execution_tasks["exec-b1"]
- finally:
- await runtime.stop()
-
-
-if __name__ == "__main__":
- pytest.main([__file__, "-v"])
diff --git a/core/framework/runtime/tests/test_idempotency.py b/core/framework/runtime/tests/test_idempotency.py
deleted file mode 100644
index 713e037b..00000000
--- a/core/framework/runtime/tests/test_idempotency.py
+++ /dev/null
@@ -1,268 +0,0 @@
-"""Tests for webhook idempotency key support in AgentRuntime.trigger()."""
-
-import asyncio
-import time
-from collections import OrderedDict
-from unittest.mock import AsyncMock, MagicMock
-
-import pytest
-
-from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
-
-
-def _make_runtime(ttl=300.0, max_keys=10000):
- """Create a minimal AgentRuntime with idempotency cache attributes.
-
- Uses ``object.__new__`` to skip ``__init__`` and its heavy dependencies
- (storage, LLM, skills) — we only need the cache and config for these tests.
- """
- runtime = object.__new__(AgentRuntime)
- runtime._config = AgentRuntimeConfig(idempotency_ttl_seconds=ttl, idempotency_max_keys=max_keys)
- runtime._running = True
- runtime._lock = asyncio.Lock()
- runtime._idempotency_keys = OrderedDict()
- runtime._idempotency_times = {}
- runtime._graphs = {}
- runtime._active_graph_id = "primary"
- runtime._graph_id = "primary"
- runtime._streams = {}
- runtime._entry_points = {}
- return runtime
-
-
-def _make_runtime_with_stream(ttl=300.0, max_keys=10000):
- """Create a mock runtime whose stream.execute() returns unique IDs."""
- runtime = _make_runtime(ttl=ttl, max_keys=max_keys)
-
- call_count = 0
-
- async def _fake_execute(*args, **kwargs):
- nonlocal call_count
- call_count += 1
- return f"session-{call_count:04d}"
-
- stream = MagicMock()
- stream.execute = _fake_execute
- runtime._streams = {"webhook": stream}
- runtime._entry_points = {"webhook": MagicMock()}
- return runtime
-
-
-class TestIdempotencyConfig:
- """Verify idempotency configuration defaults."""
-
- def test_default_ttl(self):
- config = AgentRuntimeConfig()
- assert config.idempotency_ttl_seconds == 300.0
-
- def test_default_max_keys(self):
- config = AgentRuntimeConfig()
- assert config.idempotency_max_keys == 10000
-
- def test_custom_config(self):
- config = AgentRuntimeConfig(idempotency_ttl_seconds=60.0, idempotency_max_keys=100)
- assert config.idempotency_ttl_seconds == 60.0
- assert config.idempotency_max_keys == 100
-
-
-class TestIdempotencyCache:
- """Test the idempotency cache and pruning logic directly."""
-
- def test_cache_stores_and_retrieves_key(self):
- runtime = _make_runtime()
- runtime._idempotency_keys["stripe-evt-123"] = "exec-001"
- runtime._idempotency_times["stripe-evt-123"] = time.time()
-
- assert runtime._idempotency_keys.get("stripe-evt-123") == "exec-001"
-
- def test_cache_returns_none_for_unknown_key(self):
- runtime = _make_runtime()
- assert runtime._idempotency_keys.get("unknown") is None
-
- def test_prune_removes_expired_keys(self):
- runtime = _make_runtime(ttl=0.1)
-
- runtime._idempotency_keys["old-key"] = "exec-old"
- runtime._idempotency_times["old-key"] = time.time() - 1.0 # expired
-
- runtime._prune_idempotency_keys()
-
- assert "old-key" not in runtime._idempotency_keys
- assert "old-key" not in runtime._idempotency_times
-
- def test_prune_keeps_fresh_keys(self):
- runtime = _make_runtime(ttl=300.0)
-
- runtime._idempotency_keys["fresh-key"] = "exec-fresh"
- runtime._idempotency_times["fresh-key"] = time.time()
-
- runtime._prune_idempotency_keys()
-
- assert "fresh-key" in runtime._idempotency_keys
-
- def test_prune_respects_max_keys(self):
- runtime = _make_runtime(max_keys=2)
-
- for i in range(3):
- key = f"key-{i}"
- runtime._idempotency_keys[key] = f"exec-{i}"
- runtime._idempotency_times[key] = time.time()
-
- runtime._prune_idempotency_keys()
-
- assert len(runtime._idempotency_keys) == 2
- # Oldest (key-0) should be evicted
- assert "key-0" not in runtime._idempotency_keys
- assert "key-1" in runtime._idempotency_keys
- assert "key-2" in runtime._idempotency_keys
-
- def test_prune_evicts_fifo(self):
- runtime = _make_runtime(max_keys=1)
-
- runtime._idempotency_keys["first"] = "exec-1"
- runtime._idempotency_times["first"] = time.time()
- runtime._idempotency_keys["second"] = "exec-2"
- runtime._idempotency_times["second"] = time.time()
-
- runtime._prune_idempotency_keys()
-
- assert len(runtime._idempotency_keys) == 1
- assert "second" in runtime._idempotency_keys
- assert "first" not in runtime._idempotency_keys
-
- def test_mixed_expired_and_max_size(self):
- runtime = _make_runtime(ttl=0.1, max_keys=2)
-
- # Add expired key
- runtime._idempotency_keys["expired"] = "exec-e"
- runtime._idempotency_times["expired"] = time.time() - 1.0
-
- # Add fresh keys
- runtime._idempotency_keys["fresh-1"] = "exec-f1"
- runtime._idempotency_times["fresh-1"] = time.time()
- runtime._idempotency_keys["fresh-2"] = "exec-f2"
- runtime._idempotency_times["fresh-2"] = time.time()
-
- runtime._prune_idempotency_keys()
-
- assert "expired" not in runtime._idempotency_keys
- assert "fresh-1" in runtime._idempotency_keys
- assert "fresh-2" in runtime._idempotency_keys
-
-
-class TestTriggerIdempotency:
- """Tests for trigger() idempotency deduplication."""
-
- def test_trigger_accepts_idempotency_key(self):
- """trigger() accepts idempotency_key as a keyword argument."""
- import inspect
-
- sig = inspect.signature(AgentRuntime.trigger)
- assert "idempotency_key" in sig.parameters
-
- def test_idempotency_key_defaults_to_none(self):
- """idempotency_key defaults to None (backward compatible)."""
- import inspect
-
- sig = inspect.signature(AgentRuntime.trigger)
- assert sig.parameters["idempotency_key"].default is None
-
- def test_trigger_and_wait_accepts_idempotency_key(self):
- """trigger_and_wait() also accepts idempotency_key."""
- import inspect
-
- sig = inspect.signature(AgentRuntime.trigger_and_wait)
- assert "idempotency_key" in sig.parameters
-
- def test_trigger_and_wait_idempotency_key_defaults_to_none(self):
- """trigger_and_wait() idempotency_key defaults to None."""
- import inspect
-
- sig = inspect.signature(AgentRuntime.trigger_and_wait)
- assert sig.parameters["idempotency_key"].default is None
-
- @pytest.mark.asyncio
- async def test_duplicate_key_returns_cached_id(self):
- """Same idempotency key within TTL returns the cached execution ID."""
- runtime = _make_runtime_with_stream()
-
- first = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001")
- second = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001")
-
- assert first == second
- assert first == "session-0001"
-
- @pytest.mark.asyncio
- async def test_different_keys_produce_different_ids(self):
- """Different idempotency keys start separate executions."""
- runtime = _make_runtime_with_stream()
-
- id_a = await runtime.trigger("webhook", {}, idempotency_key="evt-aaa")
- id_b = await runtime.trigger("webhook", {}, idempotency_key="evt-bbb")
-
- assert id_a != id_b
- assert id_a == "session-0001"
- assert id_b == "session-0002"
-
- @pytest.mark.asyncio
- async def test_none_key_always_starts_new_execution(self):
- """key=None (default) skips dedup — every call starts fresh."""
- runtime = _make_runtime_with_stream()
-
- id_1 = await runtime.trigger("webhook", {})
- id_2 = await runtime.trigger("webhook", {})
-
- assert id_1 != id_2
- assert len(runtime._idempotency_keys) == 0 # nothing cached
-
- @pytest.mark.asyncio
- async def test_expired_key_allows_new_execution(self):
- """After TTL expires, the same key starts a new execution."""
- runtime = _make_runtime_with_stream(ttl=0.1)
-
- first = await runtime.trigger("webhook", {}, idempotency_key="evt-expire")
-
- # Backdate the cached timestamp so the key looks expired
- runtime._idempotency_times["evt-expire"] = time.time() - 1.0
-
- second = await runtime.trigger("webhook", {}, idempotency_key="evt-expire")
-
- assert first != second
- assert first == "session-0001"
- assert second == "session-0002"
-
- @pytest.mark.asyncio
- async def test_stream_not_found_does_not_cache(self):
- """If entry point doesn't exist, nothing is cached."""
- runtime = _make_runtime_with_stream()
-
- with pytest.raises(ValueError, match="not found"):
- await runtime.trigger("nonexistent", {}, idempotency_key="evt-orphan")
-
- assert "evt-orphan" not in runtime._idempotency_keys
-
- @pytest.mark.asyncio
- async def test_execute_error_does_not_cache(self):
- """If stream.execute() raises, nothing is cached so retries can go through."""
- runtime = _make_runtime()
-
- failing_stream = MagicMock()
- failing_stream.execute = AsyncMock(side_effect=RuntimeError("stream not running"))
- runtime._streams = {"webhook": failing_stream}
- runtime._entry_points = {"webhook": MagicMock()}
-
- with pytest.raises(RuntimeError, match="stream not running"):
- await runtime.trigger("webhook", {}, idempotency_key="evt-123")
-
- assert "evt-123" not in runtime._idempotency_keys
-
- @pytest.mark.asyncio
- async def test_cache_holds_real_execution_id(self):
- """Cached value matches the actual execution ID from execute()."""
- runtime = _make_runtime_with_stream()
-
- exec_id = await runtime.trigger("webhook", {}, idempotency_key="evt-real")
-
- cached = runtime._idempotency_keys.get("evt-real")
- assert cached == exec_id
- assert cached == "session-0001"
diff --git a/core/framework/runtime/tests/test_runtime_logging_paths.py b/core/framework/runtime/tests/test_runtime_logging_paths.py
deleted file mode 100644
index 3eb60ce0..00000000
--- a/core/framework/runtime/tests/test_runtime_logging_paths.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""Tests for custom session-backed runtime logging paths."""
-
-from pathlib import Path
-from unittest.mock import MagicMock
-
-from framework.graph.executor import GraphExecutor
-from framework.runtime.runtime_log_store import RuntimeLogStore
-from framework.runtime.runtime_logger import RuntimeLogger
-
-
-def test_graph_executor_uses_custom_session_dir_name_for_runtime_logs():
- executor = GraphExecutor(
- runtime=MagicMock(),
- storage_path=Path("/tmp/test-agent/sessions/my-custom-session"),
- )
-
- assert executor._get_runtime_log_session_id() == "my-custom-session"
-
-
-def test_runtime_logger_creates_session_log_dir_for_custom_session_id(tmp_path):
- base = tmp_path / ".hive" / "agents" / "test_agent"
- base.mkdir(parents=True)
- store = RuntimeLogStore(base)
- logger = RuntimeLogger(store=store, agent_id="test-agent")
-
- run_id = logger.start_run(goal_id="goal-1", session_id="my-custom-session")
-
- assert run_id == "my-custom-session"
- assert (base / "sessions" / "my-custom-session" / "logs").is_dir()
diff --git a/core/framework/runtime/tests/test_webhook_server.py b/core/framework/runtime/tests/test_webhook_server.py
deleted file mode 100644
index 13d2628c..00000000
--- a/core/framework/runtime/tests/test_webhook_server.py
+++ /dev/null
@@ -1,716 +0,0 @@
-"""
-Tests for WebhookServer and event-driven entry points.
-"""
-
-import asyncio
-import hashlib
-import hmac as hmac_mod
-import json
-import tempfile
-from pathlib import Path
-from unittest.mock import patch
-
-import aiohttp
-import pytest
-
-from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
-from framework.runtime.event_bus import AgentEvent, EventBus, EventType
-from framework.runtime.execution_stream import EntryPointSpec
-from framework.runtime.webhook_server import (
- WebhookRoute,
- WebhookServer,
- WebhookServerConfig,
-)
-
-
-def _make_server(event_bus: EventBus, routes: list[WebhookRoute] | None = None):
- """Helper to create a WebhookServer with port=0 for OS-assigned port."""
- config = WebhookServerConfig(host="127.0.0.1", port=0)
- server = WebhookServer(event_bus, config)
- for route in routes or []:
- server.add_route(route)
- return server
-
-
-def _base_url(server: WebhookServer) -> str:
- """Get the base URL for a running server."""
- return f"http://127.0.0.1:{server.port}"
-
-
-class TestWebhookServerLifecycle:
- """Tests for server start/stop."""
-
- @pytest.mark.asyncio
- async def test_start_stop(self):
- bus = EventBus()
- server = _make_server(
- bus,
- [
- WebhookRoute(source_id="test", path="/webhooks/test", methods=["POST"]),
- ],
- )
-
- await server.start()
- assert server.is_running
- assert server.port is not None
-
- await server.stop()
- assert not server.is_running
- assert server.port is None
-
- @pytest.mark.asyncio
- async def test_no_routes_skips_start(self):
- bus = EventBus()
- server = _make_server(bus) # no routes
-
- await server.start()
- assert not server.is_running
-
- @pytest.mark.asyncio
- async def test_stop_when_not_started(self):
- bus = EventBus()
- server = _make_server(bus)
-
- # Should be a no-op, not raise
- await server.stop()
- assert not server.is_running
-
-
-class TestWebhookEventPublishing:
- """Tests for HTTP request -> EventBus event publishing."""
-
- @pytest.mark.asyncio
- async def test_post_publishes_webhook_received(self):
- bus = EventBus()
- received = []
-
- async def handler(event):
- received.append(event)
-
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
-
- server = _make_server(
- bus,
- [
- WebhookRoute(source_id="gh", path="/webhooks/github", methods=["POST"]),
- ],
- )
- await server.start()
-
- try:
- async with aiohttp.ClientSession() as session:
- async with session.post(
- f"{_base_url(server)}/webhooks/github",
- json={"action": "opened", "number": 42},
- ) as resp:
- assert resp.status == 202
- body = await resp.json()
- assert body["status"] == "accepted"
-
- # Give event bus time to dispatch
- await asyncio.sleep(0.05)
-
- assert len(received) == 1
- event = received[0]
- assert event.type == EventType.WEBHOOK_RECEIVED
- assert event.stream_id == "gh"
- assert event.data["path"] == "/webhooks/github"
- assert event.data["method"] == "POST"
- assert event.data["payload"] == {"action": "opened", "number": 42}
- assert isinstance(event.data["headers"], dict)
- assert event.data["query_params"] == {}
- finally:
- await server.stop()
-
- @pytest.mark.asyncio
- async def test_query_params_included(self):
- bus = EventBus()
- received = []
-
- async def handler(event):
- received.append(event)
-
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
-
- server = _make_server(
- bus,
- [
- WebhookRoute(source_id="hook", path="/webhooks/hook", methods=["POST"]),
- ],
- )
- await server.start()
-
- try:
- async with aiohttp.ClientSession() as session:
- async with session.post(
- f"{_base_url(server)}/webhooks/hook?source=test&v=2",
- json={"data": "hello"},
- ) as resp:
- assert resp.status == 202
-
- await asyncio.sleep(0.05)
-
- assert len(received) == 1
- assert received[0].data["query_params"] == {"source": "test", "v": "2"}
- finally:
- await server.stop()
-
- @pytest.mark.asyncio
- async def test_non_json_body(self):
- bus = EventBus()
- received = []
-
- async def handler(event):
- received.append(event)
-
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
-
- server = _make_server(
- bus,
- [
- WebhookRoute(source_id="raw", path="/webhooks/raw", methods=["POST"]),
- ],
- )
- await server.start()
-
- try:
- async with aiohttp.ClientSession() as session:
- async with session.post(
- f"{_base_url(server)}/webhooks/raw",
- data=b"plain text body",
- headers={"Content-Type": "text/plain"},
- ) as resp:
- assert resp.status == 202
-
- await asyncio.sleep(0.05)
-
- assert len(received) == 1
- assert received[0].data["payload"] == {"raw_body": "plain text body"}
- finally:
- await server.stop()
-
- @pytest.mark.asyncio
- async def test_empty_body(self):
- bus = EventBus()
- received = []
-
- async def handler(event):
- received.append(event)
-
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
-
- server = _make_server(
- bus,
- [
- WebhookRoute(source_id="empty", path="/webhooks/empty", methods=["POST"]),
- ],
- )
- await server.start()
-
- try:
- async with aiohttp.ClientSession() as session:
- async with session.post(f"{_base_url(server)}/webhooks/empty") as resp:
- assert resp.status == 202
-
- await asyncio.sleep(0.05)
-
- assert len(received) == 1
- assert received[0].data["payload"] == {}
- finally:
- await server.stop()
-
- @pytest.mark.asyncio
- async def test_multiple_routes(self):
- bus = EventBus()
- received = []
-
- async def handler(event):
- received.append(event)
-
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
-
- server = _make_server(
- bus,
- [
- WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]),
- WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]),
- ],
- )
- await server.start()
-
- try:
- async with aiohttp.ClientSession() as session:
- async with session.post(
- f"{_base_url(server)}/webhooks/a", json={"from": "a"}
- ) as resp:
- assert resp.status == 202
-
- async with session.post(
- f"{_base_url(server)}/webhooks/b", json={"from": "b"}
- ) as resp:
- assert resp.status == 202
-
- await asyncio.sleep(0.05)
-
- assert len(received) == 2
- stream_ids = {e.stream_id for e in received}
- assert stream_ids == {"a", "b"}
- finally:
- await server.stop()
-
- @pytest.mark.asyncio
- async def test_filter_stream_subscription(self):
- """Subscribers can filter by stream_id (source_id)."""
- bus = EventBus()
- a_events = []
- b_events = []
-
- async def handle_a(event):
- a_events.append(event)
-
- async def handle_b(event):
- b_events.append(event)
-
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_a, filter_stream="a")
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_b, filter_stream="b")
-
- server = _make_server(
- bus,
- [
- WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]),
- WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]),
- ],
- )
- await server.start()
-
- try:
- async with aiohttp.ClientSession() as session:
- await session.post(f"{_base_url(server)}/webhooks/a", json={"x": 1})
- await session.post(f"{_base_url(server)}/webhooks/b", json={"x": 2})
-
- await asyncio.sleep(0.05)
-
- assert len(a_events) == 1
- assert a_events[0].data["payload"] == {"x": 1}
- assert len(b_events) == 1
- assert b_events[0].data["payload"] == {"x": 2}
- finally:
- await server.stop()
-
-
-class TestHMACVerification:
- """Tests for HMAC-SHA256 signature verification."""
-
- @pytest.mark.asyncio
- async def test_valid_signature_accepted(self):
- bus = EventBus()
- received = []
-
- async def handler(event):
- received.append(event)
-
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
-
- secret = "test-secret-key"
- server = _make_server(
- bus,
- [
- WebhookRoute(
- source_id="secure",
- path="/webhooks/secure",
- methods=["POST"],
- secret=secret,
- ),
- ],
- )
- await server.start()
-
- try:
- body = json.dumps({"event": "push"}).encode()
- sig = hmac_mod.new(secret.encode(), body, hashlib.sha256).hexdigest()
-
- async with aiohttp.ClientSession() as session:
- async with session.post(
- f"{_base_url(server)}/webhooks/secure",
- data=body,
- headers={
- "Content-Type": "application/json",
- "X-Hub-Signature-256": f"sha256={sig}",
- },
- ) as resp:
- assert resp.status == 202
-
- await asyncio.sleep(0.05)
- assert len(received) == 1
- finally:
- await server.stop()
-
- @pytest.mark.asyncio
- async def test_invalid_signature_rejected(self):
- bus = EventBus()
- received = []
-
- async def handler(event):
- received.append(event)
-
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
-
- server = _make_server(
- bus,
- [
- WebhookRoute(
- source_id="secure",
- path="/webhooks/secure",
- methods=["POST"],
- secret="real-secret",
- ),
- ],
- )
- await server.start()
-
- try:
- async with aiohttp.ClientSession() as session:
- async with session.post(
- f"{_base_url(server)}/webhooks/secure",
- json={"event": "push"},
- headers={"X-Hub-Signature-256": "sha256=invalidsignature"},
- ) as resp:
- assert resp.status == 401
-
- await asyncio.sleep(0.05)
- assert len(received) == 0 # No event published
- finally:
- await server.stop()
-
- @pytest.mark.asyncio
- async def test_missing_signature_rejected(self):
- bus = EventBus()
- received = []
-
- async def handler(event):
- received.append(event)
-
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
-
- server = _make_server(
- bus,
- [
- WebhookRoute(
- source_id="secure",
- path="/webhooks/secure",
- methods=["POST"],
- secret="my-secret",
- ),
- ],
- )
- await server.start()
-
- try:
- async with aiohttp.ClientSession() as session:
- # No X-Hub-Signature-256 header
- async with session.post(
- f"{_base_url(server)}/webhooks/secure",
- json={"event": "push"},
- ) as resp:
- assert resp.status == 401
-
- await asyncio.sleep(0.05)
- assert len(received) == 0
- finally:
- await server.stop()
-
- @pytest.mark.asyncio
- async def test_no_secret_skips_verification(self):
- """Routes without a secret accept any request."""
- bus = EventBus()
- received = []
-
- async def handler(event):
- received.append(event)
-
- bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
-
- server = _make_server(
- bus,
- [
- WebhookRoute(
- source_id="open",
- path="/webhooks/open",
- methods=["POST"],
- secret=None,
- ),
- ],
- )
- await server.start()
-
- try:
- async with aiohttp.ClientSession() as session:
- async with session.post(
- f"{_base_url(server)}/webhooks/open",
- json={"data": "test"},
- ) as resp:
- assert resp.status == 202
-
- await asyncio.sleep(0.05)
- assert len(received) == 1
- finally:
- await server.stop()
-
-
-class TestEventDrivenEntryPoints:
- """Tests for event-driven entry points wired through AgentRuntime."""
-
- def _make_graph_and_goal(self):
- """Minimal graph + goal for testing entry point triggering."""
- from framework.graph import Goal
- from framework.graph.edge import GraphSpec
- from framework.graph.goal import SuccessCriterion
- from framework.graph.node import NodeSpec
-
- nodes = [
- NodeSpec(
- id="process-event",
- name="Process Event",
- description="Process incoming event",
- node_type="event_loop",
- input_keys=["event"],
- output_keys=["result"],
- ),
- ]
- graph = GraphSpec(
- id="test-graph",
- goal_id="test-goal",
- version="1.0.0",
- entry_node="process-event",
- entry_points={"start": "process-event"},
- terminal_nodes=[],
- pause_nodes=[],
- nodes=nodes,
- edges=[],
- )
- goal = Goal(
- id="test-goal",
- name="Test Goal",
- description="Test",
- success_criteria=[
- SuccessCriterion(
- id="sc-1",
- description="Done",
- metric="done",
- target="yes",
- weight=1.0,
- ),
- ],
- )
- return graph, goal
-
- @pytest.mark.asyncio
- async def test_event_entry_point_subscribes_to_bus(self):
- """Entry point with trigger_type='event' subscribes and triggers on matching events."""
- graph, goal = self._make_graph_and_goal()
-
- config = AgentRuntimeConfig(
- webhook_host="127.0.0.1",
- webhook_port=0,
- webhook_routes=[
- {"source_id": "gh", "path": "/webhooks/github"},
- ],
- )
-
- with tempfile.TemporaryDirectory() as tmpdir:
- runtime = AgentRuntime(
- graph=graph,
- goal=goal,
- storage_path=Path(tmpdir),
- config=config,
- )
-
- runtime.register_entry_point(
- EntryPointSpec(
- id="gh-handler",
- name="GitHub Handler",
- entry_node="process-event",
- trigger_type="event",
- trigger_config={
- "event_types": ["webhook_received"],
- "filter_stream": "gh",
- },
- )
- )
-
- trigger_calls = []
-
- async def mock_trigger(ep_id, data, **kwargs):
- trigger_calls.append((ep_id, data))
-
- with patch.object(runtime, "trigger", side_effect=mock_trigger):
- await runtime.start()
-
- try:
- assert runtime.webhook_server is not None
- assert runtime.webhook_server.is_running
-
- port = runtime.webhook_server.port
- async with aiohttp.ClientSession() as session:
- async with session.post(
- f"http://127.0.0.1:{port}/webhooks/github",
- json={"action": "push", "ref": "main"},
- ) as resp:
- assert resp.status == 202
-
- await asyncio.sleep(0.1)
-
- assert len(trigger_calls) == 1
- ep_id, data = trigger_calls[0]
- assert ep_id == "gh-handler"
- assert "event" in data
- assert data["event"]["type"] == "webhook_received"
- assert data["event"]["stream_id"] == "gh"
- assert data["event"]["data"]["payload"] == {
- "action": "push",
- "ref": "main",
- }
- finally:
- await runtime.stop()
-
- assert runtime.webhook_server is None
-
- @pytest.mark.asyncio
- async def test_event_entry_point_filter_stream(self):
- """Entry point only triggers for matching stream_id (source_id)."""
- graph, goal = self._make_graph_and_goal()
-
- config = AgentRuntimeConfig(
- webhook_routes=[
- {"source_id": "github", "path": "/webhooks/github"},
- {"source_id": "stripe", "path": "/webhooks/stripe"},
- ],
- webhook_port=0,
- )
-
- with tempfile.TemporaryDirectory() as tmpdir:
- runtime = AgentRuntime(
- graph=graph,
- goal=goal,
- storage_path=Path(tmpdir),
- config=config,
- )
-
- runtime.register_entry_point(
- EntryPointSpec(
- id="gh-only",
- name="GitHub Only",
- entry_node="process-event",
- trigger_type="event",
- trigger_config={
- "event_types": ["webhook_received"],
- "filter_stream": "github",
- },
- )
- )
-
- trigger_calls = []
-
- async def mock_trigger(ep_id, data, **kwargs):
- trigger_calls.append((ep_id, data))
-
- with patch.object(runtime, "trigger", side_effect=mock_trigger):
- await runtime.start()
-
- try:
- port = runtime.webhook_server.port
- async with aiohttp.ClientSession() as session:
- # POST to stripe — should NOT trigger
- await session.post(
- f"http://127.0.0.1:{port}/webhooks/stripe",
- json={"type": "payment"},
- )
- # POST to github — should trigger
- await session.post(
- f"http://127.0.0.1:{port}/webhooks/github",
- json={"action": "opened"},
- )
-
- await asyncio.sleep(0.1)
-
- assert len(trigger_calls) == 1
- assert trigger_calls[0][0] == "gh-only"
- finally:
- await runtime.stop()
-
- @pytest.mark.asyncio
- async def test_no_webhook_routes_skips_server(self):
- """Runtime without webhook_routes does not start a webhook server."""
- graph, goal = self._make_graph_and_goal()
-
- with tempfile.TemporaryDirectory() as tmpdir:
- runtime = AgentRuntime(
- graph=graph,
- goal=goal,
- storage_path=Path(tmpdir),
- )
-
- runtime.register_entry_point(
- EntryPointSpec(
- id="manual",
- name="Manual",
- entry_node="process-event",
- trigger_type="manual",
- )
- )
-
- await runtime.start()
- try:
- assert runtime.webhook_server is None
- finally:
- await runtime.stop()
-
- @pytest.mark.asyncio
- async def test_event_entry_point_custom_event(self):
- """Entry point can subscribe to CUSTOM events, not just webhooks."""
- graph, goal = self._make_graph_and_goal()
-
- with tempfile.TemporaryDirectory() as tmpdir:
- runtime = AgentRuntime(
- graph=graph,
- goal=goal,
- storage_path=Path(tmpdir),
- )
-
- runtime.register_entry_point(
- EntryPointSpec(
- id="custom-handler",
- name="Custom Handler",
- entry_node="process-event",
- trigger_type="event",
- trigger_config={
- "event_types": ["custom"],
- },
- )
- )
-
- trigger_calls = []
-
- async def mock_trigger(ep_id, data, **kwargs):
- trigger_calls.append((ep_id, data))
-
- with patch.object(runtime, "trigger", side_effect=mock_trigger):
- await runtime.start()
-
- try:
- await runtime.event_bus.publish(
- AgentEvent(
- type=EventType.CUSTOM,
- stream_id="some-source",
- data={"key": "value"},
- )
- )
-
- await asyncio.sleep(0.1)
-
- assert len(trigger_calls) == 1
- assert trigger_calls[0][0] == "custom-handler"
- assert trigger_calls[0][1]["event"]["type"] == "custom"
- assert trigger_calls[0][1]["event"]["data"]["key"] == "value"
- finally:
- await runtime.stop()
diff --git a/core/framework/schemas/agent_config.py b/core/framework/schemas/agent_config.py
new file mode 100644
index 00000000..7c65c844
--- /dev/null
+++ b/core/framework/schemas/agent_config.py
@@ -0,0 +1,192 @@
+"""Declarative agent configuration schema.
+
+Allows defining agents via JSON/YAML config files instead of Python modules.
+The ``AgentConfig`` model is the top-level schema loaded from ``agent.json``.
+The runner detects this format by checking for a ``name`` key at the top level.
+
+Template variables
+------------------
+System prompts and identity_prompt support ``{{variable_name}}`` placeholders.
+These are resolved at load time from ``AgentConfig.variables``.
+"""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, Field
+
+
+class ToolAccessConfig(BaseModel):
+ """Declarative tool access policy.
+
+ Controls which tools a node/agent has access to.
+
+ * ``all`` -- every tool from the registry.
+ * ``explicit`` -- only tools listed in ``allowed`` (default; empty = zero tools).
+ * ``none`` -- no tools at all.
+ """
+
+ policy: str = Field(
+ default="explicit",
+ description="One of: 'all', 'explicit', 'none'.",
+ )
+ allowed: list[str] = Field(
+ default_factory=list,
+ description="Tool names when policy='explicit'.",
+ )
+ denied: list[str] = Field(
+ default_factory=list,
+ description="Tool names to deny (applied after allowed).",
+ )
+
+
+class NodeConfig(BaseModel):
+ """Declarative node definition."""
+
+ id: str
+ name: str | None = None
+ description: str | None = None
+ node_type: str = Field(
+ default="event_loop",
+ description="event_loop",
+ )
+ system_prompt: str | None = None
+ tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig)
+ model: str | None = None
+ input_keys: list[str] = Field(default_factory=list)
+ output_keys: list[str] = Field(default_factory=list)
+ nullable_output_keys: list[str] = Field(default_factory=list)
+ max_iterations: int = 30
+ max_node_visits: int = 1
+ client_facing: bool = False
+ success_criteria: str | None = None
+ failure_criteria: str | None = None
+ skip_judge: bool = False
+ max_retries: int | None = None
+
+
+class EdgeConfig(BaseModel):
+ """Declarative edge definition."""
+
+ from_node: str = Field(description="Source node ID.")
+ to_node: str = Field(description="Target node ID.")
+ condition: str = Field(
+ default="on_success",
+ description="always | on_success | on_failure | conditional | llm_decide",
+ )
+ condition_expr: str | None = None
+ input_mapping: dict[str, str] = Field(default_factory=dict)
+ priority: int = 1
+
+
+class GoalConfig(BaseModel):
+ """Simplified goal definition for declarative config."""
+
+ description: str
+ success_criteria: list[str] = Field(default_factory=list)
+ constraints: list[str] = Field(default_factory=list)
+
+
+class EntryPointConfig(BaseModel):
+ """Entry point configuration."""
+
+ id: str = "default"
+ name: str = "Default"
+ entry_node: str | None = None # defaults to AgentConfig.entry_node
+ trigger_type: str = Field(
+ default="manual",
+ description="manual | scheduled | timer",
+ )
+ trigger_config: dict = Field(default_factory=dict)
+ isolation_level: str = "shared"
+ max_concurrent: int | None = None
+
+
+class MCPServerRef(BaseModel):
+ """Reference to an MCP server to connect for this agent."""
+
+ name: str
+ config: dict | None = None
+
+
+class MetadataConfig(BaseModel):
+ """Agent metadata for display / intro messages."""
+
+ intro_message: str = ""
+
+
+class AgentConfig(BaseModel):
+ """Top-level declarative agent configuration.
+
+ Load from ``agent.json`` and pass to
+ :func:`framework.runner.runner.load_agent_config` to build the
+ ``GraphSpec`` + ``Goal`` pair.
+
+ Example (YAML)::
+
+ name: lead-enrichment-agent
+ version: 1.0.0
+ variables:
+ spreadsheet_id: "1ZVx..."
+ sheet_name: "contacts"
+ goal:
+ description: "Enrich leads in Google Sheets"
+ success_criteria:
+ - "All unprocessed leads enriched"
+ constraints:
+ - "Browser-only research"
+ identity_prompt: |
+ You are the Lead Enrichment Agent...
+ nodes:
+ - id: start
+ tools: {policy: explicit, allowed: [google_sheets_get_values]}
+ system_prompt: |
+ Spreadsheet ID: {{spreadsheet_id}}
+ ...
+ """
+
+ name: str
+ version: str = "1.0.0"
+ description: str | None = None
+ metadata: MetadataConfig = Field(default_factory=MetadataConfig)
+
+ # Template variables -- substituted into prompts via {{var_name}}
+ variables: dict[str, str] = Field(default_factory=dict)
+
+ # Goal
+ goal: GoalConfig
+
+ # Graph structure
+ nodes: list[NodeConfig]
+ edges: list[EdgeConfig]
+ entry_node: str
+ terminal_nodes: list[str] = Field(default_factory=list)
+ pause_nodes: list[str] = Field(default_factory=list)
+
+ # Entry points (if omitted, a single "default" manual entry is created)
+ entry_points: list[EntryPointConfig] = Field(default_factory=list)
+
+ # Agent-level tool defaults (nodes inherit unless they override)
+ tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig)
+ mcp_servers: list[MCPServerRef] = Field(default_factory=list)
+
+ # LLM / execution
+ model: str | None = None
+ max_tokens: int = 4096
+ conversation_mode: str = "continuous"
+ identity_prompt: str = ""
+ loop_config: dict = Field(
+ default_factory=lambda: {
+ "max_iterations": 100,
+ "max_tool_calls_per_turn": 30,
+ "max_context_tokens": 32000,
+ },
+ )
+
+ # Pipeline overrides (per-agent, merged with global config)
+ pipeline: dict = Field(
+ default_factory=dict,
+ description="Per-agent pipeline stage overrides. Same format as global pipeline config.",
+ )
+
+ # Resource limits
+ max_cost_per_run: float | None = None
diff --git a/core/framework/schemas/session_state.py b/core/framework/schemas/session_state.py
index 7b143985..4fcecd7c 100644
--- a/core/framework/schemas/session_state.py
+++ b/core/framework/schemas/session_state.py
@@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Any
from pydantic import AliasChoices, BaseModel, Field, computed_field
if TYPE_CHECKING:
- from framework.graph.executor import ExecutionResult
+ from framework.orchestrator.orchestrator import ExecutionResult
from framework.schemas.run import Run
diff --git a/core/framework/server/app.py b/core/framework/server/app.py
index 32c2c081..e014f01d 100644
--- a/core/framework/server/app.py
+++ b/core/framework/server/app.py
@@ -28,8 +28,11 @@ def _get_allowed_agent_roots() -> tuple[Path, ...]:
"""
global _ALLOWED_AGENT_ROOTS
if _ALLOWED_AGENT_ROOTS is None:
+ from framework.config import COLONIES_DIR
+
_ALLOWED_AGENT_ROOTS = (
- (_REPO_ROOT / "exports").resolve(),
+ COLONIES_DIR.resolve(), # ~/.hive/colonies/
+ (_REPO_ROOT / "exports").resolve(), # compat fallback
(_REPO_ROOT / "examples").resolve(),
(Path.home() / ".hive" / "agents").resolve(),
)
@@ -53,7 +56,8 @@ def validate_agent_path(agent_path: str | Path) -> Path:
if resolved.is_relative_to(root) and resolved != root:
return resolved
raise ValueError(
- "agent_path must be inside an allowed directory (exports/, examples/, or ~/.hive/agents/)"
+ "agent_path must be inside an allowed directory "
+ "(~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)"
)
diff --git a/core/framework/server/queen_orchestrator.py b/core/framework/server/queen_orchestrator.py
index 43f5c5dd..4022a5ab 100644
--- a/core/framework/server/queen_orchestrator.py
+++ b/core/framework/server/queen_orchestrator.py
@@ -32,7 +32,7 @@ async def create_queen(
"""
from framework.agents.queen.agent import (
queen_goal,
- queen_graph as _queen_graph,
+ queen_loop_config as _base_loop_config,
)
from framework.agents.queen.nodes import (
_QUEEN_BUILDING_TOOLS,
@@ -65,18 +65,15 @@ async def create_queen(
_shared_building_knowledge,
)
from framework.agents.queen.nodes.thinking_hook import select_expert_persona
- from framework.graph.event_loop_node import HookContext, HookResult
- from framework.graph.executor import GraphExecutor
- from framework.runner.mcp_registry import MCPRegistry
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.core import Runtime
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.agent_loop.agent_loop import HookContext, HookResult
+ from framework.loader.mcp_registry import MCPRegistry
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.event_bus import AgentEvent, EventType
from framework.tools.queen_lifecycle_tools import (
QueenPhaseState,
register_queen_lifecycle_tools,
)
- hive_home = Path.home() / ".hive"
# ---- Tool registry ------------------------------------------------
queen_registry = ToolRegistry()
@@ -194,7 +191,7 @@ async def create_queen(
phase_state.global_memory_dir = global_dir
# ---- Compose phase-specific prompts ------------------------------
- _orig_node = _queen_graph.nodes[0]
+ from framework.agents.queen.nodes import queen_node as _orig_node
if worker_identity is None:
worker_identity = (
@@ -348,61 +345,81 @@ async def create_queen(
if set(available_tools) != set(declared_tools):
missing = sorted(set(declared_tools) - registered_tool_names)
if missing:
- logger.warning("Queen: tools not available: %s", missing)
+ logger.debug("Queen: tools not yet available (registered on worker load): %s", missing)
node_updates["tools"] = available_tools
adjusted_node = _orig_node.model_copy(update=node_updates)
_queen_loop_config = {
- **(_queen_graph.loop_config or {}),
+ **_base_loop_config,
"hooks": {"session_start": [_persona_hook]},
}
- queen_graph = _queen_graph.model_copy(
- update={"nodes": [adjusted_node], "loop_config": _queen_loop_config}
- )
- # ---- Queen event loop --------------------------------------------
- queen_runtime = Runtime(hive_home / "queen")
+ # ---- Queen event loop (AgentLoop directly, no Orchestrator) -------
+ from types import SimpleNamespace
+
+ from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
+ from framework.storage.conversation_store import FileConversationStore
+ from framework.orchestrator.node import DataBuffer, NodeContext
async def _queen_loop():
logger.debug("[_queen_loop] Starting queen loop for session %s", session.id)
try:
- logger.debug("[_queen_loop] Creating GraphExecutor...")
- executor = GraphExecutor(
- runtime=queen_runtime,
- llm=session.llm,
- tools=queen_tools,
- tool_executor=queen_tool_executor,
+ # Build LoopConfig from the queen graph's config + persona hook
+ lc = _queen_loop_config
+ queen_loop_config = LoopConfig(
+ max_iterations=lc.get("max_iterations", 999_999),
+ max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 30),
+ max_context_tokens=lc.get("max_context_tokens", 180_000),
+ hooks=lc.get("hooks", {}),
+ )
+
+ # Create AgentLoop directly -- no Orchestrator, no graph traversal
+ agent_loop = AgentLoop(
event_bus=session.event_bus,
+ config=queen_loop_config,
+ tool_executor=queen_tool_executor,
+ conversation_store=FileConversationStore(queen_dir / "conversations"),
+ )
+
+ # Build NodeContext manually
+ from framework.tracker.decision_tracker import DecisionTracker
+
+ ctx = NodeContext(
+ runtime=DecisionTracker(queen_dir),
+ node_id="queen",
+ node_spec=adjusted_node,
+ buffer=DataBuffer(),
+ llm=session.llm,
+ available_tools=queen_tools,
+ goal_context=queen_goal.description,
+ max_tokens=lc.get("max_tokens", 8192),
stream_id="queen",
- storage_path=queen_dir,
- loop_config=_queen_loop_config,
execution_id=session.id,
dynamic_tools_provider=phase_state.get_current_tools,
dynamic_prompt_provider=phase_state.get_current_prompt,
iteration_metadata_provider=lambda: {"phase": phase_state.phase},
- skill_dirs=_queen_skill_dirs,
- protocols_prompt=phase_state.protocols_prompt,
skills_catalog_prompt=phase_state.skills_catalog_prompt,
+ protocols_prompt=phase_state.protocols_prompt,
+ skill_dirs=_queen_skill_dirs,
+ )
+
+ # Expose for chat handler injection (node_registry compat)
+ session.queen_executor = SimpleNamespace(
+ node_registry={"queen": agent_loop},
)
- session.queen_executor = executor
- logger.debug("[_queen_loop] GraphExecutor created and stored in session.queen_executor")
# Wire inject_notification so phase switches notify the queen LLM
async def _inject_phase_notification(content: str) -> None:
- node = executor.node_registry.get("queen")
- if node is not None and hasattr(node, "inject_event"):
- await node.inject_event(content)
+ await agent_loop.inject_event(content)
phase_state.inject_notification = _inject_phase_notification
# Auto-switch to editing when worker execution finishes.
- # The worker stays loaded — queen can tweak config and re-run.
async def _on_worker_done(event):
if event.stream_id == "queen":
return
if phase_state.phase == "running":
if event.type == EventType.EXECUTION_COMPLETED:
- # Mark worker as configured after first successful run
session.worker_configured = True
output = event.data.get("output", {})
output_summary = ""
@@ -420,7 +437,7 @@ async def create_queen(
"Ask if they want to re-run with different input "
"or tweak the configuration."
)
- else: # EXECUTION_FAILED
+ else:
error = event.data.get("error", "Unknown error")
notification = (
"[WORKER_TERMINAL] Worker failed.\n"
@@ -430,17 +447,14 @@ async def create_queen(
"building/planning if code changes are needed."
)
- node = executor.node_registry.get("queen")
- if node is not None and hasattr(node, "inject_event"):
- await node.inject_event(notification)
-
+ await agent_loop.inject_event(notification)
await phase_state.switch_to_editing(source="auto")
session.event_bus.subscribe(
event_types=[EventType.EXECUTION_COMPLETED, EventType.EXECUTION_FAILED],
handler=_on_worker_done,
)
- session_manager._subscribe_worker_handoffs(session, executor)
+ session_manager._subscribe_worker_handoffs(session, session.queen_executor)
# ---- Global memory reflection + recall -------------------------
from framework.agents.queen.reflection_agent import subscribe_reflection_triggers
@@ -459,23 +473,23 @@ async def create_queen(
len(phase_state.get_current_tools()),
[t.name for t in phase_state.get_current_tools()],
)
- logger.debug("[_queen_loop] Calling executor.execute()...")
- result = await executor.execute(
- graph=queen_graph,
- goal=queen_goal,
- input_data={"greeting": initial_prompt or "Session started."},
- session_state={"resume_session_id": session.id},
- )
- logger.debug(
- "[_queen_loop] executor.execute() returned with success=%s", result.success
- )
- if result.success:
- logger.warning("Queen executor returned (should be forever-alive)")
- else:
- logger.error(
- "Queen executor failed: %s",
- result.error or "(no error message)",
- )
+
+ # Set the first user message.
+ # When initial_prompt is None (user opens UI without ?prompt=),
+ # use a generic greeting so the queen has a user message to
+ # respond to. The user's real first question arrives via /chat.
+ ctx.input_data = {
+ "user_request": initial_prompt or "Hello",
+ }
+
+ # Run the queen -- forever-alive conversation loop
+ result = await agent_loop.execute(ctx)
+
+ if result.stop_reason == "complete":
+ logger.warning("Queen returned (should be forever-alive)")
+ elif result.error:
+ logger.error("Queen failed: %s", result.error)
+
except asyncio.CancelledError:
logger.info("[_queen_loop] Queen loop cancelled (normal shutdown)")
raise
@@ -484,7 +498,8 @@ async def create_queen(
raise
finally:
logger.warning(
- "[_queen_loop] Queen loop exiting — clearing queen_executor for session '%s'",
+ "[_queen_loop] Queen loop exiting — clearing queen_executor "
+ "for session '%s'",
session.id,
)
session.queen_executor = None
diff --git a/core/framework/server/routes_events.py b/core/framework/server/routes_events.py
index 3ef77428..f0c1616e 100644
--- a/core/framework/server/routes_events.py
+++ b/core/framework/server/routes_events.py
@@ -6,7 +6,7 @@ import logging
from aiohttp import web
from aiohttp.client_exceptions import ClientConnectionResetError as _AiohttpConnReset
-from framework.runtime.event_bus import AgentEvent, EventType
+from framework.host.event_bus import AgentEvent, EventType
from framework.server.app import resolve_session
logger = logging.getLogger(__name__)
diff --git a/core/framework/server/routes_execution.py b/core/framework/server/routes_execution.py
index f757746b..5efa9c94 100644
--- a/core/framework/server/routes_execution.py
+++ b/core/framework/server/routes_execution.py
@@ -8,7 +8,7 @@ from typing import Any
from aiohttp import web
from framework.credentials.validation import validate_agent_credentials
-from framework.graph.conversation import LEGACY_RUN_ID
+from framework.agent_loop.conversation import LEGACY_RUN_ID
from framework.server.app import resolve_session, safe_path_segment, sessions_dir
from framework.server.routes_sessions import _credential_error_response
@@ -187,7 +187,7 @@ async def handle_chat(request: web.Request) -> web.Response:
if node is not None and hasattr(node, "inject_event"):
# Publish BEFORE inject_event so handlers (e.g. memory recall)
# complete before the event loop unblocks and starts the LLM turn.
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
await session.event_bus.publish(
AgentEvent(
diff --git a/core/framework/server/routes_graphs.py b/core/framework/server/routes_graphs.py
index 3b735f5f..16ce63d6 100644
--- a/core/framework/server/routes_graphs.py
+++ b/core/framework/server/routes_graphs.py
@@ -46,7 +46,7 @@ def _node_to_dict(node) -> dict:
"client_facing": node.client_facing,
"success_criteria": node.success_criteria,
"system_prompt": node.system_prompt or "",
- "sub_agents": node.sub_agents,
+ "sub_agents": getattr(node, "sub_agents", []),
}
diff --git a/core/framework/server/routes_sessions.py b/core/framework/server/routes_sessions.py
index 04f71b08..ab98b3fb 100644
--- a/core/framework/server/routes_sessions.py
+++ b/core/framework/server/routes_sessions.py
@@ -527,7 +527,7 @@ async def handle_update_trigger_task(request: web.Request) -> web.Response:
# Emit SSE event so the frontend updates the graph and detail panel
bus = getattr(session, "event_bus", None)
if bus:
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
await bus.publish(
AgentEvent(
@@ -583,7 +583,9 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
"""
session_id = request.match_info["session_id"]
- queen_dir = Path.home() / ".hive" / "queen" / "session" / session_id
+ from framework.server.session_manager import _queen_session_dir
+
+ queen_dir = _queen_session_dir(session_id)
events_path = queen_dir / "events.jsonl"
if not events_path.exists():
return web.json_response({"events": [], "session_id": session_id})
@@ -608,7 +610,7 @@ async def handle_session_events_history(request: web.Request) -> web.Response:
async def handle_session_history(request: web.Request) -> web.Response:
"""GET /api/sessions/history — all queen sessions on disk (live + cold).
- Returns every session directory under ~/.hive/queen/session/, newest first.
+ Returns every queen session directory on disk, newest first.
Live sessions have ``live: true, cold: false``; sessions that survived a
server restart have ``live: false, cold: true``.
"""
@@ -634,7 +636,7 @@ async def handle_delete_history_session(request: web.Request) -> web.Response:
"""DELETE /api/sessions/history/{session_id} — permanently remove a session.
Stops the live session (if still running) and deletes the queen session
- directory from disk at ~/.hive/queen/session/{session_id}/.
+ directory from disk.
This is the frontend 'delete from history' action.
"""
manager = _get_manager(request)
@@ -645,7 +647,9 @@ async def handle_delete_history_session(request: web.Request) -> web.Response:
await manager.stop_session(session_id)
# Delete the queen session directory from disk
- queen_session_dir = Path.home() / ".hive" / "queen" / "session" / session_id
+ from framework.server.session_manager import _queen_session_dir
+
+ queen_session_dir = _queen_session_dir(session_id)
if queen_session_dir.exists() and queen_session_dir.is_dir():
try:
shutil.rmtree(queen_session_dir)
@@ -743,7 +747,9 @@ async def handle_reveal_session_folder(request: web.Request) -> web.Response:
session = manager.get_session(session_id)
storage_session_id = (session.queen_resume_from or session.id) if session else session_id
- folder = Path.home() / ".hive" / "queen" / "session" / storage_session_id
+ from framework.server.session_manager import _queen_session_dir
+
+ folder = _queen_session_dir(storage_session_id)
folder.mkdir(parents=True, exist_ok=True)
try:
diff --git a/core/framework/server/session_manager.py b/core/framework/server/session_manager.py
index 25e3961f..e88ae445 100644
--- a/core/framework/server/session_manager.py
+++ b/core/framework/server/session_manager.py
@@ -19,11 +19,17 @@ from datetime import datetime
from pathlib import Path
from typing import Any
-from framework.runtime.triggers import TriggerDefinition
+from framework.config import QUEENS_DIR
+from framework.host.triggers import TriggerDefinition
logger = logging.getLogger(__name__)
+def _queen_session_dir(session_id: str, queen_name: str = "default") -> Path:
+ """Return the on-disk directory for a queen session."""
+ return QUEENS_DIR / queen_name / "sessions" / session_id
+
+
@dataclass
class Session:
"""A live session with a queen and optional worker."""
@@ -67,6 +73,10 @@ class Session:
queen_resume_from: str | None = None
# Queen session directory (set during _start_queen, used for shutdown reflection)
queen_dir: Path | None = None
+ # Multi-queen support: which queen profile this session uses
+ queen_name: str = "default"
+ # Colony name: set when a worker is loaded from a colony
+ colony_name: str | None = None
class SessionManager:
@@ -86,6 +96,14 @@ class SessionManager:
# reflections) so they aren't garbage-collected before completion.
self._background_tasks: set[asyncio.Task] = set()
+ # Run one-time v2 directory structure migration
+ from framework.storage.migrate_v2 import run_migration
+
+ try:
+ run_migration()
+ except Exception:
+ logger.warning("v2 migration failed (non-fatal)", exc_info=True)
+
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
@@ -100,7 +118,7 @@ class SessionManager:
Internal helper — use create_session() or create_session_with_worker_graph().
"""
from framework.config import RuntimeConfig, get_hive_config
- from framework.runtime.event_bus import EventBus
+ from framework.host.event_bus import EventBus
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
resolved_id = session_id or f"session_{ts}_{uuid.uuid4().hex[:8]}"
@@ -194,9 +212,7 @@ class SessionManager:
# is incomplete and will fail to import).
if queen_resume_from:
_resume_phase = None
- _meta_path = (
- Path.home() / ".hive" / "queen" / "session" / queen_resume_from / "meta.json"
- )
+ _meta_path = _queen_session_dir(queen_resume_from) / "meta.json"
if _meta_path.exists():
try:
_meta = json.loads(_meta_path.read_text(encoding="utf-8"))
@@ -281,7 +297,7 @@ class SessionManager:
Sets up the runner, runtime, and session fields. Does NOT notify
the queen — callers handle that step.
"""
- from framework.runner import AgentRunner
+ from framework.loader import AgentLoader
agent_path = Path(agent_path)
resolved_graph_id = graph_id or agent_path.name
@@ -303,7 +319,7 @@ class SessionManager:
resolved_model = model or session_model or self._model
runner = await loop.run_in_executor(
None,
- lambda: AgentRunner.load(
+ lambda: AgentLoader.load(
agent_path,
model=resolved_model,
interactive=False,
@@ -536,7 +552,7 @@ class SessionManager:
# Update meta.json so cold-restore can discover this session by agent_path
storage_session_id = session.queen_resume_from or session.id
- meta_path = Path.home() / ".hive" / "queen" / "session" / storage_session_id / "meta.json"
+ meta_path = _queen_session_dir(storage_session_id, session.queen_name) / "meta.json"
try:
_agent_name = (
session.worker_info.name
@@ -644,10 +660,11 @@ class SessionManager:
task = asyncio.create_task(
asyncio.shield(run_shutdown_reflection(session.queen_dir, session.llm)),
+ name=f"shutdown-reflect-{session_id}",
)
+ logger.info("Session '%s': shutdown reflection spawned", session_id)
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
- logger.info("Session '%s': shutdown reflection spawned", session_id)
except Exception:
logger.warning(
"Session '%s': failed to spawn shutdown reflection", session_id, exc_info=True
@@ -721,7 +738,7 @@ class SessionManager:
def _subscribe_worker_handoffs(self, session: Session, executor: Any) -> None:
"""Subscribe queen to worker/subagent escalation handoff events."""
- from framework.runtime.event_bus import EventType as _ET
+ from framework.host.event_bus import EventType as _ET
if session.worker_handoff_sub is not None:
session.event_bus.unsubscribe(session.worker_handoff_sub)
@@ -755,13 +772,11 @@ class SessionManager:
session.queen_executor,
)
- hive_home = Path.home() / ".hive"
-
# Determine which session directory to use for queen storage.
# When queen_resume_from is set we write to the ORIGINAL session's
# directory so that all messages accumulate in one place.
storage_session_id = session.queen_resume_from or session.id
- queen_dir = hive_home / "queen" / "session" / storage_session_id
+ queen_dir = _queen_session_dir(storage_session_id, session.queen_name)
queen_dir.mkdir(parents=True, exist_ok=True)
session.queen_dir = queen_dir
@@ -920,7 +935,7 @@ class SessionManager:
async def _emit_graph_loaded(self, session: Session) -> None:
"""Publish a WORKER_GRAPH_LOADED event so the frontend can update."""
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
info = session.worker_info
await session.event_bus.publish(
@@ -939,7 +954,7 @@ class SessionManager:
async def _emit_flowchart_on_restore(self, session: Session, agent_path: str | Path) -> None:
"""Emit FLOWCHART_MAP_UPDATED from persisted flowchart file on cold restore."""
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
from framework.tools.flowchart_utils import load_flowchart_file
original_draft, flowchart_map = load_flowchart_file(agent_path)
@@ -982,7 +997,7 @@ class SessionManager:
triggers: dict[str, TriggerDefinition],
) -> None:
"""Emit TRIGGER_AVAILABLE or TRIGGER_REMOVED events for each trigger."""
- from framework.runtime.event_bus import AgentEvent, EventType
+ from framework.host.event_bus import AgentEvent, EventType
event_type = (
EventType.TRIGGER_AVAILABLE if kind == "available" else EventType.TRIGGER_REMOVED
@@ -1076,10 +1091,10 @@ class SessionManager:
"""Return disk metadata for a session that is no longer live in memory.
Checks whether queen conversation files exist at
- ~/.hive/queen/session/{session_id}/conversations/. Returns None when
+ ~/.hive/agents/queens/{name}/sessions/{session_id}/conversations/. Returns None when
no data is found so callers can fall through to a 404.
"""
- queen_dir = Path.home() / ".hive" / "queen" / "session" / session_id
+ queen_dir = _queen_session_dir(session_id)
convs_dir = queen_dir / "conversations"
if not convs_dir.exists():
return None
@@ -1134,7 +1149,7 @@ class SessionManager:
@staticmethod
def list_cold_sessions() -> list[dict]:
"""Return metadata for every queen session directory on disk, newest first."""
- queen_sessions_dir = Path.home() / ".hive" / "queen" / "session"
+ queen_sessions_dir = QUEENS_DIR / "default" / "sessions"
if not queen_sessions_dir.exists():
return []
diff --git a/core/framework/server/tests/test_api.py b/core/framework/server/tests/test_api.py
index ef7cc905..e7e60081 100644
--- a/core/framework/server/tests/test_api.py
+++ b/core/framework/server/tests/test_api.py
@@ -14,7 +14,7 @@ from unittest.mock import AsyncMock, MagicMock
import pytest
from aiohttp.test_utils import TestClient, TestServer
-from framework.runtime.triggers import TriggerDefinition
+from framework.host.triggers import TriggerDefinition
from framework.server.app import create_app
from framework.server.session_manager import Session
@@ -1055,7 +1055,7 @@ class TestNodeCriteria:
nodes, edges = nodes_and_edges
# Create a real RuntimeLogStore pointed at the temp agent dir
- from framework.runtime.runtime_log_store import RuntimeLogStore
+ from framework.tracker.runtime_log_store import RuntimeLogStore
log_store = RuntimeLogStore(base)
@@ -1110,7 +1110,7 @@ class TestLogs:
session_id, session_dir, state = sample_session
tmp_path, agent_name, base = tmp_agent_dir
- from framework.runtime.runtime_log_store import RuntimeLogStore
+ from framework.tracker.runtime_log_store import RuntimeLogStore
log_store = RuntimeLogStore(base)
session = _make_session(
@@ -1132,7 +1132,7 @@ class TestLogs:
session_id, session_dir, state = custom_id_session
tmp_path, agent_name, base = tmp_agent_dir
- from framework.runtime.runtime_log_store import RuntimeLogStore
+ from framework.tracker.runtime_log_store import RuntimeLogStore
log_store = RuntimeLogStore(base)
session = _make_session(
@@ -1154,7 +1154,7 @@ class TestLogs:
session_id, session_dir, state = sample_session
tmp_path, agent_name, base = tmp_agent_dir
- from framework.runtime.runtime_log_store import RuntimeLogStore
+ from framework.tracker.runtime_log_store import RuntimeLogStore
log_store = RuntimeLogStore(base)
session = _make_session(
@@ -1177,7 +1177,7 @@ class TestLogs:
session_id, session_dir, state = sample_session
tmp_path, agent_name, base = tmp_agent_dir
- from framework.runtime.runtime_log_store import RuntimeLogStore
+ from framework.tracker.runtime_log_store import RuntimeLogStore
log_store = RuntimeLogStore(base)
session = _make_session(
@@ -1201,7 +1201,7 @@ class TestLogs:
session_id, session_dir, state = sample_session
tmp_path, agent_name, base = tmp_agent_dir
- from framework.runtime.runtime_log_store import RuntimeLogStore
+ from framework.tracker.runtime_log_store import RuntimeLogStore
log_store = RuntimeLogStore(base)
session = _make_session(
@@ -1227,7 +1227,7 @@ class TestNodeLogs:
tmp_path, agent_name, base = tmp_agent_dir
nodes, edges = nodes_and_edges
- from framework.runtime.runtime_log_store import RuntimeLogStore
+ from framework.tracker.runtime_log_store import RuntimeLogStore
log_store = RuntimeLogStore(base)
session = _make_session(
@@ -1256,7 +1256,7 @@ class TestNodeLogs:
@pytest.mark.asyncio
async def test_node_logs_missing_session_id(self, nodes_and_edges):
nodes, edges = nodes_and_edges
- from framework.runtime.runtime_log_store import RuntimeLogStore
+ from framework.tracker.runtime_log_store import RuntimeLogStore
log_store = RuntimeLogStore(Path("/tmp/dummy"))
session = _make_session(nodes=nodes, edges=edges, log_store=log_store)
diff --git a/core/framework/skills/_default_skills/browser-automation/SKILL.md b/core/framework/skills/_default_skills/browser-automation/SKILL.md
new file mode 100644
index 00000000..bc481fc4
--- /dev/null
+++ b/core/framework/skills/_default_skills/browser-automation/SKILL.md
@@ -0,0 +1,80 @@
+---
+name: hive.browser-automation
+description: Best practices for browser automation via gcu-tools MCP server (reading pages, navigation, scrolling, tab management, shadow DOM, coordinates).
+metadata:
+ author: hive
+ type: default-skill
+---
+
+## Operational Protocol: Browser Automation
+
+Follow these rules for reliable, efficient browser interaction.
+
+### Reading Pages
+- ALWAYS prefer `browser_snapshot` over `browser_get_text("body")` -- it returns a compact ~1-5 KB accessibility tree vs 100+ KB of raw HTML.
+- Interaction tools (`browser_click`, `browser_type`, `browser_fill`, `browser_scroll`, etc.) return a page snapshot automatically in their result. Use it to decide your next action -- do NOT call `browser_snapshot` separately after every action. Only call `browser_snapshot` when you need a fresh view without performing an action, or after setting `auto_snapshot=false`.
+- Do NOT use `browser_screenshot` to read text -- use `browser_snapshot` for that (compact, searchable, fast).
+- DO use `browser_screenshot` when you need visual context: charts, images, canvas elements, layout verification, or when the snapshot doesn't capture what you need.
+- Only fall back to `browser_get_text` for extracting specific small elements by CSS selector.
+
+### Navigation & Waiting
+- `browser_navigate` and `browser_open` already wait for the page to load. Do NOT call `browser_wait` with no arguments after navigation -- it wastes time. Only use `browser_wait` when you need a *specific element* or *text* to appear (pass `selector` or `text`).
+- NEVER re-navigate to the same URL after scrolling -- this resets your scroll position and loses loaded content.
+
+### Scrolling
+- Use large scroll amounts (~2000 px) when loading more content -- sites like Twitter and LinkedIn lazy-load additional items as you scroll.
+- The scroll result includes a snapshot automatically -- no need to call `browser_snapshot` separately.
+
+### Batching Actions
+- You can call multiple tools in a single turn -- they execute in parallel. ALWAYS batch independent actions together. Examples: fill multiple form fields in one turn, navigate + snapshot in one turn, click + scroll if targeting different elements.
+- When batching, set `auto_snapshot=false` on all but the last action to avoid redundant snapshots.
+- Aim for 3-5 tool calls per turn minimum. One tool call per turn is wasteful.
+
+### Error Recovery
+- If a tool fails, retry once with the same approach.
+- If it fails a second time, STOP retrying and switch approach.
+- If `browser_snapshot` fails, try `browser_get_text` with a specific small selector as fallback.
+- If `browser_open` fails or page seems stale, `browser_stop`, then `browser_start`, then retry.
+
+### Tab Management
+**Close tabs as soon as you are done with them** -- not only at the end of the task. After reading or extracting data from a tab, close it immediately.
+
+- Finished reading/extracting from a tab? `browser_close(target_id=...)`
+- Completed a multi-tab workflow? `browser_close_finished()` to clean up all your tabs
+- More than 3 tabs open? Stop and close finished ones before opening more
+- Popup appeared that you didn't need? Close it immediately
+
+`browser_tabs` returns an `origin` field for each tab:
+- `"agent"` -- you opened it; you own it; close it when done
+- `"popup"` -- opened by a link or script; close after extracting what you need
+- `"startup"` or `"user"` -- leave these alone unless the task requires it
+
+Never accumulate tabs. Treat every tab you open as a resource you must free.
+
+### Shadow DOM & Overlays
+Some sites (LinkedIn messaging, etc.) render content inside closed shadow roots invisible to regular DOM queries.
+
+- `browser_shadow_query("#interop-outlet >>> #msg-overlay >>> p")` -- uses `>>>` to pierce shadow roots. Returns `rect` in CSS pixels and `physicalRect` ready for coordinate tools.
+- `browser_get_rect(selector="...", pierce_shadow=true)` -- get physical rect for any element including shadow DOM.
+
+### Coordinate System
+There are THREE coordinate spaces. Using the wrong one causes clicks/hovers to land in the wrong place.
+
+| Space | Used by | How to get |
+|---|---|---|
+| Physical pixels | `browser_click_coordinate` | `browser_coords` `physical_x/y` |
+| CSS pixels | `getBoundingClientRect()`, `elementFromPoint` | `browser_coords` `css_x/y` |
+| Screenshot pixels | What you see in the image | Raw position in screenshot |
+
+**Converting screenshot to physical**: `browser_coords(x, y)` then use `physical_x/y`.
+**Converting CSS to physical**: multiply by `window.devicePixelRatio` (typically 1.6 on HiDPI).
+**Never** pass raw `getBoundingClientRect()` values to coordinate tools without multiplying by DPR first.
+
+### Login & Auth Walls
+- If you see a "Log in" or "Sign up" prompt, report the auth wall immediately -- do NOT attempt to log in.
+- Check for cookie consent banners and dismiss them if they block content.
+
+### Efficiency
+- Minimize tool calls -- combine actions where possible.
+- When a snapshot result is saved to a spillover file, use `run_command` with grep to extract specific data rather than re-reading the full file.
+- Call `set_output` in the same turn as your last browser action when possible -- don't waste a turn.
diff --git a/core/framework/skills/catalog.py b/core/framework/skills/catalog.py
index 3621dbe1..08d3285e 100644
--- a/core/framework/skills/catalog.py
+++ b/core/framework/skills/catalog.py
@@ -64,15 +64,14 @@ class SkillCatalog:
Returns empty string if no community/user skills are discovered
(default skills are handled separately by DefaultSkillManager).
"""
- # Filter out framework-scope skills (default skills) — they're
- # injected via the protocols prompt, not the catalog
- community_skills = [s for s in self._skills.values() if s.source_scope != "framework"]
+ # All skills go through the catalog for progressive disclosure.
+ all_skills = list(self._skills.values())
- if not community_skills:
+ if not all_skills:
return ""
lines = [""]
- for skill in sorted(community_skills, key=lambda s: s.name):
+ for skill in sorted(all_skills, key=lambda s: s.name):
lines.append(" ")
lines.append(f" {escape(skill.name)}")
lines.append(f" {escape(skill.description)}")
diff --git a/core/framework/skills/discovery.py b/core/framework/skills/discovery.py
index 2db1a78b..cd0ab6eb 100644
--- a/core/framework/skills/discovery.py
+++ b/core/framework/skills/discovery.py
@@ -56,6 +56,16 @@ class SkillDiscovery:
def __init__(self, config: DiscoveryConfig | None = None):
self._config = config or DiscoveryConfig()
+ self._scanned_dirs: list[Path] = []
+
+ @property
+ def scanned_directories(self) -> list[str]:
+ """Return the skill directories that were scanned during discovery.
+
+ Populated after :meth:`discover` runs. Used by the hot-reload
+ watcher to know which directories to monitor for changes.
+ """
+ return [str(d) for d in self._scanned_dirs if d.exists()]
def discover(self) -> list[ParsedSkill]:
"""Scan all scopes and return deduplicated skill list.
@@ -70,11 +80,13 @@ class SkillDiscovery:
Later entries override earlier ones on name collision.
"""
all_skills: list[ParsedSkill] = []
+ self._scanned_dirs = []
# Framework scope (lowest precedence)
if not self._config.skip_framework_scope:
framework_dir = Path(__file__).parent / "_default_skills"
if framework_dir.is_dir():
+ self._scanned_dirs.append(framework_dir)
all_skills.extend(self._scan_scope(framework_dir, "framework"))
# User scope
@@ -84,11 +96,13 @@ class SkillDiscovery:
# Cross-client (lower precedence within user scope)
user_agents = home / ".agents" / "skills"
if user_agents.is_dir():
+ self._scanned_dirs.append(user_agents)
all_skills.extend(self._scan_scope(user_agents, "user"))
# Hive-specific (higher precedence within user scope)
user_hive = home / ".hive" / "skills"
if user_hive.is_dir():
+ self._scanned_dirs.append(user_hive)
all_skills.extend(self._scan_scope(user_hive, "user"))
# Project scope (highest precedence)
@@ -98,11 +112,13 @@ class SkillDiscovery:
# Cross-client
project_agents = root / ".agents" / "skills"
if project_agents.is_dir():
+ self._scanned_dirs.append(project_agents)
all_skills.extend(self._scan_scope(project_agents, "project"))
# Hive-specific
project_hive = root / ".hive" / "skills"
if project_hive.is_dir():
+ self._scanned_dirs.append(project_hive)
all_skills.extend(self._scan_scope(project_hive, "project"))
resolved = self._resolve_collisions(all_skills)
diff --git a/core/framework/skills/manager.py b/core/framework/skills/manager.py
index 9c1b4b80..5f9006fd 100644
--- a/core/framework/skills/manager.py
+++ b/core/framework/skills/manager.py
@@ -68,6 +68,9 @@ class SkillsManager:
self._protocols_prompt: str = ""
self._allowlisted_dirs: list[str] = []
self._default_mgr: object = None # DefaultSkillManager, set after load()
+ # Hot-reload state
+ self._watched_dirs: list[str] = []
+ self._watcher_task: object = None # asyncio.Task, set by start_watching()
# ------------------------------------------------------------------
# Factory for backwards-compat bridge
@@ -117,62 +120,140 @@ class SkillsManager:
skills_config = self._config.skills_config
- # 1. Community skill discovery (when project_root is available)
- catalog_prompt = ""
+ # 1. Skill discovery -- always run to pick up framework skills;
+ # community/project skills only when project_root is available.
+ discovery = SkillDiscovery(DiscoveryConfig(
+ project_root=self._config.project_root,
+ skip_framework_scope=False,
+ ))
+ discovered = discovery.discover()
+ self._watched_dirs = discovery.scanned_directories
+
+ # Trust-gate project-scope skills (AS-13)
if self._config.project_root is not None and not self._config.skip_community_discovery:
from framework.skills.trust import TrustGate
- discovery = SkillDiscovery(DiscoveryConfig(project_root=self._config.project_root))
- discovered = discovery.discover()
-
- # Trust-gate project-scope skills (AS-13)
discovered = TrustGate(interactive=self._config.interactive).filter_and_gate(
discovered, project_dir=self._config.project_root
)
- catalog = SkillCatalog(discovered)
- self._allowlisted_dirs = catalog.allowlisted_dirs
- catalog_prompt = catalog.to_prompt()
+ catalog = SkillCatalog(discovered)
+ self._allowlisted_dirs = catalog.allowlisted_dirs
+ catalog_prompt = catalog.to_prompt()
- # Pre-activated community skills
- if skills_config.skills:
- pre_activated = catalog.build_pre_activated_prompt(skills_config.skills)
- if pre_activated:
- if catalog_prompt:
- catalog_prompt = f"{catalog_prompt}\n\n{pre_activated}"
- else:
- catalog_prompt = pre_activated
+ # Pre-activated community skills
+ if skills_config.skills:
+ pre_activated = catalog.build_pre_activated_prompt(skills_config.skills)
+ if pre_activated:
+ if catalog_prompt:
+ catalog_prompt = f"{catalog_prompt}\n\n{pre_activated}"
+ else:
+ catalog_prompt = pre_activated
- # 2. Default skills (always loaded unless explicitly disabled)
+ # 2. Default skills -- discovered via _default_skills/ and included
+ # in the catalog for progressive disclosure (no longer force-injected
+ # as protocols_prompt). DefaultSkillManager still handles config,
+ # logging, and metadata.
default_mgr = DefaultSkillManager(config=skills_config)
default_mgr.load()
default_mgr.log_active_skills()
- protocols_prompt = default_mgr.build_protocols_prompt()
self._default_mgr = default_mgr
- # DX-3: Community skill startup summary
- if self._config.project_root is not None and not self._config.skip_community_discovery:
- community_count = len(catalog._skills) if catalog_prompt else 0
- pre_activated_count = len(skills_config.skills) if skills_config.skills else 0
- logger.info(
- "Skills: %d community (%d catalog, %d pre-activated)",
- community_count,
- community_count,
- pre_activated_count,
- )
# 3. Cache
self._catalog_prompt = catalog_prompt
- self._protocols_prompt = protocols_prompt
+ self._protocols_prompt = "" # all skills use progressive disclosure now
- if protocols_prompt:
- logger.info(
- "Skill system ready: protocols=%d chars, catalog=%d chars",
- len(protocols_prompt),
- len(catalog_prompt),
- )
- else:
+        if not catalog_prompt:
logger.warning("Skill system produced empty protocols_prompt")
+ # ------------------------------------------------------------------
+ # Hot-reload: watch skill directories for SKILL.md changes.
+ # ------------------------------------------------------------------
+
+ async def start_watching(self) -> None:
+ """Start a background task watching skill directories for changes.
+
+ When a ``SKILL.md`` file is added/modified/removed, the cached
+ ``skills_catalog_prompt`` is rebuilt. The next node iteration picks
+ up the new prompt automatically via the ``dynamic_prompt_provider``.
+
+ Silently no-ops when ``watchfiles`` is not installed or when no
+ directories are being watched (e.g. bare mode, no project_root).
+ """
+ import asyncio
+
+ try:
+ import watchfiles # noqa: F401 -- optional dep check
+ except ImportError:
+ logger.debug("watchfiles not installed; skill hot-reload disabled")
+ return
+
+ if not self._watched_dirs:
+ logger.debug("No skill directories to watch; hot-reload skipped")
+ return
+
+ if self._watcher_task is not None:
+ return # already watching
+
+ self._watcher_task = asyncio.create_task(
+ self._watch_loop(),
+ name="skills-hot-reload",
+ )
+ logger.info(
+ "Skill hot-reload enabled (watching %d directories)",
+ len(self._watched_dirs),
+ )
+
+ async def stop_watching(self) -> None:
+ """Cancel the background watcher task (if running)."""
+ import asyncio
+
+ task = self._watcher_task
+ if task is None:
+ return
+ self._watcher_task = None
+ if not task.done(): # type: ignore[attr-defined]
+ task.cancel() # type: ignore[attr-defined]
+ try:
+ await task # type: ignore[misc]
+ except asyncio.CancelledError:
+ pass
+
+ async def _watch_loop(self) -> None:
+ """Background coroutine that watches SKILL.md files and triggers reload."""
+ import asyncio
+
+ import watchfiles
+
+ def _filter(_change: object, path: str) -> bool:
+ return path.endswith("SKILL.md")
+
+ try:
+ async for changes in watchfiles.awatch(
+ *self._watched_dirs,
+ watch_filter=_filter,
+ debounce=1000,
+ ):
+ paths = [p for _, p in changes]
+ logger.info("SKILL.md changes detected: %s", paths)
+ try:
+ self._reload()
+ except Exception:
+ logger.exception("Skill reload failed; keeping previous prompts")
+ except asyncio.CancelledError:
+ raise
+ except Exception:
+ logger.exception("Skill watcher crashed; hot-reload disabled for this session")
+
+ def _reload(self) -> None:
+ """Re-run discovery and rebuild cached prompts."""
+ # Reset loaded flag so _do_load actually re-runs.
+ self._loaded = False
+ self._do_load()
+ self._loaded = True
+ logger.info("Skills reloaded: protocols=%d chars, catalog=%d chars",
+ len(self._protocols_prompt), len(self._catalog_prompt))
+
# ------------------------------------------------------------------
# Prompt accessors (consumed by downstream layers)
# ------------------------------------------------------------------
diff --git a/core/framework/storage/migrate_v2.py b/core/framework/storage/migrate_v2.py
new file mode 100644
index 00000000..33273926
--- /dev/null
+++ b/core/framework/storage/migrate_v2.py
@@ -0,0 +1,145 @@
+"""One-time migration to the v2 ~/.hive/ directory structure.
+
+Moves:
+- exports/{name}/ -> ~/.hive/colonies/{name}/
+- ~/.hive/queen/session/{id}/ -> ~/.hive/agents/queens/default/sessions/{id}/
+- ~/.hive/queen/global_memory/ -> ~/.hive/memories/global/
+
+Runs automatically on first startup when the marker file is absent.
+Safe to re-run (skips already-migrated items).
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import shutil
+from pathlib import Path
+
+from framework.config import COLONIES_DIR, HIVE_HOME, MEMORIES_DIR, QUEENS_DIR
+
+logger = logging.getLogger(__name__)
+
+_MIGRATION_MARKER = HIVE_HOME / ".migrated-v2"
+
+
+def needs_migration() -> bool:
+ """Return True if the v2 migration has not yet run."""
+ return not _MIGRATION_MARKER.exists()
+
+
+def run_migration(*, exports_dir: Path | None = None) -> None:
+ """Run the full v2 migration. Idempotent and safe to re-run."""
+ if not needs_migration():
+ return
+
+ logger.info("migrate_v2: starting ~/.hive structure migration")
+
+ _migrate_colonies(exports_dir or Path("exports"))
+ _migrate_queen_sessions()
+ _migrate_memories()
+ _cleanup_old_queen_dir()
+
+ # Write marker
+ HIVE_HOME.mkdir(parents=True, exist_ok=True)
+ _MIGRATION_MARKER.write_text("1\n", encoding="utf-8")
+ logger.info("migrate_v2: migration complete")
+
+
+def _migrate_colonies(exports_dir: Path) -> None:
+ """Copy exports/{name}/ -> ~/.hive/colonies/{name}/."""
+ if not exports_dir.exists():
+ return
+
+ COLONIES_DIR.mkdir(parents=True, exist_ok=True)
+ migrated = 0
+
+ for agent_dir in sorted(exports_dir.iterdir()):
+ if not agent_dir.is_dir() or agent_dir.name.startswith("."):
+ continue
+ target = COLONIES_DIR / agent_dir.name
+ if target.exists():
+ continue
+ try:
+ shutil.copytree(agent_dir, target)
+ migrated += 1
+ except OSError:
+ logger.warning("migrate_v2: failed to copy %s", agent_dir, exc_info=True)
+
+ if migrated:
+ logger.info("migrate_v2: copied %d agent(s) from exports/ to colonies/", migrated)
+
+
+def _migrate_queen_sessions() -> None:
+ """Move ~/.hive/queen/session/{id}/ -> ~/.hive/agents/queens/default/sessions/{id}/."""
+ old_sessions = HIVE_HOME / "queen" / "session"
+ if not old_sessions.exists():
+ return
+
+ new_sessions = QUEENS_DIR / "default" / "sessions"
+ new_sessions.mkdir(parents=True, exist_ok=True)
+ migrated = 0
+
+ for session_dir in sorted(old_sessions.iterdir()):
+ if not session_dir.is_dir():
+ continue
+ target = new_sessions / session_dir.name
+ if target.exists():
+ continue
+ try:
+ session_dir.rename(target)
+ migrated += 1
+ except OSError:
+ logger.warning(
+ "migrate_v2: failed to move session %s", session_dir, exc_info=True
+ )
+
+ if migrated:
+ logger.info("migrate_v2: moved %d queen session(s) to new path", migrated)
+
+
+def _migrate_memories() -> None:
+ """Move ~/.hive/queen/global_memory/ -> ~/.hive/memories/global/."""
+ old_global = HIVE_HOME / "queen" / "global_memory"
+ if not old_global.exists():
+ return
+
+ new_global = MEMORIES_DIR / "global"
+ if new_global.exists():
+ # Already has content -- merge individual files
+ merged = 0
+ for f in old_global.iterdir():
+ if f.is_file() and not (new_global / f.name).exists():
+ try:
+ shutil.copy2(f, new_global / f.name)
+ merged += 1
+ except OSError:
+ pass
+ if merged:
+ logger.info("migrate_v2: merged %d memory file(s) into global/", merged)
+ return
+
+ new_global.mkdir(parents=True, exist_ok=True)
+ migrated = 0
+ for f in old_global.iterdir():
+ if f.is_file():
+ try:
+ shutil.copy2(f, new_global / f.name)
+ migrated += 1
+ except OSError:
+ pass
+
+ if migrated:
+ logger.info("migrate_v2: copied %d memory file(s) to memories/global/", migrated)
+
+
+def _cleanup_old_queen_dir() -> None:
+    """Remove ~/.hive/queen/ after migration -- skipped when sessions failed to move."""
+    old_queen = HIVE_HOME / "queen"
+    leftover = old_queen / "session"
+    if old_queen.exists() and not (leftover.exists() and any(leftover.iterdir())):
+        try:
+            shutil.rmtree(old_queen)
+            logger.info("migrate_v2: removed old ~/.hive/queen/ directory")
+        except OSError:
+            logger.debug("migrate_v2: could not remove old queen dir", exc_info=True)
diff --git a/core/framework/testing/prompts.py b/core/framework/testing/prompts.py
index 08df7625..61ae340b 100644
--- a/core/framework/testing/prompts.py
+++ b/core/framework/testing/prompts.py
@@ -68,8 +68,8 @@ for _p in ["exports", "core"]:
sys.path.insert(0, _path)
import pytest
-from framework.runner.runner import AgentRunner
-from framework.runtime.event_bus import EventType
+from framework.loader.agent_loader import AgentLoader
+from framework.host.event_bus import EventType
AGENT_PATH = Path(__file__).resolve().parents[1]
diff --git a/core/framework/tools/flowchart_utils.py b/core/framework/tools/flowchart_utils.py
index f436ceb8..0c2c37cb 100644
--- a/core/framework/tools/flowchart_utils.py
+++ b/core/framework/tools/flowchart_utils.py
@@ -119,12 +119,11 @@ def classify_flowchart_node(
return FLOWCHART_REMAP[explicit]
node_id = node["id"]
- node_type = node.get("node_type", "event_loop")
node_tools = set(node.get("tools") or [])
desc = (node.get("description") or "").lower()
# GCU / browser automation nodes → hexagon
- if node_type == "gcu":
+    if False:  # node_type "gcu" removed; branch intentionally unreachable, kept pending cleanup
return "browser"
# Entry node (first node or no incoming edges) → start terminator
diff --git a/core/framework/tools/migrate_agent.py b/core/framework/tools/migrate_agent.py
new file mode 100644
index 00000000..52119c60
--- /dev/null
+++ b/core/framework/tools/migrate_agent.py
@@ -0,0 +1,273 @@
+"""Migrate a Python-based agent export to declarative agent.yaml.
+
+Usage::
+
+ uv run python -m framework.tools.migrate_agent exports/lead_enrichment_agent
+
+Reads agent.py, nodes/__init__.py, config.py, and mcp_servers.json from the
+given directory and writes an ``agent.yaml`` file that is equivalent. The
+original Python files are left untouched.
+
+After migration, verify with::
+
+ uv run python -c "
+ from framework.loader.agent_loader import load_agent_config
+ import yaml, pathlib
+ data = yaml.safe_load(pathlib.Path('exports/lead_enrichment_agent/agent.yaml').read_text())
+ graph, goal = load_agent_config(data)
+ print(f'OK: {len(graph.nodes)} nodes, {len(graph.edges)} edges')
+ "
+"""
+
+from __future__ import annotations
+
+import importlib
+import importlib.util
+import json
+import logging
+import sys
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+def _import_module_from_path(module_name: str, file_path: Path) -> Any:
+ """Import a Python file as a module."""
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
+ if spec is None or spec.loader is None:
+ raise ImportError(f"Cannot import {file_path}")
+ mod = importlib.util.module_from_spec(spec)
+ sys.modules[module_name] = mod
+ spec.loader.exec_module(mod)
+ return mod
+
+
+def _node_to_dict(node: Any) -> dict:
+ """Convert a NodeSpec instance to a YAML-friendly dict."""
+ d: dict[str, Any] = {"id": node.id}
+ if node.name and node.name != node.id:
+ d["name"] = node.name
+ if node.description:
+ d["description"] = node.description
+ if node.node_type != "event_loop":
+ d["node_type"] = node.node_type
+ if node.client_facing:
+ d["client_facing"] = True
+ if node.max_node_visits != 1:
+ d["max_node_visits"] = node.max_node_visits
+
+ if node.input_keys:
+ d["input_keys"] = list(node.input_keys)
+ if node.output_keys:
+ d["output_keys"] = list(node.output_keys)
+ if node.nullable_output_keys:
+ d["nullable_output_keys"] = list(node.nullable_output_keys)
+
+ # Tools
+ tools_list = list(node.tools) if node.tools else []
+ if tools_list:
+ d["tools"] = {"policy": "explicit", "allowed": tools_list}
+ elif False: # gcu removed
+ d["tools"] = {"policy": "all"}
+ else:
+ d["tools"] = {"policy": "none"}
+
+ if node.sub_agents:
+ d["sub_agents"] = list(node.sub_agents)
+ if node.success_criteria:
+ d["success_criteria"] = node.success_criteria
+ if getattr(node, "failure_criteria", None):
+ d["failure_criteria"] = node.failure_criteria
+ if getattr(node, "max_retries", None):
+ d["max_retries"] = node.max_retries
+ if getattr(node, "skip_judge", False):
+ d["skip_judge"] = True
+ if getattr(node, "max_iterations", 30) != 30:
+ d["max_iterations"] = node.max_iterations
+
+ if node.system_prompt:
+ d["system_prompt"] = node.system_prompt
+
+ return d
+
+
+def _edge_to_dict(edge: Any) -> dict:
+ """Convert an EdgeSpec instance to a YAML-friendly dict."""
+ d: dict[str, Any] = {
+ "from_node": edge.source,
+ "to_node": edge.target,
+ }
+ cond = str(edge.condition.value) if hasattr(edge.condition, "value") else str(edge.condition)
+ if cond != "on_success":
+ d["condition"] = cond
+ if edge.condition_expr:
+ d["condition"] = "conditional"
+ d["condition_expr"] = edge.condition_expr
+ if edge.priority and edge.priority != 1:
+ d["priority"] = edge.priority
+ if edge.input_mapping:
+ d["input_mapping"] = dict(edge.input_mapping)
+ return d
+
+
+def migrate_agent(agent_dir: str | Path) -> dict:
+ """Read a Python-based agent export and return the declarative config dict.
+
+ The returned dict can be serialized to YAML or JSON.
+ """
+ agent_dir = Path(agent_dir).resolve()
+ agent_py = agent_dir / "agent.py"
+ if not agent_py.exists():
+ raise FileNotFoundError(f"No agent.py in {agent_dir}")
+
+ # Make the agent importable as a package (handles relative imports)
+ parent = str(agent_dir.parent)
+ if parent not in sys.path:
+ sys.path.insert(0, parent)
+
+ pkg_name = agent_dir.name
+ agent_mod = importlib.import_module(f"{pkg_name}.agent")
+
+ # Extract module-level variables
+ goal = getattr(agent_mod, "goal", None)
+ nodes = getattr(agent_mod, "nodes", [])
+ edges = getattr(agent_mod, "edges", [])
+ entry_node = getattr(agent_mod, "entry_node", "")
+ terminal_nodes = getattr(agent_mod, "terminal_nodes", [])
+ pause_nodes = getattr(agent_mod, "pause_nodes", [])
+ conversation_mode = getattr(agent_mod, "conversation_mode", "continuous")
+ identity_prompt = getattr(agent_mod, "identity_prompt", "")
+ loop_config = getattr(agent_mod, "loop_config", {})
+
+ # Config / metadata
+ config_mod = None
+ config_py = agent_dir / "config.py"
+ if config_py.exists():
+ try:
+ config_mod = importlib.import_module(f"{pkg_name}.config")
+ except ImportError:
+ pass
+ metadata = getattr(config_mod, "metadata", None)
+ default_config = getattr(config_mod, "default_config", None)
+
+ # Agent name
+ name = agent_dir.name
+ if metadata and hasattr(metadata, "name"):
+ name = str(metadata.name).lower().replace(" ", "-")
+
+ # Build config dict
+ config: dict[str, Any] = {
+ "name": name,
+ "version": getattr(metadata, "version", "1.0.0") if metadata else "1.0.0",
+ }
+ if goal and goal.description:
+ config["description"] = goal.description
+ if metadata and hasattr(metadata, "intro_message") and metadata.intro_message:
+ intro = metadata.intro_message
+ if intro and "TODO" not in intro:
+ config["metadata"] = {"intro_message": intro}
+
+ # Variables (detect config fields injected into prompts)
+ variables: dict[str, str] = {}
+ _SKIP_CONFIG = {"model", "temperature", "max_tokens", "api_key", "api_base"}
+ if default_config:
+ for attr in dir(default_config):
+ if attr.startswith("_") or attr in _SKIP_CONFIG:
+ continue
+ val = getattr(default_config, attr)
+ if isinstance(val, str) and val:
+ variables[attr] = val
+ if variables:
+ config["variables"] = variables
+
+ # Goal
+ if goal:
+ goal_dict: dict[str, Any] = {"description": goal.description}
+ if goal.success_criteria:
+ goal_dict["success_criteria"] = [sc.description for sc in goal.success_criteria]
+ if goal.constraints:
+ goal_dict["constraints"] = [c.description for c in goal.constraints]
+ config["goal"] = goal_dict
+
+ # Identity / conversation / loop
+ if identity_prompt:
+ config["identity_prompt"] = identity_prompt
+ if conversation_mode and conversation_mode != "continuous":
+ config["conversation_mode"] = conversation_mode
+ if loop_config:
+ config["loop_config"] = dict(loop_config)
+
+ # MCP servers
+ mcp_path = agent_dir / "mcp_servers.json"
+ if mcp_path.exists():
+ with open(mcp_path) as f:
+ mcp_data = json.load(f)
+ if mcp_data:
+ config["mcp_servers"] = [{"name": name} for name in mcp_data]
+
+ # Nodes
+ config["nodes"] = [_node_to_dict(n) for n in nodes]
+
+ # Edges
+ config["edges"] = [_edge_to_dict(e) for e in edges]
+
+ # Graph structure
+ config["entry_node"] = entry_node
+ if terminal_nodes:
+ config["terminal_nodes"] = terminal_nodes
+ if pause_nodes:
+ config["pause_nodes"] = pause_nodes
+
+ return config
+
+
+def write_yaml(config: dict, output_path: Path) -> None:
+ """Write config dict to YAML with clean formatting."""
+ try:
+ import yaml
+ except ImportError:
+ raise ImportError("PyYAML required: uv pip install pyyaml") from None
+
+ # Custom representer for multiline strings
+ def _str_representer(dumper: yaml.Dumper, data: str) -> Any:
+ if "\n" in data:
+ return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+ return dumper.represent_scalar("tag:yaml.org,2002:str", data)
+
+ yaml.add_representer(str, _str_representer)
+
+ with open(output_path, "w") as f:
+ yaml.dump(
+ config, f,
+ default_flow_style=False, sort_keys=False,
+ allow_unicode=True, width=120,
+ )
+
+ logger.info("Wrote %s", output_path)
+
+
+def main() -> None:
+ """CLI entry point."""
+ logging.basicConfig(level=logging.INFO, format="%(message)s")
+
+ if len(sys.argv) < 2:
+        print("Usage: uv run python -m framework.tools.migrate_agent <agent_dir>")
+ sys.exit(1)
+
+ agent_dir = Path(sys.argv[1])
+ config = migrate_agent(agent_dir)
+
+ output = agent_dir / "agent.yaml"
+ write_yaml(config, output)
+ print(f"Wrote {output}")
+
+ n_nodes = len(config["nodes"])
+ n_edges = len(config["edges"])
+ print(f"\nMigrated {config['name']}: {n_nodes} nodes, {n_edges} edges")
+    print("\nVerify by loading the generated config (see this module's docstring):")
+    print(f"  load_agent_config(yaml.safe_load(Path('{output}').read_text()))")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/core/framework/tools/queen_lifecycle/__init__.py b/core/framework/tools/queen_lifecycle/__init__.py
new file mode 100644
index 00000000..6f850e5b
--- /dev/null
+++ b/core/framework/tools/queen_lifecycle/__init__.py
@@ -0,0 +1,10 @@
+"""Queen lifecycle tools -- split into per-tool modules.
+
+The main entry point is still ``register_queen_lifecycle_tools()`` in
+``queen_lifecycle_tools.py``. This package provides the shared context
+and individual tool registration functions.
+"""
+
+from framework.tools.queen_lifecycle.context import QueenToolContext
+
+__all__ = ["QueenToolContext"]
diff --git a/core/framework/tools/queen_lifecycle/context.py b/core/framework/tools/queen_lifecycle/context.py
new file mode 100644
index 00000000..4da53ecb
--- /dev/null
+++ b/core/framework/tools/queen_lifecycle/context.py
@@ -0,0 +1,52 @@
+"""Shared context for queen lifecycle tools.
+
+All queen tools receive this context instead of closing over
+individual variables from the registration function.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from dataclasses import dataclass
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class QueenToolContext:
+ """Shared state passed to all queen lifecycle tool implementations."""
+
+ session: Any # Session or WorkerSessionAdapter
+ session_manager: Any | None = None
+ manager_session_id: str | None = None
+ phase_state: Any | None = None # QueenPhaseState
+ registry: Any = None # ToolRegistry
+
+ def get_runtime(self):
+ """Get current graph runtime from session (late-binding)."""
+ return getattr(self.session, "graph_runtime", None)
+
+ def update_meta(self, updates: dict) -> None:
+ """Update session metadata JSON."""
+ if self.session_manager is None or self.manager_session_id is None:
+ return
+ try:
+ srv_session = self.session_manager.get_session(self.manager_session_id)
+ if srv_session is None:
+ return
+ meta_path = getattr(srv_session, "meta_path", None)
+ if meta_path is None:
+ return
+ import pathlib
+
+ meta_file = pathlib.Path(meta_path)
+ if meta_file.exists():
+ data = json.loads(meta_file.read_text(encoding="utf-8"))
+ else:
+ data = {}
+ data.update(updates)
+ meta_file.write_text(json.dumps(data, indent=2) + "\n")
+ except Exception:
+ logger.debug("Failed to update session meta", exc_info=True)
diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py
index ee7e0cb9..229673ba 100644
--- a/core/framework/tools/queen_lifecycle_tools.py
+++ b/core/framework/tools/queen_lifecycle_tools.py
@@ -43,8 +43,8 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any
from framework.credentials.models import CredentialError
-from framework.runner.preload_validation import credential_errors_to_json, validate_credentials
-from framework.runtime.event_bus import AgentEvent, EventType
+from framework.loader.preload_validation import credential_errors_to_json, validate_credentials
+from framework.host.event_bus import AgentEvent, EventType
from framework.server.app import validate_agent_path
from framework.tools.flowchart_utils import (
FLOWCHART_TYPES,
@@ -55,9 +55,9 @@ from framework.tools.flowchart_utils import (
)
if TYPE_CHECKING:
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.agent_runtime import AgentRuntime
- from framework.runtime.event_bus import EventBus
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.agent_host import AgentHost
+ from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -323,7 +323,7 @@ class QueenPhaseState:
)
-def build_worker_profile(runtime: AgentRuntime, agent_path: Path | str | None = None) -> str:
+def build_worker_profile(runtime: AgentHost, agent_path: Path | str | None = None) -> str:
"""Build a worker capability profile from its graph/goal definition.
Injected into the queen's system prompt so it knows what the worker
@@ -452,7 +452,7 @@ async def _persist_active_triggers(session: Any, session_id: str) -> None:
async def _start_trigger_timer(session: Any, trigger_id: str, tdef: Any) -> None:
"""Start an asyncio background task that fires the trigger on a timer."""
- from framework.graph.event_loop_node import TriggerEvent
+ from framework.agent_loop.agent_loop import TriggerEvent
cron_expr = tdef.trigger_config.get("cron")
interval_minutes = tdef.trigger_config.get("interval_minutes")
@@ -513,8 +513,8 @@ async def _start_trigger_timer(session: Any, trigger_id: str, tdef: Any) -> None
async def _start_trigger_webhook(session: Any, trigger_id: str, tdef: Any) -> None:
"""Subscribe to WEBHOOK_RECEIVED events and route matching ones to the queen."""
- from framework.graph.event_loop_node import TriggerEvent
- from framework.runtime.webhook_server import WebhookRoute, WebhookServer, WebhookServerConfig
+ from framework.agent_loop.agent_loop import TriggerEvent
+ from framework.host.webhook_server import WebhookRoute, WebhookServer, WebhookServerConfig
bus = session.event_bus
path = tdef.trigger_config.get("path", "")
@@ -722,54 +722,6 @@ def _dissolve_planning_nodes(
nodes[:] = [n for n in nodes if n["id"] != d_id]
del node_by_id[d_id]
- # ── Dissolve sub-agent nodes ──────────────────────────────
- # Sub-agent nodes are leaf delegates: parent -> subagent (no outgoing).
- # Dissolution adds the subagent's ID to parent's sub_agents list.
- subagent_ids = [
- n["id"]
- for n in nodes
- if n.get("flowchart_type") == "browser" or n.get("node_type") == "gcu"
- ]
-
- for sa_id in subagent_ids:
- sa_node = node_by_id.get(sa_id)
- if sa_node is None:
- continue
-
- in_edges = _incoming(sa_id)
- out_edges = _outgoing(sa_id)
-
- # Validate: sub-agent nodes must be leaves (no outgoing edges)
- if out_edges:
- logger.warning(
- "Sub-agent node '%s' has outgoing edges — they will be dropped "
- "during dissolution. Sub-agent nodes should be leaf nodes.",
- sa_id,
- )
-
- # Attach to each predecessor's sub_agents list
- for ie in in_edges:
- pred_id = ie["source"]
- pred = node_by_id.get(pred_id)
- if pred is None:
- continue
-
- existing_subs = pred.get("sub_agents") or []
- if sa_id not in existing_subs:
- existing_subs.append(sa_id)
- pred["sub_agents"] = existing_subs
-
- # Record absorption
- prev_absorbed = absorbed.get(pred_id, [pred_id])
- if sa_id not in prev_absorbed:
- prev_absorbed.append(sa_id)
- absorbed[pred_id] = prev_absorbed
-
- # Remove sub-agent node and all its edges
- edges[:] = [e for e in edges if e["source"] != sa_id and e["target"] != sa_id]
- nodes[:] = [n for n in nodes if n["id"] != sa_id]
- del node_by_id[sa_id]
-
# Build complete flowchart_map (identity for non-absorbed nodes)
flowchart_map: dict[str, list[str]] = {}
for n in nodes:
@@ -799,8 +751,11 @@ def _update_meta_json(session_manager, manager_session_id, updates: dict) -> Non
srv_session = session_manager.get_session(manager_session_id)
if not srv_session:
return
+ from framework.config import QUEENS_DIR
+
storage_sid = getattr(srv_session, "queen_resume_from", None) or srv_session.id
- meta_path = Path.home() / ".hive" / "queen" / "session" / storage_sid / "meta.json"
+ queen_name = getattr(srv_session, "queen_name", "default")
+ meta_path = QUEENS_DIR / queen_name / "sessions" / storage_sid / "meta.json"
try:
existing = {}
if meta_path.exists():
@@ -816,7 +771,7 @@ def register_queen_lifecycle_tools(
session: Any = None,
session_id: str | None = None,
# Legacy params — used by TUI when not passing a session object
- graph_runtime: AgentRuntime | None = None,
+ graph_runtime: AgentHost | None = None,
event_bus: EventBus | None = None,
storage_path: Path | None = None,
# Server context — enables load_built_agent tool
@@ -1388,81 +1343,6 @@ def register_queen_lifecycle_tools(
nodes[:] = [n for n in nodes if n["id"] != d_id]
del node_by_id[d_id]
- # ── Dissolve sub-agent nodes ──────────────────────────────
- # Sub-agent nodes are leaf delegates: parent → subagent (no outgoing).
- # Dissolution adds the subagent's ID to parent's sub_agents list.
- subagent_ids = [
- n["id"]
- for n in nodes
- if n.get("flowchart_type") == "browser" or n.get("node_type") == "gcu"
- ]
-
- for sa_id in subagent_ids:
- sa_node = node_by_id.get(sa_id)
- if sa_node is None:
- continue
-
- in_edges = _incoming(sa_id)
- out_edges = _outgoing(sa_id)
-
- # Validate: sub-agent nodes must be leaves (no outgoing edges)
- if out_edges:
- logger.warning(
- "Sub-agent node '%s' has outgoing edges — they will be dropped "
- "during dissolution. Sub-agent nodes should be leaf nodes.",
- sa_id,
- )
-
- # Attach to each predecessor's sub_agents list
- for ie in in_edges:
- pred_id = ie["source"]
- pred = node_by_id.get(pred_id)
- if pred is None:
- continue
-
- existing_subs = pred.get("sub_agents") or []
- if sa_id not in existing_subs:
- existing_subs.append(sa_id)
- pred["sub_agents"] = existing_subs
-
- # Record absorption
- prev_absorbed = absorbed.get(pred_id, [pred_id])
- if sa_id not in prev_absorbed:
- prev_absorbed.append(sa_id)
- absorbed[pred_id] = prev_absorbed
-
- # Remove sub-agent node and all its edges
- edges[:] = [e for e in edges if e["source"] != sa_id and e["target"] != sa_id]
- nodes[:] = [n for n in nodes if n["id"] != sa_id]
- del node_by_id[sa_id]
-
- # ── Dissolve implicit sub-agents ─────────────────────────
- # Nodes that appear in another node's sub_agents list but weren't
- # caught above (e.g. GCU nodes with flowchart_type="browser" where
- # the queen set sub_agents directly on the parent).
- implicit_sa_ids: list[str] = []
- for n in nodes:
- for sa_id in n.get("sub_agents") or []:
- if sa_id in node_by_id and sa_id != n["id"]:
- implicit_sa_ids.append(sa_id)
-
- for sa_id in implicit_sa_ids:
- if sa_id not in node_by_id:
- continue # already removed
-
- # Find which parent(s) reference this sub-agent
- for n in nodes:
- if sa_id in (n.get("sub_agents") or []) and n["id"] != sa_id:
- prev_absorbed = absorbed.get(n["id"], [n["id"]])
- if sa_id not in prev_absorbed:
- prev_absorbed.append(sa_id)
- absorbed[n["id"]] = prev_absorbed
-
- # Remove the sub-agent node and its edges
- edges[:] = [e for e in edges if e["source"] != sa_id and e["target"] != sa_id]
- nodes[:] = [n for n in nodes if n["id"] != sa_id]
- del node_by_id[sa_id]
-
# Build complete flowchart_map (identity for non-absorbed nodes)
flowchart_map: dict[str, list[str]] = {}
for n in nodes:
@@ -1470,14 +1350,9 @@ def register_queen_lifecycle_tools(
flowchart_map[nid] = absorbed.get(nid, [nid])
# Rebuild terminal_nodes (decision targets may have changed).
- # Sub-agent nodes are leaf helpers, not endpoints — exclude them.
- post_sa_ids: set[str] = set()
- for n in nodes:
- for sa_id in n.get("sub_agents") or []:
- post_sa_ids.add(sa_id)
sources = {e["source"] for e in edges}
all_ids = {n["id"] for n in nodes}
- terminal_ids = all_ids - sources - post_sa_ids
+ terminal_ids = all_ids - sources
if not terminal_ids and nodes:
terminal_ids = {nodes[-1]["id"]}
@@ -1563,7 +1438,6 @@ def register_queen_lifecycle_tools(
"input_keys": n.get("input_keys", []),
"output_keys": n.get("output_keys", []),
"success_criteria": n.get("success_criteria", ""),
- "sub_agents": n.get("sub_agents", []),
# Decision nodes: the yes/no question to evaluate
"decision_clause": n.get("decision_clause", ""),
# Explicit flowchart override (preserved for classification)
@@ -1601,219 +1475,7 @@ def register_queen_lifecycle_tools(
}
)
- # ── GCU nodes cannot be children of decision nodes ─────────
- # Decision nodes dissolve into their predecessor. If a GCU node
- # is a decision child, after dissolution it would become a
- # conditional workflow step — violating the leaf sub-agent rule.
- # Rewire: move the GCU to the decision's predecessor as a
- # sub-agent and remove the decision → GCU edge.
- node_by_id_v = {n["id"]: n for n in validated_nodes}
- decision_node_ids = {
- n["id"] for n in validated_nodes if n.get("flowchart_type") == "decision"
- }
- gcu_node_ids = {
- n["id"]
- for n in validated_nodes
- if n.get("node_type") == "gcu" or n.get("flowchart_type") == "browser"
- }
topology_corrections: list[str] = []
- if decision_node_ids and gcu_node_ids:
- for d_id in decision_node_ids:
- gcu_children = [
- e
- for e in validated_edges
- if e["source"] == d_id and e["target"] in gcu_node_ids
- ]
- if not gcu_children:
- continue
- d_parents = [e["source"] for e in validated_edges if e["target"] == d_id]
- for gc_edge in gcu_children:
- gc_id = gc_edge["target"]
- logger.warning(
- "GCU node '%s' is a child of decision node '%s' "
- "— moving it to the decision's predecessor.",
- gc_id,
- d_id,
- )
- topology_corrections.append(
- f"GCU node '{gc_id}' was a child of decision "
- f"node '{d_id}' — invalid because decision "
- f"nodes dissolve at build time. Moved "
- f"'{gc_id}' to predecessor as a sub-agent."
- )
- # Remove the decision → GCU edge
- validated_edges[:] = [
- e
- for e in validated_edges
- if not (e["source"] == d_id and e["target"] == gc_id)
- ]
- # Remove any outgoing edges from the GCU node
- # (keep report edges back to predecessors)
- validated_edges[:] = [
- e
- for e in validated_edges
- if e["source"] != gc_id or e["target"] in set(d_parents)
- ]
- # Assign GCU as sub-agent of predecessor(s)
- for pid in d_parents:
- parent = node_by_id_v.get(pid)
- if parent is None:
- continue
- existing = parent.get("sub_agents") or []
- if gc_id not in existing:
- existing.append(gc_id)
- parent["sub_agents"] = existing
-
- # ── Enforce GCU / subagent leaf constraint ────────────────
- # GCU nodes and nodes with flowchart_type "subagent" are leaf
- # delegates: they can only receive a delegate edge IN from
- # their parent and send a report edge OUT back to that parent.
- # Any other outgoing edges are design errors — strip them and
- # auto-assign the node as a sub-agent of its predecessor.
- leaf_node_ids: set[str] = set()
- for n in validated_nodes:
- if n.get("node_type") == "gcu" or n.get("flowchart_type") == "browser":
- leaf_node_ids.add(n["id"])
- if leaf_node_ids:
- for leaf_id in leaf_node_ids:
- # Find edges where this leaf node is the source
- out_edges = [e for e in validated_edges if e["source"] == leaf_id]
- in_edges = [e for e in validated_edges if e["target"] == leaf_id]
-
- # Identify the parent (predecessor that connects IN)
- parent_ids = [e["source"] for e in in_edges]
-
- if not out_edges:
- # Already a proper leaf — still ensure sub_agents is set
- for pid in parent_ids:
- parent = node_by_id_v.get(pid)
- if parent is None:
- continue
- existing = parent.get("sub_agents") or []
- if leaf_id not in existing:
- existing.append(leaf_id)
- parent["sub_agents"] = existing
- continue
-
- # Strip all outgoing edges from the leaf node that
- # don't go back to a parent (report edges are OK)
- illegal_targets: list[str] = []
- for oe in out_edges:
- if oe["target"] not in parent_ids:
- illegal_targets.append(oe["target"])
-
- if illegal_targets:
- logger.warning(
- "GCU/subagent node '%s' has illegal outgoing "
- "edges to %s — stripping them. GCU nodes "
- "must be leaf sub-agents.",
- leaf_id,
- illegal_targets,
- )
- topology_corrections.append(
- f"GCU node '{leaf_id}' had illegal edges to "
- f"{illegal_targets} — stripped. GCU nodes MUST "
- f"be leaf sub-agents, never in the linear flow."
- )
- # Rewire: predecessor → leaf's targets (skip leaf)
- for parent_id in parent_ids:
- for tgt_id in illegal_targets:
- validated_edges.append(
- {
- "id": f"edge-rewire-{len(validated_edges)}",
- "source": parent_id,
- "target": tgt_id,
- "condition": "on_success",
- "description": "",
- "label": "",
- }
- )
- # Remove the illegal edges
- validated_edges[:] = [
- e
- for e in validated_edges
- if not (e["source"] == leaf_id and e["target"] in set(illegal_targets))
- ]
-
- # Ensure the leaf is in its parent's sub_agents list
- for pid in parent_ids:
- parent = node_by_id_v.get(pid)
- if parent is None:
- continue
- existing = parent.get("sub_agents") or []
- if leaf_id not in existing:
- existing.append(leaf_id)
- parent["sub_agents"] = existing
-
- # ── Remove orphaned GCU / subagent nodes ──────────────────
- # After enforcing the leaf constraint, any GCU/subagent node
- # that has zero edges AND is not in any parent's sub_agents
- # list is orphaned — remove it and warn the queen.
- all_edge_node_ids = set()
- for e in validated_edges:
- all_edge_node_ids.add(e["source"])
- all_edge_node_ids.add(e["target"])
- all_sa_refs: set[str] = set()
- for n in validated_nodes:
- for sa_id in n.get("sub_agents") or []:
- all_sa_refs.add(sa_id)
-
- orphaned_ids: list[str] = []
- for lid in leaf_node_ids:
- if lid not in all_edge_node_ids and lid not in all_sa_refs:
- orphaned_ids.append(lid)
-
- if orphaned_ids:
- for oid in orphaned_ids:
- logger.warning(
- "GCU/subagent node '%s' is orphaned (no edges, "
- "not in any parent's sub_agents) — removing it.",
- oid,
- )
- topology_corrections.append(
- f"GCU node '{oid}' was orphaned (no edges, not "
- f"assigned as a sub-agent of any parent node) — "
- f"removed. Add it to a parent node's sub_agents "
- f"list and re-save the draft."
- )
- validated_nodes[:] = [n for n in validated_nodes if n["id"] not in set(orphaned_ids)]
- node_by_id_v = {n["id"]: n for n in validated_nodes}
-
- # Synthesize visual edges for sub-agents that are referenced in
- # a parent's sub_agents list but have no connecting edge yet.
- node_id_set = {n["id"] for n in validated_nodes}
- existing_edge_pairs = {(e["source"], e["target"]) for e in validated_edges}
- edge_counter = len(validated_edges)
- for n in validated_nodes:
- for sa_id in n.get("sub_agents") or []:
- if sa_id not in node_id_set:
- continue
- if (n["id"], sa_id) not in existing_edge_pairs:
- validated_edges.append(
- {
- "id": f"edge-subagent-{edge_counter}",
- "source": n["id"],
- "target": sa_id,
- "condition": "always",
- "description": "sub-agent delegation",
- "label": "delegate",
- }
- )
- edge_counter += 1
- existing_edge_pairs.add((n["id"], sa_id))
- if (sa_id, n["id"]) not in existing_edge_pairs:
- validated_edges.append(
- {
- "id": f"edge-subagent-{edge_counter}",
- "source": sa_id,
- "target": n["id"],
- "condition": "always",
- "description": "sub-agent report back",
- "label": "report",
- }
- )
- edge_counter += 1
- existing_edge_pairs.add((sa_id, n["id"]))
# ── Validate graph connectivity ─────────────────────────────
# Every node must be reachable from the entry node. Disconnected
@@ -1928,7 +1590,9 @@ def register_queen_lifecycle_tools(
# Worker not loaded yet — resolve from draft name
draft_name = draft.get("agent_name", "")
if draft_name:
- candidate = Path("exports") / draft_name
+ from framework.config import COLONIES_DIR
+
+ candidate = COLONIES_DIR / draft_name
if candidate.is_dir():
save_path = candidate
_save_flowchart_file(
@@ -2195,12 +1859,12 @@ def register_queen_lifecycle_tools(
# Explicit user confirmation is required before transitioning from planning
# to building. This tool records that confirmation and proceeds.
- async def confirm_and_build() -> str:
- """Confirm the draft and transition from planning to building phase.
+ async def confirm_and_build(*, agent_name: str | None = None) -> str:
+ """Confirm the draft, create agent directory, and transition to building.
This tool should ONLY be called after the user has explicitly approved
- the draft graph design via ask_user. It gates the planning→building
- transition so the user always has a chance to review before code is written.
+ the draft graph design via ask_user. It creates the agent directory and
+ transitions to BUILDING phase. The queen then writes agent.json directly.
"""
if phase_state is None:
return json.dumps({"error": "Phase state not available."})
@@ -2238,9 +1902,14 @@ def register_queen_lifecycle_tools(
# Create agent folder early so flowchart and agent_path are available
# throughout the entire BUILDING phase.
- _agent_name = phase_state.draft_graph.get("agent_name", "").strip()
+ _agent_name = (
+ agent_name
+ or phase_state.draft_graph.get("agent_name", "").strip()
+ )
if _agent_name:
- _agent_folder = Path("exports") / _agent_name
+ from framework.config import COLONIES_DIR
+
+ _agent_folder = COLONIES_DIR / _agent_name
_agent_folder.mkdir(parents=True, exist_ok=True)
_save_flowchart_file(_agent_folder, original_copy, fmap)
phase_state.agent_path = str(_agent_folder)
@@ -2271,20 +1940,30 @@ def register_queen_lifecycle_tools(
f"{subagent_count} sub-agent node(s) dissolved into predecessor sub_agents"
)
+ # Transition to BUILDING phase
+ await phase_state.switch_to_building(source="tool")
+ _update_meta_json(
+ session_manager, manager_session_id, {"phase": "building"}
+ )
+ phase_state.build_confirmed = False
+
+ # No injection here -- the return message tells the queen what to do.
+ # Injecting would queue a BUILDING message that drains AFTER the queen
+ # may have already moved to STAGING via load_built_agent.
+
return json.dumps(
{
"status": "confirmed",
- "agent_name": phase_state.draft_graph.get("agent_name", ""),
+ "phase": "building",
+ "agent_name": _agent_name,
+ "agent_path": str(_agent_folder),
"planning_nodes_dissolved": dissolved_count,
- "decision_nodes_dissolved": decision_count,
- "subagent_nodes_dissolved": subagent_count,
"flowchart_map": fmap,
"message": (
- "User has confirmed the design. "
+ "Design confirmed and directory created. "
+ ("; ".join(dissolution_parts) + ". " if dissolution_parts else "")
- + "Now call initialize_and_build_agent(agent_name, nodes) to scaffold the "
- "agent package and start building. The draft metadata will be "
- "used to pre-populate the generated files."
+ + f"Now write the complete agent config to {_agent_folder}/agent.json "
+ "using write_file(). Include all system prompts, tools, edges, and goal."
),
}
)
@@ -2292,180 +1971,30 @@ def register_queen_lifecycle_tools(
_confirm_tool = Tool(
name="confirm_and_build",
description=(
- "Confirm the draft graph design and approve transition to building phase. "
+ "Confirm the draft graph design, create agent directory, and transition to building phase. "
"ONLY call this after the user has explicitly approved the design via ask_user. "
- "After confirmation, call initialize_and_build_agent() to scaffold and build."
+ "After confirmation, write the complete agent.json using write_file()."
),
- parameters={"type": "object", "properties": {}},
+ parameters={
+ "type": "object",
+ "properties": {
+ "agent_name": {
+ "type": "string",
+ "description": "Snake_case name for the agent (e.g. 'linkedin_outreach'). "
+ "If omitted, uses the name from save_agent_draft().",
+ },
+ },
+ },
)
registry.register(
"confirm_and_build",
_confirm_tool,
- lambda inputs: confirm_and_build(),
+ lambda inputs: confirm_and_build(
+ agent_name=inputs.get("agent_name"),
+ ),
)
tools_registered += 1
- # --- initialize_and_build_agent wrapper (Planning → Building) -------------
- # With agent_name: scaffold a new agent via MCP tool, then switch to building.
- # Without agent_name: just switch to building (for fixing an existing loaded agent).
-
- _existing_init = registry._tools.get("initialize_and_build_agent")
- if _existing_init is not None:
- _orig_init_executor = _existing_init.executor
-
- async def initialize_and_build_agent_wrapper(inputs: dict) -> str:
- """Wrapper: scaffold or just switch to building phase."""
- agent_name = (inputs.get("agent_name") or "").strip()
-
- # Gate: when in planning phase and creating a new agent,
- # require the user to have confirmed the draft first.
- if (
- agent_name
- and phase_state is not None
- and phase_state.phase == "planning"
- and not phase_state.build_confirmed
- ):
- if phase_state.draft_graph is None:
- return json.dumps(
- {
- "error": (
- "Cannot transition to building without a draft. "
- "Call save_agent_draft() first to create a visual draft of the "
- "graph, present it to the user for review, then call "
- "confirm_and_build() after the user approves."
- )
- }
- )
- return json.dumps(
- {
- "error": (
- "The user has not confirmed the draft design yet. "
- "Present the draft to the user and call ask_user() to get "
- "their approval. Then call confirm_and_build() before "
- "calling initialize_and_build_agent()."
- )
- }
- )
-
- # No agent_name → try to fall back to the session's current agent,
- # or fail with actionable guidance.
- if not agent_name:
- # Try to resolve agent_name from the current session
- fallback_path = getattr(session, "worker_path", None)
- if fallback_path is not None:
- agent_name = Path(fallback_path).name
- else:
- # Server path: check SessionManager
- if session_manager is not None and manager_session_id:
- srv_session = session_manager.get_session(manager_session_id)
- if srv_session and getattr(srv_session, "worker_path", None):
- fallback_path = srv_session.worker_path
- agent_name = Path(fallback_path).name
-
- if not agent_name:
- return json.dumps(
- {
- "error": (
- "No agent_name provided and no agent loaded in this session. "
- "To fix: call list_agents() to find the agent name, then call "
- "initialize_and_build_agent(agent_name='') to scaffold it."
- )
- }
- )
-
- # Fall back succeeded — switch to building without scaffolding
- logger.info(
- "initialize_and_build_agent: no agent_name provided, "
- "falling back to session agent '%s'",
- agent_name,
- )
- if phase_state is not None:
- if fallback_path:
- phase_state.agent_path = str(fallback_path)
- await phase_state.switch_to_building(source="tool")
- _update_meta_json(session_manager, manager_session_id, {"phase": "building"})
- if phase_state.inject_notification:
- await phase_state.inject_notification(
- "[PHASE CHANGE] Switched to BUILDING phase. "
- "Start implementing the fix now."
- )
- return json.dumps(
- {
- "status": "editing",
- "phase": "building",
- "agent_name": agent_name,
- "warning": (
- f"No agent_name provided — using session agent '{agent_name}'. "
- f"Agent files are at exports/{agent_name}/."
- ),
- "message": (
- "Switched to BUILDING phase. Full coding tools restored. "
- "Implement the fix, then call load_built_agent(path) to reload."
- ),
- }
- )
-
- # Has agent_name → scaffold via MCP tool.
- # If a draft exists, pass its metadata so the scaffolder can
- # pre-populate descriptions, goals, and node metadata.
- scaffold_inputs = dict(inputs)
- draft = phase_state.draft_graph if phase_state else None
- if draft and draft.get("agent_name") == agent_name:
- scaffold_inputs["_draft"] = draft
-
- result = _orig_init_executor(scaffold_inputs)
- # Handle both sync and async executors
- if asyncio.iscoroutine(result) or asyncio.isfuture(result):
- result = await result
- # If result is a ToolResult, extract the text content
- result_str = str(result)
- if hasattr(result, "content"):
- result_str = str(result.content)
- try:
- parsed = json.loads(result_str)
- if parsed.get("success", True):
- if phase_state is not None:
- # Set agent_path so the frontend can query credentials
- phase_state.agent_path = phase_state.agent_path or str(
- Path("exports") / agent_name
- )
- await phase_state.switch_to_building(source="tool")
- _update_meta_json(
- session_manager, manager_session_id, {"phase": "building"}
- )
- # Reset draft state after successful scaffolding
- phase_state.build_confirmed = False
- # Persist flowchart now that the agent folder exists
- if phase_state.original_draft_graph and phase_state.flowchart_map:
- _save_flowchart_file(
- Path("exports") / agent_name,
- phase_state.original_draft_graph,
- phase_state.flowchart_map,
- )
- # Inject a continuation message so the queen starts
- # building immediately instead of blocking for user input.
- draft_hint = ""
- if draft:
- draft_hint = (
- " The draft metadata has been used to pre-populate "
- "node descriptions, goal, and success criteria. "
- "Review and refine the generated files."
- )
- if phase_state.inject_notification:
- await phase_state.inject_notification(
- "[PHASE CHANGE] Agent scaffolded and switched to BUILDING phase. "
- "Start implementing the agent nodes now." + draft_hint
- )
- except (json.JSONDecodeError, KeyError, TypeError):
- pass
- return result_str
-
- registry.register(
- "initialize_and_build_agent",
- _existing_init.tool,
- lambda inputs: initialize_and_build_agent_wrapper(inputs),
- )
-
# --- stop_graph (Running → Staging) --------------------------------------
async def stop_graph_to_staging() -> str:
@@ -2554,7 +2083,7 @@ def register_queen_lifecycle_tools(
return s
def _build_preamble(
- runtime: AgentRuntime,
+ runtime: AgentHost,
) -> dict[str, Any]:
"""Build the lightweight preamble: status, node, elapsed, iteration.
@@ -2712,9 +2241,9 @@ def register_queen_lifecycle_tools(
return "\n".join(lines)
- async def _format_memory(runtime: AgentRuntime) -> str:
+ async def _format_memory(runtime: AgentHost) -> str:
"""Format the worker's shared buffer snapshot and recent changes."""
- from framework.runtime.shared_state import IsolationLevel
+ from framework.host.shared_state import IsolationLevel
lines = []
active_streams = runtime.get_active_streams()
@@ -2865,7 +2394,7 @@ def register_queen_lifecycle_tools(
header = f"{total} issue(s) detected."
return header + "\n\n" + "\n".join(lines)
- async def _format_progress(runtime: AgentRuntime, bus: EventBus) -> str:
+ async def _format_progress(runtime: AgentHost, bus: EventBus) -> str:
"""Format goal progress, token consumption, and execution outcomes."""
lines = []
@@ -2921,7 +2450,7 @@ def register_queen_lifecycle_tools(
return "\n".join(lines)
def _build_full_json(
- runtime: AgentRuntime,
+ runtime: AgentHost,
bus: EventBus,
preamble: dict[str, Any],
last_n: int,
@@ -3475,50 +3004,59 @@ def register_queen_lifecycle_tools(
if not resolved_path.exists():
return json.dumps({"error": f"Agent path does not exist: {agent_path}"})
- # Pre-check: verify the module exports goal/nodes/edges before
- # attempting the full load. This gives the queen an actionable
- # error message instead of a cryptic ImportError or TypeError.
- try:
- import importlib
- import sys as _sys
+ # Pre-check: verify the agent can be loaded before attempting
+ # the full session load. Declarative (agent.json) agents skip
+ # the Python import check since AgentRunner.load() handles them.
+ _has_yaml = (resolved_path / "agent.json").exists()
+ if not _has_yaml:
+ # Legacy Python agent: verify module exports goal/nodes/edges
+ try:
+ import importlib
+ import sys as _sys
- pkg_name = resolved_path.name
- parent_dir = str(resolved_path.resolve().parent)
- # Temporarily put parent on sys.path for import
- if parent_dir not in _sys.path:
- _sys.path.insert(0, parent_dir)
- # Evict stale cached modules
- stale = [n for n in _sys.modules if n == pkg_name or n.startswith(f"{pkg_name}.")]
- for n in stale:
- del _sys.modules[n]
+ pkg_name = resolved_path.name
+ parent_dir = str(resolved_path.resolve().parent)
+ if parent_dir not in _sys.path:
+ _sys.path.insert(0, parent_dir)
+ stale = [
+ n for n in _sys.modules
+ if n == pkg_name or n.startswith(f"{pkg_name}.")
+ ]
+ for n in stale:
+ del _sys.modules[n]
- mod = importlib.import_module(pkg_name)
- missing_attrs = [
- attr for attr in ("goal", "nodes", "edges") if getattr(mod, attr, None) is None
- ]
- if missing_attrs:
+ mod = importlib.import_module(pkg_name)
+ missing_attrs = [
+ attr
+ for attr in ("goal", "nodes", "edges")
+ if getattr(mod, attr, None) is None
+ ]
+ if missing_attrs:
+ return json.dumps(
+ {
+ "error": (
+ f"Agent module '{pkg_name}' is missing module-level "
+ f"attributes: {', '.join(missing_attrs)}. "
+ f"Fix: in {pkg_name}/__init__.py, add "
+ f"'from .agent import {', '.join(missing_attrs)}' "
+ f"so that 'import {pkg_name}' exposes them at "
+ f"package level."
+ )
+ }
+ )
+ except Exception as pre_err:
return json.dumps(
{
"error": (
- f"Agent module '{pkg_name}' is missing module-level "
- f"attributes: {', '.join(missing_attrs)}. "
- f"Fix: in {pkg_name}/__init__.py, add "
- f"'from .agent import {', '.join(missing_attrs)}' "
- f"so that 'import {pkg_name}' exposes them at package level."
+ f"Failed to import agent module "
+ f"'{resolved_path.name}': {pre_err}. "
+ f"Fix: ensure {resolved_path.name}/__init__.py "
+ f"exists and can be imported without errors "
+ f"(check syntax, missing dependencies, and "
+ f"relative imports)."
)
}
)
- except Exception as pre_err:
- return json.dumps(
- {
- "error": (
- f"Failed to import agent module '{resolved_path.name}': {pre_err}. "
- f"Fix: ensure {resolved_path.name}/__init__.py exists and can be "
- f"imported without errors (check syntax, missing dependencies, "
- f"and relative imports)."
- )
- }
- )
try:
updated_session = await session_manager.load_graph(
@@ -3635,7 +3173,7 @@ def register_queen_lifecycle_tools(
description=(
"Load a newly built agent as the worker in this session. "
"After building and validating an agent, call this with the agent's "
- "path (e.g. 'exports/my_agent') to make it available immediately. "
+ "path (e.g. '~/.hive/colonies/my_agent') to make it available immediately. "
"The user will see the agent's graph and can interact with it."
),
parameters={
@@ -3643,7 +3181,7 @@ def register_queen_lifecycle_tools(
"properties": {
"agent_path": {
"type": "string",
- "description": ("Path to the agent directory (e.g. 'exports/my_agent')"),
+ "description": ("Path to the agent directory (e.g. '~/.hive/colonies/my_agent')"),
},
},
"required": ["agent_path"],
@@ -3795,7 +3333,7 @@ def register_queen_lifecycle_tools(
if tdef is None:
if trigger_type and trigger_config:
- from framework.runtime.triggers import TriggerDefinition
+ from framework.host.triggers import TriggerDefinition
tdef = TriggerDefinition(
id=trigger_id,
diff --git a/core/framework/tools/session_graph_tools.py b/core/framework/tools/session_graph_tools.py
index 8b068770..aadd3557 100644
--- a/core/framework/tools/session_graph_tools.py
+++ b/core/framework/tools/session_graph_tools.py
@@ -21,13 +21,13 @@ import logging
from typing import TYPE_CHECKING
if TYPE_CHECKING:
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.agent_runtime import AgentRuntime
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.agent_host import AgentHost
logger = logging.getLogger(__name__)
-def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int:
+def register_graph_tools(registry: ToolRegistry, runtime: AgentHost) -> int:
"""Register graph lifecycle tools bound to *runtime*.
Returns the number of tools registered.
@@ -41,12 +41,13 @@ def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int:
async def load_agent(agent_path: str) -> str:
"""Load an agent graph from disk into the running session.
- The agent is imported from *agent_path* (a directory containing
- ``agent.py``). Its graph, goal, and entry points are registered
- as a secondary graph on the runtime. Returns a JSON summary.
+ The agent is loaded from *agent_path* (a directory containing
+ ``agent.json`` or ``agent.py``). Its graph, goal, and entry points
+ are registered as a secondary graph on the runtime. Returns a JSON
+ summary.
"""
- from framework.runner.runner import AgentRunner
- from framework.runtime.execution_stream import EntryPointSpec
+ from framework.loader.agent_loader import AgentLoader
+ from framework.host.execution_manager import EntryPointSpec
from framework.server.app import validate_agent_path
try:
@@ -57,7 +58,7 @@ def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int:
return json.dumps({"error": f"Agent path does not exist: {agent_path}"})
try:
- runner = AgentRunner.load(path)
+ runner = AgentLoader.load(path)
except Exception as exc:
return json.dumps({"error": f"Failed to load agent: {exc}"})
@@ -105,7 +106,7 @@ def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int:
"properties": {
"agent_path": {
"type": "string",
- "description": "Path to the agent directory (containing agent.py)",
+ "description": "Path to the agent directory",
},
},
"required": ["agent_path"],
diff --git a/core/framework/tools/worker_monitoring_tools.py b/core/framework/tools/worker_monitoring_tools.py
index 9d78708b..d1382020 100644
--- a/core/framework/tools/worker_monitoring_tools.py
+++ b/core/framework/tools/worker_monitoring_tools.py
@@ -23,7 +23,7 @@ from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
- from framework.runner.tool_registry import ToolRegistry
+ from framework.loader.tool_registry import ToolRegistry
logger = logging.getLogger(__name__)
diff --git a/core/framework/tracker/__init__.py b/core/framework/tracker/__init__.py
new file mode 100644
index 00000000..6cc52d06
--- /dev/null
+++ b/core/framework/tracker/__init__.py
@@ -0,0 +1,3 @@
+"""Tracker layer -- decision/run logging for Builder analysis."""
+
+from framework.tracker.decision_tracker import DecisionTracker # noqa: F401
diff --git a/core/framework/runtime/core.py b/core/framework/tracker/decision_tracker.py
similarity index 99%
rename from core/framework/runtime/core.py
rename to core/framework/tracker/decision_tracker.py
index c61e8d96..5f134b7a 100644
--- a/core/framework/runtime/core.py
+++ b/core/framework/tracker/decision_tracker.py
@@ -21,7 +21,7 @@ from framework.storage.concurrent import ConcurrentStorage
logger = logging.getLogger(__name__)
-class Runtime:
+class DecisionTracker:
"""
The runtime environment that agents execute within.
diff --git a/core/framework/runtime/llm_debug_logger.py b/core/framework/tracker/llm_debug_logger.py
similarity index 100%
rename from core/framework/runtime/llm_debug_logger.py
rename to core/framework/tracker/llm_debug_logger.py
diff --git a/core/framework/runtime/runtime_log_schemas.py b/core/framework/tracker/runtime_log_schemas.py
similarity index 100%
rename from core/framework/runtime/runtime_log_schemas.py
rename to core/framework/tracker/runtime_log_schemas.py
diff --git a/core/framework/runtime/runtime_log_store.py b/core/framework/tracker/runtime_log_store.py
similarity index 99%
rename from core/framework/runtime/runtime_log_store.py
rename to core/framework/tracker/runtime_log_store.py
index 7be0942c..b3f98db2 100644
--- a/core/framework/runtime/runtime_log_store.py
+++ b/core/framework/tracker/runtime_log_store.py
@@ -29,7 +29,7 @@ import logging
from datetime import UTC, datetime
from pathlib import Path
-from framework.runtime.runtime_log_schemas import (
+from framework.tracker.runtime_log_schemas import (
NodeDetail,
NodeStepLog,
RunDetailsLog,
diff --git a/core/framework/runtime/runtime_logger.py b/core/framework/tracker/runtime_logger.py
similarity index 98%
rename from core/framework/runtime/runtime_logger.py
rename to core/framework/tracker/runtime_logger.py
index f816131c..0da112fa 100644
--- a/core/framework/runtime/runtime_logger.py
+++ b/core/framework/tracker/runtime_logger.py
@@ -27,13 +27,13 @@ from datetime import UTC, datetime
from typing import Any
from framework.observability import get_trace_context
-from framework.runtime.runtime_log_schemas import (
+from framework.tracker.runtime_log_schemas import (
NodeDetail,
NodeStepLog,
RunSummaryLog,
ToolCallLog,
)
-from framework.runtime.runtime_log_store import RuntimeLogStore
+from framework.tracker.runtime_log_store import RuntimeLogStore
logger = logging.getLogger(__name__)
diff --git a/examples/templates/competitive_intel_agent/__main__.py b/examples/templates/competitive_intel_agent/__main__.py
index 50dfc8f6..286d7e97 100644
--- a/examples/templates/competitive_intel_agent/__main__.py
+++ b/examples/templates/competitive_intel_agent/__main__.py
@@ -121,10 +121,10 @@ def tui(verbose: bool, debug: bool) -> None:
sys.exit(1)
from framework.llm import LiteLLMProvider
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.agent_runtime import create_agent_runtime
- from framework.runtime.event_bus import EventBus
- from framework.runtime.execution_stream import EntryPointSpec
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.agent_host import AgentHost
+ from framework.host.event_bus import EventBus
+ from framework.host.execution_manager import EntryPointSpec
async def run_with_tui() -> None:
agent = CompetitiveIntelAgent()
@@ -150,7 +150,7 @@ def tui(verbose: bool, debug: bool) -> None:
tool_executor = agent._tool_registry.get_executor()
graph = agent._build_graph()
- runtime = create_agent_runtime(
+ runtime = AgentHost(
graph=graph,
goal=agent.goal,
storage_path=storage_path,
diff --git a/examples/templates/competitive_intel_agent/agent.py b/examples/templates/competitive_intel_agent/agent.py
index 7ae2ea3f..879cab88 100644
--- a/examples/templates/competitive_intel_agent/agent.py
+++ b/examples/templates/competitive_intel_agent/agent.py
@@ -1,7 +1,7 @@
"""Agent graph construction for Competitive Intelligence Agent."""
from typing import Any, TYPE_CHECKING
-from framework.graph import (
+from framework.orchestrator import (
EdgeSpec,
EdgeCondition,
Goal,
@@ -9,12 +9,12 @@ from framework.graph import (
Constraint,
NodeSpec,
)
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult, GraphExecutor
-from framework.runtime.event_bus import EventBus
-from framework.runtime.core import Runtime
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator
+from framework.host.event_bus import EventBus
+from framework.tracker.decision_tracker import DecisionTracker as Runtime
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
+from framework.loader.tool_registry import ToolRegistry
from .config import default_config, metadata, RuntimeConfig
from .nodes import (
@@ -188,7 +188,7 @@ class CompetitiveIntelAgent:
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
- self._executor: GraphExecutor | None = None
+ self._executor: Orchestrator | None = None
self._graph: GraphSpec | None = None
self._event_bus: EventBus | None = None
self._tool_registry: ToolRegistry | None = None
@@ -219,12 +219,12 @@ class CompetitiveIntelAgent:
},
)
- def _setup(self) -> GraphExecutor:
+ def _setup(self) -> Orchestrator:
"""
Set up the executor with all components (runtime, LLM, tools).
Returns:
- An initialized GraphExecutor instance.
+ An initialized Orchestrator instance.
"""
from pathlib import Path
@@ -250,7 +250,7 @@ class CompetitiveIntelAgent:
self._graph = self._build_graph()
runtime = Runtime(storage_path)
- self._executor = GraphExecutor(
+ self._executor = Orchestrator(
runtime=runtime,
llm=llm,
tools=tools,
diff --git a/examples/templates/competitive_intel_agent/nodes/__init__.py b/examples/templates/competitive_intel_agent/nodes/__init__.py
index 5d1b716d..449e6a64 100644
--- a/examples/templates/competitive_intel_agent/nodes/__init__.py
+++ b/examples/templates/competitive_intel_agent/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Competitive Intelligence Agent."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Node 1: Intake (client-facing)
intake_node: NodeSpec = NodeSpec(
diff --git a/examples/templates/deep_research_agent/__main__.py b/examples/templates/deep_research_agent/__main__.py
index 48c4f81a..adcfb4d4 100644
--- a/examples/templates/deep_research_agent/__main__.py
+++ b/examples/templates/deep_research_agent/__main__.py
@@ -74,10 +74,10 @@ def tui(verbose, debug):
from pathlib import Path
from framework.llm import LiteLLMProvider
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.agent_runtime import create_agent_runtime
- from framework.runtime.event_bus import EventBus
- from framework.runtime.execution_stream import EntryPointSpec
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.agent_host import AgentHost
+ from framework.host.event_bus import EventBus
+ from framework.host.execution_manager import EntryPointSpec
async def run_with_tui():
agent = DeepResearchAgent()
@@ -103,7 +103,7 @@ def tui(verbose, debug):
tool_executor = agent._tool_registry.get_executor()
graph = agent._build_graph()
- runtime = create_agent_runtime(
+ runtime = AgentHost(
graph=graph,
goal=agent.goal,
storage_path=storage_path,
diff --git a/examples/templates/deep_research_agent/agent.py b/examples/templates/deep_research_agent/agent.py
index 0ef6df69..d95b8a5e 100644
--- a/examples/templates/deep_research_agent/agent.py
+++ b/examples/templates/deep_research_agent/agent.py
@@ -2,14 +2,14 @@
from pathlib import Path
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult
-from framework.graph.checkpoint_config import CheckpointConfig
+from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult
+from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
+from framework.loader.tool_registry import ToolRegistry
+from framework.host.agent_host import AgentHost
+from framework.host.execution_manager import EntryPointSpec
from .config import default_config, metadata
from .nodes import (
@@ -244,7 +244,7 @@ class DeepResearchAgent:
)
]
- self._agent_runtime = create_agent_runtime(
+ self._agent_runtime = AgentHost(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
diff --git a/examples/templates/deep_research_agent/nodes/__init__.py b/examples/templates/deep_research_agent/nodes/__init__.py
index 9350f14d..00a7bbb5 100644
--- a/examples/templates/deep_research_agent/nodes/__init__.py
+++ b/examples/templates/deep_research_agent/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Deep Research Agent."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Node 1: Intake (client-facing)
# Brief conversation to clarify what the user wants researched.
diff --git a/examples/templates/email_inbox_management/__main__.py b/examples/templates/email_inbox_management/__main__.py
index d75b3e3c..58a22b70 100644
--- a/examples/templates/email_inbox_management/__main__.py
+++ b/examples/templates/email_inbox_management/__main__.py
@@ -83,10 +83,10 @@ def tui(mock, verbose, debug):
from pathlib import Path
from framework.llm import LiteLLMProvider
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.agent_runtime import create_agent_runtime
- from framework.runtime.event_bus import EventBus
- from framework.runtime.execution_stream import EntryPointSpec
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.agent_host import AgentHost
+ from framework.host.event_bus import EventBus
+ from framework.host.execution_manager import EntryPointSpec
async def run_with_tui():
agent = InboxManagementAgent()
@@ -118,7 +118,7 @@ def tui(mock, verbose, debug):
tool_executor = agent._tool_registry.get_executor()
graph = agent._build_graph()
- runtime = create_agent_runtime(
+ runtime = AgentHost(
graph=graph,
goal=agent.goal,
storage_path=storage_path,
diff --git a/examples/templates/email_inbox_management/agent.py b/examples/templates/email_inbox_management/agent.py
index 97df181d..ab805eb6 100644
--- a/examples/templates/email_inbox_management/agent.py
+++ b/examples/templates/email_inbox_management/agent.py
@@ -2,15 +2,15 @@
from pathlib import Path
-from framework.graph import EdgeCondition, EdgeSpec, Goal, SuccessCriterion, Constraint
-from framework.graph.checkpoint_config import CheckpointConfig
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult, GraphExecutor
+from framework.orchestrator import EdgeCondition, EdgeSpec, Goal, SuccessCriterion, Constraint
+from framework.orchestrator.checkpoint_config import CheckpointConfig
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import create_agent_runtime
-from framework.runtime.event_bus import EventBus
-from framework.runtime.execution_stream import EntryPointSpec
+from framework.loader.tool_registry import ToolRegistry
+from framework.host.agent_host import AgentHost
+from framework.host.event_bus import EventBus
+from framework.host.execution_manager import EntryPointSpec
from .config import default_config, metadata
from .nodes import (
@@ -190,7 +190,7 @@ class EmailInboxManagementAgent:
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
- self._executor: GraphExecutor | None = None
+ self._executor: Orchestrator | None = None
self._graph: GraphSpec | None = None
self._event_bus: EventBus | None = None
self._tool_registry: ToolRegistry | None = None
@@ -264,7 +264,7 @@ class EmailInboxManagementAgent:
),
]
- self._agent_runtime = create_agent_runtime(
+ self._agent_runtime = AgentHost(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
diff --git a/examples/templates/email_inbox_management/nodes/__init__.py b/examples/templates/email_inbox_management/nodes/__init__.py
index 89a56a09..407956c0 100644
--- a/examples/templates/email_inbox_management/nodes/__init__.py
+++ b/examples/templates/email_inbox_management/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Inbox Management Agent."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Node 1: Intake (client-facing)
# Receives user rules and max_emails, confirms understanding with user.
diff --git a/examples/templates/email_inbox_management/tools.py b/examples/templates/email_inbox_management/tools.py
index 27370650..5959bc7b 100644
--- a/examples/templates/email_inbox_management/tools.py
+++ b/examples/templates/email_inbox_management/tools.py
@@ -15,7 +15,7 @@ from pathlib import Path
import httpx
from framework.llm.provider import Tool, ToolResult, ToolUse
-from framework.runner.tool_registry import _execution_context
+from framework.loader.tool_registry import _execution_context
logger = logging.getLogger(__name__)
@@ -102,7 +102,7 @@ def _get_data_dir() -> str:
ctx = _execution_context.get()
if not ctx or "data_dir" not in ctx:
raise RuntimeError(
- "data_dir not set in execution context. Is the tool running inside a GraphExecutor?"
+ "data_dir not set in execution context. Is the tool running inside an Orchestrator?"
)
return ctx["data_dir"]
diff --git a/examples/templates/email_reply_agent/__main__.py b/examples/templates/email_reply_agent/__main__.py
index 9858c770..4fd4086d 100644
--- a/examples/templates/email_reply_agent/__main__.py
+++ b/examples/templates/email_reply_agent/__main__.py
@@ -51,9 +51,9 @@ def tui():
from framework.tui.app import AdenTUI
from framework.llm import LiteLLMProvider
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.agent_runtime import create_agent_runtime
- from framework.runtime.execution_stream import EntryPointSpec
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.agent_host import AgentHost
+ from framework.host.execution_manager import EntryPointSpec
async def run_tui():
agent = EmailReplyAgent()
@@ -68,7 +68,7 @@ def tui():
api_key=agent.config.api_key,
api_base=agent.config.api_base,
)
- runtime = create_agent_runtime(
+ runtime = AgentHost(
graph=agent._build_graph(),
goal=agent.goal,
storage_path=storage,
diff --git a/examples/templates/email_reply_agent/agent.py b/examples/templates/email_reply_agent/agent.py
index 03448409..434683d6 100644
--- a/examples/templates/email_reply_agent/agent.py
+++ b/examples/templates/email_reply_agent/agent.py
@@ -2,14 +2,14 @@
from pathlib import Path
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult
-from framework.graph.checkpoint_config import CheckpointConfig
+from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult
+from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
+from framework.loader.tool_registry import ToolRegistry
+from framework.host.agent_host import AgentHost
+from framework.host.execution_manager import EntryPointSpec
from .config import default_config, metadata
from .nodes import intake_node, search_node, confirm_draft_node
@@ -101,7 +101,7 @@ entry_points = {"start": "intake"}
pause_nodes = []
terminal_nodes = []
-# Module-level vars read by AgentRunner.load()
+# Module-level vars read by AgentLoader.load()
conversation_mode = "continuous"
identity_prompt = "You are a helpful email reply assistant that filters unreplied emails and sends personalized responses."
loop_config = {
@@ -159,7 +159,7 @@ class EmailReplyAgent:
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
self._graph = self._build_graph()
- self._agent_runtime = create_agent_runtime(
+ self._agent_runtime = AgentHost(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
diff --git a/examples/templates/email_reply_agent/nodes/__init__.py b/examples/templates/email_reply_agent/nodes/__init__.py
index aaf69a95..71f827e9 100644
--- a/examples/templates/email_reply_agent/nodes/__init__.py
+++ b/examples/templates/email_reply_agent/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Email Reply Agent."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Node 1: Intake (client-facing)
intake_node = NodeSpec(
diff --git a/examples/templates/email_reply_agent/tests/conftest.py b/examples/templates/email_reply_agent/tests/conftest.py
index 2e9d1813..96f98ee1 100644
--- a/examples/templates/email_reply_agent/tests/conftest.py
+++ b/examples/templates/email_reply_agent/tests/conftest.py
@@ -25,6 +25,6 @@ def agent_module():
@pytest.fixture(scope="session")
def runner_loaded():
"""Load the agent through AgentRunner (structural only, no LLM needed)."""
- from framework.runner.runner import AgentRunner
+ from framework.loader.agent_loader import AgentLoader
- return AgentRunner.load(AGENT_PATH)
+ return AgentLoader.load(AGENT_PATH)
diff --git a/examples/templates/email_reply_agent/tests/test_email_reply_agent.py b/examples/templates/email_reply_agent/tests/test_email_reply_agent.py
index ec5f05c9..717c309e 100644
--- a/examples/templates/email_reply_agent/tests/test_email_reply_agent.py
+++ b/examples/templates/email_reply_agent/tests/test_email_reply_agent.py
@@ -77,7 +77,7 @@ class TestRunnerLoad:
"""Test AgentRunner can load the agent."""
def test_runner_load_succeeds(self, runner_loaded):
- """AgentRunner.load() succeeds."""
+ """AgentLoader.load() succeeds."""
assert runner_loaded is not None
def test_runner_has_goal(self, runner_loaded):
diff --git a/examples/templates/job_hunter/__main__.py b/examples/templates/job_hunter/__main__.py
index 752ae545..bdf5726d 100644
--- a/examples/templates/job_hunter/__main__.py
+++ b/examples/templates/job_hunter/__main__.py
@@ -75,10 +75,10 @@ def tui(mock, verbose, debug):
from pathlib import Path
from framework.llm import LiteLLMProvider
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.agent_runtime import create_agent_runtime
- from framework.runtime.event_bus import EventBus
- from framework.runtime.execution_stream import EntryPointSpec
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.agent_host import AgentHost
+ from framework.host.event_bus import EventBus
+ from framework.host.execution_manager import EntryPointSpec
async def run_with_tui():
agent = JobHunterAgent()
@@ -106,7 +106,7 @@ def tui(mock, verbose, debug):
tool_executor = agent._tool_registry.get_executor()
graph = agent._build_graph()
- runtime = create_agent_runtime(
+ runtime = AgentHost(
graph=graph,
goal=agent.goal,
storage_path=storage_path,
diff --git a/examples/templates/job_hunter/agent.py b/examples/templates/job_hunter/agent.py
index 29d37efc..6e7d9036 100644
--- a/examples/templates/job_hunter/agent.py
+++ b/examples/templates/job_hunter/agent.py
@@ -2,14 +2,14 @@
from pathlib import Path
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult
-from framework.graph.checkpoint_config import CheckpointConfig
+from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult
+from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
+from framework.loader.tool_registry import ToolRegistry
+from framework.host.agent_host import AgentHost
+from framework.host.execution_manager import EntryPointSpec
from .config import default_config
from .nodes import (
@@ -224,7 +224,7 @@ class JobHunterAgent:
)
]
- self._agent_runtime = create_agent_runtime(
+ self._agent_runtime = AgentHost(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
diff --git a/examples/templates/job_hunter/nodes/__init__.py b/examples/templates/job_hunter/nodes/__init__.py
index 9d6dc619..3b9841de 100644
--- a/examples/templates/job_hunter/nodes/__init__.py
+++ b/examples/templates/job_hunter/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Job Hunter Agent."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Node 1: Intake (simple)
# Collect resume and identify strongest role types.
diff --git a/examples/templates/local_business_extractor/agent.py b/examples/templates/local_business_extractor/agent.py
index 82c1736d..822b040e 100644
--- a/examples/templates/local_business_extractor/agent.py
+++ b/examples/templates/local_business_extractor/agent.py
@@ -1,14 +1,14 @@
"""Agent graph construction for Local Business Extractor."""
from pathlib import Path
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult
-from framework.graph.checkpoint_config import CheckpointConfig
+from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult
+from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
+from framework.loader.tool_registry import ToolRegistry
+from framework.host.agent_host import AgentHost
+from framework.host.execution_manager import EntryPointSpec
from .config import default_config, metadata
from .nodes import map_search_gcu, extract_contacts_node, sheets_sync_node
@@ -125,7 +125,7 @@ class LocalBusinessExtractor:
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
self._graph = self._build_graph()
- self._agent_runtime = create_agent_runtime(
+ self._agent_runtime = AgentHost(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
diff --git a/examples/templates/local_business_extractor/nodes/__init__.py b/examples/templates/local_business_extractor/nodes/__init__.py
index 26c4e16c..f0b5727f 100644
--- a/examples/templates/local_business_extractor/nodes/__init__.py
+++ b/examples/templates/local_business_extractor/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Local Business Extractor."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# GCU Subagent for Google Maps
map_search_gcu = NodeSpec(
diff --git a/examples/templates/meeting_scheduler/__main__.py b/examples/templates/meeting_scheduler/__main__.py
index b6ff5493..6315d102 100644
--- a/examples/templates/meeting_scheduler/__main__.py
+++ b/examples/templates/meeting_scheduler/__main__.py
@@ -54,9 +54,9 @@ def tui():
from pathlib import Path
from framework.tui.app import AdenTUI
from framework.llm import LiteLLMProvider
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.agent_runtime import create_agent_runtime
- from framework.runtime.execution_stream import EntryPointSpec
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.agent_host import AgentHost
+ from framework.host.execution_manager import EntryPointSpec
async def run_tui():
agent = MeetingScheduler()
@@ -71,7 +71,7 @@ def tui():
api_key=agent.config.api_key,
api_base=agent.config.api_base,
)
- runtime = create_agent_runtime(
+ runtime = AgentHost(
graph=agent._build_graph(),
goal=agent.goal,
storage_path=storage,
diff --git a/examples/templates/meeting_scheduler/agent.py b/examples/templates/meeting_scheduler/agent.py
index f3de5e0c..ec077a33 100644
--- a/examples/templates/meeting_scheduler/agent.py
+++ b/examples/templates/meeting_scheduler/agent.py
@@ -2,14 +2,14 @@
from pathlib import Path
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult
-from framework.graph.checkpoint_config import CheckpointConfig
+from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult
+from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
+from framework.loader.tool_registry import ToolRegistry
+from framework.host.agent_host import AgentHost
+from framework.host.execution_manager import EntryPointSpec
from .config import default_config, metadata
from .nodes import intake_node, schedule_node, confirm_node
@@ -107,7 +107,7 @@ entry_points = {"start": "intake"}
pause_nodes = []
terminal_nodes = [] # Forever-alive
-# Module-level vars read by AgentRunner.load()
+# Module-level vars read by AgentLoader.load()
conversation_mode = "continuous"
identity_prompt = "You are a helpful meeting scheduler assistant that manages calendar availability and sends confirmations."
loop_config = {
@@ -165,7 +165,7 @@ class MeetingScheduler:
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
self._graph = self._build_graph()
- self._agent_runtime = create_agent_runtime(
+ self._agent_runtime = AgentHost(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
diff --git a/examples/templates/meeting_scheduler/nodes/__init__.py b/examples/templates/meeting_scheduler/nodes/__init__.py
index 5ccf3dae..81394f69 100644
--- a/examples/templates/meeting_scheduler/nodes/__init__.py
+++ b/examples/templates/meeting_scheduler/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Meeting Scheduler."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Node 1: Intake (client-facing)
intake_node = NodeSpec(
diff --git a/examples/templates/meeting_scheduler/tests/conftest.py b/examples/templates/meeting_scheduler/tests/conftest.py
index d1e60437..289333c1 100644
--- a/examples/templates/meeting_scheduler/tests/conftest.py
+++ b/examples/templates/meeting_scheduler/tests/conftest.py
@@ -25,10 +25,10 @@ def agent_module():
@pytest.fixture(scope="session")
def runner_loaded():
"""Load the agent through AgentRunner (structural only, no LLM needed)."""
- from framework.runner.runner import AgentRunner
+ from framework.loader.agent_loader import AgentLoader
from framework.credentials.models import CredentialError
try:
- return AgentRunner.load(AGENT_PATH)
+ return AgentLoader.load(AGENT_PATH)
except CredentialError:
pytest.skip("Google OAuth credentials not configured")
diff --git a/examples/templates/sdr_agent/agent.py b/examples/templates/sdr_agent/agent.py
index 105cf3dc..b279ae3d 100644
--- a/examples/templates/sdr_agent/agent.py
+++ b/examples/templates/sdr_agent/agent.py
@@ -2,14 +2,14 @@
from pathlib import Path
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.checkpoint_config import CheckpointConfig
-from framework.graph.edge import AsyncEntryPointSpec, GraphSpec
-from framework.graph.executor import ExecutionResult
+from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.orchestrator.checkpoint_config import CheckpointConfig
+from framework.orchestrator.edge import AsyncEntryPointSpec, GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
+from framework.loader.tool_registry import ToolRegistry
+from framework.host.agent_host import AgentHost
+from framework.host.execution_manager import EntryPointSpec
from .config import default_config, metadata
from .nodes import (
@@ -265,7 +265,7 @@ class SDRAgent:
),
]
- self._agent_runtime = create_agent_runtime(
+ self._agent_runtime = AgentHost(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
diff --git a/examples/templates/sdr_agent/nodes/__init__.py b/examples/templates/sdr_agent/nodes/__init__.py
index cbd274f8..6de8b3e5 100644
--- a/examples/templates/sdr_agent/nodes/__init__.py
+++ b/examples/templates/sdr_agent/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for SDR Agent."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Node 1: Intake (client-facing)
# Receives contact list and outreach goal, confirms with user before proceeding.
diff --git a/examples/templates/sdr_agent/tools.py b/examples/templates/sdr_agent/tools.py
index 26c24aab..4a3eb3f9 100644
--- a/examples/templates/sdr_agent/tools.py
+++ b/examples/templates/sdr_agent/tools.py
@@ -15,7 +15,7 @@ from __future__ import annotations
import json
from framework.llm.provider import Tool, ToolResult, ToolUse
-from framework.runner.tool_registry import _execution_context
+from framework.loader.tool_registry import _execution_context
# ---------------------------------------------------------------------------
# Tool definitions (auto-discovered by ToolRegistry.discover_from_module)
@@ -56,7 +56,7 @@ def _get_data_dir() -> str:
ctx = _execution_context.get()
if not ctx or "data_dir" not in ctx:
raise RuntimeError(
- "data_dir not set in execution context. Is the tool running inside a GraphExecutor?"
+ "data_dir not set in execution context. Is the tool running inside an Orchestrator?"
)
return ctx["data_dir"]
diff --git a/examples/templates/tech_news_reporter/__main__.py b/examples/templates/tech_news_reporter/__main__.py
index 711c0f23..f37e0b09 100644
--- a/examples/templates/tech_news_reporter/__main__.py
+++ b/examples/templates/tech_news_reporter/__main__.py
@@ -73,10 +73,10 @@ def tui(verbose, debug):
from pathlib import Path
from framework.llm import LiteLLMProvider
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.agent_runtime import create_agent_runtime
- from framework.runtime.event_bus import EventBus
- from framework.runtime.execution_stream import EntryPointSpec
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.agent_host import AgentHost
+ from framework.host.event_bus import EventBus
+ from framework.host.execution_manager import EntryPointSpec
async def run_with_tui():
agent = TechNewsReporterAgent()
@@ -101,7 +101,7 @@ def tui(verbose, debug):
tool_executor = agent._tool_registry.get_executor()
graph = agent._build_graph()
- runtime = create_agent_runtime(
+ runtime = AgentHost(
graph=graph,
goal=agent.goal,
storage_path=storage_path,
diff --git a/examples/templates/tech_news_reporter/agent.py b/examples/templates/tech_news_reporter/agent.py
index ef65fbb3..1346184f 100644
--- a/examples/templates/tech_news_reporter/agent.py
+++ b/examples/templates/tech_news_reporter/agent.py
@@ -1,12 +1,12 @@
"""Agent graph construction for Tech & AI News Reporter."""
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult, GraphExecutor
-from framework.runtime.event_bus import EventBus
-from framework.runtime.core import Runtime
+from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator
+from framework.host.event_bus import EventBus
+from framework.tracker.decision_tracker import DecisionTracker as Runtime
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
+from framework.loader.tool_registry import ToolRegistry
from .config import default_config, metadata
from .nodes import (
@@ -131,7 +131,7 @@ class TechNewsReporterAgent:
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
- self._executor: GraphExecutor | None = None
+ self._executor: Orchestrator | None = None
self._graph: GraphSpec | None = None
self._event_bus: EventBus | None = None
self._tool_registry: ToolRegistry | None = None
@@ -157,7 +157,7 @@ class TechNewsReporterAgent:
},
)
- def _setup(self) -> GraphExecutor:
+ def _setup(self) -> Orchestrator:
"""Set up the executor with all components."""
from pathlib import Path
@@ -183,7 +183,7 @@ class TechNewsReporterAgent:
self._graph = self._build_graph()
runtime = Runtime(storage_path)
- self._executor = GraphExecutor(
+ self._executor = Orchestrator(
runtime=runtime,
llm=llm,
tools=tools,
diff --git a/examples/templates/tech_news_reporter/nodes/__init__.py b/examples/templates/tech_news_reporter/nodes/__init__.py
index 2d0b9b27..a7e2fbe7 100644
--- a/examples/templates/tech_news_reporter/nodes/__init__.py
+++ b/examples/templates/tech_news_reporter/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Tech & AI News Reporter."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Node 1: Intake (client-facing)
# Brief conversation to understand what topics the user cares about.
diff --git a/examples/templates/twitter_news_agent/agent.py b/examples/templates/twitter_news_agent/agent.py
index 0dffb4a2..50627cf3 100644
--- a/examples/templates/twitter_news_agent/agent.py
+++ b/examples/templates/twitter_news_agent/agent.py
@@ -2,14 +2,14 @@
from pathlib import Path
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult
-from framework.graph.checkpoint_config import CheckpointConfig
+from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult
+from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
+from framework.loader.tool_registry import ToolRegistry
+from framework.host.agent_host import AgentHost
+from framework.host.execution_manager import EntryPointSpec
from .config import default_config, metadata
from .nodes import fetch_node, process_node, review_node
@@ -91,7 +91,7 @@ entry_points = {"start": "process-news"}
pause_nodes = []
terminal_nodes = [] # Forever-alive
-# Module-level vars read by AgentRunner.load()
+# Module-level vars read by AgentLoader.load()
conversation_mode = "continuous"
identity_prompt = "You are a professional news analyst and researcher."
loop_config = {
@@ -149,7 +149,7 @@ class TwitterNewsAgent:
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
self._graph = self._build_graph()
- self._agent_runtime = create_agent_runtime(
+ self._agent_runtime = AgentHost(
graph=self._graph,
goal=self.goal,
storage_path=self._storage_path,
diff --git a/examples/templates/twitter_news_agent/nodes/__init__.py b/examples/templates/twitter_news_agent/nodes/__init__.py
index bda48139..b21a493d 100644
--- a/examples/templates/twitter_news_agent/nodes/__init__.py
+++ b/examples/templates/twitter_news_agent/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Twitter News Digest."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Node 1: Browser subagent (GCU) to fetch tweets
fetch_node = NodeSpec(
diff --git a/examples/templates/vulnerability_assessment/__main__.py b/examples/templates/vulnerability_assessment/__main__.py
index fa1382c1..921c5031 100644
--- a/examples/templates/vulnerability_assessment/__main__.py
+++ b/examples/templates/vulnerability_assessment/__main__.py
@@ -76,10 +76,10 @@ def tui(mock, verbose, debug):
from pathlib import Path
from framework.llm import LiteLLMProvider
- from framework.runner.tool_registry import ToolRegistry
- from framework.runtime.agent_runtime import create_agent_runtime
- from framework.runtime.event_bus import EventBus
- from framework.runtime.execution_stream import EntryPointSpec
+ from framework.loader.tool_registry import ToolRegistry
+ from framework.host.agent_host import AgentHost
+ from framework.host.event_bus import EventBus
+ from framework.host.execution_manager import EntryPointSpec
async def run_with_tui():
agent = VulnerabilityResearcherAgent()
@@ -107,7 +107,7 @@ def tui(mock, verbose, debug):
tool_executor = agent._tool_registry.get_executor()
graph = agent._build_graph()
- runtime = create_agent_runtime(
+ runtime = AgentHost(
graph=graph,
goal=agent.goal,
storage_path=storage_path,
diff --git a/examples/templates/vulnerability_assessment/agent.py b/examples/templates/vulnerability_assessment/agent.py
index 0cc79436..fbc2ffc8 100644
--- a/examples/templates/vulnerability_assessment/agent.py
+++ b/examples/templates/vulnerability_assessment/agent.py
@@ -1,12 +1,12 @@
"""Agent graph construction for Passive Website Vulnerability Assessment."""
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult, GraphExecutor
-from framework.runtime.event_bus import EventBus
-from framework.runtime.core import Runtime
+from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
+from framework.orchestrator.edge import GraphSpec
+from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator
+from framework.host.event_bus import EventBus
+from framework.tracker.decision_tracker import DecisionTracker as Runtime
from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
+from framework.loader.tool_registry import ToolRegistry
from .config import default_config, metadata
from .nodes import (
@@ -186,7 +186,7 @@ class VulnerabilityResearcherAgent:
self.entry_points = entry_points
self.pause_nodes = pause_nodes
self.terminal_nodes = terminal_nodes
- self._executor: GraphExecutor | None = None
+ self._executor: Orchestrator | None = None
self._graph: GraphSpec | None = None
self._event_bus: EventBus | None = None
self._tool_registry: ToolRegistry | None = None
@@ -219,7 +219,7 @@ class VulnerabilityResearcherAgent:
),
)
- def _setup(self, mock_mode=False) -> GraphExecutor:
+ def _setup(self, mock_mode=False) -> Orchestrator:
"""Set up the executor with all components."""
from pathlib import Path
@@ -247,7 +247,7 @@ class VulnerabilityResearcherAgent:
self._graph = self._build_graph()
runtime = Runtime(storage_path)
- self._executor = GraphExecutor(
+ self._executor = Orchestrator(
runtime=runtime,
llm=llm,
tools=tools,
diff --git a/examples/templates/vulnerability_assessment/nodes/__init__.py b/examples/templates/vulnerability_assessment/nodes/__init__.py
index 17212d79..a88eb565 100644
--- a/examples/templates/vulnerability_assessment/nodes/__init__.py
+++ b/examples/templates/vulnerability_assessment/nodes/__init__.py
@@ -1,6 +1,6 @@
"""Node definitions for Passive Website Vulnerability Assessment."""
-from framework.graph import NodeSpec
+from framework.orchestrator import NodeSpec
# Node 1: Intake (client-facing)
# Collect the target domain and confirm scanning scope.
diff --git a/tools/browser-extension/offscreen.js b/tools/browser-extension/offscreen.js
index 00705e12..8a64c687 100644
--- a/tools/browser-extension/offscreen.js
+++ b/tools/browser-extension/offscreen.js
@@ -9,51 +9,34 @@
const HIVE_WS_URL = "ws://127.0.0.1:9229/bridge";
let ws = null;
-let reconnectAttempts = 0;
-const MAX_RECONNECT_DELAY = 10000; // Max 10 seconds between attempts
+const RETRY_INTERVAL = 2000; // Poll every 2s while disconnected
function connect() {
- // Exponential backoff with cap
- const delay = Math.min(reconnectAttempts * 1000, MAX_RECONNECT_DELAY);
+ try {
+ ws = new WebSocket(HIVE_WS_URL);
- if (reconnectAttempts > 0) {
- console.log(`[Beeline] Reconnecting in ${delay}ms (attempt ${reconnectAttempts + 1})...`);
+ ws.onopen = () => {
+ console.log("[Beeline] WebSocket connected to Hive");
+ chrome.runtime.sendMessage({ _beeline: true, type: "ws_open" });
+ };
+
+ ws.onmessage = (event) => {
+ chrome.runtime.sendMessage({ _beeline: true, type: "ws_message", data: event.data });
+ };
+
+ ws.onclose = (event) => {
+ console.log(`[Beeline] WebSocket closed: code=${event.code}, reason=${event.reason}`);
+ chrome.runtime.sendMessage({ _beeline: true, type: "ws_close" });
+ setTimeout(connect, RETRY_INTERVAL);
+ };
+
+ ws.onerror = () => {
+ console.warn(`[Beeline] WebSocket connection failed (server may not be running)`);
+ };
+ } catch (error) {
+ console.error("[Beeline] Failed to create WebSocket:", error.message);
+ setTimeout(connect, RETRY_INTERVAL);
}
-
- setTimeout(() => {
- try {
- ws = new WebSocket(HIVE_WS_URL);
-
- ws.onopen = () => {
- console.log("[Beeline] WebSocket connected to Hive");
- reconnectAttempts = 0;
- chrome.runtime.sendMessage({ _beeline: true, type: "ws_open" });
- };
-
- ws.onmessage = (event) => {
- chrome.runtime.sendMessage({ _beeline: true, type: "ws_message", data: event.data });
- };
-
- ws.onclose = (event) => {
- console.log(`[Beeline] WebSocket closed: code=${event.code}, reason=${event.reason}`);
- chrome.runtime.sendMessage({ _beeline: true, type: "ws_close" });
- reconnectAttempts++;
- // Reconnect after delay
- setTimeout(connect, 2000);
- };
-
- ws.onerror = () => {
- // Don't log the full error object - it's usually just an Event
- // The actual error will be reflected in onclose
- console.warn(`[Beeline] WebSocket connection failed (server may not be running)`);
- // Don't close here - let onclose handle cleanup
- };
- } catch (error) {
- console.error("[Beeline] Failed to create WebSocket:", error.message);
- reconnectAttempts++;
- setTimeout(connect, 2000);
- }
- }, delay);
}
// Forward outbound messages from the service worker onto the WebSocket.
diff --git a/tools/coder_tools_server.py b/tools/coder_tools_server.py
index 1aee0819..f5f480fb 100644
--- a/tools/coder_tools_server.py
+++ b/tools/coder_tools_server.py
@@ -456,8 +456,8 @@ def list_agent_tools(
try:
from pathlib import Path
- from framework.runner.mcp_client import MCPClient, MCPServerConfig
- from framework.runner.tool_registry import ToolRegistry
+ from framework.loader.mcp_client import MCPClient, MCPServerConfig
+ from framework.loader.tool_registry import ToolRegistry
except ImportError:
return json.dumps({"error": "Cannot import MCPClient"})
@@ -806,8 +806,8 @@ def _validate_agent_tools_impl(agent_path: str) -> dict:
try:
from pathlib import Path
- from framework.runner.mcp_client import MCPClient, MCPServerConfig
- from framework.runner.tool_registry import ToolRegistry
+ from framework.loader.mcp_client import MCPClient, MCPServerConfig
+ from framework.loader.tool_registry import ToolRegistry
except ImportError:
return {"error": "Cannot import MCPClient"}
@@ -845,27 +845,50 @@ def _validate_agent_tools_impl(agent_path: str) -> dict:
discovery_errors.append({"server": server_name, "error": str(e)})
# --- Load agent nodes and extract declared tools ---
+ agent_json_file = os.path.join(agent_dir, "agent.json")
agent_py = os.path.join(agent_dir, "agent.py")
- if not os.path.isfile(agent_py):
- return {"error": f"No agent.py found in {agent_path}"}
- import importlib
- import importlib.util
- import sys
+ nodes = None
+ if os.path.isfile(agent_json_file):
+ # Declarative JSON agent
+ try:
+ with open(agent_json_file, encoding="utf-8") as f:
+ data = json.load(f)
+ # Build lightweight node stubs with .tools and .id/.name
+ class _NodeStub:
+ def __init__(self, d):
+ self.id = d.get("id", "?")
+ self.name = d.get("name", self.id)
+ t = d.get("tools", {})
+ if isinstance(t, dict):
+ self.tools = t.get("allowed", [])
+ elif isinstance(t, list):
+ self.tools = t
+ else:
+ self.tools = []
+ nodes = [_NodeStub(n) for n in data.get("nodes", [])]
+ except Exception as e:
+ return {"error": f"Failed to parse agent.json: {e}"}
+ elif os.path.isfile(agent_py):
+ # Legacy Python agent
+ import importlib
+ import importlib.util
+ import sys
- package_name = os.path.basename(agent_dir)
- parent_dir = os.path.dirname(os.path.abspath(agent_dir))
- if parent_dir not in sys.path:
- sys.path.insert(0, parent_dir)
+ package_name = os.path.basename(agent_dir)
+ parent_dir = os.path.dirname(os.path.abspath(agent_dir))
+ if parent_dir not in sys.path:
+ sys.path.insert(0, parent_dir)
+ try:
+ agent_module = importlib.import_module(package_name)
+ except Exception as e:
+ return {"error": f"Failed to import agent: {e}"}
+ nodes = getattr(agent_module, "nodes", None)
+ else:
+ return {"error": f"No agent.json or agent.py found in {agent_path}"}
- try:
- agent_module = importlib.import_module(package_name)
- except Exception as e:
- return {"error": f"Failed to import agent: {e}"}
-
- nodes = getattr(agent_module, "nodes", None)
if not nodes:
- return {"error": "Agent module has no 'nodes' attribute"}
+ return {"error": "Agent has no nodes defined"}
# --- Validate declared vs available ---
missing_by_node: dict[str, list[str]] = {}
@@ -951,37 +974,46 @@ def list_agents() -> str:
if not os.path.isdir(agent_dir):
continue
- # Must have agent.py to be considered an agent package
- if not os.path.isfile(os.path.join(agent_dir, "agent.py")):
+ # Must have agent.json (declarative) or agent.py (legacy)
+ has_json = os.path.isfile(os.path.join(agent_dir, "agent.json"))
+ has_py = os.path.isfile(os.path.join(agent_dir, "agent.py"))
+ if not has_json and not has_py:
continue
info = {
"name": entry,
"path": os.path.relpath(agent_dir, PROJECT_ROOT),
"source": source,
- "has_nodes": os.path.isdir(os.path.join(agent_dir, "nodes")),
- "has_tests": os.path.isdir(os.path.join(agent_dir, "tests")),
+ "format": "json" if has_json else "python",
"has_mcp_config": os.path.isfile(os.path.join(agent_dir, "mcp_servers.json")),
}
- # Read description from __init__.py docstring
- init_path = os.path.join(agent_dir, "__init__.py")
- if os.path.isfile(init_path):
+ # Read description from agent.json or __init__.py
+ if has_json:
try:
- with open(init_path, encoding="utf-8") as f:
- content = f.read(2000)
- # Extract module docstring
- for quote in ['"""', "'''"]:
- start = content.find(quote)
- if start != -1:
- end = content.find(quote, start + 3)
- if end != -1:
- info["description"] = (
- content[start + 3 : end].strip().split("\n")[0]
- )
- break
- except OSError:
+ with open(os.path.join(agent_dir, "agent.json"), encoding="utf-8") as f:
+ data = json.load(f)
+ if isinstance(data, dict) and data.get("description"):
+ info["description"] = data["description"]
+ except Exception:
pass
+ else:
+ init_path = os.path.join(agent_dir, "__init__.py")
+ if os.path.isfile(init_path):
+ try:
+ with open(init_path, encoding="utf-8") as f:
+ content = f.read(2000)
+ for quote in ['"""', "'''"]:
+ start = content.find(quote)
+ if start != -1:
+ end = content.find(quote, start + 3)
+ if end != -1:
+ info["description"] = (
+ content[start + 3 : end].strip().split("\n")[0]
+ )
+ break
+ except OSError:
+ pass
# Check runtime data
runtime_dir = hive_agents_dir / entry
@@ -1266,8 +1298,8 @@ def _run_agent_tests_impl(
if not tests_dir.exists():
return {
- "error": f"No tests directory: exports/{agent_name}/tests/",
- "hint": "Create test files in the tests/ directory first.",
+ "skipped": True,
+ "summary": "No tests directory (OK for declarative agents)",
}
# Parse test types
@@ -1446,13 +1478,11 @@ def run_agent_tests(
def validate_agent_package(agent_name: str) -> str:
"""Run structural validation checks on a built agent package in one call.
- Executes 5 steps and reports all results (does not stop on first failure):
- 1. Class validation — checks graph structure and entry_points contract
- 2. Node completeness — every NodeSpec in nodes/ must be in the nodes list,
- and GCU nodes must be referenced in a parent's sub_agents
- 3. Graph validation — loads the agent graph without credential checks
- 4. Tool validation — checks declared tools exist in MCP servers
- 5. Tests — runs the agent's pytest suite
+ Executes validation steps and reports all results:
+ 1. Schema validation — loads agent.json via load_agent_config
+ 2. Graph validation — loads the agent graph via AgentLoader
+ 3. Tool validation — checks declared tools exist in MCP servers
+ 4. Tests — runs the agent's pytest suite (skipped if no tests/)
Note: Credential validation is intentionally skipped here (building phase).
Credentials are validated at run time by run_agent_with_input() preflight.
@@ -1477,137 +1507,88 @@ def validate_agent_package(agent_name: str) -> str:
path_parts.append(pythonpath)
env["PYTHONPATH"] = os.pathsep.join(path_parts)
- # Step 0: Module contract — __init__.py must expose goal, nodes, edges
- try:
- _contract_script = textwrap.dedent("""\
- import importlib, json
- mod = importlib.import_module('{agent_name}')
- missing = [a for a in ('goal', 'nodes', 'edges') if getattr(mod, a, None) is None]
- if missing:
+ # Detect agent format
+ _is_json = os.path.isfile(os.path.join(PROJECT_ROOT, agent_path, "agent.json"))
+
+ if _is_json:
+ # JSON agents: validate via load_agent_config (schema + round-trip)
+ try:
+ _json_script = textwrap.dedent("""\
+ import json, pathlib
+ from framework.loader.agent_loader import load_agent_config
+ data = json.loads(
+ pathlib.Path('exports/{agent_name}/agent.json').read_text()
+ )
+ g, goal = load_agent_config(data)
print(json.dumps({{
- 'valid': False,
- 'error': (
- "Module '{agent_name}' is missing module-level attributes: "
- + ", ".join(missing) + ". "
- "Fix: in {agent_name}/__init__.py, add "
- "'from .agent import " + ", ".join(missing) + "' "
- "so that 'import {agent_name}' exposes them at package level."
- )
+ 'valid': True,
+ 'nodes': len(g.nodes),
+ 'edges': len(g.edges),
+ 'entry': g.entry_node,
+ 'errors': [],
}}))
+ """).format(agent_name=agent_name)
+ proc = subprocess.run(
+ ["uv", "run", "python", "-c", _json_script],
+ capture_output=True,
+ text=True,
+ timeout=30,
+ env=env,
+ cwd=PROJECT_ROOT,
+ stdin=subprocess.DEVNULL,
+ )
+ if proc.returncode == 0:
+ result = json.loads(proc.stdout.strip())
+ steps["schema_validation"] = {
+ "passed": result["valid"],
+ "output": (
+ f"{result['nodes']} nodes, {result['edges']} edges, "
+ f"entry={result['entry']}"
+ ),
+ }
+ if result.get("errors"):
+ steps["schema_validation"]["errors"] = result["errors"]
else:
- print(json.dumps({{'valid': True}}))
- """).format(agent_name=agent_name)
- proc = subprocess.run(
- ["uv", "run", "python", "-c", _contract_script],
- capture_output=True,
- text=True,
- timeout=30,
- env=env,
- cwd=PROJECT_ROOT,
- stdin=subprocess.DEVNULL,
- )
- if proc.returncode == 0:
- result = json.loads(proc.stdout.strip())
- steps["module_contract"] = {
- "passed": result["valid"],
- "output": result.get("error", "goal, nodes, edges exported correctly"),
- }
- else:
- steps["module_contract"] = {
- "passed": False,
- "error": (
- f"Failed to import '{agent_name}': {proc.stderr.strip()[:1000]}. "
- f"Fix: ensure {agent_name}/__init__.py exists and can be imported "
- f"without errors (check syntax, missing dependencies, relative imports)."
- ),
- }
- except Exception as e:
- steps["module_contract"] = {"passed": False, "error": str(e)}
-
- # Step A: Class validation (subprocess for import isolation)
- try:
- proc = subprocess.run(
- [
- "uv",
- "run",
- "python",
- "-c",
- f"from {agent_name} import default_agent; print(default_agent.validate())",
- ],
- capture_output=True,
- text=True,
- timeout=30,
- env=env,
- cwd=PROJECT_ROOT,
- stdin=subprocess.DEVNULL,
- )
- passed = proc.returncode == 0
- steps["class_validation"] = {
- "passed": passed,
- "output": (proc.stdout.strip() or proc.stderr.strip())[:2000],
- }
- if not passed:
- steps["class_validation"]["error"] = proc.stderr.strip()[:2000]
- except Exception as e:
- steps["class_validation"] = {"passed": False, "error": str(e)}
-
- # Step A2: Node completeness — every NodeSpec in nodes/ must be in the nodes list
- try:
- _check_template = textwrap.dedent("""\
- import importlib, json
- agent = importlib.import_module('{agent_name}')
- nodes_mod = importlib.import_module('{agent_name}.nodes')
- graph_ids = {{n.id for n in agent.nodes}}
- defined = {{}}
- for attr in dir(nodes_mod):
- obj = getattr(nodes_mod, attr)
- if hasattr(obj, 'id') and hasattr(obj, 'node_type'):
- defined[obj.id] = attr
- orphaned = set(defined) - graph_ids
- errors = [
- f"Node '{{nid}}' ({{defined[nid]}}) defined in nodes/ but not in nodes list"
- for nid in sorted(orphaned)
- ]
- sub_refs = set()
- for n in agent.nodes:
- for sa in getattr(n, 'sub_agents', []) or []:
- sub_refs.add(sa)
- for n in agent.nodes:
- if n.node_type == 'gcu' and n.id not in sub_refs:
- errors.append(
- f"GCU node '{{n.id}}' not referenced in any node's sub_agents list"
- )
- print(json.dumps({{'valid': len(errors) == 0, 'errors': errors}}))
- """)
- check_script = _check_template.format(agent_name=agent_name)
- proc = subprocess.run(
- ["uv", "run", "python", "-c", check_script],
- capture_output=True,
- text=True,
- timeout=30,
- env=env,
- cwd=PROJECT_ROOT,
- stdin=subprocess.DEVNULL,
- )
- if proc.returncode == 0:
- result = json.loads(proc.stdout.strip())
- steps["node_completeness"] = {
- "passed": result["valid"],
- "output": (
- "; ".join(result["errors"])
- if result["errors"]
- else "All defined nodes are in the graph"
- ),
- }
- if not result["valid"]:
- steps["node_completeness"]["errors"] = result["errors"]
- else:
- steps["node_completeness"] = {
- "passed": False,
- "error": proc.stderr.strip()[:2000],
- }
- except Exception as e:
- steps["node_completeness"] = {"passed": False, "error": str(e)}
+ steps["schema_validation"] = {
+ "passed": False,
+ "error": proc.stderr.strip()[:2000],
+ }
+ except Exception as e:
+ steps["schema_validation"] = {"passed": False, "error": str(e)}
+ else:
+ # Legacy Python agents: module contract + class validation
+ try:
+ _contract_script = textwrap.dedent("""\
+ import importlib, json
+ mod = importlib.import_module('{agent_name}')
+ missing = [
+ a for a in ('goal', 'nodes', 'edges')
+ if getattr(mod, a, None) is None
+ ]
+ print(json.dumps({{'valid': len(missing) == 0, 'missing': missing}}))
+ """).format(agent_name=agent_name)
+ proc = subprocess.run(
+ ["uv", "run", "python", "-c", _contract_script],
+ capture_output=True, text=True, timeout=30,
+ env=env, cwd=PROJECT_ROOT, stdin=subprocess.DEVNULL,
+ )
+ if proc.returncode == 0:
+ result = json.loads(proc.stdout.strip())
+ steps["module_contract"] = {
+ "passed": result["valid"],
+ "output": (
+ f"Missing: {result['missing']}"
+ if result.get("missing")
+ else "goal, nodes, edges exported correctly"
+ ),
+ }
+ else:
+ steps["module_contract"] = {
+ "passed": False,
+ "error": proc.stderr.strip()[:1000],
+ }
+ except Exception as e:
+ steps["module_contract"] = {"passed": False, "error": str(e)}
# Step B: Graph validation (subprocess for import isolation)
# Credentials are checked at run time (run_agent_with_input preflight),
@@ -1619,10 +1600,10 @@ def validate_agent_package(agent_name: str) -> str:
"run",
"python",
"-c",
- f"from framework.runner.runner import AgentRunner; "
- f'r = AgentRunner.load("exports/{agent_name}", '
+ f"from framework.loader.agent_loader import AgentLoader; "
+ f'r = AgentLoader.load("exports/{agent_name}", '
f"skip_credential_validation=True); "
- f'print("AgentRunner.load (graph-only): OK")',
+ f'print("AgentLoader.load (graph-only): OK")',
],
capture_output=True,
text=True,
@@ -1659,7 +1640,9 @@ def validate_agent_package(agent_name: str) -> str:
# Step D: Tests (direct call)
try:
test_result = _run_agent_tests_impl(agent_name)
- if "error" in test_result:
+ if test_result.get("skipped"):
+ steps["tests"] = {"passed": True, "output": "No tests (skipped)"}
+ elif "error" in test_result:
steps["tests"] = {"passed": False, "error": test_result["error"]}
else:
all_passed = test_result.get("failed", 0) == 0 and test_result.get("errors", 0) == 0
@@ -1697,703 +1680,6 @@ def validate_agent_package(agent_name: str) -> str:
# ── Meta-agent: Package initialization ─────────────────────────────────────
-def _snake_to_camel(name: str) -> str:
- """Convert snake_case to CamelCase."""
- return "".join(word.capitalize() for word in name.split("_"))
-
-
-def _node_var_name(node_id: str) -> str:
- """Convert node id to a Python variable name."""
- return node_id.replace("-", "_") + "_node"
-
-
-@mcp.tool()
-def initialize_and_build_agent(
- agent_name: str,
- nodes: str | None = None,
- _draft: dict | None = None,
-) -> str:
- """Scaffold a new agent package with placeholder files.
-
- Creates exports/{agent_name}/ with all files needed for a runnable agent:
- config.py, nodes/__init__.py, agent.py, __init__.py, __main__.py,
- mcp_servers.json, tests/conftest.py.
-
- After initialization, customize the generated files:
- - System prompts and node logic in nodes/__init__.py
- - Goal and edges in agent.py
- - CLI options in __main__.py
-
- Args:
- agent_name: Name for the agent package. Must be snake_case (e.g. 'my_agent').
- nodes: Comma-separated node names (snake_case or kebab-case).
- If omitted, a single 'start' node is created.
- Example: 'intake,process,review'
- _draft: Internal. Draft graph metadata from planning phase, used to
- pre-populate descriptions, goals, and node metadata.
-
- Returns:
- JSON with files written and next steps.
- """
- import re
-
- if not re.match(r"^[a-z][a-z0-9_]*$", agent_name):
- return json.dumps(
- {
- "success": False,
- "error": (
- f"Invalid agent_name '{agent_name}'. Must be snake_case: "
- "lowercase letters, numbers, underscores, starting with a letter."
- ),
- }
- )
-
- node_list = [n.strip() for n in nodes.split(",") if n.strip()] if nodes else ["start"]
-
- # Build draft node lookup for pre-populating metadata from planning phase
- _draft_nodes: dict[str, dict] = {}
- if _draft and _draft.get("nodes"):
- for dn in _draft["nodes"]:
- _draft_nodes[dn.get("id", "")] = dn
-
- # Extract top-level draft metadata early so it's available for all templates
- _draft_desc = (_draft.get("description") or "") if _draft else ""
-
- class_name = _snake_to_camel(agent_name)
- human_name = agent_name.replace("_", " ").title()
- entry_node = node_list[0]
-
- exports_dir = os.path.join(PROJECT_ROOT, "exports", agent_name)
- nodes_dir = os.path.join(exports_dir, "nodes")
- tests_dir = os.path.join(exports_dir, "tests")
- os.makedirs(nodes_dir, exist_ok=True)
- os.makedirs(tests_dir, exist_ok=True)
-
- files_written: dict[str, dict] = {}
-
- def _write(rel_path: str, content: str) -> None:
- full = os.path.join(exports_dir, rel_path)
- os.makedirs(os.path.dirname(full), exist_ok=True)
- with open(full, "w", encoding="utf-8") as f:
- f.write(content)
- files_written[rel_path] = {
- "path": f"exports/{agent_name}/{rel_path}",
- "size_bytes": os.path.getsize(full),
- }
-
- # -- config.py --
- _write(
- "config.py",
- f'''\
-"""Runtime configuration."""
-
-import json
-from dataclasses import dataclass, field
-from pathlib import Path
-
-
-def _load_preferred_model() -> str:
- """Load preferred model from ~/.hive/configuration.json."""
- config_path = Path.home() / ".hive" / "configuration.json"
- if config_path.exists():
- try:
- with open(config_path) as f:
- config = json.load(f)
- llm = config.get("llm", {{}})
- if llm.get("provider") and llm.get("model"):
- return f"{{llm[\'provider\']}}/{{llm[\'model\']}}"
- except Exception:
- pass
- return "anthropic/claude-sonnet-4-20250514"
-
-
-@dataclass
-class RuntimeConfig:
- model: str = field(default_factory=_load_preferred_model)
- temperature: float = 0.7
- max_tokens: int = 40000
- api_key: str | None = None
- api_base: str | None = None
-
-
-default_config = RuntimeConfig()
-
-
-@dataclass
-class AgentMetadata:
- name: str = "{human_name}"
- version: str = "1.0.0"
- description: str = "{_draft_desc or "TODO: Add agent description."}"
- intro_message: str = "TODO: Add intro message."
-
-
-metadata = AgentMetadata()
-''',
- )
-
- # -- nodes/__init__.py --
- node_specs = []
- node_var_names = []
- for node_id in node_list:
- var = _node_var_name(node_id)
- node_var_names.append(var)
- is_first = node_id == entry_node
-
- # Use draft metadata to pre-populate if available
- dn = _draft_nodes.get(node_id, {})
- node_name = dn.get("name") or node_id.replace("_", " ").replace("-", " ").title()
- node_desc = dn.get("description") or "TODO: Describe what this node does."
- node_type = dn.get("node_type") or "event_loop"
- node_tools = dn.get("tools") or []
- node_input_keys = dn.get("input_keys") or []
- node_output_keys = dn.get("output_keys") or []
- node_sc = dn.get("success_criteria") or "TODO: Define success criteria."
-
- node_specs.append(f'''\
-{var} = NodeSpec(
- id="{node_id}",
- name="{node_name}",
- description="{node_desc}",
- node_type="{node_type}",
- client_facing={is_first},
- max_node_visits=0,
- input_keys={node_input_keys!r},
- output_keys={node_output_keys!r},
- nullable_output_keys=[],
- success_criteria="{node_sc}",
- system_prompt="""\\
-TODO: Add system prompt for this node.
-""",
- tools={node_tools!r},
-)''')
-
- nodes_init = f'''\
-"""Node definitions for {human_name}."""
-
-from framework.graph import NodeSpec
-
-{chr(10).join(node_specs)}
-
-__all__ = {node_var_names!r}
-'''
- _write("nodes/__init__.py", nodes_init)
-
- # -- agent.py --
- node_imports = ", ".join(node_var_names)
- nodes_list = ", ".join(node_var_names)
-
- # Use draft edges if available, otherwise generate linear edges
- _draft_edges = _draft.get("edges", []) if _draft else []
- edge_defs = []
- if _draft_edges:
- for de in _draft_edges:
- eid = de.get("id", f"{de.get('source', '')}-to-{de.get('target', '')}")
- src = de.get("source", "")
- tgt = de.get("target", "")
- cond = de.get("condition", "on_success").upper()
- desc = de.get("description", "")
- desc_line = f'\n description="{desc}",' if desc else ""
- edge_defs.append(f"""\
- EdgeSpec(
- id="{eid}",
- source="{src}",
- target="{tgt}",
- condition=EdgeCondition.{cond},{desc_line}
- priority=1,
- ),""")
- else:
- for i in range(len(node_list) - 1):
- src, tgt = node_list[i], node_list[i + 1]
- edge_defs.append(f"""\
- EdgeSpec(
- id="{src}-to-{tgt}",
- source="{src}",
- target="{tgt}",
- condition=EdgeCondition.ON_SUCCESS,
- priority=1,
- ),""")
- edges_str = "\n".join(edge_defs) if edge_defs else " # TODO: Add edges"
-
- # Pre-populate goal from draft metadata
- _draft_goal = (
- (_draft.get("goal") or "TODO: Describe the agent's goal.")
- if _draft
- else "TODO: Describe the agent's goal."
- )
- _draft_sc = (_draft.get("success_criteria") or []) if _draft else []
- _draft_constraints = (_draft.get("constraints") or []) if _draft else []
-
- # Build success criteria entries
- if _draft_sc:
- sc_entries = "\n".join(
- f"""\
- SuccessCriterion(
- id="sc-{i + 1}",
- description="{sc}",
- metric="TODO",
- target="TODO",
- weight=1.0,
- ),"""
- for i, sc in enumerate(_draft_sc)
- )
- else:
- sc_entries = """\
- SuccessCriterion(
- id="sc-1",
- description="TODO: Define success criterion.",
- metric="TODO",
- target="TODO",
- weight=1.0,
- ),"""
-
- # Build constraint entries
- if _draft_constraints:
- constraint_entries = "\n".join(
- f"""\
- Constraint(
- id="c-{i + 1}",
- description="{c}",
- constraint_type="hard",
- category="functional",
- ),"""
- for i, c in enumerate(_draft_constraints)
- )
- else:
- constraint_entries = """\
- Constraint(
- id="c-1",
- description="TODO: Define constraint.",
- constraint_type="hard",
- category="functional",
- ),"""
-
- _write(
- "agent.py",
- f'''\
-"""Agent graph construction for {human_name}."""
-
-from pathlib import Path
-
-from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint
-from framework.graph.edge import GraphSpec
-from framework.graph.executor import ExecutionResult
-from framework.graph.checkpoint_config import CheckpointConfig
-from framework.llm import LiteLLMProvider
-from framework.runner.tool_registry import ToolRegistry
-from framework.runtime.agent_runtime import create_agent_runtime
-from framework.runtime.execution_stream import EntryPointSpec
-
-from .config import default_config, metadata
-from .nodes import {node_imports}
-
-# Goal definition
-goal = Goal(
- id="{agent_name}-goal",
- name="{human_name}",
- description="{_draft_goal}",
- success_criteria=[
-{sc_entries}
- ],
- constraints=[
-{constraint_entries}
- ],
-)
-
-# Node list
-nodes = [{nodes_list}]
-
-# Edge definitions
-edges = [
-{edges_str}
-]
-
-# Graph configuration
-entry_node = "{entry_node}"
-entry_points = {{"start": "{entry_node}"}}
-pause_nodes = []
-terminal_nodes = []
-
-conversation_mode = "continuous"
-identity_prompt = "TODO: Add identity prompt."
-loop_config = {{
- "max_iterations": 100,
- "max_tool_calls_per_turn": 30,
- "max_history_tokens": 32000,
-}}
-
-
-class {class_name}:
- def __init__(self, config=None):
- self.config = config or default_config
- self.goal = goal
- self.nodes = nodes
- self.edges = edges
- self.entry_node = entry_node
- self.entry_points = entry_points
- self.pause_nodes = pause_nodes
- self.terminal_nodes = terminal_nodes
- self._graph = None
- self._agent_runtime = None
- self._tool_registry = None
- self._storage_path = None
-
- def _build_graph(self):
- return GraphSpec(
- id="{agent_name}-graph",
- goal_id=self.goal.id,
- version="1.0.0",
- entry_node=self.entry_node,
- entry_points=self.entry_points,
- terminal_nodes=self.terminal_nodes,
- pause_nodes=self.pause_nodes,
- nodes=self.nodes,
- edges=self.edges,
- default_model=self.config.model,
- max_tokens=self.config.max_tokens,
- loop_config=loop_config,
- conversation_mode=conversation_mode,
- identity_prompt=identity_prompt,
- )
-
- def _setup(self):
- self._storage_path = Path.home() / ".hive" / "agents" / "{agent_name}"
- self._storage_path.mkdir(parents=True, exist_ok=True)
- self._tool_registry = ToolRegistry()
- mcp_config = Path(__file__).parent / "mcp_servers.json"
- if mcp_config.exists():
- self._tool_registry.load_mcp_config(mcp_config)
- llm = LiteLLMProvider(
- model=self.config.model,
- api_key=self.config.api_key,
- api_base=self.config.api_base,
- )
- tools = list(self._tool_registry.get_tools().values())
- tool_executor = self._tool_registry.get_executor()
- self._graph = self._build_graph()
- self._agent_runtime = create_agent_runtime(
- graph=self._graph,
- goal=self.goal,
- storage_path=self._storage_path,
- entry_points=[
- EntryPointSpec(
- id="default",
- name="Default",
- entry_node=self.entry_node,
- trigger_type="manual",
- isolation_level="shared",
- ),
- ],
- llm=llm,
- tools=tools,
- tool_executor=tool_executor,
- checkpoint_config=CheckpointConfig(
- enabled=True,
- checkpoint_on_node_complete=True,
- checkpoint_max_age_days=7,
- async_checkpoint=True,
- ),
- )
-
- async def start(self):
- if self._agent_runtime is None:
- self._setup()
- if not self._agent_runtime.is_running:
- await self._agent_runtime.start()
-
- async def stop(self):
- if self._agent_runtime and self._agent_runtime.is_running:
- await self._agent_runtime.stop()
- self._agent_runtime = None
-
- async def trigger_and_wait(
- self,
- entry_point="default",
- input_data=None,
- timeout=None,
- session_state=None,
- ):
- if self._agent_runtime is None:
- raise RuntimeError("Agent not started. Call start() first.")
- return await self._agent_runtime.trigger_and_wait(
- entry_point_id=entry_point,
- input_data=input_data or {{}},
- session_state=session_state,
- )
-
- async def run(self, context, session_state=None):
- await self.start()
- try:
- result = await self.trigger_and_wait(
- "default", context, session_state=session_state
- )
- return result or ExecutionResult(success=False, error="Execution timeout")
- finally:
- await self.stop()
-
- def info(self):
- return {{
- "name": metadata.name,
- "version": metadata.version,
- "description": metadata.description,
- "goal": {{
- "name": self.goal.name,
- "description": self.goal.description,
- }},
- "nodes": [n.id for n in self.nodes],
- "edges": [e.id for e in self.edges],
- "entry_node": self.entry_node,
- "entry_points": self.entry_points,
- "terminal_nodes": self.terminal_nodes,
- "client_facing_nodes": [n.id for n in self.nodes if n.client_facing],
- }}
-
- def validate(self):
- errors, warnings = [], []
- node_ids = {{n.id for n in self.nodes}}
- for e in self.edges:
- if e.source not in node_ids:
- errors.append(f"Edge {{e.id}}: source '{{e.source}}' not found")
- if e.target not in node_ids:
- errors.append(f"Edge {{e.id}}: target '{{e.target}}' not found")
- if self.entry_node not in node_ids:
- errors.append(f"Entry node '{{self.entry_node}}' not found")
- for t in self.terminal_nodes:
- if t not in node_ids:
- errors.append(f"Terminal node '{{t}}' not found")
- for ep_id, nid in self.entry_points.items():
- if nid not in node_ids:
- errors.append(f"Entry point '{{ep_id}}' references unknown node '{{nid}}'")
-
- return {{"valid": len(errors) == 0, "errors": errors, "warnings": warnings}}
-
-
-default_agent = {class_name}()
-''',
- )
-
- # -- __init__.py --
- _write(
- "__init__.py",
- f'''\
-"""{human_name} — TODO: Add description."""
-
-from .agent import (
- {class_name},
- default_agent,
- goal,
- nodes,
- edges,
- entry_node,
- entry_points,
- pause_nodes,
- terminal_nodes,
- conversation_mode,
- identity_prompt,
- loop_config,
-)
-from .config import default_config, metadata
-
-__all__ = [
- "{class_name}",
- "default_agent",
- "goal",
- "nodes",
- "edges",
- "entry_node",
- "entry_points",
- "pause_nodes",
- "terminal_nodes",
- "conversation_mode",
- "identity_prompt",
- "loop_config",
- "default_config",
- "metadata",
-]
-''',
- )
-
- # -- __main__.py --
- _write(
- "__main__.py",
- f'''\
-"""CLI entry point for {human_name}."""
-
-import asyncio
-import json
-import logging
-import sys
-
-import click
-
-from .agent import default_agent, {class_name}
-
-
-def setup_logging(verbose=False, debug=False):
- if debug:
- level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s"
- elif verbose:
- level, fmt = logging.INFO, "%(message)s"
- else:
- level, fmt = logging.WARNING, "%(levelname)s: %(message)s"
- logging.basicConfig(level=level, format=fmt, stream=sys.stderr)
-
-
-@click.group()
-@click.version_option(version="1.0.0")
-def cli():
- """{human_name}."""
- pass
-
-
-@cli.command()
-@click.option("--verbose", "-v", is_flag=True)
-def run(verbose):
- """Execute the agent."""
- setup_logging(verbose=verbose)
- result = asyncio.run(default_agent.run({{}}))
- click.echo(
- json.dumps(
- {{"success": result.success, "output": result.output}},
- indent=2,
- default=str,
- )
- )
- sys.exit(0 if result.success else 1)
-
-
-@cli.command()
-def info():
- """Show agent info."""
- data = default_agent.info()
- click.echo(
- f"Agent: {{data[\'name\']}}\n"
- f"Version: {{data[\'version\']}}\n"
- f"Description: {{data[\'description\']}}"
- )
- click.echo(f"Nodes: {{', '.join(data[\'nodes\'])}}")
- click.echo(f"Client-facing: {{', '.join(data[\'client_facing_nodes\'])}}")
-
-
-@cli.command()
-def validate():
- """Validate agent structure."""
- v = default_agent.validate()
- if v["valid"]:
- click.echo("Agent is valid")
- else:
- click.echo("Errors:")
- for e in v["errors"]:
- click.echo(f" {{e}}")
- sys.exit(0 if v["valid"] else 1)
-
-
-if __name__ == "__main__":
- cli()
-''',
- )
-
- # -- mcp_servers.json --
- mcp_config: dict = {
- "hive-tools": {
- "transport": "stdio",
- "command": "uv",
- "args": ["run", "python", "mcp_server.py", "--stdio"],
- "cwd": "../../tools",
- "description": "Hive tools MCP server",
- },
- "gcu-tools": {
- "transport": "stdio",
- "command": "uv",
- "args": ["run", "python", "-m", "gcu.server", "--stdio"],
- "cwd": "../../tools",
- "description": "GCU browser automation tools",
- },
- }
-
- _write("mcp_servers.json", json.dumps(mcp_config, indent=2))
-
- # -- tests/conftest.py --
- _write(
- "tests/conftest.py",
- '''\
-"""Test fixtures."""
-
-import sys
-from pathlib import Path
-
-import pytest
-
-_repo_root = Path(__file__).resolve().parents[3]
-for _p in ["exports", "core"]:
- _path = str(_repo_root / _p)
- if _path not in sys.path:
- sys.path.insert(0, _path)
-
-AGENT_PATH = str(Path(__file__).resolve().parents[1])
-
-
-@pytest.fixture(scope="session")
-def agent_module():
- """Import the agent package for structural validation."""
- import importlib
-
- return importlib.import_module(Path(AGENT_PATH).name)
-
-
-@pytest.fixture(scope="session")
-def runner_loaded():
- """Load the agent through AgentRunner (structural only, no LLM needed)."""
- from framework.runner.runner import AgentRunner
-
- return AgentRunner.load(AGENT_PATH)
-''',
- )
-
- # Build list of all generated file paths for the caller.
- all_file_paths = [info["path"] for info in files_written.values()]
-
- return json.dumps(
- {
- "success": True,
- "agent_name": agent_name,
- "class_name": class_name,
- "entry_node": entry_node,
- "nodes": node_list,
- "files_written": files_written,
- "file_count": len(files_written),
- "files": all_file_paths,
- "next_steps": [
- (
- "IMPORTANT: All generated files are structurally complete "
- "with correct imports, class definition, validate() method, "
- "and __init__.py exports. Use edit_file to customize TODO "
- "placeholders — do NOT use write_file to rewrite entire files, "
- "as this will break imports and structure."
- ),
- (
- f"Use edit_file to customize system prompts, tools, "
- f"input_keys, output_keys, and success_criteria in "
- f"exports/{agent_name}/nodes/__init__.py"
- ),
- (
- f"Use edit_file to customize goal description, "
- f"success_criteria values, constraint values, edge "
- f"definitions, and identity_prompt in "
- f"exports/{agent_name}/agent.py"
- ),
- (
- "Do NOT modify: imports at top of agent.py, the class "
- "definition, validate() method, _build_graph()/_setup()/"
- "lifecycle methods, or __init__.py exports — they are "
- "already correct."
- ),
- f'Run validate_agent_package("{agent_name}") to verify structure',
- ],
- },
- indent=2,
- )
-
-
# ── Main ──────────────────────────────────────────────────────────────────
diff --git a/tools/src/gcu/browser/bridge.py b/tools/src/gcu/browser/bridge.py
index 3ef3e4d7..6d3afd7d 100644
--- a/tools/src/gcu/browser/bridge.py
+++ b/tools/src/gcu/browser/bridge.py
@@ -1026,6 +1026,9 @@ class BeelineBridge:
await self.highlight_point(tab_id, x, y, label=f"{key} ({x},{y})")
return {"ok": True, "action": "press_at", "x": x, "y": y, "key": key}
+ # Duration (ms) that injected highlights stay visible before fading out.
+ _HIGHLIGHT_DURATION_MS = 1500
+
async def highlight_rect(
self,
tab_id: int,
@@ -1036,61 +1039,112 @@ class BeelineBridge:
label: str = "",
color: dict | None = None,
) -> None:
- """Draw a CDP Overlay highlight box in the live browser window.
+ """Inject a visible highlight overlay into the page DOM.
- Visible in the next screenshot. Automatically cleared on the next
- interaction or by calling clear_highlight().
+ Creates a fixed-position div with border, background tint, and an
+ optional label tag. The element fades out after ``_HIGHLIGHT_DURATION_MS``
+ and removes itself. Much more visible than the CDP Overlay API.
"""
- await self.cdp_attach(tab_id)
- await self._try_enable_domain(tab_id, "Overlay")
- fill = color or {"r": 59, "g": 130, "b": 246, "a": 0.35} # blue-500 @ 35%
- outline = {"r": fill["r"], "g": fill["g"], "b": fill["b"], "a": 1.0}
- await self._cdp(
- tab_id,
- "Overlay.highlightRect",
- {
- "x": int(x),
- "y": int(y),
- "width": max(1, int(w)),
- "height": max(1, int(h)),
- "color": fill,
- "outlineColor": outline,
- },
- )
+ fill = color or {"r": 59, "g": 130, "b": 246, "a": 0.18}
+ border_rgb = f"rgb({fill['r']},{fill['g']},{fill['b']})"
+ bg_rgba = f"rgba({fill['r']},{fill['g']},{fill['b']},{fill.get('a', 0.18)})"
+ duration = self._HIGHLIGHT_DURATION_MS
+
+ # Escape label for safe injection
+ safe_label = json.dumps(label[:60]) if label else '""'
+
+ js = f"""
+ (function() {{
+ // Remove any previous hive highlight
+ var old = document.getElementById('__hive_hl');
+ if (old) old.remove();
+
+ var box = document.createElement('div');
+ box.id = '__hive_hl';
+ box.style.cssText = 'position:fixed;z-index:2147483647;pointer-events:none;'
+ + 'left:{int(x)}px;top:{int(y)}px;width:{max(1,int(w))}px;height:{max(1,int(h))}px;'
+ + 'border:2px solid {border_rgb};background:{bg_rgba};'
+ + 'border-radius:3px;transition:opacity 0.4s ease;opacity:1;'
+ + 'box-shadow:0 0 8px {bg_rgba};';
+
+ var lbl = {safe_label};
+ if (lbl) {{
+ var tag = document.createElement('span');
+ tag.textContent = lbl;
+ tag.style.cssText = 'position:absolute;left:0;top:-20px;'
+ + 'background:{border_rgb};color:#fff;font:bold 11px/16px system-ui;'
+ + 'padding:1px 6px;border-radius:3px;white-space:nowrap;max-width:200px;'
+ + 'overflow:hidden;text-overflow:ellipsis;';
+ box.appendChild(tag);
+ }}
+
+ document.documentElement.appendChild(box);
+ setTimeout(function() {{ box.style.opacity = '0'; }}, {duration});
+ setTimeout(function() {{ box.remove(); }}, {duration + 500});
+ }})();
+ """
+ try:
+ await self.cdp_attach(tab_id)
+ await self.evaluate(tab_id, js)
+ except Exception:
+ pass # best-effort visual feedback
+
_interaction_highlights[tab_id] = {
- "x": x,
- "y": y,
- "w": w,
- "h": h,
- "label": label,
- "kind": "rect",
+ "x": x, "y": y, "w": w, "h": h,
+ "label": label, "kind": "rect",
}
async def highlight_point(self, tab_id: int, x: float, y: float, label: str = "") -> None:
- """Highlight a coordinate as a small crosshair box in the browser."""
- r = 12 # half-size of the crosshair box in CSS px
- await self.highlight_rect(
- tab_id,
- x - r,
- y - r,
- r * 2,
- r * 2,
- label=label,
- color={"r": 239, "g": 68, "b": 68, "a": 0.45}, # red-500 @ 45%
- )
+ """Highlight a coordinate with a pulsing dot and crosshair."""
+ duration = self._HIGHLIGHT_DURATION_MS
+ safe_label = json.dumps(label[:60]) if label else '""'
+
+ js = f"""
+ (function() {{
+ var old = document.getElementById('__hive_hl');
+ if (old) old.remove();
+
+ var dot = document.createElement('div');
+ dot.id = '__hive_hl';
+ dot.style.cssText = 'position:fixed;z-index:2147483647;pointer-events:none;'
+ + 'left:{int(x)-8}px;top:{int(y)-8}px;width:16px;height:16px;'
+ + 'border-radius:50%;background:rgba(239,68,68,0.7);'
+ + 'box-shadow:0 0 0 4px rgba(239,68,68,0.25),0 0 12px rgba(239,68,68,0.4);'
+ + 'transition:opacity 0.4s ease;opacity:1;';
+
+ var lbl = {safe_label};
+ if (lbl) {{
+ var tag = document.createElement('span');
+ tag.textContent = lbl;
+ tag.style.cssText = 'position:absolute;left:20px;top:-4px;'
+ + 'background:rgba(239,68,68,0.9);color:#fff;font:bold 11px/16px system-ui;'
+ + 'padding:1px 6px;border-radius:3px;white-space:nowrap;';
+ dot.appendChild(tag);
+ }}
+
+ document.documentElement.appendChild(dot);
+ setTimeout(function() {{ dot.style.opacity = '0'; }}, {duration});
+ setTimeout(function() {{ dot.remove(); }}, {duration + 500});
+ }})();
+ """
+ try:
+ await self.cdp_attach(tab_id)
+ await self.evaluate(tab_id, js)
+ except Exception:
+ pass
+
_interaction_highlights[tab_id] = {
- "x": x,
- "y": y,
- "w": 0,
- "h": 0,
- "label": label,
- "kind": "point",
+ "x": x, "y": y, "w": 0, "h": 0,
+ "label": label, "kind": "point",
}
async def clear_highlight(self, tab_id: int) -> None:
- """Remove the CDP Overlay highlight from the browser."""
+ """Remove the injected highlight from the page."""
try:
- await self._cdp(tab_id, "Overlay.hideHighlight")
+ await self.evaluate(tab_id, """
+ var el = document.getElementById('__hive_hl');
+ if (el) el.remove();
+ """)
except Exception:
pass
_interaction_highlights.pop(tab_id, None)
@@ -1199,6 +1253,20 @@ class BeelineBridge:
},
)
+ # Highlight the select element
+ rect_result = await self.evaluate(
+ tab_id,
+ f"(function(){{const el=document.querySelector("
+ f"{json.dumps(selector)});if(!el)return null;"
+ f"const r=el.getBoundingClientRect();"
+ f"return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()",
+ )
+ rect = (rect_result or {}).get("result")
+ if rect:
+ await self.highlight_rect(
+ tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector
+ )
+
return {"ok": True, "action": "select", "selector": selector, "selected": values}
# ── Inspection ─────────────────────────────────────────────────────────────
diff --git a/tools/src/gcu/browser/tools/advanced.py b/tools/src/gcu/browser/tools/advanced.py
index 80c4a3e6..d75860ec 100644
--- a/tools/src/gcu/browser/tools/advanced.py
+++ b/tools/src/gcu/browser/tools/advanced.py
@@ -113,6 +113,28 @@ def register_advanced_tools(mcp: FastMCP) -> None:
return {"ok": False, "error": "No active tab"}
try:
+ # Show a brief toast in the browser so the user sees JS executing
+ snippet = script.strip().replace("'", "\\'")[:80]
+ toast_js = f"""
+ (function(){{
+ var old=document.getElementById('__hive_toast');if(old)old.remove();
+ var t=document.createElement('div');t.id='__hive_toast';
+ t.style.cssText='position:fixed;z-index:2147483647;top:12px;right:12px;'
+ +'background:rgba(30,30,30,0.9);color:#a5d6ff;font:12px/18px monospace;'
+ +'padding:8px 14px;border-radius:6px;max-width:420px;pointer-events:none;'
+ +'white-space:pre-wrap;word-break:break-all;transition:opacity 0.4s;opacity:1;'
+ +'border:1px solid rgba(59,130,246,0.4);box-shadow:0 4px 12px rgba(0,0,0,0.3);';
+ t.textContent='\\u25b6 '+'{snippet}';
+ document.documentElement.appendChild(t);
+ setTimeout(function(){{t.style.opacity='0';}},2000);
+ setTimeout(function(){{t.remove();}},2500);
+ }})();
+ """
+ try:
+ await bridge.evaluate(target_tab, toast_js)
+ except Exception:
+ pass
+
result = await bridge.evaluate(target_tab, script)
return result
except Exception as e:
diff --git a/tools/src/gcu/browser/tools/lifecycle.py b/tools/src/gcu/browser/tools/lifecycle.py
index add68502..e39e769a 100644
--- a/tools/src/gcu/browser/tools/lifecycle.py
+++ b/tools/src/gcu/browser/tools/lifecycle.py
@@ -245,6 +245,7 @@ def register_lifecycle_tools(mcp: FastMCP) -> None:
_contexts[profile_name] = {
"groupId": group_id,
"activeTabId": tab_id,
+ "_seedTabId": tab_id, # reused by first browser_open call
}
logger.info(
diff --git a/tools/src/gcu/browser/tools/tabs.py b/tools/src/gcu/browser/tools/tabs.py
index 5555bd45..e487cfbc 100644
--- a/tools/src/gcu/browser/tools/tabs.py
+++ b/tools/src/gcu/browser/tools/tabs.py
@@ -128,9 +128,13 @@ def register_tab_tools(mcp: FastMCP) -> None:
return result
try:
- # Create tab in the group
- result = await bridge.create_tab(url=url, group_id=ctx.get("groupId"))
- tab_id = result.get("tabId")
+ # Reuse the seed about:blank tab from context.create on first open
+ seed_tab = ctx.pop("_seedTabId", None)
+ if seed_tab is not None:
+ tab_id = seed_tab
+ else:
+ result = await bridge.create_tab(url=url, group_id=ctx.get("groupId"))
+ tab_id = result.get("tabId")
# Update active tab if not background
if not background and tab_id is not None: