Merge branch 'feature/hive-experimental-comp-pipeline' into feat/open-hive-colony

This commit is contained in:
Bryan
2026-04-08 11:50:39 -07:00
167 changed files with 4504 additions and 7221 deletions
+2
View File
@@ -70,6 +70,8 @@ tmp/
temp/
exports/*
exports.old*
artifacts/*
.claude/settings.local.json
+17 -65
View File
@@ -1,71 +1,23 @@
"""
Aden Hive Framework: A goal-driven agent runtime optimized for Builder observability.
"""Hive Agent Framework.
The runtime is designed around DECISIONS, not just actions. Every significant
choice the agent makes is captured with:
- What it was trying to do (intent)
- What options it considered
- What it chose and why
- What happened as a result
- Whether that was good or bad (evaluated post-hoc)
This gives the Builder LLM the information it needs to improve agent behavior.
## Testing Framework
The framework includes a Goal-Based Testing system (Goal Agent Eval):
- Generate tests from Goal success_criteria and constraints
- Mandatory user approval before tests are stored
- Parallel test execution with error categorization
- Debug tools with fix suggestions
See `framework.testing` for details.
Core classes:
AgentHost -- hosts agents, manages entry points and pipeline
Orchestrator -- routes between nodes in a graph
AgentLoop -- the LLM + tool execution loop (one per node)
AgentLoader -- loads agent.json from disk, builds pipeline
DecisionTracker -- records decisions for post-hoc analysis
"""
from framework.llm import LLMProvider
try:
from framework.llm import AnthropicProvider # noqa: F401
except ImportError:
pass
from framework.runner import AgentRunner
from framework.runtime.core import Runtime
from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome
from framework.schemas.run import Problem, Run, RunSummary
# Testing framework
from framework.testing import (
ApprovalStatus,
DebugTool,
ErrorCategory,
Test,
TestResult,
TestStorage,
TestSuiteResult,
)
from framework.agent_loop import AgentLoop
from framework.host import AgentHost
from framework.loader import AgentLoader
from framework.orchestrator import Orchestrator
from framework.tracker import DecisionTracker
__all__ = [
# Schemas
"Decision",
"Option",
"Outcome",
"DecisionEvaluation",
"Run",
"RunSummary",
"Problem",
# Runtime
"Runtime",
# LLM
"LLMProvider",
"AnthropicProvider",
# Runner
"AgentRunner",
# Testing
"Test",
"TestResult",
"TestSuiteResult",
"TestStorage",
"ApprovalStatus",
"ErrorCategory",
"DebugTool",
"AgentHost",
"AgentLoader",
"AgentLoop",
"DecisionTracker",
"Orchestrator",
]
+32
View File
@@ -0,0 +1,32 @@
"""Agent loop -- the core agent execution primitive."""
from framework.agent_loop.conversation import ( # noqa: F401
ConversationStore,
Message,
NodeConversation,
)
# Lazy import to avoid circular dependency with graph/event_loop/
# (graph/event_loop/* imports framework.graph.conversation which is a shim
# pointing here, which would trigger agent_loop.py loading, which imports
# graph/event_loop/* again)
def __getattr__(name: str):
if name in ("AgentLoop", "JudgeProtocol", "JudgeVerdict", "LoopConfig", "OutputAccumulator"):
from framework.agent_loop.agent_loop import (
AgentLoop,
JudgeProtocol,
JudgeVerdict,
LoopConfig,
OutputAccumulator,
)
_exports = {
"AgentLoop": AgentLoop,
"JudgeProtocol": JudgeProtocol,
"JudgeVerdict": JudgeVerdict,
"LoopConfig": LoopConfig,
"OutputAccumulator": OutputAccumulator,
}
return _exports[name]
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -21,16 +21,16 @@ from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from typing import Any
from framework.graph.conversation import ConversationStore, NodeConversation
from framework.graph.event_loop import types as event_loop_types
from framework.graph.event_loop.compaction import (
from framework.agent_loop.conversation import ConversationStore, NodeConversation
from framework.agent_loop.internals import types as event_loop_types
from framework.agent_loop.internals.compaction import (
build_emergency_summary,
build_llm_compaction_prompt,
compact,
format_messages_for_summary,
llm_compact,
)
from framework.graph.event_loop.cursor_persistence import (
from framework.agent_loop.internals.cursor_persistence import (
RestoredState,
check_pause,
drain_injection_queue,
@@ -38,7 +38,7 @@ from framework.graph.event_loop.cursor_persistence import (
restore,
write_cursor,
)
from framework.graph.event_loop.event_publishing import (
from framework.agent_loop.internals.event_publishing import (
generate_action_plan,
log_skip_judge,
publish_context_usage,
@@ -54,27 +54,24 @@ from framework.graph.event_loop.event_publishing import (
publish_tool_started,
run_hooks,
)
from framework.graph.event_loop.judge_pipeline import (
from framework.agent_loop.internals.judge_pipeline import (
SubagentJudge as SharedSubagentJudge,
judge_turn,
)
from framework.graph.event_loop.stall_detector import (
from framework.agent_loop.internals.stall_detector import (
fingerprint_tool_calls,
is_stalled,
is_tool_doom_loop,
ngram_similarity,
)
from framework.graph.event_loop.subagent_executor import execute_subagent
from framework.graph.event_loop.synthetic_tools import (
from framework.agent_loop.internals.synthetic_tools import (
build_ask_user_multiple_tool,
build_ask_user_tool,
build_delegate_tool,
build_escalate_tool,
build_report_to_parent_tool,
build_set_output_tool,
handle_set_output,
)
from framework.graph.event_loop.tool_result_handler import (
from framework.agent_loop.internals.tool_result_handler import (
build_json_preview,
execute_tool,
extract_json_metadata,
@@ -82,12 +79,12 @@ from framework.graph.event_loop.tool_result_handler import (
restore_spill_counter,
truncate_tool_result,
)
from framework.graph.event_loop.types import (
from framework.agent_loop.internals.types import (
JudgeProtocol,
JudgeVerdict,
TriggerEvent,
)
from framework.graph.node import NodeContext, NodeProtocol, NodeResult
from framework.orchestrator.node import NodeContext, NodeProtocol, NodeResult
from framework.llm.capabilities import supports_image_tool_results
from framework.llm.provider import Tool, ToolResult, ToolUse
from framework.llm.stream_events import (
@@ -96,8 +93,8 @@ from framework.llm.stream_events import (
TextDeltaEvent,
ToolCallEvent,
)
from framework.runtime.event_bus import EventBus
from framework.runtime.llm_debug_logger import log_llm_turn
from framework.host.event_bus import EventBus
from framework.tracker.llm_debug_logger import log_llm_turn
logger = logging.getLogger(__name__)
@@ -163,43 +160,9 @@ def _is_context_too_large_error(exc: BaseException) -> bool:
# ---------------------------------------------------------------------------
# Escalation receiver (temporary routing target for subagent → user input)
# ---------------------------------------------------------------------------
class _EscalationReceiver:
"""Temporary receiver registered in node_registry for subagent escalation routing.
When a subagent calls ``report_to_parent(wait_for_response=True)``, the callback
creates one of these, registers it under a unique escalation ID in the executor's
``node_registry``, and awaits ``wait()``. The TUI / runner calls
``inject_input(escalation_id, content)`` which the ``ExecutionStream`` routes here
via ``inject_event()`` matching the same ``hasattr(node, "inject_event")`` check
used for regular ``EventLoopNode`` instances.
"""
def __init__(self) -> None:
self._event = asyncio.Event()
self._response: str | None = None
self._awaiting_input = True # So inject_message() can prefer us
async def inject_event(
self,
content: str,
*,
is_client_input: bool = False,
image_content: list[dict] | None = None,
) -> None:
"""Called by ExecutionStream.inject_input() when the user responds."""
self._response = content
self._event.set()
async def wait(self) -> str | None:
"""Block until inject_event() delivers the user's response."""
await self._event.wait()
return self._response
# ---------------------------------------------------------------------------
# Judge protocol (simple 3-action interface for event loop evaluation)
# ---------------------------------------------------------------------------
@@ -224,7 +187,7 @@ OutputAccumulator = event_loop_types.OutputAccumulator
# ---------------------------------------------------------------------------
class EventLoopNode(NodeProtocol):
class AgentLoop(NodeProtocol):
"""Multi-turn LLM streaming loop with tool execution and judge evaluation.
Lifecycle:
@@ -284,9 +247,6 @@ class EventLoopNode(NodeProtocol):
# Monotonic counter for spillover file naming (web_search_1.txt, etc.)
self._spill_counter: int = 0
# Subagent mark_complete: when True, _evaluate returns ACCEPT immediately
self._mark_complete_flag = False
# Counter for subagent instances (1, 2, 3, ...)
self._subagent_instance_counter: dict[str, int] = {}
def validate_input(self, ctx: NodeContext) -> list[str]:
"""Validate hard requirements only.
@@ -307,7 +267,7 @@ class EventLoopNode(NodeProtocol):
async def execute(self, ctx: NodeContext) -> NodeResult:
"""Run the event loop."""
logger.debug(
"[EventLoopNode.execute] Starting execution for node=%s, stream=%s",
"[AgentLoop.execute] Starting execution for node=%s, stream=%s",
ctx.node_id,
ctx.stream_id,
)
@@ -320,7 +280,7 @@ class EventLoopNode(NodeProtocol):
# Store skill dirs for AS-9 file-read interception in _execute_tool
self._skill_dirs: list[str] = ctx.skill_dirs
logger.debug(
"[EventLoopNode.execute] node_id=%s, execution_id=%s, max_iterations=%d",
"[AgentLoop.execute] node_id=%s, execution_id=%s, max_iterations=%d",
node_id,
execution_id,
self._config.max_iterations,
@@ -402,7 +362,7 @@ class EventLoopNode(NodeProtocol):
# execution preamble and node-type preamble. The stored
# prompt may be stale after code changes or when runtime-
# injected context (e.g. worker identity) has changed.
from framework.graph.prompting import build_system_prompt_for_node_context
from framework.orchestrator.prompting import build_system_prompt_for_node_context
_current_prompt = build_system_prompt_for_node_context(ctx)
if conversation.system_prompt != _current_prompt:
@@ -425,7 +385,7 @@ class EventLoopNode(NodeProtocol):
await self._conversation_store.clear()
# Fresh conversation: either isolated mode or first node in continuous mode.
from framework.graph.prompting import build_system_prompt_for_node_context
from framework.orchestrator.prompting import build_system_prompt_for_node_context
system_prompt = build_system_prompt_for_node_context(ctx)
@@ -484,7 +444,7 @@ class EventLoopNode(NodeProtocol):
# 2a. Guard: ensure at least one non-system message exists.
# A restored conversation may have 0 messages if phase_id filtering
# removes them all, or if a prior run stored metadata without messages
# (e.g. subagent that failed before the first LLM call).
# (e.g. node that failed before the first LLM call).
if conversation.message_count == 0:
initial_message = self._build_initial_message(ctx)
if initial_message:
@@ -502,37 +462,10 @@ class EventLoopNode(NodeProtocol):
tools.append(self._build_ask_user_tool())
if stream_id == "queen":
tools.append(self._build_ask_user_multiple_tool())
# Workers/subagents can escalate blockers to the queen.
# Workers can escalate blockers to the queen.
if stream_id not in ("queen", "judge"):
tools.append(self._build_escalate_tool())
# Add delegate_to_sub_agent tool if:
# - Node has sub_agents defined
# - We are NOT in subagent mode (prevents nested delegation)
if not ctx.is_subagent_mode:
sub_agents = getattr(ctx.node_spec, "sub_agents", None) or []
if sub_agents:
delegate_tool = self._build_delegate_tool(sub_agents, ctx.node_registry)
if delegate_tool:
tools.append(delegate_tool)
logger.info(
"[%s] delegate_to_sub_agent injected (sub_agents=%s)",
node_id,
sub_agents,
)
else:
logger.error(
"[%s] _build_delegate_tool returned None for sub_agents=%s",
node_id,
sub_agents,
)
else:
logger.debug("[%s] Skipped delegate tool (is_subagent_mode=True)", node_id)
# Add report_to_parent tool for sub-agents with a report callback
if ctx.is_subagent_mode and ctx.report_callback is not None:
tools.append(self._build_report_to_parent_tool())
logger.info(
"[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s",
node_id,
@@ -565,11 +498,11 @@ class EventLoopNode(NodeProtocol):
# 6. Main loop
logger.debug(
"[EventLoopNode.execute] Entering main loop, start_iteration=%d", start_iteration
"[AgentLoop.execute] Entering main loop, start_iteration=%d", start_iteration
)
for iteration in range(start_iteration, self._config.max_iterations):
iter_start = time.time()
logger.debug("[EventLoopNode.execute] iteration=%d starting", iteration)
logger.debug("[AgentLoop.execute] iteration=%d starting", iteration)
# 6a. Check pause (no current-iteration data yet — only log_node_complete needed)
if await self._check_pause(ctx, conversation, iteration):
@@ -601,18 +534,18 @@ class EventLoopNode(NodeProtocol):
# 6b. Drain injection queue
logger.debug(
"[EventLoopNode.execute] iteration=%d: draining injection queue...", iteration
"[AgentLoop.execute] iteration=%d: draining injection queue...", iteration
)
drained_injections = await self._drain_injection_queue(conversation, ctx)
logger.debug(
"[EventLoopNode.execute] iteration=%d: drained %d injections",
"[AgentLoop.execute] iteration=%d: drained %d injections",
iteration,
drained_injections,
)
# 6b1. Drain trigger queue (framework-level signals)
drained_triggers = await self._drain_trigger_queue(conversation)
logger.debug(
"[EventLoopNode.execute] iteration=%d: drained %d triggers",
"[AgentLoop.execute] iteration=%d: drained %d triggers",
iteration,
drained_triggers,
)
@@ -685,8 +618,6 @@ class EventLoopNode(NodeProtocol):
"ask_user",
"ask_user_multiple",
"escalate",
"delegate_to_sub_agent",
"report_to_parent",
}
synthetic = [t for t in tools if t.name in _synthetic_names]
tools.clear()
@@ -696,11 +627,11 @@ class EventLoopNode(NodeProtocol):
# 6b3. Dynamic prompt refresh (phase switching / memory refresh)
if ctx.dynamic_prompt_provider is not None or ctx.dynamic_memory_provider is not None:
if ctx.dynamic_prompt_provider is not None:
from framework.graph.prompting import stamp_prompt_datetime
from framework.orchestrator.prompting import stamp_prompt_datetime
_new_prompt = stamp_prompt_datetime(ctx.dynamic_prompt_provider())
else:
from framework.graph.prompting import build_system_prompt_for_node_context
from framework.orchestrator.prompting import build_system_prompt_for_node_context
_new_prompt = build_system_prompt_for_node_context(ctx)
if _new_prompt != conversation.system_prompt:
@@ -743,7 +674,7 @@ class EventLoopNode(NodeProtocol):
len(conversation.messages),
)
logger.debug(
"[EventLoopNode.execute] iteration=%d: entering _run_single_turn loop", iteration
"[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration
)
_stream_retry_count = 0
_turn_cancelled = False
@@ -752,7 +683,7 @@ class EventLoopNode(NodeProtocol):
while True:
try:
logger.debug(
"[EventLoopNode.execute] iteration=%d: calling _run_single_turn (retry=%d)",
"[AgentLoop.execute] iteration=%d: calling _run_single_turn (retry=%d)",
iteration,
_stream_retry_count,
)
@@ -768,12 +699,12 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
request_system_prompt,
request_messages,
reported_to_parent,
_,
) = await self._run_single_turn(
ctx, conversation, tools, iteration, accumulator
)
logger.debug(
"[EventLoopNode.execute] iteration=%d:"
"[AgentLoop.execute] iteration=%d:"
" _run_single_turn completed successfully",
iteration,
)
@@ -842,13 +773,13 @@ class EventLoopNode(NodeProtocol):
break # success — exit retry loop
except TurnCancelled:
logger.debug("[EventLoopNode.execute] iteration=%d: TurnCancelled", iteration)
logger.debug("[AgentLoop.execute] iteration=%d: TurnCancelled", iteration)
_turn_cancelled = True
break
except Exception as e:
logger.debug(
"[EventLoopNode.execute] iteration=%d:"
"[AgentLoop.execute] iteration=%d:"
" Exception in _run_single_turn: %s (%s)",
iteration,
type(e).__name__,
@@ -1024,7 +955,7 @@ class EventLoopNode(NodeProtocol):
and not outputs_set
and not user_input_requested
and not queen_input_requested
and not reported_to_parent
)
if truly_empty and accumulator is not None:
missing = self._get_missing_output_keys(
@@ -1276,14 +1207,14 @@ class EventLoopNode(NodeProtocol):
# blocking and resumption.
_is_worker = (
stream_id not in ("queen", "judge")
and not ctx.is_subagent_mode
and not False
and not ctx.supports_direct_user_io
and self._event_bus is not None
)
_worker_no_tool_turn = (
not real_tool_results
and not outputs_set
and not reported_to_parent
and not queen_input_requested
and not user_input_requested
)
@@ -1733,7 +1664,7 @@ class EventLoopNode(NodeProtocol):
# 6i. Judge evaluation
should_judge = (
ctx.is_subagent_mode # Always evaluate subagents
False
or (iteration + 1) % self._config.judge_every_n_turns == 0
or not real_tool_results # no real tool calls = natural stop
)
@@ -1789,7 +1720,7 @@ class EventLoopNode(NodeProtocol):
missing = self._get_missing_output_keys(
accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
)
if missing and self._judge is not None and not self._mark_complete_flag:
if missing and self._judge is not None :
hint = (
f"Task incomplete. Required outputs not yet produced: {missing}. "
f"Follow your system prompt instructions to complete the work."
@@ -1988,7 +1919,7 @@ class EventLoopNode(NodeProtocol):
image_content: Optional list of OpenAI-style image blocks to attach.
"""
logger.debug(
"[EventLoopNode.inject_event] content_len=%d,"
"[AgentLoop.inject_event] content_len=%d,"
" is_client_input=%s, has_images=%s,"
" queue_size_before=%d",
len(content) if content else 0,
@@ -1998,15 +1929,15 @@ class EventLoopNode(NodeProtocol):
)
try:
await self._injection_queue.put((content, is_client_input, image_content))
logger.debug("[EventLoopNode.inject_event] Message queued successfully")
logger.debug("[AgentLoop.inject_event] Message queued successfully")
except Exception as e:
logger.exception("[EventLoopNode.inject_event] Failed to queue message: %s", e)
logger.exception("[AgentLoop.inject_event] Failed to queue message: %s", e)
raise
try:
self._input_ready.set()
logger.debug("[EventLoopNode.inject_event] _input_ready.set() called")
logger.debug("[AgentLoop.inject_event] _input_ready.set() called")
except Exception as e:
logger.exception("[EventLoopNode.inject_event] Failed to set _input_ready: %s", e)
logger.exception("[AgentLoop.inject_event] Failed to set _input_ready: %s", e)
raise
async def inject_trigger(self, trigger: TriggerEvent) -> None:
@@ -2157,7 +2088,6 @@ class EventLoopNode(NodeProtocol):
ask_user_prompt = ""
ask_user_options: list[str] | None = None
queen_input_requested = False
reported_to_parent = False
# Accumulate ALL tool calls across inner iterations for L3 logging.
# Unlike real_tool_results (reset each inner iteration), this persists.
logged_tool_calls: list[dict] = []
@@ -2231,16 +2161,28 @@ class EventLoopNode(NodeProtocol):
):
if isinstance(event, TextDeltaEvent):
accumulated_text = event.snapshot
await self._publish_text_delta(
stream_id,
node_id,
event.content,
event.snapshot,
ctx,
execution_id,
iteration=iteration,
inner_turn=inner_turn,
)
# Filter <think>...</think> blocks from client output.
# Content inside think tags is internal reasoning -- only
# the text after </think> is shown to the user.
_content = event.content
if "<think>" in event.snapshot and "</think>" not in event.snapshot:
_content = "" # still inside think block
elif "</think>" in _content:
# End of think block -- emit only text after the tag
_content = _content.split("</think>", 1)[-1]
elif "<think>" in _content:
_content = "" # opening tag in this chunk
if _content:
await self._publish_text_delta(
stream_id,
node_id,
_content,
event.snapshot,
ctx,
execution_id,
iteration=iteration,
inner_turn=inner_turn,
)
elif isinstance(event, ToolCallEvent):
_tc.append(event)
@@ -2348,10 +2290,27 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
final_system_prompt,
final_messages,
reported_to_parent,
False,
)
# Execute tool calls — framework tools (set_output, ask_user)
# Priority drain: if user sent a message while the LLM was
# streaming, inject it into the conversation NOW -- before tool
# execution. The LLM will see it on the next inner turn.
if not self._injection_queue.empty():
while not self._injection_queue.empty():
_inj_content, _inj_client, _inj_images = (
self._injection_queue.get_nowait()
)
if _inj_client:
await conversation.add_user_message(_inj_content)
logger.info(
"[%s] Priority-injected user message mid-turn (%d chars)",
node_id, len(_inj_content),
)
else:
await conversation.add_user_message(_inj_content)
# Execute tool calls -- framework tools (set_output, ask_user)
# run inline; real MCP tools run in parallel.
real_tool_results: list[dict] = []
limit_hit = False
@@ -2361,13 +2320,12 @@ class EventLoopNode(NodeProtocol):
)
# Phase 1: triage — handle framework tools immediately,
# queue real tools and subagents for parallel execution.
# queue real tools for parallel execution.
results_by_id: dict[str, ToolResult] = {}
timing_by_id: dict[
str, dict[str, Any]
] = {} # tool_use_id -> {start_timestamp, duration_s}
pending_real: list[ToolCallEvent] = []
pending_subagent: list[ToolCallEvent] = []
for tc in tool_calls:
tool_call_count += 1
@@ -2610,76 +2568,6 @@ class EventLoopNode(NodeProtocol):
)
results_by_id[tc.tool_use_id] = result
elif tc.tool_name == "delegate_to_sub_agent":
# Guard: in continuous mode the LLM may see delegate
# calls from a previous node's conversation history and
# attempt to re-use the tool on a node that doesn't own
# it. Only accept if the tool was actually offered.
if not any(t.name == "delegate_to_sub_agent" for t in tools):
logger.warning(
"[%s] LLM called delegate_to_sub_agent but tool "
"was not offered to this node — rejecting",
node_id,
)
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=(
"ERROR: delegate_to_sub_agent is not available "
"on this node. This tool belongs to a different "
"node in the workflow."
),
is_error=True,
)
results_by_id[tc.tool_use_id] = result
continue
# --- Framework-level subagent delegation ---
# Queue for parallel execution in Phase 2
logger.info(
"🔄 LLM requesting subagent delegation: agent_id='%s', task='%s'",
tc.tool_input.get("agent_id", "?"),
(tc.tool_input.get("task", "")[:100] + "...")
if len(tc.tool_input.get("task", "")) > 100
else tc.tool_input.get("task", ""),
)
pending_subagent.append(tc)
elif tc.tool_name == "report_to_parent":
# --- Report from sub-agent to parent (optionally blocking) ---
reported_to_parent = True
msg = tc.tool_input.get("message", "")
data = tc.tool_input.get("data")
wait = tc.tool_input.get("wait_for_response", False)
mark_complete = tc.tool_input.get("mark_complete", False)
response = None
if ctx.report_callback:
try:
response = await ctx.report_callback(
msg,
data,
wait_for_response=wait,
)
except Exception:
logger.warning(
"[%s] report_to_parent callback failed (swallowed)",
node_id,
exc_info=True,
)
if mark_complete:
self._mark_complete_flag = True
logger.info(
"[%s] mark_complete=True — subagent will accept on this iteration",
node_id,
)
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=response if (wait and response) else "Report sent to parent.",
is_error=False,
)
results_by_id[tc.tool_use_id] = result
else:
# --- Real tool: check for truncated args, else queue ---
if "_raw" in tc.tool_input:
@@ -2754,175 +2642,6 @@ class EventLoopNode(NodeProtocol):
result = raw
results_by_id[tc.tool_use_id] = self._truncate_tool_result(result, tc.tool_name)
# Phase 2b: execute subagent delegations in parallel.
if pending_subagent:
_subagent_timeout = self._config.subagent_timeout_seconds
_inactivity_timeout = self._config.subagent_inactivity_timeout_seconds
async def _timed_subagent(
_ctx: NodeContext,
_tc: ToolCallEvent,
_acc: OutputAccumulator = accumulator,
_wall_timeout: float = _subagent_timeout,
_activity_timeout: float = _inactivity_timeout,
) -> tuple[ToolResult | BaseException, str, float]:
_s = time.time()
_iso = datetime.now(UTC).isoformat()
_last_activity = _s
_activity_event = asyncio.Event()
async def _watchdog() -> None:
"""Watchdog that times out only after inactivity period."""
nonlocal _last_activity
while True:
_now = time.time()
_inactive_for = _now - _last_activity
_remaining = _activity_timeout - _inactive_for
if _remaining <= 0:
# Inactivity timeout reached
return
try:
await asyncio.wait_for(_activity_event.wait(), timeout=_remaining)
_activity_event.clear()
except TimeoutError:
# Check again in case activity happened during wait
continue
async def _run_with_activity_timeout(
_coro,
) -> ToolResult:
"""Run subagent with activity-based timeout."""
_watchdog_task = asyncio.create_task(_watchdog())
try:
_result = await _coro
return _result
finally:
_watchdog_task.cancel()
try:
await _watchdog_task
except asyncio.CancelledError:
pass
try:
# Subscribe to subagent activity events to reset inactivity timer
async def _on_subagent_activity(event) -> None:
nonlocal _last_activity
_last_activity = time.time()
_activity_event.set()
_sub_id = None
if self._event_bus and _activity_timeout > 0:
from framework.runtime.event_bus import EventType
_sub_id = self._event_bus.subscribe(
event_types=[
EventType.TOOL_CALL_STARTED,
EventType.LLM_TEXT_DELTA,
EventType.EXECUTION_STARTED,
],
handler=_on_subagent_activity,
)
try:
_coro = self._execute_subagent(
_ctx,
_tc.tool_input.get("agent_id", ""),
_tc.tool_input.get("task", ""),
accumulator=_acc,
)
if _activity_timeout > 0:
# Use activity-based timeout with wall-clock max
_result_coro = _run_with_activity_timeout(_coro)
if _wall_timeout > 0:
_r = await asyncio.wait_for(_result_coro, timeout=_wall_timeout)
else:
_r = await _result_coro
elif _wall_timeout > 0:
_r = await asyncio.wait_for(_coro, timeout=_wall_timeout)
else:
_r = await _coro
finally:
if _sub_id and self._event_bus:
self._event_bus.unsubscribe(_sub_id)
except TimeoutError:
_agent_id = _tc.tool_input.get("agent_id", "unknown")
_elapsed = time.time() - _s
logger.warning(
"Subagent '%s' timed out after %.0fs (inactivity threshold: %.0fs)",
_agent_id,
_elapsed,
_activity_timeout if _activity_timeout > 0 else _wall_timeout,
)
_r = ToolResult(
tool_use_id=_tc.tool_use_id,
content=(
f"Subagent '{_agent_id}' timed out after "
f"{_elapsed:.0f}s of inactivity. "
"The subagent was not making progress. "
"Try a simpler task or break it into smaller pieces."
),
is_error=True,
)
except BaseException as _exc:
_r = _exc
_dur = round(time.time() - _s, 3)
return _r, _iso, _dur
subagent_timed = await asyncio.gather(
*(_timed_subagent(ctx, tc) for tc in pending_subagent),
return_exceptions=True,
)
for tc, entry in zip(pending_subagent, subagent_timed, strict=True):
if isinstance(entry, BaseException):
raw = entry
_start_iso = datetime.now(UTC).isoformat()
_dur_s = 0
else:
raw, _start_iso, _dur_s = entry
_sa_timing = {
"start_timestamp": _start_iso,
"duration_s": _dur_s,
}
if isinstance(raw, BaseException):
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=json.dumps(
{
"message": f"Sub-agent execution raised: {raw}",
"data": None,
"metadata": {"success": False, "error": str(raw)},
}
),
is_error=True,
)
else:
# Attach the tool_use_id to the result
result = ToolResult(
tool_use_id=tc.tool_use_id,
content=raw.content,
is_error=raw.is_error,
)
# Route through _truncate_tool_result so large
# subagent results are saved to spillover files
# and survive pruning (instead of being "cleared
# from context" with no recovery path).
result = self._truncate_tool_result(result, "delegate_to_sub_agent")
results_by_id[tc.tool_use_id] = result
logged_tool_calls.append(
{
"tool_use_id": tc.tool_use_id,
"tool_name": "delegate_to_sub_agent",
"tool_input": tc.tool_input,
"content": result.content,
"is_error": result.is_error,
**_sa_timing,
}
)
# Phase 3: record results into conversation in original order,
# build logged/real lists, and publish completed events.
for tc in tool_calls[:executed_in_batch]:
@@ -2936,8 +2655,6 @@ class EventLoopNode(NodeProtocol):
"ask_user",
"ask_user_multiple",
"escalate",
"delegate_to_sub_agent",
"report_to_parent",
):
tool_entry = {
"tool_use_id": tc.tool_use_id,
@@ -3056,7 +2773,7 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
final_system_prompt,
final_messages,
reported_to_parent,
False,
)
# --- Mid-turn pruning: prevent context blowup within a single turn ---
@@ -3090,7 +2807,7 @@ class EventLoopNode(NodeProtocol):
queen_input_requested,
final_system_prompt,
final_messages,
reported_to_parent,
False,
)
# Tool calls processed -- loop back to stream with updated conversation
@@ -3118,16 +2835,6 @@ class EventLoopNode(NodeProtocol):
"""Build the synthetic escalate tool. Delegates to synthetic_tools module."""
return build_escalate_tool()
def _build_delegate_tool(
self, sub_agents: list[str], node_registry: dict[str, Any]
) -> Tool | None:
"""Build the synthetic delegate_to_sub_agent tool. Delegates to synthetic_tools module."""
return build_delegate_tool(sub_agents, node_registry)
def _build_report_to_parent_tool(self) -> Tool:
"""Build the synthetic report_to_parent tool. Delegates to synthetic_tools module."""
return build_report_to_parent_tool()
def _handle_set_output(
self,
tool_input: dict[str, Any],
@@ -3151,7 +2858,7 @@ class EventLoopNode(NodeProtocol):
) -> JudgeVerdict:
"""Evaluate the current state. Delegates to judge_pipeline module."""
return await judge_turn(
mark_complete_flag=self._mark_complete_flag,
mark_complete_flag=False,
judge=self._judge,
ctx=ctx,
conversation=conversation,
@@ -3176,7 +2883,7 @@ class EventLoopNode(NodeProtocol):
Delegates to :func:`extract_tool_call_history` in conversation.py.
"""
from framework.graph.conversation import extract_tool_call_history
from framework.agent_loop.conversation import extract_tool_call_history
return extract_tool_call_history(conversation.messages, max_entries=max_entries)
@@ -3781,46 +3488,3 @@ class EventLoopNode(NodeProtocol):
# Subagent Execution
# -------------------------------------------------------------------
async def _execute_subagent(
self,
ctx: NodeContext,
agent_id: str,
task: str,
*,
accumulator: OutputAccumulator | None = None,
) -> ToolResult:
"""Execute a subagent and return the result as a ToolResult.
The subagent:
- Gets a fresh conversation with just the task
- Has read-only access to the parent's readable data buffer
- Cannot delegate to its own subagents (prevents recursion)
- Returns its output in structured JSON format
Args:
ctx: Parent node's context (for data buffer, tools, LLM access).
agent_id: The node ID of the subagent to invoke.
task: The task description to give the subagent.
accumulator: Parent's OutputAccumulator — provides outputs that
have been set via ``set_output`` but not yet written to
data buffer (which only happens after the node completes).
Returns:
ToolResult with structured JSON output containing:
- message: Human-readable summary
- data: Subagent's output (free-form JSON)
- metadata: Execution metadata (success, tokens, latency)
"""
return await execute_subagent(
ctx=ctx,
agent_id=agent_id,
task=task,
accumulator=accumulator,
event_bus=self._event_bus,
config=self._config,
tool_executor=self._tool_executor,
conversation_store=self._conversation_store,
subagent_instance_counter=self._subagent_instance_counter,
event_loop_node_cls=type(self),
escalation_receiver_cls=_EscalationReceiver,
)
@@ -324,7 +324,7 @@ def _try_extract_key(content: str, key: str) -> str | None:
3. Colon format: ``key: value``.
4. Equals format: ``key = value``.
"""
from framework.graph.node import find_json_object
from framework.orchestrator.node import find_json_object
# 1. Whole message is JSON
try:
@@ -0,0 +1,7 @@
"""Agent loop internals -- compaction, judge, tools, subagent execution.
Re-exports from legacy locations for the new import path.
"""
from framework.agent_loop.internals.compaction import * # noqa: F401, F403
from framework.agent_loop.internals.synthetic_tools import * # noqa: F401, F403
@@ -19,11 +19,11 @@ from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from framework.graph.conversation import Message, NodeConversation
from framework.graph.event_loop.event_publishing import publish_context_usage
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
from framework.graph.node import NodeContext
from framework.runtime.event_bus import EventBus
from framework.agent_loop.conversation import Message, NodeConversation
from framework.agent_loop.internals.event_publishing import publish_context_usage
from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator
from framework.orchestrator.node import NodeContext
from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -368,8 +368,8 @@ async def llm_compact(
in half and each half is summarised independently. Tool history is
appended once at the top-level call (``_depth == 0``).
"""
from framework.graph.conversation import extract_tool_call_history
from framework.graph.event_loop.tool_result_handler import is_context_too_large_error
from framework.agent_loop.conversation import extract_tool_call_history
from framework.agent_loop.internals.tool_result_handler import is_context_too_large_error
if _depth > max_depth:
raise RuntimeError(f"LLM compaction recursion limit ({max_depth})")
@@ -724,7 +724,7 @@ async def log_compaction(
)
if event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
event_data: dict[str, Any] = {
"level": level,
@@ -861,6 +861,6 @@ def _extract_tool_call_history(conversation: NodeConversation) -> str:
directly (vs. the module-level extract_tool_call_history in conversation.py
which works on raw message lists).
"""
from framework.graph.conversation import extract_tool_call_history
from framework.agent_loop.conversation import extract_tool_call_history
return extract_tool_call_history(list(conversation.messages))
@@ -14,9 +14,9 @@ from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
from framework.graph.conversation import ConversationStore, NodeConversation
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator, TriggerEvent
from framework.graph.node import NodeContext
from framework.agent_loop.conversation import ConversationStore, NodeConversation
from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator, TriggerEvent
from framework.orchestrator.node import NodeContext
from framework.llm.capabilities import supports_image_tool_results
logger = logging.getLogger(__name__)
@@ -9,10 +9,10 @@ from __future__ import annotations
import logging
import time
from framework.graph.conversation import NodeConversation
from framework.graph.event_loop.types import HookContext
from framework.graph.node import NodeContext
from framework.runtime.event_bus import EventBus
from framework.agent_loop.conversation import NodeConversation
from framework.agent_loop.internals.types import HookContext
from framework.orchestrator.node import NodeContext
from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -177,7 +177,7 @@ async def publish_context_usage(
if not event_bus:
return
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
estimated = conversation.estimate_tokens()
max_tokens = conversation._max_context_tokens
@@ -5,9 +5,9 @@ from __future__ import annotations
import logging
from collections.abc import Callable
from framework.graph.conversation import NodeConversation
from framework.graph.event_loop.types import JudgeProtocol, JudgeVerdict, OutputAccumulator
from framework.graph.node import NodeContext
from framework.agent_loop.conversation import NodeConversation
from framework.agent_loop.internals.types import JudgeProtocol, JudgeVerdict, OutputAccumulator
from framework.orchestrator.node import NodeContext
logger = logging.getLogger(__name__)
@@ -155,7 +155,7 @@ async def judge_turn(
# Level 2b: conversation-aware quality check (if success_criteria set)
if ctx.node_spec.success_criteria and ctx.llm:
from framework.graph.conversation_judge import evaluate_phase_completion
from framework.orchestrator.conversation_judge import evaluate_phase_completion
verdict = await evaluate_phase_completion(
llm=ctx.llm,
@@ -204,118 +204,6 @@ def build_escalate_tool() -> Tool:
},
)
def build_delegate_tool(sub_agents: list[str], node_registry: dict[str, Any]) -> Tool | None:
    """Construct the synthetic ``delegate_to_sub_agent`` tool.

    Args:
        sub_agents: Node IDs that may be invoked as subagents.
        node_registry: Mapping of node_id -> NodeSpec, used to pull each
            subagent's description into the tool prompt.

    Returns:
        The Tool definition, or None when ``sub_agents`` is empty.
    """
    if not sub_agents:
        return None

    def _describe(agent_id: str) -> str:
        # One "- id: description" bullet per subagent; fall back to a
        # placeholder when the registry has no (truthy) spec for the id.
        spec = node_registry.get(agent_id)
        if spec:
            return f"- {agent_id}: {getattr(spec, 'description', '(no description)')}"
        return f"- {agent_id}: (not found in registry)"

    description_text = (
        "Delegate a task to a specialized sub-agent. The sub-agent runs "
        "autonomously with read-only access to current memory and returns "
        "its result. Use this to parallelize work or leverage specialized capabilities.\n\n"
        "Available sub-agents:\n"
        + "\n".join(_describe(agent_id) for agent_id in sub_agents)
    )
    parameter_schema = {
        "type": "object",
        "properties": {
            "agent_id": {
                "type": "string",
                "description": f"The sub-agent to invoke. Must be one of: {sub_agents}",
                "enum": sub_agents,
            },
            "task": {
                "type": "string",
                "description": (
                    "The task description for the sub-agent to execute. "
                    "Be specific about what you want the sub-agent to do and "
                    "what information to return."
                ),
            },
        },
        "required": ["agent_id", "task"],
    }
    return Tool(
        name="delegate_to_sub_agent",
        description=description_text,
        parameters=parameter_schema,
    )
def build_report_to_parent_tool() -> Tool:
    """Construct the synthetic ``report_to_parent`` tool for sub-agents.

    A sub-agent calls this to push one-way progress updates, partial
    findings, or status reports to the parent node (and to external
    observers via the event bus) without blocking execution.

    Two flags modify that default:
    - ``wait_for_response=True`` blocks the sub-agent until the parent
      relays the user's response — used for escalation (e.g. login
      pages, CAPTCHAs).
    - ``mark_complete=True`` terminates the sub-agent immediately after
      the report is sent — no need to call ``set_output`` for each
      output key.
    """
    tool_description = (
        "Send a report to the parent agent. By default this is fire-and-forget: "
        "the parent receives the report but does not respond. "
        "Set wait_for_response=true to BLOCK until the user replies — use this "
        "when you need human intervention (e.g. login pages, CAPTCHAs, "
        "authentication walls). The user's response is returned as the tool result. "
        "Set mark_complete=true to finish your task and terminate immediately "
        "after sending the report — use this when your findings are in the "
        "message/data fields and you don't need to call set_output."
    )
    parameter_schema = {
        "type": "object",
        "properties": {
            "message": {
                "type": "string",
                "description": "A human-readable status or progress message.",
            },
            "data": {
                "type": "object",
                "description": "Optional structured data to include with the report.",
            },
            "wait_for_response": {
                "type": "boolean",
                "description": (
                    "If true, block execution until the user responds. "
                    "Use for escalation scenarios requiring human intervention."
                ),
                "default": False,
            },
            "mark_complete": {
                "type": "boolean",
                "description": (
                    "If true, terminate the sub-agent immediately after sending "
                    "this report. The report message and data are delivered to the "
                    "parent as the final result. No set_output calls are needed."
                ),
                "default": False,
            },
        },
        "required": ["message"],
    }
    return Tool(
        name="report_to_parent",
        description=tool_description,
        parameters=parameter_schema,
    )
def handle_set_output(
tool_input: dict[str, Any],
output_keys: list[str] | None,
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Literal, Protocol, runtime_checkable
from framework.graph.conversation import (
from framework.agent_loop.conversation import (
ConversationStore,
)
@@ -68,7 +68,7 @@ class LoopConfig:
max_output_value_chars: int = 2_000
# Stream retry.
max_stream_retries: int = 3
max_stream_retries: int = 5
stream_retry_backoff_base: float = 2.0
stream_retry_max_delay: float = 60.0
+9 -1
View File
@@ -8,6 +8,14 @@ FRAMEWORK_AGENTS_DIR = Path(__file__).parent
def list_framework_agents() -> list[Path]:
"""List all framework agent directories."""
return sorted(
[p for p in FRAMEWORK_AGENTS_DIR.iterdir() if p.is_dir() and (p / "agent.py").exists()],
[
p
for p in FRAMEWORK_AGENTS_DIR.iterdir()
if p.is_dir()
and (
(p / "agent.json").exists()
or (p / "agent.py").exists()
)
],
key=lambda p: p.name,
)
@@ -21,15 +21,15 @@ from pathlib import Path
from typing import TYPE_CHECKING
from framework.config import get_max_context_tokens
from framework.graph import Goal, NodeSpec, SuccessCriterion
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.edge import GraphSpec
from framework.graph.executor import ExecutionResult
from framework.orchestrator import Goal, NodeSpec, SuccessCriterion
from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.orchestrator import ExecutionResult
from framework.llm import LiteLLMProvider
from framework.runner.mcp_registry import MCPRegistry
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from framework.loader.mcp_registry import MCPRegistry
from framework.loader.tool_registry import ToolRegistry
from framework.host.agent_host import AgentHost
from framework.host.execution_manager import EntryPointSpec
from .config import default_config
from .nodes import build_tester_node
@@ -37,7 +37,7 @@ from .nodes import build_tester_node
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from framework.runner import AgentRunner
from framework.loader import AgentLoader
logger = logging.getLogger(__name__)
@@ -233,7 +233,7 @@ requires_account_selection = True
"""Signal TUI to show account picker before starting the agent."""
def configure_for_account(runner: AgentRunner, account: dict) -> None:
def configure_for_account(runner: AgentLoader, account: dict) -> None:
"""Scope the tester node's tools to the selected provider.
Handles both Aden accounts (account= routing) and local accounts
@@ -325,7 +325,7 @@ def _activate_local_account(credential_id: str, alias: str) -> None:
def _configure_aden_node(
runner: AgentRunner,
runner: AgentLoader,
provider: str,
alias: str,
detail: str,
@@ -368,7 +368,7 @@ or any other identifier — always use the alias exactly as shown.
def _configure_local_node(
runner: AgentRunner,
runner: AgentLoader,
provider: str,
alias: str,
identity: dict,
@@ -497,7 +497,7 @@ class CredentialTesterAgent:
def __init__(self, config=None):
self.config = config or default_config
self._selected_account: dict | None = None
self._agent_runtime: AgentRuntime | None = None
self._agent_runtime: AgentHost | None = None
self._tool_registry: ToolRegistry | None = None
self._storage_path: Path | None = None
@@ -613,7 +613,7 @@ class CredentialTesterAgent:
graph = self._build_graph()
self._agent_runtime = create_agent_runtime(
self._agent_runtime = AgentHost(
graph=graph,
goal=goal,
storage_path=self._storage_path,
@@ -1,6 +1,6 @@
"""Node definitions for Credential Tester agent."""
from framework.graph import NodeSpec
from framework.orchestrator import NodeSpec
def build_tester_node(
+54 -29
View File
@@ -27,8 +27,8 @@ def _get_last_active(agent_path: Path) -> str | None:
"""Return the most recent updated_at timestamp across all sessions.
Checks both worker sessions (``~/.hive/agents/{name}/sessions/``) and
queen sessions (``~/.hive/queen/session/``) whose ``meta.json`` references
the same *agent_path*.
queen sessions (``~/.hive/agents/queens/default/sessions/``) whose
``meta.json`` references the same *agent_path*.
"""
from datetime import datetime
@@ -53,7 +53,9 @@ def _get_last_active(agent_path: Path) -> str | None:
continue
# 2. Queen sessions
queen_sessions_dir = Path.home() / ".hive" / "queen" / "session"
from framework.config import QUEENS_DIR
queen_sessions_dir = QUEENS_DIR / "default" / "sessions"
if queen_sessions_dir.exists():
resolved = agent_path.resolve()
for d in queen_sessions_dir.iterdir():
@@ -112,13 +114,33 @@ def _count_runs(agent_name: str) -> int:
def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
"""Extract node count, tool count, and tags from an agent directory.
Prefers agent.py (AST-parsed) over agent.json for node/tool counts
since agent.json may be stale. Tags are only available from agent.json.
Checks agent.json (declarative) first, then agent.py (legacy).
"""
import ast
node_count, tool_count, tags = 0, 0, []
# Declarative JSON agents (preferred)
agent_json = agent_path / "agent.json"
if agent_json.exists():
try:
data = json.loads(agent_json.read_text(encoding="utf-8"))
if isinstance(data, dict):
json_nodes = data.get("nodes", [])
node_count = len(json_nodes)
tools: set[str] = set()
for n in json_nodes:
node_tools = n.get("tools", {})
if isinstance(node_tools, dict):
tools.update(node_tools.get("allowed", []))
elif isinstance(node_tools, list):
tools.update(node_tools)
tool_count = len(tools)
return node_count, tool_count, tags
except Exception:
pass
# Legacy: agent.py (AST-parsed)
agent_py = agent_path / "agent.py"
if agent_py.exists():
try:
@@ -132,39 +154,31 @@ def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
except Exception:
pass
agent_json = agent_path / "agent.json"
if agent_json.exists():
try:
data = json.loads(agent_json.read_text(encoding="utf-8"))
json_nodes = data.get("graph", {}).get("nodes", []) or data.get("nodes", [])
if node_count == 0:
node_count = len(json_nodes)
tools: set[str] = set()
for n in json_nodes:
tools.update(n.get("tools", []))
tool_count = len(tools)
tags = data.get("agent", {}).get("tags", [])
except Exception:
pass
return node_count, tool_count, tags
def discover_agents() -> dict[str, list[AgentEntry]]:
"""Discover agents from all known sources grouped by category."""
from framework.runner.cli import (
from framework.loader.cli import (
_extract_python_agent_metadata,
_get_framework_agents_dir,
_is_valid_agent_dir,
)
from framework.config import COLONIES_DIR
groups: dict[str, list[AgentEntry]] = {}
sources = [
("Your Agents", Path("exports")),
("Your Agents", COLONIES_DIR),
("Your Agents", Path("exports")), # compat fallback
("Framework", _get_framework_agents_dir()),
("Examples", Path("examples/templates")),
]
# Track seen agent directory names to avoid duplicates when the same
# agent exists in both colonies/ and exports/ (colonies takes priority).
_seen_agent_names: set[str] = set()
for category, base_dir in sources:
if not base_dir.exists():
continue
@@ -172,6 +186,9 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
if not _is_valid_agent_dir(path):
continue
if path.name in _seen_agent_names:
continue
_seen_agent_names.add(path.name)
name, desc = _extract_python_agent_metadata(path)
config_fallback_name = path.name.replace("_", " ").title()
@@ -179,13 +196,19 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
node_count, tool_count, tags = _extract_agent_stats(path)
if not used_config:
agent_json = path / "agent.json"
if agent_json.exists():
# Try agent.json (declarative) for metadata
agent_json_path = path / "agent.json"
if agent_json_path.exists():
try:
data = json.loads(agent_json.read_text(encoding="utf-8"))
meta = data.get("agent", {})
name = meta.get("name", name)
desc = meta.get("description", desc)
data = json.loads(
agent_json_path.read_text(encoding="utf-8"),
)
if isinstance(data, dict):
raw_name = data.get("name", name)
if "-" in raw_name and " " not in raw_name:
raw_name = raw_name.replace("-", " ").title()
name = raw_name
desc = data.get("description", desc)
except Exception:
pass
@@ -204,6 +227,8 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
)
)
if entries:
groups[category] = entries
existing = groups.get(category, [])
existing.extend(entries)
groups[category] = existing
return groups
+3 -9
View File
@@ -1,19 +1,13 @@
"""
Queen Native agent builder for the Hive framework.
"""Queen -- the agent builder for the Hive framework."""
Deeply understands the agent framework and produces complete Python packages
with goals, nodes, edges, system prompts, MCP configuration, and tests
from natural language specifications.
"""
from .agent import queen_goal, queen_graph
from .agent import queen_goal, queen_loop_config
from .config import AgentMetadata, RuntimeConfig, default_config, metadata
__version__ = "1.0.0"
__all__ = [
"queen_goal",
"queen_graph",
"queen_loop_config",
"RuntimeConfig",
"AgentMetadata",
"default_config",
+16 -25
View File
@@ -1,38 +1,29 @@
"""Queen graph definition."""
"""Queen agent definition.
from framework.graph import Goal
from framework.graph.edge import GraphSpec
The queen is a single AgentLoop -- no graph, no orchestrator.
Loaded by queen_orchestrator.create_queen().
"""
from framework.orchestrator.goal import Goal
from .nodes import queen_node
# ---------------------------------------------------------------------------
# Queen graph — the primary persistent conversation.
# Loaded by queen_orchestrator.create_queen(), NOT by AgentRunner.
# ---------------------------------------------------------------------------
queen_goal = Goal(
id="queen-manager",
name="Queen Manager",
description=(
"Manage the worker agent lifecycle and serve as the user's primary interactive interface."
"Manage the worker agent lifecycle and serve as the "
"user's primary interactive interface."
),
success_criteria=[],
constraints=[],
)
queen_graph = GraphSpec(
id="queen-graph",
goal_id=queen_goal.id,
version="1.0.0",
entry_node="queen",
entry_points={"start": "queen"},
terminal_nodes=[],
pause_nodes=[],
nodes=[queen_node],
edges=[],
conversation_mode="continuous",
loop_config={
"max_iterations": 999_999,
"max_tool_calls_per_turn": 30,
},
)
# Loop config -- used by queen_orchestrator to build LoopConfig
queen_loop_config = {
"max_iterations": 999_999,
"max_tool_calls_per_turn": 30,
"max_context_tokens": 180_000,
}
__all__ = ["queen_goal", "queen_loop_config", "queen_node"]
@@ -0,0 +1,3 @@
{
"include": ["gcu-tools"]
}
+109 -121
View File
@@ -2,7 +2,7 @@
from pathlib import Path
from framework.graph import NodeSpec
from framework.orchestrator import NodeSpec
# Load reference docs at import time so they're always in the system prompt.
# No voluntary read_file() calls needed — the LLM gets everything upfront.
@@ -37,7 +37,7 @@ _appendices = _build_appendices()
# GCU guide — shared between planning and building via _shared_building_knowledge.
_gcu_section = (
("\n\n# GCU Nodes — Browser Automation\n\n" + _gcu_guide)
("\n\n# Browser Automation Nodes\n\n" + _gcu_guide)
if _is_gcu_enabled() and _gcu_guide
else ""
)
@@ -81,7 +81,6 @@ _QUEEN_PLANNING_TOOLS = [
"save_agent_draft",
"confirm_and_build",
# Scaffold + transition to building (requires confirm_and_build first)
"initialize_and_build_agent",
# Load existing agent (after user confirms)
"load_built_agent",
]
@@ -172,7 +171,7 @@ _shared_building_knowledge = (
## Paths (MANDATORY)
**Always use RELATIVE paths** \
(e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`).
(e.g. `exports/agent_name/agent.json`).
**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail.
The project root is implicit.
@@ -182,14 +181,18 @@ When designing worker nodes or writing worker system prompts, reference these \
tool names NOT the coder-tools names (read_file, write_file, etc.).
Worker data tools (for large results and spillover):
- save_data(filename, data, data_dir) save data to a file for later retrieval
- load_data(filename, data_dir, offset_bytes?, limit_bytes?) load data \
with byte-based pagination
- list_data_files(data_dir) list available data files
- append_data(filename, data, data_dir) append to a file incrementally
- edit_data(filename, old_text, new_text, data_dir) find-and-replace in a data file
- serve_file_to_user(filename, data_dir, label?, open_in_browser?) \
generate a clickable file URI for the user
Worker data tools (from files-tools MCP server):
- read_file(path) read a file
- write_file(path, content) write/create a file
- list_files(path) list directory contents
- search_files(pattern, path) regex search in files
Worker data tools (from hive-tools MCP server):
- csv_read, csv_write, csv_append CSV operations
- pdf_read read PDF files
All tools are registered in the global MCP registry (~/.hive/mcp_registry/). \
Workers get tools from: hive-tools, gcu-tools, files-tools.
IMPORTANT: Do NOT tell workers to use read_file, write_file, edit_file, \
search_files, or list_directory those are YOUR tools, not theirs.
@@ -204,7 +207,7 @@ _planning_knowledge = """\
# Core Mandates (Planning)
- **DO NOT propose a complete goal on your own.** Instead, \
collaborate with the user to define it.
- **NEVER call `initialize_and_build_agent` without explicit user approval.** \
- **NEVER call `confirm_and_build` without explicit user approval.** \
Present the full design first and wait for the user to confirm before building.
- **Discover tools dynamically.** NEVER reference tools from static \
docs. Always run list_agent_tools() to see what actually exists.
@@ -252,9 +255,9 @@ When the stakeholder describes what they want, mentally construct:
**After the user responds, assess fit and gaps together.** Be honest and specific. \
Reference tools from list_agent_tools() AND built-in capabilities:
- **GCU browser automation** (`node_type="gcu"`) provides full Playwright-based \
- **Browser automation provides full Playwright-based \
browser control (navigation, clicking, typing, scrolling, JS-rendered pages, \
multi-tab). Do NOT list browser automation as missing use GCU nodes.
multi-tab). Do NOT list browser automation as missing — use browser nodes with tools: {policy: "all"}.
Present a short **Framework Fit Assessment**:
- **Works well**: 2-4 strengths for this use case
@@ -306,14 +309,11 @@ explicitly on a node. Available types:
- **io** (dusty purple, parallelogram): External data input/output
- **document** (steel blue, wavy rect): Report or document generation
- **database** (muted teal, cylinder): Database or data store
- **subprocess** (dark cyan, subroutine): Delegated sub-agent / predefined process
- **browser** (deep blue, hexagon): GCU browser automation / sub-agent \
delegation. At build time, browser nodes are dissolved into the parent \
node's sub_agents list. Use for any GCU or sub-agent leaf node.
- **browser** (deep blue, hexagon): Browser automation node (uses gcu-tools).
Auto-detection works well for most cases: first node start, nodes with \
no outgoing edges terminal, nodes with multiple conditional outgoing \
edges decision, GCU nodes browser, nodes mentioning "database" \
edges decision, browser tool nodes browser, nodes mentioning "database" \
database, nodes mentioning "report/document" document, I/O tools like \
send_email io. Everything else defaults to process. Set flowchart_type \
explicitly only when auto-detection would be wrong.
@@ -354,48 +354,19 @@ gather → [Valid data?] →Yes→ transform → deliver
In the draft: the `[Valid data?]` node has `flowchart_type: "decision"`, \
`decision_clause: "Data passes validation checks?"`, with labeled yes/no edges.
## Sub-Agent Nodes — Planning-Only Delegation
## Browser Automation Nodes
Sub-agent nodes (dark teal subroutines) are **planning-only** visual elements \
that show which nodes delegate to sub-agents. At `confirm_and_build()`, \
sub-agent nodes are **dissolved** into their parent node:
- The sub-agent node's ID is added to the predecessor's `sub_agents` list
- The sub-agent node and its connecting edge are removed
- At runtime, the parent node can invoke the sub-agent via `delegate_to_sub_agent`
**Rules for sub-agent nodes (INCLUDING GCU nodes):**
- GCU nodes are auto-detected as `flowchart_type: "browser"` (hexagon)
- Connect from the managing parent node to the sub-agent node
- Sub-agent nodes must be **leaf nodes** NO outgoing edges to other nodes
- At build time, browser/GCU nodes are dissolved into the parent's \
`sub_agents` list, just like decision nodes are dissolved into criteria
**CRITICAL: GCU nodes (`node_type: "gcu"`) are ALWAYS sub-agents.** \
They MUST NOT appear in the linear flow. NEVER chain GCU nodes \
sequentially (A gcu1 gcu2 B is WRONG). Instead, attach them \
as leaves to the parent that orchestrates them:
Browser nodes are regular `event_loop` nodes with browser tools \
(from the gcu-tools MCP server) in their tool list. They are wired \
into the graph with edges like any other node:
```
WRONG: intake gcu_find_prospect gcu_scan_mutuals check_results
WRONG: decision_node gcu_node (as a yes/no branch)
RIGHT: intake (sub_agents: [gcu_find, gcu_scan]) check_results
research browser_scan analyze_results
```
The parent node delegates to its GCU sub-agents and collects results. \
The main flow continues from the parent, not from the GCU node. \
GCU nodes MUST NOT be children of decision nodes decision nodes \
dissolve at build time, which would leave the GCU as a dangling \
workflow step.
Use `tools: {policy: "all"}` to give browser nodes access to all \
browser tools, or list specific ones with `policy: "explicit"`.
**How to show delegation in the flowchart:**
```
research (deep_searcher) browser/GCU node, leaf
research [Enough results?] decision node
```
After dissolution: `research` node gets `sub_agents: ["deep_searcher"]` \
and `success_criteria: "Enough results?"`.
If the worker agent start from some initial input it is okay. \
The queen(you) owns intake: you gathers user requirements, then calls \
If the worker agent starts from some initial input it is okay. \
The queen (you) owns intake: you gather user requirements, then call \
`run_agent_with_input(task)` with a structured task description. \
When building the agent, design the entry node's `input_keys` to \
match what the queen will provide at run time. Worker nodes should \
@@ -411,14 +382,14 @@ You MUST get explicit user approval before ANY code is generated.
2. **WAIT for user response.** Do NOT proceed without it.
3. Handle the response:
- If **Approve / Proceed**: Call confirm_and_build(), then \
initialize_and_build_agent(agent_name, nodes)
confirm_and_build(agent_name)
- If **Adjust scope**: Discuss changes, update the draft with \
save_agent_draft() again, and re-ask
- If **More questions**: Answer them honestly, then ask again
- If **Reconsider**: Discuss alternatives. If they decide to proceed, \
that's their informed choice
**NEVER call initialize_and_build_agent without first calling \
**NEVER call confirm_and_build without first calling \
confirm_and_build().** The system will block the transition if you try.
"""
@@ -477,53 +448,75 @@ When a user says "my agent is failing" or "debug this agent":
## 5. Implement
**You should only reach this step after the user has approved the draft design \
in the planning phase. The draft metadata will pre-populate descriptions, \
goals, success criteria, and node metadata in the generated files.**
and you have called `confirm_and_build(agent_name="my_agent")`.**
Call `initialize_and_build_agent(agent_name, nodes)` to generate all package \
files. The agent_name must be snake_case (e.g., "my_agent"). Pass node names \
as comma-separated string (e.g., "gather,process,review").
The tool creates: config.py, nodes/__init__.py, agent.py, \
__init__.py, __main__.py, mcp_servers.json, tests/conftest.py.
`confirm_and_build` created the agent directory (returned in agent_path). \
Now write the complete agent config directly:
The generated files are **structurally complete** with correct imports, \
class definition, `validate()` method, `default_agent` export, and \
`__init__.py` re-exports. They pass validation as-is.
```
write_file("<colony_path>/agent.json", <complete JSON config>)
```
`mcp_servers.json` is auto-generated with hive-tools as the default. \
Do NOT manually create or overwrite `mcp_servers.json`.
The agent.json must include ALL of these in one write:
- `name`, `version`, `description`
- `goal` with `description`, `success_criteria`, `constraints`
- `identity_prompt` (agent-level behavior)
- `nodes` each with `id`, `description`, `system_prompt`, `tools`, \
`input_keys`, `output_keys`, `success_criteria`
- `edges` connecting all nodes with proper conditions
- `entry_node`, `terminal_nodes`
- `mcp_servers` REQUIRED. Always include all three: \
`[{"name": "hive-tools"}, {"name": "gcu-tools"}, {"name": "files-tools"}]`
- `loop_config` `max_iterations`, `max_context_tokens`
### Customizing generated files
**Write the COMPLETE config in one `write_file` call. No TODOs, no placeholders.** \
The queen writes final production-ready system prompts directly.
**CRITICAL: Use `edit_file` to customize TODO placeholders. \
NEVER use `write_file` to rewrite generated files from scratch. \
Rewriting breaks imports, class structure, and causes validation failures.**
**There are NO Python files.** The framework loads agent.json directly.
Safe to edit with `edit_file`:
- System prompts, tools, input_keys, output_keys, success_criteria in \
nodes/__init__.py
- Goal description, success criteria values, constraint values, edge \
definitions, identity_prompt in agent.py
- CLI options in __main__.py
- For triggers (timers/webhooks), add entries to triggers.json in the \
agent's export directory
MCP servers are loaded from the global registry by name. Available servers:
- `hive-tools` web search, email, CRM, calendar, 100+ integrations
- `gcu-tools` browser automation (click, type, navigate, screenshot)
- `files-tools` file I/O (read, write, edit, search, list)
Do NOT modify or rewrite:
- Import statements at top of agent.py (they are correct)
- The agent class definition, `validate()`, `_build_graph()`, `_setup()`, \
or lifecycle methods (start/stop/run)
- `__init__.py` exports (all required variables are already re-exported)
- `default_agent = ClassName()` at bottom of agent.py
**Template variables:** Add a `variables:` section at the top of agent.json \
and use `{{variable_name}}` in system prompts for config injection:
```yaml
variables:
spreadsheet_id: "1ZVx..."
nodes:
- id: start
system_prompt: |
Use spreadsheet: {{spreadsheet_id}}
```
### Tool access in nodes
Each node declares its tool access policy:
```yaml
# Explicit list (recommended)
tools:
policy: explicit
allowed: [web_search, write_file]
# All tools (for browser automation nodes)
tools:
policy: all
# No tools (for handoff/summary nodes)
tools:
policy: none
```
## 6. Verify and Load
Call `validate_agent_package("{name}")` after initialization. \
It runs structural checks (class validation, graph validation, tool \
validation, tests) and returns a consolidated result. If anything \
fails: read the error, fix with edit_file, re-validate. Up to 3x.
fails: read the error, fix with read_file+write_file, re-validate. Up to 3x.
When validation passes, immediately call \
`load_built_agent("exports/{name}")` to load the agent into the \
`load_built_agent("<agent_path>")` to load the agent into the \
session. This switches to STAGING phase and shows the graph in the \
visualizer. Do NOT wait for user input between validation and loading.
"""
@@ -625,13 +618,11 @@ document, database, subprocess, etc.) with unique shapes and colors. Set \
flowchart_type on a node to override. Nodes need only an id. \
Use decision nodes (flowchart_type: "decision", with decision_clause and \
labeled yes/no edges) to make conditional branching explicit. \
GCU/sub-agent nodes (node_type: "gcu") are auto-detected as browser \
hexagons and connected as leaf nodes to their parent.
- confirm_and_build() Record user confirmation of the draft. Dissolves \
planning-only nodes (decision predecessor criteria; browser/GCU \
predecessor sub_agents list). Call this ONLY after the user explicitly \
approves via ask_user.
- initialize_and_build_agent(agent_name?, nodes?) Scaffold the agent package \
- confirm_and_build(agent_name) Scaffold the agent package \
and transition to BUILDING phase. For new agents, this REQUIRES \
save_agent_draft() + confirm_and_build() first. The draft metadata is used to \
pre-populate the generated files. Without agent_name: transition to BUILDING \
@@ -647,8 +638,8 @@ phase. Only use this when the user explicitly asks to work with an existing agen
2. Call save_agent_draft() to create visual draft present to user
3. Call ask_user() to get explicit approval
4. Call confirm_and_build() to record approval
5. Call initialize_and_build_agent() to scaffold and start building
For diagnosis of existing agents, call initialize_and_build_agent() \
5. Call confirm_and_build() to scaffold and start building
For diagnosis of existing agents, call confirm_and_build() \
(no args) after agreeing on a fix plan with the user.
"""
@@ -884,7 +875,7 @@ that changes the structure, call save_agent_draft() again so they see the \
update in real-time. The flowchart is a live collaboration tool.
8. When the design is stable, use ask_user to get explicit approval
9. Call confirm_and_build() after the user approves
10. Call initialize_and_build_agent(agent_name, nodes) to scaffold and start building
10. Call confirm_and_build(agent_name) to scaffold and start building
**The flowchart is your shared whiteboard.** Don't describe changes in text \
and then ask "should I update the draft?" just update it. If the user says \
@@ -895,7 +886,7 @@ see every structural change reflected in the visualizer as you discuss it.
**CRITICAL: Planning Building boundary.** You MUST get explicit user \
confirmation before moving to building. The sequence is:
save_agent_draft() iterate with user ask_user() confirm_and_build() \
initialize_and_build_agent()
confirm_and_build()
Skipping any of these steps will be blocked by the system.
Remember: DO NOT write or edit any files yet. This is a read-only exploration \
@@ -911,7 +902,7 @@ your priority is diagnosis, not new design:
2. Summarize the root cause to the user
3. Propose a fix plan (what to change, what behavior to adjust)
4. Get user approval via ask_user
5. Call initialize_and_build_agent() (no args) to transition to building and implement the fix
5. Call confirm_and_build() (no args) to transition to building and implement the fix
Do NOT start the full discovery workflow (tool discovery, gap analysis) in \
diagnosis mode — you already have a built agent, you just need to fix it.
@@ -947,7 +938,7 @@ delegate agent construction to the worker, even as a "research" subtask.
## Keeping the flowchart in sync during building
When you make structural changes to the agent (add/remove/rename nodes, \
change edges, modify sub-agent assignments), call save_agent_draft() to \
change edges, modify node connections), call save_agent_draft() to \
update the flowchart. During building, this auto-dissolves planning-only \
nodes without needing user re-confirmation. The user sees the updated \
flowchart immediately.
@@ -966,15 +957,15 @@ user says "replan", "go back", "let's redesign", "change the approach", \
## CRITICAL — Graph topology errors require replanning, not code edits
If you discover that the agent graph has structural problems GCU nodes \
If you discover that the agent graph has structural problems browser nodes \
in the linear flow, missing edges, wrong node connections, incorrect \
sub-agent assignments you MUST call replan_agent() and fix the draft. \
Do NOT attempt to fix topology by editing agent.py directly. The graph \
node connections you MUST call replan_agent() and fix the draft. \
Do NOT attempt to fix topology by editing agent.json directly. The graph \
structure is defined by the draft dissolution code-gen pipeline. \
Editing code to rewire nodes bypasses the flowchart and creates drift \
between what the user sees and what the code does.
Editing the config to rewire nodes bypasses the flowchart and creates drift \
between what the user sees and what the config does.
**WRONG:** "Let me fix agent.py to remove GCU nodes from edges..."
**WRONG:** "Let me fix agent.json to remove browser nodes from edges..."
**RIGHT:** Call replan_agent(), fix the draft with save_agent_draft(), \
get user approval, then confirm_and_build() the corrected code is \
generated automatically.
@@ -1100,18 +1091,15 @@ You wake up when:
If the user asks for progress, call get_graph_status() ONCE and report. \
If the summary mentions issues, follow up with get_graph_status(focus="issues").
## Subagent delegations (browser automation, GCU)
## Browser automation nodes
When the worker delegates to a subagent (e.g., GCU browser automation), expect it \
to take 2-5 minutes. During this time:
- Progress will show 0% — this is NORMAL. The subagent only calls set_output at the end.
- Check get_graph_status(focus="full") for "subagent_activity" this shows the \
subagent's latest reasoning text and confirms it is making real progress.
- Do NOT conclude the subagent is stuck just because progress is 0% or because \
you see repeated browser_click/browser_snapshot calls that is the expected \
pattern for web scraping.
- Only intervene if: the subagent has been running for 5+ minutes with no new \
subagent_activity updates, OR the judge escalates.
Browser nodes may take 2-5 minutes for web scraping tasks. During this time:
- Progress will show 0% until the node calls set_output at the end.
- Check get_graph_status(focus="full") for activity updates.
- Do NOT conclude it is stuck just because you see repeated \
browser_click/browser_snapshot calls — that is expected for web scraping.
- Only intervene if: the node has been running for 5+ minutes with no new \
activity updates, OR the judge escalates.
## Handling worker termination ([WORKER_TERMINAL])
@@ -1143,11 +1131,11 @@ escalations. If the user gave you instructions (e.g., "just retry on errors", \
CRITICAL escalation relay protocol:
When an escalation requires user input (auth blocks, human review), the worker \
or its subagent is BLOCKED and waiting for your response. You MUST follow this \
is BLOCKED and waiting for your response. You MUST follow this \
exact two-step sequence:
Step 1: call ask_user() to get the user's answer.
Step 2: call inject_message() with the user's answer IMMEDIATELY after.
If you skip Step 2, the worker/subagent stays blocked FOREVER and the task hangs. \
If you skip Step 2, the worker stays blocked FOREVER and the task hangs. \
NEVER respond to the user without also calling inject_message() to unblock \
the worker. Even if the user says "skip" or "cancel", you must still relay that \
decision via inject_message() so the worker can clean up.
@@ -1233,7 +1221,7 @@ _queen_tools_docs = (
+ "\n\n### Phase transitions\n"
"- save_agent_draft(...) → creates visual-only draft graph (stays in PLANNING)\n"
"- confirm_and_build() → records user approval of draft (stays in PLANNING)\n"
"- initialize_and_build_agent(agent_name?, nodes?) → scaffolds package + switches to "
"- confirm_and_build(agent_name) → scaffolds package + switches to "
"BUILDING (requires draft + confirmation for new agents)\n"
"- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n"
"- load_built_agent(path) → switches to STAGING phase\n"
+28 -7
View File
@@ -1,9 +1,15 @@
"""Queen global memory helpers.
Global memory lives in ``~/.hive/queen/global_memory/`` and stores durable
cross-session knowledge about the user (profile, preferences, environment,
feedback). Each memory is an individual ``.md`` file with optional YAML
frontmatter (name, type, description).
Memory hierarchy::
~/.hive/memories/
global/ # shared across all queens and colonies
colonies/{name}/ # colony-scoped memories
agents/queens/{name}/ # queen-specific memories
agents/{name}/ # per-worker-agent memories
Each memory is an individual ``.md`` file with optional YAML frontmatter
(name, type, description).
"""
from __future__ import annotations
@@ -21,7 +27,7 @@ logger = logging.getLogger(__name__)
GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] = ("profile", "preference", "environment", "feedback")
_HIVE_QUEEN_DIR = Path.home() / ".hive" / "queen"
from framework.config import MEMORIES_DIR
MAX_FILES: int = 200
MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file
@@ -31,8 +37,23 @@ _HEADER_LINE_LIMIT: int = 30
def global_memory_dir() -> Path:
    """Return the global memory directory (shared across all queens/colonies).

    Memories live under ``MEMORIES_DIR / "global"``; each memory is an
    individual ``.md`` file (see module docstring for the full hierarchy).
    """
    # NOTE(review): the previous body carried both the pre-refactor return
    # (``_HIVE_QUEEN_DIR / "global_memory"``) and the new one, leaving the
    # new path unreachable dead code. Only the new location is kept.
    return MEMORIES_DIR / "global"
def colony_memory_dir(colony_name: str) -> Path:
    """Path to the memory directory scoped to the colony *colony_name*."""
    return MEMORIES_DIR.joinpath("colonies", colony_name)
def queen_memory_dir(queen_name: str = "default") -> Path:
    """Path to the memory directory belonging to the queen *queen_name*.

    Defaults to the ``"default"`` queen when no name is given.
    """
    return MEMORIES_DIR.joinpath("agents", "queens", queen_name)
def agent_memory_dir(agent_name: str) -> Path:
    """Path to the per-worker-agent memory directory for *agent_name*."""
    return MEMORIES_DIR.joinpath("agents", agent_name)
# ---------------------------------------------------------------------------
+13 -1
View File
@@ -91,7 +91,19 @@ async def select_memories(
resp.stop_reason,
)
return []
data = json.loads(raw)
# Some models wrap JSON in markdown fences or add preamble text.
# Try to extract the JSON object if raw parse fails.
try:
data = json.loads(raw)
except json.JSONDecodeError:
import re
m = re.search(r"\{.*\}", raw, re.DOTALL)
if m:
data = json.loads(m.group())
else:
logger.warning("recall: LLM returned non-JSON: %.200s", raw)
return []
selected = data.get("selected_memories", [])
valid_names = {f.filename for f in files}
result = [s for s in selected if s in valid_names][:max_results]
@@ -25,10 +25,7 @@
14. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
## GCU Errors
15. **Manually wiring browser tools on event_loop nodes** — Use `node_type="gcu"` which auto-includes browser tools. Do NOT manually list browser tool names.
16. **Using GCU nodes as regular graph nodes** — GCU nodes are subagents only. They must ONLY appear in `sub_agents=["gcu-node-id"]` and be invoked via `delegate_to_sub_agent()`. Never connect via edges or use as entry/terminal nodes.
17. **Reusing the same GCU node ID for parallel tasks** — Each concurrent browser task needs a distinct GCU node ID (e.g. `gcu-site-a`, `gcu-site-b`). Two `delegate_to_sub_agent` calls with the same `agent_id` share a browser profile and will interfere with each other's pages.
18. **Passing `profile=` in GCU tool calls** — Profile isolation for parallel subagents is automatic. The framework injects a unique profile per subagent via an asyncio `ContextVar`. Hardcoding `profile="default"` in a GCU system prompt breaks this isolation.
15. **Manually wiring browser tools on event_loop nodes** — Browser nodes use tools: {policy: "all"} to get all browser tools.
## Worker Agent Errors
19. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Route worker review/approval through queen escalation instead of direct worker HITL.
@@ -0,0 +1,227 @@
# Declarative Agent File Templates
Agents are defined as a single `agent.yaml` file. No Python code needed.
The runner loads this file directly -- no `agent.py`, `config.py`, or
`nodes/__init__.py` required.
## agent.yaml -- Complete Agent Definition
```yaml
name: my-agent
version: 1.0.0
description: What this agent does.
metadata:
intro_message: Welcome! What would you like me to do?
# Template variables -- substituted into system_prompt and identity_prompt
# via {{variable_name}} syntax. Use this for config values that appear
# in prompts (spreadsheet IDs, API endpoints, account names, etc.)
variables:
spreadsheet_id: "1ZVxWDL..."
sheet_name: "contacts"
goal:
description: What this agent achieves.
success_criteria:
- "First success criterion"
- "Second success criterion"
constraints:
- "Hard constraint the agent must respect"
identity_prompt: |
You are a helpful agent.
conversation_mode: continuous # always "continuous" for Hive agents
loop_config:
max_iterations: 100
max_tool_calls_per_turn: 30
max_context_tokens: 32000
# MCP servers to connect (resolved by name from ~/.hive/mcp_registry/)
mcp_servers:
- name: hive-tools
- name: gcu-tools
nodes:
# Node 1: Process (autonomous entry node)
# The queen handles intake and passes structured input via
# run_agent_with_input(task). NO client-facing intake node.
- id: process
name: Process
description: Execute the task using available tools
max_node_visits: 0 # 0 = unlimited (forever-alive agents)
input_keys: [user_request, feedback]
output_keys: [results]
nullable_output_keys: [feedback]
tools:
policy: explicit
allowed: [web_search, web_scrape, save_data, load_data, list_data_files]
success_criteria: Results are complete and accurate.
system_prompt: |
You are a processing agent. Your task is in memory under "user_request".
If "feedback" is present, this is a revision.
Work in phases:
1. Use tools to gather/process data
2. Analyze results
3. Call set_output in a SEPARATE turn:
- set_output("results", "structured results")
# Node 2: Handoff (autonomous)
- id: handoff
name: Handoff
description: Prepare worker results for queen review
max_node_visits: 0
input_keys: [results, user_request]
output_keys: [next_action, feedback, worker_summary]
nullable_output_keys: [feedback, worker_summary]
tools:
policy: none # handoff nodes don't need tools
success_criteria: Results are packaged for queen decision-making.
system_prompt: |
Do NOT talk to the user directly. The queen is the only user interface.
If blocked, call escalate(reason, context) then set:
- set_output("next_action", "escalated")
- set_output("feedback", "what help is needed")
Otherwise summarize and set:
- set_output("worker_summary", "short summary for queen")
- set_output("next_action", "done") or "revise"
- set_output("feedback", "what to revise") only when revising
edges:
- from_node: process
to_node: handoff
# Feedback loop
- from_node: handoff
to_node: process
condition: conditional
condition_expr: "str(next_action).lower() == 'revise'"
priority: 2
# Escalation loop
- from_node: handoff
to_node: process
condition: conditional
condition_expr: "str(next_action).lower() == 'escalated'"
priority: 3
# Loop back for next task
- from_node: handoff
to_node: process
condition: conditional
condition_expr: "str(next_action).lower() == 'done'"
entry_node: process
terminal_nodes: [] # [] = forever-alive
```
## Key differences from Python templates
| Before (Python) | After (YAML) |
|-------------------------------------|----------------------------------------|
| `agent.py` (250 lines boilerplate) | Not needed |
| `config.py` (dataclass + metadata) | `variables:` + `metadata:` in YAML |
| `nodes/__init__.py` (NodeSpec calls)| `nodes:` list in YAML |
| `__init__.py`, `__main__.py` | Not needed |
| f-string config injection | `{{variable_name}}` templates |
| `mcp_servers.json` (separate file) | `mcp_servers:` in YAML (or keep file) |
## Node types
| Type | Description | Tools |
|--------------|---------------------------------------|--------------------------|
| `event_loop` | LLM-driven orchestration (default) | Explicit list or `none` |
| `gcu` | Browser automation via GCU tools | `policy: all` (auto) |
## Tool access policies
```yaml
# Explicit list (recommended for most nodes)
tools:
policy: explicit
allowed: [web_search, save_data]
# All tools (for browser automation nodes)
tools:
policy: all
# No tools (for handoff/summary nodes)
tools:
policy: none
```
## Edge conditions
| Condition | When to use |
|---------------|-------------------------------------------------------|
| `on_success` | Default. Next node after current succeeds. |
| `on_failure` | Fallback path when current node fails. |
| `always` | Always traverse regardless of outcome. |
| `conditional` | Evaluate `condition_expr` against shared memory keys. |
| `llm_decide` | Let the LLM decide at runtime. |
## Template variables
Use `{{variable_name}}` in `system_prompt` and `identity_prompt`.
Variables are defined in the top-level `variables:` map.
```yaml
variables:
spreadsheet_id: "1ZVxWDL..."
api_endpoint: "https://api.example.com"
nodes:
- id: start
system_prompt: |
Connect to spreadsheet: {{spreadsheet_id}}
API endpoint: {{api_endpoint}}
```
## Entry points
Default is a single manual entry point. For timer/scheduled triggers:
```yaml
entry_points:
- id: default
trigger_type: manual
- id: daily-check
trigger_type: timer
trigger_config:
interval_minutes: 30
```
## mcp_servers.json -- Still Supported
The `mcp_servers.json` file is still loaded automatically if present alongside
`agent.yaml`. You can also inline servers in the YAML:
```yaml
mcp_servers:
- name: hive-tools
- name: gcu-tools
```
Both approaches work. The JSON file takes precedence for backward compatibility.
## Migration from Python agents
Run the migration tool to convert existing agents:
```bash
uv run python -m framework.tools.migrate_agent exports/my_agent
```
This generates `agent.yaml` from the existing `agent.py` + `nodes/` + `config.py`.
The original files are left untouched. Once verified, you can delete the Python files.
## Files after migration
```
my_agent/
agent.yaml # The only required file
mcp_servers.json # Optional (can inline in YAML)
flowchart.json # Optional (auto-generated)
```
@@ -1,306 +1,193 @@
# Hive Agent Framework Condensed Reference
# Hive Agent Framework -- Condensed Reference
## Architecture
Agents are Python packages in `exports/`:
Agents are declarative JSON configs in `exports/`:
```
exports/my_agent/
├── __init__.py # MUST re-export ALL module-level vars from agent.py
├── __main__.py # CLI (run, tui, info, validate, shell)
├── agent.py # Graph construction (goal, edges, agent class)
├── config.py # Runtime config
├── nodes/__init__.py # Node definitions (NodeSpec)
├── mcp_servers.json # MCP tool server config
└── tests/ # pytest tests
agent.json # The entire agent definition
mcp_servers.json # MCP tool server config (optional, prefer registry refs)
```
## Agent Loading Contract
No Python files. No `__init__.py`, `__main__.py`, `config.py`, or `nodes/`.
`AgentRunner.load()` imports the package (`__init__.py`) and reads these
module-level variables via `getattr()`:
## Agent Loading
| Variable | Required | Default if missing | Consequence |
|----------|----------|--------------------|-------------|
| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" |
| `nodes` | YES | `None` | **FATAL** — same error |
| `edges` | YES | `None` | **FATAL** — same error |
| `entry_node` | no | `nodes[0].id` | Probably wrong node |
| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails |
| `terminal_nodes` | **YES** | `[]` | **FATAL** — graph must have at least one terminal node |
| `pause_nodes` | no | `[]` | OK |
| `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
| `identity_prompt` | no | not passed | No agent-level identity |
| `loop_config` | no | `{}` | No iteration limits |
| `triggers.json` (file) | no | not present | No triggers (timers, webhooks) |
`AgentLoader.load()` reads `agent.json` and builds the execution graph.
If `agent.py` exists (legacy), it's loaded as a Python module instead.
**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from
`agent.py`. Missing exports silently fall back to defaults, causing
hard-to-debug failures.
## agent.json Schema
**Why `default_agent.validate()` is NOT sufficient:**
`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges).
These are always correct because the constructor references agent.py's module
vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`),
not the class. So `validate()` passes while `AgentRunner.load()` fails.
Always test with `AgentRunner.load("exports/{name}")` — this is the same
code path the TUI and `hive run` use.
## Goal
Defines success criteria and constraints:
```python
goal = Goal(
id="kebab-case-id",
name="Display Name",
description="What the agent does",
success_criteria=[
SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25),
],
constraints=[
Constraint(id="c-id", description="...", constraint_type="hard", category="quality"),
],
)
```json
{
"name": "my-agent",
"version": "1.0.0",
"description": "What this agent does",
"goal": {
"description": "What to achieve",
"success_criteria": ["criterion 1", "criterion 2"],
"constraints": ["constraint 1"]
},
"identity_prompt": "You are a helpful agent.",
"conversation_mode": "continuous",
"loop_config": {
"max_iterations": 100,
"max_tool_calls_per_turn": 30,
"max_context_tokens": 32000
},
"mcp_servers": [
{"name": "hive-tools"},
{"name": "gcu-tools"}
],
"variables": {
"spreadsheet_id": "1ZVx..."
},
"nodes": [...],
"edges": [...],
"entry_node": "process",
"terminal_nodes": []
}
```
- 3-5 success criteria, weights sum to 1.0
- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional)
## NodeSpec Fields
## Template Variables
Use `{{variable_name}}` in `system_prompt` and `identity_prompt`. Variables
are defined in the top-level `variables` object:
```json
{
"variables": {"sheet_id": "1ZVx..."},
"nodes": [{
"id": "start",
"system_prompt": "Use sheet: {{sheet_id}}"
}]
}
```
## Node Fields
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| id | str | required | kebab-case identifier |
| name | str | required | Display name |
| name | str | id | Display name |
| description | str | required | What the node does |
| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) |
| input_keys | list[str] | required | Memory keys this node reads |
| output_keys | list[str] | required | Memory keys this node writes via set_output |
| node_type | str | "event_loop" | `"event_loop"` |
| input_keys | list | [] | Memory keys this node reads |
| output_keys | list | [] | Memory keys this node writes via set_output |
| system_prompt | str | "" | LLM instructions |
| tools | list[str] | [] | Tool names from MCP servers |
| client_facing | bool | False | Deprecated compatibility field. Queen interactivity is implicit; workers should escalate instead |
| nullable_output_keys | list[str] | [] | Keys that may remain unset |
| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops |
| max_retries | int | 3 | Retries on failure |
| tools | object | {} | Tool access policy (see below) |
| nullable_output_keys | list | [] | Keys that may remain unset |
| max_node_visits | int | 1 | 0=unlimited (for forever-alive agents) |
| success_criteria | str | "" | Natural language for judge evaluation |
| client_facing | bool | false | Whether output is shown to user |
## EdgeSpec Fields
## Tool Access Policies
Each node declares its tools via a policy object:
```json
{"tools": {"policy": "explicit", "allowed": ["web_search", "save_data"]}}
{"tools": {"policy": "all"}}
{"tools": {"policy": "none"}}
```
- `explicit` (default): only named tools. Empty `allowed` = zero tools.
- `all`: all tools from registry (e.g. for browser automation nodes).
- `none`: no tools (for handoff/summary nodes).
## Edge Fields
| Field | Type | Description |
|-------|------|-------------|
| id | str | kebab-case identifier |
| source | str | Source node ID |
| target | str | Target node ID |
| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL |
| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) |
| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) |
| from_node | str | Source node ID |
| to_node | str | Target node ID |
| condition | str | `on_success`, `on_failure`, `always`, `conditional` |
| condition_expr | str | Python expression for conditional routing |
| priority | int | Higher = evaluated first |
condition_expr examples:
- `"needs_more_research == True"`
- `"str(next_action).lower() == 'revise'"`
## Key Patterns
### STEP 1/STEP 2 (Client-Facing Nodes)
```
**STEP 1 — Respond to the user (text only, NO tool calls):**
[Present information, ask questions]
**STEP 2 — After the user responds, call set_output:**
- set_output("key", "value based on user response")
```
This prevents premature set_output before user interaction.
### Fewer, Richer Nodes (CRITICAL)
**Hard limit: 3-6 nodes for most agents.** Never exceed 6 unless the user
explicitly requests a complex multi-phase pipeline.
**Hard limit: 3-6 nodes for most agents.** Each node boundary serializes
outputs and destroys in-context information. Merge unless:
1. Client-facing boundary (different interaction models)
2. Disjoint tool sets
3. Parallel execution (fan-out branches)
Each node boundary serializes outputs to the shared buffer and **destroys** all
in-context information: tool call results, intermediate reasoning, conversation
history. A research node that searches, fetches, and analyzes in ONE node keeps
all source material in its conversation context. Split across 3 nodes, each
downstream node only sees the serialized summary string.
**Decision framework — merge unless ANY of these apply:**
1. **Client-facing boundary** — Autonomous and client-facing work MUST be
separate nodes (different interaction models)
2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web
search vs database), separate nodes make sense
3. **Parallel execution** — Fan-out branches must be separate nodes
**Red flags that you have too many nodes:**
- A node with 0 tools (pure LLM reasoning) → merge into predecessor/successor
- A node that sets only 1 trivial output → collapse into predecessor
- Multiple consecutive autonomous nodes → combine into one rich node
- A "report" node that presents analysis → merge into the client-facing node
- A "confirm" or "schedule" node that doesn't call any external service → remove
**Typical agent structure (2 nodes):**
**Typical structure (2 nodes):**
```
process (autonomous) ←→ review (queen-mediated)
```
The queen owns intake — she gathers requirements from the user, then
passes structured input via `run_agent_with_input(task)`. When building
the agent, design the entry node's `input_keys` to match what the queen
will provide at run time. Worker agents should NOT have a client-facing
intake node. Mid-execution review/approval should happen through queen
escalation rather than direct worker HITL.
For simpler agents, just 1 autonomous node:
```
process (autonomous) — loops back to itself
process (autonomous) <-> review (queen-mediated)
```
### nullable_output_keys
For inputs that only arrive on certain edges:
```python
research_node = NodeSpec(
input_keys=["brief", "feedback"],
nullable_output_keys=["feedback"], # Only present on feedback edge
max_node_visits=3,
)
```
### Mutually Exclusive Outputs
For routing decisions:
```python
review_node = NodeSpec(
output_keys=["approved", "feedback"],
nullable_output_keys=["approved", "feedback"], # Node sets one or the other
)
```
### Continuous Loop Pattern
Mark the primary event_loop node as terminal: `terminal_nodes=["process"]`.
The node has `output_keys` and can complete when the agent finishes its work.
Use `conversation_mode="continuous"` to preserve context across transitions.
The queen owns intake. Worker agents should NOT have a client-facing intake
node. Mid-execution review should happen through queen escalation.
### set_output
- Synthetic tool injected by framework
- Call separately from real tool calls (separate turn)
- `set_output("key", "value")` stores to the shared buffer
## Edge Conditions
| Condition | When |
|-----------|------|
| ON_SUCCESS | Node completed successfully |
| ON_FAILURE | Node failed |
| ALWAYS | Unconditional |
| CONDITIONAL | condition_expr evaluates to True against memory |
condition_expr examples:
- `"needs_more_research == True"`
- `"str(next_action).lower() == 'new_agent'"`
- `"feedback is not None"`
## Graph Lifecycle
### Graph Lifecycle
| Pattern | terminal_nodes | When |
|---------|---------------|------|
| **Continuous loop** | `["node-with-output-keys"]` | **DEFAULT for all agents** |
| Continuous loop | `["node-with-output-keys"]` | DEFAULT for all agents |
| Linear | `["last-node"]` | One-shot/batch agents |
**Every graph must have at least one terminal node.** Terminal nodes
define where execution ends. For interactive agents that loop continuously,
mark the primary event_loop node as terminal (it has `output_keys` and can
complete at any point). The framework default for `max_node_visits` is 0
(unbounded), so nodes work correctly in continuous loops without explicit
override. Only set `max_node_visits > 0` in one-shot agents with feedback loops.
Every node must have at least one outgoing edge — no dead ends.
Every graph must have at least one terminal node.
## Continuous Conversation Mode
### Continuous Conversation Mode
`conversation_mode` has ONLY two valid states:
- `"continuous"` recommended for interactive agents
- Omit entirely isolated per-node conversations (each node starts fresh)
- `"continuous"` -- recommended (context carries across node transitions)
- Omit entirely -- isolated per-node conversations
**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`,
`"adaptive"`, `"shared"`. These do not exist in the framework.
When `conversation_mode="continuous"`:
- Same conversation thread carries across node transitions
- Layered system prompts: identity (agent-level) + narrative + focus (per-node)
- Transition markers inserted at boundaries
- Compaction happens opportunistically at phase transitions
**INVALID values:** `"client_facing"`, `"interactive"`, `"shared"`.
## loop_config
Only three valid keys:
```python
loop_config = {
"max_iterations": 100, # Max LLM turns per node visit
"max_tool_calls_per_turn": 20, # Max tool calls per LLM response
"max_context_tokens": 32000, # Triggers conversation compaction
```json
{
"max_iterations": 100,
"max_tool_calls_per_turn": 20,
"max_context_tokens": 32000
}
```
**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`,
`"temperature"`. These are silently ignored or cause errors.
## Data Tools (Spillover)
For large data that exceeds context:
- `save_data(filename, data)` — Write to session data dir
- `load_data(filename, offset, limit)` — Read with pagination
- `list_data_files()` — List files
- `serve_file_to_user(filename, label)` — Clickable file:// URI
- `save_data(filename, data)` -- write to session data dir
- `load_data(filename, offset, limit)` -- read with pagination
- `list_data_files()` -- list files
- `serve_file_to_user(filename, label)` -- clickable file URI
`data_dir` is auto-injected by framework — LLM never sees it.
`data_dir` is auto-injected by framework.
## Fan-Out / Fan-In
Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather().
- Parallel nodes must have disjoint output_keys
- Only one branch may have client_facing nodes
- Fan-in node gets all outputs in the shared buffer
Multiple `on_success` edges from same source = parallel execution.
Parallel nodes must have disjoint output_keys.
## Judge System
- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
- **SchemaJudge**: Validates against Pydantic model
- **Custom**: Implement `evaluate(context) -> JudgeVerdict`
Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.
## Triggers (Timers, Webhooks)
For agents that react to external events, create a `triggers.json` file
in the agent's export directory:
```json
[
{
"id": "daily-check",
"name": "Daily Check",
"trigger_type": "timer",
"trigger_config": {"cron": "0 9 * * *"},
"task": "Run the daily check process"
}
]
```
### Key Fields
- `trigger_type`: `"timer"` or `"webhook"`
- `trigger_config`: `{"cron": "0 9 * * *"}` or `{"interval_minutes": 20}`
- `task`: describes what the worker should do when the trigger fires
- Triggers can also be created/removed at runtime via `set_trigger` / `remove_trigger` queen tools
## Tool Discovery
Do NOT rely on a static tool list — it will be outdated. Always call
`list_agent_tools()` with NO arguments first to see ALL available tools.
Only use `group=` or `output_schema=` as follow-up calls after seeing the
full list.
Always call `list_agent_tools()` first to see available tools.
Do NOT rely on a static tool list.
```
list_agent_tools() # ALWAYS call this first
list_agent_tools(group="gmail", output_schema="full") # then drill into a category
list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools
list_agent_tools() # full summary
list_agent_tools(group="gmail", output_schema="full") # drill into category
```
After building, run `validate_agent_package("{name}")` to check everything at once.
Common tool categories (verify via list_agent_tools):
- **Web**: search, scrape, PDF
- **Data**: save/load/append/list data files, serve to user
- **File**: view, write, replace, diff, list, grep
- **Communication**: email, gmail, slack, telegram
- **CRM**: hubspot, apollo, calcom
- **GitHub**: stargazers, user profiles, repos
- **Vision**: image analysis
- **Time**: current time
After building, run `validate_agent_package("{name}")` to check everything.
@@ -1,158 +1,53 @@
# GCU Browser Automation Guide
# Browser Automation Guide
## When to Use GCU Nodes
## When to Use Browser Nodes
Use `node_type="gcu"` when:
- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs)
- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content)
- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files
Use browser nodes (with `tools: {policy: "all"}`) when:
- The task requires interacting with web pages (clicking, typing, navigating)
- No API is available for the target service
- The user is already logged in to the target site
Do NOT use GCU for:
- Static content that `web_scrape` handles fine
- API-accessible data (use the API directly)
- PDF/file processing
- Anything that doesn't require a browser UI
## What Browser Nodes Are
## What GCU Nodes Are
- Regular `event_loop` nodes with browser tools from gcu-tools MCP server
- Set `tools: {policy: "all"}` to give access to all browser tools
- Wire into the graph with edges like any other node
- No special node_type needed
- `node_type="gcu"` — a declarative enhancement over `event_loop`
- Framework auto-prepends browser best-practices system prompt
- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server
- Same underlying `EventLoopNode` class — no new imports needed
- `tools=[]` is correct — tools are auto-populated at runtime
## Available Browser Tools
## GCU Architecture Pattern
All tools are prefixed with `browser_`:
- `browser_start`, `browser_open` -- launch/navigate
- `browser_click`, `browser_fill`, `browser_type` -- interact
- `browser_snapshot` -- read page content (preferred over screenshot)
- `browser_screenshot` -- visual capture
- `browser_scroll`, `browser_wait` -- navigation helpers
- `browser_evaluate` -- run JavaScript
GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges.
## System Prompt Tips for Browser Nodes
- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work
- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")`
- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False`
- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)`
## GCU Node Definition Template
```python
gcu_browser_node = NodeSpec(
id="gcu-browser-worker",
name="Browser Worker",
description="Browser subagent that does X.",
node_type="gcu",
client_facing=False,
max_node_visits=1,
input_keys=[],
output_keys=["result"],
tools=[], # Auto-populated with all browser tools
system_prompt="""\
You are a browser agent. Your job: [specific task].
## Workflow
1. browser_start (only if no browser is running yet)
2. browser_open(url=TARGET_URL) — note the returned targetId
3. browser_snapshot to read the page
4. [task-specific steps]
5. set_output("result", JSON)
## Output format
set_output("result", JSON) with:
- [field]: [type and description]
""",
)
```
1. Use browser_snapshot() to read page content (NOT browser_get_text)
2. Use browser_wait(seconds=2-3) after navigation for page load
3. If you hit an auth wall, call set_output with an error and move on
4. Keep tool calls per turn <= 10 for reliability
```
## Parent Node Template (orchestrating GCU subagents)
```python
orchestrator_node = NodeSpec(
id="orchestrator",
...
node_type="event_loop",
sub_agents=["gcu-browser-worker"],
system_prompt="""\
...
delegate_to_sub_agent(
agent_id="gcu-browser-worker",
task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]."
)
...
""",
tools=[], # Orchestrator doesn't need browser tools
)
```
## mcp_servers.json with GCU
## Example
```json
{
"hive-tools": { ... },
"gcu-tools": {
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
"cwd": "../../tools",
"description": "GCU tools for browser automation"
}
"id": "scan-profiles",
"name": "Scan LinkedIn Profiles",
"description": "Navigate LinkedIn search results and collect profile data",
"tools": {"policy": "all"},
"input_keys": ["search_url"],
"output_keys": ["profiles"],
"system_prompt": "Navigate to the search URL, paginate through results..."
}
```
Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine.
## GCU System Prompt Best Practices
Key rules to bake into GCU node prompts:
- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML
- Always `browser_wait` after navigation
- Use large scroll amounts (~2000-5000) for lazy-loaded content
- For spillover files, use `run_command` with grep, not `read_file`
- If auth wall detected, report immediately — don't attempt login
- Keep tool calls per turn ≤10
- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call
## Multiple Concurrent GCU Subagents
When a task can be parallelized across multiple sites or profiles, declare a distinct GCU
node for each and invoke them all in the same LLM turn. The framework batches all
`delegate_to_sub_agent` calls made in one turn and runs them with `asyncio.gather`, so
they execute concurrently — not sequentially.
**Each GCU subagent automatically gets its own isolated browser context** — no `profile=`
argument is needed in tool calls. The framework derives a unique profile from the subagent's
node ID and instance counter and injects it via an asyncio `ContextVar` before the subagent
runs.
### Example: three sites in parallel
```python
# Three distinct GCU nodes
gcu_site_a = NodeSpec(id="gcu-site-a", node_type="gcu", ...)
gcu_site_b = NodeSpec(id="gcu-site-b", node_type="gcu", ...)
gcu_site_c = NodeSpec(id="gcu-site-c", node_type="gcu", ...)
orchestrator = NodeSpec(
id="orchestrator",
node_type="event_loop",
sub_agents=["gcu-site-a", "gcu-site-b", "gcu-site-c"],
system_prompt="""\
Call all three subagents in a single response to run them in parallel:
delegate_to_sub_agent(agent_id="gcu-site-a", task="Scrape prices from site A")
delegate_to_sub_agent(agent_id="gcu-site-b", task="Scrape prices from site B")
delegate_to_sub_agent(agent_id="gcu-site-c", task="Scrape prices from site C")
""",
)
Connected via regular edges:
```
search-setup -> scan-profiles -> process-results
```
**Rules:**
- Use distinct node IDs for each concurrent task — sharing an ID shares the browser context.
- The GCU node prompts do not need to mention `profile=`; isolation is automatic.
- Cleanup is automatic at session end, but GCU nodes can call `browser_stop()` explicitly
if they want to release resources mid-run.
## GCU Anti-Patterns
- Using `browser_screenshot` to read text (use `browser_snapshot` instead; screenshots are for visual context only)
- Re-navigating after scrolling (resets scroll position)
- Attempting login on auth walls
- Forgetting `target_id` in multi-tab scenarios
- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern
- Making GCU nodes `client_facing=True` (they should be autonomous subagents)
@@ -2,7 +2,7 @@
A lightweight side agent that runs after each queen LLM turn. It inspects
recent conversation messages and extracts durable user knowledge into
individual memory files in ``~/.hive/memories/global/``.
Two reflection types:
- **Short reflection**: after conversational queen turns. Distills
@@ -493,7 +493,7 @@ async def subscribe_reflection_triggers(
Call this once during queen setup. Returns a list of event-bus
subscription IDs for cleanup during session teardown.
"""
from framework.runtime.event_bus import EventType
from framework.host.event_bus import EventType
mem_dir = memory_dir or global_memory_dir()
_lock = asyncio.Lock()
@@ -22,10 +22,10 @@ def mock_mode():
@pytest_asyncio.fixture(scope="session")
async def runner(tmp_path_factory, mock_mode):
from framework.runner.runner import AgentRunner
from framework.loader.agent_loader import AgentLoader
storage = tmp_path_factory.mktemp("agent_storage")
r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
r = AgentLoader.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
r._setup()
yield r
await r.cleanup_async()
+2 -2
View File
@@ -79,7 +79,7 @@ def main():
subparsers = parser.add_subparsers(dest="command", required=True)
# Register runner commands (run, info, validate, list, shell)
from framework.runner.cli import register_commands
from framework.loader.cli import register_commands
register_commands(subparsers)
@@ -99,7 +99,7 @@ def main():
register_debugger_commands(subparsers)
# Register MCP registry commands (mcp install, mcp add, ...)
from framework.runner.mcp_registry_cli import register_mcp_commands
from framework.loader.mcp_registry_cli import register_mcp_commands
register_mcp_commands(subparsers)
+67 -12
View File
@@ -12,13 +12,47 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
from framework.graph.edge import DEFAULT_MAX_TOKENS
from framework.orchestrator.edge import DEFAULT_MAX_TOKENS
# ---------------------------------------------------------------------------
# Hive home directory structure
# ---------------------------------------------------------------------------
HIVE_HOME = Path.home() / ".hive"
QUEENS_DIR = HIVE_HOME / "agents" / "queens"
COLONIES_DIR = HIVE_HOME / "colonies"
MEMORIES_DIR = HIVE_HOME / "memories"
def queen_dir(queen_name: str = "default") -> Path:
"""Return the storage directory for a named queen agent."""
return QUEENS_DIR / queen_name
def colony_dir(colony_name: str) -> Path:
"""Return the directory for a named colony."""
return COLONIES_DIR / colony_name
def memory_dir(scope: str, name: str | None = None) -> Path:
"""Return memory dir for a scope.
Examples::
memory_dir("global") -> ~/.hive/memories/global
memory_dir("colonies", "my_agent") -> ~/.hive/memories/colonies/my_agent
memory_dir("agents/queens", "default")-> ~/.hive/memories/agents/queens/default
memory_dir("agents", "worker_name") -> ~/.hive/memories/agents/worker_name
"""
base = MEMORIES_DIR / scope
return base / name if name else base
# ---------------------------------------------------------------------------
# Low-level config file access
# ---------------------------------------------------------------------------
HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
HIVE_CONFIG_FILE = HIVE_HOME / "configuration.json"
# Hive LLM router endpoint (Anthropic-compatible).
# litellm's Anthropic handler appends /v1/messages, so this is just the base host.
@@ -130,7 +164,7 @@ def get_worker_api_key() -> str | None:
# Worker-specific subscription / env var
if worker_llm.get("use_claude_code_subscription"):
try:
from framework.runner.runner import get_claude_code_token
from framework.loader.agent_loader import get_claude_code_token
token = get_claude_code_token()
if token:
@@ -140,7 +174,7 @@ def get_worker_api_key() -> str | None:
if worker_llm.get("use_codex_subscription"):
try:
from framework.runner.runner import get_codex_token
from framework.loader.agent_loader import get_codex_token
token = get_codex_token()
if token:
@@ -150,7 +184,7 @@ def get_worker_api_key() -> str | None:
if worker_llm.get("use_kimi_code_subscription"):
try:
from framework.runner.runner import get_kimi_code_token
from framework.loader.agent_loader import get_kimi_code_token
token = get_kimi_code_token()
if token:
@@ -160,7 +194,7 @@ def get_worker_api_key() -> str | None:
if worker_llm.get("use_antigravity_subscription"):
try:
from framework.runner.runner import get_antigravity_token
from framework.loader.agent_loader import get_antigravity_token
token = get_antigravity_token()
if token:
@@ -216,7 +250,7 @@ def get_worker_llm_extra_kwargs() -> dict[str, Any]:
"User-Agent": "CodexBar",
}
try:
from framework.runner.runner import get_codex_account_id
from framework.loader.agent_loader import get_codex_account_id
account_id = get_codex_account_id()
if account_id:
@@ -263,22 +297,43 @@ def get_max_context_tokens() -> int:
return get_hive_config().get("llm", {}).get("max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS)
def get_api_keys() -> list[str] | None:
"""Return a list of API keys if ``api_keys`` is configured, else ``None``.
This supports key-pool rotation: configure multiple keys in
``~/.hive/configuration.json`` under ``llm.api_keys`` and the
:class:`~framework.llm.key_pool.KeyPool` will rotate through them.
"""
llm = get_hive_config().get("llm", {})
keys = llm.get("api_keys")
if keys and isinstance(keys, list) and len(keys) > 0:
return [k for k in keys if k] # filter empties
return None
def get_api_key() -> str | None:
"""Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code.
Priority:
0. Explicit key pool (``api_keys`` list) -- returns first key for
single-key callers; full pool available via :func:`get_api_keys`.
1. Claude Code subscription (``use_claude_code_subscription: true``)
reads the OAuth token from ``~/.claude/.credentials.json``.
2. Codex subscription (``use_codex_subscription: true``)
reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``.
3. Environment variable named in ``api_key_env_var``.
"""
# If an explicit key pool is configured, use the first key.
pool_keys = get_api_keys()
if pool_keys:
return pool_keys[0]
llm = get_hive_config().get("llm", {})
# Claude Code subscription: read OAuth token directly
if llm.get("use_claude_code_subscription"):
try:
from framework.runner.runner import get_claude_code_token
from framework.loader.agent_loader import get_claude_code_token
token = get_claude_code_token()
if token:
@@ -289,7 +344,7 @@ def get_api_key() -> str | None:
# Codex subscription: read OAuth token from Keychain / auth.json
if llm.get("use_codex_subscription"):
try:
from framework.runner.runner import get_codex_token
from framework.loader.agent_loader import get_codex_token
token = get_codex_token()
if token:
@@ -300,7 +355,7 @@ def get_api_key() -> str | None:
# Kimi Code subscription: read API key from ~/.kimi/config.toml
if llm.get("use_kimi_code_subscription"):
try:
from framework.runner.runner import get_kimi_code_token
from framework.loader.agent_loader import get_kimi_code_token
token = get_kimi_code_token()
if token:
@@ -311,7 +366,7 @@ def get_api_key() -> str | None:
# Antigravity subscription: read OAuth token from accounts JSON
if llm.get("use_antigravity_subscription"):
try:
from framework.runner.runner import get_antigravity_token
from framework.loader.agent_loader import get_antigravity_token
token = get_antigravity_token()
if token:
@@ -468,7 +523,7 @@ def get_llm_extra_kwargs() -> dict[str, Any]:
"User-Agent": "CodexBar",
}
try:
from framework.runner.runner import get_codex_account_id
from framework.loader.agent_loader import get_codex_account_id
account_id = get_codex_account_id()
if account_id:
+21 -7
View File
@@ -36,7 +36,7 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from framework.graph import NodeSpec
from framework.orchestrator import NodeSpec
logger = logging.getLogger(__name__)
@@ -533,7 +533,9 @@ class CredentialSetupSession:
def load_agent_nodes(agent_path: str | Path) -> list:
"""Load NodeSpec list from an agent's agent.py or agent.json.
"""Load NodeSpec list from an agent directory.
Checks agent.json (declarative) first, then agent.py (legacy).
Args:
agent_path: Path to agent directory.
@@ -542,16 +544,28 @@ def load_agent_nodes(agent_path: str | Path) -> list:
List of NodeSpec objects (empty list if agent can't be loaded).
"""
agent_path = Path(agent_path)
agent_json_file = agent_path / "agent.json"
agent_py = agent_path / "agent.py"
agent_json = agent_path / "agent.json"
if agent_py.exists():
if agent_json_file.exists():
return _load_nodes_from_json_declarative(agent_json_file)
elif agent_py.exists():
return _load_nodes_from_python_agent(agent_path)
elif agent_json.exists():
return _load_nodes_from_json_agent(agent_json)
return []
def _load_nodes_from_json_declarative(agent_json: Path) -> list:
"""Load nodes from a declarative JSON agent."""
try:
from framework.loader.agent_loader import load_agent_config
data = json.loads(agent_json.read_text(encoding="utf-8"))
graph, _ = load_agent_config(data)
return list(graph.nodes)
except Exception:
return []
def _load_nodes_from_python_agent(agent_path: Path) -> list:
"""Load nodes from a Python-based agent."""
import importlib.util
@@ -590,7 +604,7 @@ def _load_nodes_from_json_agent(agent_json: Path) -> list:
with open(agent_json, encoding="utf-8-sig") as f:
data = json.load(f)
from framework.graph import NodeSpec
from framework.orchestrator import NodeSpec
nodes_data = data.get("graph", {}).get("nodes", [])
nodes = []
-65
View File
@@ -1,65 +0,0 @@
"""Graph structures: Goals, Nodes, Edges, and Execution."""
from framework.graph.context import GraphContext
from framework.graph.context_handoff import ContextHandoff, HandoffContext
from framework.graph.conversation import ConversationStore, Message, NodeConversation
from framework.graph.edge import DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.event_loop_node import (
EventLoopNode,
JudgeProtocol,
JudgeVerdict,
LoopConfig,
OutputAccumulator,
)
from framework.graph.executor import GraphExecutor
from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec
from framework.graph.worker_agent import (
Activation,
FanOutTag,
FanOutTracker,
WorkerAgent,
WorkerCompletion,
WorkerLifecycle,
)
__all__ = [
# Goal
"Goal",
"SuccessCriterion",
"Constraint",
"GoalStatus",
# Node
"NodeSpec",
"NodeContext",
"NodeResult",
"NodeProtocol",
# Edge
"EdgeSpec",
"EdgeCondition",
"GraphSpec",
"DEFAULT_MAX_TOKENS",
# Executor
"GraphExecutor",
# Conversation
"NodeConversation",
"ConversationStore",
"Message",
# Event Loop
"EventLoopNode",
"LoopConfig",
"OutputAccumulator",
"JudgeProtocol",
"JudgeVerdict",
# Context Handoff
"ContextHandoff",
"HandoffContext",
# Worker Agent
"WorkerAgent",
"WorkerLifecycle",
"WorkerCompletion",
"Activation",
"FanOutTag",
"FanOutTracker",
"GraphContext",
]
@@ -1,6 +0,0 @@
"""EventLoopNode subpackage — modular components of the event loop orchestrator.
All public symbols are re-exported by the parent ``event_loop_node.py`` for
backward compatibility. Internal consumers may import directly from these
submodules for clarity.
"""
@@ -1,370 +0,0 @@
"""Subagent execution for the event loop.
Handles the full subagent lifecycle: validation, context setup, tool filtering,
conversation store derivation, execution, and cleanup.
"""
from __future__ import annotations
import json
import logging
import time
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import TYPE_CHECKING, Any
from framework.graph.conversation import ConversationStore
from framework.graph.event_loop.judge_pipeline import SubagentJudge
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
from framework.graph.node import DataBuffer, NodeContext
from framework.llm.provider import ToolResult, ToolUse
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.event_bus import EventBus
if TYPE_CHECKING:
from framework.graph.event_loop_node import EventLoopNode
logger = logging.getLogger(__name__)
async def execute_subagent(
    ctx: NodeContext,
    agent_id: str,
    task: str,
    *,
    config: LoopConfig,
    event_loop_node_cls: type[EventLoopNode],
    escalation_receiver_cls: Callable[[], Any],
    accumulator: OutputAccumulator | None = None,
    event_bus: EventBus | None = None,
    tool_executor: Callable[[ToolUse], ToolResult | Awaitable[ToolResult]] | None = None,
    conversation_store: ConversationStore | None = None,
    subagent_instance_counter: dict[str, int] | None = None,
) -> ToolResult:
    """Execute a subagent and return the result as a ToolResult.

    The subagent:
    - Gets a fresh conversation with just the task
    - Has read-only access to the parent's readable memory
    - Cannot delegate to its own subagents (prevents recursion)
    - Returns its output in structured JSON format

    Args:
        ctx: Parent node's context (for memory, tools, LLM access).
        agent_id: The node ID of the subagent to invoke.
        task: The task description to give the subagent.
        config: LoopConfig for iteration/tool limits.
        event_loop_node_cls: Concrete EventLoopNode class used to run the subagent.
        escalation_receiver_cls: Factory producing a receiver that blocks until
            the queen answers an escalation.
        accumulator: Parent's OutputAccumulator.
        event_bus: EventBus for lifecycle events.
        tool_executor: Tool executor callable.
        conversation_store: Parent conversation store (for deriving subagent store).
        subagent_instance_counter: Mutable counter dict for unique subagent paths.

    Returns:
        ToolResult with structured JSON output.
    """
    # Log subagent invocation start.
    # BUG FIX: the previous format string used implicit string-literal
    # concatenation -- '+ "\n" "🤖 SUBAGENT INVOCATION\n" "=" * 60' -- where
    # the three adjacent literals fuse into ONE string before the `* 60`
    # applies, so the whole banner text was repeated 60 times in the log.
    # Build the separator explicitly instead.
    banner = "=" * 60
    logger.info(
        "\n%s\n🤖 SUBAGENT INVOCATION\n%s\nParent Node: %s\nSubagent ID: %s\nTask: %s\n%s",
        banner,
        banner,
        ctx.node_id,
        agent_id,
        task[:500] + "..." if len(task) > 500 else task,  # truncate huge tasks
        banner,
    )

    # 1. Validate agent exists in registry
    if agent_id not in ctx.node_registry:
        return ToolResult(
            tool_use_id="",
            content=json.dumps(
                {
                    "message": f"Sub-agent '{agent_id}' not found in registry",
                    "data": None,
                    "metadata": {"agent_id": agent_id, "success": False, "error": "not_found"},
                }
            ),
            is_error=True,
        )

    subagent_spec = ctx.node_registry[agent_id]

    # 2. Create read-only memory snapshot
    parent_data = ctx.buffer.read_all()
    # Merge in-flight outputs from the parent's accumulator.
    # Committed buffer values win on key collisions.
    if accumulator:
        for key, value in accumulator.to_dict().items():
            if key not in parent_data:
                parent_data[key] = value

    subagent_buffer = DataBuffer()
    for key, value in parent_data.items():
        subagent_buffer.write(key, value, validate=False)

    read_keys = set(parent_data.keys()) | set(subagent_spec.input_keys or [])
    scoped_buffer = subagent_buffer.with_permissions(
        read_keys=list(read_keys),
        write_keys=[],  # Read-only!
    )

    # 2b. Compute instance counter early so the callback and child context
    # share the same stable node_id for this subagent invocation.
    if subagent_instance_counter is not None:
        subagent_instance_counter.setdefault(agent_id, 0)
        subagent_instance_counter[agent_id] += 1
        subagent_instance = str(subagent_instance_counter[agent_id])
    else:
        subagent_instance = "1"

    # First invocation keeps the short id; repeats get a numeric suffix.
    if subagent_instance == "1":
        sa_node_id = f"{ctx.node_id}:subagent:{agent_id}"
    else:
        sa_node_id = f"{ctx.node_id}:subagent:{agent_id}:{subagent_instance}"

    # 2c. Set up report callback (one-way channel to parent / event bus)
    subagent_reports: list[dict] = []

    async def _report_callback(
        message: str,
        data: dict | None = None,
        *,
        wait_for_response: bool = False,
    ) -> str | None:
        subagent_reports.append({"message": message, "data": data, "timestamp": time.time()})
        if event_bus:
            await event_bus.emit_subagent_report(
                stream_id=ctx.node_id,
                node_id=sa_node_id,
                subagent_id=agent_id,
                message=message,
                data=data,
                execution_id=ctx.execution_id,
            )
        if not wait_for_response:
            return None
        if not event_bus:
            logger.warning(
                "Subagent '%s' requested user response but no event_bus available",
                agent_id,
            )
            return None

        # Create isolated receiver and register for input routing
        import uuid

        escalation_id = f"{ctx.node_id}:escalation:{uuid.uuid4().hex[:8]}"
        receiver = escalation_receiver_cls()
        registry = ctx.shared_node_registry
        registry[escalation_id] = receiver
        try:
            await event_bus.emit_escalation_requested(
                stream_id=ctx.stream_id or ctx.node_id,
                node_id=escalation_id,
                reason=f"Subagent report (wait_for_response) from {agent_id}",
                context=message,
                execution_id=ctx.execution_id,
            )
            # Block until queen responds
            return await receiver.wait()
        finally:
            # Always deregister, even if the wait is cancelled.
            registry.pop(escalation_id, None)

    # 3. Filter tools for subagent
    subagent_tool_names = set(subagent_spec.tools or [])
    tool_source = ctx.all_tools if ctx.all_tools else ctx.available_tools

    # GCU auto-population: browser nodes with no explicit tool list get
    # everything except delegation (no nested subagents).
    if subagent_spec.node_type == "gcu" and not subagent_tool_names:
        subagent_tools = [t for t in tool_source if t.name != "delegate_to_sub_agent"]
    else:
        subagent_tools = [
            t
            for t in tool_source
            if t.name in subagent_tool_names and t.name != "delegate_to_sub_agent"
        ]

    missing = subagent_tool_names - {t.name for t in subagent_tools}
    if missing:
        logger.warning(
            "Subagent '%s' requested tools not found in catalog: %s",
            agent_id,
            sorted(missing),
        )

    logger.info(
        "📦 Subagent '%s' configuration:\n"
        " - System prompt: %s\n"
        " - Tools available (%d): %s\n"
        " - Memory keys inherited: %s",
        agent_id,
        (subagent_spec.system_prompt[:200] + "...")
        if subagent_spec.system_prompt and len(subagent_spec.system_prompt) > 200
        else subagent_spec.system_prompt,
        len(subagent_tools),
        [t.name for t in subagent_tools],
        list(parent_data.keys()),
    )

    # 4. Build subagent context
    max_iter = min(config.max_iterations, 10)  # hard cap per delegation
    subagent_ctx = NodeContext(
        runtime=ctx.runtime,
        node_id=sa_node_id,
        node_spec=subagent_spec,
        buffer=scoped_buffer,
        input_data={"task": task, **parent_data},
        llm=ctx.llm,
        available_tools=subagent_tools,
        goal_context=(
            f"Your specific task: {task}\n\n"
            f"COMPLETION REQUIREMENTS:\n"
            f"When your task is done, you MUST call set_output() "
            f"for each required key: {subagent_spec.output_keys}\n"
            f"Alternatively, call report_to_parent(mark_complete=true) "
            f"with your findings in message/data.\n"
            + (
                "Before finishing, call browser_close_finished() to clean up your browser tabs.\n"
                if subagent_spec.node_type == "gcu"
                else ""
            )
            + f"You have a maximum of {max_iter} turns to complete this task."
        ),
        goal=ctx.goal,
        max_tokens=ctx.max_tokens,
        runtime_logger=ctx.runtime_logger,
        is_subagent_mode=True,  # Prevents nested delegation
        report_callback=_report_callback,
        node_registry={},  # Empty - no nested subagents
        shared_node_registry=ctx.shared_node_registry,  # For escalation routing
    )

    # 5. Create and execute subagent EventLoopNode
    subagent_conv_store = None
    if conversation_store is not None:
        from framework.storage.conversation_store import FileConversationStore

        # NOTE(review): reaches into the private `_base` attribute of the
        # parent store -- assumes FileConversationStore layout; confirm if
        # other ConversationStore implementations are ever passed here.
        parent_base = getattr(conversation_store, "_base", None)
        if parent_base is not None:
            conversations_dir = parent_base.parent
            subagent_dir_name = f"{agent_id}-{subagent_instance}"
            subagent_store_path = conversations_dir / subagent_dir_name
            subagent_conv_store = FileConversationStore(base_path=subagent_store_path)

    # Derive a subagent-scoped spillover dir
    subagent_spillover = None
    if config.spillover_dir:
        subagent_spillover = str(Path(config.spillover_dir) / agent_id / subagent_instance)

    subagent_node = event_loop_node_cls(
        event_bus=event_bus,
        judge=SubagentJudge(task=task, max_iterations=max_iter),
        config=LoopConfig(
            max_iterations=max_iter,
            max_tool_calls_per_turn=config.max_tool_calls_per_turn,
            tool_call_overflow_margin=config.tool_call_overflow_margin,
            max_context_tokens=config.max_context_tokens,
            stall_detection_threshold=config.stall_detection_threshold,
            max_tool_result_chars=config.max_tool_result_chars,
            spillover_dir=subagent_spillover,
        ),
        tool_executor=tool_executor,
        conversation_store=subagent_conv_store,
    )

    # Each subagent instance gets its own unique browser profile so concurrent
    # subagents don't share tab groups. The profile is set as execution context
    # so the tool registry auto-injects it into every browser_* MCP tool call.
    _gcu_profile = f"{agent_id}:{subagent_instance}"
    _profile_token = ToolRegistry.set_execution_context(profile=_gcu_profile)
    try:
        logger.info("🚀 Starting subagent '%s' execution...", agent_id)
        start_time = time.time()

        result = await subagent_node.execute(subagent_ctx)

        latency_ms = int((time.time() - start_time) * 1000)
        separator = "-" * 60
        logger.info(
            "\n%s\n"
            "✅ SUBAGENT '%s' COMPLETED\n"
            "%s\n"
            "Success: %s\n"
            "Latency: %dms\n"
            "Tokens used: %s\n"
            "Output keys: %s\n"
            "%s",
            separator,
            agent_id,
            separator,
            result.success,
            latency_ms,
            result.tokens_used,
            list(result.output.keys()) if result.output else [],
            separator,
        )

        result_json = {
            "message": (
                f"Sub-agent '{agent_id}' completed successfully"
                if result.success
                else f"Sub-agent '{agent_id}' failed: {result.error}"
            ),
            "data": result.output,
            "reports": subagent_reports if subagent_reports else None,
            "metadata": {
                "agent_id": agent_id,
                "success": result.success,
                "tokens_used": result.tokens_used,
                "latency_ms": latency_ms,
                "report_count": len(subagent_reports),
            },
        }
        return ToolResult(
            tool_use_id="",
            content=json.dumps(result_json, indent=2, default=str),
            is_error=not result.success,
        )
    except Exception as e:
        logger.exception(
            "\n" + "!" * 60 + "\n❌ SUBAGENT '%s' FAILED\nError: %s\n" + "!" * 60,
            agent_id,
            str(e),
        )
        result_json = {
            "message": f"Sub-agent '{agent_id}' raised exception: {e}",
            "data": None,
            "metadata": {
                "agent_id": agent_id,
                "success": False,
                "error": str(e),
            },
        }
        return ToolResult(
            tool_use_id="",
            content=json.dumps(result_json, indent=2),
            is_error=True,
        )
    finally:
        ToolRegistry.reset_execution_context(_profile_token)
        # Close the tab group this subagent created, if any.
        # Best-effort: browser may not be running or gcu may not be installed.
        try:
            from gcu.browser.bridge import get_bridge
            from gcu.browser.tools.lifecycle import _contexts

            bridge = get_bridge()
            ctx_entry = _contexts.pop(_gcu_profile, None)
            if bridge and bridge.is_connected and ctx_entry:
                group_id = ctx_entry.get("groupId")
                if group_id is not None:
                    await bridge.destroy_context(group_id)
        except Exception:
            pass
+11
View File
@@ -0,0 +1,11 @@
"""Host layer -- how agents are triggered and hosted."""
from framework.host.agent_host import ( # noqa: F401
AgentHost,
AgentRuntimeConfig,
)
from framework.host.event_bus import AgentEvent, EventBus, EventType # noqa: F401
from framework.host.execution_manager import ( # noqa: F401
EntryPointSpec,
ExecutionManager,
)
File diff suppressed because it is too large Load Diff
@@ -18,18 +18,18 @@ from dataclasses import dataclass, field
from datetime import datetime
from typing import TYPE_CHECKING, Any
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.executor import ExecutionResult, GraphExecutor
from framework.runtime.event_bus import EventBus
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
from framework.runtime.stream_runtime import StreamRuntime, StreamRuntimeAdapter
from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator
from framework.host.event_bus import EventBus
from framework.host.shared_state import IsolationLevel, SharedBufferManager
from framework.host.stream_runtime import StreamDecisionTracker, StreamRuntimeAdapter
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.goal import Goal
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.goal import Goal
from framework.llm.provider import LLMProvider, Tool
from framework.runtime.event_bus import AgentEvent
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.host.event_bus import AgentEvent
from framework.host.outcome_aggregator import OutcomeAggregator
from framework.storage.concurrent import ConcurrentStorage
from framework.storage.session_store import SessionStore
@@ -133,7 +133,7 @@ class ExecutionContext:
status: str = "pending" # pending, running, completed, failed, paused
class ExecutionStream:
class ExecutionManager:
"""
Manages concurrent executions for a single entry point.
@@ -262,7 +262,7 @@ class ExecutionStream:
)
# Create stream-scoped runtime
self._runtime = StreamRuntime(
self._runtime = StreamDecisionTracker(
stream_id=stream_id,
storage=storage,
outcome_aggregator=outcome_aggregator,
@@ -271,7 +271,7 @@ class ExecutionStream:
# Execution tracking
self._active_executions: dict[str, ExecutionContext] = {}
self._execution_tasks: dict[str, asyncio.Task] = {}
self._active_executors: dict[str, GraphExecutor] = {}
self._active_executors: dict[str, Orchestrator] = {}
self._cancel_reasons: dict[str, str] = {}
self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
self._execution_result_times: dict[str, float] = {}
@@ -301,7 +301,7 @@ class ExecutionStream:
# Emit stream started event
if self._scoped_event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
@@ -426,7 +426,7 @@ class ExecutionStream:
# Emit stream stopped event
if self._scoped_event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
@@ -668,7 +668,7 @@ class ExecutionStream:
# Create per-execution runtime logger
runtime_logger = None
if self._runtime_log_store:
from framework.runtime.runtime_logger import RuntimeLogger
from framework.tracker.runtime_logger import RuntimeLogger
runtime_logger = RuntimeLogger(
store=self._runtime_log_store, agent_id=self.graph.id
@@ -697,12 +697,7 @@ class ExecutionStream:
# forward so the next attempt resumes at the failed node.
while True:
# Create executor for this execution.
# Each execution gets its own storage under sessions/{exec_id}/
# so conversations, spillover, and data files are all scoped
# to this execution. The executor sets data_dir via execution
# context (contextvars) so data tools and spillover share the
# same session-scoped directory.
executor = GraphExecutor(
executor = Orchestrator(
runtime=runtime_adapter,
llm=self._llm,
tools=self._tools,
@@ -763,7 +758,7 @@ class ExecutionStream:
# Emit resurrection event
if self._scoped_event_bus:
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
await self._scoped_event_bus.publish(
AgentEvent(
@@ -1119,7 +1114,7 @@ class ExecutionStream:
Each stream only executes from its own entry_node, but the full
graph must validate with all entry points accounted for.
"""
from framework.graph.edge import GraphSpec
from framework.orchestrator.edge import GraphSpec
# Merge entry points: this stream's entry + original graph's primary
# entry + any other entry points. This ensures all nodes are
@@ -14,8 +14,8 @@ from typing import TYPE_CHECKING, Any
from framework.schemas.decision import Decision, Outcome
if TYPE_CHECKING:
from framework.graph.goal import Goal
from framework.runtime.event_bus import EventBus
from framework.orchestrator.goal import Goal
from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -18,12 +18,12 @@ from framework.schemas.run import Run, RunStatus
from framework.storage.concurrent import ConcurrentStorage
if TYPE_CHECKING:
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.host.outcome_aggregator import OutcomeAggregator
logger = logging.getLogger(__name__)
class StreamRuntime:
class StreamDecisionTracker:
"""
Thread-safe runtime for a single execution stream.
@@ -431,7 +431,7 @@ class StreamRuntimeAdapter:
by providing the same API as Runtime but routing to a specific execution.
"""
def __init__(self, stream_runtime: StreamRuntime, execution_id: str):
def __init__(self, stream_runtime: StreamDecisionTracker, execution_id: str):
"""
Create adapter for a specific execution.
@@ -13,7 +13,7 @@ from dataclasses import dataclass
from aiohttp import web
from framework.runtime.event_bus import EventBus
from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
+101
View File
@@ -0,0 +1,101 @@
"""Thread-safe API key pool with round-robin rotation and health tracking.
When multiple API keys are configured, the pool rotates through them on each
request. Keys that hit rate limits are temporarily cooled-down so the next
call automatically uses a healthy key -- no sleep required.
"""
from __future__ import annotations
import logging
import threading
import time
from dataclasses import dataclass
logger = logging.getLogger(__name__)


@dataclass
class KeyHealth:
    """Mutable health counters tracked for a single API key."""

    # Monotonic timestamp (time.monotonic) until which the key is considered
    # rate-limited; 0.0 means the key has never been throttled.
    rate_limited_until: float = 0.0
    consecutive_errors: int = 0
    total_requests: int = 0
    total_successes: int = 0


class KeyPool:
    """Round-robin pool of API keys with per-key health bookkeeping.

    Every mutation happens under a single lock, so one pool can be shared
    by concurrent LLM calls (e.g. parallel tool execution) without races.
    """

    def __init__(self, keys: list[str]) -> None:
        """Create a pool over *keys*; raises ValueError when *keys* is empty."""
        if not keys:
            raise ValueError("KeyPool requires at least one key")
        self._keys = list(keys)
        self._index = 0
        self._health: dict[str, KeyHealth] = {key: KeyHealth() for key in keys}
        self._lock = threading.Lock()

    @property
    def size(self) -> int:
        """Number of keys managed by the pool."""
        return len(self._keys)

    def get_key(self) -> str:
        """Return the next key that is not cooling down (round-robin).

        When every key is rate-limited, the key whose cooldown expires
        earliest is returned so the caller can proceed with minimal delay.
        """
        with self._lock:
            now = time.monotonic()
            remaining = len(self._keys)
            chosen = None
            while remaining:
                remaining -= 1
                candidate = self._keys[self._index]
                self._index = (self._index + 1) % len(self._keys)
                if self._health[candidate].rate_limited_until <= now:
                    chosen = candidate
                    break
            if chosen is None:
                # Everything is throttled -- pick whichever recovers first.
                chosen = min(
                    self._keys,
                    key=lambda k: self._health[k].rate_limited_until,
                )
            self._health[chosen].total_requests += 1
            return chosen

    def mark_rate_limited(self, key: str, retry_after: float = 60.0) -> None:
        """Put *key* on cooldown for *retry_after* seconds."""
        with self._lock:
            entry = self._health.get(key)
            if entry is None:
                return
            entry.rate_limited_until = time.monotonic() + retry_after
            entry.consecutive_errors += 1
            logger.info(
                "[key-pool] Key ...%s rate-limited for %.0fs (errors=%d)",
                key[-6:],
                retry_after,
                entry.consecutive_errors,
            )

    def mark_success(self, key: str) -> None:
        """Reset the error streak for *key* and bump its success counter."""
        with self._lock:
            entry = self._health.get(key)
            if entry is None:
                return
            entry.consecutive_errors = 0
            entry.total_successes += 1

    def get_stats(self) -> dict[str, dict]:
        """Return per-key health stats, keyed by the key's last 6 chars."""
        with self._lock:
            now = time.monotonic()
            stats: dict[str, dict] = {}
            for key in self._keys:
                entry = self._health[key]
                stats[f"...{key[-6:]}"] = {
                    "healthy": entry.rate_limited_until <= now,
                    "requests": entry.total_requests,
                    "successes": entry.total_successes,
                    "consecutive_errors": entry.consecutive_errors,
                }
            return stats
+70 -6
View File
@@ -7,6 +7,8 @@ Groq, and local models.
See: https://docs.litellm.ai/docs/providers
"""
from __future__ import annotations
import ast
import asyncio
import hashlib
@@ -18,7 +20,10 @@ import time
from collections.abc import AsyncIterator
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from framework.llm.key_pool import KeyPool
try:
import litellm
@@ -561,6 +566,7 @@ class LiteLLMProvider(LLMProvider):
model: str = "gpt-4o-mini",
api_key: str | None = None,
api_base: str | None = None,
api_keys: list[str] | None = None,
**kwargs: Any,
):
"""
@@ -573,6 +579,9 @@ class LiteLLMProvider(LLMProvider):
look for the appropriate env var (OPENAI_API_KEY,
ANTHROPIC_API_KEY, etc.)
api_base: Custom API base URL (for proxies or local deployments)
api_keys: Optional list of API keys for key-pool rotation. When
provided with 2+ keys, a :class:`KeyPool` is created and
keys are rotated on rate-limit errors.
**kwargs: Additional arguments passed to litellm.completion()
"""
# Kimi For Coding exposes an Anthropic-compatible endpoint at
@@ -594,11 +603,24 @@ class LiteLLMProvider(LLMProvider):
if api_base and api_base.rstrip("/").endswith("/v1"):
api_base = api_base.rstrip("/")[:-3]
self.model = model
self.api_key = api_key
# Key pool: when multiple keys are provided, enable rotation.
self._key_pool: KeyPool | None = None
if api_keys and len(api_keys) > 1:
from framework.llm.key_pool import KeyPool
self._key_pool = KeyPool(api_keys)
self.api_key = api_keys[0] # default for OAuth detection below
logger.info(
"[litellm] Key pool enabled with %d keys for model %s",
len(api_keys),
model,
)
else:
self.api_key = api_key or (api_keys[0] if api_keys else None)
self.api_base = api_base or self._default_api_base_for_model(_original_model)
self.extra_kwargs = kwargs
# Detect Claude Code OAuth subscription by checking the api_key prefix.
self._claude_code_oauth = bool(api_key and api_key.startswith("sk-ant-oat"))
self._claude_code_oauth = bool(self.api_key and self.api_key.startswith("sk-ant-oat"))
if self._claude_code_oauth:
# Anthropic requires a specific User-Agent for OAuth requests.
eh = self.extra_kwargs.setdefault("extra_headers", {})
@@ -669,10 +691,20 @@ class LiteLLMProvider(LLMProvider):
def _completion_with_rate_limit_retry(
self, max_retries: int | None = None, **kwargs: Any
) -> Any:
"""Call litellm.completion with retry on 429 rate limit errors and empty responses."""
"""Call litellm.completion with retry on 429 rate limit errors and empty responses.
When a :class:`KeyPool` is configured, rate-limited keys are rotated
automatically so the next attempt uses a different key -- no sleep
needed between attempts.
"""
model = kwargs.get("model", self.model)
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
for attempt in range(retries + 1):
# Rotate key from pool when available.
current_key: str | None = None
if self._key_pool:
current_key = self._key_pool.get_key()
kwargs["api_key"] = current_key
try:
response = litellm.completion(**kwargs) # type: ignore[union-attr]
@@ -747,8 +779,22 @@ class LiteLLMProvider(LLMProvider):
time.sleep(wait)
continue
if self._key_pool and current_key:
self._key_pool.mark_success(current_key)
return response
except RateLimitError as e:
# Key pool: mark the offending key and rotate immediately.
if self._key_pool and current_key:
self._key_pool.mark_rate_limited(current_key, retry_after=60.0)
# When we have other healthy keys, skip the sleep -- the
# next iteration will pick a different key automatically.
if attempt < retries:
logger.info(
"[retry] Key pool rotating away from ...%s on 429",
current_key[-6:],
)
continue
# Dump full request to file for debugging
messages = kwargs.get("messages", [])
token_count, token_method = _estimate_tokens(model, messages)
@@ -761,7 +807,7 @@ class LiteLLMProvider(LLMProvider):
if attempt == retries:
logger.error(
f"[retry] GAVE UP on {model} after {retries + 1} "
f"attempts rate limit error: {e!s}. "
f"attempts -- rate limit error: {e!s}. "
f"~{token_count} tokens ({token_method}). "
f"Full request dumped to: {dump_path}"
)
@@ -880,10 +926,16 @@ class LiteLLMProvider(LLMProvider):
"""Async version of _completion_with_rate_limit_retry.
Uses litellm.acompletion and asyncio.sleep instead of blocking calls.
When a :class:`KeyPool` is configured, rate-limited keys are rotated.
"""
model = kwargs.get("model", self.model)
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
for attempt in range(retries + 1):
# Rotate key from pool when available.
current_key: str | None = None
if self._key_pool:
current_key = self._key_pool.get_key()
kwargs["api_key"] = current_key
try:
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
@@ -952,8 +1004,20 @@ class LiteLLMProvider(LLMProvider):
await asyncio.sleep(wait)
continue
if self._key_pool and current_key:
self._key_pool.mark_success(current_key)
return response
except RateLimitError as e:
# Key pool: mark the offending key and rotate immediately.
if self._key_pool and current_key:
self._key_pool.mark_rate_limited(current_key, retry_after=60.0)
if attempt < retries:
logger.info(
"[async-retry] Key pool rotating away from ...%s on 429",
current_key[-6:],
)
continue
messages = kwargs.get("messages", [])
token_count, token_method = _estimate_tokens(model, messages)
dump_path = _dump_failed_request(
@@ -965,7 +1029,7 @@ class LiteLLMProvider(LLMProvider):
if attempt == retries:
logger.error(
f"[async-retry] GAVE UP on {model} after {retries + 1} "
f"attempts rate limit error: {e!s}. "
f"attempts -- rate limit error: {e!s}. "
f"~{token_count} tokens ({token_method}). "
f"Full request dumped to: {dump_path}"
)
+4
View File
@@ -0,0 +1,4 @@
"""Loader layer -- agent loading from disk (JSON config, MCP, credentials)."""
from framework.loader.agent_loader import AgentLoader # noqa: F401
from framework.loader.tool_registry import ToolRegistry # noqa: F401
@@ -13,21 +13,20 @@ from framework.config import get_hive_config, get_max_context_tokens, get_prefer
from framework.credentials.validation import (
ensure_credential_key_env as _ensure_credential_key_env,
)
from framework.graph import Goal
from framework.graph.edge import (
from framework.orchestrator import Goal
from framework.orchestrator.edge import (
DEFAULT_MAX_TOKENS,
EdgeCondition,
EdgeSpec,
GraphSpec,
)
from framework.graph.executor import ExecutionResult
from framework.graph.node import NodeSpec
from framework.orchestrator.orchestrator import ExecutionResult
from framework.orchestrator.node import NodeSpec
from framework.llm.provider import LLMProvider, Tool
from framework.runner.preload_validation import run_preload_validation
from framework.runner.tool_registry import ToolRegistry
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
from framework.runtime.runtime_log_store import RuntimeLogStore
from framework.loader.preload_validation import run_preload_validation
from framework.loader.tool_registry import ToolRegistry
from framework.host.agent_host import AgentHost, AgentRuntimeConfig
from framework.host.execution_manager import EntryPointSpec
from framework.tools.flowchart_utils import generate_fallback_flowchart
logger = logging.getLogger(__name__)
@@ -881,6 +880,172 @@ class ValidationResult:
missing_credentials: list[str] = field(default_factory=list)
def _resolve_template_vars(text: str | None, variables: dict[str, str]) -> str | None:
"""Resolve ``{{variable_name}}`` placeholders in *text*."""
if text is None or not variables:
return text
import re
def _replace(m: re.Match) -> str:
key = m.group(1).strip()
return variables.get(key, m.group(0))
return re.sub(r"\{\{(.+?)\}\}", _replace, text)
def load_agent_config(data: str | dict) -> tuple[GraphSpec, Goal]:
    """Load ``GraphSpec`` and ``Goal`` from a declarative :class:`AgentConfig`.

    The declarative format uses a ``name`` key at the top level, unlike the
    legacy export format which uses ``graph``/``goal`` keys. The runner
    auto-detects the format in :meth:`AgentLoader.load`.

    Template variables in ``config.variables`` are resolved in all
    ``system_prompt`` and ``identity_prompt`` fields via ``{{var_name}}``.

    Args:
        data: JSON string or already-parsed dict matching the
            :class:`AgentConfig` schema.

    Returns:
        Tuple of (GraphSpec, Goal)

    Raises:
        json.JSONDecodeError: If *data* is a string that is not valid JSON.
        Exception: Whatever ``AgentConfig.model_validate`` raises on a
            schema mismatch (pydantic-style validation -- confirm exact type).
    """
    # Local imports -- NOTE(review): presumably to avoid import cycles at
    # module load time; confirm against package layout.
    from framework.orchestrator.edge import EdgeCondition, EdgeSpec
    from framework.orchestrator.goal import Constraint, Goal as GoalModel, SuccessCriterion
    from framework.schemas.agent_config import AgentConfig
    if isinstance(data, str):
        data = json.loads(data)
    config = AgentConfig.model_validate(data)
    # Template variables, applied to system/identity prompts below.
    tvars = config.variables
    # Build Goal
    # Success criteria get synthetic sequential ids; metric "llm_judge" with
    # an empty target -- presumably evaluated by an LLM judge downstream,
    # TODO confirm against the Goal/judge implementation.
    success_criteria = [
        SuccessCriterion(
            id=f"sc-{i}",
            description=sc,
            metric="llm_judge",
            target="",
        )
        for i, sc in enumerate(config.goal.success_criteria)
    ]
    # All declarative constraints are treated as hard, general-category.
    constraints = [
        Constraint(
            id=f"c-{i}",
            description=c,
            constraint_type="hard",
            category="general",
        )
        for i, c in enumerate(config.goal.constraints)
    ]
    goal = GoalModel(
        id=f"{config.name}-goal",
        name=config.name,
        description=config.goal.description,
        success_criteria=success_criteria,
        constraints=constraints,
    )
    # Build nodes
    # Maps the declarative condition strings onto the EdgeCondition enum;
    # unknown strings fall back to ON_SUCCESS (see the .get() below).
    condition_map = {
        "always": EdgeCondition.ALWAYS,
        "on_success": EdgeCondition.ON_SUCCESS,
        "on_failure": EdgeCondition.ON_FAILURE,
        "conditional": EdgeCondition.CONDITIONAL,
        "llm_decide": EdgeCondition.LLM_DECIDE,
    }
    nodes = []
    for nc in config.nodes:
        # Resolve tool access: node-level config -> agent-level fallback
        # Note: a node with policy "explicit" but an empty allowed-list
        # falls through to the agent-level branch below.
        if nc.tools.policy == "explicit" and nc.tools.allowed:
            tools_list = nc.tools.allowed
            tool_policy = "explicit"
        elif nc.tools.policy == "none":
            tools_list = []
            tool_policy = "none"
        elif nc.tools.policy == "all":
            tools_list = []
            tool_policy = "all"
        else:
            # Inherit agent-level tool config
            if config.tools.policy == "explicit" and config.tools.allowed:
                tools_list = config.tools.allowed
            else:
                tools_list = []
            tool_policy = config.tools.policy
        # Required NodeSpec fields; node-level system prompts get template
        # variables resolved here.
        node_kwargs: dict = {
            "id": nc.id,
            "name": nc.name or nc.id,
            "description": nc.description or "",
            "node_type": nc.node_type,
            "system_prompt": _resolve_template_vars(nc.system_prompt, tvars),
            "tools": tools_list,
            "tool_access_policy": tool_policy,
            "model": nc.model,
            "input_keys": nc.input_keys,
            "output_keys": nc.output_keys,
            "nullable_output_keys": nc.nullable_output_keys,
            "max_iterations": nc.max_iterations,
            "success_criteria": nc.success_criteria,
            "skip_judge": nc.skip_judge,
        }
        # Optional fields -- only pass when set (avoids overriding defaults)
        if nc.client_facing:
            node_kwargs["client_facing"] = nc.client_facing
        if nc.max_node_visits != 1:
            node_kwargs["max_node_visits"] = nc.max_node_visits
        if nc.failure_criteria:
            node_kwargs["failure_criteria"] = nc.failure_criteria
        if nc.max_retries is not None:
            node_kwargs["max_retries"] = nc.max_retries
        nodes.append(NodeSpec(**node_kwargs))
    # Build edges
    # Edge ids encode index plus endpoints for readable debugging output.
    edges = []
    for i, ec in enumerate(config.edges):
        edges.append(
            EdgeSpec(
                id=f"e-{i}-{ec.from_node}-{ec.to_node}",
                source=ec.from_node,
                target=ec.to_node,
                condition=condition_map.get(ec.condition, EdgeCondition.ON_SUCCESS),
                condition_expr=ec.condition_expr,
                priority=ec.priority,
                input_mapping=ec.input_mapping,
            )
        )
    # Build entry_points dict for GraphSpec
    # Entry points without their own entry_node fall back to the graph-level
    # entry_node; with no declared entry points a single "default" is used.
    entry_points_dict: dict = {}
    if config.entry_points:
        for ep in config.entry_points:
            entry_points_dict[ep.id] = ep.entry_node or config.entry_node
    else:
        entry_points_dict = {"default": config.entry_node}
    # Build GraphSpec
    # The agent-level identity prompt also gets template resolution; it is
    # coerced to "" because _resolve_template_vars may return None.
    graph_kwargs: dict = {
        "id": f"{config.name}-graph",
        "goal_id": goal.id,
        "version": config.version,
        "entry_node": config.entry_node,
        "entry_points": entry_points_dict,
        "terminal_nodes": config.terminal_nodes,
        "pause_nodes": config.pause_nodes,
        "nodes": nodes,
        "edges": edges,
        "max_tokens": config.max_tokens,
        "loop_config": dict(config.loop_config),
        "conversation_mode": config.conversation_mode,
        "identity_prompt": _resolve_template_vars(
            config.identity_prompt, tvars
        ) or "",
    }
    graph = GraphSpec(**graph_kwargs)
    return graph, goal
def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
"""
Load GraphSpec and Goal from export_graph() output.
@@ -942,7 +1107,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
)
# Build Goal
from framework.graph.goal import Constraint, SuccessCriterion
from framework.orchestrator.goal import Constraint, SuccessCriterion
success_criteria = []
for sc_data in goal_data.get("success_criteria", []):
@@ -979,7 +1144,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
return graph, goal
class AgentRunner:
class AgentLoader:
"""
Loads and runs exported agents with minimal boilerplate.
@@ -991,15 +1156,15 @@ class AgentRunner:
Usage:
# Simple usage
runner = AgentRunner.load("exports/outbound-sales-agent")
runner = AgentLoader.load("exports/outbound-sales-agent")
result = await runner.run({"lead_id": "123"})
# With context manager
async with AgentRunner.load("exports/outbound-sales-agent") as runner:
async with AgentLoader.load("exports/outbound-sales-agent") as runner:
result = await runner.run({"lead_id": "123"})
# With custom tools
runner = AgentRunner.load("exports/outbound-sales-agent")
runner = AgentLoader.load("exports/outbound-sales-agent")
runner.register_tool("my_tool", my_tool_func)
result = await runner.run({"lead_id": "123"})
"""
@@ -1027,7 +1192,7 @@ class AgentRunner:
credential_store: Any | None = None,
):
"""
Initialize the runner (use AgentRunner.load() instead).
Initialize the runner (use AgentLoader.load() instead).
Args:
agent_path: Path to agent folder
@@ -1082,7 +1247,7 @@ class AgentRunner:
self._approval_callback: Callable | None = None
# AgentRuntime — unified execution path for all agents
self._agent_runtime: AgentRuntime | None = None
self._agent_runtime: AgentHost | None = None
# Pre-load validation: structural checks + credentials.
# Fails fast with actionable guidance — no MCP noise on screen.
run_preload_validation(
@@ -1101,14 +1266,7 @@ class AgentRunner:
os.environ["HIVE_AGENT_NAME"] = agent_path.name
os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path)
# Auto-discover MCP servers from mcp_servers.json
mcp_config_path = agent_path / "mcp_servers.json"
if mcp_config_path.exists():
self._load_mcp_servers_from_config(mcp_config_path)
# Auto-discover registry-selected MCP servers from mcp_registry.json
self._load_registry_mcp_servers(agent_path)
# MCP tools are loaded by McpRegistryStage in the pipeline during AgentHost.start()
@staticmethod
def _import_agent_module(agent_path: Path):
"""Import an agent package from its directory path.
@@ -1158,7 +1316,7 @@ class AgentRunner:
interactive: bool = True,
skip_credential_validation: bool | None = None,
credential_store: Any | None = None,
) -> "AgentRunner":
) -> "AgentLoader":
"""
Load an agent from an export folder.
@@ -1299,21 +1457,22 @@ class AgentRunner:
runner._agent_skills = agent_skills
return runner
# Fallback: load from agent.json (legacy JSON-based agents)
# Fallback: load from agent.json (declarative config)
agent_json_path = agent_path / "agent.json"
if not agent_json_path.is_file():
raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")
with open(agent_json_path, encoding="utf-8") as f:
export_data = f.read()
export_data = agent_json_path.read_text(encoding="utf-8")
if not export_data.strip():
raise ValueError(f"Empty agent export file: {agent_json_path}")
raise ValueError(f"Empty agent.json: {agent_json_path}")
try:
graph, goal = load_agent_export(export_data)
except json.JSONDecodeError as exc:
raise ValueError(f"Invalid JSON in agent export file: {agent_json_path}") from exc
parsed = json.loads(export_data)
graph, goal = load_agent_config(parsed)
logger.info(
"Loaded declarative agent config from agent.json (name=%s)",
parsed.get("name"),
)
# Generate flowchart.json if missing (for legacy JSON-based agents)
generate_fallback_flowchart(graph, goal, agent_path)
@@ -1396,60 +1555,6 @@ class AgentRunner:
}
return self._tool_registry.register_mcp_server(server_config)
def _load_mcp_servers_from_config(self, config_path: Path) -> None:
"""Load and register MCP servers from a configuration file."""
self._tool_registry.load_mcp_config(config_path)
def _load_registry_mcp_servers(self, agent_path: Path) -> None:
"""Load and register MCP servers selected via ``mcp_registry.json``."""
registry_json = agent_path / "mcp_registry.json"
if registry_json.is_file():
self._tool_registry.set_mcp_registry_agent_path(agent_path)
else:
self._tool_registry.set_mcp_registry_agent_path(None)
from framework.runner.mcp_registry import MCPRegistry
try:
registry = MCPRegistry()
registry.initialize()
server_configs, selection_max_tools = registry.load_agent_selection(agent_path)
except Exception as exc:
logger.warning(
"Failed to load MCP registry servers for '%s': %s",
agent_path.name,
exc,
)
return
if not server_configs:
return
results = self._tool_registry.load_registry_servers(
server_configs,
preserve_existing_tools=True,
log_collisions=True,
max_tools=selection_max_tools,
)
loaded = [result for result in results if result["status"] == "loaded"]
skipped = [result for result in results if result["status"] != "loaded"]
logger.info(
"Loaded %d/%d MCP registry server(s) for agent '%s'",
len(loaded),
len(results),
agent_path.name,
)
if skipped:
logger.info(
"Skipped MCP registry servers for agent '%s': %s",
agent_path.name,
[
{"server": result["server"], "reason": result["skipped_reason"]}
for result in skipped
],
)
def set_approval_callback(self, callback: Callable) -> None:
"""
Set a callback for human-in-the-loop approval during execution.
@@ -1460,272 +1565,119 @@ class AgentRunner:
self._approval_callback = callback
def _setup(self, event_bus=None) -> None:
"""Set up runtime, LLM, and executor."""
# Configure structured logging (auto-detects JSON vs human-readable)
"""Set up runtime via pipeline stages.
Builds a pipeline with the default stages (LLM, credentials, MCP,
skills) and passes it to AgentHost. The stages initialize during
``AgentHost.start()`` and inject tools/LLM/credentials/skills.
"""
from framework.observability import configure_logging
from framework.pipeline.stages.credential_resolver import CredentialResolverStage
from framework.pipeline.stages.llm_provider import LlmProviderStage
from framework.pipeline.stages.mcp_registry import McpRegistryStage
from framework.pipeline.stages.skill_registry import SkillRegistryStage
from framework.skills.config import SkillsConfig
configure_logging(level="INFO", format="auto")
# Set up session context for tools (agent_id)
# Set up session context for tools
agent_id = self.graph.id or "unknown"
self._tool_registry.set_session_context(agent_id=agent_id)
self._tool_registry.set_session_context(
agent_id=agent_id,
)
# Read MCP server refs from agent.json
mcp_refs = []
agent_json = self.agent_path / "agent.json"
if agent_json.exists():
try:
import json as _json
# Create LLM provider
# Uses LiteLLM which auto-detects the provider from model name
# Skip if already injected (e.g. worker agents with a pre-built LLM)
if self._llm is not None:
pass # LLM already configured externally
elif self.mock_mode:
# Use mock LLM for testing without real API calls
from framework.llm.mock import MockLLMProvider
data = _json.loads(agent_json.read_text(encoding="utf-8"))
mcp_refs = data.get("mcp_servers", [])
except Exception:
pass
self._llm = MockLLMProvider(model=self.model)
else:
from framework.llm.litellm import LiteLLMProvider
# Check if a subscription mode is configured
config = get_hive_config()
llm_config = config.get("llm", {})
use_claude_code = llm_config.get("use_claude_code_subscription", False)
use_codex = llm_config.get("use_codex_subscription", False)
use_kimi_code = llm_config.get("use_kimi_code_subscription", False)
use_antigravity = llm_config.get("use_antigravity_subscription", False)
api_base = llm_config.get("api_base")
api_key = None
if use_claude_code:
# Get OAuth token from Claude Code subscription
api_key = get_claude_code_token()
if not api_key:
logger.warning(
"Claude Code subscription configured but no token found. "
"Run 'claude' to authenticate, then try again."
)
elif use_codex:
# Get OAuth token from Codex subscription
api_key = get_codex_token()
if not api_key:
logger.warning(
"Codex subscription configured but no token found. "
"Run 'codex' to authenticate, then try again."
)
elif use_kimi_code:
# Get API key from Kimi Code CLI config (~/.kimi/config.toml)
api_key = get_kimi_code_token()
if not api_key:
logger.warning(
"Kimi Code subscription configured but no key found. "
"Run 'kimi /login' to authenticate, then try again."
)
elif use_antigravity:
pass # AntigravityProvider handles credentials internally
if api_key and use_claude_code:
# Use litellm's built-in Anthropic OAuth support.
# The lowercase "authorization" key triggers OAuth detection which
# adds the required anthropic-beta and browser-access headers.
self._llm = LiteLLMProvider(
model=self.model,
api_key=api_key,
api_base=api_base,
extra_headers={"authorization": f"Bearer {api_key}"},
)
elif api_key and use_codex:
# OpenAI Codex subscription routes through the ChatGPT backend
# (chatgpt.com/backend-api/codex/responses), NOT the standard
# OpenAI API. The consumer OAuth token lacks platform API scopes.
extra_headers: dict[str, str] = {
"Authorization": f"Bearer {api_key}",
"User-Agent": "CodexBar",
}
account_id = get_codex_account_id()
if account_id:
extra_headers["ChatGPT-Account-Id"] = account_id
self._llm = LiteLLMProvider(
model=self.model,
api_key=api_key,
api_base="https://chatgpt.com/backend-api/codex",
extra_headers=extra_headers,
store=False,
allowed_openai_params=["store"],
)
elif api_key and use_kimi_code:
# Kimi Code subscription uses the Kimi coding API (OpenAI-compatible).
# The api_base is set automatically by LiteLLMProvider for kimi/ models.
self._llm = LiteLLMProvider(
model=self.model,
api_key=api_key,
api_base=api_base,
)
elif use_antigravity:
# Direct OAuth to Google's internal Cloud Code Assist gateway.
# No local proxy required — AntigravityProvider handles token
# refresh and Gemini-format request/response conversion natively.
from framework.llm.antigravity import AntigravityProvider # noqa: PLC0415
provider = AntigravityProvider(model=self.model)
if not provider.has_credentials():
print(
"Warning: Antigravity credentials not found. "
"Run: uv run python core/antigravity_auth.py auth account add"
)
self._llm = provider
else:
# Local models (e.g. Ollama) don't need an API key
if self._is_local_model(self.model):
self._llm = LiteLLMProvider(
model=self.model,
api_base=api_base,
)
else:
# Fall back to environment variable
# First check api_key_env_var from config (set by quickstart)
api_key_env = llm_config.get("api_key_env_var") or self._get_api_key_env_var(
self.model
)
if api_key_env and os.environ.get(api_key_env):
self._llm = LiteLLMProvider(
model=self.model,
api_key=os.environ[api_key_env],
api_base=api_base,
)
else:
# Fall back to credential store
api_key = self._get_api_key_from_credential_store()
if api_key:
self._llm = LiteLLMProvider(
model=self.model, api_key=api_key, api_base=api_base
)
# Set env var so downstream code (e.g. cleanup LLM in
# node._extract_json) can also find it
if api_key_env:
os.environ[api_key_env] = api_key
elif api_key_env:
logger.warning(
"%s not set. LLM calls will fail. "
"Set it with: export %s=your-api-key",
api_key_env,
api_key_env,
)
# Fail fast if the agent needs an LLM but none was configured
if self._llm is None:
has_llm_nodes = any(
node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
)
if has_llm_nodes:
from framework.credentials.models import CredentialError
if self._is_local_model(self.model):
raise CredentialError(
f"Failed to initialize LLM for local model '{self.model}'. "
f"Ensure your local LLM server is running "
f"(e.g. 'ollama serve' for Ollama)."
)
api_key_env = self._get_api_key_env_var(self.model)
hint = (
f"Set it with: export {api_key_env}=your-api-key"
if api_key_env
else "Configure an API key for your LLM provider."
)
raise CredentialError(f"LLM API key not found for model '{self.model}'. {hint}")
# For GCU nodes: auto-register GCU MCP server if needed, then expand tool lists
has_gcu_nodes = any(node.node_type == "gcu" for node in self.graph.nodes)
if has_gcu_nodes:
from framework.graph.gcu import GCU_MCP_SERVER_CONFIG, GCU_SERVER_NAME
# Auto-register GCU MCP server if tools aren't loaded yet
gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
if not gcu_tool_names:
# Resolve cwd to repo-level tools/ (not relative to agent_path)
gcu_config = dict(GCU_MCP_SERVER_CONFIG)
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
gcu_config["cwd"] = str(_repo_root / "tools")
self._tool_registry.register_mcp_server(gcu_config)
gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
# Expand each GCU node's tools list to include all GCU server tools
if gcu_tool_names:
for node in self.graph.nodes:
if node.node_type == "gcu":
existing = set(node.tools)
for tool_name in sorted(gcu_tool_names):
if tool_name not in existing:
node.tools.append(tool_name)
# For event_loop/gcu nodes: auto-register file tools MCP server, then expand tool lists
has_loop_nodes = any(node.node_type in ("event_loop", "gcu") for node in self.graph.nodes)
if has_loop_nodes:
from framework.graph.files import FILES_MCP_SERVER_CONFIG, FILES_MCP_SERVER_NAME
files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
if not files_tool_names:
# Resolve cwd to repo-level tools/ (not relative to agent_path)
files_config = dict(FILES_MCP_SERVER_CONFIG)
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
files_config["cwd"] = str(_repo_root / "tools")
self._tool_registry.register_mcp_server(files_config)
files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
if files_tool_names:
for node in self.graph.nodes:
if node.node_type in ("event_loop", "gcu"):
existing = set(node.tools)
for tool_name in sorted(files_tool_names):
if tool_name not in existing:
node.tools.append(tool_name)
# Get tools for runtime
tools = list(self._tool_registry.get_tools().values())
tool_executor = self._tool_registry.get_executor()
# Collect connected account info for system prompt injection
accounts_prompt = ""
accounts_data: list[dict] | None = None
tool_provider_map: dict[str, str] | None = None
try:
from aden_tools.credentials.store_adapter import CredentialStoreAdapter
if self._credential_store is not None:
adapter = CredentialStoreAdapter(store=self._credential_store)
else:
adapter = CredentialStoreAdapter.default()
accounts_data = adapter.get_all_account_info()
tool_provider_map = adapter.get_tool_provider_map()
if accounts_data:
from framework.graph.prompting import build_accounts_prompt
accounts_prompt = build_accounts_prompt(accounts_data, tool_provider_map)
except Exception:
pass # Best-effort — agent works without account info
# Skill configuration — the runtime handles discovery, loading, trust-gating and
# prompt rasterization. The runner just builds the config.
from framework.skills.config import SkillsConfig
from framework.skills.manager import SkillsManagerConfig
skills_manager_config = SkillsManagerConfig(
skills_config=SkillsConfig.from_agent_vars(
default_skills=getattr(self, "_agent_default_skills", None),
skills=getattr(self, "_agent_skills", None),
# Build default pipeline stages
# Default infrastructure stages (always present)
pipeline_stages = [
LlmProviderStage(
model=self.model,
mock_mode=self.mock_mode,
llm=self._llm,
),
project_root=self.agent_path,
interactive=self._interactive,
)
CredentialResolverStage(
credential_store=self._credential_store,
),
McpRegistryStage(
server_refs=mcp_refs,
agent_path=self.agent_path,
tool_registry=self._tool_registry,
),
SkillRegistryStage(
project_root=self.agent_path,
interactive=self._interactive,
skills_config=SkillsConfig.from_agent_vars(
default_skills=getattr(self, "_agent_default_skills", None),
skills=getattr(self, "_agent_skills", None),
),
),
]
self._setup_agent_runtime(
tools,
tool_executor,
accounts_prompt=accounts_prompt,
accounts_data=accounts_data,
tool_provider_map=tool_provider_map,
# Merge user-configured stages from ~/.hive/configuration.json
from framework.config import get_hive_config
from framework.pipeline.registry import build_pipeline_from_config
hive_config = get_hive_config()
user_stages_config = hive_config.get("pipeline", {}).get("stages", [])
if user_stages_config:
user_pipeline = build_pipeline_from_config(user_stages_config)
pipeline_stages.extend(user_pipeline.stages)
# Merge agent-level overrides from agent.json pipeline field
if agent_json.exists():
try:
agent_pipeline = (
_json.loads(agent_json.read_text(encoding="utf-8"))
.get("pipeline", {})
.get("stages", [])
)
if agent_pipeline:
agent_stages = build_pipeline_from_config(agent_pipeline)
pipeline_stages.extend(agent_stages.stages)
except Exception:
pass
# Create AgentHost directly (no wrapper)
from framework.host.execution_manager import EntryPointSpec
from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.tracker.runtime_log_store import RuntimeLogStore
self._agent_runtime = AgentHost(
graph=self.graph,
goal=self.goal,
storage_path=self._storage_path,
runtime_log_store=RuntimeLogStore(
base_path=self._storage_path / "runtime_logs",
),
checkpoint_config=CheckpointConfig(
enabled=True,
checkpoint_on_node_complete=True,
checkpoint_max_age_days=7,
async_checkpoint=True,
),
graph_id=self.graph.id or self.agent_path.name,
event_bus=event_bus,
skills_manager_config=skills_manager_config,
pipeline_stages=pipeline_stages,
)
self._agent_runtime.register_entry_point(
EntryPointSpec(
id="default",
name="Default",
entry_node=self.graph.entry_node,
trigger_type="manual",
isolation_level="shared",
),
)
self._agent_runtime.intro_message = self.intro_message
def _get_api_key_env_var(self, model: str) -> str | None:
"""Get the environment variable name for the API key based on model name."""
@@ -1833,83 +1785,6 @@ class AgentRunner:
)
return model.lower().startswith(LOCAL_PREFIXES)
def _setup_agent_runtime(
self,
tools: list,
tool_executor: Callable | None,
accounts_prompt: str = "",
accounts_data: list[dict] | None = None,
tool_provider_map: dict[str, str] | None = None,
event_bus=None,
skills_catalog_prompt: str = "",
protocols_prompt: str = "",
skill_dirs: list[str] | None = None,
skills_manager_config=None,
) -> None:
"""Set up multi-entry-point execution using AgentRuntime."""
entry_points = []
# Always create a primary entry point for the graph's entry node.
# For multi-entry-point agents this ensures the primary path (e.g.
# user-facing rule setup) is reachable alongside async entry points.
if self.graph.entry_node:
entry_points.insert(
0,
EntryPointSpec(
id="default",
name="Default",
entry_node=self.graph.entry_node,
trigger_type="manual",
isolation_level="shared",
),
)
# Create AgentRuntime with all entry points
log_store = RuntimeLogStore(base_path=self._storage_path / "runtime_logs")
# Enable checkpointing by default for resumable sessions
from framework.graph.checkpoint_config import CheckpointConfig
checkpoint_config = CheckpointConfig(
enabled=True,
checkpoint_on_node_start=False, # Only checkpoint after nodes complete
checkpoint_on_node_complete=True,
checkpoint_max_age_days=7,
async_checkpoint=True, # Non-blocking
)
# Handle runtime_config - only pass through if it's actually an AgentRuntimeConfig.
# Agents may export a RuntimeConfig (LLM settings) or queen-generated custom classes
# that would crash AgentRuntime if passed through.
runtime_config = None
if self.runtime_config is not None:
from framework.runtime.agent_runtime import AgentRuntimeConfig
if isinstance(self.runtime_config, AgentRuntimeConfig):
runtime_config = self.runtime_config
self._agent_runtime = create_agent_runtime(
graph=self.graph,
goal=self.goal,
storage_path=self._storage_path,
entry_points=entry_points,
llm=self._llm,
tools=tools,
tool_executor=tool_executor,
runtime_log_store=log_store,
checkpoint_config=checkpoint_config,
config=runtime_config,
graph_id=self.graph.id or self.agent_path.name,
accounts_prompt=accounts_prompt,
accounts_data=accounts_data,
tool_provider_map=tool_provider_map,
event_bus=event_bus,
skills_manager_config=skills_manager_config,
)
# Pass intro_message through for TUI display
self._agent_runtime.intro_message = self.intro_message
# ------------------------------------------------------------------
# Execution modes
#
@@ -1990,7 +1865,7 @@ class AgentRunner:
sub_ids: list[str] = []
if has_queen and sys.stdin.isatty():
from framework.runtime.event_bus import EventType
from framework.host.event_bus import EventType
runtime = self._agent_runtime
@@ -2246,7 +2121,7 @@ class AgentRunner:
except ImportError:
# aden_tools not installed - fall back to direct check
has_llm_nodes = any(
node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
node.node_type == "event_loop" for node in self.graph.nodes
)
if has_llm_nodes:
api_key_env = self._get_api_key_env_var(self.model)
@@ -2283,7 +2158,7 @@ class AgentRunner:
# Run synchronous cleanup
self.cleanup()
async def __aenter__(self) -> "AgentRunner":
async def __aenter__(self) -> "AgentLoader":
"""Context manager entry."""
self._setup()
if self._agent_runtime is not None:
@@ -19,7 +19,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
run_parser.add_argument(
"agent_path",
type=str,
help="Path to agent folder (containing agent.json)",
help="Path to agent folder (containing agent.json or agent.py)",
)
run_parser.add_argument(
"--input",
@@ -87,7 +87,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
info_parser.add_argument(
"agent_path",
type=str,
help="Path to agent folder (containing agent.json)",
help="Path to agent folder (containing agent.json or agent.py)",
)
info_parser.add_argument(
"--json",
@@ -105,7 +105,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
validate_parser.add_argument(
"agent_path",
type=str,
help="Path to agent folder (containing agent.json)",
help="Path to agent folder (containing agent.json or agent.py)",
)
validate_parser.set_defaults(func=cmd_validate)
@@ -310,7 +310,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):
Updated runner if user proceeds, None if user aborts.
"""
from framework.credentials.setup import CredentialSetupSession
from framework.runner import AgentRunner
from framework.loader import AgentLoader
while True:
print()
@@ -328,7 +328,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):
if result.success:
# Reload runner with updated credentials
try:
runner = AgentRunner.load(agent_path, model=model)
runner = AgentLoader.load(agent_path, model=model)
except Exception as e:
print(f"Error reloading agent: {e}")
return None
@@ -342,7 +342,7 @@ def cmd_run(args: argparse.Namespace) -> int:
from framework.credentials.models import CredentialError
from framework.observability import configure_logging
from framework.runner import AgentRunner
from framework.loader import AgentLoader
# Set logging level (quiet by default for cleaner output)
if args.quiet:
@@ -390,7 +390,7 @@ def cmd_run(args: argparse.Namespace) -> int:
# Standard execution
# AgentRunner handles credential setup interactively when stdin is a TTY.
try:
runner = AgentRunner.load(
runner = AgentLoader.load(
args.agent_path,
model=args.model,
)
@@ -528,10 +528,10 @@ def cmd_run(args: argparse.Namespace) -> int:
def cmd_info(args: argparse.Namespace) -> int:
"""Show agent information."""
from framework.credentials.models import CredentialError
from framework.runner import AgentRunner
from framework.loader import AgentLoader
try:
runner = AgentRunner.load(args.agent_path)
runner = AgentLoader.load(args.agent_path)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return 1
@@ -595,10 +595,10 @@ def cmd_info(args: argparse.Namespace) -> int:
def cmd_validate(args: argparse.Namespace) -> int:
"""Validate an exported agent."""
from framework.credentials.models import CredentialError
from framework.runner import AgentRunner
from framework.loader import AgentLoader
try:
runner = AgentRunner.load(args.agent_path)
runner = AgentLoader.load(args.agent_path)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return 1
@@ -632,7 +632,7 @@ def cmd_validate(args: argparse.Namespace) -> int:
def cmd_list(args: argparse.Namespace) -> int:
"""List available agents."""
from framework.runner import AgentRunner
from framework.loader import AgentLoader
directory = Path(args.directory)
if not directory.exists():
@@ -644,7 +644,7 @@ def cmd_list(args: argparse.Namespace) -> int:
for path in directory.iterdir():
if _is_valid_agent_dir(path):
try:
runner = AgentRunner.load(path)
runner = AgentLoader.load(path)
info = runner.info()
agents.append(
{
@@ -686,7 +686,7 @@ def cmd_list(args: argparse.Namespace) -> int:
def _interactive_approval(request):
"""Interactive approval callback for HITL mode."""
from framework.graph import ApprovalDecision, ApprovalResult
from framework.orchestrator import ApprovalDecision, ApprovalResult
print()
print("=" * 60)
@@ -775,7 +775,7 @@ def cmd_shell(args: argparse.Namespace) -> int:
from framework.credentials.models import CredentialError
from framework.observability import configure_logging
from framework.runner import AgentRunner
from framework.loader import AgentLoader
configure_logging(level="INFO")
@@ -789,7 +789,7 @@ def cmd_shell(args: argparse.Namespace) -> int:
return 1
try:
runner = AgentRunner.load(agent_path)
runner = AgentLoader.load(agent_path)
except CredentialError as e:
print(f"\n{e}", file=sys.stderr)
return 1
@@ -1004,17 +1004,35 @@ def _get_framework_agents_dir() -> Path:
def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
"""Extract name and description from a Python-based agent's config.py.
"""Extract name and description from an agent directory.
Uses AST parsing to safely extract values without executing code.
Checks agent.json first (declarative), then falls back to config.py
(legacy Python). Uses AST parsing for Python to avoid executing code.
Returns (name, description) tuple, with fallbacks if parsing fails.
"""
import ast
config_path = agent_path / "config.py"
fallback_name = agent_path.name.replace("_", " ").title()
fallback_desc = "(Python-based agent)"
# Declarative agent: read from agent.json
agent_json = agent_path / "agent.json"
if agent_json.exists():
try:
import json
data = json.loads(agent_json.read_text(encoding="utf-8"))
if isinstance(data, dict):
name = data.get("name", fallback_name)
# Convert kebab-case to Title Case for display
if "-" in name and " " not in name:
name = name.replace("-", " ").title()
desc = data.get("description", fallback_desc)
return name, desc
except Exception:
pass
config_path = agent_path / "config.py"
if not config_path.exists():
return fallback_name, fallback_desc
@@ -1083,7 +1101,7 @@ def _is_valid_agent_dir(path: Path) -> bool:
def _has_agents(directory: Path) -> bool:
"""Check if a directory contains any valid agents (folders with agent.json or agent.py)."""
"""Check if a directory contains any valid agents."""
if not directory.exists():
return False
return any(_is_valid_agent_dir(p) for p in directory.iterdir())
@@ -14,7 +14,7 @@ from typing import Any, Literal
import httpx
from framework.runner.mcp_errors import MCPToolNotFoundError
from framework.loader.mcp_errors import MCPToolNotFoundError
logger = logging.getLogger(__name__)
@@ -5,7 +5,7 @@ import threading
import httpx
from framework.runner.mcp_client import MCPClient, MCPServerConfig
from framework.loader.mcp_client import MCPClient, MCPServerConfig
logger = logging.getLogger(__name__)
@@ -14,9 +14,9 @@ from typing import Any, Literal
import httpx
from framework.runner.mcp_client import MCPClient, MCPServerConfig
from framework.runner.mcp_connection_manager import MCPConnectionManager
from framework.runner.mcp_errors import (
from framework.loader.mcp_client import MCPClient, MCPServerConfig
from framework.loader.mcp_connection_manager import MCPConnectionManager
from framework.loader.mcp_errors import (
MCPError,
MCPErrorCode,
MCPInstallError,
@@ -28,7 +28,7 @@ from typing import Any
def _get_registry(base_path: Path | None = None):
"""Initialize and return an MCPRegistry instance."""
from framework.runner.mcp_registry import MCPRegistry
from framework.loader.mcp_registry import MCPRegistry
registry = MCPRegistry(base_path=base_path)
registry.initialize()
@@ -11,8 +11,8 @@ from dataclasses import dataclass, field
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.node import NodeSpec
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.node import NodeSpec
logger = logging.getLogger(__name__)
@@ -262,15 +262,21 @@ class ToolRegistry:
is_error=False,
)
registry_ref = self
def executor(tool_use: ToolUse) -> ToolResult:
if tool_use.name not in self._tools:
# Check if credential files changed (lightweight dir listing).
# If new OAuth tokens appeared, restarts MCP servers to pick them up.
registry_ref.resync_mcp_servers_if_needed()
if tool_use.name not in registry_ref._tools:
return ToolResult(
tool_use_id=tool_use.id,
content=json.dumps({"error": f"Unknown tool: {tool_use.name}"}),
is_error=True,
)
registered = self._tools[tool_use.name]
registered = registry_ref._tools[tool_use.name]
try:
result = registered.executor(tool_use.input)
@@ -635,8 +641,8 @@ class ToolRegistry:
Number of tools registered from this server
"""
try:
from framework.runner.mcp_client import MCPClient, MCPServerConfig
from framework.runner.mcp_connection_manager import MCPConnectionManager
from framework.loader.mcp_client import MCPClient, MCPServerConfig
from framework.loader.mcp_connection_manager import MCPConnectionManager
# Build config object
config = MCPServerConfig(
@@ -883,7 +889,7 @@ class ToolRegistry:
"""Re-run ``mcp_registry.json`` resolution and register servers (post-resync)."""
if self._mcp_registry_agent_path is None:
return
from framework.runner.mcp_registry import MCPRegistry
from framework.loader.mcp_registry import MCPRegistry
try:
reg = MCPRegistry()
@@ -922,6 +928,11 @@ class ToolRegistry:
clients and re-loads them so the new subprocess picks up the fresh
credentials.
Note: Individual credential TTL/refresh is handled by the MCP server
process internally -- it resolves tokens from the credential store
on every tool call, not at startup. This method only handles the case
where entirely new credential files appear.
Returns True if a resync was performed, False otherwise.
"""
if not self._mcp_clients or self._mcp_config_path is None:
@@ -975,7 +986,7 @@ class ToolRegistry:
server_name = self._mcp_client_servers.get(client_id, client.config.name)
try:
if client_id in self._mcp_managed_clients:
from framework.runner.mcp_connection_manager import MCPConnectionManager
from framework.loader.mcp_connection_manager import MCPConnectionManager
MCPConnectionManager.get_instance().release(server_name)
else:
+27
View File
@@ -0,0 +1,27 @@
"""Orchestrator layer -- how agents are composed via graphs.
Lazy imports to avoid circular dependencies with graph/event_loop/*.
"""
def __getattr__(name: str):
if name in ("GraphContext",):
from framework.orchestrator.context import GraphContext
return GraphContext
if name in ("DEFAULT_MAX_TOKENS", "EdgeCondition", "EdgeSpec", "GraphSpec"):
from framework.orchestrator import edge as _e
return getattr(_e, name)
if name in ("Orchestrator", "ExecutionResult"):
from framework.orchestrator import orchestrator as _o
return getattr(_o, name)
if name in ("Constraint", "Goal", "GoalStatus", "SuccessCriterion"):
from framework.orchestrator import goal as _g
return getattr(_g, name)
if name in ("DataBuffer", "NodeContext", "NodeProtocol", "NodeResult", "NodeSpec"):
from framework.orchestrator import node as _n
return getattr(_n, name)
if name in ("NodeWorker", "Activation", "FanOutTag", "FanOutTracker",
"WorkerCompletion", "WorkerLifecycle"):
from framework.orchestrator import node_worker as _nw
return getattr(_nw, name)
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -16,7 +16,7 @@ from collections.abc import AsyncIterator
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from framework.runtime.event_bus import EventBus
from framework.host.event_bus import EventBus
logger = logging.getLogger(__name__)
@@ -13,10 +13,10 @@ import asyncio
from dataclasses import dataclass, field
from typing import Any
from framework.graph.edge import GraphSpec
from framework.graph.goal import Goal
from framework.graph.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
from framework.runtime.core import Runtime
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.goal import Goal
from framework.orchestrator.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
from framework.tracker.decision_tracker import DecisionTracker
@dataclass
@@ -26,7 +26,7 @@ class GraphContext:
graph: GraphSpec
goal: Goal
buffer: DataBuffer
runtime: Runtime
runtime: DecisionTracker
llm: Any # LLMProvider
tools: list[Any] # list[Tool]
tool_executor: Any # Callable
@@ -106,7 +106,7 @@ def build_node_accounts_prompt(
resolved = accounts_prompt
if accounts_data and tool_provider_map:
from framework.graph.prompting import build_accounts_prompt
from framework.orchestrator.prompting import build_accounts_prompt
filtered = build_accounts_prompt(
accounts_data,
@@ -125,11 +125,27 @@ def _resolve_available_tools(
tools: list[Any],
override_tools: list[Any] | None,
) -> list[Any]:
"""Select tools available to the current node."""
"""Select tools available to the current node.
Respects ``node_spec.tool_access_policy``:
- ``"all"`` -- all tools from the registry (no filtering).
- ``"explicit"`` -- only tools whose name appears in ``node_spec.tools``.
If the list is empty, **no tools** are given (default-deny).
- ``"none"`` -- no tools at all.
"""
if override_tools is not None:
return list(override_tools)
policy = getattr(node_spec, "tool_access_policy", "explicit")
if policy == "none":
return []
if policy == "all":
return list(tools)
# "explicit" (default): only tools named in node_spec.tools.
if not node_spec.tools:
return []
@@ -149,7 +165,7 @@ def _derive_input_data(buffer: DataBuffer, input_keys: list[str]) -> dict[str, A
def build_node_context(
*,
runtime: Runtime,
runtime: DecisionTracker,
node_spec: NodeSpec,
buffer: DataBuffer,
goal: Goal,
@@ -234,9 +250,6 @@ def build_node_context(
execution_id=execution_id,
run_id=run_id,
stream_id=stream_id,
node_registry=node_registry or {},
all_tools=list(all_tools or tools),
shared_node_registry=shared_node_registry or {},
dynamic_tools_provider=dynamic_tools_provider,
dynamic_prompt_provider=dynamic_prompt_provider,
dynamic_memory_provider=dynamic_memory_provider,
@@ -308,9 +321,6 @@ def build_node_context_from_graph_context(
execution_id=gc.execution_id,
run_id=gc.run_id,
stream_id=gc.stream_id,
node_registry=node_registry or gc.node_spec_registry,
all_tools=gc.tools,
shared_node_registry=gc.node_registry,
dynamic_tools_provider=gc.dynamic_tools_provider,
dynamic_prompt_provider=gc.dynamic_prompt_provider,
dynamic_memory_provider=gc.dynamic_memory_provider,
@@ -6,10 +6,10 @@ import logging
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from framework.graph.conversation import _try_extract_key
from framework.agent_loop.conversation import _try_extract_key
if TYPE_CHECKING:
from framework.graph.conversation import NodeConversation
from framework.agent_loop.conversation import NodeConversation
from framework.llm.provider import LLMProvider
logger = logging.getLogger(__name__)
@@ -15,7 +15,7 @@ import logging
from dataclasses import dataclass
from typing import Any
from framework.graph.conversation import NodeConversation
from framework.agent_loop.conversation import NodeConversation
from framework.llm.provider import LLMProvider
logger = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ from typing import Any
from pydantic import BaseModel, Field, model_validator
from framework.graph.safe_eval import safe_eval
from framework.orchestrator.safe_eval import safe_eval
logger = logging.getLogger(__name__)
@@ -538,13 +538,6 @@ class GraphSpec(BaseModel):
for edge in self.get_outgoing_edges(current):
to_visit.append(edge.target)
# Also mark sub-agents as reachable (they're invoked via delegate_to_sub_agent, not edges)
for node in self.nodes:
if node.id in reachable:
sub_agents = getattr(node, "sub_agents", []) or []
for sub_agent_id in sub_agents:
reachable.add(sub_agent_id)
for node in self.nodes:
if node.id not in reachable:
# Skip if node is a pause node or entry point target
@@ -583,48 +576,4 @@ class GraphSpec(BaseModel):
else:
seen_keys[key] = node_id
# GCU nodes must only be used as subagents
gcu_node_ids = {n.id for n in self.nodes if n.node_type == "gcu"}
if gcu_node_ids:
# GCU nodes must not be entry nodes
if self.entry_node in gcu_node_ids:
errors.append(
f"GCU node '{self.entry_node}' is used as entry node. "
"GCU nodes must only be used as subagents via delegate_to_sub_agent()."
)
# GCU nodes must not be terminal nodes
for term in self.terminal_nodes:
if term in gcu_node_ids:
errors.append(
f"GCU node '{term}' is used as terminal node. "
"GCU nodes must only be used as subagents."
)
# GCU nodes must not be connected via edges
for edge in self.edges:
if edge.source in gcu_node_ids:
errors.append(
f"GCU node '{edge.source}' is used as edge source (edge '{edge.id}'). "
"GCU nodes must only be used as subagents, not connected via edges."
)
if edge.target in gcu_node_ids:
errors.append(
f"GCU node '{edge.target}' is used as edge target (edge '{edge.id}'). "
"GCU nodes must only be used as subagents, not connected via edges."
)
# GCU nodes must be referenced in at least one parent's sub_agents
referenced_subagents = set()
for node in self.nodes:
for sa_id in node.sub_agents or []:
referenced_subagents.add(sa_id)
orphaned = gcu_node_ids - referenced_subagents
for nid in orphaned:
errors.append(
f"GCU node '{nid}' is not referenced in any node's sub_agents list. "
"GCU nodes must be declared as subagents of a parent node."
)
return {"errors": errors, "warnings": warnings}
@@ -1,34 +1,14 @@
"""GCU (browser automation) node type constants.
"""Browser automation best-practices prompt.
A ``gcu`` node is an ``event_loop`` node with two automatic enhancements:
1. A canonical browser best-practices system prompt is prepended.
2. All tools from the GCU MCP server are auto-included.
This module provides ``GCU_BROWSER_SYSTEM_PROMPT`` -- a canonical set of
browser automation guidelines that can be included in any node's system
prompt that uses browser tools from the gcu-tools MCP server.
No new ``NodeProtocol`` subclass is required — the ``gcu`` type is purely a declarative
signal processed by the runner and executor at setup time.
Browser tools are registered via the global MCP registry (gcu-tools).
Nodes that need browser access declare ``tools: {policy: "all"}`` in their
agent.json config.
"""
# ---------------------------------------------------------------------------
# MCP server identity
# ---------------------------------------------------------------------------
GCU_SERVER_NAME = "gcu-tools"
"""Name used to identify the GCU MCP server in ``mcp_servers.json``."""
GCU_MCP_SERVER_CONFIG: dict = {
"name": GCU_SERVER_NAME,
"transport": "stdio",
"command": "uv",
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
"cwd": "../../tools",
"description": "GCU tools for browser automation",
}
"""Default stdio config for the GCU MCP server (relative to exports/<agent>/)."""
# ---------------------------------------------------------------------------
# Browser best-practices system prompt
# ---------------------------------------------------------------------------
GCU_BROWSER_SYSTEM_PROMPT = """\
# Browser Automation Best Practices
@@ -25,7 +25,7 @@ from typing import Any
from pydantic import BaseModel, Field
from framework.llm.provider import LLMProvider, Tool
from framework.runtime.core import Runtime
from framework.tracker.decision_tracker import DecisionTracker
logger = logging.getLogger(__name__)
@@ -144,15 +144,19 @@ class NodeSpec(BaseModel):
# For LLM nodes
system_prompt: str | None = Field(default=None, description="System prompt for LLM nodes")
tools: list[str] = Field(default_factory=list, description="Tool names this node can use")
tool_access_policy: str = Field(
default="explicit",
description=(
"Tool access policy for this node. "
"'all' = all tools from registry, "
"'explicit' = only tools listed in `tools` (default, recommended), "
"'none' = no tools at all."
),
)
model: str | None = Field(
default=None, description="Specific model to use (defaults to graph default)"
)
# For subagent delegation
sub_agents: list[str] = Field(
default_factory=list,
description="Node IDs that can be invoked as subagents from this node",
)
# For function nodes
function: str | None = Field(
default=None, description="Function name or path for function nodes"
@@ -459,7 +463,7 @@ class NodeContext:
"""
# Core runtime
runtime: Runtime
runtime: DecisionTracker
# Node identity
node_id: str
@@ -526,20 +530,6 @@ class NodeContext:
# Falls back to node_id when not set (legacy / standalone executor).
stream_id: str = ""
# Subagent mode
is_subagent_mode: bool = False # True when running as a subagent (prevents nested delegation)
report_callback: Any = None # async (message: str, data: dict | None) -> None
node_registry: dict[str, "NodeSpec"] = field(default_factory=dict) # For subagent lookup
# Full tool catalog (unfiltered) — used by _execute_subagent to resolve
# subagent tools that aren't in the parent node's filtered available_tools.
all_tools: list[Tool] = field(default_factory=list)
# Shared reference to the executor's node_registry — used by subagent
# escalation (_EscalationReceiver) to register temporary receivers that
# the inject_input() routing chain can find.
shared_node_registry: dict[str, Any] = field(default_factory=dict)
# Dynamic tool provider — when set, EventLoopNode rebuilds the tool
# list from this callback at the start of each iteration. Used by
# the queen to switch between building-mode and running-mode tools.
@@ -19,15 +19,15 @@ from dataclasses import dataclass, field
from enum import StrEnum
from typing import Any
from framework.graph.context import GraphContext, build_node_context_from_graph_context
from framework.graph.edge import EdgeCondition, EdgeSpec
from framework.graph.node import (
from framework.orchestrator.context import GraphContext, build_node_context_from_graph_context
from framework.orchestrator.edge import EdgeCondition, EdgeSpec
from framework.orchestrator.node import (
NodeContext,
NodeProtocol,
NodeResult,
NodeSpec,
)
from framework.graph.validator import OutputValidator
from framework.orchestrator.validator import OutputValidator
logger = logging.getLogger(__name__)
@@ -109,7 +109,7 @@ class RetryState:
# ---------------------------------------------------------------------------
class WorkerAgent:
class NodeWorker:
"""First-class autonomous worker for one node in the graph.
Lifecycle:
@@ -355,7 +355,7 @@ class WorkerAgent:
# Only skip retries for actual EventLoopNode instances (they handle
# retries internally). Custom NodeProtocol impls registered via
# register_node should be retried by the executor.
from framework.graph.event_loop_node import EventLoopNode as _ELN
from framework.agent_loop.agent_loop import AgentLoop as _ELN
if isinstance(node_impl, _ELN):
max_retries = 0
@@ -603,10 +603,10 @@ class WorkerAgent:
return self._node_impl
# Auto-create EventLoopNode
if self.node_spec.node_type in ("event_loop", "gcu"):
from framework.graph.event_loop.types import LoopConfig
from framework.graph.event_loop_node import EventLoopNode
from framework.graph.node import warn_if_deprecated_client_facing
if self.node_spec.node_type == "event_loop":
from framework.agent_loop.internals.types import LoopConfig
from framework.agent_loop.agent_loop import AgentLoop
from framework.orchestrator.node import warn_if_deprecated_client_facing
conv_store = None
if gc.storage_path:
@@ -619,7 +619,7 @@ class WorkerAgent:
warn_if_deprecated_client_facing(self.node_spec)
default_max_iter = 100 if self.node_spec.supports_direct_user_io() else 50
node = EventLoopNode(
node = AgentLoop(
event_bus=gc.event_bus,
judge=None,
config=LoopConfig(
@@ -734,7 +734,7 @@ class WorkerAgent:
if not next_spec or next_spec.node_type != "event_loop":
return
from framework.graph.prompting import (
from framework.orchestrator.prompting import (
TransitionSpec,
build_narrative,
build_system_prompt_for_node_context,
@@ -16,21 +16,21 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
from framework.graph.checkpoint_config import CheckpointConfig
from framework.graph.context import GraphContext, build_node_context
from framework.graph.conversation import LEGACY_RUN_ID
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.goal import Goal
from framework.graph.node import (
from framework.orchestrator.checkpoint_config import CheckpointConfig
from framework.orchestrator.context import GraphContext, build_node_context
from framework.agent_loop.conversation import LEGACY_RUN_ID
from framework.orchestrator.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.orchestrator.goal import Goal
from framework.orchestrator.node import (
DataBuffer,
NodeProtocol,
NodeResult,
NodeSpec,
)
from framework.graph.validator import OutputValidator
from framework.orchestrator.validator import OutputValidator
from framework.llm.provider import LLMProvider, Tool
from framework.observability import set_trace_context
from framework.runtime.core import Runtime
from framework.tracker.decision_tracker import DecisionTracker
from framework.schemas.checkpoint import Checkpoint
from framework.storage.checkpoint_store import CheckpointStore
from framework.utils.io import atomic_write
@@ -112,7 +112,7 @@ class ParallelExecutionConfig:
branch_timeout_seconds: float = 300.0
class GraphExecutor:
class Orchestrator:
"""
Executes agent graphs.
@@ -133,7 +133,7 @@ class GraphExecutor:
def __init__(
self,
runtime: Runtime,
runtime: DecisionTracker,
llm: LLMProvider | None = None,
tools: list[Tool] | None = None,
tool_executor: Callable | None = None,
@@ -165,7 +165,7 @@ class GraphExecutor:
Initialize the executor.
Args:
runtime: Runtime for decision logging
runtime: DecisionTracker for decision logging
llm: LLM provider for LLM nodes
tools: Available tools
tool_executor: Function to execute tools
@@ -202,7 +202,7 @@ class GraphExecutor:
self.validator = OutputValidator()
self.logger = logging.getLogger(__name__)
self.logger.debug(
"[GraphExecutor.__init__] Created with"
"[Orchestrator.__init__] Created with"
" stream_id=%s, execution_id=%s,"
" initial node_registry keys: %s",
stream_id,
@@ -361,8 +361,8 @@ class GraphExecutor:
Uses the same recursive binary-search splitting as EventLoopNode.
"""
from framework.graph.conversation import extract_tool_call_history
from framework.graph.event_loop_node import _is_context_too_large_error
from framework.agent_loop.conversation import extract_tool_call_history
from framework.agent_loop.agent_loop import _is_context_too_large_error
if _depth > self._PHASE_LLM_MAX_DEPTH:
raise RuntimeError("Phase LLM compaction recursion limit")
@@ -690,7 +690,7 @@ class GraphExecutor:
# and spillover files share the same session-scoped directory.
_ctx_token = None
if self._storage_path:
from framework.runner.tool_registry import ToolRegistry
from framework.loader.tool_registry import ToolRegistry
_ctx_token = ToolRegistry.set_execution_context(
data_dir=str(self._storage_path / "data"),
@@ -712,13 +712,12 @@ class GraphExecutor:
finally:
if _ctx_token is not None:
from framework.runner.tool_registry import ToolRegistry
from framework.loader.tool_registry import ToolRegistry
ToolRegistry.reset_execution_context(_ctx_token)
VALID_NODE_TYPES = {
"event_loop",
"gcu",
}
# Node types removed in v0.5 — provide migration guidance
REMOVED_NODE_TYPES = {
@@ -736,11 +735,11 @@ class GraphExecutor:
# Check registry first
if node_spec.id in self.node_registry:
logger.debug(
"[GraphExecutor._get_node_implementation] Found node '%s' in registry", node_spec.id
"[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id
)
return self.node_registry[node_spec.id]
logger.debug(
"[GraphExecutor._get_node_implementation]"
"[Orchestrator._get_node_implementation]"
" Node '%s' not in registry (keys: %s),"
" creating new",
node_spec.id,
@@ -764,10 +763,10 @@ class GraphExecutor:
)
# Create based on type
if node_spec.node_type in ("event_loop", "gcu"):
if node_spec.node_type == "event_loop":
# Auto-create EventLoopNode with sensible defaults.
# Custom configs can still be pre-registered via node_registry.
from framework.graph.event_loop_node import EventLoopNode, LoopConfig
from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
# Create a FileConversationStore if a storage path is available
conv_store = None
@@ -787,13 +786,13 @@ class GraphExecutor:
if self._storage_path:
spillover = str(self._storage_path / "data")
from framework.graph.node import warn_if_deprecated_client_facing
from framework.orchestrator.node import warn_if_deprecated_client_facing
warn_if_deprecated_client_facing(node_spec)
lc = self._loop_config
default_max_iter = 100 if node_spec.supports_direct_user_io() else 50
node = EventLoopNode(
node = AgentLoop(
event_bus=self._event_bus,
judge=None, # implicit judge: accept when output_keys are filled
config=LoopConfig(
@@ -812,7 +811,7 @@ class GraphExecutor:
# Cache so inject_event() is reachable for queen interaction and escalation routing
self.node_registry[node_spec.id] = node
logger.debug(
"[GraphExecutor._get_node_implementation]"
"[Orchestrator._get_node_implementation]"
" Cached node '%s' in node_registry,"
" registry now has keys: %s",
node_spec.id,
@@ -998,10 +997,10 @@ class GraphExecutor:
branch_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
effective_max_retries = node_spec.max_retries
# Only override for actual EventLoopNode instances, not custom NodeProtocol impls
from framework.graph.event_loop_node import EventLoopNode
# Only override for actual AgentLoop instances, not custom NodeProtocol impls
from framework.agent_loop.agent_loop import AgentLoop as _AgentLoop # noqa: F811
if isinstance(branch_impl, EventLoopNode) and effective_max_retries > 1:
if isinstance(branch_impl, _AgentLoop) and effective_max_retries > 1:
self.logger.warning(
f"EventLoopNode '{node_spec.id}' has "
f"max_retries={effective_max_retries}. Overriding "
@@ -1042,9 +1041,6 @@ class GraphExecutor:
execution_id=self._execution_id,
run_id=self._run_id,
stream_id=self._stream_id,
node_registry=node_registry,
all_tools=self.tools,
shared_node_registry=self.node_registry,
dynamic_tools_provider=self.dynamic_tools_provider,
dynamic_prompt_provider=self.dynamic_prompt_provider,
dynamic_memory_provider=self.dynamic_memory_provider,
@@ -1293,14 +1289,14 @@ class GraphExecutor:
Replaces the imperative while-loop with autonomous workers that
self-activate based on edge conditions and fan-out tracking.
"""
from framework.graph.worker_agent import (
from framework.orchestrator.node_worker import (
Activation,
FanOutTag,
WorkerAgent,
NodeWorker,
WorkerCompletion,
WorkerLifecycle,
)
from framework.runtime.event_bus import AgentEvent, EventType
from framework.host.event_bus import AgentEvent, EventType
# Build shared graph context
gc = GraphContext(
@@ -1339,9 +1335,9 @@ class GraphExecutor:
)
# Create one WorkerAgent per node
workers: dict[str, WorkerAgent] = {}
workers: dict[str, NodeWorker] = {}
for node_spec in graph.nodes:
workers[node_spec.id] = WorkerAgent(node_spec=node_spec, graph_context=gc)
workers[node_spec.id] = NodeWorker(node_spec=node_spec, graph_context=gc)
# Identify entry workers (graph entry node, not based on edge count)
# A node can be the entry point AND have incoming feedback edges.
@@ -1442,7 +1438,7 @@ class GraphExecutor:
def _route_activation(
activation: Activation,
workers_map: dict[str, WorkerAgent],
workers_map: dict[str, NodeWorker],
pending_tasks_map: dict[str, asyncio.Task],
*,
has_event_subscription: bool,
@@ -9,7 +9,7 @@ import json
from pathlib import Path
from typing import TYPE_CHECKING
from framework.graph.prompting import (
from framework.orchestrator.prompting import (
EXECUTION_SCOPE_PREAMBLE,
TransitionSpec,
build_accounts_prompt,
@@ -19,7 +19,7 @@ from framework.graph.prompting import (
)
if TYPE_CHECKING:
from framework.graph.node import DataBuffer, NodeSpec
from framework.orchestrator.node import DataBuffer, NodeSpec
_with_datetime = stamp_prompt_datetime
@@ -36,7 +36,7 @@ def compose_system_prompt(
node_type_preamble: str | None = None,
) -> str:
"""Compatibility wrapper for the legacy function signature."""
from framework.graph.prompting import NodePromptSpec
from framework.orchestrator.prompting import NodePromptSpec
spec = NodePromptSpec(
identity_prompt=identity_prompt or "",
@@ -66,7 +66,6 @@ def compose_system_prompt(
protocols_prompt=spec.protocols_prompt,
node_type=spec.node_type,
output_keys=spec.output_keys,
is_subagent_mode=spec.is_subagent_mode,
)
return build_system_prompt(spec)
@@ -135,7 +134,7 @@ def build_transition_marker(
)
from framework.graph.prompting import build_transition_message # noqa: E402
from framework.orchestrator.prompting import build_transition_message # noqa: E402
__all__ = [
"EXECUTION_SCOPE_PREAMBLE",
@@ -12,8 +12,8 @@ from datetime import datetime
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from framework.graph.edge import GraphSpec
from framework.graph.node import DataBuffer
from framework.orchestrator.edge import GraphSpec
from framework.orchestrator.node import DataBuffer
# Injected into every worker node's system prompt so the LLM understands
@@ -40,7 +40,6 @@ class NodePromptSpec:
memory_prompt: str = ""
node_type: str = "event_loop"
output_keys: tuple[str, ...] = ()
is_subagent_mode: bool = False
@dataclass(frozen=True)
@@ -165,7 +164,6 @@ def build_prompt_spec_from_node_context(
memory_prompt=resolved_memory_prompt,
node_type=ctx.node_spec.node_type,
output_keys=tuple(ctx.node_spec.output_keys or ()),
is_subagent_mode=bool(getattr(ctx, "is_subagent_mode", False)),
)
@@ -195,13 +193,10 @@ def build_system_prompt(spec: NodePromptSpec) -> str:
if spec.narrative:
parts.append(f"\n--- Context (what has happened so far) ---\n{spec.narrative}")
if not spec.is_subagent_mode and spec.node_type in ("event_loop", "gcu") and spec.output_keys:
if not False and spec.node_type == "event_loop" and spec.output_keys:
parts.append(f"\n{EXECUTION_SCOPE_PREAMBLE}")
if spec.node_type == "gcu":
from framework.graph.gcu import GCU_BROWSER_SYSTEM_PROMPT
parts.append(f"\n{GCU_BROWSER_SYSTEM_PROMPT}")
if spec.focus_prompt:
parts.append(f"\n--- Current Focus ---\n{spec.focus_prompt}")
+32
View File
@@ -0,0 +1,32 @@
"""Pipeline middleware for the agent runtime.
Stages run in order when :meth:`AgentRuntime.trigger` receives a request.
Each stage can pass the context through, transform the input data, or reject
the request entirely. This is the runtime-level analogue of AstrBot's
pipeline architecture and lets operators compose rate limiting, validation,
cost guards, and custom pre/post-processing without patching core code.
"""
from framework.pipeline.registry import (
build_pipeline_from_config,
build_stage,
register,
)
from framework.pipeline.runner import PipelineRunner
from framework.pipeline.stage import (
PipelineContext,
PipelineRejectedError,
PipelineResult,
PipelineStage,
)
__all__ = [
"PipelineContext",
"PipelineRejectedError",
"PipelineResult",
"PipelineRunner",
"PipelineStage",
"build_pipeline_from_config",
"build_stage",
"register",
]
@@ -0,0 +1,44 @@
"""Execution-level middleware protocol.
Unlike :class:`PipelineStage` (which gates ``AgentHost.trigger()`` at the
request level), execution middleware runs at the start of **every** execution
attempt inside ``ExecutionManager._run_execution()`` -- including resurrection
retries.
Use this for concerns that must re-evaluate per attempt:
- Cost tracking (charge per attempt, not per trigger)
- Tool scoping (different tools on retry)
- Checkpoint config overrides
- Per-execution logging/tracing setup
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any
@dataclass
class ExecutionContext:
    """Context passed to execution middleware.

    One instance describes a single execution attempt; middleware may
    mutate and return it from ``on_execution_start`` to transform the
    attempt's parameters.
    """
    # Unique execution run ID.
    execution_id: str
    # Entry point / pipeline that triggered the execution.
    stream_id: str
    # Run this execution belongs to.
    run_id: str
    # Input payload handed to the execution.
    input_data: dict[str, Any]
    # Optional session-scoped state carried into the execution.
    session_state: dict[str, Any] | None = None
    # Attempt counter, starting at 1; incremented on resurrection
    # retries (per the module docstring) — presumably by the caller.
    attempt: int = 1
    # Free-form scratch space for middleware to pass data along.
    metadata: dict[str, Any] = field(default_factory=dict)
class ExecutionMiddleware(ABC):
    """Base class for per-execution middleware.

    Subclasses implement :meth:`on_execution_start`, which fires before
    every execution attempt — including resurrection retries — unlike
    request-level ``PipelineStage`` hooks that run once per trigger.
    """
    @abstractmethod
    async def on_execution_start(self, ctx: ExecutionContext) -> ExecutionContext:
        """Called before each execution attempt (including resurrections).

        Modify and return *ctx* to transform execution parameters.
        Raise to abort the execution.
        """
+107
View File
@@ -0,0 +1,107 @@
"""Pipeline stage registry -- maps type names to stage classes.
Stages self-register via the ``@register`` decorator. The
``build_pipeline_from_config`` function reads a declarative config
(from ``~/.hive/configuration.json`` or ``agent.json``) and
instantiates the corresponding stage objects.
Example config::
{
"pipeline": {
"stages": [
{"type": "rate_limit", "order": 200, "config": {"max_requests_per_minute": 60}},
{"type": "cost_guard", "order": 300, "config": {"max_cost_per_request": 0.50}}
]
}
}
"""
from __future__ import annotations
import logging
from typing import Any
from framework.pipeline.runner import PipelineRunner
from framework.pipeline.stage import PipelineStage
logger = logging.getLogger(__name__)
_STAGE_REGISTRY: dict[str, type[PipelineStage]] = {}
def register(name: str):
    """Class decorator binding a stage class to *name* in the registry.

    Usage::

        @register("rate_limit")
        class RateLimitStage(PipelineStage):
            ...
    """

    def _bind(stage_cls: type[PipelineStage]) -> type[PipelineStage]:
        # A later registration under the same name silently replaces the
        # earlier one, matching plain dict-assignment semantics.
        _STAGE_REGISTRY[name] = stage_cls
        return stage_cls

    return _bind
def get_registered_stages() -> dict[str, type[PipelineStage]]:
    """Return a snapshot copy of the stage registry (name -> class)."""
    return {**_STAGE_REGISTRY}
def build_stage(spec: dict[str, Any]) -> PipelineStage:
    """Instantiate a single stage from a config spec.

    Args:
        spec: Dict with ``type`` (required), ``order`` (optional, overrides
            the stage class's default ordering), and ``config`` (optional
            kwargs dict forwarded to the stage constructor).

    Returns:
        The configured ``PipelineStage`` instance.

    Raises:
        KeyError: If ``spec`` lacks a ``type`` key, or the stage type is
            not registered.
    """
    # Built-in stages self-register on import. build_pipeline_from_config
    # triggers that import before calling us, but build_stage is also part
    # of the public API; trigger it here too so direct callers don't hit a
    # spurious "Available: (none)" KeyError.
    _ensure_builtins_registered()
    stage_type = spec["type"]
    if stage_type not in _STAGE_REGISTRY:
        available = ", ".join(sorted(_STAGE_REGISTRY)) or "(none)"
        raise KeyError(
            f"Unknown pipeline stage type '{stage_type}'. "
            f"Available: {available}"
        )
    cls = _STAGE_REGISTRY[stage_type]
    config = spec.get("config", {})
    stage = cls(**config)
    if "order" in spec:
        # Instance attribute shadows the class-level default order.
        stage.order = spec["order"]
    return stage
def build_pipeline_from_config(
    stages_config: list[dict[str, Any]],
) -> PipelineRunner:
    """Construct a ``PipelineRunner`` from a declarative stages list.

    Each entry has the shape ``{"type": "...", "order": N, "config": {...}}``.
    """
    # Built-in stages register themselves on import; make sure that import
    # has happened before any spec lookup.
    _ensure_builtins_registered()
    return PipelineRunner([build_stage(entry) for entry in stages_config])
def _ensure_builtins_registered() -> None:
    """Import built-in stage modules so their ``@register`` decorators fire.

    No-op once the registry is populated. Each module is imported
    independently: previously a single failing import aborted the shared
    ``try`` block and silently skipped every remaining built-in stage.
    """
    if _STAGE_REGISTRY:
        return  # already populated
    import importlib

    builtin_modules = (
        "framework.pipeline.stages.cost_guard",
        "framework.pipeline.stages.credential_resolver",
        "framework.pipeline.stages.input_validation",
        "framework.pipeline.stages.llm_provider",
        "framework.pipeline.stages.mcp_registry",
        "framework.pipeline.stages.rate_limit",
        "framework.pipeline.stages.skill_registry",
    )
    for module_name in builtin_modules:
        try:
            importlib.import_module(module_name)
        except ImportError:
            # Best-effort: a missing optional dependency for one stage must
            # not prevent the other built-ins from registering.
            logger.debug(
                "Built-in pipeline stage module %s unavailable", module_name
            )
+111
View File
@@ -0,0 +1,111 @@
"""Pipeline runner -- executes registered stages in order."""
from __future__ import annotations
import logging
from typing import Any
from framework.pipeline.stage import (
PipelineContext,
PipelineRejectedError,
PipelineStage,
)
logger = logging.getLogger(__name__)
class PipelineRunner:
    """Runs :class:`PipelineStage` objects in ascending ``order``.

    :class:`AgentRuntime` invokes the runner on every trigger. Ties in
    ``order`` are broken by registration order (``sorted`` is stable).
    A stage returning a ``reject`` result short-circuits the remaining
    stages, surfacing as :class:`PipelineRejectedError` from the trigger.
    """

    def __init__(self, stages: list[PipelineStage] | None = None) -> None:
        # Sort once up front; add_stage() maintains the invariant afterwards.
        self._stages: list[PipelineStage] = sorted(
            stages or [], key=lambda stage: stage.order
        )

    @property
    def stages(self) -> list[PipelineStage]:
        # Defensive copy: callers must not mutate the internal ordering.
        return list(self._stages)

    def add_stage(self, stage: PipelineStage) -> None:
        """Register *stage* after construction, preserving order sorting."""
        self._stages.append(stage)
        self._stages.sort(key=lambda s: s.order)

    async def initialize_all(self) -> None:
        """Run every stage's ``initialize`` hook, logging progress."""
        for stage in self._stages:
            stage_name = stage.__class__.__name__
            logger.info("[pipeline] Initializing %s (order=%d)", stage_name, stage.order)
            await stage.initialize()
            logger.info("[pipeline] %s initialized", stage_name)
        if not self._stages:
            return
        logger.info(
            "[pipeline] Ready: %d stages [%s]",
            len(self._stages),
            " -> ".join(s.__class__.__name__ for s in self._stages),
        )

    async def run(self, ctx: PipelineContext) -> PipelineContext:
        """Run all stages over *ctx*.

        Returns the (possibly transformed) context; raises
        ``PipelineRejectedError`` as soon as any stage rejects.
        """
        if not self._stages:
            return ctx

        import time

        started = time.perf_counter()
        logger.info(
            "[pipeline] Running %d stages for entry_point=%s",
            len(self._stages),
            ctx.entry_point_id,
        )
        for stage in self._stages:
            stage_name = stage.__class__.__name__
            stage_started = time.perf_counter()
            outcome = await stage.process(ctx)
            stage_ms = (time.perf_counter() - stage_started) * 1000
            if outcome.action == "reject":
                why = outcome.rejection_reason or "(no reason given)"
                logger.warning(
                    "[pipeline] REJECTED by %s (%.1fms): %s",
                    stage_name, stage_ms, why,
                )
                raise PipelineRejectedError(stage_name, why)
            if outcome.action == "transform":
                logger.info(
                    "[pipeline] %s TRANSFORMED input (%.1fms)",
                    stage_name, stage_ms,
                )
                # A transform with no payload still counts as handled;
                # only replace input_data when the stage supplied one.
                if outcome.input_data is not None:
                    ctx.input_data = outcome.input_data
            else:
                logger.info(
                    "[pipeline] %s passed (%.1fms)",
                    stage_name, stage_ms,
                )
        logger.info(
            "[pipeline] Complete (%.1fms total)",
            (time.perf_counter() - started) * 1000,
        )
        return ctx

    async def run_post(self, ctx: PipelineContext, result: Any) -> Any:
        """Fold *result* through every stage's ``post_process`` hook.

        Each stage may transform the value; the final value is returned.
        Exceptions are logged and swallowed — post-processing must never
        break an execution that already succeeded — so on failure the
        previous good value carries forward.
        """
        accumulated = result
        for stage in self._stages:
            try:
                accumulated = await stage.post_process(ctx, accumulated)
            except Exception:
                logger.exception(
                    "Pipeline post_process raised in %s; continuing with previous result",
                    stage.__class__.__name__,
                )
        return accumulated
+77
View File
@@ -0,0 +1,77 @@
"""Pipeline stage base class and request/response types."""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Literal
class PipelineRejectedError(Exception):
    """Raised by ``AgentHost.trigger`` when a stage rejects the request.

    Carries the rejecting stage's class name and human-readable reason
    so callers can report which gate fired.
    """

    def __init__(self, stage_name: str, reason: str) -> None:
        self.stage_name = stage_name
        self.reason = reason
        super().__init__(f"Pipeline rejected by {stage_name}: {reason}")
@dataclass
class PipelineContext:
    """Carries request data through the pipeline.

    One instance is created per trigger; stages may read any field and
    (via a ``transform`` result) replace ``input_data``.
    """
    # Entry point the request targets.
    entry_point_id: str
    # Request payload; replaced when a stage returns a transform result.
    input_data: dict[str, Any]
    # Optional ID for correlating related requests.
    correlation_id: str | None = None
    # Optional session-scoped state (e.g. used for rate-limit bucketing).
    session_state: dict[str, Any] | None = None
    # Free-form scratch space for stages to pass data along
    # (e.g. "estimated_cost" read by the cost guard).
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class PipelineResult:
    """Outcome of a stage's ``process`` call.

    ``action`` semantics:
      - "continue": pass the context through unchanged.
      - "reject": abort the pipeline; ``rejection_reason`` explains why.
      - "transform": replace the context's input with ``input_data``
        (when it is not ``None``).
    """
    action: Literal["continue", "reject", "transform"] = "continue"
    # Replacement payload; only consulted when action == "transform".
    input_data: dict[str, Any] | None = None
    # Human-readable explanation; only consulted when action == "reject".
    rejection_reason: str | None = None
class PipelineStage(ABC):
    """Base class for all middleware stages.

    Infrastructure stages (LLM, MCP, credentials, skills) set typed
    attributes during ``initialize()`` that the host reads after all
    stages have initialized. Request-level stages (rate limit, input
    validation, cost guard) implement ``process()``.

    Attributes set by infrastructure stages:
        llm: LLM provider instance (set by LlmProviderStage)
        tool_registry: ToolRegistry with discovered MCP tools (set by McpRegistryStage)
        accounts_prompt: Connected accounts system prompt block (set by CredentialResolverStage)
        accounts_data: Raw account info list (set by CredentialResolverStage)
        tool_provider_map: Tool name -> provider mapping (set by CredentialResolverStage)
        skills_manager: SkillsManager instance (set by SkillRegistryStage)
    """
    # Execution position: stages run in ascending order (ties broken by
    # registration order). Subclasses override this class attribute, and
    # build_stage() may override it per-instance from config.
    order: int = 100
    # Infrastructure stage outputs -- typed so _apply_pipeline_results
    # doesn't need hasattr() sniffing.
    llm: Any = None
    tool_registry: Any = None
    accounts_prompt: str = ""
    accounts_data: list[dict] | None = None
    tool_provider_map: dict[str, str] | None = None
    skills_manager: Any = None
    async def initialize(self) -> None:
        """Called once when the runtime starts.

        Default is a no-op; infrastructure stages override this to build
        their outputs (LLM provider, tool registry, prompts, ...).
        """
        return None
    @abstractmethod
    async def process(self, ctx: PipelineContext) -> PipelineResult:
        """Process the incoming request.

        Return a ``PipelineResult`` whose ``action`` is "continue",
        "reject", or "transform".
        """
    async def post_process(self, ctx: PipelineContext, result: Any) -> Any:
        """Optional post-execution hook. Default: pass-through.

        Receives the execution result (possibly already transformed by
        earlier stages) and returns the value to carry forward.
        """
        return result
@@ -0,0 +1,19 @@
"""Built-in pipeline stages."""
from framework.pipeline.stages.cost_guard import CostGuardStage
from framework.pipeline.stages.credential_resolver import CredentialResolverStage
from framework.pipeline.stages.input_validation import InputValidationStage
from framework.pipeline.stages.llm_provider import LlmProviderStage
from framework.pipeline.stages.mcp_registry import McpRegistryStage
from framework.pipeline.stages.rate_limit import RateLimitStage
from framework.pipeline.stages.skill_registry import SkillRegistryStage
__all__ = [
"CostGuardStage",
"CredentialResolverStage",
"InputValidationStage",
"LlmProviderStage",
"McpRegistryStage",
"RateLimitStage",
"SkillRegistryStage",
]
@@ -0,0 +1,35 @@
"""Cost guard stage -- reject requests over a pre-flight budget."""
from __future__ import annotations
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
@register("cost_guard")
class CostGuardStage(PipelineStage):
"""Reject requests whose estimated cost exceeds the per-request budget.
The cost estimate must be populated in ``ctx.metadata["estimated_cost"]``
by an earlier stage (or by the caller). When no estimate is present,
the stage passes through.
"""
order = 300
def __init__(self, max_cost_per_request: float = 1.0) -> None:
self._budget = max_cost_per_request
async def process(self, ctx: PipelineContext) -> PipelineResult:
estimated = ctx.metadata.get("estimated_cost")
if estimated is None:
return PipelineResult(action="continue")
if estimated > self._budget:
return PipelineResult(
action="reject",
rejection_reason=(
f"Estimated cost ${estimated:.4f} exceeds budget "
f"${self._budget:.4f}"
),
)
return PipelineResult(action="continue")
@@ -0,0 +1,58 @@
"""Credential resolver pipeline stage.
Resolves connected accounts at startup. Individual credential TTL/refresh
is handled by MCP server processes internally -- they resolve tokens from
the credential store on every tool call.
"""
from __future__ import annotations
import logging
from typing import Any
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
logger = logging.getLogger(__name__)
@register("credential_resolver")
class CredentialResolverStage(PipelineStage):
"""Resolve connected accounts for system prompt injection."""
order = 40
def __init__(self, credential_store: Any = None, **kwargs: Any) -> None:
self._credential_store = credential_store
self.accounts_prompt = ""
self.accounts_data: list[dict] | None = None
self.tool_provider_map: dict[str, str] | None = None
async def initialize(self) -> None:
try:
from aden_tools.credentials.store_adapter import (
CredentialStoreAdapter,
)
from framework.orchestrator.prompting import build_accounts_prompt
if self._credential_store is not None:
adapter = CredentialStoreAdapter(store=self._credential_store)
else:
adapter = CredentialStoreAdapter.default()
self.accounts_data = adapter.get_all_account_info()
self.tool_provider_map = adapter.get_tool_provider_map()
if self.accounts_data:
self.accounts_prompt = build_accounts_prompt(
self.accounts_data, self.tool_provider_map,
)
logger.info(
"[pipeline] CredentialResolverStage: %d accounts",
len(self.accounts_data or []),
)
except Exception:
logger.debug(
"Credential resolution failed (non-fatal)", exc_info=True,
)
async def process(self, ctx: PipelineContext) -> PipelineResult:
return PipelineResult(action="continue")
@@ -0,0 +1,47 @@
"""Input validation stage.
Rejects requests whose ``input_data`` does not match the entry point's
declared input schema. Uses a user-provided schema map:
``{entry_point_id: {required_key: expected_type, ...}}``.
"""
from __future__ import annotations
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
@register("input_validation")
class InputValidationStage(PipelineStage):
"""Validate ``input_data`` against per-entry-point schemas.
The schema is a simple dict mapping key -> expected Python type.
For richer validation, substitute a Pydantic-based stage.
"""
order = 100
def __init__(self, schemas: dict[str, dict[str, type]] | None = None) -> None:
self._schemas = schemas or {}
async def process(self, ctx: PipelineContext) -> PipelineResult:
schema = self._schemas.get(ctx.entry_point_id)
if not schema:
return PipelineResult(action="continue")
for key, expected_type in schema.items():
if key not in ctx.input_data:
return PipelineResult(
action="reject",
rejection_reason=f"Missing required input key: '{key}'",
)
value = ctx.input_data[key]
if not isinstance(value, expected_type):
return PipelineResult(
action="reject",
rejection_reason=(
f"Input key '{key}' has type {type(value).__name__}, "
f"expected {expected_type.__name__}"
),
)
return PipelineResult(action="continue")
@@ -0,0 +1,95 @@
"""LLM provider pipeline stage.
Resolves the LLM provider from global config. This is the ONLY place
the LLM gets created for worker agents.
"""
from __future__ import annotations
import logging
from typing import Any
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
logger = logging.getLogger(__name__)
@register("llm_provider")
class LlmProviderStage(PipelineStage):
"""Resolve LLM provider and make it available."""
order = 10
def __init__(
self,
model: str | None = None,
mock_mode: bool = False,
llm: Any = None,
**kwargs: Any,
) -> None:
self._model = model
self._mock_mode = mock_mode
self.llm = llm # Pre-injected LLM (e.g. from session)
async def initialize(self) -> None:
if self.llm is not None:
return # Already injected
from framework.config import (
get_api_key,
get_api_keys,
get_hive_config,
get_preferred_model,
)
model = self._model or get_preferred_model()
if self._mock_mode:
from framework.llm.mock import MockLLMProvider
self.llm = MockLLMProvider(model=model)
return
config = get_hive_config()
llm_config = config.get("llm", {})
api_base = llm_config.get("api_base")
# Check for Antigravity (special provider)
if llm_config.get("use_antigravity_subscription"):
try:
from framework.llm.antigravity import AntigravityProvider
provider = AntigravityProvider(model=model)
if provider.has_credentials():
self.llm = provider
logger.info("[pipeline] LlmProviderStage: Antigravity")
return
except Exception:
pass
from framework.llm.litellm import LiteLLMProvider
api_key = get_api_key()
api_keys = get_api_keys()
if api_keys and len(api_keys) > 1:
self.llm = LiteLLMProvider(
model=model, api_keys=api_keys, api_base=api_base,
)
elif api_key:
extra = {}
if api_key.startswith("sk-ant-oat"):
extra["extra_headers"] = {
"authorization": f"Bearer {api_key}"
}
self.llm = LiteLLMProvider(
model=model, api_key=api_key, api_base=api_base, **extra,
)
else:
self.llm = LiteLLMProvider(model=model, api_base=api_base)
logger.info("[pipeline] LlmProviderStage: %s", model)
async def process(self, ctx: PipelineContext) -> PipelineResult:
return PipelineResult(action="continue")
@@ -0,0 +1,92 @@
"""MCP registry pipeline stage.
Resolves MCP server references from the agent config against the global
registry and registers tools. This is the ONLY place MCP tools get loaded.
"""
from __future__ import annotations
import logging
from dataclasses import asdict
from pathlib import Path
from typing import Any
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
logger = logging.getLogger(__name__)
@register("mcp_registry")
class McpRegistryStage(PipelineStage):
"""Resolve MCP tools from the global registry."""
order = 50
def __init__(
self,
server_refs: list[dict[str, Any]] | None = None,
agent_path: str | Path | None = None,
tool_registry: Any = None,
**kwargs: Any,
) -> None:
self._server_refs = server_refs or []
self._agent_path = Path(agent_path) if agent_path else None
self._tool_registry = tool_registry
async def initialize(self) -> None:
"""Connect to MCP servers and discover tools."""
if self._tool_registry is None:
from framework.loader.tool_registry import ToolRegistry
self._tool_registry = ToolRegistry()
from framework.loader.mcp_registry import MCPRegistry
registry = MCPRegistry()
mcp_loaded = False
# 1. From agent.json mcp_servers refs
if self._server_refs:
names = [ref["name"] for ref in self._server_refs if ref.get("name")]
if names:
configs = registry.resolve_for_agent(include=names)
if configs:
self._tool_registry.load_registry_servers(
[asdict(c) for c in configs]
)
mcp_loaded = True
logger.info(
"[pipeline] McpRegistryStage: loaded %d servers: %s",
len(configs),
names,
)
# 2. Legacy: mcp_servers.json
if not mcp_loaded and self._agent_path:
mcp_json = self._agent_path / "mcp_servers.json"
if mcp_json.exists():
self._tool_registry.load_mcp_config(mcp_json)
mcp_loaded = True
# 3. Fallback: all servers from global registry
if not mcp_loaded:
configs = registry.resolve_for_agent(profile="all")
if configs:
self._tool_registry.load_registry_servers(
[asdict(c) for c in configs]
)
logger.info(
"[pipeline] McpRegistryStage: loaded %d servers (fallback)",
len(configs),
)
total = len(self._tool_registry.get_tools())
logger.info("[pipeline] McpRegistryStage: %d tools available", total)
async def process(self, ctx: PipelineContext) -> PipelineResult:
return PipelineResult(action="continue")
@property
def tool_registry(self):
return self._tool_registry
@@ -0,0 +1,44 @@
"""Per-(entry-point, session) rate limiting stage."""
from __future__ import annotations
import time
from collections import defaultdict
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
@register("rate_limit")
class RateLimitStage(PipelineStage):
"""Reject requests that exceed ``max_requests_per_minute`` per session.
The key is ``<entry_point_id>:<session_id>``. When no session_id is
present in ``session_state``, a single shared "default" bucket is used.
"""
order = 200
def __init__(self, max_requests_per_minute: int = 60) -> None:
self._max_rpm = max_requests_per_minute
self._timestamps: dict[str, list[float]] = defaultdict(list)
async def process(self, ctx: PipelineContext) -> PipelineResult:
session_id = "default"
if ctx.session_state:
session_id = str(ctx.session_state.get("session_id", "default"))
key = f"{ctx.entry_point_id}:{session_id}"
now = time.monotonic()
# Prune entries older than 60s.
self._timestamps[key] = [t for t in self._timestamps[key] if now - t < 60.0]
if len(self._timestamps[key]) >= self._max_rpm:
return PipelineResult(
action="reject",
rejection_reason=(
f"Rate limit exceeded: {self._max_rpm} req/min "
f"for session '{session_id}'"
),
)
self._timestamps[key].append(now)
return PipelineResult(action="continue")
@@ -0,0 +1,55 @@
"""Skill registry pipeline stage.
Discovers and loads skills. This is the ONLY place skills get loaded.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any
from framework.pipeline.registry import register
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
logger = logging.getLogger(__name__)
@register("skill_registry")
class SkillRegistryStage(PipelineStage):
"""Discover skills and provide prompts."""
order = 60
def __init__(
self,
project_root: str | Path | None = None,
interactive: bool = True,
skills_config: Any = None,
**kwargs: Any,
) -> None:
self._project_root = Path(project_root) if project_root else None
self._interactive = interactive
self._skills_config = skills_config
self.skills_manager: Any = None
async def initialize(self) -> None:
from framework.skills.config import SkillsConfig
from framework.skills.manager import SkillsManager, SkillsManagerConfig
config = SkillsManagerConfig(
skills_config=self._skills_config or SkillsConfig(),
project_root=self._project_root,
interactive=self._interactive,
)
self.skills_manager = SkillsManager(config)
self.skills_manager.load()
await self.skills_manager.start_watching()
logger.info(
"[pipeline] SkillRegistryStage: catalog=%d chars, protocols=%d chars",
len(self.skills_manager.skills_catalog_prompt),
len(self.skills_manager.protocols_prompt),
)
async def process(self, ctx: PipelineContext) -> PipelineResult:
return PipelineResult(action="continue")
-27
View File
@@ -1,27 +0,0 @@
"""Agent Runner - load and run exported agents."""
from framework.runner.mcp_registry import MCPRegistry
from framework.runner.protocol import (
AgentMessage,
CapabilityLevel,
CapabilityResponse,
MessageType,
OrchestratorResult,
)
from framework.runner.runner import AgentInfo, AgentRunner, ValidationResult
from framework.runner.tool_registry import ToolRegistry, tool
__all__ = [
# Single agent
"AgentRunner",
"AgentInfo",
"ValidationResult",
"ToolRegistry",
"MCPRegistry",
"tool",
"AgentMessage",
"MessageType",
"CapabilityLevel",
"CapabilityResponse",
"OrchestratorResult",
]
-493
View File
@@ -1,493 +0,0 @@
# Event Types and Schema Reference
The Hive runtime uses a pub/sub `EventBus` for inter-component communication and observability. Every event is an `AgentEvent` dataclass published through `EventBus.publish()`.
## Event Envelope (`AgentEvent`)
Every event shares a common envelope:
| Field | Type | Description |
| ---------------- | ----------------- | ------------------------------------------------------------ |
| `type` | `EventType` (str) | Event type identifier (see below) |
| `stream_id` | `str` | Entry point / pipeline that emitted the event |
| `node_id` | `str \| None` | Graph node that emitted the event |
| `execution_id` | `str \| None` | Unique execution run ID (UUID, set by `ExecutionStream`) |
| `graph_id` | `str \| None` | Graph that emitted the event (set by `GraphScopedEventBus`) |
| `data` | `dict` | Event-type-specific payload (see individual schemas below) |
| `timestamp` | `datetime` | When the event was created |
| `correlation_id` | `str \| None` | Optional ID for tracking related events across streams |
### Identity Fields
The identity tuple `(graph_id, stream_id, node_id, execution_id)` uniquely locates any event:
- **`graph_id`** — Which graph produced the event. Set automatically by `GraphScopedEventBus` (a subclass that stamps `graph_id` on every `publish()` call). Values: `"worker"`, `"judge"`, `"queen"`, or the graph spec ID.
- **`stream_id`** — Which entry point / pipeline. Corresponds to `EntryPointSpec.id` in the graph definition. For single-entry-point graphs, this equals the entry point name (e.g. `"default"`, `"health_check"`).
- **`node_id`** — Which specific node emitted the event. For `EventLoopNode` events, this is the node spec ID.
- **`execution_id`** — UUID identifying a specific execution run. Multiple concurrent executions of the same entry point each get a unique `execution_id`.
---
## Execution Lifecycle
### `execution_started`
A new graph execution has begun.
| Data Field | Type | Description |
| ---------- | ------ | ------------------------------- |
| `input` | `dict` | Input data passed to the graph |
**Emitted by:** `ExecutionStream._run_execution()`
---
### `execution_completed`
A graph execution finished successfully.
| Data Field | Type | Description |
| ---------- | ------ | ----------------- |
| `output` | `dict` | Final output data |
**Emitted by:** `ExecutionStream._run_execution()`
**Queen notification:** When a worker execution completes, the session manager \
injects a `[WORKER_TERMINAL]` notification into the queen with the output summary. \
The queen reports to the user and asks what to do next.
---
### `execution_failed`
A graph execution failed with an error.
| Data Field | Type | Description |
| ---------- | ----- | ------------- |
| `error` | `str` | Error message |
**Emitted by:** `ExecutionStream._run_execution()`
**Queen notification:** When a worker execution fails, the session manager \
injects a `[WORKER_TERMINAL]` notification into the queen with the error. \
The queen reports to the user and helps troubleshoot.
---
### `execution_paused`
Execution has been paused (Ctrl+Z or HITL approval).
| Data Field | Type | Description |
| ---------- | ----- | ----------------- |
| `reason` | `str` | Why it was paused |
**Emitted by:** `GraphExecutor.execute()`
---
### `execution_resumed`
Execution has resumed from a paused state.
| Data Field | Type | Description |
| ---------- | ---- | ----------- |
| *(none)* | | |
**Emitted by:** `GraphExecutor.execute()`
---
## Node Event-Loop Lifecycle
These events track the inner loop of `EventLoopNode` — the multi-turn LLM streaming loop that powers most agent nodes.
### `node_loop_started`
An EventLoopNode has begun its execution loop.
| Data Field | Type | Description |
| ---------------- | ---------- | ------------------------------- |
| `max_iterations` | `int\|null`| Maximum iterations configured |
**Emitted by:** `EventLoopNode._publish_loop_started()`, `GraphExecutor` (for function nodes in parallel branches)
---
### `node_loop_iteration`
An EventLoopNode has started a new iteration (one LLM turn).
| Data Field | Type | Description |
| ----------- | ----- | ------------------------- |
| `iteration` | `int` | Zero-based iteration index |
**Emitted by:** `EventLoopNode._publish_iteration()`
---
### `node_loop_completed`
An EventLoopNode has finished its execution loop.
| Data Field | Type | Description |
| ------------ | ----- | -------------------------------------- |
| `iterations` | `int` | Total number of iterations completed |
**Emitted by:** `EventLoopNode._publish_loop_completed()`, `GraphExecutor` (for function nodes in parallel branches)
---
## LLM Streaming
### `llm_text_delta`
Incremental text output from the LLM (non-client-facing nodes only).
| Data Field | Type | Description |
| ---------- | ----- | ---------------------------------------- |
| `content` | `str` | New text chunk (delta) |
| `snapshot` | `str` | Full accumulated text so far |
**Emitted by:** `EventLoopNode._publish_text_delta()` when `client_facing=False`
---
### `llm_reasoning_delta`
Incremental reasoning/thinking output from the LLM.
| Data Field | Type | Description |
| ---------- | ----- | ------------------- |
| `content` | `str` | New reasoning chunk |
**Emitted by:** Not currently wired in `EventLoopNode` (reserved for extended thinking models).
---
## Tool Lifecycle
### `tool_call_started`
The LLM has requested a tool call and execution is about to begin.
| Data Field | Type | Description |
| ------------ | ------ | ------------------------------------ |
| `tool_use_id`| `str` | Unique ID for this tool invocation |
| `tool_name` | `str` | Name of the tool being called |
| `tool_input` | `dict` | Arguments passed to the tool |
**Emitted by:** `EventLoopNode._publish_tool_started()`
---
### `tool_call_completed`
A tool call has finished executing.
| Data Field | Type | Description |
| ------------ | ------ | -------------------------------------- |
| `tool_use_id`| `str` | Same ID from `tool_call_started` |
| `tool_name` | `str` | Name of the tool |
| `result`     | `str`  | Tool execution result (may be truncated) |
| `is_error` | `bool` | Whether the tool returned an error |
**Emitted by:** `EventLoopNode._publish_tool_completed()`
---
## Client I/O
These events are emitted by the queen's interactive turns. They drive the TUI's chat interface.
### `client_output_delta`
Incremental text output meant for the human operator.
| Data Field | Type | Description |
| ---------- | ----- | ---------------------------- |
| `content` | `str` | New text chunk (delta) |
| `snapshot` | `str` | Full accumulated text so far |
**Emitted by:** `EventLoopNode._publish_text_delta()` for queen/user-facing output
---
### `client_input_requested`
The node is waiting for human input (via `ask_user` tool or auto-block on text-only turns).
| Data Field | Type | Description |
| ---------- | ----- | ------------------------------------------------- |
| `prompt` | `str` | Optional prompt/question shown to the user |
**Emitted by:** `EventLoopNode._await_user_input()`, doom loop handler
The TUI subscribes to this event to show the input prompt and focus the chat input. After the user types, `inject_event()` is called on the node to unblock it.
---
## Internal Node Observability
### `node_internal_output`
Output from a non-client-facing node (for debugging/monitoring).
| Data Field | Type | Description |
| ---------- | ----- | ---------------- |
| `content` | `str` | Output text |
**Emitted by:** Available via `emit_node_internal_output()` — not currently wired in the default `EventLoopNode`.
---
### `node_input_blocked`
A non-client-facing node is blocked waiting for input.
| Data Field | Type | Description |
| ---------- | ----- | --------------- |
| `prompt` | `str` | Block reason |
**Emitted by:** Available via `emit_node_input_blocked()` — reserved for future use.
---
### `node_stalled`
The node's LLM has produced identical responses for several consecutive turns (stall detection).
| Data Field | Type | Description |
| ---------- | ----- | ------------------------------------------------- |
| `reason` | `str` | Always `"Consecutive identical responses detected"`|
**Emitted by:** `EventLoopNode._publish_stalled()`
---
### `node_tool_doom_loop`
The LLM is calling the same tool(s) with identical arguments repeatedly (doom loop detection).
| Data Field | Type | Description |
| ------------- | ----- | ------------------------------------ |
| `description` | `str` | Human-readable doom loop description |
**Emitted by:** `EventLoopNode` doom loop handler
---
## Judge Decisions
### `judge_verdict`
The judge (custom or implicit) has evaluated the current iteration.
| Data Field | Type | Description |
| ------------ | ----- | ---------------------------------------------------- |
| `action` | `str` | `"ACCEPT"`, `"RETRY"`, `"ESCALATE"`, or `"CONTINUE"` |
| `feedback` | `str` | Judge feedback (empty for ACCEPT/CONTINUE) |
| `judge_type` | `str` | `"custom"` (explicit JudgeProtocol) or `"implicit"` (stop-reason heuristic) |
| `iteration` | `int` | Which iteration this verdict applies to |
**Emitted by:** `EventLoopNode._publish_judge_verdict()`
**Verdict meanings:**
- **ACCEPT** — Output meets requirements; node exits successfully.
- **RETRY** — Output needs improvement; loop continues with feedback injected.
- **ESCALATE** — Problem cannot be solved at this level; triggers escalation.
- **CONTINUE** — Implicit verdict: LLM called tools, so it's making progress — let it keep going.
---
## Output Tracking
### `output_key_set`
A node has set an output key via the `set_output` synthetic tool.
| Data Field | Type | Description |
| ---------- | ----- | ----------------- |
| `key` | `str` | Output key name |
**Emitted by:** `EventLoopNode._publish_output_key_set()`
---
## Retry & Edge Tracking
### `node_retry`
A transient error occurred during an LLM call and the node is retrying.
| Data Field | Type | Description |
| ------------- | ----- | ---------------------------------- |
| `retry_count` | `int` | Current retry attempt number |
| `max_retries` | `int` | Maximum retries configured |
| `error` | `str` | Error message (truncated to 500ch) |
**Emitted by:** `EventLoopNode` (stream retry handler), `GraphExecutor` (node-level retry)
---
### `edge_traversed`
The executor has traversed an edge from one node to another.
| Data Field | Type | Description |
| ---------------- | ----- | ---------------------------------------------- |
| `source_node` | `str` | Node ID the edge starts from |
| `target_node` | `str` | Node ID the edge goes to |
| `edge_condition` | `str` | Edge condition: `"router"`, `"on_success"`, etc. |
**Emitted by:** `GraphExecutor.execute()` — after router decisions, condition-based edges, and fallback edges.
---
## Context Management
### `context_compacted`
Not currently emitted — reserved for future use when `NodeConversation` compacts history.
---
## State Changes
### `state_changed`
A shared buffer key has been modified.
| Data Field | Type | Description |
| ----------- | ----- | ---------------------------------- |
| `key` | `str` | Buffer key that changed |
| `old_value` | `Any` | Previous value |
| `new_value` | `Any` | New value |
| `scope` | `str` | Scope of the change |
**Emitted by:** Available via `emit_state_changed()` — not currently wired in default execution.
---
### `state_conflict`
Not currently emitted — reserved for concurrent write conflict detection.
---
## Goal Tracking
### `goal_progress`
Goal completion progress update.
| Data Field | Type | Description |
| ----------------- | ------- | ------------------------------------ |
| `progress`        | `float` | 0.0–1.0 completion fraction          |
| `criteria_status` | `dict` | Per-criterion status |
**Emitted by:** Available via `emit_goal_progress()` — not currently wired in default execution.
---
### `goal_achieved`
Not currently emitted — reserved for explicit goal completion signals.
---
### `constraint_violation`
A goal constraint has been violated.
| Data Field | Type | Description |
| --------------- | ----- | ------------------------ |
| `constraint_id` | `str` | Which constraint failed |
| `description` | `str` | What went wrong |
**Emitted by:** Available via `emit_constraint_violation()`.
---
## Stream Lifecycle
### `stream_started` / `stream_stopped`
Not currently emitted — reserved for `ExecutionStream` lifecycle tracking.
---
## External Triggers
### `webhook_received`
An external webhook has been received.
| Data Field | Type | Description |
| -------------- | ------ | ---------------------------- |
| `path` | `str` | Webhook URL path |
| `method` | `str` | HTTP method |
| `headers` | `dict` | HTTP headers |
| `payload` | `dict` | Request body |
| `query_params` | `dict` | URL query parameters |
**Emitted by:** Webhook server integration.
Note: `node_id` is not set on this event; `stream_id` is the webhook source ID.
---
## Escalation
### `escalation_requested`
An agent has requested handoff to the Hive Coder (via the `escalate` synthetic tool).
| Data Field | Type | Description |
| ---------- | ----- | ------------------------------- |
| `reason` | `str` | Why escalation is needed |
| `context` | `str` | Additional context for the coder|
**Emitted by:** `EventLoopNode` when the LLM calls `escalate`.
---
## Custom Events
### `custom`
User-defined events with arbitrary payloads. No schema enforced.
---
## Subscription & Filtering
Events can be filtered when subscribing:
```python
bus.subscribe(
event_types=[EventType.TOOL_CALL_STARTED, EventType.TOOL_CALL_COMPLETED],
handler=my_handler,
filter_stream="default", # Only events from this stream
filter_node="planner", # Only events from this node
filter_execution="exec-uuid", # Only events from this execution
filter_graph="worker", # Only events from this graph
)
```
## Debug Event Logging
Set `HIVE_DEBUG_EVENTS=1` to write every published event to a JSONL file at `~/.hive/event_logs/<timestamp>.jsonl`. Each line is the full JSON serialization of an `AgentEvent`:
```json
{
"type": "tool_call_started",
"stream_id": "default",
"node_id": "planner",
"execution_id": "a1b2c3d4-...",
"graph_id": "worker",
"data": {"tool_use_id": "tu_1", "tool_name": "web_search", "tool_input": {"query": "..."}},
"timestamp": "2026-02-24T12:00:00.000000",
"correlation_id": null
}
```
-171
View File
@@ -1,171 +0,0 @@
# Agent Runtime
Unified execution system for all Hive agents. Every agent — single-entry or multi-entry, headless or dashboard — runs through the same runtime stack.
## Topology
```
AgentRunner.load(agent_path)
|
AgentRunner
(factory + public API)
|
_setup_agent_runtime()
|
AgentRuntime
(lifecycle + orchestration)
/ | \
Stream A Stream B Stream C ← one per entry point
| | |
GraphExecutor GraphExecutor GraphExecutor
| | |
Node → Node → Node (graph traversal)
```
Single-entry agents get a `"default"` entry point automatically. There is no separate code path.
## Components
| Component | File | Role |
|---|---|---|
| `AgentRunner` | `runner/runner.py` | Load agents, configure tools/LLM, expose high-level API |
| `AgentRuntime` | `runtime/agent_runtime.py` | Lifecycle management, entry point routing, event bus |
| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |
## Programming Interface
### AgentRunner (high-level)
```python
from framework.runner import AgentRunner
# Load and run
runner = AgentRunner.load("exports/my_agent", model="anthropic/claude-sonnet-4-20250514")
result = await runner.run({"query": "hello"})
# Resume from paused session
result = await runner.run({"query": "continue"}, session_state=saved_state)
# Lifecycle
await runner.start() # Start the runtime
await runner.stop() # Stop the runtime
exec_id = await runner.trigger("default", {}) # Non-blocking trigger
entry_points = runner.get_entry_points() # List entry points
# Context manager
async with AgentRunner.load("exports/my_agent") as runner:
result = await runner.run({"query": "hello"})
# Cleanup
runner.cleanup() # Synchronous
await runner.cleanup_async() # Asynchronous
```
### AgentRuntime (lower-level)
```python
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.execution_stream import EntryPointSpec
# Create runtime with entry points
runtime = create_agent_runtime(
graph=graph,
goal=goal,
storage_path=Path("~/.hive/agents/my_agent"),
entry_points=[
EntryPointSpec(id="default", name="Default", entry_node="start", trigger_type="manual"),
],
llm=llm,
tools=tools,
tool_executor=tool_executor,
checkpoint_config=checkpoint_config,
)
# Lifecycle
await runtime.start()
await runtime.stop()
# Execution
exec_id = await runtime.trigger("default", {"query": "hello"}) # Non-blocking
result = await runtime.trigger_and_wait("default", {"query": "hello"}) # Blocking
result = await runtime.trigger_and_wait("default", {}, session_state=state) # Resume
# Client-facing node I/O
await runtime.inject_input(node_id="chat", content="user response")
# Events
sub_id = runtime.subscribe_to_events(
event_types=[EventType.CLIENT_OUTPUT_DELTA],
handler=my_handler,
)
runtime.unsubscribe_from_events(sub_id)
# Inspection
runtime.is_running # bool
runtime.event_bus # EventBus
runtime.state_manager # SharedBufferManager
runtime.get_stats() # Runtime statistics
```
## Execution Flow
1. `AgentRunner.run()` calls `AgentRuntime.trigger_and_wait()`
2. `AgentRuntime` routes to the `ExecutionStream` for the entry point
3. `ExecutionStream` creates a `GraphExecutor` and calls `execute()`
4. `GraphExecutor` traverses nodes, dispatches tools, manages checkpoints
5. `ExecutionResult` flows back up through the stack
6. `ExecutionStream` writes session state to disk
## Session Resume
All execution paths support session resume:
```python
# First run (agent pauses at a client-facing node)
result = await runner.run({"query": "start task"})
# result.paused_at = "review-node"
# result.session_state = {"memory": {...}, "paused_at": "review-node", ...}
# Resume
result = await runner.run({"input": "approved"}, session_state=result.session_state)
```
Session state flows: `AgentRunner.run()` → `AgentRuntime.trigger_and_wait()` → `ExecutionStream.execute()` → `GraphExecutor.execute()`.
Checkpoints are saved at node boundaries (`sessions/{id}/checkpoints/`) for crash recovery.
## Event Bus
The `EventBus` provides real-time execution visibility:
| Event | When |
|---|---|
| `NODE_STARTED` | Node begins execution |
| `NODE_COMPLETED` | Node finishes |
| `TOOL_CALL_STARTED` | Tool invocation begins |
| `TOOL_CALL_COMPLETED` | Tool invocation finishes |
| `CLIENT_OUTPUT_DELTA` | Agent streams text to user |
| `CLIENT_INPUT_REQUESTED` | Agent needs user input |
| `EXECUTION_COMPLETED` | Full execution finishes |
In headless mode, `AgentRunner` subscribes to `CLIENT_OUTPUT_DELTA` and `CLIENT_INPUT_REQUESTED` to print output and read stdin. The web dashboard subscribes to route events to the frontend.
## Storage Layout
```
~/.hive/agents/{agent_name}/
sessions/
session_YYYYMMDD_HHMMSS_{uuid}/
state.json # Session state (status, memory, progress)
checkpoints/ # Node-boundary snapshots
logs/
summary.json # Execution summary
details.jsonl # Detailed event log
tool_logs.jsonl # Tool call log
runtime_logs/ # Cross-session runtime logs
```
-5
View File
@@ -1,5 +0,0 @@
"""Runtime core for agent execution."""
from framework.runtime.core import Runtime
__all__ = ["Runtime"]
-1
View File
@@ -1 +0,0 @@
"""Tests for runtime components."""
@@ -1,869 +0,0 @@
"""
Tests for AgentRuntime and multi-entry-point execution.
Tests:
1. AgentRuntime creation and lifecycle
2. Entry point registration
3. Concurrent executions across streams
4. SharedBufferManager isolation levels
5. OutcomeAggregator goal evaluation
6. EventBus pub/sub
"""
import asyncio
import tempfile
from pathlib import Path
import pytest
from framework.graph import Goal
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
from framework.graph.goal import Constraint, SuccessCriterion
from framework.graph.node import NodeSpec
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
from framework.runtime.execution_stream import EntryPointSpec
from framework.runtime.outcome_aggregator import OutcomeAggregator
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
from framework.schemas.session_state import SessionState, SessionTimestamps
# === Test Fixtures ===
@pytest.fixture
def sample_goal():
    """Goal with one weighted success criterion and one hard constraint."""
    criteria = [
        SuccessCriterion(
            id="sc-1",
            description="Process all requests",
            metric="requests_processed",
            target="100%",
            weight=1.0,
        ),
    ]
    constraints = [
        Constraint(
            id="c-1",
            description="Must not exceed rate limits",
            constraint_type="hard",
            category="operational",
        ),
    ]
    return Goal(
        id="test-goal",
        name="Test Goal",
        description="A goal for testing multi-entry-point execution",
        success_criteria=criteria,
        constraints=constraints,
    )
@pytest.fixture
def sample_graph():
    """Graph with two event-loop entry nodes converging on one terminal node."""
    webhook_node = NodeSpec(
        id="process-webhook",
        name="Process Webhook",
        description="Process incoming webhook",
        node_type="event_loop",
        input_keys=["webhook_data"],
        output_keys=["result"],
    )
    api_node = NodeSpec(
        id="process-api",
        name="Process API Request",
        description="Process API request",
        node_type="event_loop",
        input_keys=["request_data"],
        output_keys=["result"],
    )
    terminal_node = NodeSpec(
        id="complete",
        name="Complete",
        description="Execution complete",
        node_type="terminal",
        input_keys=["result"],
        output_keys=["final_result"],
    )
    # Both entry nodes route to the terminal node on success.
    edges = [
        EdgeSpec(
            id="webhook-to-complete",
            source="process-webhook",
            target="complete",
            condition=EdgeCondition.ON_SUCCESS,
        ),
        EdgeSpec(
            id="api-to-complete",
            source="process-api",
            target="complete",
            condition=EdgeCondition.ON_SUCCESS,
        ),
    ]
    return GraphSpec(
        id="test-graph",
        goal_id="test-goal",
        version="1.0.0",
        entry_node="process-webhook",
        entry_points={"start": "process-webhook"},
        terminal_nodes=["complete"],
        pause_nodes=[],
        nodes=[webhook_node, api_node, terminal_node],
        edges=edges,
    )
@pytest.fixture
def temp_storage():
    """Yield a scratch directory that is removed on fixture teardown."""
    with tempfile.TemporaryDirectory() as scratch:
        yield Path(scratch)
# === SharedBufferManager Tests ===
class TestSharedBufferManager:
    """Tests for SharedBufferManager."""

    def test_create_buffer(self):
        """An execution-scoped buffer records its execution and stream IDs."""
        mgr = SharedBufferManager()
        buf = mgr.create_buffer(
            execution_id="exec-1",
            stream_id="webhook",
            isolation=IsolationLevel.SHARED,
        )
        assert buf is not None
        # Inspect private fields directly; no public accessors are exercised here.
        assert buf._execution_id == "exec-1"
        assert buf._stream_id == "webhook"

    @pytest.mark.asyncio
    async def test_isolated_state(self):
        """Writes under ISOLATED must not leak between executions."""
        mgr = SharedBufferManager()
        first = mgr.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
        second = mgr.create_buffer("exec-2", "stream-1", IsolationLevel.ISOLATED)
        await first.write("key", "value1")
        await second.write("key", "value2")
        assert await first.read("key") == "value1"
        assert await second.read("key") == "value2"

    @pytest.mark.asyncio
    async def test_shared_state(self):
        """A global-scope write under SHARED is visible to every execution."""
        mgr = SharedBufferManager()
        mgr.create_buffer("exec-1", "stream-1", IsolationLevel.SHARED)
        mgr.create_buffer("exec-2", "stream-1", IsolationLevel.SHARED)
        await mgr.write(
            key="global_key",
            value="global_value",
            execution_id="exec-1",
            stream_id="stream-1",
            isolation=IsolationLevel.SHARED,
            scope="global",
        )
        # Both executions must observe the globally-scoped value.
        for exec_id in ("exec-1", "exec-2"):
            observed = await mgr.read(
                "global_key", exec_id, "stream-1", IsolationLevel.SHARED
            )
            assert observed == "global_value"

    def test_cleanup_execution(self):
        """cleanup_execution drops all per-execution state."""
        mgr = SharedBufferManager()
        mgr.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
        assert "exec-1" in mgr._execution_state
        mgr.cleanup_execution("exec-1")
        assert "exec-1" not in mgr._execution_state
class TestSessionState:
    """Tests for session state data-buffer compatibility."""

    def test_legacy_memory_alias_populates_data_buffer(self):
        """Legacy `memory` payloads should still hydrate the session buffer."""
        legacy_memory = {"rules": "keep starred mail"}
        timestamps = SessionTimestamps(
            started_at="2026-01-01T00:00:00",
            updated_at="2026-01-01T00:00:00",
        )
        session = SessionState(
            session_id="session-1",
            goal_id="goal-1",
            timestamps=timestamps,
            memory=legacy_memory,
        )
        # The legacy alias must populate both attributes and the serialized dict.
        assert session.data_buffer == legacy_memory
        assert session.memory == legacy_memory
        assert session.to_session_state_dict()["data_buffer"] == legacy_memory
# === EventBus Tests ===
class TestEventBus:
    """Tests for EventBus pub/sub."""

    @pytest.mark.asyncio
    async def test_publish_subscribe(self):
        """A subscribed handler receives a matching published event."""
        bus = EventBus()
        seen = []

        async def capture(event: AgentEvent):
            seen.append(event)

        bus.subscribe(event_types=[EventType.EXECUTION_STARTED], handler=capture)
        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_STARTED,
                stream_id="webhook",
                execution_id="exec-1",
                data={"test": "data"},
            )
        )
        # Give the async handler a chance to run.
        await asyncio.sleep(0.1)
        assert len(seen) == 1
        assert seen[0].type == EventType.EXECUTION_STARTED
        assert seen[0].stream_id == "webhook"

    @pytest.mark.asyncio
    async def test_stream_filter(self):
        """filter_stream limits delivery to events from one stream."""
        bus = EventBus()
        seen = []

        async def capture(event: AgentEvent):
            seen.append(event)

        bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=capture,
            filter_stream="webhook",
        )
        # "webhook" should be delivered; "api" should be filtered out.
        for stream in ("webhook", "api"):
            await bus.publish(
                AgentEvent(type=EventType.EXECUTION_STARTED, stream_id=stream)
            )
        await asyncio.sleep(0.1)
        assert len(seen) == 1
        assert seen[0].stream_id == "webhook"

    def test_unsubscribe(self):
        """unsubscribe removes the subscription and reports success."""
        bus = EventBus()

        async def noop(event: AgentEvent):
            pass

        sub_id = bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=noop,
        )
        assert sub_id in bus._subscriptions
        assert bus.unsubscribe(sub_id) is True
        assert sub_id not in bus._subscriptions

    @pytest.mark.asyncio
    async def test_wait_for(self):
        """wait_for resolves once the awaited event type is published."""
        bus = EventBus()

        # Begin waiting before the event exists, then publish it.
        waiter = asyncio.create_task(
            bus.wait_for(event_type=EventType.EXECUTION_COMPLETED, timeout=1.0)
        )
        await asyncio.sleep(0.1)
        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_COMPLETED,
                stream_id="webhook",
                execution_id="exec-1",
            )
        )
        event = await waiter
        assert event is not None
        assert event.type == EventType.EXECUTION_COMPLETED
# === OutcomeAggregator Tests ===
class TestOutcomeAggregator:
    """Tests for OutcomeAggregator."""

    def test_record_decision(self, sample_goal):
        """Recording a decision updates both the counter and the log."""
        from framework.schemas.decision import Decision, DecisionType

        agg = OutcomeAggregator(sample_goal)
        agg.record_decision(
            "webhook",
            "exec-1",
            Decision(
                id="dec-1",
                node_id="process-webhook",
                intent="Process incoming webhook",
                decision_type=DecisionType.PATH_CHOICE,
                options=[],
                chosen_option_id="opt-1",
                reasoning="Standard processing path",
            ),
        )
        assert agg._total_decisions == 1
        assert len(agg._decisions) == 1

    @pytest.mark.asyncio
    async def test_evaluate_goal_progress(self, sample_goal):
        """Progress evaluation returns every expected report section."""
        agg = OutcomeAggregator(sample_goal)
        progress = await agg.evaluate_goal_progress()
        for key in (
            "overall_progress",
            "criteria_status",
            "constraint_violations",
            "recommendation",
        ):
            assert key in progress

    def test_record_constraint_violation(self, sample_goal):
        """A recorded violation is stored with its constraint ID."""
        agg = OutcomeAggregator(sample_goal)
        agg.record_constraint_violation(
            constraint_id="c-1",
            description="Rate limit exceeded",
            violation_details="More than 100 requests/minute",
            stream_id="webhook",
            execution_id="exec-1",
        )
        assert len(agg._constraint_violations) == 1
        assert agg._constraint_violations[0].constraint_id == "c-1"
# === AgentRuntime Tests ===
class TestAgentRuntime:
    """Tests for AgentRuntime orchestration."""

    @staticmethod
    def _build_runtime(graph, goal, storage):
        """Construct a bare AgentRuntime for a test case."""
        return AgentRuntime(graph=graph, goal=goal, storage_path=storage)

    @staticmethod
    def _webhook_entry():
        """Build the standard webhook entry point spec used by several tests."""
        return EntryPointSpec(
            id="webhook",
            name="Webhook Handler",
            entry_node="process-webhook",
            trigger_type="webhook",
        )

    def test_register_entry_point(self, sample_graph, sample_goal, temp_storage):
        """A registered entry point appears in the runtime's registry."""
        runtime = self._build_runtime(sample_graph, sample_goal, temp_storage)
        runtime.register_entry_point(
            EntryPointSpec(
                id="manual",
                name="Manual Trigger",
                entry_node="process-webhook",
                trigger_type="manual",
            )
        )
        assert "manual" in runtime._entry_points
        assert len(runtime.get_entry_points()) == 1

    def test_register_duplicate_entry_point_fails(self, sample_graph, sample_goal, temp_storage):
        """Registering the same entry point ID twice raises ValueError."""
        runtime = self._build_runtime(sample_graph, sample_goal, temp_storage)
        spec = self._webhook_entry()
        runtime.register_entry_point(spec)
        with pytest.raises(ValueError, match="already registered"):
            runtime.register_entry_point(spec)

    def test_register_invalid_entry_node_fails(self, sample_graph, sample_goal, temp_storage):
        """Entry points must reference a node that exists in the graph."""
        runtime = self._build_runtime(sample_graph, sample_goal, temp_storage)
        bad_spec = EntryPointSpec(
            id="invalid",
            name="Invalid Entry",
            entry_node="nonexistent-node",
            trigger_type="manual",
        )
        with pytest.raises(ValueError, match="not found in graph"):
            runtime.register_entry_point(bad_spec)

    @pytest.mark.asyncio
    async def test_start_stop_lifecycle(self, sample_graph, sample_goal, temp_storage):
        """start() spins up one stream per entry point; stop() tears them down."""
        runtime = self._build_runtime(sample_graph, sample_goal, temp_storage)
        runtime.register_entry_point(self._webhook_entry())
        assert not runtime.is_running
        await runtime.start()
        assert runtime.is_running
        assert "webhook" in runtime._streams
        await runtime.stop()
        assert not runtime.is_running
        assert len(runtime._streams) == 0

    @pytest.mark.asyncio
    async def test_trigger_requires_running(self, sample_graph, sample_goal, temp_storage):
        """trigger() on a stopped runtime raises RuntimeError."""
        runtime = self._build_runtime(sample_graph, sample_goal, temp_storage)
        runtime.register_entry_point(self._webhook_entry())
        with pytest.raises(RuntimeError, match="not running"):
            await runtime.trigger("webhook", {"test": "data"})
# === GraphSpec Validation Tests ===
# === Integration Tests ===
class TestCreateAgentRuntime:
    """Tests for the create_agent_runtime factory."""

    def test_create_with_entry_points(self, sample_graph, sample_goal, temp_storage):
        """The factory registers every entry point it is handed."""
        specs = [
            EntryPointSpec(
                id="webhook",
                name="Webhook",
                entry_node="process-webhook",
                trigger_type="webhook",
            ),
            EntryPointSpec(
                id="api",
                name="API",
                entry_node="process-api",
                trigger_type="api",
            ),
        ]
        runtime = create_agent_runtime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
            entry_points=specs,
        )
        assert len(runtime.get_entry_points()) == 2
        for entry_id in ("webhook", "api"):
            assert entry_id in runtime._entry_points
# === Timer Entry Point Tests ===
class TestTimerEntryPoints:
    """Tests for timer-driven entry points (interval and cron).

    All tests register a timer entry point against the ``process-webhook``
    node and inspect the runtime's timer bookkeeping (``_timer_tasks`` /
    ``_timer_next_fire``) rather than waiting for a timer to actually fire.
    """

    @pytest.mark.asyncio
    async def test_interval_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
        """Test that interval_minutes timer creates an async task."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-interval",
            name="Interval Timer",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={"interval_minutes": 60},
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            assert len(runtime._timer_tasks) == 1
            assert not runtime._timer_tasks[0].done()
            # Give the async task a moment to set next_fire
            await asyncio.sleep(0.05)
            assert "timer-interval" in runtime._timer_next_fire
        finally:
            await runtime.stop()
        # stop() cancels and clears all timer tasks.
        assert len(runtime._timer_tasks) == 0

    @pytest.mark.asyncio
    async def test_cron_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
        """Test that cron expression timer creates an async task."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-cron",
            name="Cron Timer",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={"cron": "*/5 * * * *"},  # Every 5 minutes
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            assert len(runtime._timer_tasks) == 1
            assert not runtime._timer_tasks[0].done()
            # Give the async task a moment to set next_fire
            await asyncio.sleep(0.05)
            assert "timer-cron" in runtime._timer_next_fire
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_invalid_cron_expression_skipped(
        self, sample_graph, sample_goal, temp_storage, caplog
    ):
        """Test that an invalid cron expression logs a warning and skips."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-bad-cron",
            name="Bad Cron Timer",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={"cron": "not a cron expression"},
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            # No timer task is created for an unparseable expression.
            assert len(runtime._timer_tasks) == 0
            # Case-insensitive match on the warning; the previous
            # `or "Invalid cron" in caplog.text` disjunct was redundant
            # with the lowercased check and has been dropped.
            assert "invalid cron" in caplog.text.lower()
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_cron_takes_priority_over_interval(
        self, sample_graph, sample_goal, temp_storage, caplog
    ):
        """Test that when both cron and interval_minutes are set, cron wins."""
        import logging

        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-both",
            name="Both Timer",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={"cron": "0 9 * * *", "interval_minutes": 30},
        )
        runtime.register_entry_point(entry_spec)
        with caplog.at_level(logging.INFO):
            await runtime.start()
        try:
            assert len(runtime._timer_tasks) == 1
            # Should log cron, not interval
            assert any("cron" in r.message.lower() for r in caplog.records)
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_no_interval_or_cron_warns(self, sample_graph, sample_goal, temp_storage, caplog):
        """Test that timer with neither cron nor interval_minutes logs a warning."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-empty",
            name="Empty Timer",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={},
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            assert len(runtime._timer_tasks) == 0
            assert "no 'cron' or valid 'interval_minutes'" in caplog.text
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_cron_immediate_fires_first(self, sample_graph, sample_goal, temp_storage):
        """Test that run_immediately=True with cron doesn't set next_fire before first run."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="timer-cron-immediate",
            name="Cron Immediate",
            entry_node="process-webhook",
            trigger_type="timer",
            trigger_config={"cron": "0 0 * * *", "run_immediately": True},
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            assert len(runtime._timer_tasks) == 1
            # With run_immediately, the task enters the while loop directly,
            # so _timer_next_fire is NOT set before the first trigger attempt
            # (it pops it at the top of the loop)
            # Give it a moment to start executing
            await asyncio.sleep(0.05)
            # Task should still be running (it will try to trigger and likely fail
            # since there's no LLM, but the task itself continues)
            assert not runtime._timer_tasks[0].done()
        finally:
            await runtime.stop()
# === Cancel All Tasks Tests ===
class TestCancelAllTasks:
    """Tests for cancel_all_tasks and cancel_all_tasks_async.

    Running work is simulated by injecting never-completing asyncio tasks
    into the streams' ``_execution_tasks`` maps; no real graph execution
    takes place.
    """

    @pytest.mark.asyncio
    async def test_cancel_all_tasks_async_returns_false_when_no_tasks(
        self, sample_graph, sample_goal, temp_storage
    ):
        """Test that cancel_all_tasks_async returns False with no running tasks."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="webhook",
            name="Webhook",
            entry_node="process-webhook",
            trigger_type="webhook",
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            result = await runtime.cancel_all_tasks_async()
            assert result is False
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_cancel_all_tasks_async_cancels_running_task(
        self, sample_graph, sample_goal, temp_storage
    ):
        """Test that cancel_all_tasks_async cancels a running task and returns True."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        entry_spec = EntryPointSpec(
            id="webhook",
            name="Webhook",
            entry_node="process-webhook",
            trigger_type="webhook",
        )
        runtime.register_entry_point(entry_spec)
        await runtime.start()
        try:
            # Inject a fake running task into the stream
            stream = runtime._streams["webhook"]

            async def hang_forever():
                # Await a future that never resolves. get_running_loop()
                # replaces the deprecated get_event_loop() call; inside a
                # coroutine both return the running loop.
                await asyncio.get_running_loop().create_future()

            fake_task = asyncio.ensure_future(hang_forever())
            stream._execution_tasks["fake-exec"] = fake_task
            result = await runtime.cancel_all_tasks_async()
            assert result is True
            # Let the CancelledError propagate
            try:
                await fake_task
            except asyncio.CancelledError:
                pass
            assert fake_task.cancelled()
            # Clean up
            del stream._execution_tasks["fake-exec"]
        finally:
            await runtime.stop()

    @pytest.mark.asyncio
    async def test_cancel_all_tasks_async_cancels_multiple_tasks_across_streams(
        self, sample_graph, sample_goal, temp_storage
    ):
        """Test that cancel_all_tasks_async cancels tasks across multiple streams."""
        runtime = AgentRuntime(
            graph=sample_graph,
            goal=sample_goal,
            storage_path=temp_storage,
        )
        # Register two entry points so we get two streams
        runtime.register_entry_point(
            EntryPointSpec(
                id="stream-a",
                name="Stream A",
                entry_node="process-webhook",
                trigger_type="webhook",
            )
        )
        runtime.register_entry_point(
            EntryPointSpec(
                id="stream-b",
                name="Stream B",
                entry_node="process-webhook",
                trigger_type="webhook",
            )
        )
        await runtime.start()
        try:

            async def hang_forever():
                # get_running_loop() replaces the deprecated get_event_loop().
                await asyncio.get_running_loop().create_future()

            stream_a = runtime._streams["stream-a"]
            stream_b = runtime._streams["stream-b"]
            # Two tasks in stream A, one task in stream B
            task_a1 = asyncio.ensure_future(hang_forever())
            task_a2 = asyncio.ensure_future(hang_forever())
            task_b1 = asyncio.ensure_future(hang_forever())
            stream_a._execution_tasks["exec-a1"] = task_a1
            stream_a._execution_tasks["exec-a2"] = task_a2
            stream_b._execution_tasks["exec-b1"] = task_b1
            result = await runtime.cancel_all_tasks_async()
            assert result is True
            # Let CancelledErrors propagate
            for task in [task_a1, task_a2, task_b1]:
                try:
                    await task
                except asyncio.CancelledError:
                    pass
                assert task.cancelled()
            # Clean up
            del stream_a._execution_tasks["exec-a1"]
            del stream_a._execution_tasks["exec-a2"]
            del stream_b._execution_tasks["exec-b1"]
        finally:
            await runtime.stop()
# Allow running this test module directly (python <file>) outside the pytest CLI.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
@@ -1,268 +0,0 @@
"""Tests for webhook idempotency key support in AgentRuntime.trigger()."""
import asyncio
import time
from collections import OrderedDict
from unittest.mock import AsyncMock, MagicMock
import pytest
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
def _make_runtime(ttl=300.0, max_keys=10000):
    """Build a bare AgentRuntime carrying only the idempotency-cache state.

    ``object.__new__`` bypasses ``__init__`` and its heavy dependencies
    (storage, LLM, skills); these tests only touch the cache and config.
    """
    rt = object.__new__(AgentRuntime)
    rt._config = AgentRuntimeConfig(
        idempotency_ttl_seconds=ttl, idempotency_max_keys=max_keys
    )
    rt._running = True
    rt._lock = asyncio.Lock()
    # The cache proper: key -> execution id (insertion-ordered for FIFO
    # eviction) plus key -> insertion timestamp.
    rt._idempotency_keys = OrderedDict()
    rt._idempotency_times = {}
    rt._entry_points = {}
    rt._streams = {}
    rt._graphs = {}
    rt._graph_id = "primary"
    rt._active_graph_id = "primary"
    return rt
def _make_runtime_with_stream(ttl=300.0, max_keys=10000):
    """Create a mock runtime whose stream.execute() returns unique IDs."""
    rt = _make_runtime(ttl=ttl, max_keys=max_keys)
    # Mutable cell instead of nonlocal so the closure stays a one-liner body.
    calls = [0]

    async def _fake_execute(*args, **kwargs):
        calls[0] += 1
        return f"session-{calls[0]:04d}"

    stream = MagicMock()
    stream.execute = _fake_execute
    rt._streams = {"webhook": stream}
    rt._entry_points = {"webhook": MagicMock()}
    return rt
class TestIdempotencyConfig:
    """Verify idempotency configuration defaults."""

    def test_default_ttl(self):
        """Keys are remembered for five minutes by default."""
        assert AgentRuntimeConfig().idempotency_ttl_seconds == 300.0

    def test_default_max_keys(self):
        """The cache caps out at 10k keys by default."""
        assert AgentRuntimeConfig().idempotency_max_keys == 10000

    def test_custom_config(self):
        """Both knobs are overridable at construction time."""
        cfg = AgentRuntimeConfig(idempotency_ttl_seconds=60.0, idempotency_max_keys=100)
        assert cfg.idempotency_ttl_seconds == 60.0
        assert cfg.idempotency_max_keys == 100
class TestIdempotencyCache:
    """Exercise the idempotency cache and its pruning logic directly."""

    @staticmethod
    def _seed(runtime, key, exec_id, age=0.0):
        """Insert *key* -> *exec_id* into the cache, backdated by *age* seconds."""
        runtime._idempotency_keys[key] = exec_id
        runtime._idempotency_times[key] = time.time() - age

    def test_cache_stores_and_retrieves_key(self):
        rt = _make_runtime()
        self._seed(rt, "stripe-evt-123", "exec-001")
        assert rt._idempotency_keys.get("stripe-evt-123") == "exec-001"

    def test_cache_returns_none_for_unknown_key(self):
        rt = _make_runtime()
        assert rt._idempotency_keys.get("unknown") is None

    def test_prune_removes_expired_keys(self):
        rt = _make_runtime(ttl=0.1)
        self._seed(rt, "old-key", "exec-old", age=1.0)  # already past the TTL
        rt._prune_idempotency_keys()
        assert "old-key" not in rt._idempotency_keys
        assert "old-key" not in rt._idempotency_times

    def test_prune_keeps_fresh_keys(self):
        rt = _make_runtime(ttl=300.0)
        self._seed(rt, "fresh-key", "exec-fresh")
        rt._prune_idempotency_keys()
        assert "fresh-key" in rt._idempotency_keys

    def test_prune_respects_max_keys(self):
        rt = _make_runtime(max_keys=2)
        for i in range(3):
            self._seed(rt, f"key-{i}", f"exec-{i}")
        rt._prune_idempotency_keys()
        assert len(rt._idempotency_keys) == 2
        # FIFO eviction: the oldest insertion (key-0) goes first.
        assert "key-0" not in rt._idempotency_keys
        assert "key-1" in rt._idempotency_keys
        assert "key-2" in rt._idempotency_keys

    def test_prune_evicts_fifo(self):
        rt = _make_runtime(max_keys=1)
        self._seed(rt, "first", "exec-1")
        self._seed(rt, "second", "exec-2")
        rt._prune_idempotency_keys()
        assert len(rt._idempotency_keys) == 1
        assert "second" in rt._idempotency_keys
        assert "first" not in rt._idempotency_keys

    def test_mixed_expired_and_max_size(self):
        rt = _make_runtime(ttl=0.1, max_keys=2)
        self._seed(rt, "expired", "exec-e", age=1.0)
        self._seed(rt, "fresh-1", "exec-f1")
        self._seed(rt, "fresh-2", "exec-f2")
        rt._prune_idempotency_keys()
        # The expired entry is dropped, leaving exactly max_keys fresh ones.
        assert "expired" not in rt._idempotency_keys
        assert "fresh-1" in rt._idempotency_keys
        assert "fresh-2" in rt._idempotency_keys
class TestTriggerIdempotency:
    """Tests for trigger() idempotency deduplication."""

    @staticmethod
    def _param(func, name):
        """Return the inspect.Parameter called *name* on *func*, or None."""
        import inspect

        return inspect.signature(func).parameters.get(name)

    def test_trigger_accepts_idempotency_key(self):
        """trigger() accepts idempotency_key as a keyword argument."""
        assert self._param(AgentRuntime.trigger, "idempotency_key") is not None

    def test_idempotency_key_defaults_to_none(self):
        """idempotency_key defaults to None (backward compatible)."""
        assert self._param(AgentRuntime.trigger, "idempotency_key").default is None

    def test_trigger_and_wait_accepts_idempotency_key(self):
        """trigger_and_wait() also accepts idempotency_key."""
        assert self._param(AgentRuntime.trigger_and_wait, "idempotency_key") is not None

    def test_trigger_and_wait_idempotency_key_defaults_to_none(self):
        """trigger_and_wait() idempotency_key defaults to None."""
        assert self._param(AgentRuntime.trigger_and_wait, "idempotency_key").default is None

    @pytest.mark.asyncio
    async def test_duplicate_key_returns_cached_id(self):
        """Same idempotency key within TTL returns the cached execution ID."""
        rt = _make_runtime_with_stream()
        first = await rt.trigger("webhook", {}, idempotency_key="stripe-evt-001")
        second = await rt.trigger("webhook", {}, idempotency_key="stripe-evt-001")
        assert first == second == "session-0001"

    @pytest.mark.asyncio
    async def test_different_keys_produce_different_ids(self):
        """Different idempotency keys start separate executions."""
        rt = _make_runtime_with_stream()
        id_a = await rt.trigger("webhook", {}, idempotency_key="evt-aaa")
        id_b = await rt.trigger("webhook", {}, idempotency_key="evt-bbb")
        assert id_a != id_b
        assert (id_a, id_b) == ("session-0001", "session-0002")

    @pytest.mark.asyncio
    async def test_none_key_always_starts_new_execution(self):
        """key=None (default) skips dedup — every call starts fresh."""
        rt = _make_runtime_with_stream()
        first = await rt.trigger("webhook", {})
        second = await rt.trigger("webhook", {})
        assert first != second
        assert len(rt._idempotency_keys) == 0  # nothing cached

    @pytest.mark.asyncio
    async def test_expired_key_allows_new_execution(self):
        """After TTL expires, the same key starts a new execution."""
        rt = _make_runtime_with_stream(ttl=0.1)
        first = await rt.trigger("webhook", {}, idempotency_key="evt-expire")
        # Backdate the cached timestamp so the key looks expired
        rt._idempotency_times["evt-expire"] = time.time() - 1.0
        second = await rt.trigger("webhook", {}, idempotency_key="evt-expire")
        assert first != second
        assert first == "session-0001"
        assert second == "session-0002"

    @pytest.mark.asyncio
    async def test_stream_not_found_does_not_cache(self):
        """If entry point doesn't exist, nothing is cached."""
        rt = _make_runtime_with_stream()
        with pytest.raises(ValueError, match="not found"):
            await rt.trigger("nonexistent", {}, idempotency_key="evt-orphan")
        assert "evt-orphan" not in rt._idempotency_keys

    @pytest.mark.asyncio
    async def test_execute_error_does_not_cache(self):
        """If stream.execute() raises, nothing is cached so retries can go through."""
        rt = _make_runtime()
        failing_stream = MagicMock()
        failing_stream.execute = AsyncMock(side_effect=RuntimeError("stream not running"))
        rt._streams = {"webhook": failing_stream}
        rt._entry_points = {"webhook": MagicMock()}
        with pytest.raises(RuntimeError, match="stream not running"):
            await rt.trigger("webhook", {}, idempotency_key="evt-123")
        assert "evt-123" not in rt._idempotency_keys

    @pytest.mark.asyncio
    async def test_cache_holds_real_execution_id(self):
        """Cached value matches the actual execution ID from execute()."""
        rt = _make_runtime_with_stream()
        exec_id = await rt.trigger("webhook", {}, idempotency_key="evt-real")
        assert rt._idempotency_keys.get("evt-real") == exec_id == "session-0001"
@@ -1,29 +0,0 @@
"""Tests for custom session-backed runtime logging paths."""
from pathlib import Path
from unittest.mock import MagicMock
from framework.graph.executor import GraphExecutor
from framework.runtime.runtime_log_store import RuntimeLogStore
from framework.runtime.runtime_logger import RuntimeLogger
def test_graph_executor_uses_custom_session_dir_name_for_runtime_logs():
    """The runtime-log session id is derived from the sessions/<name> directory."""
    session_dir = Path("/tmp/test-agent/sessions/my-custom-session")
    executor = GraphExecutor(runtime=MagicMock(), storage_path=session_dir)
    assert executor._get_runtime_log_session_id() == "my-custom-session"
def test_runtime_logger_creates_session_log_dir_for_custom_session_id(tmp_path):
    """start_run() honours a custom session id and creates its logs directory."""
    agent_base = tmp_path / ".hive" / "agents" / "test_agent"
    agent_base.mkdir(parents=True)
    logger = RuntimeLogger(store=RuntimeLogStore(agent_base), agent_id="test-agent")
    run_id = logger.start_run(goal_id="goal-1", session_id="my-custom-session")
    # The custom session id is used verbatim as the run id and on disk.
    assert run_id == "my-custom-session"
    assert (agent_base / "sessions" / "my-custom-session" / "logs").is_dir()
@@ -1,716 +0,0 @@
"""
Tests for WebhookServer and event-driven entry points.
"""
import asyncio
import hashlib
import hmac as hmac_mod
import json
import tempfile
from pathlib import Path
from unittest.mock import patch
import aiohttp
import pytest
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
from framework.runtime.execution_stream import EntryPointSpec
from framework.runtime.webhook_server import (
WebhookRoute,
WebhookServer,
WebhookServerConfig,
)
def _make_server(event_bus: EventBus, routes: list[WebhookRoute] | None = None):
    """Build a WebhookServer on 127.0.0.1 with an OS-assigned port (port=0)."""
    server = WebhookServer(event_bus, WebhookServerConfig(host="127.0.0.1", port=0))
    if routes:
        for route in routes:
            server.add_route(route)
    return server
def _base_url(server: WebhookServer) -> str:
    """Base URL of a *running* server (its port is only assigned while running)."""
    return "http://127.0.0.1:" + str(server.port)
class TestWebhookServerLifecycle:
    """Tests for server start/stop."""

    @pytest.mark.asyncio
    async def test_start_stop(self):
        """start() binds a port; stop() releases it and clears the port."""
        server = _make_server(
            EventBus(),
            [WebhookRoute(source_id="test", path="/webhooks/test", methods=["POST"])],
        )
        await server.start()
        assert server.is_running
        assert server.port is not None
        await server.stop()
        assert not server.is_running
        assert server.port is None

    @pytest.mark.asyncio
    async def test_no_routes_skips_start(self):
        """A server with no routes declines to start at all."""
        server = _make_server(EventBus())  # no routes
        await server.start()
        assert not server.is_running

    @pytest.mark.asyncio
    async def test_stop_when_not_started(self):
        """stop() on a never-started server is a harmless no-op."""
        server = _make_server(EventBus())
        await server.stop()
        assert not server.is_running
class TestWebhookEventPublishing:
    """Tests for HTTP request -> EventBus event publishing."""

    @pytest.mark.asyncio
    async def test_post_publishes_webhook_received(self):
        """A POST to a registered route publishes one WEBHOOK_RECEIVED event."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="gh", path="/webhooks/github", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/github",
                    json={"action": "opened", "number": 42},
                ) as resp:
                    # 202 Accepted: the request is queued for async handling.
                    assert resp.status == 202
                    body = await resp.json()
                    assert body["status"] == "accepted"
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
            event = received[0]
            assert event.type == EventType.WEBHOOK_RECEIVED
            assert event.stream_id == "gh"
            assert event.data["path"] == "/webhooks/github"
            assert event.data["method"] == "POST"
            assert event.data["payload"] == {"action": "opened", "number": 42}
            assert isinstance(event.data["headers"], dict)
            assert event.data["query_params"] == {}
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_query_params_included(self):
        """URL query parameters are forwarded on the published event."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="hook", path="/webhooks/hook", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/hook?source=test&v=2",
                    json={"data": "hello"},
                ) as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
            assert received[0].data["query_params"] == {"source": "test", "v": "2"}
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_non_json_body(self):
        """A non-JSON body is wrapped under a "raw_body" key in the payload."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="raw", path="/webhooks/raw", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/raw",
                    data=b"plain text body",
                    headers={"Content-Type": "text/plain"},
                ) as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
            assert received[0].data["payload"] == {"raw_body": "plain text body"}
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_empty_body(self):
        """An empty request body results in an empty payload dict."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="empty", path="/webhooks/empty", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(f"{_base_url(server)}/webhooks/empty") as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
            assert received[0].data["payload"] == {}
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_multiple_routes(self):
        """Each registered route publishes events under its own source_id."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]),
                WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/a", json={"from": "a"}
                ) as resp:
                    assert resp.status == 202
                async with session.post(
                    f"{_base_url(server)}/webhooks/b", json={"from": "b"}
                ) as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 2
            stream_ids = {e.stream_id for e in received}
            assert stream_ids == {"a", "b"}
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_filter_stream_subscription(self):
        """Subscribers can filter by stream_id (source_id)."""
        bus = EventBus()
        a_events = []
        b_events = []

        async def handle_a(event):
            a_events.append(event)

        async def handle_b(event):
            b_events.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_a, filter_stream="a")
        bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_b, filter_stream="b")
        server = _make_server(
            bus,
            [
                WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]),
                WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                await session.post(f"{_base_url(server)}/webhooks/a", json={"x": 1})
                await session.post(f"{_base_url(server)}/webhooks/b", json={"x": 2})
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            # Each filtered handler sees only its own stream's event.
            assert len(a_events) == 1
            assert a_events[0].data["payload"] == {"x": 1}
            assert len(b_events) == 1
            assert b_events[0].data["payload"] == {"x": 2}
        finally:
            await server.stop()
class TestHMACVerification:
    """Tests for HMAC-SHA256 signature verification."""

    @pytest.mark.asyncio
    async def test_valid_signature_accepted(self):
        """A correctly signed request is accepted (202) and published."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        secret = "test-secret-key"
        server = _make_server(
            bus,
            [
                WebhookRoute(
                    source_id="secure",
                    path="/webhooks/secure",
                    methods=["POST"],
                    secret=secret,
                ),
            ],
        )
        await server.start()
        try:
            # Sign the exact request bytes; header uses the GitHub-style
            # "sha256=<hexdigest>" format.
            body = json.dumps({"event": "push"}).encode()
            sig = hmac_mod.new(secret.encode(), body, hashlib.sha256).hexdigest()
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/secure",
                    data=body,
                    headers={
                        "Content-Type": "application/json",
                        "X-Hub-Signature-256": f"sha256={sig}",
                    },
                ) as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_invalid_signature_rejected(self):
        """A wrong signature is rejected with 401 and nothing is published."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(
                    source_id="secure",
                    path="/webhooks/secure",
                    methods=["POST"],
                    secret="real-secret",
                ),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/secure",
                    json={"event": "push"},
                    headers={"X-Hub-Signature-256": "sha256=invalidsignature"},
                ) as resp:
                    assert resp.status == 401
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 0  # No event published
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_missing_signature_rejected(self):
        """Omitting the signature header on a secured route yields 401."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(
                    source_id="secure",
                    path="/webhooks/secure",
                    methods=["POST"],
                    secret="my-secret",
                ),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                # No X-Hub-Signature-256 header
                async with session.post(
                    f"{_base_url(server)}/webhooks/secure",
                    json={"event": "push"},
                ) as resp:
                    assert resp.status == 401
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 0
        finally:
            await server.stop()

    @pytest.mark.asyncio
    async def test_no_secret_skips_verification(self):
        """Routes without a secret accept any request."""
        bus = EventBus()
        received = []

        async def handler(event):
            received.append(event)

        bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
        server = _make_server(
            bus,
            [
                WebhookRoute(
                    source_id="open",
                    path="/webhooks/open",
                    methods=["POST"],
                    secret=None,
                ),
            ],
        )
        await server.start()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{_base_url(server)}/webhooks/open",
                    json={"data": "test"},
                ) as resp:
                    assert resp.status == 202
            # Give event bus time to dispatch
            await asyncio.sleep(0.05)
            assert len(received) == 1
        finally:
            await server.stop()
class TestEventDrivenEntryPoints:
    """Tests for event-driven entry points wired through AgentRuntime."""

    def _make_graph_and_goal(self):
        """Minimal graph + goal for testing entry point triggering."""
        from framework.graph import Goal
        from framework.graph.edge import GraphSpec
        from framework.graph.goal import SuccessCriterion
        from framework.graph.node import NodeSpec

        # Single event_loop node so the graph validates; it is never executed
        # in these tests (trigger() is patched out).
        nodes = [
            NodeSpec(
                id="process-event",
                name="Process Event",
                description="Process incoming event",
                node_type="event_loop",
                input_keys=["event"],
                output_keys=["result"],
            ),
        ]
        graph = GraphSpec(
            id="test-graph",
            goal_id="test-goal",
            version="1.0.0",
            entry_node="process-event",
            entry_points={"start": "process-event"},
            terminal_nodes=[],
            pause_nodes=[],
            nodes=nodes,
            edges=[],
        )
        goal = Goal(
            id="test-goal",
            name="Test Goal",
            description="Test",
            success_criteria=[
                SuccessCriterion(
                    id="sc-1",
                    description="Done",
                    metric="done",
                    target="yes",
                    weight=1.0,
                ),
            ],
        )
        return graph, goal

    @pytest.mark.asyncio
    async def test_event_entry_point_subscribes_to_bus(self):
        """Entry point with trigger_type='event' subscribes and triggers on matching events."""
        graph, goal = self._make_graph_and_goal()
        # webhook_port=0 asks the OS for a free port.
        config = AgentRuntimeConfig(
            webhook_host="127.0.0.1",
            webhook_port=0,
            webhook_routes=[
                {"source_id": "gh", "path": "/webhooks/github"},
            ],
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            runtime = AgentRuntime(
                graph=graph,
                goal=goal,
                storage_path=Path(tmpdir),
                config=config,
            )
            runtime.register_entry_point(
                EntryPointSpec(
                    id="gh-handler",
                    name="GitHub Handler",
                    entry_node="process-event",
                    trigger_type="event",
                    trigger_config={
                        "event_types": ["webhook_received"],
                        "filter_stream": "gh",
                    },
                )
            )
            trigger_calls = []

            # Patch trigger() so no real graph execution (and no LLM) runs.
            async def mock_trigger(ep_id, data, **kwargs):
                trigger_calls.append((ep_id, data))

            with patch.object(runtime, "trigger", side_effect=mock_trigger):
                await runtime.start()
                try:
                    assert runtime.webhook_server is not None
                    assert runtime.webhook_server.is_running
                    port = runtime.webhook_server.port
                    async with aiohttp.ClientSession() as session:
                        async with session.post(
                            f"http://127.0.0.1:{port}/webhooks/github",
                            json={"action": "push", "ref": "main"},
                        ) as resp:
                            assert resp.status == 202
                    # Allow HTTP -> event bus -> entry point dispatch to settle.
                    await asyncio.sleep(0.1)
                    assert len(trigger_calls) == 1
                    ep_id, data = trigger_calls[0]
                    assert ep_id == "gh-handler"
                    assert "event" in data
                    assert data["event"]["type"] == "webhook_received"
                    assert data["event"]["stream_id"] == "gh"
                    assert data["event"]["data"]["payload"] == {
                        "action": "push",
                        "ref": "main",
                    }
                finally:
                    await runtime.stop()
                    # stop() tears the webhook server down entirely.
                    assert runtime.webhook_server is None

    @pytest.mark.asyncio
    async def test_event_entry_point_filter_stream(self):
        """Entry point only triggers for matching stream_id (source_id)."""
        graph, goal = self._make_graph_and_goal()
        config = AgentRuntimeConfig(
            webhook_routes=[
                {"source_id": "github", "path": "/webhooks/github"},
                {"source_id": "stripe", "path": "/webhooks/stripe"},
            ],
            webhook_port=0,
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            runtime = AgentRuntime(
                graph=graph,
                goal=goal,
                storage_path=Path(tmpdir),
                config=config,
            )
            runtime.register_entry_point(
                EntryPointSpec(
                    id="gh-only",
                    name="GitHub Only",
                    entry_node="process-event",
                    trigger_type="event",
                    trigger_config={
                        "event_types": ["webhook_received"],
                        "filter_stream": "github",
                    },
                )
            )
            trigger_calls = []

            async def mock_trigger(ep_id, data, **kwargs):
                trigger_calls.append((ep_id, data))

            with patch.object(runtime, "trigger", side_effect=mock_trigger):
                await runtime.start()
                try:
                    port = runtime.webhook_server.port
                    async with aiohttp.ClientSession() as session:
                        # POST to stripe — should NOT trigger
                        await session.post(
                            f"http://127.0.0.1:{port}/webhooks/stripe",
                            json={"type": "payment"},
                        )
                        # POST to github — should trigger
                        await session.post(
                            f"http://127.0.0.1:{port}/webhooks/github",
                            json={"action": "opened"},
                        )
                    await asyncio.sleep(0.1)
                    assert len(trigger_calls) == 1
                    assert trigger_calls[0][0] == "gh-only"
                finally:
                    await runtime.stop()

    @pytest.mark.asyncio
    async def test_no_webhook_routes_skips_server(self):
        """Runtime without webhook_routes does not start a webhook server."""
        graph, goal = self._make_graph_and_goal()
        with tempfile.TemporaryDirectory() as tmpdir:
            runtime = AgentRuntime(
                graph=graph,
                goal=goal,
                storage_path=Path(tmpdir),
            )
            runtime.register_entry_point(
                EntryPointSpec(
                    id="manual",
                    name="Manual",
                    entry_node="process-event",
                    trigger_type="manual",
                )
            )
            await runtime.start()
            try:
                assert runtime.webhook_server is None
            finally:
                await runtime.stop()

    @pytest.mark.asyncio
    async def test_event_entry_point_custom_event(self):
        """Entry point can subscribe to CUSTOM events, not just webhooks."""
        graph, goal = self._make_graph_and_goal()
        with tempfile.TemporaryDirectory() as tmpdir:
            runtime = AgentRuntime(
                graph=graph,
                goal=goal,
                storage_path=Path(tmpdir),
            )
            runtime.register_entry_point(
                EntryPointSpec(
                    id="custom-handler",
                    name="Custom Handler",
                    entry_node="process-event",
                    trigger_type="event",
                    trigger_config={
                        "event_types": ["custom"],
                    },
                )
            )
            trigger_calls = []

            async def mock_trigger(ep_id, data, **kwargs):
                trigger_calls.append((ep_id, data))

            with patch.object(runtime, "trigger", side_effect=mock_trigger):
                await runtime.start()
                try:
                    # Publish directly on the runtime's bus — no HTTP involved.
                    await runtime.event_bus.publish(
                        AgentEvent(
                            type=EventType.CUSTOM,
                            stream_id="some-source",
                            data={"key": "value"},
                        )
                    )
                    await asyncio.sleep(0.1)
                    assert len(trigger_calls) == 1
                    assert trigger_calls[0][0] == "custom-handler"
                    assert trigger_calls[0][1]["event"]["type"] == "custom"
                    assert trigger_calls[0][1]["event"]["data"]["key"] == "value"
                finally:
                    await runtime.stop()
+192
View File
@@ -0,0 +1,192 @@
"""Declarative agent configuration schema.
Allows defining agents via JSON/YAML config files instead of Python modules.
The ``AgentConfig`` model is the top-level schema loaded from ``agent.json``.
The runner detects this format by checking for a ``name`` key at the top level.
Template variables
------------------
System prompts and identity_prompt support ``{{variable_name}}`` placeholders.
These are resolved at load time from ``AgentConfig.variables``.
"""
from __future__ import annotations
from pydantic import BaseModel, Field
class ToolAccessConfig(BaseModel):
    """Declarative tool access policy.

    Controls which tools a node/agent has access to.

    * ``all`` -- every tool from the registry.
    * ``explicit`` -- only tools listed in ``allowed`` (default; empty = zero tools).
    * ``none`` -- no tools at all.

    ``denied`` is subtracted after ``allowed`` is applied, so it can carve
    exceptions out of either policy.
    """

    # NOTE(review): the policy string is not validated by this model; an
    # unknown value is accepted here -- presumably rejected by the loader.
    # TODO confirm.
    policy: str = Field(
        default="explicit",
        description="One of: 'all', 'explicit', 'none'.",
    )
    allowed: list[str] = Field(
        default_factory=list,
        description="Tool names when policy='explicit'.",
    )
    denied: list[str] = Field(
        default_factory=list,
        description="Tool names to deny (applied after allowed).",
    )
class NodeConfig(BaseModel):
    """Declarative node definition.

    One entry per node in the execution graph. Optional fields left unset
    fall back to agent-level defaults where the schema provides them
    (``tools``, ``model``).
    """

    # Unique node ID; referenced by edges, entry_node, terminal/pause lists.
    id: str
    name: str | None = None
    description: str | None = None
    # Only "event_loop" is documented as a valid value in this schema.
    node_type: str = Field(
        default="event_loop",
        description="event_loop",
    )
    # Node-specific system prompt; supports {{variable}} templating
    # (resolved at load time from AgentConfig.variables, per module docstring).
    system_prompt: str | None = None
    # Defaults to policy='explicit' with an empty allow list, i.e. no tools.
    tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig)
    # Per-node model override; None presumably means "inherit the
    # agent-level model" -- TODO confirm in the loader.
    model: str | None = None
    # Keys this node reads from / writes to shared state (names only;
    # enforcement happens outside this schema).
    input_keys: list[str] = Field(default_factory=list)
    output_keys: list[str] = Field(default_factory=list)
    # Output keys that are allowed to be missing/None.
    nullable_output_keys: list[str] = Field(default_factory=list)
    max_iterations: int = 30
    max_node_visits: int = 1
    # Whether this node interacts directly with the end user.
    client_facing: bool = False
    # Free-text per-node criteria; presumably consumed by a judge step
    # (skip_judge below suggests judging is per-node). TODO confirm.
    success_criteria: str | None = None
    failure_criteria: str | None = None
    skip_judge: bool = False
    # None presumably means "use the framework default retry count".
    max_retries: int | None = None
class EdgeConfig(BaseModel):
    """Declarative edge definition.

    Connects two nodes; ``condition`` decides when the edge is taken.
    """

    from_node: str = Field(description="Source node ID.")
    to_node: str = Field(description="Target node ID.")
    condition: str = Field(
        default="on_success",
        description="always | on_success | on_failure | conditional | llm_decide",
    )
    # Expression used when condition='conditional' (presumed from the
    # name -- evaluation semantics are not visible here; TODO confirm).
    condition_expr: str | None = None
    # Key-to-key mapping applied when data flows across this edge.
    # NOTE(review): mapping direction (target<-source vs source->target)
    # is not visible here -- confirm against the loader/orchestrator.
    input_mapping: dict[str, str] = Field(default_factory=dict)
    # Tie-breaker when multiple edges are eligible; which direction wins
    # is decided by the orchestrator, not this schema.
    priority: int = 1
class GoalConfig(BaseModel):
    """Simplified goal definition for declarative config.

    A trimmed-down counterpart of the runtime Goal: a free-text
    description plus optional success criteria and constraints.
    """

    # What the agent is trying to achieve, in plain language.
    description: str
    # Conditions that count as success for a run.
    success_criteria: list[str] = Field(default_factory=list)
    # Hard rules the agent must not violate while pursuing the goal.
    constraints: list[str] = Field(default_factory=list)
class EntryPointConfig(BaseModel):
    """Entry point configuration.

    Declarative counterpart of the runtime ``EntryPointSpec``: names the
    node a run starts from and how the run is triggered.
    """

    # Stable identifier; "default" matches the implicit entry point created
    # when AgentConfig.entry_points is empty.
    id: str = "default"
    name: str = "Default"
    entry_node: str | None = None  # defaults to AgentConfig.entry_node
    trigger_type: str = Field(
        default="manual",
        # "event" added to the documented values: the runtime accepts
        # event-driven entry points (EntryPointSpec with
        # trigger_type="event"), so the declarative schema should say so.
        description="manual | scheduled | timer | event",
    )
    # Trigger-specific options (e.g. event_types / filter_stream for
    # event triggers); free-form by design.
    trigger_config: dict = Field(default_factory=dict)
    isolation_level: str = "shared"
    # None presumably means "no concurrency cap" -- TODO confirm.
    max_concurrent: int | None = None
class MCPServerRef(BaseModel):
    """Reference to an MCP server to connect for this agent.

    ``name`` identifies the server; ``config`` carries optional
    server-specific options (None = use the server's defaults).
    """

    name: str
    # Free-form settings; shape depends on the referenced server.
    config: dict | None = None
class MetadataConfig(BaseModel):
    """Agent metadata for display / intro messages."""

    # Message shown when a session with the agent starts; empty = none.
    intro_message: str = ""
class AgentConfig(BaseModel):
    """Top-level declarative agent configuration.

    Load from ``agent.json`` and pass to
    :func:`framework.runner.runner.load_agent_config` to build the
    ``GraphSpec`` + ``Goal`` pair.

    Example (YAML)::

        name: lead-enrichment-agent
        version: 1.0.0
        variables:
          spreadsheet_id: "1ZVx..."
          sheet_name: "contacts"
        goal:
          description: "Enrich leads in Google Sheets"
          success_criteria:
            - "All unprocessed leads enriched"
          constraints:
            - "Browser-only research"
        identity_prompt: |
          You are the Lead Enrichment Agent...
        nodes:
          - id: start
            tools: {policy: explicit, allowed: [google_sheets_get_values]}
            system_prompt: |
              Spreadsheet ID: {{spreadsheet_id}}
        ...
    """

    # Agent name; the runner detects this config format by the presence of
    # a top-level "name" key (per module docstring).
    name: str
    version: str = "1.0.0"
    description: str | None = None
    metadata: MetadataConfig = Field(default_factory=MetadataConfig)

    # Template variables -- substituted into prompts via {{var_name}}
    # at load time (system_prompt and identity_prompt).
    variables: dict[str, str] = Field(default_factory=dict)

    # Goal
    goal: GoalConfig

    # Graph structure
    nodes: list[NodeConfig]
    edges: list[EdgeConfig]
    # Must match a NodeConfig.id; where execution begins by default.
    entry_node: str
    terminal_nodes: list[str] = Field(default_factory=list)
    pause_nodes: list[str] = Field(default_factory=list)

    # Entry points (if omitted, a single "default" manual entry is created)
    entry_points: list[EntryPointConfig] = Field(default_factory=list)

    # Agent-level tool defaults (nodes inherit unless they override)
    tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig)
    mcp_servers: list[MCPServerRef] = Field(default_factory=list)

    # LLM / execution
    model: str | None = None
    max_tokens: int = 4096
    conversation_mode: str = "continuous"
    # Shared identity preamble; supports {{variable}} templating.
    identity_prompt: str = ""
    # Loop limits; lambda factory so each instance gets a fresh dict.
    loop_config: dict = Field(
        default_factory=lambda: {
            "max_iterations": 100,
            "max_tool_calls_per_turn": 30,
            "max_context_tokens": 32000,
        },
    )

    # Pipeline overrides (per-agent, merged with global config)
    pipeline: dict = Field(
        default_factory=dict,
        description="Per-agent pipeline stage overrides. Same format as global pipeline config.",
    )

    # Resource limits
    # None presumably means "no spend cap per run" -- TODO confirm where
    # this is enforced.
    max_cost_per_run: float | None = None
+1 -1
View File
@@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Any
from pydantic import AliasChoices, BaseModel, Field, computed_field
if TYPE_CHECKING:
from framework.graph.executor import ExecutionResult
from framework.orchestrator.orchestrator import ExecutionResult
from framework.schemas.run import Run
+6 -2
View File
@@ -28,8 +28,11 @@ def _get_allowed_agent_roots() -> tuple[Path, ...]:
"""
global _ALLOWED_AGENT_ROOTS
if _ALLOWED_AGENT_ROOTS is None:
from framework.config import COLONIES_DIR
_ALLOWED_AGENT_ROOTS = (
(_REPO_ROOT / "exports").resolve(),
COLONIES_DIR.resolve(), # ~/.hive/colonies/
(_REPO_ROOT / "exports").resolve(), # compat fallback
(_REPO_ROOT / "examples").resolve(),
(Path.home() / ".hive" / "agents").resolve(),
)
@@ -53,7 +56,8 @@ def validate_agent_path(agent_path: str | Path) -> Path:
if resolved.is_relative_to(root) and resolved != root:
return resolved
raise ValueError(
"agent_path must be inside an allowed directory (exports/, examples/, or ~/.hive/agents/)"
"agent_path must be inside an allowed directory "
"(~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)"
)

Some files were not shown because too many files have changed in this diff Show More