Merge branch 'feature/hive-experimental-comp-pipeline' into feat/open-hive-colony
This commit is contained in:
@@ -70,6 +70,8 @@ tmp/
|
|||||||
temp/
|
temp/
|
||||||
|
|
||||||
exports/*
|
exports/*
|
||||||
|
exports.old*
|
||||||
|
artifacts/*
|
||||||
|
|
||||||
.claude/settings.local.json
|
.claude/settings.local.json
|
||||||
|
|
||||||
|
|||||||
+17
-65
@@ -1,71 +1,23 @@
|
|||||||
"""
|
"""Hive Agent Framework.
|
||||||
Aden Hive Framework: A goal-driven agent runtime optimized for Builder observability.
|
|
||||||
|
|
||||||
The runtime is designed around DECISIONS, not just actions. Every significant
|
Core classes:
|
||||||
choice the agent makes is captured with:
|
AgentHost -- hosts agents, manages entry points and pipeline
|
||||||
- What it was trying to do (intent)
|
Orchestrator -- routes between nodes in a graph
|
||||||
- What options it considered
|
AgentLoop -- the LLM + tool execution loop (one per node)
|
||||||
- What it chose and why
|
AgentLoader -- loads agent.json from disk, builds pipeline
|
||||||
- What happened as a result
|
DecisionTracker -- records decisions for post-hoc analysis
|
||||||
- Whether that was good or bad (evaluated post-hoc)
|
|
||||||
|
|
||||||
This gives the Builder LLM the information it needs to improve agent behavior.
|
|
||||||
|
|
||||||
## Testing Framework
|
|
||||||
|
|
||||||
The framework includes a Goal-Based Testing system (Goal → Agent → Eval):
|
|
||||||
- Generate tests from Goal success_criteria and constraints
|
|
||||||
- Mandatory user approval before tests are stored
|
|
||||||
- Parallel test execution with error categorization
|
|
||||||
- Debug tools with fix suggestions
|
|
||||||
|
|
||||||
See `framework.testing` for details.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from framework.llm import LLMProvider
|
from framework.agent_loop import AgentLoop
|
||||||
|
from framework.host import AgentHost
|
||||||
try:
|
from framework.loader import AgentLoader
|
||||||
from framework.llm import AnthropicProvider # noqa: F401
|
from framework.orchestrator import Orchestrator
|
||||||
except ImportError:
|
from framework.tracker import DecisionTracker
|
||||||
pass
|
|
||||||
from framework.runner import AgentRunner
|
|
||||||
from framework.runtime.core import Runtime
|
|
||||||
from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome
|
|
||||||
from framework.schemas.run import Problem, Run, RunSummary
|
|
||||||
|
|
||||||
# Testing framework
|
|
||||||
from framework.testing import (
|
|
||||||
ApprovalStatus,
|
|
||||||
DebugTool,
|
|
||||||
ErrorCategory,
|
|
||||||
Test,
|
|
||||||
TestResult,
|
|
||||||
TestStorage,
|
|
||||||
TestSuiteResult,
|
|
||||||
)
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
# Schemas
|
"AgentHost",
|
||||||
"Decision",
|
"AgentLoader",
|
||||||
"Option",
|
"AgentLoop",
|
||||||
"Outcome",
|
"DecisionTracker",
|
||||||
"DecisionEvaluation",
|
"Orchestrator",
|
||||||
"Run",
|
|
||||||
"RunSummary",
|
|
||||||
"Problem",
|
|
||||||
# Runtime
|
|
||||||
"Runtime",
|
|
||||||
# LLM
|
|
||||||
"LLMProvider",
|
|
||||||
"AnthropicProvider",
|
|
||||||
# Runner
|
|
||||||
"AgentRunner",
|
|
||||||
# Testing
|
|
||||||
"Test",
|
|
||||||
"TestResult",
|
|
||||||
"TestSuiteResult",
|
|
||||||
"TestStorage",
|
|
||||||
"ApprovalStatus",
|
|
||||||
"ErrorCategory",
|
|
||||||
"DebugTool",
|
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -0,0 +1,32 @@
|
|||||||
|
"""Agent loop -- the core agent execution primitive."""
|
||||||
|
|
||||||
|
from framework.agent_loop.conversation import ( # noqa: F401
|
||||||
|
ConversationStore,
|
||||||
|
Message,
|
||||||
|
NodeConversation,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Lazy import to avoid circular dependency with graph/event_loop/
|
||||||
|
# (graph/event_loop/* imports framework.graph.conversation which is a shim
|
||||||
|
# pointing here, which would trigger agent_loop.py loading, which imports
|
||||||
|
# graph/event_loop/* again)
|
||||||
|
|
||||||
|
|
||||||
|
def __getattr__(name: str):
|
||||||
|
if name in ("AgentLoop", "JudgeProtocol", "JudgeVerdict", "LoopConfig", "OutputAccumulator"):
|
||||||
|
from framework.agent_loop.agent_loop import (
|
||||||
|
AgentLoop,
|
||||||
|
JudgeProtocol,
|
||||||
|
JudgeVerdict,
|
||||||
|
LoopConfig,
|
||||||
|
OutputAccumulator,
|
||||||
|
)
|
||||||
|
_exports = {
|
||||||
|
"AgentLoop": AgentLoop,
|
||||||
|
"JudgeProtocol": JudgeProtocol,
|
||||||
|
"JudgeVerdict": JudgeVerdict,
|
||||||
|
"LoopConfig": LoopConfig,
|
||||||
|
"OutputAccumulator": OutputAccumulator,
|
||||||
|
}
|
||||||
|
return _exports[name]
|
||||||
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||||
+89
-425
@@ -21,16 +21,16 @@ from collections.abc import Awaitable, Callable
|
|||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.conversation import ConversationStore, NodeConversation
|
from framework.agent_loop.conversation import ConversationStore, NodeConversation
|
||||||
from framework.graph.event_loop import types as event_loop_types
|
from framework.agent_loop.internals import types as event_loop_types
|
||||||
from framework.graph.event_loop.compaction import (
|
from framework.agent_loop.internals.compaction import (
|
||||||
build_emergency_summary,
|
build_emergency_summary,
|
||||||
build_llm_compaction_prompt,
|
build_llm_compaction_prompt,
|
||||||
compact,
|
compact,
|
||||||
format_messages_for_summary,
|
format_messages_for_summary,
|
||||||
llm_compact,
|
llm_compact,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.cursor_persistence import (
|
from framework.agent_loop.internals.cursor_persistence import (
|
||||||
RestoredState,
|
RestoredState,
|
||||||
check_pause,
|
check_pause,
|
||||||
drain_injection_queue,
|
drain_injection_queue,
|
||||||
@@ -38,7 +38,7 @@ from framework.graph.event_loop.cursor_persistence import (
|
|||||||
restore,
|
restore,
|
||||||
write_cursor,
|
write_cursor,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.event_publishing import (
|
from framework.agent_loop.internals.event_publishing import (
|
||||||
generate_action_plan,
|
generate_action_plan,
|
||||||
log_skip_judge,
|
log_skip_judge,
|
||||||
publish_context_usage,
|
publish_context_usage,
|
||||||
@@ -54,27 +54,24 @@ from framework.graph.event_loop.event_publishing import (
|
|||||||
publish_tool_started,
|
publish_tool_started,
|
||||||
run_hooks,
|
run_hooks,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.judge_pipeline import (
|
from framework.agent_loop.internals.judge_pipeline import (
|
||||||
SubagentJudge as SharedSubagentJudge,
|
SubagentJudge as SharedSubagentJudge,
|
||||||
judge_turn,
|
judge_turn,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.stall_detector import (
|
from framework.agent_loop.internals.stall_detector import (
|
||||||
fingerprint_tool_calls,
|
fingerprint_tool_calls,
|
||||||
is_stalled,
|
is_stalled,
|
||||||
is_tool_doom_loop,
|
is_tool_doom_loop,
|
||||||
ngram_similarity,
|
ngram_similarity,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.subagent_executor import execute_subagent
|
from framework.agent_loop.internals.synthetic_tools import (
|
||||||
from framework.graph.event_loop.synthetic_tools import (
|
|
||||||
build_ask_user_multiple_tool,
|
build_ask_user_multiple_tool,
|
||||||
build_ask_user_tool,
|
build_ask_user_tool,
|
||||||
build_delegate_tool,
|
|
||||||
build_escalate_tool,
|
build_escalate_tool,
|
||||||
build_report_to_parent_tool,
|
|
||||||
build_set_output_tool,
|
build_set_output_tool,
|
||||||
handle_set_output,
|
handle_set_output,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.tool_result_handler import (
|
from framework.agent_loop.internals.tool_result_handler import (
|
||||||
build_json_preview,
|
build_json_preview,
|
||||||
execute_tool,
|
execute_tool,
|
||||||
extract_json_metadata,
|
extract_json_metadata,
|
||||||
@@ -82,12 +79,12 @@ from framework.graph.event_loop.tool_result_handler import (
|
|||||||
restore_spill_counter,
|
restore_spill_counter,
|
||||||
truncate_tool_result,
|
truncate_tool_result,
|
||||||
)
|
)
|
||||||
from framework.graph.event_loop.types import (
|
from framework.agent_loop.internals.types import (
|
||||||
JudgeProtocol,
|
JudgeProtocol,
|
||||||
JudgeVerdict,
|
JudgeVerdict,
|
||||||
TriggerEvent,
|
TriggerEvent,
|
||||||
)
|
)
|
||||||
from framework.graph.node import NodeContext, NodeProtocol, NodeResult
|
from framework.orchestrator.node import NodeContext, NodeProtocol, NodeResult
|
||||||
from framework.llm.capabilities import supports_image_tool_results
|
from framework.llm.capabilities import supports_image_tool_results
|
||||||
from framework.llm.provider import Tool, ToolResult, ToolUse
|
from framework.llm.provider import Tool, ToolResult, ToolUse
|
||||||
from framework.llm.stream_events import (
|
from framework.llm.stream_events import (
|
||||||
@@ -96,8 +93,8 @@ from framework.llm.stream_events import (
|
|||||||
TextDeltaEvent,
|
TextDeltaEvent,
|
||||||
ToolCallEvent,
|
ToolCallEvent,
|
||||||
)
|
)
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
from framework.runtime.llm_debug_logger import log_llm_turn
|
from framework.tracker.llm_debug_logger import log_llm_turn
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -163,43 +160,9 @@ def _is_context_too_large_error(exc: BaseException) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Escalation receiver (temporary routing target for subagent → user input)
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
class _EscalationReceiver:
|
|
||||||
"""Temporary receiver registered in node_registry for subagent escalation routing.
|
|
||||||
|
|
||||||
When a subagent calls ``report_to_parent(wait_for_response=True)``, the callback
|
|
||||||
creates one of these, registers it under a unique escalation ID in the executor's
|
|
||||||
``node_registry``, and awaits ``wait()``. The TUI / runner calls
|
|
||||||
``inject_input(escalation_id, content)`` which the ``ExecutionStream`` routes here
|
|
||||||
via ``inject_event()`` — matching the same ``hasattr(node, "inject_event")`` check
|
|
||||||
used for regular ``EventLoopNode`` instances.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self._event = asyncio.Event()
|
|
||||||
self._response: str | None = None
|
|
||||||
self._awaiting_input = True # So inject_message() can prefer us
|
|
||||||
|
|
||||||
async def inject_event(
|
|
||||||
self,
|
|
||||||
content: str,
|
|
||||||
*,
|
|
||||||
is_client_input: bool = False,
|
|
||||||
image_content: list[dict] | None = None,
|
|
||||||
) -> None:
|
|
||||||
"""Called by ExecutionStream.inject_input() when the user responds."""
|
|
||||||
self._response = content
|
|
||||||
self._event.set()
|
|
||||||
|
|
||||||
async def wait(self) -> str | None:
|
|
||||||
"""Block until inject_event() delivers the user's response."""
|
|
||||||
await self._event.wait()
|
|
||||||
return self._response
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Judge protocol (simple 3-action interface for event loop evaluation)
|
# Judge protocol (simple 3-action interface for event loop evaluation)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -224,7 +187,7 @@ OutputAccumulator = event_loop_types.OutputAccumulator
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
class EventLoopNode(NodeProtocol):
|
class AgentLoop(NodeProtocol):
|
||||||
"""Multi-turn LLM streaming loop with tool execution and judge evaluation.
|
"""Multi-turn LLM streaming loop with tool execution and judge evaluation.
|
||||||
|
|
||||||
Lifecycle:
|
Lifecycle:
|
||||||
@@ -284,9 +247,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# Monotonic counter for spillover file naming (web_search_1.txt, etc.)
|
# Monotonic counter for spillover file naming (web_search_1.txt, etc.)
|
||||||
self._spill_counter: int = 0
|
self._spill_counter: int = 0
|
||||||
# Subagent mark_complete: when True, _evaluate returns ACCEPT immediately
|
# Subagent mark_complete: when True, _evaluate returns ACCEPT immediately
|
||||||
self._mark_complete_flag = False
|
|
||||||
# Counter for subagent instances (1, 2, 3, ...)
|
|
||||||
self._subagent_instance_counter: dict[str, int] = {}
|
|
||||||
|
|
||||||
def validate_input(self, ctx: NodeContext) -> list[str]:
|
def validate_input(self, ctx: NodeContext) -> list[str]:
|
||||||
"""Validate hard requirements only.
|
"""Validate hard requirements only.
|
||||||
@@ -307,7 +267,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
async def execute(self, ctx: NodeContext) -> NodeResult:
|
async def execute(self, ctx: NodeContext) -> NodeResult:
|
||||||
"""Run the event loop."""
|
"""Run the event loop."""
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] Starting execution for node=%s, stream=%s",
|
"[AgentLoop.execute] Starting execution for node=%s, stream=%s",
|
||||||
ctx.node_id,
|
ctx.node_id,
|
||||||
ctx.stream_id,
|
ctx.stream_id,
|
||||||
)
|
)
|
||||||
@@ -320,7 +280,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# Store skill dirs for AS-9 file-read interception in _execute_tool
|
# Store skill dirs for AS-9 file-read interception in _execute_tool
|
||||||
self._skill_dirs: list[str] = ctx.skill_dirs
|
self._skill_dirs: list[str] = ctx.skill_dirs
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] node_id=%s, execution_id=%s, max_iterations=%d",
|
"[AgentLoop.execute] node_id=%s, execution_id=%s, max_iterations=%d",
|
||||||
node_id,
|
node_id,
|
||||||
execution_id,
|
execution_id,
|
||||||
self._config.max_iterations,
|
self._config.max_iterations,
|
||||||
@@ -402,7 +362,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# execution preamble and node-type preamble. The stored
|
# execution preamble and node-type preamble. The stored
|
||||||
# prompt may be stale after code changes or when runtime-
|
# prompt may be stale after code changes or when runtime-
|
||||||
# injected context (e.g. worker identity) has changed.
|
# injected context (e.g. worker identity) has changed.
|
||||||
from framework.graph.prompting import build_system_prompt_for_node_context
|
from framework.orchestrator.prompting import build_system_prompt_for_node_context
|
||||||
|
|
||||||
_current_prompt = build_system_prompt_for_node_context(ctx)
|
_current_prompt = build_system_prompt_for_node_context(ctx)
|
||||||
if conversation.system_prompt != _current_prompt:
|
if conversation.system_prompt != _current_prompt:
|
||||||
@@ -425,7 +385,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
await self._conversation_store.clear()
|
await self._conversation_store.clear()
|
||||||
|
|
||||||
# Fresh conversation: either isolated mode or first node in continuous mode.
|
# Fresh conversation: either isolated mode or first node in continuous mode.
|
||||||
from framework.graph.prompting import build_system_prompt_for_node_context
|
from framework.orchestrator.prompting import build_system_prompt_for_node_context
|
||||||
|
|
||||||
system_prompt = build_system_prompt_for_node_context(ctx)
|
system_prompt = build_system_prompt_for_node_context(ctx)
|
||||||
|
|
||||||
@@ -484,7 +444,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# 2a. Guard: ensure at least one non-system message exists.
|
# 2a. Guard: ensure at least one non-system message exists.
|
||||||
# A restored conversation may have 0 messages if phase_id filtering
|
# A restored conversation may have 0 messages if phase_id filtering
|
||||||
# removes them all, or if a prior run stored metadata without messages
|
# removes them all, or if a prior run stored metadata without messages
|
||||||
# (e.g. subagent that failed before the first LLM call).
|
# (e.g. node that failed before the first LLM call).
|
||||||
if conversation.message_count == 0:
|
if conversation.message_count == 0:
|
||||||
initial_message = self._build_initial_message(ctx)
|
initial_message = self._build_initial_message(ctx)
|
||||||
if initial_message:
|
if initial_message:
|
||||||
@@ -502,37 +462,10 @@ class EventLoopNode(NodeProtocol):
|
|||||||
tools.append(self._build_ask_user_tool())
|
tools.append(self._build_ask_user_tool())
|
||||||
if stream_id == "queen":
|
if stream_id == "queen":
|
||||||
tools.append(self._build_ask_user_multiple_tool())
|
tools.append(self._build_ask_user_multiple_tool())
|
||||||
# Workers/subagents can escalate blockers to the queen.
|
# Workers can escalate blockers to the queen.
|
||||||
if stream_id not in ("queen", "judge"):
|
if stream_id not in ("queen", "judge"):
|
||||||
tools.append(self._build_escalate_tool())
|
tools.append(self._build_escalate_tool())
|
||||||
|
|
||||||
# Add delegate_to_sub_agent tool if:
|
|
||||||
# - Node has sub_agents defined
|
|
||||||
# - We are NOT in subagent mode (prevents nested delegation)
|
|
||||||
if not ctx.is_subagent_mode:
|
|
||||||
sub_agents = getattr(ctx.node_spec, "sub_agents", None) or []
|
|
||||||
if sub_agents:
|
|
||||||
delegate_tool = self._build_delegate_tool(sub_agents, ctx.node_registry)
|
|
||||||
if delegate_tool:
|
|
||||||
tools.append(delegate_tool)
|
|
||||||
logger.info(
|
|
||||||
"[%s] delegate_to_sub_agent injected (sub_agents=%s)",
|
|
||||||
node_id,
|
|
||||||
sub_agents,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.error(
|
|
||||||
"[%s] _build_delegate_tool returned None for sub_agents=%s",
|
|
||||||
node_id,
|
|
||||||
sub_agents,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.debug("[%s] Skipped delegate tool (is_subagent_mode=True)", node_id)
|
|
||||||
|
|
||||||
# Add report_to_parent tool for sub-agents with a report callback
|
|
||||||
if ctx.is_subagent_mode and ctx.report_callback is not None:
|
|
||||||
tools.append(self._build_report_to_parent_tool())
|
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s",
|
"[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s",
|
||||||
node_id,
|
node_id,
|
||||||
@@ -565,11 +498,11 @@ class EventLoopNode(NodeProtocol):
|
|||||||
|
|
||||||
# 6. Main loop
|
# 6. Main loop
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] Entering main loop, start_iteration=%d", start_iteration
|
"[AgentLoop.execute] Entering main loop, start_iteration=%d", start_iteration
|
||||||
)
|
)
|
||||||
for iteration in range(start_iteration, self._config.max_iterations):
|
for iteration in range(start_iteration, self._config.max_iterations):
|
||||||
iter_start = time.time()
|
iter_start = time.time()
|
||||||
logger.debug("[EventLoopNode.execute] iteration=%d starting", iteration)
|
logger.debug("[AgentLoop.execute] iteration=%d starting", iteration)
|
||||||
|
|
||||||
# 6a. Check pause (no current-iteration data yet — only log_node_complete needed)
|
# 6a. Check pause (no current-iteration data yet — only log_node_complete needed)
|
||||||
if await self._check_pause(ctx, conversation, iteration):
|
if await self._check_pause(ctx, conversation, iteration):
|
||||||
@@ -601,18 +534,18 @@ class EventLoopNode(NodeProtocol):
|
|||||||
|
|
||||||
# 6b. Drain injection queue
|
# 6b. Drain injection queue
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d: draining injection queue...", iteration
|
"[AgentLoop.execute] iteration=%d: draining injection queue...", iteration
|
||||||
)
|
)
|
||||||
drained_injections = await self._drain_injection_queue(conversation, ctx)
|
drained_injections = await self._drain_injection_queue(conversation, ctx)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d: drained %d injections",
|
"[AgentLoop.execute] iteration=%d: drained %d injections",
|
||||||
iteration,
|
iteration,
|
||||||
drained_injections,
|
drained_injections,
|
||||||
)
|
)
|
||||||
# 6b1. Drain trigger queue (framework-level signals)
|
# 6b1. Drain trigger queue (framework-level signals)
|
||||||
drained_triggers = await self._drain_trigger_queue(conversation)
|
drained_triggers = await self._drain_trigger_queue(conversation)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d: drained %d triggers",
|
"[AgentLoop.execute] iteration=%d: drained %d triggers",
|
||||||
iteration,
|
iteration,
|
||||||
drained_triggers,
|
drained_triggers,
|
||||||
)
|
)
|
||||||
@@ -685,8 +618,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
"ask_user",
|
"ask_user",
|
||||||
"ask_user_multiple",
|
"ask_user_multiple",
|
||||||
"escalate",
|
"escalate",
|
||||||
"delegate_to_sub_agent",
|
|
||||||
"report_to_parent",
|
|
||||||
}
|
}
|
||||||
synthetic = [t for t in tools if t.name in _synthetic_names]
|
synthetic = [t for t in tools if t.name in _synthetic_names]
|
||||||
tools.clear()
|
tools.clear()
|
||||||
@@ -696,11 +627,11 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# 6b3. Dynamic prompt refresh (phase switching / memory refresh)
|
# 6b3. Dynamic prompt refresh (phase switching / memory refresh)
|
||||||
if ctx.dynamic_prompt_provider is not None or ctx.dynamic_memory_provider is not None:
|
if ctx.dynamic_prompt_provider is not None or ctx.dynamic_memory_provider is not None:
|
||||||
if ctx.dynamic_prompt_provider is not None:
|
if ctx.dynamic_prompt_provider is not None:
|
||||||
from framework.graph.prompting import stamp_prompt_datetime
|
from framework.orchestrator.prompting import stamp_prompt_datetime
|
||||||
|
|
||||||
_new_prompt = stamp_prompt_datetime(ctx.dynamic_prompt_provider())
|
_new_prompt = stamp_prompt_datetime(ctx.dynamic_prompt_provider())
|
||||||
else:
|
else:
|
||||||
from framework.graph.prompting import build_system_prompt_for_node_context
|
from framework.orchestrator.prompting import build_system_prompt_for_node_context
|
||||||
|
|
||||||
_new_prompt = build_system_prompt_for_node_context(ctx)
|
_new_prompt = build_system_prompt_for_node_context(ctx)
|
||||||
if _new_prompt != conversation.system_prompt:
|
if _new_prompt != conversation.system_prompt:
|
||||||
@@ -743,7 +674,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
len(conversation.messages),
|
len(conversation.messages),
|
||||||
)
|
)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d: entering _run_single_turn loop", iteration
|
"[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration
|
||||||
)
|
)
|
||||||
_stream_retry_count = 0
|
_stream_retry_count = 0
|
||||||
_turn_cancelled = False
|
_turn_cancelled = False
|
||||||
@@ -752,7 +683,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d: calling _run_single_turn (retry=%d)",
|
"[AgentLoop.execute] iteration=%d: calling _run_single_turn (retry=%d)",
|
||||||
iteration,
|
iteration,
|
||||||
_stream_retry_count,
|
_stream_retry_count,
|
||||||
)
|
)
|
||||||
@@ -768,12 +699,12 @@ class EventLoopNode(NodeProtocol):
|
|||||||
queen_input_requested,
|
queen_input_requested,
|
||||||
request_system_prompt,
|
request_system_prompt,
|
||||||
request_messages,
|
request_messages,
|
||||||
reported_to_parent,
|
_,
|
||||||
) = await self._run_single_turn(
|
) = await self._run_single_turn(
|
||||||
ctx, conversation, tools, iteration, accumulator
|
ctx, conversation, tools, iteration, accumulator
|
||||||
)
|
)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d:"
|
"[AgentLoop.execute] iteration=%d:"
|
||||||
" _run_single_turn completed successfully",
|
" _run_single_turn completed successfully",
|
||||||
iteration,
|
iteration,
|
||||||
)
|
)
|
||||||
@@ -842,13 +773,13 @@ class EventLoopNode(NodeProtocol):
|
|||||||
break # success — exit retry loop
|
break # success — exit retry loop
|
||||||
|
|
||||||
except TurnCancelled:
|
except TurnCancelled:
|
||||||
logger.debug("[EventLoopNode.execute] iteration=%d: TurnCancelled", iteration)
|
logger.debug("[AgentLoop.execute] iteration=%d: TurnCancelled", iteration)
|
||||||
_turn_cancelled = True
|
_turn_cancelled = True
|
||||||
break
|
break
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.execute] iteration=%d:"
|
"[AgentLoop.execute] iteration=%d:"
|
||||||
" Exception in _run_single_turn: %s (%s)",
|
" Exception in _run_single_turn: %s (%s)",
|
||||||
iteration,
|
iteration,
|
||||||
type(e).__name__,
|
type(e).__name__,
|
||||||
@@ -1024,7 +955,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
and not outputs_set
|
and not outputs_set
|
||||||
and not user_input_requested
|
and not user_input_requested
|
||||||
and not queen_input_requested
|
and not queen_input_requested
|
||||||
and not reported_to_parent
|
|
||||||
)
|
)
|
||||||
if truly_empty and accumulator is not None:
|
if truly_empty and accumulator is not None:
|
||||||
missing = self._get_missing_output_keys(
|
missing = self._get_missing_output_keys(
|
||||||
@@ -1276,14 +1207,14 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# blocking and resumption.
|
# blocking and resumption.
|
||||||
_is_worker = (
|
_is_worker = (
|
||||||
stream_id not in ("queen", "judge")
|
stream_id not in ("queen", "judge")
|
||||||
and not ctx.is_subagent_mode
|
and not False
|
||||||
and not ctx.supports_direct_user_io
|
and not ctx.supports_direct_user_io
|
||||||
and self._event_bus is not None
|
and self._event_bus is not None
|
||||||
)
|
)
|
||||||
_worker_no_tool_turn = (
|
_worker_no_tool_turn = (
|
||||||
not real_tool_results
|
not real_tool_results
|
||||||
and not outputs_set
|
and not outputs_set
|
||||||
and not reported_to_parent
|
|
||||||
and not queen_input_requested
|
and not queen_input_requested
|
||||||
and not user_input_requested
|
and not user_input_requested
|
||||||
)
|
)
|
||||||
@@ -1733,7 +1664,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
|
|
||||||
# 6i. Judge evaluation
|
# 6i. Judge evaluation
|
||||||
should_judge = (
|
should_judge = (
|
||||||
ctx.is_subagent_mode # Always evaluate subagents
|
False
|
||||||
or (iteration + 1) % self._config.judge_every_n_turns == 0
|
or (iteration + 1) % self._config.judge_every_n_turns == 0
|
||||||
or not real_tool_results # no real tool calls = natural stop
|
or not real_tool_results # no real tool calls = natural stop
|
||||||
)
|
)
|
||||||
@@ -1789,7 +1720,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
missing = self._get_missing_output_keys(
|
missing = self._get_missing_output_keys(
|
||||||
accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
|
accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys
|
||||||
)
|
)
|
||||||
if missing and self._judge is not None and not self._mark_complete_flag:
|
if missing and self._judge is not None :
|
||||||
hint = (
|
hint = (
|
||||||
f"Task incomplete. Required outputs not yet produced: {missing}. "
|
f"Task incomplete. Required outputs not yet produced: {missing}. "
|
||||||
f"Follow your system prompt instructions to complete the work."
|
f"Follow your system prompt instructions to complete the work."
|
||||||
@@ -1988,7 +1919,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
image_content: Optional list of OpenAI-style image blocks to attach.
|
image_content: Optional list of OpenAI-style image blocks to attach.
|
||||||
"""
|
"""
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[EventLoopNode.inject_event] content_len=%d,"
|
"[AgentLoop.inject_event] content_len=%d,"
|
||||||
" is_client_input=%s, has_images=%s,"
|
" is_client_input=%s, has_images=%s,"
|
||||||
" queue_size_before=%d",
|
" queue_size_before=%d",
|
||||||
len(content) if content else 0,
|
len(content) if content else 0,
|
||||||
@@ -1998,15 +1929,15 @@ class EventLoopNode(NodeProtocol):
|
|||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
await self._injection_queue.put((content, is_client_input, image_content))
|
await self._injection_queue.put((content, is_client_input, image_content))
|
||||||
logger.debug("[EventLoopNode.inject_event] Message queued successfully")
|
logger.debug("[AgentLoop.inject_event] Message queued successfully")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("[EventLoopNode.inject_event] Failed to queue message: %s", e)
|
logger.exception("[AgentLoop.inject_event] Failed to queue message: %s", e)
|
||||||
raise
|
raise
|
||||||
try:
|
try:
|
||||||
self._input_ready.set()
|
self._input_ready.set()
|
||||||
logger.debug("[EventLoopNode.inject_event] _input_ready.set() called")
|
logger.debug("[AgentLoop.inject_event] _input_ready.set() called")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("[EventLoopNode.inject_event] Failed to set _input_ready: %s", e)
|
logger.exception("[AgentLoop.inject_event] Failed to set _input_ready: %s", e)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def inject_trigger(self, trigger: TriggerEvent) -> None:
|
async def inject_trigger(self, trigger: TriggerEvent) -> None:
|
||||||
@@ -2157,7 +2088,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
ask_user_prompt = ""
|
ask_user_prompt = ""
|
||||||
ask_user_options: list[str] | None = None
|
ask_user_options: list[str] | None = None
|
||||||
queen_input_requested = False
|
queen_input_requested = False
|
||||||
reported_to_parent = False
|
|
||||||
# Accumulate ALL tool calls across inner iterations for L3 logging.
|
# Accumulate ALL tool calls across inner iterations for L3 logging.
|
||||||
# Unlike real_tool_results (reset each inner iteration), this persists.
|
# Unlike real_tool_results (reset each inner iteration), this persists.
|
||||||
logged_tool_calls: list[dict] = []
|
logged_tool_calls: list[dict] = []
|
||||||
@@ -2231,16 +2161,28 @@ class EventLoopNode(NodeProtocol):
|
|||||||
):
|
):
|
||||||
if isinstance(event, TextDeltaEvent):
|
if isinstance(event, TextDeltaEvent):
|
||||||
accumulated_text = event.snapshot
|
accumulated_text = event.snapshot
|
||||||
await self._publish_text_delta(
|
# Filter <think>...</think> blocks from client output.
|
||||||
stream_id,
|
# Content inside think tags is internal reasoning -- only
|
||||||
node_id,
|
# the text after </think> is shown to the user.
|
||||||
event.content,
|
_content = event.content
|
||||||
event.snapshot,
|
if "<think>" in event.snapshot and "</think>" not in event.snapshot:
|
||||||
ctx,
|
_content = "" # still inside think block
|
||||||
execution_id,
|
elif "</think>" in _content:
|
||||||
iteration=iteration,
|
# End of think block -- emit only text after the tag
|
||||||
inner_turn=inner_turn,
|
_content = _content.split("</think>", 1)[-1]
|
||||||
)
|
elif "<think>" in _content:
|
||||||
|
_content = "" # opening tag in this chunk
|
||||||
|
if _content:
|
||||||
|
await self._publish_text_delta(
|
||||||
|
stream_id,
|
||||||
|
node_id,
|
||||||
|
_content,
|
||||||
|
event.snapshot,
|
||||||
|
ctx,
|
||||||
|
execution_id,
|
||||||
|
iteration=iteration,
|
||||||
|
inner_turn=inner_turn,
|
||||||
|
)
|
||||||
|
|
||||||
elif isinstance(event, ToolCallEvent):
|
elif isinstance(event, ToolCallEvent):
|
||||||
_tc.append(event)
|
_tc.append(event)
|
||||||
@@ -2348,10 +2290,27 @@ class EventLoopNode(NodeProtocol):
|
|||||||
queen_input_requested,
|
queen_input_requested,
|
||||||
final_system_prompt,
|
final_system_prompt,
|
||||||
final_messages,
|
final_messages,
|
||||||
reported_to_parent,
|
False,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Execute tool calls — framework tools (set_output, ask_user)
|
# Priority drain: if user sent a message while the LLM was
|
||||||
|
# streaming, inject it into the conversation NOW -- before tool
|
||||||
|
# execution. The LLM will see it on the next inner turn.
|
||||||
|
if not self._injection_queue.empty():
|
||||||
|
while not self._injection_queue.empty():
|
||||||
|
_inj_content, _inj_client, _inj_images = (
|
||||||
|
self._injection_queue.get_nowait()
|
||||||
|
)
|
||||||
|
if _inj_client:
|
||||||
|
await conversation.add_user_message(_inj_content)
|
||||||
|
logger.info(
|
||||||
|
"[%s] Priority-injected user message mid-turn (%d chars)",
|
||||||
|
node_id, len(_inj_content),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
await conversation.add_user_message(_inj_content)
|
||||||
|
|
||||||
|
# Execute tool calls -- framework tools (set_output, ask_user)
|
||||||
# run inline; real MCP tools run in parallel.
|
# run inline; real MCP tools run in parallel.
|
||||||
real_tool_results: list[dict] = []
|
real_tool_results: list[dict] = []
|
||||||
limit_hit = False
|
limit_hit = False
|
||||||
@@ -2361,13 +2320,12 @@ class EventLoopNode(NodeProtocol):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Phase 1: triage — handle framework tools immediately,
|
# Phase 1: triage — handle framework tools immediately,
|
||||||
# queue real tools and subagents for parallel execution.
|
# queue real tools for parallel execution.
|
||||||
results_by_id: dict[str, ToolResult] = {}
|
results_by_id: dict[str, ToolResult] = {}
|
||||||
timing_by_id: dict[
|
timing_by_id: dict[
|
||||||
str, dict[str, Any]
|
str, dict[str, Any]
|
||||||
] = {} # tool_use_id -> {start_timestamp, duration_s}
|
] = {} # tool_use_id -> {start_timestamp, duration_s}
|
||||||
pending_real: list[ToolCallEvent] = []
|
pending_real: list[ToolCallEvent] = []
|
||||||
pending_subagent: list[ToolCallEvent] = []
|
|
||||||
|
|
||||||
for tc in tool_calls:
|
for tc in tool_calls:
|
||||||
tool_call_count += 1
|
tool_call_count += 1
|
||||||
@@ -2610,76 +2568,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
)
|
)
|
||||||
results_by_id[tc.tool_use_id] = result
|
results_by_id[tc.tool_use_id] = result
|
||||||
|
|
||||||
elif tc.tool_name == "delegate_to_sub_agent":
|
|
||||||
# Guard: in continuous mode the LLM may see delegate
|
|
||||||
# calls from a previous node's conversation history and
|
|
||||||
# attempt to re-use the tool on a node that doesn't own
|
|
||||||
# it. Only accept if the tool was actually offered.
|
|
||||||
if not any(t.name == "delegate_to_sub_agent" for t in tools):
|
|
||||||
logger.warning(
|
|
||||||
"[%s] LLM called delegate_to_sub_agent but tool "
|
|
||||||
"was not offered to this node — rejecting",
|
|
||||||
node_id,
|
|
||||||
)
|
|
||||||
result = ToolResult(
|
|
||||||
tool_use_id=tc.tool_use_id,
|
|
||||||
content=(
|
|
||||||
"ERROR: delegate_to_sub_agent is not available "
|
|
||||||
"on this node. This tool belongs to a different "
|
|
||||||
"node in the workflow."
|
|
||||||
),
|
|
||||||
is_error=True,
|
|
||||||
)
|
|
||||||
results_by_id[tc.tool_use_id] = result
|
|
||||||
continue
|
|
||||||
# --- Framework-level subagent delegation ---
|
|
||||||
# Queue for parallel execution in Phase 2
|
|
||||||
logger.info(
|
|
||||||
"🔄 LLM requesting subagent delegation: agent_id='%s', task='%s'",
|
|
||||||
tc.tool_input.get("agent_id", "?"),
|
|
||||||
(tc.tool_input.get("task", "")[:100] + "...")
|
|
||||||
if len(tc.tool_input.get("task", "")) > 100
|
|
||||||
else tc.tool_input.get("task", ""),
|
|
||||||
)
|
|
||||||
pending_subagent.append(tc)
|
|
||||||
|
|
||||||
elif tc.tool_name == "report_to_parent":
|
|
||||||
# --- Report from sub-agent to parent (optionally blocking) ---
|
|
||||||
reported_to_parent = True
|
|
||||||
msg = tc.tool_input.get("message", "")
|
|
||||||
data = tc.tool_input.get("data")
|
|
||||||
wait = tc.tool_input.get("wait_for_response", False)
|
|
||||||
mark_complete = tc.tool_input.get("mark_complete", False)
|
|
||||||
response = None
|
|
||||||
|
|
||||||
if ctx.report_callback:
|
|
||||||
try:
|
|
||||||
response = await ctx.report_callback(
|
|
||||||
msg,
|
|
||||||
data,
|
|
||||||
wait_for_response=wait,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
logger.warning(
|
|
||||||
"[%s] report_to_parent callback failed (swallowed)",
|
|
||||||
node_id,
|
|
||||||
exc_info=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
if mark_complete:
|
|
||||||
self._mark_complete_flag = True
|
|
||||||
logger.info(
|
|
||||||
"[%s] mark_complete=True — subagent will accept on this iteration",
|
|
||||||
node_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
result = ToolResult(
|
|
||||||
tool_use_id=tc.tool_use_id,
|
|
||||||
content=response if (wait and response) else "Report sent to parent.",
|
|
||||||
is_error=False,
|
|
||||||
)
|
|
||||||
results_by_id[tc.tool_use_id] = result
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# --- Real tool: check for truncated args, else queue ---
|
# --- Real tool: check for truncated args, else queue ---
|
||||||
if "_raw" in tc.tool_input:
|
if "_raw" in tc.tool_input:
|
||||||
@@ -2754,175 +2642,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
result = raw
|
result = raw
|
||||||
results_by_id[tc.tool_use_id] = self._truncate_tool_result(result, tc.tool_name)
|
results_by_id[tc.tool_use_id] = self._truncate_tool_result(result, tc.tool_name)
|
||||||
|
|
||||||
# Phase 2b: execute subagent delegations in parallel.
|
|
||||||
if pending_subagent:
|
|
||||||
_subagent_timeout = self._config.subagent_timeout_seconds
|
|
||||||
_inactivity_timeout = self._config.subagent_inactivity_timeout_seconds
|
|
||||||
|
|
||||||
async def _timed_subagent(
|
|
||||||
_ctx: NodeContext,
|
|
||||||
_tc: ToolCallEvent,
|
|
||||||
_acc: OutputAccumulator = accumulator,
|
|
||||||
_wall_timeout: float = _subagent_timeout,
|
|
||||||
_activity_timeout: float = _inactivity_timeout,
|
|
||||||
) -> tuple[ToolResult | BaseException, str, float]:
|
|
||||||
_s = time.time()
|
|
||||||
_iso = datetime.now(UTC).isoformat()
|
|
||||||
_last_activity = _s
|
|
||||||
_activity_event = asyncio.Event()
|
|
||||||
|
|
||||||
async def _watchdog() -> None:
|
|
||||||
"""Watchdog that times out only after inactivity period."""
|
|
||||||
nonlocal _last_activity
|
|
||||||
while True:
|
|
||||||
_now = time.time()
|
|
||||||
_inactive_for = _now - _last_activity
|
|
||||||
_remaining = _activity_timeout - _inactive_for
|
|
||||||
|
|
||||||
if _remaining <= 0:
|
|
||||||
# Inactivity timeout reached
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
await asyncio.wait_for(_activity_event.wait(), timeout=_remaining)
|
|
||||||
_activity_event.clear()
|
|
||||||
except TimeoutError:
|
|
||||||
# Check again in case activity happened during wait
|
|
||||||
continue
|
|
||||||
|
|
||||||
async def _run_with_activity_timeout(
|
|
||||||
_coro,
|
|
||||||
) -> ToolResult:
|
|
||||||
"""Run subagent with activity-based timeout."""
|
|
||||||
_watchdog_task = asyncio.create_task(_watchdog())
|
|
||||||
try:
|
|
||||||
_result = await _coro
|
|
||||||
return _result
|
|
||||||
finally:
|
|
||||||
_watchdog_task.cancel()
|
|
||||||
try:
|
|
||||||
await _watchdog_task
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Subscribe to subagent activity events to reset inactivity timer
|
|
||||||
async def _on_subagent_activity(event) -> None:
|
|
||||||
nonlocal _last_activity
|
|
||||||
_last_activity = time.time()
|
|
||||||
_activity_event.set()
|
|
||||||
|
|
||||||
_sub_id = None
|
|
||||||
if self._event_bus and _activity_timeout > 0:
|
|
||||||
from framework.runtime.event_bus import EventType
|
|
||||||
|
|
||||||
_sub_id = self._event_bus.subscribe(
|
|
||||||
event_types=[
|
|
||||||
EventType.TOOL_CALL_STARTED,
|
|
||||||
EventType.LLM_TEXT_DELTA,
|
|
||||||
EventType.EXECUTION_STARTED,
|
|
||||||
],
|
|
||||||
handler=_on_subagent_activity,
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
_coro = self._execute_subagent(
|
|
||||||
_ctx,
|
|
||||||
_tc.tool_input.get("agent_id", ""),
|
|
||||||
_tc.tool_input.get("task", ""),
|
|
||||||
accumulator=_acc,
|
|
||||||
)
|
|
||||||
|
|
||||||
if _activity_timeout > 0:
|
|
||||||
# Use activity-based timeout with wall-clock max
|
|
||||||
_result_coro = _run_with_activity_timeout(_coro)
|
|
||||||
if _wall_timeout > 0:
|
|
||||||
_r = await asyncio.wait_for(_result_coro, timeout=_wall_timeout)
|
|
||||||
else:
|
|
||||||
_r = await _result_coro
|
|
||||||
elif _wall_timeout > 0:
|
|
||||||
_r = await asyncio.wait_for(_coro, timeout=_wall_timeout)
|
|
||||||
else:
|
|
||||||
_r = await _coro
|
|
||||||
finally:
|
|
||||||
if _sub_id and self._event_bus:
|
|
||||||
self._event_bus.unsubscribe(_sub_id)
|
|
||||||
|
|
||||||
except TimeoutError:
|
|
||||||
_agent_id = _tc.tool_input.get("agent_id", "unknown")
|
|
||||||
_elapsed = time.time() - _s
|
|
||||||
logger.warning(
|
|
||||||
"Subagent '%s' timed out after %.0fs (inactivity threshold: %.0fs)",
|
|
||||||
_agent_id,
|
|
||||||
_elapsed,
|
|
||||||
_activity_timeout if _activity_timeout > 0 else _wall_timeout,
|
|
||||||
)
|
|
||||||
_r = ToolResult(
|
|
||||||
tool_use_id=_tc.tool_use_id,
|
|
||||||
content=(
|
|
||||||
f"Subagent '{_agent_id}' timed out after "
|
|
||||||
f"{_elapsed:.0f}s of inactivity. "
|
|
||||||
"The subagent was not making progress. "
|
|
||||||
"Try a simpler task or break it into smaller pieces."
|
|
||||||
),
|
|
||||||
is_error=True,
|
|
||||||
)
|
|
||||||
except BaseException as _exc:
|
|
||||||
_r = _exc
|
|
||||||
_dur = round(time.time() - _s, 3)
|
|
||||||
return _r, _iso, _dur
|
|
||||||
|
|
||||||
subagent_timed = await asyncio.gather(
|
|
||||||
*(_timed_subagent(ctx, tc) for tc in pending_subagent),
|
|
||||||
return_exceptions=True,
|
|
||||||
)
|
|
||||||
for tc, entry in zip(pending_subagent, subagent_timed, strict=True):
|
|
||||||
if isinstance(entry, BaseException):
|
|
||||||
raw = entry
|
|
||||||
_start_iso = datetime.now(UTC).isoformat()
|
|
||||||
_dur_s = 0
|
|
||||||
else:
|
|
||||||
raw, _start_iso, _dur_s = entry
|
|
||||||
_sa_timing = {
|
|
||||||
"start_timestamp": _start_iso,
|
|
||||||
"duration_s": _dur_s,
|
|
||||||
}
|
|
||||||
if isinstance(raw, BaseException):
|
|
||||||
result = ToolResult(
|
|
||||||
tool_use_id=tc.tool_use_id,
|
|
||||||
content=json.dumps(
|
|
||||||
{
|
|
||||||
"message": f"Sub-agent execution raised: {raw}",
|
|
||||||
"data": None,
|
|
||||||
"metadata": {"success": False, "error": str(raw)},
|
|
||||||
}
|
|
||||||
),
|
|
||||||
is_error=True,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Attach the tool_use_id to the result
|
|
||||||
result = ToolResult(
|
|
||||||
tool_use_id=tc.tool_use_id,
|
|
||||||
content=raw.content,
|
|
||||||
is_error=raw.is_error,
|
|
||||||
)
|
|
||||||
# Route through _truncate_tool_result so large
|
|
||||||
# subagent results are saved to spillover files
|
|
||||||
# and survive pruning (instead of being "cleared
|
|
||||||
# from context" with no recovery path).
|
|
||||||
result = self._truncate_tool_result(result, "delegate_to_sub_agent")
|
|
||||||
results_by_id[tc.tool_use_id] = result
|
|
||||||
logged_tool_calls.append(
|
|
||||||
{
|
|
||||||
"tool_use_id": tc.tool_use_id,
|
|
||||||
"tool_name": "delegate_to_sub_agent",
|
|
||||||
"tool_input": tc.tool_input,
|
|
||||||
"content": result.content,
|
|
||||||
"is_error": result.is_error,
|
|
||||||
**_sa_timing,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Phase 3: record results into conversation in original order,
|
# Phase 3: record results into conversation in original order,
|
||||||
# build logged/real lists, and publish completed events.
|
# build logged/real lists, and publish completed events.
|
||||||
for tc in tool_calls[:executed_in_batch]:
|
for tc in tool_calls[:executed_in_batch]:
|
||||||
@@ -2936,8 +2655,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
"ask_user",
|
"ask_user",
|
||||||
"ask_user_multiple",
|
"ask_user_multiple",
|
||||||
"escalate",
|
"escalate",
|
||||||
"delegate_to_sub_agent",
|
|
||||||
"report_to_parent",
|
|
||||||
):
|
):
|
||||||
tool_entry = {
|
tool_entry = {
|
||||||
"tool_use_id": tc.tool_use_id,
|
"tool_use_id": tc.tool_use_id,
|
||||||
@@ -3056,7 +2773,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
queen_input_requested,
|
queen_input_requested,
|
||||||
final_system_prompt,
|
final_system_prompt,
|
||||||
final_messages,
|
final_messages,
|
||||||
reported_to_parent,
|
False,
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- Mid-turn pruning: prevent context blowup within a single turn ---
|
# --- Mid-turn pruning: prevent context blowup within a single turn ---
|
||||||
@@ -3090,7 +2807,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
queen_input_requested,
|
queen_input_requested,
|
||||||
final_system_prompt,
|
final_system_prompt,
|
||||||
final_messages,
|
final_messages,
|
||||||
reported_to_parent,
|
False,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Tool calls processed -- loop back to stream with updated conversation
|
# Tool calls processed -- loop back to stream with updated conversation
|
||||||
@@ -3118,16 +2835,6 @@ class EventLoopNode(NodeProtocol):
|
|||||||
"""Build the synthetic escalate tool. Delegates to synthetic_tools module."""
|
"""Build the synthetic escalate tool. Delegates to synthetic_tools module."""
|
||||||
return build_escalate_tool()
|
return build_escalate_tool()
|
||||||
|
|
||||||
def _build_delegate_tool(
|
|
||||||
self, sub_agents: list[str], node_registry: dict[str, Any]
|
|
||||||
) -> Tool | None:
|
|
||||||
"""Build the synthetic delegate_to_sub_agent tool. Delegates to synthetic_tools module."""
|
|
||||||
return build_delegate_tool(sub_agents, node_registry)
|
|
||||||
|
|
||||||
def _build_report_to_parent_tool(self) -> Tool:
|
|
||||||
"""Build the synthetic report_to_parent tool. Delegates to synthetic_tools module."""
|
|
||||||
return build_report_to_parent_tool()
|
|
||||||
|
|
||||||
def _handle_set_output(
|
def _handle_set_output(
|
||||||
self,
|
self,
|
||||||
tool_input: dict[str, Any],
|
tool_input: dict[str, Any],
|
||||||
@@ -3151,7 +2858,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
) -> JudgeVerdict:
|
) -> JudgeVerdict:
|
||||||
"""Evaluate the current state. Delegates to judge_pipeline module."""
|
"""Evaluate the current state. Delegates to judge_pipeline module."""
|
||||||
return await judge_turn(
|
return await judge_turn(
|
||||||
mark_complete_flag=self._mark_complete_flag,
|
mark_complete_flag=False,
|
||||||
judge=self._judge,
|
judge=self._judge,
|
||||||
ctx=ctx,
|
ctx=ctx,
|
||||||
conversation=conversation,
|
conversation=conversation,
|
||||||
@@ -3176,7 +2883,7 @@ class EventLoopNode(NodeProtocol):
|
|||||||
|
|
||||||
Delegates to :func:`extract_tool_call_history` in conversation.py.
|
Delegates to :func:`extract_tool_call_history` in conversation.py.
|
||||||
"""
|
"""
|
||||||
from framework.graph.conversation import extract_tool_call_history
|
from framework.agent_loop.conversation import extract_tool_call_history
|
||||||
|
|
||||||
return extract_tool_call_history(conversation.messages, max_entries=max_entries)
|
return extract_tool_call_history(conversation.messages, max_entries=max_entries)
|
||||||
|
|
||||||
@@ -3781,46 +3488,3 @@ class EventLoopNode(NodeProtocol):
|
|||||||
# Subagent Execution
|
# Subagent Execution
|
||||||
# -------------------------------------------------------------------
|
# -------------------------------------------------------------------
|
||||||
|
|
||||||
async def _execute_subagent(
|
|
||||||
self,
|
|
||||||
ctx: NodeContext,
|
|
||||||
agent_id: str,
|
|
||||||
task: str,
|
|
||||||
*,
|
|
||||||
accumulator: OutputAccumulator | None = None,
|
|
||||||
) -> ToolResult:
|
|
||||||
"""Execute a subagent and return the result as a ToolResult.
|
|
||||||
|
|
||||||
The subagent:
|
|
||||||
- Gets a fresh conversation with just the task
|
|
||||||
- Has read-only access to the parent's readable data buffer
|
|
||||||
- Cannot delegate to its own subagents (prevents recursion)
|
|
||||||
- Returns its output in structured JSON format
|
|
||||||
|
|
||||||
Args:
|
|
||||||
ctx: Parent node's context (for data buffer, tools, LLM access).
|
|
||||||
agent_id: The node ID of the subagent to invoke.
|
|
||||||
task: The task description to give the subagent.
|
|
||||||
accumulator: Parent's OutputAccumulator — provides outputs that
|
|
||||||
have been set via ``set_output`` but not yet written to
|
|
||||||
data buffer (which only happens after the node completes).
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
ToolResult with structured JSON output containing:
|
|
||||||
- message: Human-readable summary
|
|
||||||
- data: Subagent's output (free-form JSON)
|
|
||||||
- metadata: Execution metadata (success, tokens, latency)
|
|
||||||
"""
|
|
||||||
return await execute_subagent(
|
|
||||||
ctx=ctx,
|
|
||||||
agent_id=agent_id,
|
|
||||||
task=task,
|
|
||||||
accumulator=accumulator,
|
|
||||||
event_bus=self._event_bus,
|
|
||||||
config=self._config,
|
|
||||||
tool_executor=self._tool_executor,
|
|
||||||
conversation_store=self._conversation_store,
|
|
||||||
subagent_instance_counter=self._subagent_instance_counter,
|
|
||||||
event_loop_node_cls=type(self),
|
|
||||||
escalation_receiver_cls=_EscalationReceiver,
|
|
||||||
)
|
|
||||||
@@ -324,7 +324,7 @@ def _try_extract_key(content: str, key: str) -> str | None:
|
|||||||
3. Colon format: ``key: value``.
|
3. Colon format: ``key: value``.
|
||||||
4. Equals format: ``key = value``.
|
4. Equals format: ``key = value``.
|
||||||
"""
|
"""
|
||||||
from framework.graph.node import find_json_object
|
from framework.orchestrator.node import find_json_object
|
||||||
|
|
||||||
# 1. Whole message is JSON
|
# 1. Whole message is JSON
|
||||||
try:
|
try:
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
"""Agent loop internals -- compaction, judge, tools, subagent execution.
|
||||||
|
|
||||||
|
Re-exports from legacy locations for the new import path.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from framework.agent_loop.internals.compaction import * # noqa: F401, F403
|
||||||
|
from framework.agent_loop.internals.synthetic_tools import * # noqa: F401, F403
|
||||||
+9
-9
@@ -19,11 +19,11 @@ from datetime import UTC, datetime
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.conversation import Message, NodeConversation
|
from framework.agent_loop.conversation import Message, NodeConversation
|
||||||
from framework.graph.event_loop.event_publishing import publish_context_usage
|
from framework.agent_loop.internals.event_publishing import publish_context_usage
|
||||||
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
|
from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator
|
||||||
from framework.graph.node import NodeContext
|
from framework.orchestrator.node import NodeContext
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -368,8 +368,8 @@ async def llm_compact(
|
|||||||
in half and each half is summarised independently. Tool history is
|
in half and each half is summarised independently. Tool history is
|
||||||
appended once at the top-level call (``_depth == 0``).
|
appended once at the top-level call (``_depth == 0``).
|
||||||
"""
|
"""
|
||||||
from framework.graph.conversation import extract_tool_call_history
|
from framework.agent_loop.conversation import extract_tool_call_history
|
||||||
from framework.graph.event_loop.tool_result_handler import is_context_too_large_error
|
from framework.agent_loop.internals.tool_result_handler import is_context_too_large_error
|
||||||
|
|
||||||
if _depth > max_depth:
|
if _depth > max_depth:
|
||||||
raise RuntimeError(f"LLM compaction recursion limit ({max_depth})")
|
raise RuntimeError(f"LLM compaction recursion limit ({max_depth})")
|
||||||
@@ -724,7 +724,7 @@ async def log_compaction(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if event_bus:
|
if event_bus:
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
event_data: dict[str, Any] = {
|
event_data: dict[str, Any] = {
|
||||||
"level": level,
|
"level": level,
|
||||||
@@ -861,6 +861,6 @@ def _extract_tool_call_history(conversation: NodeConversation) -> str:
|
|||||||
directly (vs. the module-level extract_tool_call_history in conversation.py
|
directly (vs. the module-level extract_tool_call_history in conversation.py
|
||||||
which works on raw message lists).
|
which works on raw message lists).
|
||||||
"""
|
"""
|
||||||
from framework.graph.conversation import extract_tool_call_history
|
from framework.agent_loop.conversation import extract_tool_call_history
|
||||||
|
|
||||||
return extract_tool_call_history(list(conversation.messages))
|
return extract_tool_call_history(list(conversation.messages))
|
||||||
+3
-3
@@ -14,9 +14,9 @@ from collections.abc import Awaitable, Callable
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.conversation import ConversationStore, NodeConversation
|
from framework.agent_loop.conversation import ConversationStore, NodeConversation
|
||||||
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator, TriggerEvent
|
from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator, TriggerEvent
|
||||||
from framework.graph.node import NodeContext
|
from framework.orchestrator.node import NodeContext
|
||||||
from framework.llm.capabilities import supports_image_tool_results
|
from framework.llm.capabilities import supports_image_tool_results
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
+5
-5
@@ -9,10 +9,10 @@ from __future__ import annotations
|
|||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from framework.graph.conversation import NodeConversation
|
from framework.agent_loop.conversation import NodeConversation
|
||||||
from framework.graph.event_loop.types import HookContext
|
from framework.agent_loop.internals.types import HookContext
|
||||||
from framework.graph.node import NodeContext
|
from framework.orchestrator.node import NodeContext
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -177,7 +177,7 @@ async def publish_context_usage(
|
|||||||
if not event_bus:
|
if not event_bus:
|
||||||
return
|
return
|
||||||
|
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
estimated = conversation.estimate_tokens()
|
estimated = conversation.estimate_tokens()
|
||||||
max_tokens = conversation._max_context_tokens
|
max_tokens = conversation._max_context_tokens
|
||||||
+4
-4
@@ -5,9 +5,9 @@ from __future__ import annotations
|
|||||||
import logging
|
import logging
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
|
|
||||||
from framework.graph.conversation import NodeConversation
|
from framework.agent_loop.conversation import NodeConversation
|
||||||
from framework.graph.event_loop.types import JudgeProtocol, JudgeVerdict, OutputAccumulator
|
from framework.agent_loop.internals.types import JudgeProtocol, JudgeVerdict, OutputAccumulator
|
||||||
from framework.graph.node import NodeContext
|
from framework.orchestrator.node import NodeContext
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -155,7 +155,7 @@ async def judge_turn(
|
|||||||
|
|
||||||
# Level 2b: conversation-aware quality check (if success_criteria set)
|
# Level 2b: conversation-aware quality check (if success_criteria set)
|
||||||
if ctx.node_spec.success_criteria and ctx.llm:
|
if ctx.node_spec.success_criteria and ctx.llm:
|
||||||
from framework.graph.conversation_judge import evaluate_phase_completion
|
from framework.orchestrator.conversation_judge import evaluate_phase_completion
|
||||||
|
|
||||||
verdict = await evaluate_phase_completion(
|
verdict = await evaluate_phase_completion(
|
||||||
llm=ctx.llm,
|
llm=ctx.llm,
|
||||||
-112
@@ -204,118 +204,6 @@ def build_escalate_tool() -> Tool:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def build_delegate_tool(sub_agents: list[str], node_registry: dict[str, Any]) -> Tool | None:
|
|
||||||
"""Build the synthetic delegate_to_sub_agent tool for subagent invocation.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
sub_agents: List of node IDs that can be invoked as subagents.
|
|
||||||
node_registry: Map of node_id -> NodeSpec for looking up subagent descriptions.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tool definition if sub_agents is non-empty, None otherwise.
|
|
||||||
"""
|
|
||||||
if not sub_agents:
|
|
||||||
return None
|
|
||||||
|
|
||||||
agent_descriptions = []
|
|
||||||
for agent_id in sub_agents:
|
|
||||||
spec = node_registry.get(agent_id)
|
|
||||||
if spec:
|
|
||||||
desc = getattr(spec, "description", "(no description)")
|
|
||||||
agent_descriptions.append(f"- {agent_id}: {desc}")
|
|
||||||
else:
|
|
||||||
agent_descriptions.append(f"- {agent_id}: (not found in registry)")
|
|
||||||
|
|
||||||
return Tool(
|
|
||||||
name="delegate_to_sub_agent",
|
|
||||||
description=(
|
|
||||||
"Delegate a task to a specialized sub-agent. The sub-agent runs "
|
|
||||||
"autonomously with read-only access to current memory and returns "
|
|
||||||
"its result. Use this to parallelize work or leverage specialized capabilities.\n\n"
|
|
||||||
"Available sub-agents:\n" + "\n".join(agent_descriptions)
|
|
||||||
),
|
|
||||||
parameters={
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"agent_id": {
|
|
||||||
"type": "string",
|
|
||||||
"description": f"The sub-agent to invoke. Must be one of: {sub_agents}",
|
|
||||||
"enum": sub_agents,
|
|
||||||
},
|
|
||||||
"task": {
|
|
||||||
"type": "string",
|
|
||||||
"description": (
|
|
||||||
"The task description for the sub-agent to execute. "
|
|
||||||
"Be specific about what you want the sub-agent to do and "
|
|
||||||
"what information to return."
|
|
||||||
),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["agent_id", "task"],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def build_report_to_parent_tool() -> Tool:
|
|
||||||
"""Build the synthetic report_to_parent tool for sub-agent progress reports.
|
|
||||||
|
|
||||||
Sub-agents call this to send one-way progress updates, partial findings,
|
|
||||||
or status reports to the parent node (and external observers via event bus)
|
|
||||||
without blocking execution.
|
|
||||||
|
|
||||||
When ``wait_for_response`` is True, the sub-agent blocks until the parent
|
|
||||||
relays the user's response — used for escalation (e.g. login pages, CAPTCHAs).
|
|
||||||
|
|
||||||
When ``mark_complete`` is True, the sub-agent terminates immediately after
|
|
||||||
sending the report — no need to call set_output for each output key.
|
|
||||||
"""
|
|
||||||
return Tool(
|
|
||||||
name="report_to_parent",
|
|
||||||
description=(
|
|
||||||
"Send a report to the parent agent. By default this is fire-and-forget: "
|
|
||||||
"the parent receives the report but does not respond. "
|
|
||||||
"Set wait_for_response=true to BLOCK until the user replies — use this "
|
|
||||||
"when you need human intervention (e.g. login pages, CAPTCHAs, "
|
|
||||||
"authentication walls). The user's response is returned as the tool result. "
|
|
||||||
"Set mark_complete=true to finish your task and terminate immediately "
|
|
||||||
"after sending the report — use this when your findings are in the "
|
|
||||||
"message/data fields and you don't need to call set_output."
|
|
||||||
),
|
|
||||||
parameters={
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"message": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "A human-readable status or progress message.",
|
|
||||||
},
|
|
||||||
"data": {
|
|
||||||
"type": "object",
|
|
||||||
"description": "Optional structured data to include with the report.",
|
|
||||||
},
|
|
||||||
"wait_for_response": {
|
|
||||||
"type": "boolean",
|
|
||||||
"description": (
|
|
||||||
"If true, block execution until the user responds. "
|
|
||||||
"Use for escalation scenarios requiring human intervention."
|
|
||||||
),
|
|
||||||
"default": False,
|
|
||||||
},
|
|
||||||
"mark_complete": {
|
|
||||||
"type": "boolean",
|
|
||||||
"description": (
|
|
||||||
"If true, terminate the sub-agent immediately after sending "
|
|
||||||
"this report. The report message and data are delivered to the "
|
|
||||||
"parent as the final result. No set_output calls are needed."
|
|
||||||
),
|
|
||||||
"default": False,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["message"],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def handle_set_output(
|
def handle_set_output(
|
||||||
tool_input: dict[str, Any],
|
tool_input: dict[str, Any],
|
||||||
output_keys: list[str] | None,
|
output_keys: list[str] | None,
|
||||||
+2
-2
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Literal, Protocol, runtime_checkable
|
from typing import Any, Literal, Protocol, runtime_checkable
|
||||||
|
|
||||||
from framework.graph.conversation import (
|
from framework.agent_loop.conversation import (
|
||||||
ConversationStore,
|
ConversationStore,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -68,7 +68,7 @@ class LoopConfig:
|
|||||||
max_output_value_chars: int = 2_000
|
max_output_value_chars: int = 2_000
|
||||||
|
|
||||||
# Stream retry.
|
# Stream retry.
|
||||||
max_stream_retries: int = 3
|
max_stream_retries: int = 5
|
||||||
stream_retry_backoff_base: float = 2.0
|
stream_retry_backoff_base: float = 2.0
|
||||||
stream_retry_max_delay: float = 60.0
|
stream_retry_max_delay: float = 60.0
|
||||||
|
|
||||||
@@ -8,6 +8,14 @@ FRAMEWORK_AGENTS_DIR = Path(__file__).parent
|
|||||||
def list_framework_agents() -> list[Path]:
|
def list_framework_agents() -> list[Path]:
|
||||||
"""List all framework agent directories."""
|
"""List all framework agent directories."""
|
||||||
return sorted(
|
return sorted(
|
||||||
[p for p in FRAMEWORK_AGENTS_DIR.iterdir() if p.is_dir() and (p / "agent.py").exists()],
|
[
|
||||||
|
p
|
||||||
|
for p in FRAMEWORK_AGENTS_DIR.iterdir()
|
||||||
|
if p.is_dir()
|
||||||
|
and (
|
||||||
|
(p / "agent.json").exists()
|
||||||
|
or (p / "agent.py").exists()
|
||||||
|
)
|
||||||
|
],
|
||||||
key=lambda p: p.name,
|
key=lambda p: p.name,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -21,15 +21,15 @@ from pathlib import Path
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from framework.config import get_max_context_tokens
|
from framework.config import get_max_context_tokens
|
||||||
from framework.graph import Goal, NodeSpec, SuccessCriterion
|
from framework.orchestrator import Goal, NodeSpec, SuccessCriterion
|
||||||
from framework.graph.checkpoint_config import CheckpointConfig
|
from framework.orchestrator.checkpoint_config import CheckpointConfig
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
from framework.graph.executor import ExecutionResult
|
from framework.orchestrator.orchestrator import ExecutionResult
|
||||||
from framework.llm import LiteLLMProvider
|
from framework.llm import LiteLLMProvider
|
||||||
from framework.runner.mcp_registry import MCPRegistry
|
from framework.loader.mcp_registry import MCPRegistry
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
from framework.loader.tool_registry import ToolRegistry
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
from framework.host.agent_host import AgentHost
|
||||||
from framework.runtime.execution_stream import EntryPointSpec
|
from framework.host.execution_manager import EntryPointSpec
|
||||||
|
|
||||||
from .config import default_config
|
from .config import default_config
|
||||||
from .nodes import build_tester_node
|
from .nodes import build_tester_node
|
||||||
@@ -37,7 +37,7 @@ from .nodes import build_tester_node
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -233,7 +233,7 @@ requires_account_selection = True
|
|||||||
"""Signal TUI to show account picker before starting the agent."""
|
"""Signal TUI to show account picker before starting the agent."""
|
||||||
|
|
||||||
|
|
||||||
def configure_for_account(runner: AgentRunner, account: dict) -> None:
|
def configure_for_account(runner: AgentLoader, account: dict) -> None:
|
||||||
"""Scope the tester node's tools to the selected provider.
|
"""Scope the tester node's tools to the selected provider.
|
||||||
|
|
||||||
Handles both Aden accounts (account= routing) and local accounts
|
Handles both Aden accounts (account= routing) and local accounts
|
||||||
@@ -325,7 +325,7 @@ def _activate_local_account(credential_id: str, alias: str) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def _configure_aden_node(
|
def _configure_aden_node(
|
||||||
runner: AgentRunner,
|
runner: AgentLoader,
|
||||||
provider: str,
|
provider: str,
|
||||||
alias: str,
|
alias: str,
|
||||||
detail: str,
|
detail: str,
|
||||||
@@ -368,7 +368,7 @@ or any other identifier — always use the alias exactly as shown.
|
|||||||
|
|
||||||
|
|
||||||
def _configure_local_node(
|
def _configure_local_node(
|
||||||
runner: AgentRunner,
|
runner: AgentLoader,
|
||||||
provider: str,
|
provider: str,
|
||||||
alias: str,
|
alias: str,
|
||||||
identity: dict,
|
identity: dict,
|
||||||
@@ -497,7 +497,7 @@ class CredentialTesterAgent:
|
|||||||
def __init__(self, config=None):
|
def __init__(self, config=None):
|
||||||
self.config = config or default_config
|
self.config = config or default_config
|
||||||
self._selected_account: dict | None = None
|
self._selected_account: dict | None = None
|
||||||
self._agent_runtime: AgentRuntime | None = None
|
self._agent_runtime: AgentHost | None = None
|
||||||
self._tool_registry: ToolRegistry | None = None
|
self._tool_registry: ToolRegistry | None = None
|
||||||
self._storage_path: Path | None = None
|
self._storage_path: Path | None = None
|
||||||
|
|
||||||
@@ -613,7 +613,7 @@ class CredentialTesterAgent:
|
|||||||
|
|
||||||
graph = self._build_graph()
|
graph = self._build_graph()
|
||||||
|
|
||||||
self._agent_runtime = create_agent_runtime(
|
self._agent_runtime = AgentHost(
|
||||||
graph=graph,
|
graph=graph,
|
||||||
goal=goal,
|
goal=goal,
|
||||||
storage_path=self._storage_path,
|
storage_path=self._storage_path,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"""Node definitions for Credential Tester agent."""
|
"""Node definitions for Credential Tester agent."""
|
||||||
|
|
||||||
from framework.graph import NodeSpec
|
from framework.orchestrator import NodeSpec
|
||||||
|
|
||||||
|
|
||||||
def build_tester_node(
|
def build_tester_node(
|
||||||
|
|||||||
@@ -27,8 +27,8 @@ def _get_last_active(agent_path: Path) -> str | None:
|
|||||||
"""Return the most recent updated_at timestamp across all sessions.
|
"""Return the most recent updated_at timestamp across all sessions.
|
||||||
|
|
||||||
Checks both worker sessions (``~/.hive/agents/{name}/sessions/``) and
|
Checks both worker sessions (``~/.hive/agents/{name}/sessions/``) and
|
||||||
queen sessions (``~/.hive/queen/session/``) whose ``meta.json`` references
|
queen sessions (``~/.hive/agents/queens/default/sessions/``) whose
|
||||||
the same *agent_path*.
|
``meta.json`` references the same *agent_path*.
|
||||||
"""
|
"""
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
@@ -53,7 +53,9 @@ def _get_last_active(agent_path: Path) -> str | None:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# 2. Queen sessions
|
# 2. Queen sessions
|
||||||
queen_sessions_dir = Path.home() / ".hive" / "queen" / "session"
|
from framework.config import QUEENS_DIR
|
||||||
|
|
||||||
|
queen_sessions_dir = QUEENS_DIR / "default" / "sessions"
|
||||||
if queen_sessions_dir.exists():
|
if queen_sessions_dir.exists():
|
||||||
resolved = agent_path.resolve()
|
resolved = agent_path.resolve()
|
||||||
for d in queen_sessions_dir.iterdir():
|
for d in queen_sessions_dir.iterdir():
|
||||||
@@ -112,13 +114,33 @@ def _count_runs(agent_name: str) -> int:
|
|||||||
def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
|
def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
|
||||||
"""Extract node count, tool count, and tags from an agent directory.
|
"""Extract node count, tool count, and tags from an agent directory.
|
||||||
|
|
||||||
Prefers agent.py (AST-parsed) over agent.json for node/tool counts
|
Checks agent.json (declarative) first, then agent.py (legacy).
|
||||||
since agent.json may be stale. Tags are only available from agent.json.
|
|
||||||
"""
|
"""
|
||||||
import ast
|
import ast
|
||||||
|
|
||||||
node_count, tool_count, tags = 0, 0, []
|
node_count, tool_count, tags = 0, 0, []
|
||||||
|
|
||||||
|
# Declarative JSON agents (preferred)
|
||||||
|
agent_json = agent_path / "agent.json"
|
||||||
|
if agent_json.exists():
|
||||||
|
try:
|
||||||
|
data = json.loads(agent_json.read_text(encoding="utf-8"))
|
||||||
|
if isinstance(data, dict):
|
||||||
|
json_nodes = data.get("nodes", [])
|
||||||
|
node_count = len(json_nodes)
|
||||||
|
tools: set[str] = set()
|
||||||
|
for n in json_nodes:
|
||||||
|
node_tools = n.get("tools", {})
|
||||||
|
if isinstance(node_tools, dict):
|
||||||
|
tools.update(node_tools.get("allowed", []))
|
||||||
|
elif isinstance(node_tools, list):
|
||||||
|
tools.update(node_tools)
|
||||||
|
tool_count = len(tools)
|
||||||
|
return node_count, tool_count, tags
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Legacy: agent.py (AST-parsed)
|
||||||
agent_py = agent_path / "agent.py"
|
agent_py = agent_path / "agent.py"
|
||||||
if agent_py.exists():
|
if agent_py.exists():
|
||||||
try:
|
try:
|
||||||
@@ -132,39 +154,31 @@ def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
agent_json = agent_path / "agent.json"
|
|
||||||
if agent_json.exists():
|
|
||||||
try:
|
|
||||||
data = json.loads(agent_json.read_text(encoding="utf-8"))
|
|
||||||
json_nodes = data.get("graph", {}).get("nodes", []) or data.get("nodes", [])
|
|
||||||
if node_count == 0:
|
|
||||||
node_count = len(json_nodes)
|
|
||||||
tools: set[str] = set()
|
|
||||||
for n in json_nodes:
|
|
||||||
tools.update(n.get("tools", []))
|
|
||||||
tool_count = len(tools)
|
|
||||||
tags = data.get("agent", {}).get("tags", [])
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return node_count, tool_count, tags
|
return node_count, tool_count, tags
|
||||||
|
|
||||||
|
|
||||||
def discover_agents() -> dict[str, list[AgentEntry]]:
|
def discover_agents() -> dict[str, list[AgentEntry]]:
|
||||||
"""Discover agents from all known sources grouped by category."""
|
"""Discover agents from all known sources grouped by category."""
|
||||||
from framework.runner.cli import (
|
from framework.loader.cli import (
|
||||||
_extract_python_agent_metadata,
|
_extract_python_agent_metadata,
|
||||||
_get_framework_agents_dir,
|
_get_framework_agents_dir,
|
||||||
_is_valid_agent_dir,
|
_is_valid_agent_dir,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from framework.config import COLONIES_DIR
|
||||||
|
|
||||||
groups: dict[str, list[AgentEntry]] = {}
|
groups: dict[str, list[AgentEntry]] = {}
|
||||||
sources = [
|
sources = [
|
||||||
("Your Agents", Path("exports")),
|
("Your Agents", COLONIES_DIR),
|
||||||
|
("Your Agents", Path("exports")), # compat fallback
|
||||||
("Framework", _get_framework_agents_dir()),
|
("Framework", _get_framework_agents_dir()),
|
||||||
("Examples", Path("examples/templates")),
|
("Examples", Path("examples/templates")),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Track seen agent directory names to avoid duplicates when the same
|
||||||
|
# agent exists in both colonies/ and exports/ (colonies takes priority).
|
||||||
|
_seen_agent_names: set[str] = set()
|
||||||
|
|
||||||
for category, base_dir in sources:
|
for category, base_dir in sources:
|
||||||
if not base_dir.exists():
|
if not base_dir.exists():
|
||||||
continue
|
continue
|
||||||
@@ -172,6 +186,9 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
|
|||||||
for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
|
for path in sorted(base_dir.iterdir(), key=lambda p: p.name):
|
||||||
if not _is_valid_agent_dir(path):
|
if not _is_valid_agent_dir(path):
|
||||||
continue
|
continue
|
||||||
|
if path.name in _seen_agent_names:
|
||||||
|
continue
|
||||||
|
_seen_agent_names.add(path.name)
|
||||||
|
|
||||||
name, desc = _extract_python_agent_metadata(path)
|
name, desc = _extract_python_agent_metadata(path)
|
||||||
config_fallback_name = path.name.replace("_", " ").title()
|
config_fallback_name = path.name.replace("_", " ").title()
|
||||||
@@ -179,13 +196,19 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
|
|||||||
|
|
||||||
node_count, tool_count, tags = _extract_agent_stats(path)
|
node_count, tool_count, tags = _extract_agent_stats(path)
|
||||||
if not used_config:
|
if not used_config:
|
||||||
agent_json = path / "agent.json"
|
# Try agent.json (declarative) for metadata
|
||||||
if agent_json.exists():
|
agent_json_path = path / "agent.json"
|
||||||
|
if agent_json_path.exists():
|
||||||
try:
|
try:
|
||||||
data = json.loads(agent_json.read_text(encoding="utf-8"))
|
data = json.loads(
|
||||||
meta = data.get("agent", {})
|
agent_json_path.read_text(encoding="utf-8"),
|
||||||
name = meta.get("name", name)
|
)
|
||||||
desc = meta.get("description", desc)
|
if isinstance(data, dict):
|
||||||
|
raw_name = data.get("name", name)
|
||||||
|
if "-" in raw_name and " " not in raw_name:
|
||||||
|
raw_name = raw_name.replace("-", " ").title()
|
||||||
|
name = raw_name
|
||||||
|
desc = data.get("description", desc)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -204,6 +227,8 @@ def discover_agents() -> dict[str, list[AgentEntry]]:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
if entries:
|
if entries:
|
||||||
groups[category] = entries
|
existing = groups.get(category, [])
|
||||||
|
existing.extend(entries)
|
||||||
|
groups[category] = existing
|
||||||
|
|
||||||
return groups
|
return groups
|
||||||
|
|||||||
@@ -1,19 +1,13 @@
|
|||||||
"""
|
"""Queen -- the agent builder for the Hive framework."""
|
||||||
Queen — Native agent builder for the Hive framework.
|
|
||||||
|
|
||||||
Deeply understands the agent framework and produces complete Python packages
|
from .agent import queen_goal, queen_loop_config
|
||||||
with goals, nodes, edges, system prompts, MCP configuration, and tests
|
|
||||||
from natural language specifications.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .agent import queen_goal, queen_graph
|
|
||||||
from .config import AgentMetadata, RuntimeConfig, default_config, metadata
|
from .config import AgentMetadata, RuntimeConfig, default_config, metadata
|
||||||
|
|
||||||
__version__ = "1.0.0"
|
__version__ = "1.0.0"
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"queen_goal",
|
"queen_goal",
|
||||||
"queen_graph",
|
"queen_loop_config",
|
||||||
"RuntimeConfig",
|
"RuntimeConfig",
|
||||||
"AgentMetadata",
|
"AgentMetadata",
|
||||||
"default_config",
|
"default_config",
|
||||||
|
|||||||
@@ -1,38 +1,29 @@
|
|||||||
"""Queen graph definition."""
|
"""Queen agent definition.
|
||||||
|
|
||||||
from framework.graph import Goal
|
The queen is a single AgentLoop -- no graph, no orchestrator.
|
||||||
from framework.graph.edge import GraphSpec
|
Loaded by queen_orchestrator.create_queen().
|
||||||
|
"""
|
||||||
|
|
||||||
|
from framework.orchestrator.goal import Goal
|
||||||
|
|
||||||
from .nodes import queen_node
|
from .nodes import queen_node
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Queen graph — the primary persistent conversation.
|
|
||||||
# Loaded by queen_orchestrator.create_queen(), NOT by AgentRunner.
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
queen_goal = Goal(
|
queen_goal = Goal(
|
||||||
id="queen-manager",
|
id="queen-manager",
|
||||||
name="Queen Manager",
|
name="Queen Manager",
|
||||||
description=(
|
description=(
|
||||||
"Manage the worker agent lifecycle and serve as the user's primary interactive interface."
|
"Manage the worker agent lifecycle and serve as the "
|
||||||
|
"user's primary interactive interface."
|
||||||
),
|
),
|
||||||
success_criteria=[],
|
success_criteria=[],
|
||||||
constraints=[],
|
constraints=[],
|
||||||
)
|
)
|
||||||
|
|
||||||
queen_graph = GraphSpec(
|
# Loop config -- used by queen_orchestrator to build LoopConfig
|
||||||
id="queen-graph",
|
queen_loop_config = {
|
||||||
goal_id=queen_goal.id,
|
"max_iterations": 999_999,
|
||||||
version="1.0.0",
|
"max_tool_calls_per_turn": 30,
|
||||||
entry_node="queen",
|
"max_context_tokens": 180_000,
|
||||||
entry_points={"start": "queen"},
|
}
|
||||||
terminal_nodes=[],
|
|
||||||
pause_nodes=[],
|
__all__ = ["queen_goal", "queen_loop_config", "queen_node"]
|
||||||
nodes=[queen_node],
|
|
||||||
edges=[],
|
|
||||||
conversation_mode="continuous",
|
|
||||||
loop_config={
|
|
||||||
"max_iterations": 999_999,
|
|
||||||
"max_tool_calls_per_turn": 30,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"include": ["gcu-tools"]
|
||||||
|
}
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from framework.graph import NodeSpec
|
from framework.orchestrator import NodeSpec
|
||||||
|
|
||||||
# Load reference docs at import time so they're always in the system prompt.
|
# Load reference docs at import time so they're always in the system prompt.
|
||||||
# No voluntary read_file() calls needed — the LLM gets everything upfront.
|
# No voluntary read_file() calls needed — the LLM gets everything upfront.
|
||||||
@@ -37,7 +37,7 @@ _appendices = _build_appendices()
|
|||||||
|
|
||||||
# GCU guide — shared between planning and building via _shared_building_knowledge.
|
# GCU guide — shared between planning and building via _shared_building_knowledge.
|
||||||
_gcu_section = (
|
_gcu_section = (
|
||||||
("\n\n# GCU Nodes — Browser Automation\n\n" + _gcu_guide)
|
("\n\n# Browser Automation Nodes\n\n" + _gcu_guide)
|
||||||
if _is_gcu_enabled() and _gcu_guide
|
if _is_gcu_enabled() and _gcu_guide
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
@@ -81,7 +81,6 @@ _QUEEN_PLANNING_TOOLS = [
|
|||||||
"save_agent_draft",
|
"save_agent_draft",
|
||||||
"confirm_and_build",
|
"confirm_and_build",
|
||||||
# Scaffold + transition to building (requires confirm_and_build first)
|
# Scaffold + transition to building (requires confirm_and_build first)
|
||||||
"initialize_and_build_agent",
|
|
||||||
# Load existing agent (after user confirms)
|
# Load existing agent (after user confirms)
|
||||||
"load_built_agent",
|
"load_built_agent",
|
||||||
]
|
]
|
||||||
@@ -172,7 +171,7 @@ _shared_building_knowledge = (
|
|||||||
|
|
||||||
## Paths (MANDATORY)
|
## Paths (MANDATORY)
|
||||||
**Always use RELATIVE paths** \
|
**Always use RELATIVE paths** \
|
||||||
(e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`).
|
(e.g. `exports/agent_name/agent.json`).
|
||||||
**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail.
|
**Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail.
|
||||||
The project root is implicit.
|
The project root is implicit.
|
||||||
|
|
||||||
@@ -182,14 +181,18 @@ When designing worker nodes or writing worker system prompts, reference these \
|
|||||||
tool names — NOT the coder-tools names (read_file, write_file, etc.).
|
tool names — NOT the coder-tools names (read_file, write_file, etc.).
|
||||||
|
|
||||||
Worker data tools (for large results and spillover):
|
Worker data tools (for large results and spillover):
|
||||||
- save_data(filename, data, data_dir) — save data to a file for later retrieval
|
Worker data tools (from files-tools MCP server):
|
||||||
- load_data(filename, data_dir, offset_bytes?, limit_bytes?) — load data \
|
- read_file(path) — read a file
|
||||||
with byte-based pagination
|
- write_file(path, content) — write/create a file
|
||||||
- list_data_files(data_dir) — list available data files
|
- list_files(path) — list directory contents
|
||||||
- append_data(filename, data, data_dir) — append to a file incrementally
|
- search_files(pattern, path) — regex search in files
|
||||||
- edit_data(filename, old_text, new_text, data_dir) — find-and-replace in a data file
|
|
||||||
- serve_file_to_user(filename, data_dir, label?, open_in_browser?) — \
|
Worker data tools (from hive-tools MCP server):
|
||||||
generate a clickable file URI for the user
|
- csv_read, csv_write, csv_append — CSV operations
|
||||||
|
- pdf_read — read PDF files
|
||||||
|
|
||||||
|
All tools are registered in the global MCP registry (~/.hive/mcp_registry/). \
|
||||||
|
Workers get tools from: hive-tools, gcu-tools, files-tools.
|
||||||
|
|
||||||
IMPORTANT: Do NOT tell workers to use read_file, write_file, edit_file, \
|
IMPORTANT: Do NOT tell workers to use read_file, write_file, edit_file, \
|
||||||
search_files, or list_directory — those are YOUR tools, not theirs.
|
search_files, or list_directory — those are YOUR tools, not theirs.
|
||||||
@@ -204,7 +207,7 @@ _planning_knowledge = """\
|
|||||||
# Core Mandates (Planning)
|
# Core Mandates (Planning)
|
||||||
- **DO NOT propose a complete goal on your own.** Instead, \
|
- **DO NOT propose a complete goal on your own.** Instead, \
|
||||||
collaborate with the user to define it.
|
collaborate with the user to define it.
|
||||||
- **NEVER call `initialize_and_build_agent` without explicit user approval.** \
|
- **NEVER call `confirm_and_build` without explicit user approval.** \
|
||||||
Present the full design first and wait for the user to confirm before building.
|
Present the full design first and wait for the user to confirm before building.
|
||||||
- **Discover tools dynamically.** NEVER reference tools from static \
|
- **Discover tools dynamically.** NEVER reference tools from static \
|
||||||
docs. Always run list_agent_tools() to see what actually exists.
|
docs. Always run list_agent_tools() to see what actually exists.
|
||||||
@@ -252,9 +255,9 @@ When the stakeholder describes what they want, mentally construct:
|
|||||||
|
|
||||||
**After the user responds, assess fit and gaps together.** Be honest and specific. \
|
**After the user responds, assess fit and gaps together.** Be honest and specific. \
|
||||||
Reference tools from list_agent_tools() AND built-in capabilities:
|
Reference tools from list_agent_tools() AND built-in capabilities:
|
||||||
- **GCU browser automation** (`node_type="gcu"`) provides full Playwright-based \
|
- **Browser automation provides full Playwright-based \
|
||||||
browser control (navigation, clicking, typing, scrolling, JS-rendered pages, \
|
browser control (navigation, clicking, typing, scrolling, JS-rendered pages, \
|
||||||
multi-tab). Do NOT list browser automation as missing — use GCU nodes.
|
multi-tab). Do NOT list browser automation as missing — use browser nodes with tools: {policy: "all"}.
|
||||||
|
|
||||||
Present a short **Framework Fit Assessment**:
|
Present a short **Framework Fit Assessment**:
|
||||||
- **Works well**: 2-4 strengths for this use case
|
- **Works well**: 2-4 strengths for this use case
|
||||||
@@ -306,14 +309,11 @@ explicitly on a node. Available types:
|
|||||||
- **io** (dusty purple, parallelogram): External data input/output
|
- **io** (dusty purple, parallelogram): External data input/output
|
||||||
- **document** (steel blue, wavy rect): Report or document generation
|
- **document** (steel blue, wavy rect): Report or document generation
|
||||||
- **database** (muted teal, cylinder): Database or data store
|
- **database** (muted teal, cylinder): Database or data store
|
||||||
- **subprocess** (dark cyan, subroutine): Delegated sub-agent / predefined process
|
- **browser** (deep blue, hexagon): Browser automation node (uses gcu-tools).
|
||||||
- **browser** (deep blue, hexagon): GCU browser automation / sub-agent \
|
|
||||||
delegation. At build time, browser nodes are dissolved into the parent \
|
|
||||||
node's sub_agents list. Use for any GCU or sub-agent leaf node.
|
|
||||||
|
|
||||||
Auto-detection works well for most cases: first node → start, nodes with \
|
Auto-detection works well for most cases: first node → start, nodes with \
|
||||||
no outgoing edges → terminal, nodes with multiple conditional outgoing \
|
no outgoing edges → terminal, nodes with multiple conditional outgoing \
|
||||||
edges → decision, GCU nodes → browser, nodes mentioning "database" → \
|
edges → decision, browser tool nodes → browser, nodes mentioning "database" → \
|
||||||
database, nodes mentioning "report/document" → document, I/O tools like \
|
database, nodes mentioning "report/document" → document, I/O tools like \
|
||||||
send_email → io. Everything else defaults to process. Set flowchart_type \
|
send_email → io. Everything else defaults to process. Set flowchart_type \
|
||||||
explicitly only when auto-detection would be wrong.
|
explicitly only when auto-detection would be wrong.
|
||||||
@@ -354,48 +354,19 @@ gather → [Valid data?] →Yes→ transform → deliver
|
|||||||
In the draft: the `[Valid data?]` node has `flowchart_type: "decision"`, \
|
In the draft: the `[Valid data?]` node has `flowchart_type: "decision"`, \
|
||||||
`decision_clause: "Data passes validation checks?"`, with labeled yes/no edges.
|
`decision_clause: "Data passes validation checks?"`, with labeled yes/no edges.
|
||||||
|
|
||||||
## Sub-Agent Nodes — Planning-Only Delegation
|
## Browser Automation Nodes
|
||||||
|
|
||||||
Sub-agent nodes (dark teal subroutines) are **planning-only** visual elements \
|
Browser nodes are regular `event_loop` nodes with browser tools \
|
||||||
that show which nodes delegate to sub-agents. At `confirm_and_build()`, \
|
(from the gcu-tools MCP server) in their tool list. They are wired \
|
||||||
sub-agent nodes are **dissolved** into their parent node:
|
into the graph with edges like any other node:
|
||||||
|
|
||||||
- The sub-agent node's ID is added to the predecessor's `sub_agents` list
|
|
||||||
- The sub-agent node and its connecting edge are removed
|
|
||||||
- At runtime, the parent node can invoke the sub-agent via `delegate_to_sub_agent`
|
|
||||||
|
|
||||||
**Rules for sub-agent nodes (INCLUDING GCU nodes):**
|
|
||||||
- GCU nodes are auto-detected as `flowchart_type: "browser"` (hexagon)
|
|
||||||
- Connect from the managing parent node to the sub-agent node
|
|
||||||
- Sub-agent nodes must be **leaf nodes** — NO outgoing edges to other nodes
|
|
||||||
- At build time, browser/GCU nodes are dissolved into the parent's \
|
|
||||||
`sub_agents` list, just like decision nodes are dissolved into criteria
|
|
||||||
|
|
||||||
**CRITICAL: GCU nodes (`node_type: "gcu"`) are ALWAYS sub-agents.** \
|
|
||||||
They MUST NOT appear in the linear flow. NEVER chain GCU nodes \
|
|
||||||
sequentially (A → gcu1 → gcu2 → B is WRONG). Instead, attach them \
|
|
||||||
as leaves to the parent that orchestrates them:
|
|
||||||
```
|
```
|
||||||
WRONG: intake → gcu_find_prospect → gcu_scan_mutuals → check_results
|
research → browser_scan → analyze_results
|
||||||
WRONG: decision_node → gcu_node (as a yes/no branch)
|
|
||||||
RIGHT: intake (sub_agents: [gcu_find, gcu_scan]) → check_results
|
|
||||||
```
|
```
|
||||||
The parent node delegates to its GCU sub-agents and collects results. \
|
Use `tools: {policy: "all"}` to give browser nodes access to all \
|
||||||
The main flow continues from the parent, not from the GCU node. \
|
browser tools, or list specific ones with `policy: "explicit"`.
|
||||||
GCU nodes MUST NOT be children of decision nodes — decision nodes \
|
|
||||||
dissolve at build time, which would leave the GCU as a dangling \
|
|
||||||
workflow step.
|
|
||||||
|
|
||||||
**How to show delegation in the flowchart:**
|
If the worker agent starts from some initial input it is okay. \
|
||||||
```
|
The queen(you) owns intake: you gather user requirements, then call \
|
||||||
research → (deep_searcher) ← browser/GCU node, leaf
|
|
||||||
research → [Enough results?] ← decision node
|
|
||||||
```
|
|
||||||
After dissolution: `research` node gets `sub_agents: ["deep_searcher"]` \
|
|
||||||
and `success_criteria: "Enough results?"`.
|
|
||||||
|
|
||||||
If the worker agent start from some initial input it is okay. \
|
|
||||||
The queen(you) owns intake: you gathers user requirements, then calls \
|
|
||||||
`run_agent_with_input(task)` with a structured task description. \
|
`run_agent_with_input(task)` with a structured task description. \
|
||||||
When building the agent, design the entry node's `input_keys` to \
|
When building the agent, design the entry node's `input_keys` to \
|
||||||
match what the queen will provide at run time. Worker nodes should \
|
match what the queen will provide at run time. Worker nodes should \
|
||||||
@@ -411,14 +382,14 @@ You MUST get explicit user approval before ANY code is generated.
|
|||||||
2. **WAIT for user response.** Do NOT proceed without it.
|
2. **WAIT for user response.** Do NOT proceed without it.
|
||||||
3. Handle the response:
|
3. Handle the response:
|
||||||
- If **Approve / Proceed**: Call confirm_and_build(), then \
|
- If **Approve / Proceed**: Call confirm_and_build(), then \
|
||||||
initialize_and_build_agent(agent_name, nodes)
|
confirm_and_build(agent_name)
|
||||||
- If **Adjust scope**: Discuss changes, update the draft with \
|
- If **Adjust scope**: Discuss changes, update the draft with \
|
||||||
save_agent_draft() again, and re-ask
|
save_agent_draft() again, and re-ask
|
||||||
- If **More questions**: Answer them honestly, then ask again
|
- If **More questions**: Answer them honestly, then ask again
|
||||||
- If **Reconsider**: Discuss alternatives. If they decide to proceed, \
|
- If **Reconsider**: Discuss alternatives. If they decide to proceed, \
|
||||||
that's their informed choice
|
that's their informed choice
|
||||||
|
|
||||||
**NEVER call initialize_and_build_agent without first calling \
|
**NEVER call confirm_and_build without first calling \
|
||||||
confirm_and_build().** The system will block the transition if you try.
|
confirm_and_build().** The system will block the transition if you try.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -477,53 +448,75 @@ When a user says "my agent is failing" or "debug this agent":
|
|||||||
## 5. Implement
|
## 5. Implement
|
||||||
|
|
||||||
**You should only reach this step after the user has approved the draft design \
|
**You should only reach this step after the user has approved the draft design \
|
||||||
in the planning phase. The draft metadata will pre-populate descriptions, \
|
and you have called `confirm_and_build(agent_name="my_agent")`.**
|
||||||
goals, success criteria, and node metadata in the generated files.**
|
|
||||||
|
|
||||||
Call `initialize_and_build_agent(agent_name, nodes)` to generate all package \
|
`confirm_and_build` created the agent directory (returned in agent_path). \
|
||||||
files. The agent_name must be snake_case (e.g., "my_agent"). Pass node names \
|
Now write the complete agent config directly:
|
||||||
as comma-separated string (e.g., "gather,process,review").
|
|
||||||
The tool creates: config.py, nodes/__init__.py, agent.py, \
|
|
||||||
__init__.py, __main__.py, mcp_servers.json, tests/conftest.py.
|
|
||||||
|
|
||||||
The generated files are **structurally complete** with correct imports, \
|
```
|
||||||
class definition, `validate()` method, `default_agent` export, and \
|
write_file("<colony_path>/agent.json", <complete JSON config>)
|
||||||
`__init__.py` re-exports. They pass validation as-is.
|
```
|
||||||
|
|
||||||
`mcp_servers.json` is auto-generated with hive-tools as the default. \
|
The agent.json must include ALL of these in one write:
|
||||||
Do NOT manually create or overwrite `mcp_servers.json`.
|
- `name`, `version`, `description`
|
||||||
|
- `goal` with `description`, `success_criteria`, `constraints`
|
||||||
|
- `identity_prompt` (agent-level behavior)
|
||||||
|
- `nodes` — each with `id`, `description`, `system_prompt`, `tools`, \
|
||||||
|
`input_keys`, `output_keys`, `success_criteria`
|
||||||
|
- `edges` — connecting all nodes with proper conditions
|
||||||
|
- `entry_node`, `terminal_nodes`
|
||||||
|
- `mcp_servers` — REQUIRED. Always include all three: \
|
||||||
|
`[{"name": "hive-tools"}, {"name": "gcu-tools"}, {"name": "files-tools"}]`
|
||||||
|
- `loop_config` — `max_iterations`, `max_context_tokens`
|
||||||
|
|
||||||
### Customizing generated files
|
**Write the COMPLETE config in one `write_file` call. No TODOs, no placeholders.** \
|
||||||
|
The queen writes final production-ready system prompts directly.
|
||||||
|
|
||||||
**CRITICAL: Use `edit_file` to customize TODO placeholders. \
|
**There are NO Python files.** The framework loads agent.json directly.
|
||||||
NEVER use `write_file` to rewrite generated files from scratch. \
|
|
||||||
Rewriting breaks imports, class structure, and causes validation failures.**
|
|
||||||
|
|
||||||
Safe to edit with `edit_file`:
|
MCP servers are loaded from the global registry by name. Available servers:
|
||||||
- System prompts, tools, input_keys, output_keys, success_criteria in \
|
- `hive-tools` — web search, email, CRM, calendar, 100+ integrations
|
||||||
nodes/__init__.py
|
- `gcu-tools` — browser automation (click, type, navigate, screenshot)
|
||||||
- Goal description, success criteria values, constraint values, edge \
|
- `files-tools` — file I/O (read, write, edit, search, list)
|
||||||
definitions, identity_prompt in agent.py
|
|
||||||
- CLI options in __main__.py
|
|
||||||
- For triggers (timers/webhooks), add entries to triggers.json in the \
|
|
||||||
agent's export directory
|
|
||||||
|
|
||||||
Do NOT modify or rewrite:
|
**Template variables:** Add a `variables:` section at the top of agent.json \
|
||||||
- Import statements at top of agent.py (they are correct)
|
and use `{{variable_name}}` in system prompts for config injection:
|
||||||
- The agent class definition, `validate()`, `_build_graph()`, `_setup()`, \
|
```yaml
|
||||||
or lifecycle methods (start/stop/run)
|
variables:
|
||||||
- `__init__.py` exports (all required variables are already re-exported)
|
spreadsheet_id: "1ZVx..."
|
||||||
- `default_agent = ClassName()` at bottom of agent.py
|
nodes:
|
||||||
|
- id: start
|
||||||
|
system_prompt: |
|
||||||
|
Use spreadsheet: {{spreadsheet_id}}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Tool access in nodes
|
||||||
|
|
||||||
|
Each node declares its tool access policy:
|
||||||
|
```yaml
|
||||||
|
# Explicit list (recommended)
|
||||||
|
tools:
|
||||||
|
policy: explicit
|
||||||
|
allowed: [web_search, write_file]
|
||||||
|
|
||||||
|
# All tools (for browser automation nodes)
|
||||||
|
tools:
|
||||||
|
policy: all
|
||||||
|
|
||||||
|
# No tools (for handoff/summary nodes)
|
||||||
|
tools:
|
||||||
|
policy: none
|
||||||
|
```
|
||||||
|
|
||||||
## 6. Verify and Load
|
## 6. Verify and Load
|
||||||
|
|
||||||
Call `validate_agent_package("{name}")` after initialization. \
|
Call `validate_agent_package("{name}")` after initialization. \
|
||||||
It runs structural checks (class validation, graph validation, tool \
|
It runs structural checks (class validation, graph validation, tool \
|
||||||
validation, tests) and returns a consolidated result. If anything \
|
validation, tests) and returns a consolidated result. If anything \
|
||||||
fails: read the error, fix with edit_file, re-validate. Up to 3x.
|
fails: read the error, fix with read_file+write_file, re-validate. Up to 3x.
|
||||||
|
|
||||||
When validation passes, immediately call \
|
When validation passes, immediately call \
|
||||||
`load_built_agent("exports/{name}")` to load the agent into the \
|
`load_built_agent("<agent_path>")` to load the agent into the \
|
||||||
session. This switches to STAGING phase and shows the graph in the \
|
session. This switches to STAGING phase and shows the graph in the \
|
||||||
visualizer. Do NOT wait for user input between validation and loading.
|
visualizer. Do NOT wait for user input between validation and loading.
|
||||||
"""
|
"""
|
||||||
@@ -625,13 +618,11 @@ document, database, subprocess, etc.) with unique shapes and colors. Set \
|
|||||||
flowchart_type on a node to override. Nodes need only an id. \
|
flowchart_type on a node to override. Nodes need only an id. \
|
||||||
Use decision nodes (flowchart_type: "decision", with decision_clause and \
|
Use decision nodes (flowchart_type: "decision", with decision_clause and \
|
||||||
labeled yes/no edges) to make conditional branching explicit. \
|
labeled yes/no edges) to make conditional branching explicit. \
|
||||||
GCU/sub-agent nodes (node_type: "gcu") are auto-detected as browser \
|
|
||||||
hexagons — connect them as leaf nodes to their parent.
|
hexagons — connect them as leaf nodes to their parent.
|
||||||
- confirm_and_build() — Record user confirmation of the draft. Dissolves \
|
- confirm_and_build() — Record user confirmation of the draft. Dissolves \
|
||||||
planning-only nodes (decision → predecessor criteria; browser/GCU → \
|
planning-only nodes (decision → predecessor criteria; browser/GCU → \
|
||||||
predecessor sub_agents list). Call this ONLY after the user explicitly \
|
|
||||||
approves via ask_user.
|
approves via ask_user.
|
||||||
- initialize_and_build_agent(agent_name?, nodes?) — Scaffold the agent package \
|
- confirm_and_build(agent_name) — Scaffold the agent package \
|
||||||
and transition to BUILDING phase. For new agents, this REQUIRES \
|
and transition to BUILDING phase. For new agents, this REQUIRES \
|
||||||
save_agent_draft() + confirm_and_build() first. The draft metadata is used to \
|
save_agent_draft() + confirm_and_build() first. The draft metadata is used to \
|
||||||
pre-populate the generated files. Without agent_name: transition to BUILDING \
|
pre-populate the generated files. Without agent_name: transition to BUILDING \
|
||||||
@@ -647,8 +638,8 @@ phase. Only use this when the user explicitly asks to work with an existing agen
|
|||||||
2. Call save_agent_draft() to create visual draft → present to user
|
2. Call save_agent_draft() to create visual draft → present to user
|
||||||
3. Call ask_user() to get explicit approval
|
3. Call ask_user() to get explicit approval
|
||||||
4. Call confirm_and_build() to record approval
|
4. Call confirm_and_build() to record approval
|
||||||
5. Call initialize_and_build_agent() to scaffold and start building
|
5. Call confirm_and_build() to scaffold and start building
|
||||||
For diagnosis of existing agents, call initialize_and_build_agent() \
|
For diagnosis of existing agents, call confirm_and_build() \
|
||||||
(no args) after agreeing on a fix plan with the user.
|
(no args) after agreeing on a fix plan with the user.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -884,7 +875,7 @@ that changes the structure, call save_agent_draft() again so they see the \
|
|||||||
update in real-time. The flowchart is a live collaboration tool.
|
update in real-time. The flowchart is a live collaboration tool.
|
||||||
8. When the design is stable, use ask_user to get explicit approval
|
8. When the design is stable, use ask_user to get explicit approval
|
||||||
9. Call confirm_and_build() after the user approves
|
9. Call confirm_and_build() after the user approves
|
||||||
10. Call initialize_and_build_agent(agent_name, nodes) to scaffold and start building
|
10. Call confirm_and_build(agent_name) to scaffold and start building
|
||||||
|
|
||||||
**The flowchart is your shared whiteboard.** Don't describe changes in text \
|
**The flowchart is your shared whiteboard.** Don't describe changes in text \
|
||||||
and then ask "should I update the draft?" — just update it. If the user says \
|
and then ask "should I update the draft?" — just update it. If the user says \
|
||||||
@@ -895,7 +886,7 @@ see every structural change reflected in the visualizer as you discuss it.
|
|||||||
**CRITICAL: Planning → Building boundary.** You MUST get explicit user \
|
**CRITICAL: Planning → Building boundary.** You MUST get explicit user \
|
||||||
confirmation before moving to building. The sequence is:
|
confirmation before moving to building. The sequence is:
|
||||||
save_agent_draft() → iterate with user → ask_user() → confirm_and_build() → \
|
save_agent_draft() → iterate with user → ask_user() → confirm_and_build() → \
|
||||||
initialize_and_build_agent()
|
confirm_and_build()
|
||||||
Skipping any of these steps will be blocked by the system.
|
Skipping any of these steps will be blocked by the system.
|
||||||
|
|
||||||
Remember: DO NOT write or edit any files yet. This is a read-only exploration \
|
Remember: DO NOT write or edit any files yet. This is a read-only exploration \
|
||||||
@@ -911,7 +902,7 @@ your priority is diagnosis, not new design:
|
|||||||
2. Summarize the root cause to the user
|
2. Summarize the root cause to the user
|
||||||
3. Propose a fix plan (what to change, what behavior to adjust)
|
3. Propose a fix plan (what to change, what behavior to adjust)
|
||||||
4. Get user approval via ask_user
|
4. Get user approval via ask_user
|
||||||
5. Call initialize_and_build_agent() (no args) to transition to building and implement the fix
|
5. Call confirm_and_build() (no args) to transition to building and implement the fix
|
||||||
|
|
||||||
Do NOT start the full discovery workflow (tool discovery, gap analysis) in \
|
Do NOT start the full discovery workflow (tool discovery, gap analysis) in \
|
||||||
diagnosis mode — you already have a built agent, you just need to fix it.
|
diagnosis mode — you already have a built agent, you just need to fix it.
|
||||||
@@ -947,7 +938,7 @@ delegate agent construction to the worker, even as a "research" subtask.
|
|||||||
## Keeping the flowchart in sync during building
|
## Keeping the flowchart in sync during building
|
||||||
|
|
||||||
When you make structural changes to the agent (add/remove/rename nodes, \
|
When you make structural changes to the agent (add/remove/rename nodes, \
|
||||||
change edges, modify sub-agent assignments), call save_agent_draft() to \
|
change edges, modify node connections), call save_agent_draft() to \
|
||||||
update the flowchart. During building, this auto-dissolves planning-only \
|
update the flowchart. During building, this auto-dissolves planning-only \
|
||||||
nodes without needing user re-confirmation. The user sees the updated \
|
nodes without needing user re-confirmation. The user sees the updated \
|
||||||
flowchart immediately.
|
flowchart immediately.
|
||||||
@@ -966,15 +957,15 @@ user says "replan", "go back", "let's redesign", "change the approach", \
|
|||||||
|
|
||||||
## CRITICAL — Graph topology errors require replanning, not code edits
|
## CRITICAL — Graph topology errors require replanning, not code edits
|
||||||
|
|
||||||
If you discover that the agent graph has structural problems — GCU nodes \
|
If you discover that the agent graph has structural problems — browser nodes \
|
||||||
in the linear flow, missing edges, wrong node connections, incorrect \
|
in the linear flow, missing edges, wrong node connections, incorrect \
|
||||||
sub-agent assignments — you MUST call replan_agent() and fix the draft. \
|
node connections — you MUST call replan_agent() and fix the draft. \
|
||||||
Do NOT attempt to fix topology by editing agent.py directly. The graph \
|
Do NOT attempt to fix topology by editing agent.json directly. The graph \
|
||||||
structure is defined by the draft → dissolution → code-gen pipeline. \
|
structure is defined by the draft → dissolution → code-gen pipeline. \
|
||||||
Editing code to rewire nodes bypasses the flowchart and creates drift \
|
Editing the config to rewire nodes bypasses the flowchart and creates drift \
|
||||||
between what the user sees and what the code does.
|
between what the user sees and what the config does.
|
||||||
|
|
||||||
**WRONG:** "Let me fix agent.py to remove GCU nodes from edges..."
|
**WRONG:** "Let me fix agent.json to remove browser nodes from edges..."
|
||||||
**RIGHT:** Call replan_agent(), fix the draft with save_agent_draft(), \
|
**RIGHT:** Call replan_agent(), fix the draft with save_agent_draft(), \
|
||||||
get user approval, then confirm_and_build() → the corrected code is \
|
get user approval, then confirm_and_build() → the corrected code is \
|
||||||
generated automatically.
|
generated automatically.
|
||||||
@@ -1100,18 +1091,15 @@ You wake up when:
|
|||||||
If the user asks for progress, call get_graph_status() ONCE and report. \
|
If the user asks for progress, call get_graph_status() ONCE and report. \
|
||||||
If the summary mentions issues, follow up with get_graph_status(focus="issues").
|
If the summary mentions issues, follow up with get_graph_status(focus="issues").
|
||||||
|
|
||||||
## Subagent delegations (browser automation, GCU)
|
## Browser automation nodes
|
||||||
|
|
||||||
When the worker delegates to a subagent (e.g., GCU browser automation), expect it \
|
Browser nodes may take 2-5 minutes for web scraping tasks. During this time:
|
||||||
to take 2-5 minutes. During this time:
|
- Progress will show 0% until the node calls set_output at the end.
|
||||||
- Progress will show 0% — this is NORMAL. The subagent only calls set_output at the end.
|
- Check get_graph_status(focus="full") for activity updates.
|
||||||
- Check get_graph_status(focus="full") for "subagent_activity" — this shows the \
|
- Do NOT conclude it is stuck just because you see repeated \
|
||||||
subagent's latest reasoning text and confirms it is making real progress.
|
browser_click/browser_snapshot calls — that is expected for web scraping.
|
||||||
- Do NOT conclude the subagent is stuck just because progress is 0% or because \
|
- Only intervene if: the node has been running for 5+ minutes with no new \
|
||||||
you see repeated browser_click/browser_snapshot calls — that is the expected \
|
activity updates, OR the judge escalates.
|
||||||
pattern for web scraping.
|
|
||||||
- Only intervene if: the subagent has been running for 5+ minutes with no new \
|
|
||||||
subagent_activity updates, OR the judge escalates.
|
|
||||||
|
|
||||||
## Handling worker termination ([WORKER_TERMINAL])
|
## Handling worker termination ([WORKER_TERMINAL])
|
||||||
|
|
||||||
@@ -1143,11 +1131,11 @@ escalations. If the user gave you instructions (e.g., "just retry on errors", \
|
|||||||
|
|
||||||
CRITICAL — escalation relay protocol:
|
CRITICAL — escalation relay protocol:
|
||||||
When an escalation requires user input (auth blocks, human review), the worker \
|
When an escalation requires user input (auth blocks, human review), the worker \
|
||||||
or its subagent is BLOCKED and waiting for your response. You MUST follow this \
|
or is BLOCKED and waiting for your response. You MUST follow this \
|
||||||
exact two-step sequence:
|
exact two-step sequence:
|
||||||
Step 1: call ask_user() to get the user's answer.
|
Step 1: call ask_user() to get the user's answer.
|
||||||
Step 2: call inject_message() with the user's answer IMMEDIATELY after.
|
Step 2: call inject_message() with the user's answer IMMEDIATELY after.
|
||||||
If you skip Step 2, the worker/subagent stays blocked FOREVER and the task hangs. \
|
If you skip Step 2, the worker stays blocked FOREVER and the task hangs. \
|
||||||
NEVER respond to the user without also calling inject_message() to unblock \
|
NEVER respond to the user without also calling inject_message() to unblock \
|
||||||
the worker. Even if the user says "skip" or "cancel", you must still relay that \
|
the worker. Even if the user says "skip" or "cancel", you must still relay that \
|
||||||
decision via inject_message() so the worker can clean up.
|
decision via inject_message() so the worker can clean up.
|
||||||
@@ -1233,7 +1221,7 @@ _queen_tools_docs = (
|
|||||||
+ "\n\n### Phase transitions\n"
|
+ "\n\n### Phase transitions\n"
|
||||||
"- save_agent_draft(...) → creates visual-only draft graph (stays in PLANNING)\n"
|
"- save_agent_draft(...) → creates visual-only draft graph (stays in PLANNING)\n"
|
||||||
"- confirm_and_build() → records user approval of draft (stays in PLANNING)\n"
|
"- confirm_and_build() → records user approval of draft (stays in PLANNING)\n"
|
||||||
"- initialize_and_build_agent(agent_name?, nodes?) → scaffolds package + switches to "
|
"- confirm_and_build(agent_name) → scaffolds package + switches to "
|
||||||
"BUILDING (requires draft + confirmation for new agents)\n"
|
"BUILDING (requires draft + confirmation for new agents)\n"
|
||||||
"- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n"
|
"- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n"
|
||||||
"- load_built_agent(path) → switches to STAGING phase\n"
|
"- load_built_agent(path) → switches to STAGING phase\n"
|
||||||
|
|||||||
@@ -1,9 +1,15 @@
|
|||||||
"""Queen global memory helpers.
|
"""Queen global memory helpers.
|
||||||
|
|
||||||
Global memory lives in ``~/.hive/queen/global_memory/`` and stores durable
|
Memory hierarchy::
|
||||||
cross-session knowledge about the user (profile, preferences, environment,
|
|
||||||
feedback). Each memory is an individual ``.md`` file with optional YAML
|
~/.hive/memories/
|
||||||
frontmatter (name, type, description).
|
global/ # shared across all queens and colonies
|
||||||
|
colonies/{name}/ # colony-scoped memories
|
||||||
|
agents/queens/{name}/ # queen-specific memories
|
||||||
|
agents/{name}/ # per-worker-agent memories
|
||||||
|
|
||||||
|
Each memory is an individual ``.md`` file with optional YAML frontmatter
|
||||||
|
(name, type, description).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -21,7 +27,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] = ("profile", "preference", "environment", "feedback")
|
GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] = ("profile", "preference", "environment", "feedback")
|
||||||
|
|
||||||
_HIVE_QUEEN_DIR = Path.home() / ".hive" / "queen"
|
from framework.config import MEMORIES_DIR
|
||||||
|
|
||||||
MAX_FILES: int = 200
|
MAX_FILES: int = 200
|
||||||
MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file
|
MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file
|
||||||
@@ -31,8 +37,23 @@ _HEADER_LINE_LIMIT: int = 30
|
|||||||
|
|
||||||
|
|
||||||
def global_memory_dir() -> Path:
|
def global_memory_dir() -> Path:
|
||||||
"""Return the queen-global memory directory."""
|
"""Return the global memory directory (shared across all queens/colonies)."""
|
||||||
return _HIVE_QUEEN_DIR / "global_memory"
|
return MEMORIES_DIR / "global"
|
||||||
|
|
||||||
|
|
||||||
|
def colony_memory_dir(colony_name: str) -> Path:
|
||||||
|
"""Return the memory directory for a named colony."""
|
||||||
|
return MEMORIES_DIR / "colonies" / colony_name
|
||||||
|
|
||||||
|
|
||||||
|
def queen_memory_dir(queen_name: str = "default") -> Path:
|
||||||
|
"""Return the memory directory for a named queen."""
|
||||||
|
return MEMORIES_DIR / "agents" / "queens" / queen_name
|
||||||
|
|
||||||
|
|
||||||
|
def agent_memory_dir(agent_name: str) -> Path:
|
||||||
|
"""Return the memory directory for a worker agent."""
|
||||||
|
return MEMORIES_DIR / "agents" / agent_name
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -91,7 +91,19 @@ async def select_memories(
|
|||||||
resp.stop_reason,
|
resp.stop_reason,
|
||||||
)
|
)
|
||||||
return []
|
return []
|
||||||
data = json.loads(raw)
|
# Some models wrap JSON in markdown fences or add preamble text.
|
||||||
|
# Try to extract the JSON object if raw parse fails.
|
||||||
|
try:
|
||||||
|
data = json.loads(raw)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
import re
|
||||||
|
|
||||||
|
m = re.search(r"\{.*\}", raw, re.DOTALL)
|
||||||
|
if m:
|
||||||
|
data = json.loads(m.group())
|
||||||
|
else:
|
||||||
|
logger.warning("recall: LLM returned non-JSON: %.200s", raw)
|
||||||
|
return []
|
||||||
selected = data.get("selected_memories", [])
|
selected = data.get("selected_memories", [])
|
||||||
valid_names = {f.filename for f in files}
|
valid_names = {f.filename for f in files}
|
||||||
result = [s for s in selected if s in valid_names][:max_results]
|
result = [s for s in selected if s in valid_names][:max_results]
|
||||||
|
|||||||
@@ -25,10 +25,7 @@
|
|||||||
14. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
|
14. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path.
|
||||||
|
|
||||||
## GCU Errors
|
## GCU Errors
|
||||||
15. **Manually wiring browser tools on event_loop nodes** — Use `node_type="gcu"` which auto-includes browser tools. Do NOT manually list browser tool names.
|
15. **Manually wiring browser tools on event_loop nodes** — Browser nodes use tools: {policy: "all"} to get all browser tools.
|
||||||
16. **Using GCU nodes as regular graph nodes** — GCU nodes are subagents only. They must ONLY appear in `sub_agents=["gcu-node-id"]` and be invoked via `delegate_to_sub_agent()`. Never connect via edges or use as entry/terminal nodes.
|
|
||||||
17. **Reusing the same GCU node ID for parallel tasks** — Each concurrent browser task needs a distinct GCU node ID (e.g. `gcu-site-a`, `gcu-site-b`). Two `delegate_to_sub_agent` calls with the same `agent_id` share a browser profile and will interfere with each other's pages.
|
|
||||||
18. **Passing `profile=` in GCU tool calls** — Profile isolation for parallel subagents is automatic. The framework injects a unique profile per subagent via an asyncio `ContextVar`. Hardcoding `profile="default"` in a GCU system prompt breaks this isolation.
|
|
||||||
|
|
||||||
## Worker Agent Errors
|
## Worker Agent Errors
|
||||||
19. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Route worker review/approval through queen escalation instead of direct worker HITL.
|
19. **Adding client-facing intake node to workers** — The queen owns intake. Workers should start with an autonomous processing node. Route worker review/approval through queen escalation instead of direct worker HITL.
|
||||||
|
|||||||
@@ -0,0 +1,227 @@
|
|||||||
|
# Declarative Agent File Templates
|
||||||
|
|
||||||
|
Agents are defined as a single `agent.yaml` file. No Python code needed.
|
||||||
|
The runner loads this file directly -- no `agent.py`, `config.py`, or
|
||||||
|
`nodes/__init__.py` required.
|
||||||
|
|
||||||
|
## agent.yaml -- Complete Agent Definition
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
name: my-agent
|
||||||
|
version: 1.0.0
|
||||||
|
description: What this agent does.
|
||||||
|
|
||||||
|
metadata:
|
||||||
|
intro_message: Welcome! What would you like me to do?
|
||||||
|
|
||||||
|
# Template variables -- substituted into system_prompt and identity_prompt
|
||||||
|
# via {{variable_name}} syntax. Use this for config values that appear
|
||||||
|
# in prompts (spreadsheet IDs, API endpoints, account names, etc.)
|
||||||
|
variables:
|
||||||
|
spreadsheet_id: "1ZVxWDL..."
|
||||||
|
sheet_name: "contacts"
|
||||||
|
|
||||||
|
goal:
|
||||||
|
description: What this agent achieves.
|
||||||
|
success_criteria:
|
||||||
|
- "First success criterion"
|
||||||
|
- "Second success criterion"
|
||||||
|
constraints:
|
||||||
|
- "Hard constraint the agent must respect"
|
||||||
|
|
||||||
|
identity_prompt: |
|
||||||
|
You are a helpful agent.
|
||||||
|
|
||||||
|
conversation_mode: continuous # always "continuous" for Hive agents
|
||||||
|
|
||||||
|
loop_config:
|
||||||
|
max_iterations: 100
|
||||||
|
max_tool_calls_per_turn: 30
|
||||||
|
max_context_tokens: 32000
|
||||||
|
|
||||||
|
# MCP servers to connect (resolved by name from ~/.hive/mcp_registry/)
|
||||||
|
mcp_servers:
|
||||||
|
- name: hive-tools
|
||||||
|
- name: gcu-tools
|
||||||
|
|
||||||
|
nodes:
|
||||||
|
# Node 1: Process (autonomous entry node)
|
||||||
|
# The queen handles intake and passes structured input via
|
||||||
|
# run_agent_with_input(task). NO client-facing intake node.
|
||||||
|
- id: process
|
||||||
|
name: Process
|
||||||
|
description: Execute the task using available tools
|
||||||
|
max_node_visits: 0 # 0 = unlimited (forever-alive agents)
|
||||||
|
input_keys: [user_request, feedback]
|
||||||
|
output_keys: [results]
|
||||||
|
nullable_output_keys: [feedback]
|
||||||
|
tools:
|
||||||
|
policy: explicit
|
||||||
|
allowed: [web_search, web_scrape, save_data, load_data, list_data_files]
|
||||||
|
success_criteria: Results are complete and accurate.
|
||||||
|
system_prompt: |
|
||||||
|
You are a processing agent. Your task is in memory under "user_request".
|
||||||
|
If "feedback" is present, this is a revision.
|
||||||
|
|
||||||
|
Work in phases:
|
||||||
|
1. Use tools to gather/process data
|
||||||
|
2. Analyze results
|
||||||
|
3. Call set_output in a SEPARATE turn:
|
||||||
|
- set_output("results", "structured results")
|
||||||
|
|
||||||
|
# Node 2: Handoff (autonomous)
|
||||||
|
- id: handoff
|
||||||
|
name: Handoff
|
||||||
|
description: Prepare worker results for queen review
|
||||||
|
max_node_visits: 0
|
||||||
|
input_keys: [results, user_request]
|
||||||
|
output_keys: [next_action, feedback, worker_summary]
|
||||||
|
nullable_output_keys: [feedback, worker_summary]
|
||||||
|
tools:
|
||||||
|
policy: none # handoff nodes don't need tools
|
||||||
|
success_criteria: Results are packaged for queen decision-making.
|
||||||
|
system_prompt: |
|
||||||
|
Do NOT talk to the user directly. The queen is the only user interface.
|
||||||
|
|
||||||
|
If blocked, call escalate(reason, context) then set:
|
||||||
|
- set_output("next_action", "escalated")
|
||||||
|
- set_output("feedback", "what help is needed")
|
||||||
|
|
||||||
|
Otherwise summarize and set:
|
||||||
|
- set_output("worker_summary", "short summary for queen")
|
||||||
|
- set_output("next_action", "done") or "revise"
|
||||||
|
- set_output("feedback", "what to revise") only when revising
|
||||||
|
|
||||||
|
edges:
|
||||||
|
- from_node: process
|
||||||
|
to_node: handoff
|
||||||
|
# Feedback loop
|
||||||
|
- from_node: handoff
|
||||||
|
to_node: process
|
||||||
|
condition: conditional
|
||||||
|
condition_expr: "str(next_action).lower() == 'revise'"
|
||||||
|
priority: 2
|
||||||
|
# Escalation loop
|
||||||
|
- from_node: handoff
|
||||||
|
to_node: process
|
||||||
|
condition: conditional
|
||||||
|
condition_expr: "str(next_action).lower() == 'escalated'"
|
||||||
|
priority: 3
|
||||||
|
# Loop back for next task
|
||||||
|
- from_node: handoff
|
||||||
|
to_node: process
|
||||||
|
condition: conditional
|
||||||
|
condition_expr: "str(next_action).lower() == 'done'"
|
||||||
|
|
||||||
|
entry_node: process
|
||||||
|
terminal_nodes: [] # [] = forever-alive
|
||||||
|
```
|
||||||
|
|
||||||
|
## Key differences from Python templates
|
||||||
|
|
||||||
|
| Before (Python) | After (YAML) |
|
||||||
|
|-------------------------------------|----------------------------------------|
|
||||||
|
| `agent.py` (250 lines boilerplate) | Not needed |
|
||||||
|
| `config.py` (dataclass + metadata) | `variables:` + `metadata:` in YAML |
|
||||||
|
| `nodes/__init__.py` (NodeSpec calls)| `nodes:` list in YAML |
|
||||||
|
| `__init__.py`, `__main__.py` | Not needed |
|
||||||
|
| f-string config injection | `{{variable_name}}` templates |
|
||||||
|
| `mcp_servers.json` (separate file) | `mcp_servers:` in YAML (or keep file) |
|
||||||
|
|
||||||
|
## Node types
|
||||||
|
|
||||||
|
| Type | Description | Tools |
|
||||||
|
|--------------|---------------------------------------|--------------------------|
|
||||||
|
| `event_loop` | LLM-driven orchestration (default) | Explicit list or `none` |
|
||||||
|
| `gcu` | Browser automation via GCU tools | `policy: all` (auto) |
|
||||||
|
|
||||||
|
## Tool access policies
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Explicit list (recommended for most nodes)
|
||||||
|
tools:
|
||||||
|
policy: explicit
|
||||||
|
allowed: [web_search, save_data]
|
||||||
|
|
||||||
|
# All tools (for browser automation nodes)
|
||||||
|
tools:
|
||||||
|
policy: all
|
||||||
|
|
||||||
|
# No tools (for handoff/summary nodes)
|
||||||
|
tools:
|
||||||
|
policy: none
|
||||||
|
```
|
||||||
|
|
||||||
|
## Edge conditions
|
||||||
|
|
||||||
|
| Condition | When to use |
|
||||||
|
|---------------|-------------------------------------------------------|
|
||||||
|
| `on_success` | Default. Next node after current succeeds. |
|
||||||
|
| `on_failure` | Fallback path when current node fails. |
|
||||||
|
| `always` | Always traverse regardless of outcome. |
|
||||||
|
| `conditional` | Evaluate `condition_expr` against shared memory keys. |
|
||||||
|
| `llm_decide` | Let the LLM decide at runtime. |
|
||||||
|
|
||||||
|
## Template variables
|
||||||
|
|
||||||
|
Use `{{variable_name}}` in `system_prompt` and `identity_prompt`.
|
||||||
|
Variables are defined in the top-level `variables:` map.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
variables:
|
||||||
|
spreadsheet_id: "1ZVxWDL..."
|
||||||
|
api_endpoint: "https://api.example.com"
|
||||||
|
|
||||||
|
nodes:
|
||||||
|
- id: start
|
||||||
|
system_prompt: |
|
||||||
|
Connect to spreadsheet: {{spreadsheet_id}}
|
||||||
|
API endpoint: {{api_endpoint}}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Entry points
|
||||||
|
|
||||||
|
Default is a single manual entry point. For timer/scheduled triggers:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
entry_points:
|
||||||
|
- id: default
|
||||||
|
trigger_type: manual
|
||||||
|
- id: daily-check
|
||||||
|
trigger_type: timer
|
||||||
|
trigger_config:
|
||||||
|
interval_minutes: 30
|
||||||
|
```
|
||||||
|
|
||||||
|
## mcp_servers.json -- Still Supported
|
||||||
|
|
||||||
|
The `mcp_servers.json` file is still loaded automatically if present alongside
|
||||||
|
`agent.yaml`. You can also inline servers in the YAML:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
mcp_servers:
|
||||||
|
- name: hive-tools
|
||||||
|
- name: gcu-tools
|
||||||
|
```
|
||||||
|
|
||||||
|
Both approaches work. The JSON file takes precedence for backward compatibility.
|
||||||
|
|
||||||
|
## Migration from Python agents
|
||||||
|
|
||||||
|
Run the migration tool to convert existing agents:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv run python -m framework.tools.migrate_agent exports/my_agent
|
||||||
|
```
|
||||||
|
|
||||||
|
This generates `agent.yaml` from the existing `agent.py` + `nodes/` + `config.py`.
|
||||||
|
The original files are left untouched. Once verified, you can delete the Python files.
|
||||||
|
|
||||||
|
## Files after migration
|
||||||
|
|
||||||
|
```
|
||||||
|
my_agent/
|
||||||
|
agent.yaml # The only required file
|
||||||
|
mcp_servers.json # Optional (can inline in YAML)
|
||||||
|
flowchart.json # Optional (auto-generated)
|
||||||
|
```
|
||||||
@@ -1,306 +1,193 @@
|
|||||||
# Hive Agent Framework — Condensed Reference
|
# Hive Agent Framework -- Condensed Reference
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
Agents are Python packages in `exports/`:
|
Agents are declarative JSON configs in `exports/`:
|
||||||
```
|
```
|
||||||
exports/my_agent/
|
exports/my_agent/
|
||||||
├── __init__.py # MUST re-export ALL module-level vars from agent.py
|
agent.json # The entire agent definition
|
||||||
├── __main__.py # CLI (run, tui, info, validate, shell)
|
mcp_servers.json # MCP tool server config (optional, prefer registry refs)
|
||||||
├── agent.py # Graph construction (goal, edges, agent class)
|
|
||||||
├── config.py # Runtime config
|
|
||||||
├── nodes/__init__.py # Node definitions (NodeSpec)
|
|
||||||
├── mcp_servers.json # MCP tool server config
|
|
||||||
└── tests/ # pytest tests
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Agent Loading Contract
|
No Python files. No `__init__.py`, `__main__.py`, `config.py`, or `nodes/`.
|
||||||
|
|
||||||
`AgentRunner.load()` imports the package (`__init__.py`) and reads these
|
## Agent Loading
|
||||||
module-level variables via `getattr()`:
|
|
||||||
|
|
||||||
| Variable | Required | Default if missing | Consequence |
|
`AgentLoader.load()` reads `agent.json` and builds the execution graph.
|
||||||
|----------|----------|--------------------|-------------|
|
If `agent.py` exists (legacy), it's loaded as a Python module instead.
|
||||||
| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" |
|
|
||||||
| `nodes` | YES | `None` | **FATAL** — same error |
|
|
||||||
| `edges` | YES | `None` | **FATAL** — same error |
|
|
||||||
| `entry_node` | no | `nodes[0].id` | Probably wrong node |
|
|
||||||
| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails |
|
|
||||||
| `terminal_nodes` | **YES** | `[]` | **FATAL** — graph must have at least one terminal node |
|
|
||||||
| `pause_nodes` | no | `[]` | OK |
|
|
||||||
| `conversation_mode` | no | not passed | Isolated mode (no context carryover) |
|
|
||||||
| `identity_prompt` | no | not passed | No agent-level identity |
|
|
||||||
| `loop_config` | no | `{}` | No iteration limits |
|
|
||||||
| `triggers.json` (file) | no | not present | No triggers (timers, webhooks) |
|
|
||||||
|
|
||||||
**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from
|
## agent.json Schema
|
||||||
`agent.py`. Missing exports silently fall back to defaults, causing
|
|
||||||
hard-to-debug failures.
|
|
||||||
|
|
||||||
**Why `default_agent.validate()` is NOT sufficient:**
|
```json
|
||||||
`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges).
|
{
|
||||||
These are always correct because the constructor references agent.py's module
|
"name": "my-agent",
|
||||||
vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`),
|
"version": "1.0.0",
|
||||||
not the class. So `validate()` passes while `AgentRunner.load()` fails.
|
"description": "What this agent does",
|
||||||
Always test with `AgentRunner.load("exports/{name}")` — this is the same
|
"goal": {
|
||||||
code path the TUI and `hive run` use.
|
"description": "What to achieve",
|
||||||
|
"success_criteria": ["criterion 1", "criterion 2"],
|
||||||
## Goal
|
"constraints": ["constraint 1"]
|
||||||
|
},
|
||||||
Defines success criteria and constraints:
|
"identity_prompt": "You are a helpful agent.",
|
||||||
```python
|
"conversation_mode": "continuous",
|
||||||
goal = Goal(
|
"loop_config": {
|
||||||
id="kebab-case-id",
|
"max_iterations": 100,
|
||||||
name="Display Name",
|
"max_tool_calls_per_turn": 30,
|
||||||
description="What the agent does",
|
"max_context_tokens": 32000
|
||||||
success_criteria=[
|
},
|
||||||
SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25),
|
"mcp_servers": [
|
||||||
],
|
{"name": "hive-tools"},
|
||||||
constraints=[
|
{"name": "gcu-tools"}
|
||||||
Constraint(id="c-id", description="...", constraint_type="hard", category="quality"),
|
],
|
||||||
],
|
"variables": {
|
||||||
)
|
"spreadsheet_id": "1ZVx..."
|
||||||
|
},
|
||||||
|
"nodes": [...],
|
||||||
|
"edges": [...],
|
||||||
|
"entry_node": "process",
|
||||||
|
"terminal_nodes": []
|
||||||
|
}
|
||||||
```
|
```
|
||||||
- 3-5 success criteria, weights sum to 1.0
|
|
||||||
- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional)
|
|
||||||
|
|
||||||
## NodeSpec Fields
|
## Template Variables
|
||||||
|
|
||||||
|
Use `{{variable_name}}` in `system_prompt` and `identity_prompt`. Variables
|
||||||
|
are defined in the top-level `variables` object:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"variables": {"sheet_id": "1ZVx..."},
|
||||||
|
"nodes": [{
|
||||||
|
"id": "start",
|
||||||
|
"system_prompt": "Use sheet: {{sheet_id}}"
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Node Fields
|
||||||
|
|
||||||
| Field | Type | Default | Description |
|
| Field | Type | Default | Description |
|
||||||
|-------|------|---------|-------------|
|
|-------|------|---------|-------------|
|
||||||
| id | str | required | kebab-case identifier |
|
| id | str | required | kebab-case identifier |
|
||||||
| name | str | required | Display name |
|
| name | str | id | Display name |
|
||||||
| description | str | required | What the node does |
|
| description | str | required | What the node does |
|
||||||
| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) |
|
| node_type | str | "event_loop" | `"event_loop"` |
|
||||||
| input_keys | list[str] | required | Memory keys this node reads |
|
| input_keys | list | [] | Memory keys this node reads |
|
||||||
| output_keys | list[str] | required | Memory keys this node writes via set_output |
|
| output_keys | list | [] | Memory keys this node writes via set_output |
|
||||||
| system_prompt | str | "" | LLM instructions |
|
| system_prompt | str | "" | LLM instructions |
|
||||||
| tools | list[str] | [] | Tool names from MCP servers |
|
| tools | object | {} | Tool access policy (see below) |
|
||||||
| client_facing | bool | False | Deprecated compatibility field. Queen interactivity is implicit; workers should escalate instead |
|
| nullable_output_keys | list | [] | Keys that may remain unset |
|
||||||
| nullable_output_keys | list[str] | [] | Keys that may remain unset |
|
| max_node_visits | int | 1 | 0=unlimited (for forever-alive agents) |
|
||||||
| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops |
|
|
||||||
| max_retries | int | 3 | Retries on failure |
|
|
||||||
| success_criteria | str | "" | Natural language for judge evaluation |
|
| success_criteria | str | "" | Natural language for judge evaluation |
|
||||||
|
| client_facing | bool | false | Whether output is shown to user |
|
||||||
|
|
||||||
## EdgeSpec Fields
|
## Tool Access Policies
|
||||||
|
|
||||||
|
Each node declares its tools via a policy object:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"tools": {"policy": "explicit", "allowed": ["web_search", "save_data"]}}
|
||||||
|
{"tools": {"policy": "all"}}
|
||||||
|
{"tools": {"policy": "none"}}
|
||||||
|
```
|
||||||
|
|
||||||
|
- `explicit` (default): only named tools. Empty `allowed` = zero tools.
|
||||||
|
- `all`: all tools from registry (e.g. for browser automation nodes).
|
||||||
|
- `none`: no tools (for handoff/summary nodes).
|
||||||
|
|
||||||
|
## Edge Fields
|
||||||
|
|
||||||
| Field | Type | Description |
|
| Field | Type | Description |
|
||||||
|-------|------|-------------|
|
|-------|------|-------------|
|
||||||
| id | str | kebab-case identifier |
|
| from_node | str | Source node ID |
|
||||||
| source | str | Source node ID |
|
| to_node | str | Target node ID |
|
||||||
| target | str | Target node ID |
|
| condition | str | `on_success`, `on_failure`, `always`, `conditional` |
|
||||||
| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL |
|
| condition_expr | str | Python expression for conditional routing |
|
||||||
| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) |
|
| priority | int | Higher = evaluated first |
|
||||||
| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) |
|
|
||||||
|
condition_expr examples:
|
||||||
|
- `"needs_more_research == True"`
|
||||||
|
- `"str(next_action).lower() == 'revise'"`
|
||||||
|
|
||||||
## Key Patterns
|
## Key Patterns
|
||||||
|
|
||||||
### STEP 1/STEP 2 (Client-Facing Nodes)
|
|
||||||
```
|
|
||||||
**STEP 1 — Respond to the user (text only, NO tool calls):**
|
|
||||||
[Present information, ask questions]
|
|
||||||
|
|
||||||
**STEP 2 — After the user responds, call set_output:**
|
|
||||||
- set_output("key", "value based on user response")
|
|
||||||
```
|
|
||||||
This prevents premature set_output before user interaction.
|
|
||||||
|
|
||||||
### Fewer, Richer Nodes (CRITICAL)
|
### Fewer, Richer Nodes (CRITICAL)
|
||||||
|
|
||||||
**Hard limit: 3-6 nodes for most agents.** Never exceed 6 unless the user
|
**Hard limit: 3-6 nodes for most agents.** Each node boundary serializes
|
||||||
explicitly requests a complex multi-phase pipeline.
|
outputs and destroys in-context information. Merge unless:
|
||||||
|
1. Client-facing boundary (different interaction models)
|
||||||
|
2. Disjoint tool sets
|
||||||
|
3. Parallel execution (fan-out branches)
|
||||||
|
|
||||||
Each node boundary serializes outputs to the shared buffer and **destroys** all
|
**Typical structure (2 nodes):**
|
||||||
in-context information: tool call results, intermediate reasoning, conversation
|
|
||||||
history. A research node that searches, fetches, and analyzes in ONE node keeps
|
|
||||||
all source material in its conversation context. Split across 3 nodes, each
|
|
||||||
downstream node only sees the serialized summary string.
|
|
||||||
|
|
||||||
**Decision framework — merge unless ANY of these apply:**
|
|
||||||
1. **Client-facing boundary** — Autonomous and client-facing work MUST be
|
|
||||||
separate nodes (different interaction models)
|
|
||||||
2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web
|
|
||||||
search vs database), separate nodes make sense
|
|
||||||
3. **Parallel execution** — Fan-out branches must be separate nodes
|
|
||||||
|
|
||||||
**Red flags that you have too many nodes:**
|
|
||||||
- A node with 0 tools (pure LLM reasoning) → merge into predecessor/successor
|
|
||||||
- A node that sets only 1 trivial output → collapse into predecessor
|
|
||||||
- Multiple consecutive autonomous nodes → combine into one rich node
|
|
||||||
- A "report" node that presents analysis → merge into the client-facing node
|
|
||||||
- A "confirm" or "schedule" node that doesn't call any external service → remove
|
|
||||||
|
|
||||||
**Typical agent structure (2 nodes):**
|
|
||||||
```
|
```
|
||||||
process (autonomous) ←→ review (queen-mediated)
|
process (autonomous) <-> review (queen-mediated)
|
||||||
```
|
|
||||||
The queen owns intake — she gathers requirements from the user, then
|
|
||||||
passes structured input via `run_agent_with_input(task)`. When building
|
|
||||||
the agent, design the entry node's `input_keys` to match what the queen
|
|
||||||
will provide at run time. Worker agents should NOT have a client-facing
|
|
||||||
intake node. Mid-execution review/approval should happen through queen
|
|
||||||
escalation rather than direct worker HITL.
|
|
||||||
|
|
||||||
For simpler agents, just 1 autonomous node:
|
|
||||||
```
|
|
||||||
process (autonomous) — loops back to itself
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### nullable_output_keys
|
The queen owns intake. Worker agents should NOT have a client-facing intake
|
||||||
For inputs that only arrive on certain edges:
|
node. Mid-execution review should happen through queen escalation.
|
||||||
```python
|
|
||||||
research_node = NodeSpec(
|
|
||||||
input_keys=["brief", "feedback"],
|
|
||||||
nullable_output_keys=["feedback"], # Only present on feedback edge
|
|
||||||
max_node_visits=3,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Mutually Exclusive Outputs
|
|
||||||
For routing decisions:
|
|
||||||
```python
|
|
||||||
review_node = NodeSpec(
|
|
||||||
output_keys=["approved", "feedback"],
|
|
||||||
nullable_output_keys=["approved", "feedback"], # Node sets one or the other
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Continuous Loop Pattern
|
|
||||||
Mark the primary event_loop node as terminal: `terminal_nodes=["process"]`.
|
|
||||||
The node has `output_keys` and can complete when the agent finishes its work.
|
|
||||||
Use `conversation_mode="continuous"` to preserve context across transitions.
|
|
||||||
|
|
||||||
### set_output
|
### set_output
|
||||||
- Synthetic tool injected by framework
|
- Synthetic tool injected by framework
|
||||||
- Call separately from real tool calls (separate turn)
|
- Call separately from real tool calls (separate turn)
|
||||||
- `set_output("key", "value")` stores to the shared buffer
|
- `set_output("key", "value")` stores to the shared buffer
|
||||||
|
|
||||||
## Edge Conditions
|
### Graph Lifecycle
|
||||||
|
|
||||||
| Condition | When |
|
|
||||||
|-----------|------|
|
|
||||||
| ON_SUCCESS | Node completed successfully |
|
|
||||||
| ON_FAILURE | Node failed |
|
|
||||||
| ALWAYS | Unconditional |
|
|
||||||
| CONDITIONAL | condition_expr evaluates to True against memory |
|
|
||||||
|
|
||||||
condition_expr examples:
|
|
||||||
- `"needs_more_research == True"`
|
|
||||||
- `"str(next_action).lower() == 'new_agent'"`
|
|
||||||
- `"feedback is not None"`
|
|
||||||
|
|
||||||
## Graph Lifecycle
|
|
||||||
|
|
||||||
| Pattern | terminal_nodes | When |
|
| Pattern | terminal_nodes | When |
|
||||||
|---------|---------------|------|
|
|---------|---------------|------|
|
||||||
| **Continuous loop** | `["node-with-output-keys"]` | **DEFAULT for all agents** |
|
| Continuous loop | `["node-with-output-keys"]` | DEFAULT for all agents |
|
||||||
| Linear | `["last-node"]` | One-shot/batch agents |
|
| Linear | `["last-node"]` | One-shot/batch agents |
|
||||||
|
|
||||||
**Every graph must have at least one terminal node.** Terminal nodes
|
Every graph must have at least one terminal node.
|
||||||
define where execution ends. For interactive agents that loop continuously,
|
|
||||||
mark the primary event_loop node as terminal (it has `output_keys` and can
|
|
||||||
complete at any point). The framework default for `max_node_visits` is 0
|
|
||||||
(unbounded), so nodes work correctly in continuous loops without explicit
|
|
||||||
override. Only set `max_node_visits > 0` in one-shot agents with feedback loops.
|
|
||||||
Every node must have at least one outgoing edge — no dead ends.
|
|
||||||
|
|
||||||
## Continuous Conversation Mode
|
### Continuous Conversation Mode
|
||||||
|
|
||||||
`conversation_mode` has ONLY two valid states:
|
`conversation_mode` has ONLY two valid states:
|
||||||
- `"continuous"` — recommended for interactive agents
|
- `"continuous"` -- recommended (context carries across node transitions)
|
||||||
- Omit entirely — isolated per-node conversations (each node starts fresh)
|
- Omit entirely -- isolated per-node conversations
|
||||||
|
|
||||||
**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`,
|
**INVALID values:** `"client_facing"`, `"interactive"`, `"shared"`.
|
||||||
`"adaptive"`, `"shared"`. These do not exist in the framework.
|
|
||||||
|
|
||||||
When `conversation_mode="continuous"`:
|
|
||||||
- Same conversation thread carries across node transitions
|
|
||||||
- Layered system prompts: identity (agent-level) + narrative + focus (per-node)
|
|
||||||
- Transition markers inserted at boundaries
|
|
||||||
- Compaction happens opportunistically at phase transitions
|
|
||||||
|
|
||||||
## loop_config
|
## loop_config
|
||||||
|
|
||||||
Only three valid keys:
|
Only three valid keys:
|
||||||
```python
|
```json
|
||||||
loop_config = {
|
{
|
||||||
"max_iterations": 100, # Max LLM turns per node visit
|
"max_iterations": 100,
|
||||||
"max_tool_calls_per_turn": 20, # Max tool calls per LLM response
|
"max_tool_calls_per_turn": 20,
|
||||||
"max_context_tokens": 32000, # Triggers conversation compaction
|
"max_context_tokens": 32000
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`,
|
|
||||||
`"temperature"`. These are silently ignored or cause errors.
|
|
||||||
|
|
||||||
## Data Tools (Spillover)
|
## Data Tools (Spillover)
|
||||||
|
|
||||||
For large data that exceeds context:
|
For large data that exceeds context:
|
||||||
- `save_data(filename, data)` — Write to session data dir
|
- `save_data(filename, data)` -- write to session data dir
|
||||||
- `load_data(filename, offset, limit)` — Read with pagination
|
- `load_data(filename, offset, limit)` -- read with pagination
|
||||||
- `list_data_files()` — List files
|
- `list_data_files()` -- list files
|
||||||
- `serve_file_to_user(filename, label)` — Clickable file:// URI
|
- `serve_file_to_user(filename, label)` -- clickable file URI
|
||||||
|
|
||||||
`data_dir` is auto-injected by framework — LLM never sees it.
|
`data_dir` is auto-injected by framework.
|
||||||
|
|
||||||
## Fan-Out / Fan-In
|
## Fan-Out / Fan-In
|
||||||
|
|
||||||
Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather().
|
Multiple `on_success` edges from same source = parallel execution.
|
||||||
- Parallel nodes must have disjoint output_keys
|
Parallel nodes must have disjoint output_keys.
|
||||||
- Only one branch may have client_facing nodes
|
|
||||||
- Fan-in node gets all outputs in the shared buffer
|
|
||||||
|
|
||||||
## Judge System
|
## Judge System
|
||||||
|
|
||||||
- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
|
- **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set
|
||||||
- **SchemaJudge**: Validates against Pydantic model
|
- **SchemaJudge**: Validates against Pydantic model
|
||||||
- **Custom**: Implement `evaluate(context) -> JudgeVerdict`
|
|
||||||
|
|
||||||
Judge is the SOLE acceptance mechanism — no ad-hoc framework gating.
|
|
||||||
|
|
||||||
## Triggers (Timers, Webhooks)
|
|
||||||
|
|
||||||
For agents that react to external events, create a `triggers.json` file
|
|
||||||
in the agent's export directory:
|
|
||||||
|
|
||||||
```json
|
|
||||||
[
|
|
||||||
{
|
|
||||||
"id": "daily-check",
|
|
||||||
"name": "Daily Check",
|
|
||||||
"trigger_type": "timer",
|
|
||||||
"trigger_config": {"cron": "0 9 * * *"},
|
|
||||||
"task": "Run the daily check process"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
```
|
|
||||||
|
|
||||||
### Key Fields
|
|
||||||
- `trigger_type`: `"timer"` or `"webhook"`
|
|
||||||
- `trigger_config`: `{"cron": "0 9 * * *"}` or `{"interval_minutes": 20}`
|
|
||||||
- `task`: describes what the worker should do when the trigger fires
|
|
||||||
- Triggers can also be created/removed at runtime via `set_trigger` / `remove_trigger` queen tools
|
|
||||||
|
|
||||||
## Tool Discovery
|
## Tool Discovery
|
||||||
|
|
||||||
Do NOT rely on a static tool list — it will be outdated. Always call
|
Always call `list_agent_tools()` first to see available tools.
|
||||||
`list_agent_tools()` with NO arguments first to see ALL available tools.
|
Do NOT rely on a static tool list.
|
||||||
Only use `group=` or `output_schema=` as follow-up calls after seeing the
|
|
||||||
full list.
|
|
||||||
|
|
||||||
```
|
```
|
||||||
list_agent_tools() # ALWAYS call this first
|
list_agent_tools() # full summary
|
||||||
list_agent_tools(group="gmail", output_schema="full") # then drill into a category
|
list_agent_tools(group="gmail", output_schema="full") # drill into category
|
||||||
list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools
|
|
||||||
```
|
```
|
||||||
|
|
||||||
After building, run `validate_agent_package("{name}")` to check everything at once.
|
After building, run `validate_agent_package("{name}")` to check everything.
|
||||||
|
|
||||||
Common tool categories (verify via list_agent_tools):
|
|
||||||
- **Web**: search, scrape, PDF
|
|
||||||
- **Data**: save/load/append/list data files, serve to user
|
|
||||||
- **File**: view, write, replace, diff, list, grep
|
|
||||||
- **Communication**: email, gmail, slack, telegram
|
|
||||||
- **CRM**: hubspot, apollo, calcom
|
|
||||||
- **GitHub**: stargazers, user profiles, repos
|
|
||||||
- **Vision**: image analysis
|
|
||||||
- **Time**: current time
|
|
||||||
|
|||||||
@@ -1,158 +1,53 @@
|
|||||||
# GCU Browser Automation Guide
|
# Browser Automation Guide
|
||||||
|
|
||||||
## When to Use GCU Nodes
|
## When to Use Browser Nodes
|
||||||
|
|
||||||
Use `node_type="gcu"` when:
|
Use browser nodes (with `tools: {policy: "all"}`) when:
|
||||||
- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs)
|
- The task requires interacting with web pages (clicking, typing, navigating)
|
||||||
- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content)
|
- No API is available for the target service
|
||||||
- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files
|
- The user is already logged in to the target site
|
||||||
|
|
||||||
Do NOT use GCU for:
|
## What Browser Nodes Are
|
||||||
- Static content that `web_scrape` handles fine
|
|
||||||
- API-accessible data (use the API directly)
|
|
||||||
- PDF/file processing
|
|
||||||
- Anything that doesn't require a browser UI
|
|
||||||
|
|
||||||
## What GCU Nodes Are
|
- Regular `event_loop` nodes with browser tools from gcu-tools MCP server
|
||||||
|
- Set `tools: {policy: "all"}` to give access to all browser tools
|
||||||
|
- Wire into the graph with edges like any other node
|
||||||
|
- No special node_type needed
|
||||||
|
|
||||||
- `node_type="gcu"` — a declarative enhancement over `event_loop`
|
## Available Browser Tools
|
||||||
- Framework auto-prepends browser best-practices system prompt
|
|
||||||
- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server
|
|
||||||
- Same underlying `EventLoopNode` class — no new imports needed
|
|
||||||
- `tools=[]` is correct — tools are auto-populated at runtime
|
|
||||||
|
|
||||||
## GCU Architecture Pattern
|
All tools are prefixed with `browser_`:
|
||||||
|
- `browser_start`, `browser_open` -- launch/navigate
|
||||||
|
- `browser_click`, `browser_fill`, `browser_type` -- interact
|
||||||
|
- `browser_snapshot` -- read page content (preferred over screenshot)
|
||||||
|
- `browser_screenshot` -- visual capture
|
||||||
|
- `browser_scroll`, `browser_wait` -- navigation helpers
|
||||||
|
- `browser_evaluate` -- run JavaScript
|
||||||
|
|
||||||
GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges.
|
## System Prompt Tips for Browser Nodes
|
||||||
|
|
||||||
- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work
|
```
|
||||||
- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")`
|
1. Use browser_snapshot() to read page content (NOT browser_get_text)
|
||||||
- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False`
|
2. Use browser_wait(seconds=2-3) after navigation for page load
|
||||||
- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)`
|
3. If you hit an auth wall, call set_output with an error and move on
|
||||||
|
4. Keep tool calls per turn <= 10 for reliability
|
||||||
## GCU Node Definition Template
|
|
||||||
|
|
||||||
```python
|
|
||||||
gcu_browser_node = NodeSpec(
|
|
||||||
id="gcu-browser-worker",
|
|
||||||
name="Browser Worker",
|
|
||||||
description="Browser subagent that does X.",
|
|
||||||
node_type="gcu",
|
|
||||||
client_facing=False,
|
|
||||||
max_node_visits=1,
|
|
||||||
input_keys=[],
|
|
||||||
output_keys=["result"],
|
|
||||||
tools=[], # Auto-populated with all browser tools
|
|
||||||
system_prompt="""\
|
|
||||||
You are a browser agent. Your job: [specific task].
|
|
||||||
|
|
||||||
## Workflow
|
|
||||||
1. browser_start (only if no browser is running yet)
|
|
||||||
2. browser_open(url=TARGET_URL) — note the returned targetId
|
|
||||||
3. browser_snapshot to read the page
|
|
||||||
4. [task-specific steps]
|
|
||||||
5. set_output("result", JSON)
|
|
||||||
|
|
||||||
## Output format
|
|
||||||
set_output("result", JSON) with:
|
|
||||||
- [field]: [type and description]
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Parent Node Template (orchestrating GCU subagents)
|
## Example
|
||||||
|
|
||||||
```python
|
|
||||||
orchestrator_node = NodeSpec(
|
|
||||||
id="orchestrator",
|
|
||||||
...
|
|
||||||
node_type="event_loop",
|
|
||||||
sub_agents=["gcu-browser-worker"],
|
|
||||||
system_prompt="""\
|
|
||||||
...
|
|
||||||
delegate_to_sub_agent(
|
|
||||||
agent_id="gcu-browser-worker",
|
|
||||||
task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]."
|
|
||||||
)
|
|
||||||
...
|
|
||||||
""",
|
|
||||||
tools=[], # Orchestrator doesn't need browser tools
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## mcp_servers.json with GCU
|
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"hive-tools": { ... },
|
"id": "scan-profiles",
|
||||||
"gcu-tools": {
|
"name": "Scan LinkedIn Profiles",
|
||||||
"transport": "stdio",
|
"description": "Navigate LinkedIn search results and collect profile data",
|
||||||
"command": "uv",
|
"tools": {"policy": "all"},
|
||||||
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
|
"input_keys": ["search_url"],
|
||||||
"cwd": "../../tools",
|
"output_keys": ["profiles"],
|
||||||
"description": "GCU tools for browser automation"
|
"system_prompt": "Navigate to the search URL, paginate through results..."
|
||||||
}
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine.
|
Connected via regular edges:
|
||||||
|
```
|
||||||
## GCU System Prompt Best Practices
|
search-setup -> scan-profiles -> process-results
|
||||||
|
|
||||||
Key rules to bake into GCU node prompts:
|
|
||||||
|
|
||||||
- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML
|
|
||||||
- Always `browser_wait` after navigation
|
|
||||||
- Use large scroll amounts (~2000-5000) for lazy-loaded content
|
|
||||||
- For spillover files, use `run_command` with grep, not `read_file`
|
|
||||||
- If auth wall detected, report immediately — don't attempt login
|
|
||||||
- Keep tool calls per turn ≤10
|
|
||||||
- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call
|
|
||||||
|
|
||||||
## Multiple Concurrent GCU Subagents
|
|
||||||
|
|
||||||
When a task can be parallelized across multiple sites or profiles, declare a distinct GCU
|
|
||||||
node for each and invoke them all in the same LLM turn. The framework batches all
|
|
||||||
`delegate_to_sub_agent` calls made in one turn and runs them with `asyncio.gather`, so
|
|
||||||
they execute concurrently — not sequentially.
|
|
||||||
|
|
||||||
**Each GCU subagent automatically gets its own isolated browser context** — no `profile=`
|
|
||||||
argument is needed in tool calls. The framework derives a unique profile from the subagent's
|
|
||||||
node ID and instance counter and injects it via an asyncio `ContextVar` before the subagent
|
|
||||||
runs.
|
|
||||||
|
|
||||||
### Example: three sites in parallel
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Three distinct GCU nodes
|
|
||||||
gcu_site_a = NodeSpec(id="gcu-site-a", node_type="gcu", ...)
|
|
||||||
gcu_site_b = NodeSpec(id="gcu-site-b", node_type="gcu", ...)
|
|
||||||
gcu_site_c = NodeSpec(id="gcu-site-c", node_type="gcu", ...)
|
|
||||||
|
|
||||||
orchestrator = NodeSpec(
|
|
||||||
id="orchestrator",
|
|
||||||
node_type="event_loop",
|
|
||||||
sub_agents=["gcu-site-a", "gcu-site-b", "gcu-site-c"],
|
|
||||||
system_prompt="""\
|
|
||||||
Call all three subagents in a single response to run them in parallel:
|
|
||||||
delegate_to_sub_agent(agent_id="gcu-site-a", task="Scrape prices from site A")
|
|
||||||
delegate_to_sub_agent(agent_id="gcu-site-b", task="Scrape prices from site B")
|
|
||||||
delegate_to_sub_agent(agent_id="gcu-site-c", task="Scrape prices from site C")
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**Rules:**
|
|
||||||
- Use distinct node IDs for each concurrent task — sharing an ID shares the browser context.
|
|
||||||
- The GCU node prompts do not need to mention `profile=`; isolation is automatic.
|
|
||||||
- Cleanup is automatic at session end, but GCU nodes can call `browser_stop()` explicitly
|
|
||||||
if they want to release resources mid-run.
|
|
||||||
|
|
||||||
## GCU Anti-Patterns
|
|
||||||
|
|
||||||
- Using `browser_screenshot` to read text (use `browser_snapshot` instead; screenshots are for visual context only)
|
|
||||||
- Re-navigating after scrolling (resets scroll position)
|
|
||||||
- Attempting login on auth walls
|
|
||||||
- Forgetting `target_id` in multi-tab scenarios
|
|
||||||
- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern
|
|
||||||
- Making GCU nodes `client_facing=True` (they should be autonomous subagents)
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
A lightweight side agent that runs after each queen LLM turn. It inspects
|
A lightweight side agent that runs after each queen LLM turn. It inspects
|
||||||
recent conversation messages and extracts durable user knowledge into
|
recent conversation messages and extracts durable user knowledge into
|
||||||
individual memory files in ``~/.hive/queen/global_memory/``.
|
individual memory files in ``~/.hive/memories/global/``.
|
||||||
|
|
||||||
Two reflection types:
|
Two reflection types:
|
||||||
- **Short reflection**: after conversational queen turns. Distills
|
- **Short reflection**: after conversational queen turns. Distills
|
||||||
@@ -493,7 +493,7 @@ async def subscribe_reflection_triggers(
|
|||||||
Call this once during queen setup. Returns a list of event-bus
|
Call this once during queen setup. Returns a list of event-bus
|
||||||
subscription IDs for cleanup during session teardown.
|
subscription IDs for cleanup during session teardown.
|
||||||
"""
|
"""
|
||||||
from framework.runtime.event_bus import EventType
|
from framework.host.event_bus import EventType
|
||||||
|
|
||||||
mem_dir = memory_dir or global_memory_dir()
|
mem_dir = memory_dir or global_memory_dir()
|
||||||
_lock = asyncio.Lock()
|
_lock = asyncio.Lock()
|
||||||
|
|||||||
@@ -22,10 +22,10 @@ def mock_mode():
|
|||||||
|
|
||||||
@pytest_asyncio.fixture(scope="session")
|
@pytest_asyncio.fixture(scope="session")
|
||||||
async def runner(tmp_path_factory, mock_mode):
|
async def runner(tmp_path_factory, mock_mode):
|
||||||
from framework.runner.runner import AgentRunner
|
from framework.loader.agent_loader import AgentLoader
|
||||||
|
|
||||||
storage = tmp_path_factory.mktemp("agent_storage")
|
storage = tmp_path_factory.mktemp("agent_storage")
|
||||||
r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
|
r = AgentLoader.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage)
|
||||||
r._setup()
|
r._setup()
|
||||||
yield r
|
yield r
|
||||||
await r.cleanup_async()
|
await r.cleanup_async()
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ def main():
|
|||||||
subparsers = parser.add_subparsers(dest="command", required=True)
|
subparsers = parser.add_subparsers(dest="command", required=True)
|
||||||
|
|
||||||
# Register runner commands (run, info, validate, list, shell)
|
# Register runner commands (run, info, validate, list, shell)
|
||||||
from framework.runner.cli import register_commands
|
from framework.loader.cli import register_commands
|
||||||
|
|
||||||
register_commands(subparsers)
|
register_commands(subparsers)
|
||||||
|
|
||||||
@@ -99,7 +99,7 @@ def main():
|
|||||||
register_debugger_commands(subparsers)
|
register_debugger_commands(subparsers)
|
||||||
|
|
||||||
# Register MCP registry commands (mcp install, mcp add, ...)
|
# Register MCP registry commands (mcp install, mcp add, ...)
|
||||||
from framework.runner.mcp_registry_cli import register_mcp_commands
|
from framework.loader.mcp_registry_cli import register_mcp_commands
|
||||||
|
|
||||||
register_mcp_commands(subparsers)
|
register_mcp_commands(subparsers)
|
||||||
|
|
||||||
|
|||||||
+67
-12
@@ -12,13 +12,47 @@ from dataclasses import dataclass, field
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.edge import DEFAULT_MAX_TOKENS
|
from framework.orchestrator.edge import DEFAULT_MAX_TOKENS
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Hive home directory structure
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
HIVE_HOME = Path.home() / ".hive"
|
||||||
|
QUEENS_DIR = HIVE_HOME / "agents" / "queens"
|
||||||
|
COLONIES_DIR = HIVE_HOME / "colonies"
|
||||||
|
MEMORIES_DIR = HIVE_HOME / "memories"
|
||||||
|
|
||||||
|
|
||||||
|
def queen_dir(queen_name: str = "default") -> Path:
|
||||||
|
"""Return the storage directory for a named queen agent."""
|
||||||
|
return QUEENS_DIR / queen_name
|
||||||
|
|
||||||
|
|
||||||
|
def colony_dir(colony_name: str) -> Path:
|
||||||
|
"""Return the directory for a named colony."""
|
||||||
|
return COLONIES_DIR / colony_name
|
||||||
|
|
||||||
|
|
||||||
|
def memory_dir(scope: str, name: str | None = None) -> Path:
|
||||||
|
"""Return memory dir for a scope.
|
||||||
|
|
||||||
|
Examples::
|
||||||
|
|
||||||
|
memory_dir("global") -> ~/.hive/memories/global
|
||||||
|
memory_dir("colonies", "my_agent") -> ~/.hive/memories/colonies/my_agent
|
||||||
|
memory_dir("agents/queens", "default")-> ~/.hive/memories/agents/queens/default
|
||||||
|
memory_dir("agents", "worker_name") -> ~/.hive/memories/agents/worker_name
|
||||||
|
"""
|
||||||
|
base = MEMORIES_DIR / scope
|
||||||
|
return base / name if name else base
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Low-level config file access
|
# Low-level config file access
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json"
|
HIVE_CONFIG_FILE = HIVE_HOME / "configuration.json"
|
||||||
|
|
||||||
# Hive LLM router endpoint (Anthropic-compatible).
|
# Hive LLM router endpoint (Anthropic-compatible).
|
||||||
# litellm's Anthropic handler appends /v1/messages, so this is just the base host.
|
# litellm's Anthropic handler appends /v1/messages, so this is just the base host.
|
||||||
@@ -130,7 +164,7 @@ def get_worker_api_key() -> str | None:
|
|||||||
# Worker-specific subscription / env var
|
# Worker-specific subscription / env var
|
||||||
if worker_llm.get("use_claude_code_subscription"):
|
if worker_llm.get("use_claude_code_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_claude_code_token
|
from framework.loader.agent_loader import get_claude_code_token
|
||||||
|
|
||||||
token = get_claude_code_token()
|
token = get_claude_code_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -140,7 +174,7 @@ def get_worker_api_key() -> str | None:
|
|||||||
|
|
||||||
if worker_llm.get("use_codex_subscription"):
|
if worker_llm.get("use_codex_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_codex_token
|
from framework.loader.agent_loader import get_codex_token
|
||||||
|
|
||||||
token = get_codex_token()
|
token = get_codex_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -150,7 +184,7 @@ def get_worker_api_key() -> str | None:
|
|||||||
|
|
||||||
if worker_llm.get("use_kimi_code_subscription"):
|
if worker_llm.get("use_kimi_code_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_kimi_code_token
|
from framework.loader.agent_loader import get_kimi_code_token
|
||||||
|
|
||||||
token = get_kimi_code_token()
|
token = get_kimi_code_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -160,7 +194,7 @@ def get_worker_api_key() -> str | None:
|
|||||||
|
|
||||||
if worker_llm.get("use_antigravity_subscription"):
|
if worker_llm.get("use_antigravity_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_antigravity_token
|
from framework.loader.agent_loader import get_antigravity_token
|
||||||
|
|
||||||
token = get_antigravity_token()
|
token = get_antigravity_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -216,7 +250,7 @@ def get_worker_llm_extra_kwargs() -> dict[str, Any]:
|
|||||||
"User-Agent": "CodexBar",
|
"User-Agent": "CodexBar",
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_codex_account_id
|
from framework.loader.agent_loader import get_codex_account_id
|
||||||
|
|
||||||
account_id = get_codex_account_id()
|
account_id = get_codex_account_id()
|
||||||
if account_id:
|
if account_id:
|
||||||
@@ -263,22 +297,43 @@ def get_max_context_tokens() -> int:
|
|||||||
return get_hive_config().get("llm", {}).get("max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS)
|
return get_hive_config().get("llm", {}).get("max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS)
|
||||||
|
|
||||||
|
|
||||||
|
def get_api_keys() -> list[str] | None:
|
||||||
|
"""Return a list of API keys if ``api_keys`` is configured, else ``None``.
|
||||||
|
|
||||||
|
This supports key-pool rotation: configure multiple keys in
|
||||||
|
``~/.hive/configuration.json`` under ``llm.api_keys`` and the
|
||||||
|
:class:`~framework.llm.key_pool.KeyPool` will rotate through them.
|
||||||
|
"""
|
||||||
|
llm = get_hive_config().get("llm", {})
|
||||||
|
keys = llm.get("api_keys")
|
||||||
|
if keys and isinstance(keys, list) and len(keys) > 0:
|
||||||
|
return [k for k in keys if k] # filter empties
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_api_key() -> str | None:
|
def get_api_key() -> str | None:
|
||||||
"""Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code.
|
"""Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code.
|
||||||
|
|
||||||
Priority:
|
Priority:
|
||||||
|
0. Explicit key pool (``api_keys`` list) -- returns first key for
|
||||||
|
single-key callers; full pool available via :func:`get_api_keys`.
|
||||||
1. Claude Code subscription (``use_claude_code_subscription: true``)
|
1. Claude Code subscription (``use_claude_code_subscription: true``)
|
||||||
reads the OAuth token from ``~/.claude/.credentials.json``.
|
reads the OAuth token from ``~/.claude/.credentials.json``.
|
||||||
2. Codex subscription (``use_codex_subscription: true``)
|
2. Codex subscription (``use_codex_subscription: true``)
|
||||||
reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``.
|
reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``.
|
||||||
3. Environment variable named in ``api_key_env_var``.
|
3. Environment variable named in ``api_key_env_var``.
|
||||||
"""
|
"""
|
||||||
|
# If an explicit key pool is configured, use the first key.
|
||||||
|
pool_keys = get_api_keys()
|
||||||
|
if pool_keys:
|
||||||
|
return pool_keys[0]
|
||||||
|
|
||||||
llm = get_hive_config().get("llm", {})
|
llm = get_hive_config().get("llm", {})
|
||||||
|
|
||||||
# Claude Code subscription: read OAuth token directly
|
# Claude Code subscription: read OAuth token directly
|
||||||
if llm.get("use_claude_code_subscription"):
|
if llm.get("use_claude_code_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_claude_code_token
|
from framework.loader.agent_loader import get_claude_code_token
|
||||||
|
|
||||||
token = get_claude_code_token()
|
token = get_claude_code_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -289,7 +344,7 @@ def get_api_key() -> str | None:
|
|||||||
# Codex subscription: read OAuth token from Keychain / auth.json
|
# Codex subscription: read OAuth token from Keychain / auth.json
|
||||||
if llm.get("use_codex_subscription"):
|
if llm.get("use_codex_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_codex_token
|
from framework.loader.agent_loader import get_codex_token
|
||||||
|
|
||||||
token = get_codex_token()
|
token = get_codex_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -300,7 +355,7 @@ def get_api_key() -> str | None:
|
|||||||
# Kimi Code subscription: read API key from ~/.kimi/config.toml
|
# Kimi Code subscription: read API key from ~/.kimi/config.toml
|
||||||
if llm.get("use_kimi_code_subscription"):
|
if llm.get("use_kimi_code_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_kimi_code_token
|
from framework.loader.agent_loader import get_kimi_code_token
|
||||||
|
|
||||||
token = get_kimi_code_token()
|
token = get_kimi_code_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -311,7 +366,7 @@ def get_api_key() -> str | None:
|
|||||||
# Antigravity subscription: read OAuth token from accounts JSON
|
# Antigravity subscription: read OAuth token from accounts JSON
|
||||||
if llm.get("use_antigravity_subscription"):
|
if llm.get("use_antigravity_subscription"):
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_antigravity_token
|
from framework.loader.agent_loader import get_antigravity_token
|
||||||
|
|
||||||
token = get_antigravity_token()
|
token = get_antigravity_token()
|
||||||
if token:
|
if token:
|
||||||
@@ -468,7 +523,7 @@ def get_llm_extra_kwargs() -> dict[str, Any]:
|
|||||||
"User-Agent": "CodexBar",
|
"User-Agent": "CodexBar",
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
from framework.runner.runner import get_codex_account_id
|
from framework.loader.agent_loader import get_codex_account_id
|
||||||
|
|
||||||
account_id = get_codex_account_id()
|
account_id = get_codex_account_id()
|
||||||
if account_id:
|
if account_id:
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ from pathlib import Path
|
|||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph import NodeSpec
|
from framework.orchestrator import NodeSpec
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -533,7 +533,9 @@ class CredentialSetupSession:
|
|||||||
|
|
||||||
|
|
||||||
def load_agent_nodes(agent_path: str | Path) -> list:
|
def load_agent_nodes(agent_path: str | Path) -> list:
|
||||||
"""Load NodeSpec list from an agent's agent.py or agent.json.
|
"""Load NodeSpec list from an agent directory.
|
||||||
|
|
||||||
|
Checks agent.json (declarative) first, then agent.py (legacy).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
agent_path: Path to agent directory.
|
agent_path: Path to agent directory.
|
||||||
@@ -542,16 +544,28 @@ def load_agent_nodes(agent_path: str | Path) -> list:
|
|||||||
List of NodeSpec objects (empty list if agent can't be loaded).
|
List of NodeSpec objects (empty list if agent can't be loaded).
|
||||||
"""
|
"""
|
||||||
agent_path = Path(agent_path)
|
agent_path = Path(agent_path)
|
||||||
|
agent_json_file = agent_path / "agent.json"
|
||||||
agent_py = agent_path / "agent.py"
|
agent_py = agent_path / "agent.py"
|
||||||
agent_json = agent_path / "agent.json"
|
|
||||||
|
|
||||||
if agent_py.exists():
|
if agent_json_file.exists():
|
||||||
|
return _load_nodes_from_json_declarative(agent_json_file)
|
||||||
|
elif agent_py.exists():
|
||||||
return _load_nodes_from_python_agent(agent_path)
|
return _load_nodes_from_python_agent(agent_path)
|
||||||
elif agent_json.exists():
|
|
||||||
return _load_nodes_from_json_agent(agent_json)
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def _load_nodes_from_json_declarative(agent_json: Path) -> list:
|
||||||
|
"""Load nodes from a declarative JSON agent."""
|
||||||
|
try:
|
||||||
|
from framework.loader.agent_loader import load_agent_config
|
||||||
|
|
||||||
|
data = json.loads(agent_json.read_text(encoding="utf-8"))
|
||||||
|
graph, _ = load_agent_config(data)
|
||||||
|
return list(graph.nodes)
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
def _load_nodes_from_python_agent(agent_path: Path) -> list:
|
def _load_nodes_from_python_agent(agent_path: Path) -> list:
|
||||||
"""Load nodes from a Python-based agent."""
|
"""Load nodes from a Python-based agent."""
|
||||||
import importlib.util
|
import importlib.util
|
||||||
@@ -590,7 +604,7 @@ def _load_nodes_from_json_agent(agent_json: Path) -> list:
|
|||||||
with open(agent_json, encoding="utf-8-sig") as f:
|
with open(agent_json, encoding="utf-8-sig") as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
|
|
||||||
from framework.graph import NodeSpec
|
from framework.orchestrator import NodeSpec
|
||||||
|
|
||||||
nodes_data = data.get("graph", {}).get("nodes", [])
|
nodes_data = data.get("graph", {}).get("nodes", [])
|
||||||
nodes = []
|
nodes = []
|
||||||
|
|||||||
@@ -1,65 +0,0 @@
|
|||||||
"""Graph structures: Goals, Nodes, Edges, and Execution."""
|
|
||||||
|
|
||||||
from framework.graph.context import GraphContext
|
|
||||||
from framework.graph.context_handoff import ContextHandoff, HandoffContext
|
|
||||||
from framework.graph.conversation import ConversationStore, Message, NodeConversation
|
|
||||||
from framework.graph.edge import DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec
|
|
||||||
from framework.graph.event_loop_node import (
|
|
||||||
EventLoopNode,
|
|
||||||
JudgeProtocol,
|
|
||||||
JudgeVerdict,
|
|
||||||
LoopConfig,
|
|
||||||
OutputAccumulator,
|
|
||||||
)
|
|
||||||
from framework.graph.executor import GraphExecutor
|
|
||||||
from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion
|
|
||||||
from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec
|
|
||||||
from framework.graph.worker_agent import (
|
|
||||||
Activation,
|
|
||||||
FanOutTag,
|
|
||||||
FanOutTracker,
|
|
||||||
WorkerAgent,
|
|
||||||
WorkerCompletion,
|
|
||||||
WorkerLifecycle,
|
|
||||||
)
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
# Goal
|
|
||||||
"Goal",
|
|
||||||
"SuccessCriterion",
|
|
||||||
"Constraint",
|
|
||||||
"GoalStatus",
|
|
||||||
# Node
|
|
||||||
"NodeSpec",
|
|
||||||
"NodeContext",
|
|
||||||
"NodeResult",
|
|
||||||
"NodeProtocol",
|
|
||||||
# Edge
|
|
||||||
"EdgeSpec",
|
|
||||||
"EdgeCondition",
|
|
||||||
"GraphSpec",
|
|
||||||
"DEFAULT_MAX_TOKENS",
|
|
||||||
# Executor
|
|
||||||
"GraphExecutor",
|
|
||||||
# Conversation
|
|
||||||
"NodeConversation",
|
|
||||||
"ConversationStore",
|
|
||||||
"Message",
|
|
||||||
# Event Loop
|
|
||||||
"EventLoopNode",
|
|
||||||
"LoopConfig",
|
|
||||||
"OutputAccumulator",
|
|
||||||
"JudgeProtocol",
|
|
||||||
"JudgeVerdict",
|
|
||||||
# Context Handoff
|
|
||||||
"ContextHandoff",
|
|
||||||
"HandoffContext",
|
|
||||||
# Worker Agent
|
|
||||||
"WorkerAgent",
|
|
||||||
"WorkerLifecycle",
|
|
||||||
"WorkerCompletion",
|
|
||||||
"Activation",
|
|
||||||
"FanOutTag",
|
|
||||||
"FanOutTracker",
|
|
||||||
"GraphContext",
|
|
||||||
]
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
"""EventLoopNode subpackage — modular components of the event loop orchestrator.
|
|
||||||
|
|
||||||
All public symbols are re-exported by the parent ``event_loop_node.py`` for
|
|
||||||
backward compatibility. Internal consumers may import directly from these
|
|
||||||
submodules for clarity.
|
|
||||||
"""
|
|
||||||
@@ -1,370 +0,0 @@
|
|||||||
"""Subagent execution for the event loop.
|
|
||||||
|
|
||||||
Handles the full subagent lifecycle: validation, context setup, tool filtering,
|
|
||||||
conversation store derivation, execution, and cleanup.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
from collections.abc import Awaitable, Callable
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import TYPE_CHECKING, Any
|
|
||||||
|
|
||||||
from framework.graph.conversation import ConversationStore
|
|
||||||
from framework.graph.event_loop.judge_pipeline import SubagentJudge
|
|
||||||
from framework.graph.event_loop.types import LoopConfig, OutputAccumulator
|
|
||||||
from framework.graph.node import DataBuffer, NodeContext
|
|
||||||
from framework.llm.provider import ToolResult, ToolUse
|
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
|
||||||
from framework.runtime.event_bus import EventBus
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from framework.graph.event_loop_node import EventLoopNode
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def execute_subagent(
|
|
||||||
ctx: NodeContext,
|
|
||||||
agent_id: str,
|
|
||||||
task: str,
|
|
||||||
*,
|
|
||||||
config: LoopConfig,
|
|
||||||
event_loop_node_cls: type[EventLoopNode],
|
|
||||||
escalation_receiver_cls: Callable[[], Any],
|
|
||||||
accumulator: OutputAccumulator | None = None,
|
|
||||||
event_bus: EventBus | None = None,
|
|
||||||
tool_executor: Callable[[ToolUse], ToolResult | Awaitable[ToolResult]] | None = None,
|
|
||||||
conversation_store: ConversationStore | None = None,
|
|
||||||
subagent_instance_counter: dict[str, int] | None = None,
|
|
||||||
) -> ToolResult:
|
|
||||||
"""Execute a subagent and return the result as a ToolResult.
|
|
||||||
|
|
||||||
The subagent:
|
|
||||||
- Gets a fresh conversation with just the task
|
|
||||||
- Has read-only access to the parent's readable memory
|
|
||||||
- Cannot delegate to its own subagents (prevents recursion)
|
|
||||||
- Returns its output in structured JSON format
|
|
||||||
|
|
||||||
Args:
|
|
||||||
ctx: Parent node's context (for memory, tools, LLM access).
|
|
||||||
agent_id: The node ID of the subagent to invoke.
|
|
||||||
task: The task description to give the subagent.
|
|
||||||
accumulator: Parent's OutputAccumulator.
|
|
||||||
event_bus: EventBus for lifecycle events.
|
|
||||||
config: LoopConfig for iteration/tool limits.
|
|
||||||
tool_executor: Tool executor callable.
|
|
||||||
conversation_store: Parent conversation store (for deriving subagent store).
|
|
||||||
subagent_instance_counter: Mutable counter dict for unique subagent paths.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
ToolResult with structured JSON output.
|
|
||||||
"""
|
|
||||||
# Log subagent invocation start
|
|
||||||
logger.info(
|
|
||||||
"\n" + "=" * 60 + "\n"
|
|
||||||
"🤖 SUBAGENT INVOCATION\n"
|
|
||||||
"=" * 60 + "\n"
|
|
||||||
"Parent Node: %s\n"
|
|
||||||
"Subagent ID: %s\n"
|
|
||||||
"Task: %s\n" + "=" * 60,
|
|
||||||
ctx.node_id,
|
|
||||||
agent_id,
|
|
||||||
task[:500] + "..." if len(task) > 500 else task,
|
|
||||||
)
|
|
||||||
|
|
||||||
# 1. Validate agent exists in registry
|
|
||||||
if agent_id not in ctx.node_registry:
|
|
||||||
return ToolResult(
|
|
||||||
tool_use_id="",
|
|
||||||
content=json.dumps(
|
|
||||||
{
|
|
||||||
"message": f"Sub-agent '{agent_id}' not found in registry",
|
|
||||||
"data": None,
|
|
||||||
"metadata": {"agent_id": agent_id, "success": False, "error": "not_found"},
|
|
||||||
}
|
|
||||||
),
|
|
||||||
is_error=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
subagent_spec = ctx.node_registry[agent_id]
|
|
||||||
|
|
||||||
# 2. Create read-only memory snapshot
|
|
||||||
parent_data = ctx.buffer.read_all()
|
|
||||||
|
|
||||||
# Merge in-flight outputs from the parent's accumulator.
|
|
||||||
if accumulator:
|
|
||||||
for key, value in accumulator.to_dict().items():
|
|
||||||
if key not in parent_data:
|
|
||||||
parent_data[key] = value
|
|
||||||
|
|
||||||
subagent_buffer = DataBuffer()
|
|
||||||
for key, value in parent_data.items():
|
|
||||||
subagent_buffer.write(key, value, validate=False)
|
|
||||||
|
|
||||||
read_keys = set(parent_data.keys()) | set(subagent_spec.input_keys or [])
|
|
||||||
scoped_buffer = subagent_buffer.with_permissions(
|
|
||||||
read_keys=list(read_keys),
|
|
||||||
write_keys=[], # Read-only!
|
|
||||||
)
|
|
||||||
|
|
||||||
# 2b. Compute instance counter early so the callback and child context
|
|
||||||
# share the same stable node_id for this subagent invocation.
|
|
||||||
if subagent_instance_counter is not None:
|
|
||||||
subagent_instance_counter.setdefault(agent_id, 0)
|
|
||||||
subagent_instance_counter[agent_id] += 1
|
|
||||||
subagent_instance = str(subagent_instance_counter[agent_id])
|
|
||||||
else:
|
|
||||||
subagent_instance = "1"
|
|
||||||
|
|
||||||
if subagent_instance == "1":
|
|
||||||
sa_node_id = f"{ctx.node_id}:subagent:{agent_id}"
|
|
||||||
else:
|
|
||||||
sa_node_id = f"{ctx.node_id}:subagent:{agent_id}:{subagent_instance}"
|
|
||||||
|
|
||||||
# 2c. Set up report callback (one-way channel to parent / event bus)
|
|
||||||
subagent_reports: list[dict] = []
|
|
||||||
|
|
||||||
async def _report_callback(
|
|
||||||
message: str,
|
|
||||||
data: dict | None = None,
|
|
||||||
*,
|
|
||||||
wait_for_response: bool = False,
|
|
||||||
) -> str | None:
|
|
||||||
subagent_reports.append({"message": message, "data": data, "timestamp": time.time()})
|
|
||||||
if event_bus:
|
|
||||||
await event_bus.emit_subagent_report(
|
|
||||||
stream_id=ctx.node_id,
|
|
||||||
node_id=sa_node_id,
|
|
||||||
subagent_id=agent_id,
|
|
||||||
message=message,
|
|
||||||
data=data,
|
|
||||||
execution_id=ctx.execution_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
if not wait_for_response:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not event_bus:
|
|
||||||
logger.warning(
|
|
||||||
"Subagent '%s' requested user response but no event_bus available",
|
|
||||||
agent_id,
|
|
||||||
)
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Create isolated receiver and register for input routing
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
escalation_id = f"{ctx.node_id}:escalation:{uuid.uuid4().hex[:8]}"
|
|
||||||
receiver = escalation_receiver_cls()
|
|
||||||
registry = ctx.shared_node_registry
|
|
||||||
|
|
||||||
registry[escalation_id] = receiver
|
|
||||||
try:
|
|
||||||
await event_bus.emit_escalation_requested(
|
|
||||||
stream_id=ctx.stream_id or ctx.node_id,
|
|
||||||
node_id=escalation_id,
|
|
||||||
reason=f"Subagent report (wait_for_response) from {agent_id}",
|
|
||||||
context=message,
|
|
||||||
execution_id=ctx.execution_id,
|
|
||||||
)
|
|
||||||
# Block until queen responds
|
|
||||||
return await receiver.wait()
|
|
||||||
finally:
|
|
||||||
registry.pop(escalation_id, None)
|
|
||||||
|
|
||||||
# 3. Filter tools for subagent
|
|
||||||
subagent_tool_names = set(subagent_spec.tools or [])
|
|
||||||
tool_source = ctx.all_tools if ctx.all_tools else ctx.available_tools
|
|
||||||
|
|
||||||
# GCU auto-population
|
|
||||||
if subagent_spec.node_type == "gcu" and not subagent_tool_names:
|
|
||||||
subagent_tools = [t for t in tool_source if t.name != "delegate_to_sub_agent"]
|
|
||||||
else:
|
|
||||||
subagent_tools = [
|
|
||||||
t
|
|
||||||
for t in tool_source
|
|
||||||
if t.name in subagent_tool_names and t.name != "delegate_to_sub_agent"
|
|
||||||
]
|
|
||||||
|
|
||||||
missing = subagent_tool_names - {t.name for t in subagent_tools}
|
|
||||||
if missing:
|
|
||||||
logger.warning(
|
|
||||||
"Subagent '%s' requested tools not found in catalog: %s",
|
|
||||||
agent_id,
|
|
||||||
sorted(missing),
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"📦 Subagent '%s' configuration:\n"
|
|
||||||
" - System prompt: %s\n"
|
|
||||||
" - Tools available (%d): %s\n"
|
|
||||||
" - Memory keys inherited: %s",
|
|
||||||
agent_id,
|
|
||||||
(subagent_spec.system_prompt[:200] + "...")
|
|
||||||
if subagent_spec.system_prompt and len(subagent_spec.system_prompt) > 200
|
|
||||||
else subagent_spec.system_prompt,
|
|
||||||
len(subagent_tools),
|
|
||||||
[t.name for t in subagent_tools],
|
|
||||||
list(parent_data.keys()),
|
|
||||||
)
|
|
||||||
|
|
||||||
# 4. Build subagent context
|
|
||||||
max_iter = min(config.max_iterations, 10)
|
|
||||||
subagent_ctx = NodeContext(
|
|
||||||
runtime=ctx.runtime,
|
|
||||||
node_id=sa_node_id,
|
|
||||||
node_spec=subagent_spec,
|
|
||||||
buffer=scoped_buffer,
|
|
||||||
input_data={"task": task, **parent_data},
|
|
||||||
llm=ctx.llm,
|
|
||||||
available_tools=subagent_tools,
|
|
||||||
goal_context=(
|
|
||||||
f"Your specific task: {task}\n\n"
|
|
||||||
f"COMPLETION REQUIREMENTS:\n"
|
|
||||||
f"When your task is done, you MUST call set_output() "
|
|
||||||
f"for each required key: {subagent_spec.output_keys}\n"
|
|
||||||
f"Alternatively, call report_to_parent(mark_complete=true) "
|
|
||||||
f"with your findings in message/data.\n"
|
|
||||||
+ (
|
|
||||||
"Before finishing, call browser_close_finished() to clean up your browser tabs.\n"
|
|
||||||
if subagent_spec.node_type == "gcu"
|
|
||||||
else ""
|
|
||||||
)
|
|
||||||
+ f"You have a maximum of {max_iter} turns to complete this task."
|
|
||||||
),
|
|
||||||
goal=ctx.goal,
|
|
||||||
max_tokens=ctx.max_tokens,
|
|
||||||
runtime_logger=ctx.runtime_logger,
|
|
||||||
is_subagent_mode=True, # Prevents nested delegation
|
|
||||||
report_callback=_report_callback,
|
|
||||||
node_registry={}, # Empty - no nested subagents
|
|
||||||
shared_node_registry=ctx.shared_node_registry, # For escalation routing
|
|
||||||
)
|
|
||||||
|
|
||||||
# 5. Create and execute subagent EventLoopNode
|
|
||||||
subagent_conv_store = None
|
|
||||||
if conversation_store is not None:
|
|
||||||
from framework.storage.conversation_store import FileConversationStore
|
|
||||||
|
|
||||||
parent_base = getattr(conversation_store, "_base", None)
|
|
||||||
if parent_base is not None:
|
|
||||||
conversations_dir = parent_base.parent
|
|
||||||
subagent_dir_name = f"{agent_id}-{subagent_instance}"
|
|
||||||
subagent_store_path = conversations_dir / subagent_dir_name
|
|
||||||
subagent_conv_store = FileConversationStore(base_path=subagent_store_path)
|
|
||||||
|
|
||||||
# Derive a subagent-scoped spillover dir
|
|
||||||
subagent_spillover = None
|
|
||||||
if config.spillover_dir:
|
|
||||||
subagent_spillover = str(Path(config.spillover_dir) / agent_id / subagent_instance)
|
|
||||||
|
|
||||||
subagent_node = event_loop_node_cls(
|
|
||||||
event_bus=event_bus,
|
|
||||||
judge=SubagentJudge(task=task, max_iterations=max_iter),
|
|
||||||
config=LoopConfig(
|
|
||||||
max_iterations=max_iter,
|
|
||||||
max_tool_calls_per_turn=config.max_tool_calls_per_turn,
|
|
||||||
tool_call_overflow_margin=config.tool_call_overflow_margin,
|
|
||||||
max_context_tokens=config.max_context_tokens,
|
|
||||||
stall_detection_threshold=config.stall_detection_threshold,
|
|
||||||
max_tool_result_chars=config.max_tool_result_chars,
|
|
||||||
spillover_dir=subagent_spillover,
|
|
||||||
),
|
|
||||||
tool_executor=tool_executor,
|
|
||||||
conversation_store=subagent_conv_store,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Each subagent instance gets its own unique browser profile so concurrent
|
|
||||||
# subagents don't share tab groups. The profile is set as execution context
|
|
||||||
# so the tool registry auto-injects it into every browser_* MCP tool call.
|
|
||||||
_gcu_profile = f"{agent_id}:{subagent_instance}"
|
|
||||||
_profile_token = ToolRegistry.set_execution_context(profile=_gcu_profile)
|
|
||||||
|
|
||||||
try:
|
|
||||||
logger.info("🚀 Starting subagent '%s' execution...", agent_id)
|
|
||||||
start_time = time.time()
|
|
||||||
result = await subagent_node.execute(subagent_ctx)
|
|
||||||
latency_ms = int((time.time() - start_time) * 1000)
|
|
||||||
|
|
||||||
separator = "-" * 60
|
|
||||||
logger.info(
|
|
||||||
"\n%s\n"
|
|
||||||
"✅ SUBAGENT '%s' COMPLETED\n"
|
|
||||||
"%s\n"
|
|
||||||
"Success: %s\n"
|
|
||||||
"Latency: %dms\n"
|
|
||||||
"Tokens used: %s\n"
|
|
||||||
"Output keys: %s\n"
|
|
||||||
"%s",
|
|
||||||
separator,
|
|
||||||
agent_id,
|
|
||||||
separator,
|
|
||||||
result.success,
|
|
||||||
latency_ms,
|
|
||||||
result.tokens_used,
|
|
||||||
list(result.output.keys()) if result.output else [],
|
|
||||||
separator,
|
|
||||||
)
|
|
||||||
|
|
||||||
result_json = {
|
|
||||||
"message": (
|
|
||||||
f"Sub-agent '{agent_id}' completed successfully"
|
|
||||||
if result.success
|
|
||||||
else f"Sub-agent '{agent_id}' failed: {result.error}"
|
|
||||||
),
|
|
||||||
"data": result.output,
|
|
||||||
"reports": subagent_reports if subagent_reports else None,
|
|
||||||
"metadata": {
|
|
||||||
"agent_id": agent_id,
|
|
||||||
"success": result.success,
|
|
||||||
"tokens_used": result.tokens_used,
|
|
||||||
"latency_ms": latency_ms,
|
|
||||||
"report_count": len(subagent_reports),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
return ToolResult(
|
|
||||||
tool_use_id="",
|
|
||||||
content=json.dumps(result_json, indent=2, default=str),
|
|
||||||
is_error=not result.success,
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.exception(
|
|
||||||
"\n" + "!" * 60 + "\n❌ SUBAGENT '%s' FAILED\nError: %s\n" + "!" * 60,
|
|
||||||
agent_id,
|
|
||||||
str(e),
|
|
||||||
)
|
|
||||||
result_json = {
|
|
||||||
"message": f"Sub-agent '{agent_id}' raised exception: {e}",
|
|
||||||
"data": None,
|
|
||||||
"metadata": {
|
|
||||||
"agent_id": agent_id,
|
|
||||||
"success": False,
|
|
||||||
"error": str(e),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
return ToolResult(
|
|
||||||
tool_use_id="",
|
|
||||||
content=json.dumps(result_json, indent=2),
|
|
||||||
is_error=True,
|
|
||||||
)
|
|
||||||
finally:
|
|
||||||
ToolRegistry.reset_execution_context(_profile_token)
|
|
||||||
# Close the tab group this subagent created, if any.
|
|
||||||
try:
|
|
||||||
from gcu.browser.bridge import get_bridge
|
|
||||||
from gcu.browser.tools.lifecycle import _contexts
|
|
||||||
|
|
||||||
bridge = get_bridge()
|
|
||||||
ctx_entry = _contexts.pop(_gcu_profile, None)
|
|
||||||
if bridge and bridge.is_connected and ctx_entry:
|
|
||||||
group_id = ctx_entry.get("groupId")
|
|
||||||
if group_id is not None:
|
|
||||||
await bridge.destroy_context(group_id)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
"""Host layer -- how agents are triggered and hosted."""
|
||||||
|
|
||||||
|
from framework.host.agent_host import ( # noqa: F401
|
||||||
|
AgentHost,
|
||||||
|
AgentRuntimeConfig,
|
||||||
|
)
|
||||||
|
from framework.host.event_bus import AgentEvent, EventBus, EventType # noqa: F401
|
||||||
|
from framework.host.execution_manager import ( # noqa: F401
|
||||||
|
EntryPointSpec,
|
||||||
|
ExecutionManager,
|
||||||
|
)
|
||||||
File diff suppressed because it is too large
Load Diff
+18
-23
@@ -18,18 +18,18 @@ from dataclasses import dataclass, field
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
from framework.graph.checkpoint_config import CheckpointConfig
|
from framework.orchestrator.checkpoint_config import CheckpointConfig
|
||||||
from framework.graph.executor import ExecutionResult, GraphExecutor
|
from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
|
from framework.host.shared_state import IsolationLevel, SharedBufferManager
|
||||||
from framework.runtime.stream_runtime import StreamRuntime, StreamRuntimeAdapter
|
from framework.host.stream_runtime import StreamDecisionTracker, StreamRuntimeAdapter
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
from framework.graph.goal import Goal
|
from framework.orchestrator.goal import Goal
|
||||||
from framework.llm.provider import LLMProvider, Tool
|
from framework.llm.provider import LLMProvider, Tool
|
||||||
from framework.runtime.event_bus import AgentEvent
|
from framework.host.event_bus import AgentEvent
|
||||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
from framework.host.outcome_aggregator import OutcomeAggregator
|
||||||
from framework.storage.concurrent import ConcurrentStorage
|
from framework.storage.concurrent import ConcurrentStorage
|
||||||
from framework.storage.session_store import SessionStore
|
from framework.storage.session_store import SessionStore
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ class ExecutionContext:
|
|||||||
status: str = "pending" # pending, running, completed, failed, paused
|
status: str = "pending" # pending, running, completed, failed, paused
|
||||||
|
|
||||||
|
|
||||||
class ExecutionStream:
|
class ExecutionManager:
|
||||||
"""
|
"""
|
||||||
Manages concurrent executions for a single entry point.
|
Manages concurrent executions for a single entry point.
|
||||||
|
|
||||||
@@ -262,7 +262,7 @@ class ExecutionStream:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create stream-scoped runtime
|
# Create stream-scoped runtime
|
||||||
self._runtime = StreamRuntime(
|
self._runtime = StreamDecisionTracker(
|
||||||
stream_id=stream_id,
|
stream_id=stream_id,
|
||||||
storage=storage,
|
storage=storage,
|
||||||
outcome_aggregator=outcome_aggregator,
|
outcome_aggregator=outcome_aggregator,
|
||||||
@@ -271,7 +271,7 @@ class ExecutionStream:
|
|||||||
# Execution tracking
|
# Execution tracking
|
||||||
self._active_executions: dict[str, ExecutionContext] = {}
|
self._active_executions: dict[str, ExecutionContext] = {}
|
||||||
self._execution_tasks: dict[str, asyncio.Task] = {}
|
self._execution_tasks: dict[str, asyncio.Task] = {}
|
||||||
self._active_executors: dict[str, GraphExecutor] = {}
|
self._active_executors: dict[str, Orchestrator] = {}
|
||||||
self._cancel_reasons: dict[str, str] = {}
|
self._cancel_reasons: dict[str, str] = {}
|
||||||
self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
|
self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict()
|
||||||
self._execution_result_times: dict[str, float] = {}
|
self._execution_result_times: dict[str, float] = {}
|
||||||
@@ -301,7 +301,7 @@ class ExecutionStream:
|
|||||||
|
|
||||||
# Emit stream started event
|
# Emit stream started event
|
||||||
if self._scoped_event_bus:
|
if self._scoped_event_bus:
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
await self._scoped_event_bus.publish(
|
await self._scoped_event_bus.publish(
|
||||||
AgentEvent(
|
AgentEvent(
|
||||||
@@ -426,7 +426,7 @@ class ExecutionStream:
|
|||||||
|
|
||||||
# Emit stream stopped event
|
# Emit stream stopped event
|
||||||
if self._scoped_event_bus:
|
if self._scoped_event_bus:
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
await self._scoped_event_bus.publish(
|
await self._scoped_event_bus.publish(
|
||||||
AgentEvent(
|
AgentEvent(
|
||||||
@@ -668,7 +668,7 @@ class ExecutionStream:
|
|||||||
# Create per-execution runtime logger
|
# Create per-execution runtime logger
|
||||||
runtime_logger = None
|
runtime_logger = None
|
||||||
if self._runtime_log_store:
|
if self._runtime_log_store:
|
||||||
from framework.runtime.runtime_logger import RuntimeLogger
|
from framework.tracker.runtime_logger import RuntimeLogger
|
||||||
|
|
||||||
runtime_logger = RuntimeLogger(
|
runtime_logger = RuntimeLogger(
|
||||||
store=self._runtime_log_store, agent_id=self.graph.id
|
store=self._runtime_log_store, agent_id=self.graph.id
|
||||||
@@ -697,12 +697,7 @@ class ExecutionStream:
|
|||||||
# forward so the next attempt resumes at the failed node.
|
# forward so the next attempt resumes at the failed node.
|
||||||
while True:
|
while True:
|
||||||
# Create executor for this execution.
|
# Create executor for this execution.
|
||||||
# Each execution gets its own storage under sessions/{exec_id}/
|
executor = Orchestrator(
|
||||||
# so conversations, spillover, and data files are all scoped
|
|
||||||
# to this execution. The executor sets data_dir via execution
|
|
||||||
# context (contextvars) so data tools and spillover share the
|
|
||||||
# same session-scoped directory.
|
|
||||||
executor = GraphExecutor(
|
|
||||||
runtime=runtime_adapter,
|
runtime=runtime_adapter,
|
||||||
llm=self._llm,
|
llm=self._llm,
|
||||||
tools=self._tools,
|
tools=self._tools,
|
||||||
@@ -763,7 +758,7 @@ class ExecutionStream:
|
|||||||
|
|
||||||
# Emit resurrection event
|
# Emit resurrection event
|
||||||
if self._scoped_event_bus:
|
if self._scoped_event_bus:
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
await self._scoped_event_bus.publish(
|
await self._scoped_event_bus.publish(
|
||||||
AgentEvent(
|
AgentEvent(
|
||||||
@@ -1119,7 +1114,7 @@ class ExecutionStream:
|
|||||||
Each stream only executes from its own entry_node, but the full
|
Each stream only executes from its own entry_node, but the full
|
||||||
graph must validate with all entry points accounted for.
|
graph must validate with all entry points accounted for.
|
||||||
"""
|
"""
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
|
|
||||||
# Merge entry points: this stream's entry + original graph's primary
|
# Merge entry points: this stream's entry + original graph's primary
|
||||||
# entry + any other entry points. This ensures all nodes are
|
# entry + any other entry points. This ensures all nodes are
|
||||||
+2
-2
@@ -14,8 +14,8 @@ from typing import TYPE_CHECKING, Any
|
|||||||
from framework.schemas.decision import Decision, Outcome
|
from framework.schemas.decision import Decision, Outcome
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.goal import Goal
|
from framework.orchestrator.goal import Goal
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -18,12 +18,12 @@ from framework.schemas.run import Run, RunStatus
|
|||||||
from framework.storage.concurrent import ConcurrentStorage
|
from framework.storage.concurrent import ConcurrentStorage
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
from framework.host.outcome_aggregator import OutcomeAggregator
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class StreamRuntime:
|
class StreamDecisionTracker:
|
||||||
"""
|
"""
|
||||||
Thread-safe runtime for a single execution stream.
|
Thread-safe runtime for a single execution stream.
|
||||||
|
|
||||||
@@ -431,7 +431,7 @@ class StreamRuntimeAdapter:
|
|||||||
by providing the same API as Runtime but routing to a specific execution.
|
by providing the same API as Runtime but routing to a specific execution.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, stream_runtime: StreamRuntime, execution_id: str):
|
def __init__(self, stream_runtime: StreamDecisionTracker, execution_id: str):
|
||||||
"""
|
"""
|
||||||
Create adapter for a specific execution.
|
Create adapter for a specific execution.
|
||||||
|
|
||||||
@@ -13,7 +13,7 @@ from dataclasses import dataclass
|
|||||||
|
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
|
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -0,0 +1,101 @@
|
|||||||
|
"""Thread-safe API key pool with round-robin rotation and health tracking.
|
||||||
|
|
||||||
|
When multiple API keys are configured, the pool rotates through them on each
|
||||||
|
request. Keys that hit rate limits are temporarily cooled-down so the next
|
||||||
|
call automatically uses a healthy key -- no sleep required.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class KeyHealth:
|
||||||
|
"""Per-key health counters."""
|
||||||
|
|
||||||
|
rate_limited_until: float = 0.0 # monotonic timestamp
|
||||||
|
consecutive_errors: int = 0
|
||||||
|
total_requests: int = 0
|
||||||
|
total_successes: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
class KeyPool:
|
||||||
|
"""Round-robin key pool with health tracking.
|
||||||
|
|
||||||
|
Thread-safe: all mutations protected by a lock so concurrent LLM calls
|
||||||
|
(e.g. parallel tool execution in EventLoopNode) don't race.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, keys: list[str]) -> None:
|
||||||
|
if not keys:
|
||||||
|
raise ValueError("KeyPool requires at least one key")
|
||||||
|
self._keys = list(keys)
|
||||||
|
self._index = 0
|
||||||
|
self._health: dict[str, KeyHealth] = {k: KeyHealth() for k in keys}
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def size(self) -> int:
|
||||||
|
return len(self._keys)
|
||||||
|
|
||||||
|
def get_key(self) -> str:
|
||||||
|
"""Return the next healthy key (round-robin).
|
||||||
|
|
||||||
|
If every key is currently rate-limited, returns the one whose cooldown
|
||||||
|
expires soonest so the caller can proceed with minimal delay.
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
now = time.monotonic()
|
||||||
|
for _ in range(len(self._keys)):
|
||||||
|
key = self._keys[self._index]
|
||||||
|
self._index = (self._index + 1) % len(self._keys)
|
||||||
|
health = self._health[key]
|
||||||
|
if health.rate_limited_until <= now:
|
||||||
|
health.total_requests += 1
|
||||||
|
return key
|
||||||
|
# All rate-limited -- pick the one that expires soonest.
|
||||||
|
soonest = min(self._keys, key=lambda k: self._health[k].rate_limited_until)
|
||||||
|
self._health[soonest].total_requests += 1
|
||||||
|
return soonest
|
||||||
|
|
||||||
|
def mark_rate_limited(self, key: str, retry_after: float = 60.0) -> None:
|
||||||
|
"""Mark *key* as rate-limited for *retry_after* seconds."""
|
||||||
|
with self._lock:
|
||||||
|
health = self._health.get(key)
|
||||||
|
if health:
|
||||||
|
health.rate_limited_until = time.monotonic() + retry_after
|
||||||
|
health.consecutive_errors += 1
|
||||||
|
logger.info(
|
||||||
|
"[key-pool] Key ...%s rate-limited for %.0fs (errors=%d)",
|
||||||
|
key[-6:],
|
||||||
|
retry_after,
|
||||||
|
health.consecutive_errors,
|
||||||
|
)
|
||||||
|
|
||||||
|
def mark_success(self, key: str) -> None:
|
||||||
|
"""Record a successful call on *key*."""
|
||||||
|
with self._lock:
|
||||||
|
health = self._health.get(key)
|
||||||
|
if health:
|
||||||
|
health.consecutive_errors = 0
|
||||||
|
health.total_successes += 1
|
||||||
|
|
||||||
|
def get_stats(self) -> dict[str, dict]:
|
||||||
|
"""Return health stats keyed by the last 6 chars of each key."""
|
||||||
|
with self._lock:
|
||||||
|
now = time.monotonic()
|
||||||
|
return {
|
||||||
|
f"...{k[-6:]}": {
|
||||||
|
"healthy": self._health[k].rate_limited_until <= now,
|
||||||
|
"requests": self._health[k].total_requests,
|
||||||
|
"successes": self._health[k].total_successes,
|
||||||
|
"consecutive_errors": self._health[k].consecutive_errors,
|
||||||
|
}
|
||||||
|
for k in self._keys
|
||||||
|
}
|
||||||
@@ -7,6 +7,8 @@ Groq, and local models.
|
|||||||
See: https://docs.litellm.ai/docs/providers
|
See: https://docs.litellm.ai/docs/providers
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import ast
|
import ast
|
||||||
import asyncio
|
import asyncio
|
||||||
import hashlib
|
import hashlib
|
||||||
@@ -18,7 +20,10 @@ import time
|
|||||||
from collections.abc import AsyncIterator
|
from collections.abc import AsyncIterator
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from framework.llm.key_pool import KeyPool
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import litellm
|
import litellm
|
||||||
@@ -561,6 +566,7 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
model: str = "gpt-4o-mini",
|
model: str = "gpt-4o-mini",
|
||||||
api_key: str | None = None,
|
api_key: str | None = None,
|
||||||
api_base: str | None = None,
|
api_base: str | None = None,
|
||||||
|
api_keys: list[str] | None = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -573,6 +579,9 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
look for the appropriate env var (OPENAI_API_KEY,
|
look for the appropriate env var (OPENAI_API_KEY,
|
||||||
ANTHROPIC_API_KEY, etc.)
|
ANTHROPIC_API_KEY, etc.)
|
||||||
api_base: Custom API base URL (for proxies or local deployments)
|
api_base: Custom API base URL (for proxies or local deployments)
|
||||||
|
api_keys: Optional list of API keys for key-pool rotation. When
|
||||||
|
provided with 2+ keys, a :class:`KeyPool` is created and
|
||||||
|
keys are rotated on rate-limit errors.
|
||||||
**kwargs: Additional arguments passed to litellm.completion()
|
**kwargs: Additional arguments passed to litellm.completion()
|
||||||
"""
|
"""
|
||||||
# Kimi For Coding exposes an Anthropic-compatible endpoint at
|
# Kimi For Coding exposes an Anthropic-compatible endpoint at
|
||||||
@@ -594,11 +603,24 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
if api_base and api_base.rstrip("/").endswith("/v1"):
|
if api_base and api_base.rstrip("/").endswith("/v1"):
|
||||||
api_base = api_base.rstrip("/")[:-3]
|
api_base = api_base.rstrip("/")[:-3]
|
||||||
self.model = model
|
self.model = model
|
||||||
self.api_key = api_key
|
# Key pool: when multiple keys are provided, enable rotation.
|
||||||
|
self._key_pool: KeyPool | None = None
|
||||||
|
if api_keys and len(api_keys) > 1:
|
||||||
|
from framework.llm.key_pool import KeyPool
|
||||||
|
|
||||||
|
self._key_pool = KeyPool(api_keys)
|
||||||
|
self.api_key = api_keys[0] # default for OAuth detection below
|
||||||
|
logger.info(
|
||||||
|
"[litellm] Key pool enabled with %d keys for model %s",
|
||||||
|
len(api_keys),
|
||||||
|
model,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.api_key = api_key or (api_keys[0] if api_keys else None)
|
||||||
self.api_base = api_base or self._default_api_base_for_model(_original_model)
|
self.api_base = api_base or self._default_api_base_for_model(_original_model)
|
||||||
self.extra_kwargs = kwargs
|
self.extra_kwargs = kwargs
|
||||||
# Detect Claude Code OAuth subscription by checking the api_key prefix.
|
# Detect Claude Code OAuth subscription by checking the api_key prefix.
|
||||||
self._claude_code_oauth = bool(api_key and api_key.startswith("sk-ant-oat"))
|
self._claude_code_oauth = bool(self.api_key and self.api_key.startswith("sk-ant-oat"))
|
||||||
if self._claude_code_oauth:
|
if self._claude_code_oauth:
|
||||||
# Anthropic requires a specific User-Agent for OAuth requests.
|
# Anthropic requires a specific User-Agent for OAuth requests.
|
||||||
eh = self.extra_kwargs.setdefault("extra_headers", {})
|
eh = self.extra_kwargs.setdefault("extra_headers", {})
|
||||||
@@ -669,10 +691,20 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
def _completion_with_rate_limit_retry(
|
def _completion_with_rate_limit_retry(
|
||||||
self, max_retries: int | None = None, **kwargs: Any
|
self, max_retries: int | None = None, **kwargs: Any
|
||||||
) -> Any:
|
) -> Any:
|
||||||
"""Call litellm.completion with retry on 429 rate limit errors and empty responses."""
|
"""Call litellm.completion with retry on 429 rate limit errors and empty responses.
|
||||||
|
|
||||||
|
When a :class:`KeyPool` is configured, rate-limited keys are rotated
|
||||||
|
automatically so the next attempt uses a different key -- no sleep
|
||||||
|
needed between attempts.
|
||||||
|
"""
|
||||||
model = kwargs.get("model", self.model)
|
model = kwargs.get("model", self.model)
|
||||||
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
|
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
|
||||||
for attempt in range(retries + 1):
|
for attempt in range(retries + 1):
|
||||||
|
# Rotate key from pool when available.
|
||||||
|
current_key: str | None = None
|
||||||
|
if self._key_pool:
|
||||||
|
current_key = self._key_pool.get_key()
|
||||||
|
kwargs["api_key"] = current_key
|
||||||
try:
|
try:
|
||||||
response = litellm.completion(**kwargs) # type: ignore[union-attr]
|
response = litellm.completion(**kwargs) # type: ignore[union-attr]
|
||||||
|
|
||||||
@@ -747,8 +779,22 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
time.sleep(wait)
|
time.sleep(wait)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if self._key_pool and current_key:
|
||||||
|
self._key_pool.mark_success(current_key)
|
||||||
return response
|
return response
|
||||||
except RateLimitError as e:
|
except RateLimitError as e:
|
||||||
|
# Key pool: mark the offending key and rotate immediately.
|
||||||
|
if self._key_pool and current_key:
|
||||||
|
self._key_pool.mark_rate_limited(current_key, retry_after=60.0)
|
||||||
|
# When we have other healthy keys, skip the sleep -- the
|
||||||
|
# next iteration will pick a different key automatically.
|
||||||
|
if attempt < retries:
|
||||||
|
logger.info(
|
||||||
|
"[retry] Key pool rotating away from ...%s on 429",
|
||||||
|
current_key[-6:],
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
# Dump full request to file for debugging
|
# Dump full request to file for debugging
|
||||||
messages = kwargs.get("messages", [])
|
messages = kwargs.get("messages", [])
|
||||||
token_count, token_method = _estimate_tokens(model, messages)
|
token_count, token_method = _estimate_tokens(model, messages)
|
||||||
@@ -761,7 +807,7 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
if attempt == retries:
|
if attempt == retries:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"[retry] GAVE UP on {model} after {retries + 1} "
|
f"[retry] GAVE UP on {model} after {retries + 1} "
|
||||||
f"attempts — rate limit error: {e!s}. "
|
f"attempts -- rate limit error: {e!s}. "
|
||||||
f"~{token_count} tokens ({token_method}). "
|
f"~{token_count} tokens ({token_method}). "
|
||||||
f"Full request dumped to: {dump_path}"
|
f"Full request dumped to: {dump_path}"
|
||||||
)
|
)
|
||||||
@@ -880,10 +926,16 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
"""Async version of _completion_with_rate_limit_retry.
|
"""Async version of _completion_with_rate_limit_retry.
|
||||||
|
|
||||||
Uses litellm.acompletion and asyncio.sleep instead of blocking calls.
|
Uses litellm.acompletion and asyncio.sleep instead of blocking calls.
|
||||||
|
When a :class:`KeyPool` is configured, rate-limited keys are rotated.
|
||||||
"""
|
"""
|
||||||
model = kwargs.get("model", self.model)
|
model = kwargs.get("model", self.model)
|
||||||
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
|
retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES
|
||||||
for attempt in range(retries + 1):
|
for attempt in range(retries + 1):
|
||||||
|
# Rotate key from pool when available.
|
||||||
|
current_key: str | None = None
|
||||||
|
if self._key_pool:
|
||||||
|
current_key = self._key_pool.get_key()
|
||||||
|
kwargs["api_key"] = current_key
|
||||||
try:
|
try:
|
||||||
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
|
response = await litellm.acompletion(**kwargs) # type: ignore[union-attr]
|
||||||
|
|
||||||
@@ -952,8 +1004,20 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
await asyncio.sleep(wait)
|
await asyncio.sleep(wait)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if self._key_pool and current_key:
|
||||||
|
self._key_pool.mark_success(current_key)
|
||||||
return response
|
return response
|
||||||
except RateLimitError as e:
|
except RateLimitError as e:
|
||||||
|
# Key pool: mark the offending key and rotate immediately.
|
||||||
|
if self._key_pool and current_key:
|
||||||
|
self._key_pool.mark_rate_limited(current_key, retry_after=60.0)
|
||||||
|
if attempt < retries:
|
||||||
|
logger.info(
|
||||||
|
"[async-retry] Key pool rotating away from ...%s on 429",
|
||||||
|
current_key[-6:],
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
messages = kwargs.get("messages", [])
|
messages = kwargs.get("messages", [])
|
||||||
token_count, token_method = _estimate_tokens(model, messages)
|
token_count, token_method = _estimate_tokens(model, messages)
|
||||||
dump_path = _dump_failed_request(
|
dump_path = _dump_failed_request(
|
||||||
@@ -965,7 +1029,7 @@ class LiteLLMProvider(LLMProvider):
|
|||||||
if attempt == retries:
|
if attempt == retries:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"[async-retry] GAVE UP on {model} after {retries + 1} "
|
f"[async-retry] GAVE UP on {model} after {retries + 1} "
|
||||||
f"attempts — rate limit error: {e!s}. "
|
f"attempts -- rate limit error: {e!s}. "
|
||||||
f"~{token_count} tokens ({token_method}). "
|
f"~{token_count} tokens ({token_method}). "
|
||||||
f"Full request dumped to: {dump_path}"
|
f"Full request dumped to: {dump_path}"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
"""Loader layer -- agent loading from disk (JSON config, MCP, credentials)."""
|
||||||
|
|
||||||
|
from framework.loader.agent_loader import AgentLoader # noqa: F401
|
||||||
|
from framework.loader.tool_registry import ToolRegistry # noqa: F401
|
||||||
@@ -13,21 +13,20 @@ from framework.config import get_hive_config, get_max_context_tokens, get_prefer
|
|||||||
from framework.credentials.validation import (
|
from framework.credentials.validation import (
|
||||||
ensure_credential_key_env as _ensure_credential_key_env,
|
ensure_credential_key_env as _ensure_credential_key_env,
|
||||||
)
|
)
|
||||||
from framework.graph import Goal
|
from framework.orchestrator import Goal
|
||||||
from framework.graph.edge import (
|
from framework.orchestrator.edge import (
|
||||||
DEFAULT_MAX_TOKENS,
|
DEFAULT_MAX_TOKENS,
|
||||||
EdgeCondition,
|
EdgeCondition,
|
||||||
EdgeSpec,
|
EdgeSpec,
|
||||||
GraphSpec,
|
GraphSpec,
|
||||||
)
|
)
|
||||||
from framework.graph.executor import ExecutionResult
|
from framework.orchestrator.orchestrator import ExecutionResult
|
||||||
from framework.graph.node import NodeSpec
|
from framework.orchestrator.node import NodeSpec
|
||||||
from framework.llm.provider import LLMProvider, Tool
|
from framework.llm.provider import LLMProvider, Tool
|
||||||
from framework.runner.preload_validation import run_preload_validation
|
from framework.loader.preload_validation import run_preload_validation
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
from framework.loader.tool_registry import ToolRegistry
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime
|
from framework.host.agent_host import AgentHost, AgentRuntimeConfig
|
||||||
from framework.runtime.execution_stream import EntryPointSpec
|
from framework.host.execution_manager import EntryPointSpec
|
||||||
from framework.runtime.runtime_log_store import RuntimeLogStore
|
|
||||||
from framework.tools.flowchart_utils import generate_fallback_flowchart
|
from framework.tools.flowchart_utils import generate_fallback_flowchart
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -881,6 +880,172 @@ class ValidationResult:
|
|||||||
missing_credentials: list[str] = field(default_factory=list)
|
missing_credentials: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_template_vars(text: str | None, variables: dict[str, str]) -> str | None:
|
||||||
|
"""Resolve ``{{variable_name}}`` placeholders in *text*."""
|
||||||
|
if text is None or not variables:
|
||||||
|
return text
|
||||||
|
import re
|
||||||
|
|
||||||
|
def _replace(m: re.Match) -> str:
|
||||||
|
key = m.group(1).strip()
|
||||||
|
return variables.get(key, m.group(0))
|
||||||
|
|
||||||
|
return re.sub(r"\{\{(.+?)\}\}", _replace, text)
|
||||||
|
|
||||||
|
|
||||||
|
def load_agent_config(data: str | dict) -> tuple[GraphSpec, Goal]:
|
||||||
|
"""Load ``GraphSpec`` and ``Goal`` from a declarative :class:`AgentConfig`.
|
||||||
|
|
||||||
|
The declarative format uses a ``name`` key at the top level, unlike the
|
||||||
|
legacy export format which uses ``graph``/``goal`` keys. The runner
|
||||||
|
auto-detects the format in :meth:`AgentLoader.load`.
|
||||||
|
|
||||||
|
Template variables in ``config.variables`` are resolved in all
|
||||||
|
``system_prompt`` and ``identity_prompt`` fields via ``{{var_name}}``.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (GraphSpec, Goal)
|
||||||
|
"""
|
||||||
|
from framework.orchestrator.edge import EdgeCondition, EdgeSpec
|
||||||
|
from framework.orchestrator.goal import Constraint, Goal as GoalModel, SuccessCriterion
|
||||||
|
from framework.schemas.agent_config import AgentConfig
|
||||||
|
|
||||||
|
if isinstance(data, str):
|
||||||
|
data = json.loads(data)
|
||||||
|
|
||||||
|
config = AgentConfig.model_validate(data)
|
||||||
|
tvars = config.variables
|
||||||
|
|
||||||
|
# Build Goal
|
||||||
|
success_criteria = [
|
||||||
|
SuccessCriterion(
|
||||||
|
id=f"sc-{i}",
|
||||||
|
description=sc,
|
||||||
|
metric="llm_judge",
|
||||||
|
target="",
|
||||||
|
)
|
||||||
|
for i, sc in enumerate(config.goal.success_criteria)
|
||||||
|
]
|
||||||
|
constraints = [
|
||||||
|
Constraint(
|
||||||
|
id=f"c-{i}",
|
||||||
|
description=c,
|
||||||
|
constraint_type="hard",
|
||||||
|
category="general",
|
||||||
|
)
|
||||||
|
for i, c in enumerate(config.goal.constraints)
|
||||||
|
]
|
||||||
|
goal = GoalModel(
|
||||||
|
id=f"{config.name}-goal",
|
||||||
|
name=config.name,
|
||||||
|
description=config.goal.description,
|
||||||
|
success_criteria=success_criteria,
|
||||||
|
constraints=constraints,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build nodes
|
||||||
|
condition_map = {
|
||||||
|
"always": EdgeCondition.ALWAYS,
|
||||||
|
"on_success": EdgeCondition.ON_SUCCESS,
|
||||||
|
"on_failure": EdgeCondition.ON_FAILURE,
|
||||||
|
"conditional": EdgeCondition.CONDITIONAL,
|
||||||
|
"llm_decide": EdgeCondition.LLM_DECIDE,
|
||||||
|
}
|
||||||
|
|
||||||
|
nodes = []
|
||||||
|
for nc in config.nodes:
|
||||||
|
# Resolve tool access: node-level config -> agent-level fallback
|
||||||
|
if nc.tools.policy == "explicit" and nc.tools.allowed:
|
||||||
|
tools_list = nc.tools.allowed
|
||||||
|
tool_policy = "explicit"
|
||||||
|
elif nc.tools.policy == "none":
|
||||||
|
tools_list = []
|
||||||
|
tool_policy = "none"
|
||||||
|
elif nc.tools.policy == "all":
|
||||||
|
tools_list = []
|
||||||
|
tool_policy = "all"
|
||||||
|
else:
|
||||||
|
# Inherit agent-level tool config
|
||||||
|
if config.tools.policy == "explicit" and config.tools.allowed:
|
||||||
|
tools_list = config.tools.allowed
|
||||||
|
else:
|
||||||
|
tools_list = []
|
||||||
|
tool_policy = config.tools.policy
|
||||||
|
|
||||||
|
node_kwargs: dict = {
|
||||||
|
"id": nc.id,
|
||||||
|
"name": nc.name or nc.id,
|
||||||
|
"description": nc.description or "",
|
||||||
|
"node_type": nc.node_type,
|
||||||
|
"system_prompt": _resolve_template_vars(nc.system_prompt, tvars),
|
||||||
|
"tools": tools_list,
|
||||||
|
"tool_access_policy": tool_policy,
|
||||||
|
"model": nc.model,
|
||||||
|
"input_keys": nc.input_keys,
|
||||||
|
"output_keys": nc.output_keys,
|
||||||
|
"nullable_output_keys": nc.nullable_output_keys,
|
||||||
|
"max_iterations": nc.max_iterations,
|
||||||
|
"success_criteria": nc.success_criteria,
|
||||||
|
"skip_judge": nc.skip_judge,
|
||||||
|
}
|
||||||
|
# Optional fields -- only pass when set (avoids overriding defaults)
|
||||||
|
if nc.client_facing:
|
||||||
|
node_kwargs["client_facing"] = nc.client_facing
|
||||||
|
if nc.max_node_visits != 1:
|
||||||
|
node_kwargs["max_node_visits"] = nc.max_node_visits
|
||||||
|
if nc.failure_criteria:
|
||||||
|
node_kwargs["failure_criteria"] = nc.failure_criteria
|
||||||
|
if nc.max_retries is not None:
|
||||||
|
node_kwargs["max_retries"] = nc.max_retries
|
||||||
|
|
||||||
|
nodes.append(NodeSpec(**node_kwargs))
|
||||||
|
|
||||||
|
# Build edges
|
||||||
|
edges = []
|
||||||
|
for i, ec in enumerate(config.edges):
|
||||||
|
edges.append(
|
||||||
|
EdgeSpec(
|
||||||
|
id=f"e-{i}-{ec.from_node}-{ec.to_node}",
|
||||||
|
source=ec.from_node,
|
||||||
|
target=ec.to_node,
|
||||||
|
condition=condition_map.get(ec.condition, EdgeCondition.ON_SUCCESS),
|
||||||
|
condition_expr=ec.condition_expr,
|
||||||
|
priority=ec.priority,
|
||||||
|
input_mapping=ec.input_mapping,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build entry_points dict for GraphSpec
|
||||||
|
entry_points_dict: dict = {}
|
||||||
|
if config.entry_points:
|
||||||
|
for ep in config.entry_points:
|
||||||
|
entry_points_dict[ep.id] = ep.entry_node or config.entry_node
|
||||||
|
else:
|
||||||
|
entry_points_dict = {"default": config.entry_node}
|
||||||
|
|
||||||
|
# Build GraphSpec
|
||||||
|
graph_kwargs: dict = {
|
||||||
|
"id": f"{config.name}-graph",
|
||||||
|
"goal_id": goal.id,
|
||||||
|
"version": config.version,
|
||||||
|
"entry_node": config.entry_node,
|
||||||
|
"entry_points": entry_points_dict,
|
||||||
|
"terminal_nodes": config.terminal_nodes,
|
||||||
|
"pause_nodes": config.pause_nodes,
|
||||||
|
"nodes": nodes,
|
||||||
|
"edges": edges,
|
||||||
|
"max_tokens": config.max_tokens,
|
||||||
|
"loop_config": dict(config.loop_config),
|
||||||
|
"conversation_mode": config.conversation_mode,
|
||||||
|
"identity_prompt": _resolve_template_vars(
|
||||||
|
config.identity_prompt, tvars
|
||||||
|
) or "",
|
||||||
|
}
|
||||||
|
|
||||||
|
graph = GraphSpec(**graph_kwargs)
|
||||||
|
return graph, goal
|
||||||
|
|
||||||
|
|
||||||
def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
|
def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
|
||||||
"""
|
"""
|
||||||
Load GraphSpec and Goal from export_graph() output.
|
Load GraphSpec and Goal from export_graph() output.
|
||||||
@@ -942,7 +1107,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Build Goal
|
# Build Goal
|
||||||
from framework.graph.goal import Constraint, SuccessCriterion
|
from framework.orchestrator.goal import Constraint, SuccessCriterion
|
||||||
|
|
||||||
success_criteria = []
|
success_criteria = []
|
||||||
for sc_data in goal_data.get("success_criteria", []):
|
for sc_data in goal_data.get("success_criteria", []):
|
||||||
@@ -979,7 +1144,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]:
|
|||||||
return graph, goal
|
return graph, goal
|
||||||
|
|
||||||
|
|
||||||
class AgentRunner:
|
class AgentLoader:
|
||||||
"""
|
"""
|
||||||
Loads and runs exported agents with minimal boilerplate.
|
Loads and runs exported agents with minimal boilerplate.
|
||||||
|
|
||||||
@@ -991,15 +1156,15 @@ class AgentRunner:
|
|||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
# Simple usage
|
# Simple usage
|
||||||
runner = AgentRunner.load("exports/outbound-sales-agent")
|
runner = AgentLoader.load("exports/outbound-sales-agent")
|
||||||
result = await runner.run({"lead_id": "123"})
|
result = await runner.run({"lead_id": "123"})
|
||||||
|
|
||||||
# With context manager
|
# With context manager
|
||||||
async with AgentRunner.load("exports/outbound-sales-agent") as runner:
|
async with AgentLoader.load("exports/outbound-sales-agent") as runner:
|
||||||
result = await runner.run({"lead_id": "123"})
|
result = await runner.run({"lead_id": "123"})
|
||||||
|
|
||||||
# With custom tools
|
# With custom tools
|
||||||
runner = AgentRunner.load("exports/outbound-sales-agent")
|
runner = AgentLoader.load("exports/outbound-sales-agent")
|
||||||
runner.register_tool("my_tool", my_tool_func)
|
runner.register_tool("my_tool", my_tool_func)
|
||||||
result = await runner.run({"lead_id": "123"})
|
result = await runner.run({"lead_id": "123"})
|
||||||
"""
|
"""
|
||||||
@@ -1027,7 +1192,7 @@ class AgentRunner:
|
|||||||
credential_store: Any | None = None,
|
credential_store: Any | None = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize the runner (use AgentRunner.load() instead).
|
Initialize the runner (use AgentLoader.load() instead).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
agent_path: Path to agent folder
|
agent_path: Path to agent folder
|
||||||
@@ -1082,7 +1247,7 @@ class AgentRunner:
|
|||||||
self._approval_callback: Callable | None = None
|
self._approval_callback: Callable | None = None
|
||||||
|
|
||||||
# AgentRuntime — unified execution path for all agents
|
# AgentRuntime — unified execution path for all agents
|
||||||
self._agent_runtime: AgentRuntime | None = None
|
self._agent_runtime: AgentHost | None = None
|
||||||
# Pre-load validation: structural checks + credentials.
|
# Pre-load validation: structural checks + credentials.
|
||||||
# Fails fast with actionable guidance — no MCP noise on screen.
|
# Fails fast with actionable guidance — no MCP noise on screen.
|
||||||
run_preload_validation(
|
run_preload_validation(
|
||||||
@@ -1101,14 +1266,7 @@ class AgentRunner:
|
|||||||
os.environ["HIVE_AGENT_NAME"] = agent_path.name
|
os.environ["HIVE_AGENT_NAME"] = agent_path.name
|
||||||
os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path)
|
os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path)
|
||||||
|
|
||||||
# Auto-discover MCP servers from mcp_servers.json
|
# MCP tools are loaded by McpRegistryStage in the pipeline during AgentHost.start()
|
||||||
mcp_config_path = agent_path / "mcp_servers.json"
|
|
||||||
if mcp_config_path.exists():
|
|
||||||
self._load_mcp_servers_from_config(mcp_config_path)
|
|
||||||
|
|
||||||
# Auto-discover registry-selected MCP servers from mcp_registry.json
|
|
||||||
self._load_registry_mcp_servers(agent_path)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _import_agent_module(agent_path: Path):
|
def _import_agent_module(agent_path: Path):
|
||||||
"""Import an agent package from its directory path.
|
"""Import an agent package from its directory path.
|
||||||
@@ -1158,7 +1316,7 @@ class AgentRunner:
|
|||||||
interactive: bool = True,
|
interactive: bool = True,
|
||||||
skip_credential_validation: bool | None = None,
|
skip_credential_validation: bool | None = None,
|
||||||
credential_store: Any | None = None,
|
credential_store: Any | None = None,
|
||||||
) -> "AgentRunner":
|
) -> "AgentLoader":
|
||||||
"""
|
"""
|
||||||
Load an agent from an export folder.
|
Load an agent from an export folder.
|
||||||
|
|
||||||
@@ -1299,21 +1457,22 @@ class AgentRunner:
|
|||||||
runner._agent_skills = agent_skills
|
runner._agent_skills = agent_skills
|
||||||
return runner
|
return runner
|
||||||
|
|
||||||
# Fallback: load from agent.json (legacy JSON-based agents)
|
# Fallback: load from agent.json (declarative config)
|
||||||
agent_json_path = agent_path / "agent.json"
|
agent_json_path = agent_path / "agent.json"
|
||||||
|
|
||||||
if not agent_json_path.is_file():
|
if not agent_json_path.is_file():
|
||||||
raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")
|
raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}")
|
||||||
|
|
||||||
with open(agent_json_path, encoding="utf-8") as f:
|
export_data = agent_json_path.read_text(encoding="utf-8")
|
||||||
export_data = f.read()
|
|
||||||
|
|
||||||
if not export_data.strip():
|
if not export_data.strip():
|
||||||
raise ValueError(f"Empty agent export file: {agent_json_path}")
|
raise ValueError(f"Empty agent.json: {agent_json_path}")
|
||||||
|
|
||||||
try:
|
parsed = json.loads(export_data)
|
||||||
graph, goal = load_agent_export(export_data)
|
graph, goal = load_agent_config(parsed)
|
||||||
except json.JSONDecodeError as exc:
|
logger.info(
|
||||||
raise ValueError(f"Invalid JSON in agent export file: {agent_json_path}") from exc
|
"Loaded declarative agent config from agent.json (name=%s)",
|
||||||
|
parsed.get("name"),
|
||||||
|
)
|
||||||
|
|
||||||
# Generate flowchart.json if missing (for legacy JSON-based agents)
|
# Generate flowchart.json if missing (for legacy JSON-based agents)
|
||||||
generate_fallback_flowchart(graph, goal, agent_path)
|
generate_fallback_flowchart(graph, goal, agent_path)
|
||||||
@@ -1396,60 +1555,6 @@ class AgentRunner:
|
|||||||
}
|
}
|
||||||
return self._tool_registry.register_mcp_server(server_config)
|
return self._tool_registry.register_mcp_server(server_config)
|
||||||
|
|
||||||
def _load_mcp_servers_from_config(self, config_path: Path) -> None:
|
|
||||||
"""Load and register MCP servers from a configuration file."""
|
|
||||||
self._tool_registry.load_mcp_config(config_path)
|
|
||||||
|
|
||||||
def _load_registry_mcp_servers(self, agent_path: Path) -> None:
|
|
||||||
"""Load and register MCP servers selected via ``mcp_registry.json``."""
|
|
||||||
registry_json = agent_path / "mcp_registry.json"
|
|
||||||
if registry_json.is_file():
|
|
||||||
self._tool_registry.set_mcp_registry_agent_path(agent_path)
|
|
||||||
else:
|
|
||||||
self._tool_registry.set_mcp_registry_agent_path(None)
|
|
||||||
|
|
||||||
from framework.runner.mcp_registry import MCPRegistry
|
|
||||||
|
|
||||||
try:
|
|
||||||
registry = MCPRegistry()
|
|
||||||
registry.initialize()
|
|
||||||
server_configs, selection_max_tools = registry.load_agent_selection(agent_path)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.warning(
|
|
||||||
"Failed to load MCP registry servers for '%s': %s",
|
|
||||||
agent_path.name,
|
|
||||||
exc,
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
if not server_configs:
|
|
||||||
return
|
|
||||||
|
|
||||||
results = self._tool_registry.load_registry_servers(
|
|
||||||
server_configs,
|
|
||||||
preserve_existing_tools=True,
|
|
||||||
log_collisions=True,
|
|
||||||
max_tools=selection_max_tools,
|
|
||||||
)
|
|
||||||
loaded = [result for result in results if result["status"] == "loaded"]
|
|
||||||
skipped = [result for result in results if result["status"] != "loaded"]
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"Loaded %d/%d MCP registry server(s) for agent '%s'",
|
|
||||||
len(loaded),
|
|
||||||
len(results),
|
|
||||||
agent_path.name,
|
|
||||||
)
|
|
||||||
if skipped:
|
|
||||||
logger.info(
|
|
||||||
"Skipped MCP registry servers for agent '%s': %s",
|
|
||||||
agent_path.name,
|
|
||||||
[
|
|
||||||
{"server": result["server"], "reason": result["skipped_reason"]}
|
|
||||||
for result in skipped
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
def set_approval_callback(self, callback: Callable) -> None:
|
def set_approval_callback(self, callback: Callable) -> None:
|
||||||
"""
|
"""
|
||||||
Set a callback for human-in-the-loop approval during execution.
|
Set a callback for human-in-the-loop approval during execution.
|
||||||
@@ -1460,272 +1565,119 @@ class AgentRunner:
|
|||||||
self._approval_callback = callback
|
self._approval_callback = callback
|
||||||
|
|
||||||
def _setup(self, event_bus=None) -> None:
|
def _setup(self, event_bus=None) -> None:
|
||||||
"""Set up runtime, LLM, and executor."""
|
"""Set up runtime via pipeline stages.
|
||||||
# Configure structured logging (auto-detects JSON vs human-readable)
|
|
||||||
|
Builds a pipeline with the default stages (LLM, credentials, MCP,
|
||||||
|
skills) and passes it to AgentHost. The stages initialize during
|
||||||
|
``AgentHost.start()`` and inject tools/LLM/credentials/skills.
|
||||||
|
"""
|
||||||
from framework.observability import configure_logging
|
from framework.observability import configure_logging
|
||||||
|
from framework.pipeline.stages.credential_resolver import CredentialResolverStage
|
||||||
|
from framework.pipeline.stages.llm_provider import LlmProviderStage
|
||||||
|
from framework.pipeline.stages.mcp_registry import McpRegistryStage
|
||||||
|
from framework.pipeline.stages.skill_registry import SkillRegistryStage
|
||||||
|
from framework.skills.config import SkillsConfig
|
||||||
|
|
||||||
configure_logging(level="INFO", format="auto")
|
configure_logging(level="INFO", format="auto")
|
||||||
|
|
||||||
# Set up session context for tools (agent_id)
|
# Set up session context for tools
|
||||||
agent_id = self.graph.id or "unknown"
|
agent_id = self.graph.id or "unknown"
|
||||||
|
self._tool_registry.set_session_context(agent_id=agent_id)
|
||||||
|
|
||||||
self._tool_registry.set_session_context(
|
# Read MCP server refs from agent.json
|
||||||
agent_id=agent_id,
|
mcp_refs = []
|
||||||
)
|
agent_json = self.agent_path / "agent.json"
|
||||||
|
if agent_json.exists():
|
||||||
|
try:
|
||||||
|
import json as _json
|
||||||
|
|
||||||
# Create LLM provider
|
data = _json.loads(agent_json.read_text(encoding="utf-8"))
|
||||||
# Uses LiteLLM which auto-detects the provider from model name
|
mcp_refs = data.get("mcp_servers", [])
|
||||||
# Skip if already injected (e.g. worker agents with a pre-built LLM)
|
except Exception:
|
||||||
if self._llm is not None:
|
pass
|
||||||
pass # LLM already configured externally
|
|
||||||
elif self.mock_mode:
|
|
||||||
# Use mock LLM for testing without real API calls
|
|
||||||
from framework.llm.mock import MockLLMProvider
|
|
||||||
|
|
||||||
self._llm = MockLLMProvider(model=self.model)
|
# Build default pipeline stages
|
||||||
else:
|
# Default infrastructure stages (always present)
|
||||||
from framework.llm.litellm import LiteLLMProvider
|
pipeline_stages = [
|
||||||
|
LlmProviderStage(
|
||||||
# Check if a subscription mode is configured
|
model=self.model,
|
||||||
config = get_hive_config()
|
mock_mode=self.mock_mode,
|
||||||
llm_config = config.get("llm", {})
|
llm=self._llm,
|
||||||
use_claude_code = llm_config.get("use_claude_code_subscription", False)
|
|
||||||
use_codex = llm_config.get("use_codex_subscription", False)
|
|
||||||
use_kimi_code = llm_config.get("use_kimi_code_subscription", False)
|
|
||||||
use_antigravity = llm_config.get("use_antigravity_subscription", False)
|
|
||||||
api_base = llm_config.get("api_base")
|
|
||||||
|
|
||||||
api_key = None
|
|
||||||
if use_claude_code:
|
|
||||||
# Get OAuth token from Claude Code subscription
|
|
||||||
api_key = get_claude_code_token()
|
|
||||||
if not api_key:
|
|
||||||
logger.warning(
|
|
||||||
"Claude Code subscription configured but no token found. "
|
|
||||||
"Run 'claude' to authenticate, then try again."
|
|
||||||
)
|
|
||||||
elif use_codex:
|
|
||||||
# Get OAuth token from Codex subscription
|
|
||||||
api_key = get_codex_token()
|
|
||||||
if not api_key:
|
|
||||||
logger.warning(
|
|
||||||
"Codex subscription configured but no token found. "
|
|
||||||
"Run 'codex' to authenticate, then try again."
|
|
||||||
)
|
|
||||||
elif use_kimi_code:
|
|
||||||
# Get API key from Kimi Code CLI config (~/.kimi/config.toml)
|
|
||||||
api_key = get_kimi_code_token()
|
|
||||||
if not api_key:
|
|
||||||
logger.warning(
|
|
||||||
"Kimi Code subscription configured but no key found. "
|
|
||||||
"Run 'kimi /login' to authenticate, then try again."
|
|
||||||
)
|
|
||||||
elif use_antigravity:
|
|
||||||
pass # AntigravityProvider handles credentials internally
|
|
||||||
|
|
||||||
if api_key and use_claude_code:
|
|
||||||
# Use litellm's built-in Anthropic OAuth support.
|
|
||||||
# The lowercase "authorization" key triggers OAuth detection which
|
|
||||||
# adds the required anthropic-beta and browser-access headers.
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model,
|
|
||||||
api_key=api_key,
|
|
||||||
api_base=api_base,
|
|
||||||
extra_headers={"authorization": f"Bearer {api_key}"},
|
|
||||||
)
|
|
||||||
elif api_key and use_codex:
|
|
||||||
# OpenAI Codex subscription routes through the ChatGPT backend
|
|
||||||
# (chatgpt.com/backend-api/codex/responses), NOT the standard
|
|
||||||
# OpenAI API. The consumer OAuth token lacks platform API scopes.
|
|
||||||
extra_headers: dict[str, str] = {
|
|
||||||
"Authorization": f"Bearer {api_key}",
|
|
||||||
"User-Agent": "CodexBar",
|
|
||||||
}
|
|
||||||
account_id = get_codex_account_id()
|
|
||||||
if account_id:
|
|
||||||
extra_headers["ChatGPT-Account-Id"] = account_id
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model,
|
|
||||||
api_key=api_key,
|
|
||||||
api_base="https://chatgpt.com/backend-api/codex",
|
|
||||||
extra_headers=extra_headers,
|
|
||||||
store=False,
|
|
||||||
allowed_openai_params=["store"],
|
|
||||||
)
|
|
||||||
elif api_key and use_kimi_code:
|
|
||||||
# Kimi Code subscription uses the Kimi coding API (OpenAI-compatible).
|
|
||||||
# The api_base is set automatically by LiteLLMProvider for kimi/ models.
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model,
|
|
||||||
api_key=api_key,
|
|
||||||
api_base=api_base,
|
|
||||||
)
|
|
||||||
elif use_antigravity:
|
|
||||||
# Direct OAuth to Google's internal Cloud Code Assist gateway.
|
|
||||||
# No local proxy required — AntigravityProvider handles token
|
|
||||||
# refresh and Gemini-format request/response conversion natively.
|
|
||||||
from framework.llm.antigravity import AntigravityProvider # noqa: PLC0415
|
|
||||||
|
|
||||||
provider = AntigravityProvider(model=self.model)
|
|
||||||
if not provider.has_credentials():
|
|
||||||
print(
|
|
||||||
"Warning: Antigravity credentials not found. "
|
|
||||||
"Run: uv run python core/antigravity_auth.py auth account add"
|
|
||||||
)
|
|
||||||
self._llm = provider
|
|
||||||
else:
|
|
||||||
# Local models (e.g. Ollama) don't need an API key
|
|
||||||
if self._is_local_model(self.model):
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model,
|
|
||||||
api_base=api_base,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Fall back to environment variable
|
|
||||||
# First check api_key_env_var from config (set by quickstart)
|
|
||||||
api_key_env = llm_config.get("api_key_env_var") or self._get_api_key_env_var(
|
|
||||||
self.model
|
|
||||||
)
|
|
||||||
if api_key_env and os.environ.get(api_key_env):
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model,
|
|
||||||
api_key=os.environ[api_key_env],
|
|
||||||
api_base=api_base,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Fall back to credential store
|
|
||||||
api_key = self._get_api_key_from_credential_store()
|
|
||||||
if api_key:
|
|
||||||
self._llm = LiteLLMProvider(
|
|
||||||
model=self.model, api_key=api_key, api_base=api_base
|
|
||||||
)
|
|
||||||
# Set env var so downstream code (e.g. cleanup LLM in
|
|
||||||
# node._extract_json) can also find it
|
|
||||||
if api_key_env:
|
|
||||||
os.environ[api_key_env] = api_key
|
|
||||||
elif api_key_env:
|
|
||||||
logger.warning(
|
|
||||||
"%s not set. LLM calls will fail. "
|
|
||||||
"Set it with: export %s=your-api-key",
|
|
||||||
api_key_env,
|
|
||||||
api_key_env,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Fail fast if the agent needs an LLM but none was configured
|
|
||||||
if self._llm is None:
|
|
||||||
has_llm_nodes = any(
|
|
||||||
node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
|
|
||||||
)
|
|
||||||
if has_llm_nodes:
|
|
||||||
from framework.credentials.models import CredentialError
|
|
||||||
|
|
||||||
if self._is_local_model(self.model):
|
|
||||||
raise CredentialError(
|
|
||||||
f"Failed to initialize LLM for local model '{self.model}'. "
|
|
||||||
f"Ensure your local LLM server is running "
|
|
||||||
f"(e.g. 'ollama serve' for Ollama)."
|
|
||||||
)
|
|
||||||
api_key_env = self._get_api_key_env_var(self.model)
|
|
||||||
hint = (
|
|
||||||
f"Set it with: export {api_key_env}=your-api-key"
|
|
||||||
if api_key_env
|
|
||||||
else "Configure an API key for your LLM provider."
|
|
||||||
)
|
|
||||||
raise CredentialError(f"LLM API key not found for model '{self.model}'. {hint}")
|
|
||||||
|
|
||||||
# For GCU nodes: auto-register GCU MCP server if needed, then expand tool lists
|
|
||||||
has_gcu_nodes = any(node.node_type == "gcu" for node in self.graph.nodes)
|
|
||||||
if has_gcu_nodes:
|
|
||||||
from framework.graph.gcu import GCU_MCP_SERVER_CONFIG, GCU_SERVER_NAME
|
|
||||||
|
|
||||||
# Auto-register GCU MCP server if tools aren't loaded yet
|
|
||||||
gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
|
|
||||||
if not gcu_tool_names:
|
|
||||||
# Resolve cwd to repo-level tools/ (not relative to agent_path)
|
|
||||||
gcu_config = dict(GCU_MCP_SERVER_CONFIG)
|
|
||||||
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
|
|
||||||
gcu_config["cwd"] = str(_repo_root / "tools")
|
|
||||||
self._tool_registry.register_mcp_server(gcu_config)
|
|
||||||
gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME)
|
|
||||||
|
|
||||||
# Expand each GCU node's tools list to include all GCU server tools
|
|
||||||
if gcu_tool_names:
|
|
||||||
for node in self.graph.nodes:
|
|
||||||
if node.node_type == "gcu":
|
|
||||||
existing = set(node.tools)
|
|
||||||
for tool_name in sorted(gcu_tool_names):
|
|
||||||
if tool_name not in existing:
|
|
||||||
node.tools.append(tool_name)
|
|
||||||
|
|
||||||
# For event_loop/gcu nodes: auto-register file tools MCP server, then expand tool lists
|
|
||||||
has_loop_nodes = any(node.node_type in ("event_loop", "gcu") for node in self.graph.nodes)
|
|
||||||
if has_loop_nodes:
|
|
||||||
from framework.graph.files import FILES_MCP_SERVER_CONFIG, FILES_MCP_SERVER_NAME
|
|
||||||
|
|
||||||
files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
|
|
||||||
if not files_tool_names:
|
|
||||||
# Resolve cwd to repo-level tools/ (not relative to agent_path)
|
|
||||||
files_config = dict(FILES_MCP_SERVER_CONFIG)
|
|
||||||
_repo_root = Path(__file__).resolve().parent.parent.parent.parent
|
|
||||||
files_config["cwd"] = str(_repo_root / "tools")
|
|
||||||
self._tool_registry.register_mcp_server(files_config)
|
|
||||||
files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME)
|
|
||||||
|
|
||||||
if files_tool_names:
|
|
||||||
for node in self.graph.nodes:
|
|
||||||
if node.node_type in ("event_loop", "gcu"):
|
|
||||||
existing = set(node.tools)
|
|
||||||
for tool_name in sorted(files_tool_names):
|
|
||||||
if tool_name not in existing:
|
|
||||||
node.tools.append(tool_name)
|
|
||||||
|
|
||||||
# Get tools for runtime
|
|
||||||
tools = list(self._tool_registry.get_tools().values())
|
|
||||||
tool_executor = self._tool_registry.get_executor()
|
|
||||||
|
|
||||||
# Collect connected account info for system prompt injection
|
|
||||||
accounts_prompt = ""
|
|
||||||
accounts_data: list[dict] | None = None
|
|
||||||
tool_provider_map: dict[str, str] | None = None
|
|
||||||
try:
|
|
||||||
from aden_tools.credentials.store_adapter import CredentialStoreAdapter
|
|
||||||
|
|
||||||
if self._credential_store is not None:
|
|
||||||
adapter = CredentialStoreAdapter(store=self._credential_store)
|
|
||||||
else:
|
|
||||||
adapter = CredentialStoreAdapter.default()
|
|
||||||
accounts_data = adapter.get_all_account_info()
|
|
||||||
tool_provider_map = adapter.get_tool_provider_map()
|
|
||||||
if accounts_data:
|
|
||||||
from framework.graph.prompting import build_accounts_prompt
|
|
||||||
|
|
||||||
accounts_prompt = build_accounts_prompt(accounts_data, tool_provider_map)
|
|
||||||
except Exception:
|
|
||||||
pass # Best-effort — agent works without account info
|
|
||||||
|
|
||||||
# Skill configuration — the runtime handles discovery, loading, trust-gating and
|
|
||||||
# prompt rasterization. The runner just builds the config.
|
|
||||||
from framework.skills.config import SkillsConfig
|
|
||||||
from framework.skills.manager import SkillsManagerConfig
|
|
||||||
|
|
||||||
skills_manager_config = SkillsManagerConfig(
|
|
||||||
skills_config=SkillsConfig.from_agent_vars(
|
|
||||||
default_skills=getattr(self, "_agent_default_skills", None),
|
|
||||||
skills=getattr(self, "_agent_skills", None),
|
|
||||||
),
|
),
|
||||||
project_root=self.agent_path,
|
CredentialResolverStage(
|
||||||
interactive=self._interactive,
|
credential_store=self._credential_store,
|
||||||
)
|
),
|
||||||
|
McpRegistryStage(
|
||||||
|
server_refs=mcp_refs,
|
||||||
|
agent_path=self.agent_path,
|
||||||
|
tool_registry=self._tool_registry,
|
||||||
|
),
|
||||||
|
SkillRegistryStage(
|
||||||
|
project_root=self.agent_path,
|
||||||
|
interactive=self._interactive,
|
||||||
|
skills_config=SkillsConfig.from_agent_vars(
|
||||||
|
default_skills=getattr(self, "_agent_default_skills", None),
|
||||||
|
skills=getattr(self, "_agent_skills", None),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
self._setup_agent_runtime(
|
# Merge user-configured stages from ~/.hive/configuration.json
|
||||||
tools,
|
from framework.config import get_hive_config
|
||||||
tool_executor,
|
from framework.pipeline.registry import build_pipeline_from_config
|
||||||
accounts_prompt=accounts_prompt,
|
|
||||||
accounts_data=accounts_data,
|
hive_config = get_hive_config()
|
||||||
tool_provider_map=tool_provider_map,
|
user_stages_config = hive_config.get("pipeline", {}).get("stages", [])
|
||||||
|
if user_stages_config:
|
||||||
|
user_pipeline = build_pipeline_from_config(user_stages_config)
|
||||||
|
pipeline_stages.extend(user_pipeline.stages)
|
||||||
|
|
||||||
|
# Merge agent-level overrides from agent.json pipeline field
|
||||||
|
if agent_json.exists():
|
||||||
|
try:
|
||||||
|
agent_pipeline = (
|
||||||
|
_json.loads(agent_json.read_text(encoding="utf-8"))
|
||||||
|
.get("pipeline", {})
|
||||||
|
.get("stages", [])
|
||||||
|
)
|
||||||
|
if agent_pipeline:
|
||||||
|
agent_stages = build_pipeline_from_config(agent_pipeline)
|
||||||
|
pipeline_stages.extend(agent_stages.stages)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Create AgentHost directly (no wrapper)
|
||||||
|
from framework.host.execution_manager import EntryPointSpec
|
||||||
|
from framework.orchestrator.checkpoint_config import CheckpointConfig
|
||||||
|
from framework.tracker.runtime_log_store import RuntimeLogStore
|
||||||
|
|
||||||
|
self._agent_runtime = AgentHost(
|
||||||
|
graph=self.graph,
|
||||||
|
goal=self.goal,
|
||||||
|
storage_path=self._storage_path,
|
||||||
|
runtime_log_store=RuntimeLogStore(
|
||||||
|
base_path=self._storage_path / "runtime_logs",
|
||||||
|
),
|
||||||
|
checkpoint_config=CheckpointConfig(
|
||||||
|
enabled=True,
|
||||||
|
checkpoint_on_node_complete=True,
|
||||||
|
checkpoint_max_age_days=7,
|
||||||
|
async_checkpoint=True,
|
||||||
|
),
|
||||||
|
graph_id=self.graph.id or self.agent_path.name,
|
||||||
event_bus=event_bus,
|
event_bus=event_bus,
|
||||||
skills_manager_config=skills_manager_config,
|
pipeline_stages=pipeline_stages,
|
||||||
)
|
)
|
||||||
|
self._agent_runtime.register_entry_point(
|
||||||
|
EntryPointSpec(
|
||||||
|
id="default",
|
||||||
|
name="Default",
|
||||||
|
entry_node=self.graph.entry_node,
|
||||||
|
trigger_type="manual",
|
||||||
|
isolation_level="shared",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
self._agent_runtime.intro_message = self.intro_message
|
||||||
|
|
||||||
def _get_api_key_env_var(self, model: str) -> str | None:
|
def _get_api_key_env_var(self, model: str) -> str | None:
|
||||||
"""Get the environment variable name for the API key based on model name."""
|
"""Get the environment variable name for the API key based on model name."""
|
||||||
@@ -1833,83 +1785,6 @@ class AgentRunner:
|
|||||||
)
|
)
|
||||||
return model.lower().startswith(LOCAL_PREFIXES)
|
return model.lower().startswith(LOCAL_PREFIXES)
|
||||||
|
|
||||||
def _setup_agent_runtime(
|
|
||||||
self,
|
|
||||||
tools: list,
|
|
||||||
tool_executor: Callable | None,
|
|
||||||
accounts_prompt: str = "",
|
|
||||||
accounts_data: list[dict] | None = None,
|
|
||||||
tool_provider_map: dict[str, str] | None = None,
|
|
||||||
event_bus=None,
|
|
||||||
skills_catalog_prompt: str = "",
|
|
||||||
protocols_prompt: str = "",
|
|
||||||
skill_dirs: list[str] | None = None,
|
|
||||||
skills_manager_config=None,
|
|
||||||
) -> None:
|
|
||||||
"""Set up multi-entry-point execution using AgentRuntime."""
|
|
||||||
entry_points = []
|
|
||||||
|
|
||||||
# Always create a primary entry point for the graph's entry node.
|
|
||||||
# For multi-entry-point agents this ensures the primary path (e.g.
|
|
||||||
# user-facing rule setup) is reachable alongside async entry points.
|
|
||||||
if self.graph.entry_node:
|
|
||||||
entry_points.insert(
|
|
||||||
0,
|
|
||||||
EntryPointSpec(
|
|
||||||
id="default",
|
|
||||||
name="Default",
|
|
||||||
entry_node=self.graph.entry_node,
|
|
||||||
trigger_type="manual",
|
|
||||||
isolation_level="shared",
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create AgentRuntime with all entry points
|
|
||||||
log_store = RuntimeLogStore(base_path=self._storage_path / "runtime_logs")
|
|
||||||
|
|
||||||
# Enable checkpointing by default for resumable sessions
|
|
||||||
from framework.graph.checkpoint_config import CheckpointConfig
|
|
||||||
|
|
||||||
checkpoint_config = CheckpointConfig(
|
|
||||||
enabled=True,
|
|
||||||
checkpoint_on_node_start=False, # Only checkpoint after nodes complete
|
|
||||||
checkpoint_on_node_complete=True,
|
|
||||||
checkpoint_max_age_days=7,
|
|
||||||
async_checkpoint=True, # Non-blocking
|
|
||||||
)
|
|
||||||
|
|
||||||
# Handle runtime_config - only pass through if it's actually an AgentRuntimeConfig.
|
|
||||||
# Agents may export a RuntimeConfig (LLM settings) or queen-generated custom classes
|
|
||||||
# that would crash AgentRuntime if passed through.
|
|
||||||
runtime_config = None
|
|
||||||
if self.runtime_config is not None:
|
|
||||||
from framework.runtime.agent_runtime import AgentRuntimeConfig
|
|
||||||
|
|
||||||
if isinstance(self.runtime_config, AgentRuntimeConfig):
|
|
||||||
runtime_config = self.runtime_config
|
|
||||||
|
|
||||||
self._agent_runtime = create_agent_runtime(
|
|
||||||
graph=self.graph,
|
|
||||||
goal=self.goal,
|
|
||||||
storage_path=self._storage_path,
|
|
||||||
entry_points=entry_points,
|
|
||||||
llm=self._llm,
|
|
||||||
tools=tools,
|
|
||||||
tool_executor=tool_executor,
|
|
||||||
runtime_log_store=log_store,
|
|
||||||
checkpoint_config=checkpoint_config,
|
|
||||||
config=runtime_config,
|
|
||||||
graph_id=self.graph.id or self.agent_path.name,
|
|
||||||
accounts_prompt=accounts_prompt,
|
|
||||||
accounts_data=accounts_data,
|
|
||||||
tool_provider_map=tool_provider_map,
|
|
||||||
event_bus=event_bus,
|
|
||||||
skills_manager_config=skills_manager_config,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Pass intro_message through for TUI display
|
|
||||||
self._agent_runtime.intro_message = self.intro_message
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Execution modes
|
# Execution modes
|
||||||
#
|
#
|
||||||
@@ -1990,7 +1865,7 @@ class AgentRunner:
|
|||||||
sub_ids: list[str] = []
|
sub_ids: list[str] = []
|
||||||
|
|
||||||
if has_queen and sys.stdin.isatty():
|
if has_queen and sys.stdin.isatty():
|
||||||
from framework.runtime.event_bus import EventType
|
from framework.host.event_bus import EventType
|
||||||
|
|
||||||
runtime = self._agent_runtime
|
runtime = self._agent_runtime
|
||||||
|
|
||||||
@@ -2246,7 +2121,7 @@ class AgentRunner:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
# aden_tools not installed - fall back to direct check
|
# aden_tools not installed - fall back to direct check
|
||||||
has_llm_nodes = any(
|
has_llm_nodes = any(
|
||||||
node.node_type in ("event_loop", "gcu") for node in self.graph.nodes
|
node.node_type == "event_loop" for node in self.graph.nodes
|
||||||
)
|
)
|
||||||
if has_llm_nodes:
|
if has_llm_nodes:
|
||||||
api_key_env = self._get_api_key_env_var(self.model)
|
api_key_env = self._get_api_key_env_var(self.model)
|
||||||
@@ -2283,7 +2158,7 @@ class AgentRunner:
|
|||||||
# Run synchronous cleanup
|
# Run synchronous cleanup
|
||||||
self.cleanup()
|
self.cleanup()
|
||||||
|
|
||||||
async def __aenter__(self) -> "AgentRunner":
|
async def __aenter__(self) -> "AgentLoader":
|
||||||
"""Context manager entry."""
|
"""Context manager entry."""
|
||||||
self._setup()
|
self._setup()
|
||||||
if self._agent_runtime is not None:
|
if self._agent_runtime is not None:
|
||||||
@@ -19,7 +19,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
|||||||
run_parser.add_argument(
|
run_parser.add_argument(
|
||||||
"agent_path",
|
"agent_path",
|
||||||
type=str,
|
type=str,
|
||||||
help="Path to agent folder (containing agent.json)",
|
help="Path to agent folder (containing agent.json or agent.py)",
|
||||||
)
|
)
|
||||||
run_parser.add_argument(
|
run_parser.add_argument(
|
||||||
"--input",
|
"--input",
|
||||||
@@ -87,7 +87,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
|||||||
info_parser.add_argument(
|
info_parser.add_argument(
|
||||||
"agent_path",
|
"agent_path",
|
||||||
type=str,
|
type=str,
|
||||||
help="Path to agent folder (containing agent.json)",
|
help="Path to agent folder (containing agent.json or agent.py)",
|
||||||
)
|
)
|
||||||
info_parser.add_argument(
|
info_parser.add_argument(
|
||||||
"--json",
|
"--json",
|
||||||
@@ -105,7 +105,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None:
|
|||||||
validate_parser.add_argument(
|
validate_parser.add_argument(
|
||||||
"agent_path",
|
"agent_path",
|
||||||
type=str,
|
type=str,
|
||||||
help="Path to agent folder (containing agent.json)",
|
help="Path to agent folder (containing agent.json or agent.py)",
|
||||||
)
|
)
|
||||||
validate_parser.set_defaults(func=cmd_validate)
|
validate_parser.set_defaults(func=cmd_validate)
|
||||||
|
|
||||||
@@ -310,7 +310,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):
|
|||||||
Updated runner if user proceeds, None if user aborts.
|
Updated runner if user proceeds, None if user aborts.
|
||||||
"""
|
"""
|
||||||
from framework.credentials.setup import CredentialSetupSession
|
from framework.credentials.setup import CredentialSetupSession
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
print()
|
print()
|
||||||
@@ -328,7 +328,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None):
|
|||||||
if result.success:
|
if result.success:
|
||||||
# Reload runner with updated credentials
|
# Reload runner with updated credentials
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(agent_path, model=model)
|
runner = AgentLoader.load(agent_path, model=model)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error reloading agent: {e}")
|
print(f"Error reloading agent: {e}")
|
||||||
return None
|
return None
|
||||||
@@ -342,7 +342,7 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|||||||
|
|
||||||
from framework.credentials.models import CredentialError
|
from framework.credentials.models import CredentialError
|
||||||
from framework.observability import configure_logging
|
from framework.observability import configure_logging
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
# Set logging level (quiet by default for cleaner output)
|
# Set logging level (quiet by default for cleaner output)
|
||||||
if args.quiet:
|
if args.quiet:
|
||||||
@@ -390,7 +390,7 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|||||||
# Standard execution
|
# Standard execution
|
||||||
# AgentRunner handles credential setup interactively when stdin is a TTY.
|
# AgentRunner handles credential setup interactively when stdin is a TTY.
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(
|
runner = AgentLoader.load(
|
||||||
args.agent_path,
|
args.agent_path,
|
||||||
model=args.model,
|
model=args.model,
|
||||||
)
|
)
|
||||||
@@ -528,10 +528,10 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|||||||
def cmd_info(args: argparse.Namespace) -> int:
|
def cmd_info(args: argparse.Namespace) -> int:
|
||||||
"""Show agent information."""
|
"""Show agent information."""
|
||||||
from framework.credentials.models import CredentialError
|
from framework.credentials.models import CredentialError
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(args.agent_path)
|
runner = AgentLoader.load(args.agent_path)
|
||||||
except CredentialError as e:
|
except CredentialError as e:
|
||||||
print(f"\n{e}", file=sys.stderr)
|
print(f"\n{e}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
@@ -595,10 +595,10 @@ def cmd_info(args: argparse.Namespace) -> int:
|
|||||||
def cmd_validate(args: argparse.Namespace) -> int:
|
def cmd_validate(args: argparse.Namespace) -> int:
|
||||||
"""Validate an exported agent."""
|
"""Validate an exported agent."""
|
||||||
from framework.credentials.models import CredentialError
|
from framework.credentials.models import CredentialError
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(args.agent_path)
|
runner = AgentLoader.load(args.agent_path)
|
||||||
except CredentialError as e:
|
except CredentialError as e:
|
||||||
print(f"\n{e}", file=sys.stderr)
|
print(f"\n{e}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
@@ -632,7 +632,7 @@ def cmd_validate(args: argparse.Namespace) -> int:
|
|||||||
|
|
||||||
def cmd_list(args: argparse.Namespace) -> int:
|
def cmd_list(args: argparse.Namespace) -> int:
|
||||||
"""List available agents."""
|
"""List available agents."""
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
directory = Path(args.directory)
|
directory = Path(args.directory)
|
||||||
if not directory.exists():
|
if not directory.exists():
|
||||||
@@ -644,7 +644,7 @@ def cmd_list(args: argparse.Namespace) -> int:
|
|||||||
for path in directory.iterdir():
|
for path in directory.iterdir():
|
||||||
if _is_valid_agent_dir(path):
|
if _is_valid_agent_dir(path):
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(path)
|
runner = AgentLoader.load(path)
|
||||||
info = runner.info()
|
info = runner.info()
|
||||||
agents.append(
|
agents.append(
|
||||||
{
|
{
|
||||||
@@ -686,7 +686,7 @@ def cmd_list(args: argparse.Namespace) -> int:
|
|||||||
|
|
||||||
def _interactive_approval(request):
|
def _interactive_approval(request):
|
||||||
"""Interactive approval callback for HITL mode."""
|
"""Interactive approval callback for HITL mode."""
|
||||||
from framework.graph import ApprovalDecision, ApprovalResult
|
from framework.orchestrator import ApprovalDecision, ApprovalResult
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
@@ -775,7 +775,7 @@ def cmd_shell(args: argparse.Namespace) -> int:
|
|||||||
|
|
||||||
from framework.credentials.models import CredentialError
|
from framework.credentials.models import CredentialError
|
||||||
from framework.observability import configure_logging
|
from framework.observability import configure_logging
|
||||||
from framework.runner import AgentRunner
|
from framework.loader import AgentLoader
|
||||||
|
|
||||||
configure_logging(level="INFO")
|
configure_logging(level="INFO")
|
||||||
|
|
||||||
@@ -789,7 +789,7 @@ def cmd_shell(args: argparse.Namespace) -> int:
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
try:
|
try:
|
||||||
runner = AgentRunner.load(agent_path)
|
runner = AgentLoader.load(agent_path)
|
||||||
except CredentialError as e:
|
except CredentialError as e:
|
||||||
print(f"\n{e}", file=sys.stderr)
|
print(f"\n{e}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
@@ -1004,17 +1004,35 @@ def _get_framework_agents_dir() -> Path:
|
|||||||
|
|
||||||
|
|
||||||
def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
|
def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]:
|
||||||
"""Extract name and description from a Python-based agent's config.py.
|
"""Extract name and description from an agent directory.
|
||||||
|
|
||||||
Uses AST parsing to safely extract values without executing code.
|
Checks agent.json first (declarative), then falls back to config.py
|
||||||
|
(legacy Python). Uses AST parsing for Python to avoid executing code.
|
||||||
Returns (name, description) tuple, with fallbacks if parsing fails.
|
Returns (name, description) tuple, with fallbacks if parsing fails.
|
||||||
"""
|
"""
|
||||||
import ast
|
import ast
|
||||||
|
|
||||||
config_path = agent_path / "config.py"
|
|
||||||
fallback_name = agent_path.name.replace("_", " ").title()
|
fallback_name = agent_path.name.replace("_", " ").title()
|
||||||
fallback_desc = "(Python-based agent)"
|
fallback_desc = "(Python-based agent)"
|
||||||
|
|
||||||
|
# Declarative agent: read from agent.json
|
||||||
|
agent_json = agent_path / "agent.json"
|
||||||
|
if agent_json.exists():
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
|
||||||
|
data = json.loads(agent_json.read_text(encoding="utf-8"))
|
||||||
|
if isinstance(data, dict):
|
||||||
|
name = data.get("name", fallback_name)
|
||||||
|
# Convert kebab-case to Title Case for display
|
||||||
|
if "-" in name and " " not in name:
|
||||||
|
name = name.replace("-", " ").title()
|
||||||
|
desc = data.get("description", fallback_desc)
|
||||||
|
return name, desc
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
config_path = agent_path / "config.py"
|
||||||
if not config_path.exists():
|
if not config_path.exists():
|
||||||
return fallback_name, fallback_desc
|
return fallback_name, fallback_desc
|
||||||
|
|
||||||
@@ -1083,7 +1101,7 @@ def _is_valid_agent_dir(path: Path) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
def _has_agents(directory: Path) -> bool:
|
def _has_agents(directory: Path) -> bool:
|
||||||
"""Check if a directory contains any valid agents (folders with agent.json or agent.py)."""
|
"""Check if a directory contains any valid agents."""
|
||||||
if not directory.exists():
|
if not directory.exists():
|
||||||
return False
|
return False
|
||||||
return any(_is_valid_agent_dir(p) for p in directory.iterdir())
|
return any(_is_valid_agent_dir(p) for p in directory.iterdir())
|
||||||
@@ -14,7 +14,7 @@ from typing import Any, Literal
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from framework.runner.mcp_errors import MCPToolNotFoundError
|
from framework.loader.mcp_errors import MCPToolNotFoundError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
+1
-1
@@ -5,7 +5,7 @@ import threading
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from framework.runner.mcp_client import MCPClient, MCPServerConfig
|
from framework.loader.mcp_client import MCPClient, MCPServerConfig
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -14,9 +14,9 @@ from typing import Any, Literal
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from framework.runner.mcp_client import MCPClient, MCPServerConfig
|
from framework.loader.mcp_client import MCPClient, MCPServerConfig
|
||||||
from framework.runner.mcp_connection_manager import MCPConnectionManager
|
from framework.loader.mcp_connection_manager import MCPConnectionManager
|
||||||
from framework.runner.mcp_errors import (
|
from framework.loader.mcp_errors import (
|
||||||
MCPError,
|
MCPError,
|
||||||
MCPErrorCode,
|
MCPErrorCode,
|
||||||
MCPInstallError,
|
MCPInstallError,
|
||||||
+1
-1
@@ -28,7 +28,7 @@ from typing import Any
|
|||||||
|
|
||||||
def _get_registry(base_path: Path | None = None):
|
def _get_registry(base_path: Path | None = None):
|
||||||
"""Initialize and return an MCPRegistry instance."""
|
"""Initialize and return an MCPRegistry instance."""
|
||||||
from framework.runner.mcp_registry import MCPRegistry
|
from framework.loader.mcp_registry import MCPRegistry
|
||||||
|
|
||||||
registry = MCPRegistry(base_path=base_path)
|
registry = MCPRegistry(base_path=base_path)
|
||||||
registry.initialize()
|
registry.initialize()
|
||||||
+2
-2
@@ -11,8 +11,8 @@ from dataclasses import dataclass, field
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
from framework.graph.node import NodeSpec
|
from framework.orchestrator.node import NodeSpec
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -262,15 +262,21 @@ class ToolRegistry:
|
|||||||
is_error=False,
|
is_error=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
registry_ref = self
|
||||||
|
|
||||||
def executor(tool_use: ToolUse) -> ToolResult:
|
def executor(tool_use: ToolUse) -> ToolResult:
|
||||||
if tool_use.name not in self._tools:
|
# Check if credential files changed (lightweight dir listing).
|
||||||
|
# If new OAuth tokens appeared, restarts MCP servers to pick them up.
|
||||||
|
registry_ref.resync_mcp_servers_if_needed()
|
||||||
|
|
||||||
|
if tool_use.name not in registry_ref._tools:
|
||||||
return ToolResult(
|
return ToolResult(
|
||||||
tool_use_id=tool_use.id,
|
tool_use_id=tool_use.id,
|
||||||
content=json.dumps({"error": f"Unknown tool: {tool_use.name}"}),
|
content=json.dumps({"error": f"Unknown tool: {tool_use.name}"}),
|
||||||
is_error=True,
|
is_error=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
registered = self._tools[tool_use.name]
|
registered = registry_ref._tools[tool_use.name]
|
||||||
try:
|
try:
|
||||||
result = registered.executor(tool_use.input)
|
result = registered.executor(tool_use.input)
|
||||||
|
|
||||||
@@ -635,8 +641,8 @@ class ToolRegistry:
|
|||||||
Number of tools registered from this server
|
Number of tools registered from this server
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from framework.runner.mcp_client import MCPClient, MCPServerConfig
|
from framework.loader.mcp_client import MCPClient, MCPServerConfig
|
||||||
from framework.runner.mcp_connection_manager import MCPConnectionManager
|
from framework.loader.mcp_connection_manager import MCPConnectionManager
|
||||||
|
|
||||||
# Build config object
|
# Build config object
|
||||||
config = MCPServerConfig(
|
config = MCPServerConfig(
|
||||||
@@ -883,7 +889,7 @@ class ToolRegistry:
|
|||||||
"""Re-run ``mcp_registry.json`` resolution and register servers (post-resync)."""
|
"""Re-run ``mcp_registry.json`` resolution and register servers (post-resync)."""
|
||||||
if self._mcp_registry_agent_path is None:
|
if self._mcp_registry_agent_path is None:
|
||||||
return
|
return
|
||||||
from framework.runner.mcp_registry import MCPRegistry
|
from framework.loader.mcp_registry import MCPRegistry
|
||||||
|
|
||||||
try:
|
try:
|
||||||
reg = MCPRegistry()
|
reg = MCPRegistry()
|
||||||
@@ -922,6 +928,11 @@ class ToolRegistry:
|
|||||||
clients and re-loads them so the new subprocess picks up the fresh
|
clients and re-loads them so the new subprocess picks up the fresh
|
||||||
credentials.
|
credentials.
|
||||||
|
|
||||||
|
Note: Individual credential TTL/refresh is handled by the MCP server
|
||||||
|
process internally -- it resolves tokens from the credential store
|
||||||
|
on every tool call, not at startup. This method only handles the case
|
||||||
|
where entirely new credential files appear.
|
||||||
|
|
||||||
Returns True if a resync was performed, False otherwise.
|
Returns True if a resync was performed, False otherwise.
|
||||||
"""
|
"""
|
||||||
if not self._mcp_clients or self._mcp_config_path is None:
|
if not self._mcp_clients or self._mcp_config_path is None:
|
||||||
@@ -975,7 +986,7 @@ class ToolRegistry:
|
|||||||
server_name = self._mcp_client_servers.get(client_id, client.config.name)
|
server_name = self._mcp_client_servers.get(client_id, client.config.name)
|
||||||
try:
|
try:
|
||||||
if client_id in self._mcp_managed_clients:
|
if client_id in self._mcp_managed_clients:
|
||||||
from framework.runner.mcp_connection_manager import MCPConnectionManager
|
from framework.loader.mcp_connection_manager import MCPConnectionManager
|
||||||
|
|
||||||
MCPConnectionManager.get_instance().release(server_name)
|
MCPConnectionManager.get_instance().release(server_name)
|
||||||
else:
|
else:
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
"""Orchestrator layer -- how agents are composed via graphs.
|
||||||
|
|
||||||
|
Lazy imports to avoid circular dependencies with graph/event_loop/*.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def __getattr__(name: str):
|
||||||
|
if name in ("GraphContext",):
|
||||||
|
from framework.orchestrator.context import GraphContext
|
||||||
|
return GraphContext
|
||||||
|
if name in ("DEFAULT_MAX_TOKENS", "EdgeCondition", "EdgeSpec", "GraphSpec"):
|
||||||
|
from framework.orchestrator import edge as _e
|
||||||
|
return getattr(_e, name)
|
||||||
|
if name in ("Orchestrator", "ExecutionResult"):
|
||||||
|
from framework.orchestrator import orchestrator as _o
|
||||||
|
return getattr(_o, name)
|
||||||
|
if name in ("Constraint", "Goal", "GoalStatus", "SuccessCriterion"):
|
||||||
|
from framework.orchestrator import goal as _g
|
||||||
|
return getattr(_g, name)
|
||||||
|
if name in ("DataBuffer", "NodeContext", "NodeProtocol", "NodeResult", "NodeSpec"):
|
||||||
|
from framework.orchestrator import node as _n
|
||||||
|
return getattr(_n, name)
|
||||||
|
if name in ("NodeWorker", "Activation", "FanOutTag", "FanOutTracker",
|
||||||
|
"WorkerCompletion", "WorkerLifecycle"):
|
||||||
|
from framework.orchestrator import node_worker as _nw
|
||||||
|
return getattr(_nw, name)
|
||||||
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||||
@@ -16,7 +16,7 @@ from collections.abc import AsyncIterator
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.runtime.event_bus import EventBus
|
from framework.host.event_bus import EventBus
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -13,10 +13,10 @@ import asyncio
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
from framework.graph.goal import Goal
|
from framework.orchestrator.goal import Goal
|
||||||
from framework.graph.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
|
from framework.orchestrator.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec
|
||||||
from framework.runtime.core import Runtime
|
from framework.tracker.decision_tracker import DecisionTracker
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -26,7 +26,7 @@ class GraphContext:
|
|||||||
graph: GraphSpec
|
graph: GraphSpec
|
||||||
goal: Goal
|
goal: Goal
|
||||||
buffer: DataBuffer
|
buffer: DataBuffer
|
||||||
runtime: Runtime
|
runtime: DecisionTracker
|
||||||
llm: Any # LLMProvider
|
llm: Any # LLMProvider
|
||||||
tools: list[Any] # list[Tool]
|
tools: list[Any] # list[Tool]
|
||||||
tool_executor: Any # Callable
|
tool_executor: Any # Callable
|
||||||
@@ -106,7 +106,7 @@ def build_node_accounts_prompt(
|
|||||||
|
|
||||||
resolved = accounts_prompt
|
resolved = accounts_prompt
|
||||||
if accounts_data and tool_provider_map:
|
if accounts_data and tool_provider_map:
|
||||||
from framework.graph.prompting import build_accounts_prompt
|
from framework.orchestrator.prompting import build_accounts_prompt
|
||||||
|
|
||||||
filtered = build_accounts_prompt(
|
filtered = build_accounts_prompt(
|
||||||
accounts_data,
|
accounts_data,
|
||||||
@@ -125,11 +125,27 @@ def _resolve_available_tools(
|
|||||||
tools: list[Any],
|
tools: list[Any],
|
||||||
override_tools: list[Any] | None,
|
override_tools: list[Any] | None,
|
||||||
) -> list[Any]:
|
) -> list[Any]:
|
||||||
"""Select tools available to the current node."""
|
"""Select tools available to the current node.
|
||||||
|
|
||||||
|
Respects ``node_spec.tool_access_policy``:
|
||||||
|
- ``"all"`` -- all tools from the registry (no filtering).
|
||||||
|
- ``"explicit"`` -- only tools whose name appears in ``node_spec.tools``.
|
||||||
|
If the list is empty, **no tools** are given (default-deny).
|
||||||
|
- ``"none"`` -- no tools at all.
|
||||||
|
"""
|
||||||
|
|
||||||
if override_tools is not None:
|
if override_tools is not None:
|
||||||
return list(override_tools)
|
return list(override_tools)
|
||||||
|
|
||||||
|
policy = getattr(node_spec, "tool_access_policy", "explicit")
|
||||||
|
|
||||||
|
if policy == "none":
|
||||||
|
return []
|
||||||
|
|
||||||
|
if policy == "all":
|
||||||
|
return list(tools)
|
||||||
|
|
||||||
|
# "explicit" (default): only tools named in node_spec.tools.
|
||||||
if not node_spec.tools:
|
if not node_spec.tools:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@@ -149,7 +165,7 @@ def _derive_input_data(buffer: DataBuffer, input_keys: list[str]) -> dict[str, A
|
|||||||
|
|
||||||
def build_node_context(
|
def build_node_context(
|
||||||
*,
|
*,
|
||||||
runtime: Runtime,
|
runtime: DecisionTracker,
|
||||||
node_spec: NodeSpec,
|
node_spec: NodeSpec,
|
||||||
buffer: DataBuffer,
|
buffer: DataBuffer,
|
||||||
goal: Goal,
|
goal: Goal,
|
||||||
@@ -234,9 +250,6 @@ def build_node_context(
|
|||||||
execution_id=execution_id,
|
execution_id=execution_id,
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
stream_id=stream_id,
|
stream_id=stream_id,
|
||||||
node_registry=node_registry or {},
|
|
||||||
all_tools=list(all_tools or tools),
|
|
||||||
shared_node_registry=shared_node_registry or {},
|
|
||||||
dynamic_tools_provider=dynamic_tools_provider,
|
dynamic_tools_provider=dynamic_tools_provider,
|
||||||
dynamic_prompt_provider=dynamic_prompt_provider,
|
dynamic_prompt_provider=dynamic_prompt_provider,
|
||||||
dynamic_memory_provider=dynamic_memory_provider,
|
dynamic_memory_provider=dynamic_memory_provider,
|
||||||
@@ -308,9 +321,6 @@ def build_node_context_from_graph_context(
|
|||||||
execution_id=gc.execution_id,
|
execution_id=gc.execution_id,
|
||||||
run_id=gc.run_id,
|
run_id=gc.run_id,
|
||||||
stream_id=gc.stream_id,
|
stream_id=gc.stream_id,
|
||||||
node_registry=node_registry or gc.node_spec_registry,
|
|
||||||
all_tools=gc.tools,
|
|
||||||
shared_node_registry=gc.node_registry,
|
|
||||||
dynamic_tools_provider=gc.dynamic_tools_provider,
|
dynamic_tools_provider=gc.dynamic_tools_provider,
|
||||||
dynamic_prompt_provider=gc.dynamic_prompt_provider,
|
dynamic_prompt_provider=gc.dynamic_prompt_provider,
|
||||||
dynamic_memory_provider=gc.dynamic_memory_provider,
|
dynamic_memory_provider=gc.dynamic_memory_provider,
|
||||||
+2
-2
@@ -6,10 +6,10 @@ import logging
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
from framework.graph.conversation import _try_extract_key
|
from framework.agent_loop.conversation import _try_extract_key
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.conversation import NodeConversation
|
from framework.agent_loop.conversation import NodeConversation
|
||||||
from framework.llm.provider import LLMProvider
|
from framework.llm.provider import LLMProvider
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
+1
-1
@@ -15,7 +15,7 @@ import logging
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.conversation import NodeConversation
|
from framework.agent_loop.conversation import NodeConversation
|
||||||
from framework.llm.provider import LLMProvider
|
from framework.llm.provider import LLMProvider
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -29,7 +29,7 @@ from typing import Any
|
|||||||
|
|
||||||
from pydantic import BaseModel, Field, model_validator
|
from pydantic import BaseModel, Field, model_validator
|
||||||
|
|
||||||
from framework.graph.safe_eval import safe_eval
|
from framework.orchestrator.safe_eval import safe_eval
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -538,13 +538,6 @@ class GraphSpec(BaseModel):
|
|||||||
for edge in self.get_outgoing_edges(current):
|
for edge in self.get_outgoing_edges(current):
|
||||||
to_visit.append(edge.target)
|
to_visit.append(edge.target)
|
||||||
|
|
||||||
# Also mark sub-agents as reachable (they're invoked via delegate_to_sub_agent, not edges)
|
|
||||||
for node in self.nodes:
|
|
||||||
if node.id in reachable:
|
|
||||||
sub_agents = getattr(node, "sub_agents", []) or []
|
|
||||||
for sub_agent_id in sub_agents:
|
|
||||||
reachable.add(sub_agent_id)
|
|
||||||
|
|
||||||
for node in self.nodes:
|
for node in self.nodes:
|
||||||
if node.id not in reachable:
|
if node.id not in reachable:
|
||||||
# Skip if node is a pause node or entry point target
|
# Skip if node is a pause node or entry point target
|
||||||
@@ -583,48 +576,4 @@ class GraphSpec(BaseModel):
|
|||||||
else:
|
else:
|
||||||
seen_keys[key] = node_id
|
seen_keys[key] = node_id
|
||||||
|
|
||||||
# GCU nodes must only be used as subagents
|
|
||||||
gcu_node_ids = {n.id for n in self.nodes if n.node_type == "gcu"}
|
|
||||||
if gcu_node_ids:
|
|
||||||
# GCU nodes must not be entry nodes
|
|
||||||
if self.entry_node in gcu_node_ids:
|
|
||||||
errors.append(
|
|
||||||
f"GCU node '{self.entry_node}' is used as entry node. "
|
|
||||||
"GCU nodes must only be used as subagents via delegate_to_sub_agent()."
|
|
||||||
)
|
|
||||||
|
|
||||||
# GCU nodes must not be terminal nodes
|
|
||||||
for term in self.terminal_nodes:
|
|
||||||
if term in gcu_node_ids:
|
|
||||||
errors.append(
|
|
||||||
f"GCU node '{term}' is used as terminal node. "
|
|
||||||
"GCU nodes must only be used as subagents."
|
|
||||||
)
|
|
||||||
|
|
||||||
# GCU nodes must not be connected via edges
|
|
||||||
for edge in self.edges:
|
|
||||||
if edge.source in gcu_node_ids:
|
|
||||||
errors.append(
|
|
||||||
f"GCU node '{edge.source}' is used as edge source (edge '{edge.id}'). "
|
|
||||||
"GCU nodes must only be used as subagents, not connected via edges."
|
|
||||||
)
|
|
||||||
if edge.target in gcu_node_ids:
|
|
||||||
errors.append(
|
|
||||||
f"GCU node '{edge.target}' is used as edge target (edge '{edge.id}'). "
|
|
||||||
"GCU nodes must only be used as subagents, not connected via edges."
|
|
||||||
)
|
|
||||||
|
|
||||||
# GCU nodes must be referenced in at least one parent's sub_agents
|
|
||||||
referenced_subagents = set()
|
|
||||||
for node in self.nodes:
|
|
||||||
for sa_id in node.sub_agents or []:
|
|
||||||
referenced_subagents.add(sa_id)
|
|
||||||
|
|
||||||
orphaned = gcu_node_ids - referenced_subagents
|
|
||||||
for nid in orphaned:
|
|
||||||
errors.append(
|
|
||||||
f"GCU node '{nid}' is not referenced in any node's sub_agents list. "
|
|
||||||
"GCU nodes must be declared as subagents of a parent node."
|
|
||||||
)
|
|
||||||
|
|
||||||
return {"errors": errors, "warnings": warnings}
|
return {"errors": errors, "warnings": warnings}
|
||||||
@@ -1,34 +1,14 @@
|
|||||||
"""GCU (browser automation) node type constants.
|
"""Browser automation best-practices prompt.
|
||||||
|
|
||||||
A ``gcu`` node is an ``event_loop`` node with two automatic enhancements:
|
This module provides ``GCU_BROWSER_SYSTEM_PROMPT`` -- a canonical set of
|
||||||
1. A canonical browser best-practices system prompt is prepended.
|
browser automation guidelines that can be included in any node's system
|
||||||
2. All tools from the GCU MCP server are auto-included.
|
prompt that uses browser tools from the gcu-tools MCP server.
|
||||||
|
|
||||||
No new ``NodeProtocol`` subclass — the ``gcu`` type is purely a declarative
|
Browser tools are registered via the global MCP registry (gcu-tools).
|
||||||
signal processed by the runner and executor at setup time.
|
Nodes that need browser access declare ``tools: {policy: "all"}`` in their
|
||||||
|
agent.json config.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# MCP server identity
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
GCU_SERVER_NAME = "gcu-tools"
|
|
||||||
"""Name used to identify the GCU MCP server in ``mcp_servers.json``."""
|
|
||||||
|
|
||||||
GCU_MCP_SERVER_CONFIG: dict = {
|
|
||||||
"name": GCU_SERVER_NAME,
|
|
||||||
"transport": "stdio",
|
|
||||||
"command": "uv",
|
|
||||||
"args": ["run", "python", "-m", "gcu.server", "--stdio"],
|
|
||||||
"cwd": "../../tools",
|
|
||||||
"description": "GCU tools for browser automation",
|
|
||||||
}
|
|
||||||
"""Default stdio config for the GCU MCP server (relative to exports/<agent>/)."""
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Browser best-practices system prompt
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
GCU_BROWSER_SYSTEM_PROMPT = """\
|
GCU_BROWSER_SYSTEM_PROMPT = """\
|
||||||
# Browser Automation Best Practices
|
# Browser Automation Best Practices
|
||||||
|
|
||||||
@@ -25,7 +25,7 @@ from typing import Any
|
|||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from framework.llm.provider import LLMProvider, Tool
|
from framework.llm.provider import LLMProvider, Tool
|
||||||
from framework.runtime.core import Runtime
|
from framework.tracker.decision_tracker import DecisionTracker
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -144,15 +144,19 @@ class NodeSpec(BaseModel):
|
|||||||
# For LLM nodes
|
# For LLM nodes
|
||||||
system_prompt: str | None = Field(default=None, description="System prompt for LLM nodes")
|
system_prompt: str | None = Field(default=None, description="System prompt for LLM nodes")
|
||||||
tools: list[str] = Field(default_factory=list, description="Tool names this node can use")
|
tools: list[str] = Field(default_factory=list, description="Tool names this node can use")
|
||||||
|
tool_access_policy: str = Field(
|
||||||
|
default="explicit",
|
||||||
|
description=(
|
||||||
|
"Tool access policy for this node. "
|
||||||
|
"'all' = all tools from registry, "
|
||||||
|
"'explicit' = only tools listed in `tools` (default, recommended), "
|
||||||
|
"'none' = no tools at all."
|
||||||
|
),
|
||||||
|
)
|
||||||
model: str | None = Field(
|
model: str | None = Field(
|
||||||
default=None, description="Specific model to use (defaults to graph default)"
|
default=None, description="Specific model to use (defaults to graph default)"
|
||||||
)
|
)
|
||||||
|
|
||||||
# For subagent delegation
|
|
||||||
sub_agents: list[str] = Field(
|
|
||||||
default_factory=list,
|
|
||||||
description="Node IDs that can be invoked as subagents from this node",
|
|
||||||
)
|
|
||||||
# For function nodes
|
# For function nodes
|
||||||
function: str | None = Field(
|
function: str | None = Field(
|
||||||
default=None, description="Function name or path for function nodes"
|
default=None, description="Function name or path for function nodes"
|
||||||
@@ -459,7 +463,7 @@ class NodeContext:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Core runtime
|
# Core runtime
|
||||||
runtime: Runtime
|
runtime: DecisionTracker
|
||||||
|
|
||||||
# Node identity
|
# Node identity
|
||||||
node_id: str
|
node_id: str
|
||||||
@@ -526,20 +530,6 @@ class NodeContext:
|
|||||||
# Falls back to node_id when not set (legacy / standalone executor).
|
# Falls back to node_id when not set (legacy / standalone executor).
|
||||||
stream_id: str = ""
|
stream_id: str = ""
|
||||||
|
|
||||||
# Subagent mode
|
|
||||||
is_subagent_mode: bool = False # True when running as a subagent (prevents nested delegation)
|
|
||||||
report_callback: Any = None # async (message: str, data: dict | None) -> None
|
|
||||||
node_registry: dict[str, "NodeSpec"] = field(default_factory=dict) # For subagent lookup
|
|
||||||
|
|
||||||
# Full tool catalog (unfiltered) — used by _execute_subagent to resolve
|
|
||||||
# subagent tools that aren't in the parent node's filtered available_tools.
|
|
||||||
all_tools: list[Tool] = field(default_factory=list)
|
|
||||||
|
|
||||||
# Shared reference to the executor's node_registry — used by subagent
|
|
||||||
# escalation (_EscalationReceiver) to register temporary receivers that
|
|
||||||
# the inject_input() routing chain can find.
|
|
||||||
shared_node_registry: dict[str, Any] = field(default_factory=dict)
|
|
||||||
|
|
||||||
# Dynamic tool provider — when set, EventLoopNode rebuilds the tool
|
# Dynamic tool provider — when set, EventLoopNode rebuilds the tool
|
||||||
# list from this callback at the start of each iteration. Used by
|
# list from this callback at the start of each iteration. Used by
|
||||||
# the queen to switch between building-mode and running-mode tools.
|
# the queen to switch between building-mode and running-mode tools.
|
||||||
@@ -19,15 +19,15 @@ from dataclasses import dataclass, field
|
|||||||
from enum import StrEnum
|
from enum import StrEnum
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.context import GraphContext, build_node_context_from_graph_context
|
from framework.orchestrator.context import GraphContext, build_node_context_from_graph_context
|
||||||
from framework.graph.edge import EdgeCondition, EdgeSpec
|
from framework.orchestrator.edge import EdgeCondition, EdgeSpec
|
||||||
from framework.graph.node import (
|
from framework.orchestrator.node import (
|
||||||
NodeContext,
|
NodeContext,
|
||||||
NodeProtocol,
|
NodeProtocol,
|
||||||
NodeResult,
|
NodeResult,
|
||||||
NodeSpec,
|
NodeSpec,
|
||||||
)
|
)
|
||||||
from framework.graph.validator import OutputValidator
|
from framework.orchestrator.validator import OutputValidator
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -109,7 +109,7 @@ class RetryState:
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
class WorkerAgent:
|
class NodeWorker:
|
||||||
"""First-class autonomous worker for one node in the graph.
|
"""First-class autonomous worker for one node in the graph.
|
||||||
|
|
||||||
Lifecycle:
|
Lifecycle:
|
||||||
@@ -355,7 +355,7 @@ class WorkerAgent:
|
|||||||
# Only skip retries for actual EventLoopNode instances (they handle
|
# Only skip retries for actual EventLoopNode instances (they handle
|
||||||
# retries internally). Custom NodeProtocol impls registered via
|
# retries internally). Custom NodeProtocol impls registered via
|
||||||
# register_node should be retried by the executor.
|
# register_node should be retried by the executor.
|
||||||
from framework.graph.event_loop_node import EventLoopNode as _ELN
|
from framework.agent_loop.agent_loop import AgentLoop as _ELN
|
||||||
|
|
||||||
if isinstance(node_impl, _ELN):
|
if isinstance(node_impl, _ELN):
|
||||||
max_retries = 0
|
max_retries = 0
|
||||||
@@ -603,10 +603,10 @@ class WorkerAgent:
|
|||||||
return self._node_impl
|
return self._node_impl
|
||||||
|
|
||||||
# Auto-create EventLoopNode
|
# Auto-create EventLoopNode
|
||||||
if self.node_spec.node_type in ("event_loop", "gcu"):
|
if self.node_spec.node_type == "event_loop":
|
||||||
from framework.graph.event_loop.types import LoopConfig
|
from framework.agent_loop.internals.types import LoopConfig
|
||||||
from framework.graph.event_loop_node import EventLoopNode
|
from framework.agent_loop.agent_loop import AgentLoop
|
||||||
from framework.graph.node import warn_if_deprecated_client_facing
|
from framework.orchestrator.node import warn_if_deprecated_client_facing
|
||||||
|
|
||||||
conv_store = None
|
conv_store = None
|
||||||
if gc.storage_path:
|
if gc.storage_path:
|
||||||
@@ -619,7 +619,7 @@ class WorkerAgent:
|
|||||||
warn_if_deprecated_client_facing(self.node_spec)
|
warn_if_deprecated_client_facing(self.node_spec)
|
||||||
default_max_iter = 100 if self.node_spec.supports_direct_user_io() else 50
|
default_max_iter = 100 if self.node_spec.supports_direct_user_io() else 50
|
||||||
|
|
||||||
node = EventLoopNode(
|
node = AgentLoop(
|
||||||
event_bus=gc.event_bus,
|
event_bus=gc.event_bus,
|
||||||
judge=None,
|
judge=None,
|
||||||
config=LoopConfig(
|
config=LoopConfig(
|
||||||
@@ -734,7 +734,7 @@ class WorkerAgent:
|
|||||||
if not next_spec or next_spec.node_type != "event_loop":
|
if not next_spec or next_spec.node_type != "event_loop":
|
||||||
return
|
return
|
||||||
|
|
||||||
from framework.graph.prompting import (
|
from framework.orchestrator.prompting import (
|
||||||
TransitionSpec,
|
TransitionSpec,
|
||||||
build_narrative,
|
build_narrative,
|
||||||
build_system_prompt_for_node_context,
|
build_system_prompt_for_node_context,
|
||||||
@@ -16,21 +16,21 @@ from dataclasses import dataclass, field
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from framework.graph.checkpoint_config import CheckpointConfig
|
from framework.orchestrator.checkpoint_config import CheckpointConfig
|
||||||
from framework.graph.context import GraphContext, build_node_context
|
from framework.orchestrator.context import GraphContext, build_node_context
|
||||||
from framework.graph.conversation import LEGACY_RUN_ID
|
from framework.agent_loop.conversation import LEGACY_RUN_ID
|
||||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
from framework.orchestrator.edge import EdgeCondition, EdgeSpec, GraphSpec
|
||||||
from framework.graph.goal import Goal
|
from framework.orchestrator.goal import Goal
|
||||||
from framework.graph.node import (
|
from framework.orchestrator.node import (
|
||||||
DataBuffer,
|
DataBuffer,
|
||||||
NodeProtocol,
|
NodeProtocol,
|
||||||
NodeResult,
|
NodeResult,
|
||||||
NodeSpec,
|
NodeSpec,
|
||||||
)
|
)
|
||||||
from framework.graph.validator import OutputValidator
|
from framework.orchestrator.validator import OutputValidator
|
||||||
from framework.llm.provider import LLMProvider, Tool
|
from framework.llm.provider import LLMProvider, Tool
|
||||||
from framework.observability import set_trace_context
|
from framework.observability import set_trace_context
|
||||||
from framework.runtime.core import Runtime
|
from framework.tracker.decision_tracker import DecisionTracker
|
||||||
from framework.schemas.checkpoint import Checkpoint
|
from framework.schemas.checkpoint import Checkpoint
|
||||||
from framework.storage.checkpoint_store import CheckpointStore
|
from framework.storage.checkpoint_store import CheckpointStore
|
||||||
from framework.utils.io import atomic_write
|
from framework.utils.io import atomic_write
|
||||||
@@ -112,7 +112,7 @@ class ParallelExecutionConfig:
|
|||||||
branch_timeout_seconds: float = 300.0
|
branch_timeout_seconds: float = 300.0
|
||||||
|
|
||||||
|
|
||||||
class GraphExecutor:
|
class Orchestrator:
|
||||||
"""
|
"""
|
||||||
Executes agent graphs.
|
Executes agent graphs.
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ class GraphExecutor:
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
runtime: Runtime,
|
runtime: DecisionTracker,
|
||||||
llm: LLMProvider | None = None,
|
llm: LLMProvider | None = None,
|
||||||
tools: list[Tool] | None = None,
|
tools: list[Tool] | None = None,
|
||||||
tool_executor: Callable | None = None,
|
tool_executor: Callable | None = None,
|
||||||
@@ -165,7 +165,7 @@ class GraphExecutor:
|
|||||||
Initialize the executor.
|
Initialize the executor.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
runtime: Runtime for decision logging
|
runtime: DecisionTracker for decision logging
|
||||||
llm: LLM provider for LLM nodes
|
llm: LLM provider for LLM nodes
|
||||||
tools: Available tools
|
tools: Available tools
|
||||||
tool_executor: Function to execute tools
|
tool_executor: Function to execute tools
|
||||||
@@ -202,7 +202,7 @@ class GraphExecutor:
|
|||||||
self.validator = OutputValidator()
|
self.validator = OutputValidator()
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
"[GraphExecutor.__init__] Created with"
|
"[Orchestrator.__init__] Created with"
|
||||||
" stream_id=%s, execution_id=%s,"
|
" stream_id=%s, execution_id=%s,"
|
||||||
" initial node_registry keys: %s",
|
" initial node_registry keys: %s",
|
||||||
stream_id,
|
stream_id,
|
||||||
@@ -361,8 +361,8 @@ class GraphExecutor:
|
|||||||
|
|
||||||
Uses the same recursive binary-search splitting as EventLoopNode.
|
Uses the same recursive binary-search splitting as EventLoopNode.
|
||||||
"""
|
"""
|
||||||
from framework.graph.conversation import extract_tool_call_history
|
from framework.agent_loop.conversation import extract_tool_call_history
|
||||||
from framework.graph.event_loop_node import _is_context_too_large_error
|
from framework.agent_loop.agent_loop import _is_context_too_large_error
|
||||||
|
|
||||||
if _depth > self._PHASE_LLM_MAX_DEPTH:
|
if _depth > self._PHASE_LLM_MAX_DEPTH:
|
||||||
raise RuntimeError("Phase LLM compaction recursion limit")
|
raise RuntimeError("Phase LLM compaction recursion limit")
|
||||||
@@ -690,7 +690,7 @@ class GraphExecutor:
|
|||||||
# and spillover files share the same session-scoped directory.
|
# and spillover files share the same session-scoped directory.
|
||||||
_ctx_token = None
|
_ctx_token = None
|
||||||
if self._storage_path:
|
if self._storage_path:
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
from framework.loader.tool_registry import ToolRegistry
|
||||||
|
|
||||||
_ctx_token = ToolRegistry.set_execution_context(
|
_ctx_token = ToolRegistry.set_execution_context(
|
||||||
data_dir=str(self._storage_path / "data"),
|
data_dir=str(self._storage_path / "data"),
|
||||||
@@ -712,13 +712,12 @@ class GraphExecutor:
|
|||||||
|
|
||||||
finally:
|
finally:
|
||||||
if _ctx_token is not None:
|
if _ctx_token is not None:
|
||||||
from framework.runner.tool_registry import ToolRegistry
|
from framework.loader.tool_registry import ToolRegistry
|
||||||
|
|
||||||
ToolRegistry.reset_execution_context(_ctx_token)
|
ToolRegistry.reset_execution_context(_ctx_token)
|
||||||
|
|
||||||
VALID_NODE_TYPES = {
|
VALID_NODE_TYPES = {
|
||||||
"event_loop",
|
"event_loop",
|
||||||
"gcu",
|
|
||||||
}
|
}
|
||||||
# Node types removed in v0.5 — provide migration guidance
|
# Node types removed in v0.5 — provide migration guidance
|
||||||
REMOVED_NODE_TYPES = {
|
REMOVED_NODE_TYPES = {
|
||||||
@@ -736,11 +735,11 @@ class GraphExecutor:
|
|||||||
# Check registry first
|
# Check registry first
|
||||||
if node_spec.id in self.node_registry:
|
if node_spec.id in self.node_registry:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[GraphExecutor._get_node_implementation] Found node '%s' in registry", node_spec.id
|
"[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id
|
||||||
)
|
)
|
||||||
return self.node_registry[node_spec.id]
|
return self.node_registry[node_spec.id]
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[GraphExecutor._get_node_implementation]"
|
"[Orchestrator._get_node_implementation]"
|
||||||
" Node '%s' not in registry (keys: %s),"
|
" Node '%s' not in registry (keys: %s),"
|
||||||
" creating new",
|
" creating new",
|
||||||
node_spec.id,
|
node_spec.id,
|
||||||
@@ -764,10 +763,10 @@ class GraphExecutor:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create based on type
|
# Create based on type
|
||||||
if node_spec.node_type in ("event_loop", "gcu"):
|
if node_spec.node_type == "event_loop":
|
||||||
# Auto-create EventLoopNode with sensible defaults.
|
# Auto-create EventLoopNode with sensible defaults.
|
||||||
# Custom configs can still be pre-registered via node_registry.
|
# Custom configs can still be pre-registered via node_registry.
|
||||||
from framework.graph.event_loop_node import EventLoopNode, LoopConfig
|
from framework.agent_loop.agent_loop import AgentLoop, LoopConfig
|
||||||
|
|
||||||
# Create a FileConversationStore if a storage path is available
|
# Create a FileConversationStore if a storage path is available
|
||||||
conv_store = None
|
conv_store = None
|
||||||
@@ -787,13 +786,13 @@ class GraphExecutor:
|
|||||||
if self._storage_path:
|
if self._storage_path:
|
||||||
spillover = str(self._storage_path / "data")
|
spillover = str(self._storage_path / "data")
|
||||||
|
|
||||||
from framework.graph.node import warn_if_deprecated_client_facing
|
from framework.orchestrator.node import warn_if_deprecated_client_facing
|
||||||
|
|
||||||
warn_if_deprecated_client_facing(node_spec)
|
warn_if_deprecated_client_facing(node_spec)
|
||||||
|
|
||||||
lc = self._loop_config
|
lc = self._loop_config
|
||||||
default_max_iter = 100 if node_spec.supports_direct_user_io() else 50
|
default_max_iter = 100 if node_spec.supports_direct_user_io() else 50
|
||||||
node = EventLoopNode(
|
node = AgentLoop(
|
||||||
event_bus=self._event_bus,
|
event_bus=self._event_bus,
|
||||||
judge=None, # implicit judge: accept when output_keys are filled
|
judge=None, # implicit judge: accept when output_keys are filled
|
||||||
config=LoopConfig(
|
config=LoopConfig(
|
||||||
@@ -812,7 +811,7 @@ class GraphExecutor:
|
|||||||
# Cache so inject_event() is reachable for queen interaction and escalation routing
|
# Cache so inject_event() is reachable for queen interaction and escalation routing
|
||||||
self.node_registry[node_spec.id] = node
|
self.node_registry[node_spec.id] = node
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[GraphExecutor._get_node_implementation]"
|
"[Orchestrator._get_node_implementation]"
|
||||||
" Cached node '%s' in node_registry,"
|
" Cached node '%s' in node_registry,"
|
||||||
" registry now has keys: %s",
|
" registry now has keys: %s",
|
||||||
node_spec.id,
|
node_spec.id,
|
||||||
@@ -998,10 +997,10 @@ class GraphExecutor:
|
|||||||
branch_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
|
branch_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model)
|
||||||
|
|
||||||
effective_max_retries = node_spec.max_retries
|
effective_max_retries = node_spec.max_retries
|
||||||
# Only override for actual EventLoopNode instances, not custom NodeProtocol impls
|
# Only override for actual AgentLoop instances, not custom NodeProtocol impls
|
||||||
from framework.graph.event_loop_node import EventLoopNode
|
from framework.agent_loop.agent_loop import AgentLoop as _AgentLoop # noqa: F811
|
||||||
|
|
||||||
if isinstance(branch_impl, EventLoopNode) and effective_max_retries > 1:
|
if isinstance(branch_impl, _AgentLoop) and effective_max_retries > 1:
|
||||||
self.logger.warning(
|
self.logger.warning(
|
||||||
f"EventLoopNode '{node_spec.id}' has "
|
f"EventLoopNode '{node_spec.id}' has "
|
||||||
f"max_retries={effective_max_retries}. Overriding "
|
f"max_retries={effective_max_retries}. Overriding "
|
||||||
@@ -1042,9 +1041,6 @@ class GraphExecutor:
|
|||||||
execution_id=self._execution_id,
|
execution_id=self._execution_id,
|
||||||
run_id=self._run_id,
|
run_id=self._run_id,
|
||||||
stream_id=self._stream_id,
|
stream_id=self._stream_id,
|
||||||
node_registry=node_registry,
|
|
||||||
all_tools=self.tools,
|
|
||||||
shared_node_registry=self.node_registry,
|
|
||||||
dynamic_tools_provider=self.dynamic_tools_provider,
|
dynamic_tools_provider=self.dynamic_tools_provider,
|
||||||
dynamic_prompt_provider=self.dynamic_prompt_provider,
|
dynamic_prompt_provider=self.dynamic_prompt_provider,
|
||||||
dynamic_memory_provider=self.dynamic_memory_provider,
|
dynamic_memory_provider=self.dynamic_memory_provider,
|
||||||
@@ -1293,14 +1289,14 @@ class GraphExecutor:
|
|||||||
Replaces the imperative while-loop with autonomous workers that
|
Replaces the imperative while-loop with autonomous workers that
|
||||||
self-activate based on edge conditions and fan-out tracking.
|
self-activate based on edge conditions and fan-out tracking.
|
||||||
"""
|
"""
|
||||||
from framework.graph.worker_agent import (
|
from framework.orchestrator.node_worker import (
|
||||||
Activation,
|
Activation,
|
||||||
FanOutTag,
|
FanOutTag,
|
||||||
WorkerAgent,
|
NodeWorker,
|
||||||
WorkerCompletion,
|
WorkerCompletion,
|
||||||
WorkerLifecycle,
|
WorkerLifecycle,
|
||||||
)
|
)
|
||||||
from framework.runtime.event_bus import AgentEvent, EventType
|
from framework.host.event_bus import AgentEvent, EventType
|
||||||
|
|
||||||
# Build shared graph context
|
# Build shared graph context
|
||||||
gc = GraphContext(
|
gc = GraphContext(
|
||||||
@@ -1339,9 +1335,9 @@ class GraphExecutor:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create one WorkerAgent per node
|
# Create one WorkerAgent per node
|
||||||
workers: dict[str, WorkerAgent] = {}
|
workers: dict[str, NodeWorker] = {}
|
||||||
for node_spec in graph.nodes:
|
for node_spec in graph.nodes:
|
||||||
workers[node_spec.id] = WorkerAgent(node_spec=node_spec, graph_context=gc)
|
workers[node_spec.id] = NodeWorker(node_spec=node_spec, graph_context=gc)
|
||||||
|
|
||||||
# Identify entry workers (graph entry node, not based on edge count)
|
# Identify entry workers (graph entry node, not based on edge count)
|
||||||
# A node can be the entry point AND have incoming feedback edges.
|
# A node can be the entry point AND have incoming feedback edges.
|
||||||
@@ -1442,7 +1438,7 @@ class GraphExecutor:
|
|||||||
|
|
||||||
def _route_activation(
|
def _route_activation(
|
||||||
activation: Activation,
|
activation: Activation,
|
||||||
workers_map: dict[str, WorkerAgent],
|
workers_map: dict[str, NodeWorker],
|
||||||
pending_tasks_map: dict[str, asyncio.Task],
|
pending_tasks_map: dict[str, asyncio.Task],
|
||||||
*,
|
*,
|
||||||
has_event_subscription: bool,
|
has_event_subscription: bool,
|
||||||
+4
-5
@@ -9,7 +9,7 @@ import json
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from framework.graph.prompting import (
|
from framework.orchestrator.prompting import (
|
||||||
EXECUTION_SCOPE_PREAMBLE,
|
EXECUTION_SCOPE_PREAMBLE,
|
||||||
TransitionSpec,
|
TransitionSpec,
|
||||||
build_accounts_prompt,
|
build_accounts_prompt,
|
||||||
@@ -19,7 +19,7 @@ from framework.graph.prompting import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.node import DataBuffer, NodeSpec
|
from framework.orchestrator.node import DataBuffer, NodeSpec
|
||||||
|
|
||||||
|
|
||||||
_with_datetime = stamp_prompt_datetime
|
_with_datetime = stamp_prompt_datetime
|
||||||
@@ -36,7 +36,7 @@ def compose_system_prompt(
|
|||||||
node_type_preamble: str | None = None,
|
node_type_preamble: str | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Compatibility wrapper for the legacy function signature."""
|
"""Compatibility wrapper for the legacy function signature."""
|
||||||
from framework.graph.prompting import NodePromptSpec
|
from framework.orchestrator.prompting import NodePromptSpec
|
||||||
|
|
||||||
spec = NodePromptSpec(
|
spec = NodePromptSpec(
|
||||||
identity_prompt=identity_prompt or "",
|
identity_prompt=identity_prompt or "",
|
||||||
@@ -66,7 +66,6 @@ def compose_system_prompt(
|
|||||||
protocols_prompt=spec.protocols_prompt,
|
protocols_prompt=spec.protocols_prompt,
|
||||||
node_type=spec.node_type,
|
node_type=spec.node_type,
|
||||||
output_keys=spec.output_keys,
|
output_keys=spec.output_keys,
|
||||||
is_subagent_mode=spec.is_subagent_mode,
|
|
||||||
)
|
)
|
||||||
return build_system_prompt(spec)
|
return build_system_prompt(spec)
|
||||||
|
|
||||||
@@ -135,7 +134,7 @@ def build_transition_marker(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
from framework.graph.prompting import build_transition_message # noqa: E402
|
from framework.orchestrator.prompting import build_transition_message # noqa: E402
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"EXECUTION_SCOPE_PREAMBLE",
|
"EXECUTION_SCOPE_PREAMBLE",
|
||||||
@@ -12,8 +12,8 @@ from datetime import datetime
|
|||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.edge import GraphSpec
|
from framework.orchestrator.edge import GraphSpec
|
||||||
from framework.graph.node import DataBuffer
|
from framework.orchestrator.node import DataBuffer
|
||||||
|
|
||||||
|
|
||||||
# Injected into every worker node's system prompt so the LLM understands
|
# Injected into every worker node's system prompt so the LLM understands
|
||||||
@@ -40,7 +40,6 @@ class NodePromptSpec:
|
|||||||
memory_prompt: str = ""
|
memory_prompt: str = ""
|
||||||
node_type: str = "event_loop"
|
node_type: str = "event_loop"
|
||||||
output_keys: tuple[str, ...] = ()
|
output_keys: tuple[str, ...] = ()
|
||||||
is_subagent_mode: bool = False
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -165,7 +164,6 @@ def build_prompt_spec_from_node_context(
|
|||||||
memory_prompt=resolved_memory_prompt,
|
memory_prompt=resolved_memory_prompt,
|
||||||
node_type=ctx.node_spec.node_type,
|
node_type=ctx.node_spec.node_type,
|
||||||
output_keys=tuple(ctx.node_spec.output_keys or ()),
|
output_keys=tuple(ctx.node_spec.output_keys or ()),
|
||||||
is_subagent_mode=bool(getattr(ctx, "is_subagent_mode", False)),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -195,13 +193,10 @@ def build_system_prompt(spec: NodePromptSpec) -> str:
|
|||||||
if spec.narrative:
|
if spec.narrative:
|
||||||
parts.append(f"\n--- Context (what has happened so far) ---\n{spec.narrative}")
|
parts.append(f"\n--- Context (what has happened so far) ---\n{spec.narrative}")
|
||||||
|
|
||||||
if not spec.is_subagent_mode and spec.node_type in ("event_loop", "gcu") and spec.output_keys:
|
if not False and spec.node_type == "event_loop" and spec.output_keys:
|
||||||
parts.append(f"\n{EXECUTION_SCOPE_PREAMBLE}")
|
parts.append(f"\n{EXECUTION_SCOPE_PREAMBLE}")
|
||||||
|
|
||||||
if spec.node_type == "gcu":
|
|
||||||
from framework.graph.gcu import GCU_BROWSER_SYSTEM_PROMPT
|
|
||||||
|
|
||||||
parts.append(f"\n{GCU_BROWSER_SYSTEM_PROMPT}")
|
|
||||||
|
|
||||||
if spec.focus_prompt:
|
if spec.focus_prompt:
|
||||||
parts.append(f"\n--- Current Focus ---\n{spec.focus_prompt}")
|
parts.append(f"\n--- Current Focus ---\n{spec.focus_prompt}")
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
"""Pipeline middleware for the agent runtime.
|
||||||
|
|
||||||
|
Stages run in order when :meth:`AgentRuntime.trigger` receives a request.
|
||||||
|
Each stage can pass the context through, transform the input data, or reject
|
||||||
|
the request entirely. This is the runtime-level analogue of AstrBot's
|
||||||
|
pipeline architecture and lets operators compose rate limiting, validation,
|
||||||
|
cost guards, and custom pre/post-processing without patching core code.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from framework.pipeline.registry import (
|
||||||
|
build_pipeline_from_config,
|
||||||
|
build_stage,
|
||||||
|
register,
|
||||||
|
)
|
||||||
|
from framework.pipeline.runner import PipelineRunner
|
||||||
|
from framework.pipeline.stage import (
|
||||||
|
PipelineContext,
|
||||||
|
PipelineRejectedError,
|
||||||
|
PipelineResult,
|
||||||
|
PipelineStage,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"PipelineContext",
|
||||||
|
"PipelineRejectedError",
|
||||||
|
"PipelineResult",
|
||||||
|
"PipelineRunner",
|
||||||
|
"PipelineStage",
|
||||||
|
"build_pipeline_from_config",
|
||||||
|
"build_stage",
|
||||||
|
"register",
|
||||||
|
]
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
"""Execution-level middleware protocol.
|
||||||
|
|
||||||
|
Unlike :class:`PipelineStage` (which gates ``AgentHost.trigger()`` at the
|
||||||
|
request level), execution middleware runs at the start of **every** execution
|
||||||
|
attempt inside ``ExecutionManager._run_execution()`` -- including resurrection
|
||||||
|
retries.
|
||||||
|
|
||||||
|
Use this for concerns that must re-evaluate per attempt:
|
||||||
|
- Cost tracking (charge per attempt, not per trigger)
|
||||||
|
- Tool scoping (different tools on retry)
|
||||||
|
- Checkpoint config overrides
|
||||||
|
- Per-execution logging/tracing setup
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ExecutionContext:
|
||||||
|
"""Context passed to execution middleware."""
|
||||||
|
|
||||||
|
execution_id: str
|
||||||
|
stream_id: str
|
||||||
|
run_id: str
|
||||||
|
input_data: dict[str, Any]
|
||||||
|
session_state: dict[str, Any] | None = None
|
||||||
|
attempt: int = 1
|
||||||
|
metadata: dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
class ExecutionMiddleware(ABC):
|
||||||
|
"""Base class for per-execution middleware."""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def on_execution_start(self, ctx: ExecutionContext) -> ExecutionContext:
|
||||||
|
"""Called before each execution attempt (including resurrections).
|
||||||
|
|
||||||
|
Modify and return *ctx* to transform execution parameters.
|
||||||
|
Raise to abort the execution.
|
||||||
|
"""
|
||||||
@@ -0,0 +1,107 @@
|
|||||||
|
"""Pipeline stage registry -- maps type names to stage classes.
|
||||||
|
|
||||||
|
Stages self-register via the ``@register`` decorator. The
|
||||||
|
``build_pipeline_from_config`` function reads a declarative config
|
||||||
|
(from ``~/.hive/configuration.json`` or ``agent.json``) and
|
||||||
|
instantiates the corresponding stage objects.
|
||||||
|
|
||||||
|
Example config::
|
||||||
|
|
||||||
|
{
|
||||||
|
"pipeline": {
|
||||||
|
"stages": [
|
||||||
|
{"type": "rate_limit", "order": 200, "config": {"max_requests_per_minute": 60}},
|
||||||
|
{"type": "cost_guard", "order": 300, "config": {"max_cost_per_request": 0.50}}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.runner import PipelineRunner
|
||||||
|
from framework.pipeline.stage import PipelineStage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_STAGE_REGISTRY: dict[str, type[PipelineStage]] = {}
|
||||||
|
|
||||||
|
|
||||||
|
def register(name: str):
|
||||||
|
"""Decorator to register a pipeline stage class by type name.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
@register("rate_limit")
|
||||||
|
class RateLimitStage(PipelineStage):
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
def decorator(cls: type[PipelineStage]) -> type[PipelineStage]:
|
||||||
|
_STAGE_REGISTRY[name] = cls
|
||||||
|
return cls
|
||||||
|
|
||||||
|
return decorator
|
||||||
|
|
||||||
|
|
||||||
|
def get_registered_stages() -> dict[str, type[PipelineStage]]:
|
||||||
|
"""Return a copy of the stage registry."""
|
||||||
|
return dict(_STAGE_REGISTRY)
|
||||||
|
|
||||||
|
|
||||||
|
def build_stage(spec: dict[str, Any]) -> PipelineStage:
|
||||||
|
"""Instantiate a single stage from a config spec.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
spec: Dict with ``type`` (required), ``order`` (optional),
|
||||||
|
and ``config`` (optional kwargs dict).
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
KeyError: If the stage type is not registered.
|
||||||
|
"""
|
||||||
|
stage_type = spec["type"]
|
||||||
|
if stage_type not in _STAGE_REGISTRY:
|
||||||
|
available = ", ".join(sorted(_STAGE_REGISTRY)) or "(none)"
|
||||||
|
raise KeyError(
|
||||||
|
f"Unknown pipeline stage type '{stage_type}'. "
|
||||||
|
f"Available: {available}"
|
||||||
|
)
|
||||||
|
cls = _STAGE_REGISTRY[stage_type]
|
||||||
|
config = spec.get("config", {})
|
||||||
|
stage = cls(**config)
|
||||||
|
if "order" in spec:
|
||||||
|
stage.order = spec["order"]
|
||||||
|
return stage
|
||||||
|
|
||||||
|
|
||||||
|
def build_pipeline_from_config(
|
||||||
|
stages_config: list[dict[str, Any]],
|
||||||
|
) -> PipelineRunner:
|
||||||
|
"""Build a ``PipelineRunner`` from a declarative stages list.
|
||||||
|
|
||||||
|
Each entry is ``{"type": "...", "order": N, "config": {...}}``.
|
||||||
|
"""
|
||||||
|
# Import built-in stages so they self-register
|
||||||
|
_ensure_builtins_registered()
|
||||||
|
|
||||||
|
stages = [build_stage(s) for s in stages_config]
|
||||||
|
return PipelineRunner(stages)
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_builtins_registered() -> None:
|
||||||
|
"""Import built-in stage modules so their ``@register`` decorators fire."""
|
||||||
|
if _STAGE_REGISTRY:
|
||||||
|
return # already populated
|
||||||
|
try:
|
||||||
|
import framework.pipeline.stages.cost_guard # noqa: F401
|
||||||
|
import framework.pipeline.stages.credential_resolver # noqa: F401
|
||||||
|
import framework.pipeline.stages.input_validation # noqa: F401
|
||||||
|
import framework.pipeline.stages.llm_provider # noqa: F401
|
||||||
|
import framework.pipeline.stages.mcp_registry # noqa: F401
|
||||||
|
import framework.pipeline.stages.rate_limit # noqa: F401
|
||||||
|
import framework.pipeline.stages.skill_registry # noqa: F401
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
@@ -0,0 +1,111 @@
|
|||||||
|
"""Pipeline runner -- executes registered stages in order."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.stage import (
|
||||||
|
PipelineContext,
|
||||||
|
PipelineRejectedError,
|
||||||
|
PipelineStage,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineRunner:
|
||||||
|
"""Executes a list of :class:`PipelineStage` instances in ``order``.
|
||||||
|
|
||||||
|
The runner is the orchestration layer that :class:`AgentRuntime` calls
|
||||||
|
on every trigger. Stages execute in ascending ``order`` (ties broken by
|
||||||
|
registration order). A stage returning ``reject`` short-circuits the
|
||||||
|
pipeline and causes the trigger to raise :class:`PipelineRejectedError`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, stages: list[PipelineStage] | None = None) -> None:
|
||||||
|
self._stages: list[PipelineStage] = sorted(stages or [], key=lambda s: s.order)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def stages(self) -> list[PipelineStage]:
|
||||||
|
return list(self._stages)
|
||||||
|
|
||||||
|
def add_stage(self, stage: PipelineStage) -> None:
|
||||||
|
"""Add a stage after construction (for dynamic registration)."""
|
||||||
|
self._stages.append(stage)
|
||||||
|
self._stages.sort(key=lambda s: s.order)
|
||||||
|
|
||||||
|
async def initialize_all(self) -> None:
|
||||||
|
"""Call ``initialize`` on every registered stage."""
|
||||||
|
for stage in self._stages:
|
||||||
|
name = stage.__class__.__name__
|
||||||
|
logger.info("[pipeline] Initializing %s (order=%d)", name, stage.order)
|
||||||
|
await stage.initialize()
|
||||||
|
logger.info("[pipeline] %s initialized", name)
|
||||||
|
if self._stages:
|
||||||
|
logger.info(
|
||||||
|
"[pipeline] Ready: %d stages [%s]",
|
||||||
|
len(self._stages),
|
||||||
|
" -> ".join(s.__class__.__name__ for s in self._stages),
|
||||||
|
)
|
||||||
|
|
||||||
|
async def run(self, ctx: PipelineContext) -> PipelineContext:
|
||||||
|
"""Run all stages. Raises ``PipelineRejectedError`` on rejection.
|
||||||
|
|
||||||
|
Returns the (possibly transformed) context.
|
||||||
|
"""
|
||||||
|
if not self._stages:
|
||||||
|
return ctx
|
||||||
|
import time
|
||||||
|
|
||||||
|
pipeline_start = time.perf_counter()
|
||||||
|
logger.info(
|
||||||
|
"[pipeline] Running %d stages for entry_point=%s",
|
||||||
|
len(self._stages),
|
||||||
|
ctx.entry_point_id,
|
||||||
|
)
|
||||||
|
for stage in self._stages:
|
||||||
|
stage_name = stage.__class__.__name__
|
||||||
|
t0 = time.perf_counter()
|
||||||
|
result = await stage.process(ctx)
|
||||||
|
elapsed_ms = (time.perf_counter() - t0) * 1000
|
||||||
|
if result.action == "reject":
|
||||||
|
reason = result.rejection_reason or "(no reason given)"
|
||||||
|
logger.warning(
|
||||||
|
"[pipeline] REJECTED by %s (%.1fms): %s",
|
||||||
|
stage_name, elapsed_ms, reason,
|
||||||
|
)
|
||||||
|
raise PipelineRejectedError(stage_name, reason)
|
||||||
|
if result.action == "transform":
|
||||||
|
logger.info(
|
||||||
|
"[pipeline] %s TRANSFORMED input (%.1fms)",
|
||||||
|
stage_name, elapsed_ms,
|
||||||
|
)
|
||||||
|
if result.input_data is not None:
|
||||||
|
ctx.input_data = result.input_data
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"[pipeline] %s passed (%.1fms)",
|
||||||
|
stage_name, elapsed_ms,
|
||||||
|
)
|
||||||
|
total_ms = (time.perf_counter() - pipeline_start) * 1000
|
||||||
|
logger.info("[pipeline] Complete (%.1fms total)", total_ms)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
async def run_post(self, ctx: PipelineContext, result: Any) -> Any:
    """Feed *result* through every stage's ``post_process`` hook in order.

    Each stage may transform the value; the final value is returned.
    A raising hook is logged and skipped -- post-processing must never
    break a successful execution.
    """
    transformed = result
    for stage in self._stages:
        try:
            transformed = await stage.post_process(ctx, transformed)
        except Exception:
            logger.exception(
                "Pipeline post_process raised in %s; continuing with previous result",
                stage.__class__.__name__,
            )
    return transformed
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
"""Pipeline stage base class and request/response types."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Literal
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineRejectedError(Exception):
    """Raised by ``AgentHost.trigger`` when a stage rejects the request.

    Attributes:
        stage_name: name of the rejecting stage.
        reason: human-readable rejection reason.
    """

    def __init__(self, stage_name: str, reason: str) -> None:
        self.stage_name = stage_name
        self.reason = reason
        super().__init__(f"Pipeline rejected by {stage_name}: {reason}")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PipelineContext:
    """Carries request data through the pipeline."""

    # Which entry point received the request.
    entry_point_id: str
    # Raw request payload; stages may replace it via a "transform" result.
    input_data: dict[str, Any]
    # Optional ID used to correlate related events and requests.
    correlation_id: str | None = None
    # Per-session state, when available.
    session_state: dict[str, Any] | None = None
    # Scratch space for stages to share derived values
    # (e.g. ``estimated_cost`` read by CostGuardStage).
    metadata: dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PipelineResult:
    """Outcome of a stage's ``process`` call."""

    # "continue" passes the request through, "reject" aborts it,
    # "transform" swaps in a new ``input_data`` payload.
    action: Literal["continue", "reject", "transform"] = "continue"
    # Replacement payload; only meaningful when ``action == "transform"``.
    input_data: dict[str, Any] | None = None
    # Human-readable reason; only meaningful when ``action == "reject"``.
    rejection_reason: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class PipelineStage(ABC):
    """Base class for all middleware stages.

    Infrastructure stages (LLM, MCP, credentials, skills) set typed
    attributes during ``initialize()`` that the host reads after all
    stages have initialized. Request-level stages (rate limit, input
    validation, cost guard) implement ``process()``.

    Attributes set by infrastructure stages:
        llm: LLM provider instance (set by LlmProviderStage)
        tool_registry: ToolRegistry with discovered MCP tools (set by McpRegistryStage)
        accounts_prompt: Connected accounts system prompt block (set by CredentialResolverStage)
        accounts_data: Raw account info list (set by CredentialResolverStage)
        tool_provider_map: Tool name -> provider mapping (set by CredentialResolverStage)
        skills_manager: SkillsManager instance (set by SkillRegistryStage)
    """

    # Stages run in ascending ``order``; 100 is the default middle slot.
    order: int = 100

    # Infrastructure stage outputs -- typed so _apply_pipeline_results
    # doesn't need hasattr() sniffing.
    llm: Any = None
    tool_registry: Any = None
    accounts_prompt: str = ""
    accounts_data: list[dict] | None = None
    tool_provider_map: dict[str, str] | None = None
    skills_manager: Any = None

    async def initialize(self) -> None:
        """Called once when the runtime starts."""
        return None

    @abstractmethod
    async def process(self, ctx: PipelineContext) -> PipelineResult:
        """Process the incoming request."""

    async def post_process(self, ctx: PipelineContext, result: Any) -> Any:
        """Optional post-execution hook. Default: pass-through."""
        return result
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
"""Built-in pipeline stages."""
|
||||||
|
|
||||||
|
from framework.pipeline.stages.cost_guard import CostGuardStage
|
||||||
|
from framework.pipeline.stages.credential_resolver import CredentialResolverStage
|
||||||
|
from framework.pipeline.stages.input_validation import InputValidationStage
|
||||||
|
from framework.pipeline.stages.llm_provider import LlmProviderStage
|
||||||
|
from framework.pipeline.stages.mcp_registry import McpRegistryStage
|
||||||
|
from framework.pipeline.stages.rate_limit import RateLimitStage
|
||||||
|
from framework.pipeline.stages.skill_registry import SkillRegistryStage
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"CostGuardStage",
|
||||||
|
"CredentialResolverStage",
|
||||||
|
"InputValidationStage",
|
||||||
|
"LlmProviderStage",
|
||||||
|
"McpRegistryStage",
|
||||||
|
"RateLimitStage",
|
||||||
|
"SkillRegistryStage",
|
||||||
|
]
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
"""Cost guard stage -- reject requests over a pre-flight budget."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
|
||||||
|
@register("cost_guard")
class CostGuardStage(PipelineStage):
    """Reject requests whose estimated cost exceeds the per-request budget.

    The cost estimate must be populated in ``ctx.metadata["estimated_cost"]``
    by an earlier stage (or by the caller). When no estimate is present,
    the stage passes through.
    """

    order = 300

    def __init__(self, max_cost_per_request: float = 1.0) -> None:
        # Maximum allowed estimated cost for a single request.
        self._budget = max_cost_per_request

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        """Reject when an estimate is present and strictly over budget."""
        estimate = ctx.metadata.get("estimated_cost")
        # No estimate means there is nothing to enforce; the strict ``>``
        # comparison is preserved so a borderline estimate still passes.
        if estimate is not None and estimate > self._budget:
            return PipelineResult(
                action="reject",
                rejection_reason=(
                    f"Estimated cost ${estimate:.4f} exceeds budget "
                    f"${self._budget:.4f}"
                ),
            )
        return PipelineResult(action="continue")
|
||||||
@@ -0,0 +1,58 @@
|
|||||||
|
"""Credential resolver pipeline stage.
|
||||||
|
|
||||||
|
Resolves connected accounts at startup. Individual credential TTL/refresh
|
||||||
|
is handled by MCP server processes internally -- they resolve tokens from
|
||||||
|
the credential store on every tool call.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@register("credential_resolver")
class CredentialResolverStage(PipelineStage):
    """Resolve connected accounts for system prompt injection."""

    order = 40

    def __init__(self, credential_store: Any = None, **kwargs: Any) -> None:
        # Optional pre-built store; when None the default adapter is used.
        self._credential_store = credential_store
        self.accounts_prompt = ""
        self.accounts_data: list[dict] | None = None
        self.tool_provider_map: dict[str, str] | None = None

    async def initialize(self) -> None:
        """Resolve accounts once at startup; any failure is non-fatal."""
        try:
            from aden_tools.credentials.store_adapter import (
                CredentialStoreAdapter,
            )
            from framework.orchestrator.prompting import build_accounts_prompt

            if self._credential_store is None:
                adapter = CredentialStoreAdapter.default()
            else:
                adapter = CredentialStoreAdapter(store=self._credential_store)
            self.accounts_data = adapter.get_all_account_info()
            self.tool_provider_map = adapter.get_tool_provider_map()
            # Only build the prompt block when there is at least one account.
            if self.accounts_data:
                self.accounts_prompt = build_accounts_prompt(
                    self.accounts_data, self.tool_provider_map,
                )
            logger.info(
                "[pipeline] CredentialResolverStage: %d accounts",
                len(self.accounts_data or []),
            )
        except Exception:
            # Deliberate best-effort: missing credentials must not block startup.
            logger.debug(
                "Credential resolution failed (non-fatal)", exc_info=True,
            )

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        # Infrastructure stage: all work happens in initialize().
        return PipelineResult(action="continue")
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
"""Input validation stage.
|
||||||
|
|
||||||
|
Rejects requests whose ``input_data`` does not match the entry point's
|
||||||
|
declared input schema. Uses a user-provided schema map:
|
||||||
|
``{entry_point_id: {required_key: expected_type, ...}}``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
|
||||||
|
@register("input_validation")
class InputValidationStage(PipelineStage):
    """Validate ``input_data`` against per-entry-point schemas.

    The schema is a simple dict mapping key -> expected Python type.
    For richer validation, substitute a Pydantic-based stage.
    """

    order = 100

    def __init__(self, schemas: dict[str, dict[str, type]] | None = None) -> None:
        # {entry_point_id: {required_key: expected_type, ...}}
        self._schemas = schemas or {}

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        """Reject on the first missing or mistyped key; otherwise continue."""
        schema = self._schemas.get(ctx.entry_point_id)
        # Entry points without a declared schema are not validated.
        if not schema:
            return PipelineResult(action="continue")

        for key, expected_type in schema.items():
            try:
                value = ctx.input_data[key]
            except KeyError:
                return PipelineResult(
                    action="reject",
                    rejection_reason=f"Missing required input key: '{key}'",
                )
            # NOTE(review): isinstance(True, int) is True, so bools satisfy
            # an ``int`` schema entry -- presumably acceptable; confirm.
            if not isinstance(value, expected_type):
                return PipelineResult(
                    action="reject",
                    rejection_reason=(
                        f"Input key '{key}' has type {type(value).__name__}, "
                        f"expected {expected_type.__name__}"
                    ),
                )
        return PipelineResult(action="continue")
|
||||||
@@ -0,0 +1,95 @@
|
|||||||
|
"""LLM provider pipeline stage.
|
||||||
|
|
||||||
|
Resolves the LLM provider from global config. This is the ONLY place
|
||||||
|
the LLM gets created for worker agents.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@register("llm_provider")
class LlmProviderStage(PipelineStage):
    """Resolve LLM provider and make it available."""

    order = 10

    def __init__(
        self,
        model: str | None = None,
        mock_mode: bool = False,
        llm: Any = None,
        **kwargs: Any,
    ) -> None:
        self._model = model
        self._mock_mode = mock_mode
        # Pre-injected LLM (e.g. from session); skips resolution entirely.
        self.llm = llm

    async def initialize(self) -> None:
        """Pick a provider: injected > mock > Antigravity > LiteLLM."""
        if self.llm is not None:
            return  # Already injected

        from framework.config import (
            get_api_key,
            get_api_keys,
            get_hive_config,
            get_preferred_model,
        )

        chosen_model = self._model or get_preferred_model()

        if self._mock_mode:
            from framework.llm.mock import MockLLMProvider

            self.llm = MockLLMProvider(model=chosen_model)
            return

        settings = get_hive_config().get("llm", {})
        base_url = settings.get("api_base")

        # Check for Antigravity (special provider); fall through silently
        # when it is unavailable or has no credentials.
        if settings.get("use_antigravity_subscription"):
            try:
                from framework.llm.antigravity import AntigravityProvider

                candidate = AntigravityProvider(model=chosen_model)
                if candidate.has_credentials():
                    self.llm = candidate
                    logger.info("[pipeline] LlmProviderStage: Antigravity")
                    return
            except Exception:
                pass

        from framework.llm.litellm import LiteLLMProvider

        single_key = get_api_key()
        key_pool = get_api_keys()

        if key_pool and len(key_pool) > 1:
            # Multiple keys: hand the whole pool to the provider.
            self.llm = LiteLLMProvider(
                model=chosen_model, api_keys=key_pool, api_base=base_url,
            )
        elif single_key:
            overrides = {}
            # sk-ant-oat tokens are sent as a bearer authorization header.
            if single_key.startswith("sk-ant-oat"):
                overrides["extra_headers"] = {
                    "authorization": f"Bearer {single_key}"
                }
            self.llm = LiteLLMProvider(
                model=chosen_model,
                api_key=single_key,
                api_base=base_url,
                **overrides,
            )
        else:
            # No key configured; rely on provider/environment defaults.
            self.llm = LiteLLMProvider(model=chosen_model, api_base=base_url)

        logger.info("[pipeline] LlmProviderStage: %s", chosen_model)

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        # Infrastructure stage: nothing to do per-request.
        return PipelineResult(action="continue")
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
"""MCP registry pipeline stage.
|
||||||
|
|
||||||
|
Resolves MCP server references from the agent config against the global
|
||||||
|
registry and registers tools. This is the ONLY place MCP tools get loaded.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import asdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@register("mcp_registry")
class McpRegistryStage(PipelineStage):
    """Resolve MCP tools from the global registry."""

    order = 50

    def __init__(
        self,
        server_refs: list[dict[str, Any]] | None = None,
        agent_path: str | Path | None = None,
        tool_registry: Any = None,
        **kwargs: Any,
    ) -> None:
        # MCP server references from agent config (each a {"name": ...} dict).
        self._server_refs = server_refs or []
        self._agent_path = Path(agent_path) if agent_path else None
        self._tool_registry = tool_registry

    async def initialize(self) -> None:
        """Connect to MCP servers and discover tools."""
        if self._tool_registry is None:
            from framework.loader.tool_registry import ToolRegistry

            self._tool_registry = ToolRegistry()

        from framework.loader.mcp_registry import MCPRegistry

        global_registry = MCPRegistry()
        loaded = False

        # 1. From agent.json mcp_servers refs
        wanted = [ref["name"] for ref in self._server_refs if ref.get("name")]
        if wanted:
            resolved = global_registry.resolve_for_agent(include=wanted)
            if resolved:
                self._tool_registry.load_registry_servers(
                    [asdict(c) for c in resolved]
                )
                loaded = True
                logger.info(
                    "[pipeline] McpRegistryStage: loaded %d servers: %s",
                    len(resolved),
                    wanted,
                )

        # 2. Legacy: mcp_servers.json
        if not loaded and self._agent_path:
            legacy_config = self._agent_path / "mcp_servers.json"
            if legacy_config.exists():
                self._tool_registry.load_mcp_config(legacy_config)
                loaded = True

        # 3. Fallback: all servers from global registry
        if not loaded:
            resolved = global_registry.resolve_for_agent(profile="all")
            if resolved:
                self._tool_registry.load_registry_servers(
                    [asdict(c) for c in resolved]
                )
                logger.info(
                    "[pipeline] McpRegistryStage: loaded %d servers (fallback)",
                    len(resolved),
                )

        logger.info(
            "[pipeline] McpRegistryStage: %d tools available",
            len(self._tool_registry.get_tools()),
        )

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        # Infrastructure stage: discovery happens in initialize().
        return PipelineResult(action="continue")

    @property
    def tool_registry(self):
        # Exposed for the host to read after initialize().
        return self._tool_registry
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
"""Per-(entry-point, session) rate limiting stage."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
|
||||||
|
@register("rate_limit")
class RateLimitStage(PipelineStage):
    """Reject requests that exceed ``max_requests_per_minute`` per session.

    The key is ``<entry_point_id>:<session_id>``. When no session_id is
    present in ``session_state``, a single shared "default" bucket is used.
    """

    order = 200

    def __init__(self, max_requests_per_minute: int = 60) -> None:
        # Allowed requests per rolling 60-second window, per bucket.
        self._max_rpm = max_requests_per_minute
        # bucket key -> monotonic timestamps of requests inside the window
        self._timestamps: dict[str, list[float]] = defaultdict(list)

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        """Check (and on success record) one request against its bucket."""
        session_id = "default"
        if ctx.session_state:
            session_id = str(ctx.session_state.get("session_id", "default"))
        key = f"{ctx.entry_point_id}:{session_id}"

        now = time.monotonic()
        # Prune entries older than 60s.
        bucket = [t for t in self._timestamps[key] if now - t < 60.0]
        if len(bucket) >= self._max_rpm:
            self._timestamps[key] = bucket
            return PipelineResult(
                action="reject",
                rejection_reason=(
                    f"Rate limit exceeded: {self._max_rpm} req/min "
                    f"for session '{session_id}'"
                ),
            )
        bucket.append(now)
        self._timestamps[key] = bucket
        # BUGFIX: previously only the bucket being touched was ever pruned,
        # so one entry per dead session was retained forever. Sweep buckets
        # whose newest timestamp fell out of the window.
        self._drop_stale_buckets(now)
        return PipelineResult(action="continue")

    def _drop_stale_buckets(self, now: float) -> None:
        """Delete buckets whose newest entry is outside the 60s window."""
        stale = [
            k
            for k, ts in self._timestamps.items()
            if not ts or now - ts[-1] >= 60.0
        ]
        for k in stale:
            del self._timestamps[k]
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
"""Skill registry pipeline stage.
|
||||||
|
|
||||||
|
Discovers and loads skills. This is the ONLY place skills get loaded.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from framework.pipeline.registry import register
|
||||||
|
from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@register("skill_registry")
class SkillRegistryStage(PipelineStage):
    """Discover skills and provide prompts."""

    order = 60

    def __init__(
        self,
        project_root: str | Path | None = None,
        interactive: bool = True,
        skills_config: Any = None,
        **kwargs: Any,
    ) -> None:
        self._project_root = Path(project_root) if project_root else None
        self._interactive = interactive
        self._skills_config = skills_config
        # Populated during initialize(); read by the host afterwards.
        self.skills_manager: Any = None

    async def initialize(self) -> None:
        """Build the manager, load skills, and start watching for changes."""
        from framework.skills.config import SkillsConfig
        from framework.skills.manager import SkillsManager, SkillsManagerConfig

        manager_config = SkillsManagerConfig(
            skills_config=self._skills_config or SkillsConfig(),
            project_root=self._project_root,
            interactive=self._interactive,
        )
        self.skills_manager = SkillsManager(manager_config)
        self.skills_manager.load()
        await self.skills_manager.start_watching()
        logger.info(
            "[pipeline] SkillRegistryStage: catalog=%d chars, protocols=%d chars",
            len(self.skills_manager.skills_catalog_prompt),
            len(self.skills_manager.protocols_prompt),
        )

    async def process(self, ctx: PipelineContext) -> PipelineResult:
        # Infrastructure stage: loading happens in initialize().
        return PipelineResult(action="continue")
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
"""Agent Runner - load and run exported agents."""
|
|
||||||
|
|
||||||
from framework.runner.mcp_registry import MCPRegistry
|
|
||||||
from framework.runner.protocol import (
|
|
||||||
AgentMessage,
|
|
||||||
CapabilityLevel,
|
|
||||||
CapabilityResponse,
|
|
||||||
MessageType,
|
|
||||||
OrchestratorResult,
|
|
||||||
)
|
|
||||||
from framework.runner.runner import AgentInfo, AgentRunner, ValidationResult
|
|
||||||
from framework.runner.tool_registry import ToolRegistry, tool
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
# Single agent
|
|
||||||
"AgentRunner",
|
|
||||||
"AgentInfo",
|
|
||||||
"ValidationResult",
|
|
||||||
"ToolRegistry",
|
|
||||||
"MCPRegistry",
|
|
||||||
"tool",
|
|
||||||
"AgentMessage",
|
|
||||||
"MessageType",
|
|
||||||
"CapabilityLevel",
|
|
||||||
"CapabilityResponse",
|
|
||||||
"OrchestratorResult",
|
|
||||||
]
|
|
||||||
@@ -1,493 +0,0 @@
|
|||||||
# Event Types and Schema Reference
|
|
||||||
|
|
||||||
The Hive runtime uses a pub/sub `EventBus` for inter-component communication and observability. Every event is an `AgentEvent` dataclass published through `EventBus.publish()`.
|
|
||||||
|
|
||||||
## Event Envelope (`AgentEvent`)
|
|
||||||
|
|
||||||
Every event shares a common envelope:
|
|
||||||
|
|
||||||
| Field | Type | Description |
|
|
||||||
| ---------------- | ----------------- | ------------------------------------------------------------ |
|
|
||||||
| `type` | `EventType` (str) | Event type identifier (see below) |
|
|
||||||
| `stream_id` | `str` | Entry point / pipeline that emitted the event |
|
|
||||||
| `node_id` | `str \| None` | Graph node that emitted the event |
|
|
||||||
| `execution_id` | `str \| None` | Unique execution run ID (UUID, set by `ExecutionStream`) |
|
|
||||||
| `graph_id` | `str \| None` | Graph that emitted the event (set by `GraphScopedEventBus`) |
|
|
||||||
| `data` | `dict` | Event-type-specific payload (see individual schemas below) |
|
|
||||||
| `timestamp` | `datetime` | When the event was created |
|
|
||||||
| `correlation_id` | `str \| None` | Optional ID for tracking related events across streams |
|
|
||||||
|
|
||||||
### Identity Fields
|
|
||||||
|
|
||||||
The identity tuple `(graph_id, stream_id, node_id, execution_id)` uniquely locates any event:
|
|
||||||
|
|
||||||
- **`graph_id`** — Which graph produced the event. Set automatically by `GraphScopedEventBus` (a subclass that stamps `graph_id` on every `publish()` call). Values: `"worker"`, `"judge"`, `"queen"`, or the graph spec ID.
|
|
||||||
- **`stream_id`** — Which entry point / pipeline. Corresponds to `EntryPointSpec.id` in the graph definition. For single-entry-point graphs, this equals the entry point name (e.g. `"default"`, `"health_check"`).
|
|
||||||
- **`node_id`** — Which specific node emitted the event. For `EventLoopNode` events, this is the node spec ID.
|
|
||||||
- **`execution_id`** — UUID identifying a specific execution run. Multiple concurrent executions of the same entry point each get a unique `execution_id`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Execution Lifecycle
|
|
||||||
|
|
||||||
### `execution_started`
|
|
||||||
|
|
||||||
A new graph execution has begun.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ------ | ------------------------------- |
|
|
||||||
| `input` | `dict` | Input data passed to the graph |
|
|
||||||
|
|
||||||
**Emitted by:** `ExecutionStream._run_execution()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `execution_completed`
|
|
||||||
|
|
||||||
A graph execution finished successfully.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ------ | ----------------- |
|
|
||||||
| `output` | `dict` | Final output data |
|
|
||||||
|
|
||||||
**Emitted by:** `ExecutionStream._run_execution()`
|
|
||||||
|
|
||||||
**Queen notification:** When a worker execution completes, the session manager
injects a `[WORKER_TERMINAL]` notification into the queen with the output summary.
The queen reports to the user and asks what to do next.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `execution_failed`
|
|
||||||
|
|
||||||
A graph execution failed with an error.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ------------- |
|
|
||||||
| `error` | `str` | Error message |
|
|
||||||
|
|
||||||
**Emitted by:** `ExecutionStream._run_execution()`
|
|
||||||
|
|
||||||
**Queen notification:** When a worker execution fails, the session manager
injects a `[WORKER_TERMINAL]` notification into the queen with the error.
The queen reports to the user and helps troubleshoot.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `execution_paused`
|
|
||||||
|
|
||||||
Execution has been paused (Ctrl+Z or HITL approval).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ----------------- |
|
|
||||||
| `reason` | `str` | Why it was paused |
|
|
||||||
|
|
||||||
**Emitted by:** `GraphExecutor.execute()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `execution_resumed`
|
|
||||||
|
|
||||||
Execution has resumed from a paused state.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ---- | ----------- |
|
|
||||||
| *(none)* | | |
|
|
||||||
|
|
||||||
**Emitted by:** `GraphExecutor.execute()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Node Event-Loop Lifecycle
|
|
||||||
|
|
||||||
These events track the inner loop of `EventLoopNode` — the multi-turn LLM streaming loop that powers most agent nodes.
|
|
||||||
|
|
||||||
### `node_loop_started`
|
|
||||||
|
|
||||||
An EventLoopNode has begun its execution loop.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------------- | ---------- | ------------------------------- |
|
|
||||||
| `max_iterations` | `int\|null`| Maximum iterations configured |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_loop_started()`, `GraphExecutor` (for function nodes in parallel branches)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `node_loop_iteration`
|
|
||||||
|
|
||||||
An EventLoopNode has started a new iteration (one LLM turn).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ----------- | ----- | ------------------------- |
|
|
||||||
| `iteration` | `int` | Zero-based iteration index |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_iteration()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `node_loop_completed`
|
|
||||||
|
|
||||||
An EventLoopNode has finished its execution loop.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------ | ----- | -------------------------------------- |
|
|
||||||
| `iterations` | `int` | Total number of iterations completed |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_loop_completed()`, `GraphExecutor` (for function nodes in parallel branches)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## LLM Streaming
|
|
||||||
|
|
||||||
### `llm_text_delta`
|
|
||||||
|
|
||||||
Incremental text output from the LLM (non-client-facing nodes only).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ---------------------------------------- |
|
|
||||||
| `content` | `str` | New text chunk (delta) |
|
|
||||||
| `snapshot` | `str` | Full accumulated text so far |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_text_delta()` when `client_facing=False`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `llm_reasoning_delta`
|
|
||||||
|
|
||||||
Incremental reasoning/thinking output from the LLM.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ------------------- |
|
|
||||||
| `content` | `str` | New reasoning chunk |
|
|
||||||
|
|
||||||
**Emitted by:** Not currently wired in `EventLoopNode` (reserved for extended thinking models).
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Tool Lifecycle
|
|
||||||
|
|
||||||
### `tool_call_started`
|
|
||||||
|
|
||||||
The LLM has requested a tool call and execution is about to begin.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------ | ------ | ------------------------------------ |
|
|
||||||
| `tool_use_id`| `str` | Unique ID for this tool invocation |
|
|
||||||
| `tool_name` | `str` | Name of the tool being called |
|
|
||||||
| `tool_input` | `dict` | Arguments passed to the tool |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_tool_started()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `tool_call_completed`
|
|
||||||
|
|
||||||
A tool call has finished executing.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------ | ------ | -------------------------------------- |
|
|
||||||
| `tool_use_id`| `str` | Same ID from `tool_call_started` |
|
|
||||||
| `tool_name` | `str` | Name of the tool |
|
|
||||||
| `result` | `str` | Tool execution result (may be truncated)|
|
|
||||||
| `is_error` | `bool` | Whether the tool returned an error |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_tool_completed()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Client I/O
|
|
||||||
|
|
||||||
These events are emitted by the queen's interactive turns. They drive the TUI's chat interface.
|
|
||||||
|
|
||||||
### `client_output_delta`
|
|
||||||
|
|
||||||
Incremental text output meant for the human operator.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ---------------------------- |
|
|
||||||
| `content` | `str` | New text chunk (delta) |
|
|
||||||
| `snapshot` | `str` | Full accumulated text so far |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_text_delta()` for queen/user-facing output
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `client_input_requested`
|
|
||||||
|
|
||||||
The node is waiting for human input (via `ask_user` tool or auto-block on text-only turns).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ------------------------------------------------- |
|
|
||||||
| `prompt` | `str` | Optional prompt/question shown to the user |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._await_user_input()`, doom loop handler
|
|
||||||
|
|
||||||
The TUI subscribes to this event to show the input prompt and focus the chat input. After the user types, `inject_event()` is called on the node to unblock it.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Internal Node Observability
|
|
||||||
|
|
||||||
### `node_internal_output`
|
|
||||||
|
|
||||||
Output from a non-client-facing node (for debugging/monitoring).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ---------------- |
|
|
||||||
| `content` | `str` | Output text |
|
|
||||||
|
|
||||||
**Emitted by:** Available via `emit_node_internal_output()` — not currently wired in the default `EventLoopNode`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `node_input_blocked`
|
|
||||||
|
|
||||||
A non-client-facing node is blocked waiting for input.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | --------------- |
|
|
||||||
| `prompt` | `str` | Block reason |
|
|
||||||
|
|
||||||
**Emitted by:** Available via `emit_node_input_blocked()` — reserved for future use.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `node_stalled`
|
|
||||||
|
|
||||||
The node's LLM has produced identical responses for several consecutive turns (stall detection).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ------------------------------------------------- |
|
|
||||||
| `reason` | `str` | Always `"Consecutive identical responses detected"`|
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_stalled()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `node_tool_doom_loop`
|
|
||||||
|
|
||||||
The LLM is calling the same tool(s) with identical arguments repeatedly (doom loop detection).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------- | ----- | ------------------------------------ |
|
|
||||||
| `description` | `str` | Human-readable doom loop description |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode` doom loop handler
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Judge Decisions
|
|
||||||
|
|
||||||
### `judge_verdict`
|
|
||||||
|
|
||||||
The judge (custom or implicit) has evaluated the current iteration.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------ | ----- | ---------------------------------------------------- |
|
|
||||||
| `action` | `str` | `"ACCEPT"`, `"RETRY"`, `"ESCALATE"`, or `"CONTINUE"` |
|
|
||||||
| `feedback` | `str` | Judge feedback (empty for ACCEPT/CONTINUE) |
|
|
||||||
| `judge_type` | `str` | `"custom"` (explicit JudgeProtocol) or `"implicit"` (stop-reason heuristic) |
|
|
||||||
| `iteration` | `int` | Which iteration this verdict applies to |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_judge_verdict()`
|
|
||||||
|
|
||||||
**Verdict meanings:**
|
|
||||||
- **ACCEPT** — Output meets requirements; node exits successfully.
|
|
||||||
- **RETRY** — Output needs improvement; loop continues with feedback injected.
|
|
||||||
- **ESCALATE** — Problem cannot be solved at this level; triggers escalation.
|
|
||||||
- **CONTINUE** — Implicit verdict: LLM called tools, so it's making progress — let it keep going.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Output Tracking
|
|
||||||
|
|
||||||
### `output_key_set`
|
|
||||||
|
|
||||||
A node has set an output key via the `set_output` synthetic tool.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ----------------- |
|
|
||||||
| `key` | `str` | Output key name |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode._publish_output_key_set()`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Retry & Edge Tracking
|
|
||||||
|
|
||||||
### `node_retry`
|
|
||||||
|
|
||||||
A transient error occurred during an LLM call and the node is retrying.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ------------- | ----- | ---------------------------------- |
|
|
||||||
| `retry_count` | `int` | Current retry attempt number |
|
|
||||||
| `max_retries` | `int` | Maximum retries configured |
|
|
||||||
| `error` | `str` | Error message (truncated to 500ch) |
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode` (stream retry handler), `GraphExecutor` (node-level retry)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `edge_traversed`
|
|
||||||
|
|
||||||
The executor has traversed an edge from one node to another.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------------- | ----- | ---------------------------------------------- |
|
|
||||||
| `source_node` | `str` | Node ID the edge starts from |
|
|
||||||
| `target_node` | `str` | Node ID the edge goes to |
|
|
||||||
| `edge_condition` | `str` | Edge condition: `"router"`, `"on_success"`, etc. |
|
|
||||||
|
|
||||||
**Emitted by:** `GraphExecutor.execute()` — after router decisions, condition-based edges, and fallback edges.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Context Management
|
|
||||||
|
|
||||||
### `context_compacted`
|
|
||||||
|
|
||||||
Not currently emitted — reserved for future use when `NodeConversation` compacts history.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## State Changes
|
|
||||||
|
|
||||||
### `state_changed`
|
|
||||||
|
|
||||||
A shared buffer key has been modified.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ----------- | ----- | ---------------------------------- |
|
|
||||||
| `key` | `str` | Buffer key that changed |
|
|
||||||
| `old_value` | `Any` | Previous value |
|
|
||||||
| `new_value` | `Any` | New value |
|
|
||||||
| `scope` | `str` | Scope of the change |
|
|
||||||
|
|
||||||
**Emitted by:** Available via `emit_state_changed()` — not currently wired in default execution.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `state_conflict`
|
|
||||||
|
|
||||||
Not currently emitted — reserved for concurrent write conflict detection.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Goal Tracking
|
|
||||||
|
|
||||||
### `goal_progress`
|
|
||||||
|
|
||||||
Goal completion progress update.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ----------------- | ------- | ------------------------------------ |
|
|
||||||
| `progress` | `float` | 0.0–1.0 completion fraction |
|
|
||||||
| `criteria_status` | `dict` | Per-criterion status |
|
|
||||||
|
|
||||||
**Emitted by:** Available via `emit_goal_progress()` — not currently wired in default execution.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `goal_achieved`
|
|
||||||
|
|
||||||
Not currently emitted — reserved for explicit goal completion signals.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### `constraint_violation`
|
|
||||||
|
|
||||||
A goal constraint has been violated.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| --------------- | ----- | ------------------------ |
|
|
||||||
| `constraint_id` | `str` | Which constraint failed |
|
|
||||||
| `description` | `str` | What went wrong |
|
|
||||||
|
|
||||||
**Emitted by:** Available via `emit_constraint_violation()`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Stream Lifecycle
|
|
||||||
|
|
||||||
### `stream_started` / `stream_stopped`
|
|
||||||
|
|
||||||
Not currently emitted — reserved for `ExecutionStream` lifecycle tracking.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## External Triggers
|
|
||||||
|
|
||||||
### `webhook_received`
|
|
||||||
|
|
||||||
An external webhook has been received.
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| -------------- | ------ | ---------------------------- |
|
|
||||||
| `path` | `str` | Webhook URL path |
|
|
||||||
| `method` | `str` | HTTP method |
|
|
||||||
| `headers` | `dict` | HTTP headers |
|
|
||||||
| `payload` | `dict` | Request body |
|
|
||||||
| `query_params` | `dict` | URL query parameters |
|
|
||||||
|
|
||||||
**Emitted by:** Webhook server integration.
|
|
||||||
|
|
||||||
Note: `node_id` is not set on this event; `stream_id` is the webhook source ID.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Escalation
|
|
||||||
|
|
||||||
### `escalation_requested`
|
|
||||||
|
|
||||||
An agent has requested handoff to the Hive Coder (via the `escalate` synthetic tool).
|
|
||||||
|
|
||||||
| Data Field | Type | Description |
|
|
||||||
| ---------- | ----- | ------------------------------- |
|
|
||||||
| `reason` | `str` | Why escalation is needed |
|
|
||||||
| `context` | `str` | Additional context for the coder|
|
|
||||||
|
|
||||||
**Emitted by:** `EventLoopNode` when the LLM calls `escalate`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Custom Events
|
|
||||||
|
|
||||||
### `custom`
|
|
||||||
|
|
||||||
User-defined events with arbitrary payloads. No schema enforced.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Subscription & Filtering
|
|
||||||
|
|
||||||
Events can be filtered when subscribing:
|
|
||||||
|
|
||||||
```python
|
|
||||||
bus.subscribe(
|
|
||||||
event_types=[EventType.TOOL_CALL_STARTED, EventType.TOOL_CALL_COMPLETED],
|
|
||||||
handler=my_handler,
|
|
||||||
filter_stream="default", # Only events from this stream
|
|
||||||
filter_node="planner", # Only events from this node
|
|
||||||
filter_execution="exec-uuid", # Only events from this execution
|
|
||||||
filter_graph="worker", # Only events from this graph
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Debug Event Logging
|
|
||||||
|
|
||||||
Set `HIVE_DEBUG_EVENTS=1` to write every published event to a JSONL file at `~/.hive/event_logs/<timestamp>.jsonl`. Each line is the full JSON serialization of an `AgentEvent`:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"type": "tool_call_started",
|
|
||||||
"stream_id": "default",
|
|
||||||
"node_id": "planner",
|
|
||||||
"execution_id": "a1b2c3d4-...",
|
|
||||||
"graph_id": "worker",
|
|
||||||
"data": {"tool_use_id": "tu_1", "tool_name": "web_search", "tool_input": {"query": "..."}},
|
|
||||||
"timestamp": "2026-02-24T12:00:00.000000",
|
|
||||||
"correlation_id": null
|
|
||||||
}
|
|
||||||
```
|
|
||||||
@@ -1,171 +0,0 @@
|
|||||||
# Agent Runtime
|
|
||||||
|
|
||||||
Unified execution system for all Hive agents. Every agent — single-entry or multi-entry, headless or dashboard — runs through the same runtime stack.
|
|
||||||
|
|
||||||
## Topology
|
|
||||||
|
|
||||||
```
|
|
||||||
AgentRunner.load(agent_path)
|
|
||||||
|
|
|
||||||
AgentRunner
|
|
||||||
(factory + public API)
|
|
||||||
|
|
|
||||||
_setup_agent_runtime()
|
|
||||||
|
|
|
||||||
AgentRuntime
|
|
||||||
(lifecycle + orchestration)
|
|
||||||
/ | \
|
|
||||||
Stream A Stream B Stream C ← one per entry point
|
|
||||||
| | |
|
|
||||||
GraphExecutor GraphExecutor GraphExecutor
|
|
||||||
| | |
|
|
||||||
Node → Node → Node (graph traversal)
|
|
||||||
```
|
|
||||||
|
|
||||||
Single-entry agents get a `"default"` entry point automatically. There is no separate code path.
|
|
||||||
|
|
||||||
## Components
|
|
||||||
|
|
||||||
| Component | File | Role |
|
|
||||||
|---|---|---|
|
|
||||||
| `AgentRunner` | `runner/runner.py` | Load agents, configure tools/LLM, expose high-level API |
|
|
||||||
| `AgentRuntime` | `runtime/agent_runtime.py` | Lifecycle management, entry point routing, event bus |
|
|
||||||
| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence |
|
|
||||||
| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing |
|
|
||||||
| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) |
|
|
||||||
| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels |
|
|
||||||
| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams |
|
|
||||||
| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) |
|
|
||||||
|
|
||||||
## Programming Interface
|
|
||||||
|
|
||||||
### AgentRunner (high-level)
|
|
||||||
|
|
||||||
```python
|
|
||||||
from framework.runner import AgentRunner
|
|
||||||
|
|
||||||
# Load and run
|
|
||||||
runner = AgentRunner.load("exports/my_agent", model="anthropic/claude-sonnet-4-20250514")
|
|
||||||
result = await runner.run({"query": "hello"})
|
|
||||||
|
|
||||||
# Resume from paused session
|
|
||||||
result = await runner.run({"query": "continue"}, session_state=saved_state)
|
|
||||||
|
|
||||||
# Lifecycle
|
|
||||||
await runner.start() # Start the runtime
|
|
||||||
await runner.stop() # Stop the runtime
|
|
||||||
exec_id = await runner.trigger("default", {}) # Non-blocking trigger
|
|
||||||
entry_points = runner.get_entry_points() # List entry points
|
|
||||||
|
|
||||||
# Context manager
|
|
||||||
async with AgentRunner.load("exports/my_agent") as runner:
|
|
||||||
result = await runner.run({"query": "hello"})
|
|
||||||
|
|
||||||
# Cleanup
|
|
||||||
runner.cleanup() # Synchronous
|
|
||||||
await runner.cleanup_async() # Asynchronous
|
|
||||||
```
|
|
||||||
|
|
||||||
### AgentRuntime (lower-level)
|
|
||||||
|
|
||||||
```python
|
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
|
||||||
from framework.runtime.execution_stream import EntryPointSpec
|
|
||||||
|
|
||||||
# Create runtime with entry points
|
|
||||||
runtime = create_agent_runtime(
|
|
||||||
graph=graph,
|
|
||||||
goal=goal,
|
|
||||||
storage_path=Path("~/.hive/agents/my_agent"),
|
|
||||||
entry_points=[
|
|
||||||
EntryPointSpec(id="default", name="Default", entry_node="start", trigger_type="manual"),
|
|
||||||
],
|
|
||||||
llm=llm,
|
|
||||||
tools=tools,
|
|
||||||
tool_executor=tool_executor,
|
|
||||||
checkpoint_config=checkpoint_config,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Lifecycle
|
|
||||||
await runtime.start()
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
# Execution
|
|
||||||
exec_id = await runtime.trigger("default", {"query": "hello"}) # Non-blocking
|
|
||||||
result = await runtime.trigger_and_wait("default", {"query": "hello"}) # Blocking
|
|
||||||
result = await runtime.trigger_and_wait("default", {}, session_state=state) # Resume
|
|
||||||
|
|
||||||
# Client-facing node I/O
|
|
||||||
await runtime.inject_input(node_id="chat", content="user response")
|
|
||||||
|
|
||||||
# Events
|
|
||||||
sub_id = runtime.subscribe_to_events(
|
|
||||||
event_types=[EventType.CLIENT_OUTPUT_DELTA],
|
|
||||||
handler=my_handler,
|
|
||||||
)
|
|
||||||
runtime.unsubscribe_from_events(sub_id)
|
|
||||||
|
|
||||||
# Inspection
|
|
||||||
runtime.is_running # bool
|
|
||||||
runtime.event_bus # EventBus
|
|
||||||
runtime.state_manager # SharedBufferManager
|
|
||||||
runtime.get_stats() # Runtime statistics
|
|
||||||
```
|
|
||||||
|
|
||||||
## Execution Flow
|
|
||||||
|
|
||||||
1. `AgentRunner.run()` calls `AgentRuntime.trigger_and_wait()`
|
|
||||||
2. `AgentRuntime` routes to the `ExecutionStream` for the entry point
|
|
||||||
3. `ExecutionStream` creates a `GraphExecutor` and calls `execute()`
|
|
||||||
4. `GraphExecutor` traverses nodes, dispatches tools, manages checkpoints
|
|
||||||
5. `ExecutionResult` flows back up through the stack
|
|
||||||
6. `ExecutionStream` writes session state to disk
|
|
||||||
|
|
||||||
## Session Resume
|
|
||||||
|
|
||||||
All execution paths support session resume:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# First run (agent pauses at a client-facing node)
|
|
||||||
result = await runner.run({"query": "start task"})
|
|
||||||
# result.paused_at = "review-node"
|
|
||||||
# result.session_state = {"memory": {...}, "paused_at": "review-node", ...}
|
|
||||||
|
|
||||||
# Resume
|
|
||||||
result = await runner.run({"input": "approved"}, session_state=result.session_state)
|
|
||||||
```
|
|
||||||
|
|
||||||
Session state flows: `AgentRunner.run()` → `AgentRuntime.trigger_and_wait()` → `ExecutionStream.execute()` → `GraphExecutor.execute()`.
|
|
||||||
|
|
||||||
Checkpoints are saved at node boundaries (`sessions/{id}/checkpoints/`) for crash recovery.
|
|
||||||
|
|
||||||
## Event Bus
|
|
||||||
|
|
||||||
The `EventBus` provides real-time execution visibility:
|
|
||||||
|
|
||||||
| Event | When |
|
|
||||||
|---|---|
|
|
||||||
| `NODE_STARTED` | Node begins execution |
|
|
||||||
| `NODE_COMPLETED` | Node finishes |
|
|
||||||
| `TOOL_CALL_STARTED` | Tool invocation begins |
|
|
||||||
| `TOOL_CALL_COMPLETED` | Tool invocation finishes |
|
|
||||||
| `CLIENT_OUTPUT_DELTA` | Agent streams text to user |
|
|
||||||
| `CLIENT_INPUT_REQUESTED` | Agent needs user input |
|
|
||||||
| `EXECUTION_COMPLETED` | Full execution finishes |
|
|
||||||
|
|
||||||
In headless mode, `AgentRunner` subscribes to `CLIENT_OUTPUT_DELTA` and `CLIENT_INPUT_REQUESTED` to print output and read stdin. The web dashboard subscribes to route events to the frontend.
|
|
||||||
|
|
||||||
## Storage Layout
|
|
||||||
|
|
||||||
```
|
|
||||||
~/.hive/agents/{agent_name}/
|
|
||||||
sessions/
|
|
||||||
session_YYYYMMDD_HHMMSS_{uuid}/
|
|
||||||
state.json # Session state (status, memory, progress)
|
|
||||||
checkpoints/ # Node-boundary snapshots
|
|
||||||
logs/
|
|
||||||
summary.json # Execution summary
|
|
||||||
details.jsonl # Detailed event log
|
|
||||||
tool_logs.jsonl # Tool call log
|
|
||||||
runtime_logs/ # Cross-session runtime logs
|
|
||||||
```
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
"""Runtime core for agent execution."""

from framework.runtime.core import Runtime

# Explicit public API of the runtime package: only the Runtime facade is exported.
__all__ = ["Runtime"]
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
"""Tests for runtime components."""
|
|
||||||
@@ -1,869 +0,0 @@
|
|||||||
"""
|
|
||||||
Tests for AgentRuntime and multi-entry-point execution.
|
|
||||||
|
|
||||||
Tests:
|
|
||||||
1. AgentRuntime creation and lifecycle
|
|
||||||
2. Entry point registration
|
|
||||||
3. Concurrent executions across streams
|
|
||||||
4. SharedBufferManager isolation levels
|
|
||||||
5. OutcomeAggregator goal evaluation
|
|
||||||
6. EventBus pub/sub
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from framework.graph import Goal
|
|
||||||
from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec
|
|
||||||
from framework.graph.goal import Constraint, SuccessCriterion
|
|
||||||
from framework.graph.node import NodeSpec
|
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime
|
|
||||||
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
|
|
||||||
from framework.runtime.execution_stream import EntryPointSpec
|
|
||||||
from framework.runtime.outcome_aggregator import OutcomeAggregator
|
|
||||||
from framework.runtime.shared_state import IsolationLevel, SharedBufferManager
|
|
||||||
from framework.schemas.session_state import SessionState, SessionTimestamps
|
|
||||||
|
|
||||||
# === Test Fixtures ===
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def sample_goal():
    """Build a minimal Goal with one success criterion and one hard constraint.

    Returns:
        Goal: a goal usable by runtime/aggregator tests.
    """
    criterion = SuccessCriterion(
        id="sc-1",
        description="Process all requests",
        metric="requests_processed",
        target="100%",
        weight=1.0,
    )
    rate_limit_constraint = Constraint(
        id="c-1",
        description="Must not exceed rate limits",
        constraint_type="hard",
        category="operational",
    )
    return Goal(
        id="test-goal",
        name="Test Goal",
        description="A goal for testing multi-entry-point execution",
        success_criteria=[criterion],
        constraints=[rate_limit_constraint],
    )
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def sample_graph():
    """Build a small GraphSpec: two event-loop nodes feeding one terminal node.

    Returns:
        GraphSpec: graph with entry node ``process-webhook`` and terminal
        node ``complete``; ``process-api`` is a second processing path.
    """
    webhook_node = NodeSpec(
        id="process-webhook",
        name="Process Webhook",
        description="Process incoming webhook",
        node_type="event_loop",
        input_keys=["webhook_data"],
        output_keys=["result"],
    )
    api_node = NodeSpec(
        id="process-api",
        name="Process API Request",
        description="Process API request",
        node_type="event_loop",
        input_keys=["request_data"],
        output_keys=["result"],
    )
    terminal_node = NodeSpec(
        id="complete",
        name="Complete",
        description="Execution complete",
        node_type="terminal",
        input_keys=["result"],
        output_keys=["final_result"],
    )

    # Both processing nodes route to the terminal node on success.
    edges = [
        EdgeSpec(
            id="webhook-to-complete",
            source="process-webhook",
            target="complete",
            condition=EdgeCondition.ON_SUCCESS,
        ),
        EdgeSpec(
            id="api-to-complete",
            source="process-api",
            target="complete",
            condition=EdgeCondition.ON_SUCCESS,
        ),
    ]

    return GraphSpec(
        id="test-graph",
        goal_id="test-goal",
        version="1.0.0",
        entry_node="process-webhook",
        entry_points={"start": "process-webhook"},
        terminal_nodes=["complete"],
        pause_nodes=[],
        nodes=[webhook_node, api_node, terminal_node],
        edges=edges,
    )
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def temp_storage():
    """Create a temporary storage directory.

    Yields:
        Path: root of a scratch directory, removed automatically when the
        test finishes (the context manager cleans up on teardown).
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        yield Path(tmpdir)
|
|
||||||
|
|
||||||
|
|
||||||
# === SharedBufferManager Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestSharedBufferManager:
    """Tests for SharedBufferManager."""

    def test_create_buffer(self):
        """An execution-scoped buffer records its execution and stream IDs."""
        mgr = SharedBufferManager()
        buf = mgr.create_buffer(
            execution_id="exec-1",
            stream_id="webhook",
            isolation=IsolationLevel.SHARED,
        )
        assert buf is not None
        assert buf._execution_id == "exec-1"
        assert buf._stream_id == "webhook"

    @pytest.mark.asyncio
    async def test_isolated_state(self):
        """Writes to ISOLATED buffers must not leak between executions."""
        mgr = SharedBufferManager()
        first = mgr.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
        second = mgr.create_buffer("exec-2", "stream-1", IsolationLevel.ISOLATED)

        await first.write("key", "value1")
        await second.write("key", "value2")

        # Each buffer sees only its own write for the same key.
        assert await first.read("key") == "value1"
        assert await second.read("key") == "value2"

    @pytest.mark.asyncio
    async def test_shared_state(self):
        """A global-scope write under SHARED isolation is visible to every execution."""
        mgr = SharedBufferManager()
        mgr.create_buffer("exec-1", "stream-1", IsolationLevel.SHARED)
        mgr.create_buffer("exec-2", "stream-1", IsolationLevel.SHARED)

        await mgr.write(
            key="global_key",
            value="global_value",
            execution_id="exec-1",
            stream_id="stream-1",
            isolation=IsolationLevel.SHARED,
            scope="global",
        )

        # Both executions read the value written by exec-1.
        seen_by_first = await mgr.read("global_key", "exec-1", "stream-1", IsolationLevel.SHARED)
        seen_by_second = await mgr.read("global_key", "exec-2", "stream-1", IsolationLevel.SHARED)
        assert seen_by_first == "global_value"
        assert seen_by_second == "global_value"

    def test_cleanup_execution(self):
        """cleanup_execution drops the per-execution state entry."""
        mgr = SharedBufferManager()
        mgr.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED)
        assert "exec-1" in mgr._execution_state

        mgr.cleanup_execution("exec-1")
        assert "exec-1" not in mgr._execution_state
|
|
||||||
|
|
||||||
|
|
||||||
class TestSessionState:
    """Tests for session state data-buffer compatibility."""

    def test_legacy_memory_alias_populates_data_buffer(self):
        """Legacy `memory` payloads should still hydrate the session buffer."""
        legacy_payload = {"rules": "keep starred mail"}
        session = SessionState(
            session_id="session-1",
            goal_id="goal-1",
            timestamps=SessionTimestamps(
                started_at="2026-01-01T00:00:00",
                updated_at="2026-01-01T00:00:00",
            ),
            memory=legacy_payload,
        )

        # The legacy alias must populate both attribute views and the
        # serialized session-state dict.
        assert session.data_buffer == legacy_payload
        assert session.memory == legacy_payload
        assert session.to_session_state_dict()["data_buffer"] == legacy_payload
|
|
||||||
|
|
||||||
|
|
||||||
# === EventBus Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestEventBus:
    """Tests for EventBus pub/sub."""

    @pytest.mark.asyncio
    async def test_publish_subscribe(self):
        """A subscribed handler receives a matching published event."""
        bus = EventBus()
        seen = []

        async def collect(event: AgentEvent):
            seen.append(event)

        bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=collect,
        )

        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_STARTED,
                stream_id="webhook",
                execution_id="exec-1",
                data={"test": "data"},
            )
        )

        # Give the async handler a chance to run before asserting.
        await asyncio.sleep(0.1)

        assert len(seen) == 1
        assert seen[0].type == EventType.EXECUTION_STARTED
        assert seen[0].stream_id == "webhook"

    @pytest.mark.asyncio
    async def test_stream_filter(self):
        """filter_stream restricts delivery to events from one stream."""
        bus = EventBus()
        seen = []

        async def collect(event: AgentEvent):
            seen.append(event)

        bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=collect,
            filter_stream="webhook",
        )

        # Matching stream: should be delivered.
        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_STARTED,
                stream_id="webhook",
            )
        )
        # Non-matching stream: should be dropped by the filter.
        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_STARTED,
                stream_id="api",
            )
        )

        await asyncio.sleep(0.1)

        assert len(seen) == 1
        assert seen[0].stream_id == "webhook"

    def test_unsubscribe(self):
        """unsubscribe removes the subscription and reports success."""
        bus = EventBus()

        async def noop(event: AgentEvent):
            pass

        sub_id = bus.subscribe(
            event_types=[EventType.EXECUTION_STARTED],
            handler=noop,
        )
        assert sub_id in bus._subscriptions

        removed = bus.unsubscribe(sub_id)

        assert removed is True
        assert sub_id not in bus._subscriptions

    @pytest.mark.asyncio
    async def test_wait_for(self):
        """wait_for resolves once a matching event is published."""
        bus = EventBus()

        async def waiter():
            return await bus.wait_for(
                event_type=EventType.EXECUTION_COMPLETED,
                timeout=1.0,
            )

        # Start waiting in the background, then publish the event it expects.
        pending = asyncio.create_task(waiter())

        await asyncio.sleep(0.1)
        await bus.publish(
            AgentEvent(
                type=EventType.EXECUTION_COMPLETED,
                stream_id="webhook",
                execution_id="exec-1",
            )
        )

        event = await pending

        assert event is not None
        assert event.type == EventType.EXECUTION_COMPLETED
|
|
||||||
|
|
||||||
|
|
||||||
# === OutcomeAggregator Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestOutcomeAggregator:
    """Tests for OutcomeAggregator."""

    def test_record_decision(self, sample_goal):
        """Recording a decision bumps the counter and stores the record."""
        from framework.schemas.decision import Decision, DecisionType

        tracker = OutcomeAggregator(sample_goal)
        decision = Decision(
            id="dec-1",
            node_id="process-webhook",
            intent="Process incoming webhook",
            decision_type=DecisionType.PATH_CHOICE,
            options=[],
            chosen_option_id="opt-1",
            reasoning="Standard processing path",
        )

        tracker.record_decision("webhook", "exec-1", decision)

        assert tracker._total_decisions == 1
        assert len(tracker._decisions) == 1

    @pytest.mark.asyncio
    async def test_evaluate_goal_progress(self, sample_goal):
        """The progress report exposes all expected top-level keys."""
        tracker = OutcomeAggregator(sample_goal)

        report = await tracker.evaluate_goal_progress()

        expected_keys = (
            "overall_progress",
            "criteria_status",
            "constraint_violations",
            "recommendation",
        )
        for key in expected_keys:
            assert key in report

    def test_record_constraint_violation(self, sample_goal):
        """A recorded violation is stored with its constraint ID."""
        tracker = OutcomeAggregator(sample_goal)

        tracker.record_constraint_violation(
            constraint_id="c-1",
            description="Rate limit exceeded",
            violation_details="More than 100 requests/minute",
            stream_id="webhook",
            execution_id="exec-1",
        )

        violations = tracker._constraint_violations
        assert len(violations) == 1
        assert violations[0].constraint_id == "c-1"
|
|
||||||
|
|
||||||
|
|
||||||
# === AgentRuntime Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestAgentRuntime:
|
|
||||||
"""Tests for AgentRuntime orchestration."""
|
|
||||||
|
|
||||||
def test_register_entry_point(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test registering entry points."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="manual",
|
|
||||||
name="Manual Trigger",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="manual",
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
assert "manual" in runtime._entry_points
|
|
||||||
assert len(runtime.get_entry_points()) == 1
|
|
||||||
|
|
||||||
def test_register_duplicate_entry_point_fails(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that duplicate entry point IDs fail."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook Handler",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="already registered"):
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
def test_register_invalid_entry_node_fails(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that invalid entry nodes fail."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="invalid",
|
|
||||||
name="Invalid Entry",
|
|
||||||
entry_node="nonexistent-node",
|
|
||||||
trigger_type="manual",
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="not found in graph"):
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_start_stop_lifecycle(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test runtime start/stop lifecycle."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook Handler",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
assert not runtime.is_running
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
assert runtime.is_running
|
|
||||||
assert "webhook" in runtime._streams
|
|
||||||
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
assert not runtime.is_running
|
|
||||||
assert len(runtime._streams) == 0
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_trigger_requires_running(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that trigger fails if runtime not running."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook Handler",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
with pytest.raises(RuntimeError, match="not running"):
|
|
||||||
await runtime.trigger("webhook", {"test": "data"})
|
|
||||||
|
|
||||||
|
|
||||||
# === GraphSpec Validation Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
# === Integration Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestCreateAgentRuntime:
|
|
||||||
"""Tests for the create_agent_runtime factory."""
|
|
||||||
|
|
||||||
def test_create_with_entry_points(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test factory creates runtime with entry points."""
|
|
||||||
entry_points = [
|
|
||||||
EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
),
|
|
||||||
EntryPointSpec(
|
|
||||||
id="api",
|
|
||||||
name="API",
|
|
||||||
entry_node="process-api",
|
|
||||||
trigger_type="api",
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
runtime = create_agent_runtime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
entry_points=entry_points,
|
|
||||||
)
|
|
||||||
|
|
||||||
assert len(runtime.get_entry_points()) == 2
|
|
||||||
assert "webhook" in runtime._entry_points
|
|
||||||
assert "api" in runtime._entry_points
|
|
||||||
|
|
||||||
|
|
||||||
# === Timer Entry Point Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestTimerEntryPoints:
|
|
||||||
"""Tests for timer-driven entry points (interval and cron)."""
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_interval_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that interval_minutes timer creates an async task."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-interval",
|
|
||||||
name="Interval Timer",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={"interval_minutes": 60},
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 1
|
|
||||||
assert not runtime._timer_tasks[0].done()
|
|
||||||
# Give the async task a moment to set next_fire
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert "timer-interval" in runtime._timer_next_fire
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
assert len(runtime._timer_tasks) == 0
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cron_timer_starts_task(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that cron expression timer creates an async task."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-cron",
|
|
||||||
name="Cron Timer",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={"cron": "*/5 * * * *"}, # Every 5 minutes
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 1
|
|
||||||
assert not runtime._timer_tasks[0].done()
|
|
||||||
# Give the async task a moment to set next_fire
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert "timer-cron" in runtime._timer_next_fire
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_invalid_cron_expression_skipped(
|
|
||||||
self, sample_graph, sample_goal, temp_storage, caplog
|
|
||||||
):
|
|
||||||
"""Test that an invalid cron expression logs a warning and skips."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-bad-cron",
|
|
||||||
name="Bad Cron Timer",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={"cron": "not a cron expression"},
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 0
|
|
||||||
assert "invalid cron" in caplog.text.lower() or "Invalid cron" in caplog.text
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cron_takes_priority_over_interval(
|
|
||||||
self, sample_graph, sample_goal, temp_storage, caplog
|
|
||||||
):
|
|
||||||
"""Test that when both cron and interval_minutes are set, cron wins."""
|
|
||||||
import logging
|
|
||||||
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-both",
|
|
||||||
name="Both Timer",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={"cron": "0 9 * * *", "interval_minutes": 30},
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
with caplog.at_level(logging.INFO):
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 1
|
|
||||||
# Should log cron, not interval
|
|
||||||
assert any("cron" in r.message.lower() for r in caplog.records)
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_no_interval_or_cron_warns(self, sample_graph, sample_goal, temp_storage, caplog):
|
|
||||||
"""Test that timer with neither cron nor interval_minutes logs a warning."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-empty",
|
|
||||||
name="Empty Timer",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={},
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 0
|
|
||||||
assert "no 'cron' or valid 'interval_minutes'" in caplog.text
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cron_immediate_fires_first(self, sample_graph, sample_goal, temp_storage):
|
|
||||||
"""Test that run_immediately=True with cron doesn't set next_fire before first run."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="timer-cron-immediate",
|
|
||||||
name="Cron Immediate",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="timer",
|
|
||||||
trigger_config={"cron": "0 0 * * *", "run_immediately": True},
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert len(runtime._timer_tasks) == 1
|
|
||||||
# With run_immediately, the task enters the while loop directly,
|
|
||||||
# so _timer_next_fire is NOT set before the first trigger attempt
|
|
||||||
# (it pops it at the top of the loop)
|
|
||||||
# Give it a moment to start executing
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
# Task should still be running (it will try to trigger and likely fail
|
|
||||||
# since there's no LLM, but the task itself continues)
|
|
||||||
assert not runtime._timer_tasks[0].done()
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
|
|
||||||
# === Cancel All Tasks Tests ===
|
|
||||||
|
|
||||||
|
|
||||||
class TestCancelAllTasks:
|
|
||||||
"""Tests for cancel_all_tasks and cancel_all_tasks_async."""
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cancel_all_tasks_async_returns_false_when_no_tasks(
|
|
||||||
self, sample_graph, sample_goal, temp_storage
|
|
||||||
):
|
|
||||||
"""Test that cancel_all_tasks_async returns False with no running tasks."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
result = await runtime.cancel_all_tasks_async()
|
|
||||||
assert result is False
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cancel_all_tasks_async_cancels_running_task(
|
|
||||||
self, sample_graph, sample_goal, temp_storage
|
|
||||||
):
|
|
||||||
"""Test that cancel_all_tasks_async cancels a running task and returns True."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
entry_spec = EntryPointSpec(
|
|
||||||
id="webhook",
|
|
||||||
name="Webhook",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(entry_spec)
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Inject a fake running task into the stream
|
|
||||||
stream = runtime._streams["webhook"]
|
|
||||||
|
|
||||||
async def hang_forever():
|
|
||||||
await asyncio.get_event_loop().create_future()
|
|
||||||
|
|
||||||
fake_task = asyncio.ensure_future(hang_forever())
|
|
||||||
stream._execution_tasks["fake-exec"] = fake_task
|
|
||||||
|
|
||||||
result = await runtime.cancel_all_tasks_async()
|
|
||||||
assert result is True
|
|
||||||
|
|
||||||
# Let the CancelledError propagate
|
|
||||||
try:
|
|
||||||
await fake_task
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
pass
|
|
||||||
assert fake_task.cancelled()
|
|
||||||
|
|
||||||
# Clean up
|
|
||||||
del stream._execution_tasks["fake-exec"]
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cancel_all_tasks_async_cancels_multiple_tasks_across_streams(
|
|
||||||
self, sample_graph, sample_goal, temp_storage
|
|
||||||
):
|
|
||||||
"""Test that cancel_all_tasks_async cancels tasks across multiple streams."""
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=sample_graph,
|
|
||||||
goal=sample_goal,
|
|
||||||
storage_path=temp_storage,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Register two entry points so we get two streams
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="stream-a",
|
|
||||||
name="Stream A",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="stream-b",
|
|
||||||
name="Stream B",
|
|
||||||
entry_node="process-webhook",
|
|
||||||
trigger_type="webhook",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
|
|
||||||
async def hang_forever():
|
|
||||||
await asyncio.get_event_loop().create_future()
|
|
||||||
|
|
||||||
stream_a = runtime._streams["stream-a"]
|
|
||||||
stream_b = runtime._streams["stream-b"]
|
|
||||||
|
|
||||||
# Two tasks in stream A, one task in stream B
|
|
||||||
task_a1 = asyncio.ensure_future(hang_forever())
|
|
||||||
task_a2 = asyncio.ensure_future(hang_forever())
|
|
||||||
task_b1 = asyncio.ensure_future(hang_forever())
|
|
||||||
|
|
||||||
stream_a._execution_tasks["exec-a1"] = task_a1
|
|
||||||
stream_a._execution_tasks["exec-a2"] = task_a2
|
|
||||||
stream_b._execution_tasks["exec-b1"] = task_b1
|
|
||||||
|
|
||||||
result = await runtime.cancel_all_tasks_async()
|
|
||||||
assert result is True
|
|
||||||
|
|
||||||
# Let CancelledErrors propagate
|
|
||||||
for task in [task_a1, task_a2, task_b1]:
|
|
||||||
try:
|
|
||||||
await task
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
pass
|
|
||||||
assert task.cancelled()
|
|
||||||
|
|
||||||
# Clean up
|
|
||||||
del stream_a._execution_tasks["exec-a1"]
|
|
||||||
del stream_a._execution_tasks["exec-a2"]
|
|
||||||
del stream_b._execution_tasks["exec-b1"]
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
pytest.main([__file__, "-v"])
|
|
||||||
@@ -1,268 +0,0 @@
|
|||||||
"""Tests for webhook idempotency key support in AgentRuntime.trigger()."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import time
|
|
||||||
from collections import OrderedDict
|
|
||||||
from unittest.mock import AsyncMock, MagicMock
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
|
|
||||||
|
|
||||||
|
|
||||||
def _make_runtime(ttl=300.0, max_keys=10000):
|
|
||||||
"""Create a minimal AgentRuntime with idempotency cache attributes.
|
|
||||||
|
|
||||||
Uses ``object.__new__`` to skip ``__init__`` and its heavy dependencies
|
|
||||||
(storage, LLM, skills) — we only need the cache and config for these tests.
|
|
||||||
"""
|
|
||||||
runtime = object.__new__(AgentRuntime)
|
|
||||||
runtime._config = AgentRuntimeConfig(idempotency_ttl_seconds=ttl, idempotency_max_keys=max_keys)
|
|
||||||
runtime._running = True
|
|
||||||
runtime._lock = asyncio.Lock()
|
|
||||||
runtime._idempotency_keys = OrderedDict()
|
|
||||||
runtime._idempotency_times = {}
|
|
||||||
runtime._graphs = {}
|
|
||||||
runtime._active_graph_id = "primary"
|
|
||||||
runtime._graph_id = "primary"
|
|
||||||
runtime._streams = {}
|
|
||||||
runtime._entry_points = {}
|
|
||||||
return runtime
|
|
||||||
|
|
||||||
|
|
||||||
def _make_runtime_with_stream(ttl=300.0, max_keys=10000):
|
|
||||||
"""Create a mock runtime whose stream.execute() returns unique IDs."""
|
|
||||||
runtime = _make_runtime(ttl=ttl, max_keys=max_keys)
|
|
||||||
|
|
||||||
call_count = 0
|
|
||||||
|
|
||||||
async def _fake_execute(*args, **kwargs):
|
|
||||||
nonlocal call_count
|
|
||||||
call_count += 1
|
|
||||||
return f"session-{call_count:04d}"
|
|
||||||
|
|
||||||
stream = MagicMock()
|
|
||||||
stream.execute = _fake_execute
|
|
||||||
runtime._streams = {"webhook": stream}
|
|
||||||
runtime._entry_points = {"webhook": MagicMock()}
|
|
||||||
return runtime
|
|
||||||
|
|
||||||
|
|
||||||
class TestIdempotencyConfig:
|
|
||||||
"""Verify idempotency configuration defaults."""
|
|
||||||
|
|
||||||
def test_default_ttl(self):
|
|
||||||
config = AgentRuntimeConfig()
|
|
||||||
assert config.idempotency_ttl_seconds == 300.0
|
|
||||||
|
|
||||||
def test_default_max_keys(self):
|
|
||||||
config = AgentRuntimeConfig()
|
|
||||||
assert config.idempotency_max_keys == 10000
|
|
||||||
|
|
||||||
def test_custom_config(self):
|
|
||||||
config = AgentRuntimeConfig(idempotency_ttl_seconds=60.0, idempotency_max_keys=100)
|
|
||||||
assert config.idempotency_ttl_seconds == 60.0
|
|
||||||
assert config.idempotency_max_keys == 100
|
|
||||||
|
|
||||||
|
|
||||||
class TestIdempotencyCache:
|
|
||||||
"""Test the idempotency cache and pruning logic directly."""
|
|
||||||
|
|
||||||
def test_cache_stores_and_retrieves_key(self):
|
|
||||||
runtime = _make_runtime()
|
|
||||||
runtime._idempotency_keys["stripe-evt-123"] = "exec-001"
|
|
||||||
runtime._idempotency_times["stripe-evt-123"] = time.time()
|
|
||||||
|
|
||||||
assert runtime._idempotency_keys.get("stripe-evt-123") == "exec-001"
|
|
||||||
|
|
||||||
def test_cache_returns_none_for_unknown_key(self):
|
|
||||||
runtime = _make_runtime()
|
|
||||||
assert runtime._idempotency_keys.get("unknown") is None
|
|
||||||
|
|
||||||
def test_prune_removes_expired_keys(self):
|
|
||||||
runtime = _make_runtime(ttl=0.1)
|
|
||||||
|
|
||||||
runtime._idempotency_keys["old-key"] = "exec-old"
|
|
||||||
runtime._idempotency_times["old-key"] = time.time() - 1.0 # expired
|
|
||||||
|
|
||||||
runtime._prune_idempotency_keys()
|
|
||||||
|
|
||||||
assert "old-key" not in runtime._idempotency_keys
|
|
||||||
assert "old-key" not in runtime._idempotency_times
|
|
||||||
|
|
||||||
def test_prune_keeps_fresh_keys(self):
|
|
||||||
runtime = _make_runtime(ttl=300.0)
|
|
||||||
|
|
||||||
runtime._idempotency_keys["fresh-key"] = "exec-fresh"
|
|
||||||
runtime._idempotency_times["fresh-key"] = time.time()
|
|
||||||
|
|
||||||
runtime._prune_idempotency_keys()
|
|
||||||
|
|
||||||
assert "fresh-key" in runtime._idempotency_keys
|
|
||||||
|
|
||||||
def test_prune_respects_max_keys(self):
|
|
||||||
runtime = _make_runtime(max_keys=2)
|
|
||||||
|
|
||||||
for i in range(3):
|
|
||||||
key = f"key-{i}"
|
|
||||||
runtime._idempotency_keys[key] = f"exec-{i}"
|
|
||||||
runtime._idempotency_times[key] = time.time()
|
|
||||||
|
|
||||||
runtime._prune_idempotency_keys()
|
|
||||||
|
|
||||||
assert len(runtime._idempotency_keys) == 2
|
|
||||||
# Oldest (key-0) should be evicted
|
|
||||||
assert "key-0" not in runtime._idempotency_keys
|
|
||||||
assert "key-1" in runtime._idempotency_keys
|
|
||||||
assert "key-2" in runtime._idempotency_keys
|
|
||||||
|
|
||||||
def test_prune_evicts_fifo(self):
|
|
||||||
runtime = _make_runtime(max_keys=1)
|
|
||||||
|
|
||||||
runtime._idempotency_keys["first"] = "exec-1"
|
|
||||||
runtime._idempotency_times["first"] = time.time()
|
|
||||||
runtime._idempotency_keys["second"] = "exec-2"
|
|
||||||
runtime._idempotency_times["second"] = time.time()
|
|
||||||
|
|
||||||
runtime._prune_idempotency_keys()
|
|
||||||
|
|
||||||
assert len(runtime._idempotency_keys) == 1
|
|
||||||
assert "second" in runtime._idempotency_keys
|
|
||||||
assert "first" not in runtime._idempotency_keys
|
|
||||||
|
|
||||||
def test_mixed_expired_and_max_size(self):
|
|
||||||
runtime = _make_runtime(ttl=0.1, max_keys=2)
|
|
||||||
|
|
||||||
# Add expired key
|
|
||||||
runtime._idempotency_keys["expired"] = "exec-e"
|
|
||||||
runtime._idempotency_times["expired"] = time.time() - 1.0
|
|
||||||
|
|
||||||
# Add fresh keys
|
|
||||||
runtime._idempotency_keys["fresh-1"] = "exec-f1"
|
|
||||||
runtime._idempotency_times["fresh-1"] = time.time()
|
|
||||||
runtime._idempotency_keys["fresh-2"] = "exec-f2"
|
|
||||||
runtime._idempotency_times["fresh-2"] = time.time()
|
|
||||||
|
|
||||||
runtime._prune_idempotency_keys()
|
|
||||||
|
|
||||||
assert "expired" not in runtime._idempotency_keys
|
|
||||||
assert "fresh-1" in runtime._idempotency_keys
|
|
||||||
assert "fresh-2" in runtime._idempotency_keys
|
|
||||||
|
|
||||||
|
|
||||||
class TestTriggerIdempotency:
|
|
||||||
"""Tests for trigger() idempotency deduplication."""
|
|
||||||
|
|
||||||
def test_trigger_accepts_idempotency_key(self):
|
|
||||||
"""trigger() accepts idempotency_key as a keyword argument."""
|
|
||||||
import inspect
|
|
||||||
|
|
||||||
sig = inspect.signature(AgentRuntime.trigger)
|
|
||||||
assert "idempotency_key" in sig.parameters
|
|
||||||
|
|
||||||
def test_idempotency_key_defaults_to_none(self):
|
|
||||||
"""idempotency_key defaults to None (backward compatible)."""
|
|
||||||
import inspect
|
|
||||||
|
|
||||||
sig = inspect.signature(AgentRuntime.trigger)
|
|
||||||
assert sig.parameters["idempotency_key"].default is None
|
|
||||||
|
|
||||||
def test_trigger_and_wait_accepts_idempotency_key(self):
|
|
||||||
"""trigger_and_wait() also accepts idempotency_key."""
|
|
||||||
import inspect
|
|
||||||
|
|
||||||
sig = inspect.signature(AgentRuntime.trigger_and_wait)
|
|
||||||
assert "idempotency_key" in sig.parameters
|
|
||||||
|
|
||||||
def test_trigger_and_wait_idempotency_key_defaults_to_none(self):
|
|
||||||
"""trigger_and_wait() idempotency_key defaults to None."""
|
|
||||||
import inspect
|
|
||||||
|
|
||||||
sig = inspect.signature(AgentRuntime.trigger_and_wait)
|
|
||||||
assert sig.parameters["idempotency_key"].default is None
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_duplicate_key_returns_cached_id(self):
|
|
||||||
"""Same idempotency key within TTL returns the cached execution ID."""
|
|
||||||
runtime = _make_runtime_with_stream()
|
|
||||||
|
|
||||||
first = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001")
|
|
||||||
second = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001")
|
|
||||||
|
|
||||||
assert first == second
|
|
||||||
assert first == "session-0001"
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_different_keys_produce_different_ids(self):
|
|
||||||
"""Different idempotency keys start separate executions."""
|
|
||||||
runtime = _make_runtime_with_stream()
|
|
||||||
|
|
||||||
id_a = await runtime.trigger("webhook", {}, idempotency_key="evt-aaa")
|
|
||||||
id_b = await runtime.trigger("webhook", {}, idempotency_key="evt-bbb")
|
|
||||||
|
|
||||||
assert id_a != id_b
|
|
||||||
assert id_a == "session-0001"
|
|
||||||
assert id_b == "session-0002"
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_none_key_always_starts_new_execution(self):
|
|
||||||
"""key=None (default) skips dedup — every call starts fresh."""
|
|
||||||
runtime = _make_runtime_with_stream()
|
|
||||||
|
|
||||||
id_1 = await runtime.trigger("webhook", {})
|
|
||||||
id_2 = await runtime.trigger("webhook", {})
|
|
||||||
|
|
||||||
assert id_1 != id_2
|
|
||||||
assert len(runtime._idempotency_keys) == 0 # nothing cached
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_expired_key_allows_new_execution(self):
|
|
||||||
"""After TTL expires, the same key starts a new execution."""
|
|
||||||
runtime = _make_runtime_with_stream(ttl=0.1)
|
|
||||||
|
|
||||||
first = await runtime.trigger("webhook", {}, idempotency_key="evt-expire")
|
|
||||||
|
|
||||||
# Backdate the cached timestamp so the key looks expired
|
|
||||||
runtime._idempotency_times["evt-expire"] = time.time() - 1.0
|
|
||||||
|
|
||||||
second = await runtime.trigger("webhook", {}, idempotency_key="evt-expire")
|
|
||||||
|
|
||||||
assert first != second
|
|
||||||
assert first == "session-0001"
|
|
||||||
assert second == "session-0002"
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_stream_not_found_does_not_cache(self):
|
|
||||||
"""If entry point doesn't exist, nothing is cached."""
|
|
||||||
runtime = _make_runtime_with_stream()
|
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="not found"):
|
|
||||||
await runtime.trigger("nonexistent", {}, idempotency_key="evt-orphan")
|
|
||||||
|
|
||||||
assert "evt-orphan" not in runtime._idempotency_keys
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_execute_error_does_not_cache(self):
|
|
||||||
"""If stream.execute() raises, nothing is cached so retries can go through."""
|
|
||||||
runtime = _make_runtime()
|
|
||||||
|
|
||||||
failing_stream = MagicMock()
|
|
||||||
failing_stream.execute = AsyncMock(side_effect=RuntimeError("stream not running"))
|
|
||||||
runtime._streams = {"webhook": failing_stream}
|
|
||||||
runtime._entry_points = {"webhook": MagicMock()}
|
|
||||||
|
|
||||||
with pytest.raises(RuntimeError, match="stream not running"):
|
|
||||||
await runtime.trigger("webhook", {}, idempotency_key="evt-123")
|
|
||||||
|
|
||||||
assert "evt-123" not in runtime._idempotency_keys
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cache_holds_real_execution_id(self):
|
|
||||||
"""Cached value matches the actual execution ID from execute()."""
|
|
||||||
runtime = _make_runtime_with_stream()
|
|
||||||
|
|
||||||
exec_id = await runtime.trigger("webhook", {}, idempotency_key="evt-real")
|
|
||||||
|
|
||||||
cached = runtime._idempotency_keys.get("evt-real")
|
|
||||||
assert cached == exec_id
|
|
||||||
assert cached == "session-0001"
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
"""Tests for custom session-backed runtime logging paths."""
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
|
|
||||||
from framework.graph.executor import GraphExecutor
|
|
||||||
from framework.runtime.runtime_log_store import RuntimeLogStore
|
|
||||||
from framework.runtime.runtime_logger import RuntimeLogger
|
|
||||||
|
|
||||||
|
|
||||||
def test_graph_executor_uses_custom_session_dir_name_for_runtime_logs():
|
|
||||||
executor = GraphExecutor(
|
|
||||||
runtime=MagicMock(),
|
|
||||||
storage_path=Path("/tmp/test-agent/sessions/my-custom-session"),
|
|
||||||
)
|
|
||||||
|
|
||||||
assert executor._get_runtime_log_session_id() == "my-custom-session"
|
|
||||||
|
|
||||||
|
|
||||||
def test_runtime_logger_creates_session_log_dir_for_custom_session_id(tmp_path):
|
|
||||||
base = tmp_path / ".hive" / "agents" / "test_agent"
|
|
||||||
base.mkdir(parents=True)
|
|
||||||
store = RuntimeLogStore(base)
|
|
||||||
logger = RuntimeLogger(store=store, agent_id="test-agent")
|
|
||||||
|
|
||||||
run_id = logger.start_run(goal_id="goal-1", session_id="my-custom-session")
|
|
||||||
|
|
||||||
assert run_id == "my-custom-session"
|
|
||||||
assert (base / "sessions" / "my-custom-session" / "logs").is_dir()
|
|
||||||
@@ -1,716 +0,0 @@
|
|||||||
"""
|
|
||||||
Tests for WebhookServer and event-driven entry points.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import hashlib
|
|
||||||
import hmac as hmac_mod
|
|
||||||
import json
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig
|
|
||||||
from framework.runtime.event_bus import AgentEvent, EventBus, EventType
|
|
||||||
from framework.runtime.execution_stream import EntryPointSpec
|
|
||||||
from framework.runtime.webhook_server import (
|
|
||||||
WebhookRoute,
|
|
||||||
WebhookServer,
|
|
||||||
WebhookServerConfig,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _make_server(event_bus: EventBus, routes: list[WebhookRoute] | None = None):
|
|
||||||
"""Helper to create a WebhookServer with port=0 for OS-assigned port."""
|
|
||||||
config = WebhookServerConfig(host="127.0.0.1", port=0)
|
|
||||||
server = WebhookServer(event_bus, config)
|
|
||||||
for route in routes or []:
|
|
||||||
server.add_route(route)
|
|
||||||
return server
|
|
||||||
|
|
||||||
|
|
||||||
def _base_url(server: WebhookServer) -> str:
|
|
||||||
"""Get the base URL for a running server."""
|
|
||||||
return f"http://127.0.0.1:{server.port}"
|
|
||||||
|
|
||||||
|
|
||||||
class TestWebhookServerLifecycle:
|
|
||||||
"""Tests for server start/stop."""
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_start_stop(self):
|
|
||||||
bus = EventBus()
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="test", path="/webhooks/test", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
await server.start()
|
|
||||||
assert server.is_running
|
|
||||||
assert server.port is not None
|
|
||||||
|
|
||||||
await server.stop()
|
|
||||||
assert not server.is_running
|
|
||||||
assert server.port is None
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_no_routes_skips_start(self):
|
|
||||||
bus = EventBus()
|
|
||||||
server = _make_server(bus) # no routes
|
|
||||||
|
|
||||||
await server.start()
|
|
||||||
assert not server.is_running
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_stop_when_not_started(self):
|
|
||||||
bus = EventBus()
|
|
||||||
server = _make_server(bus)
|
|
||||||
|
|
||||||
# Should be a no-op, not raise
|
|
||||||
await server.stop()
|
|
||||||
assert not server.is_running
|
|
||||||
|
|
||||||
|
|
||||||
class TestWebhookEventPublishing:
|
|
||||||
"""Tests for HTTP request -> EventBus event publishing."""
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_post_publishes_webhook_received(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="gh", path="/webhooks/github", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/github",
|
|
||||||
json={"action": "opened", "number": 42},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
body = await resp.json()
|
|
||||||
assert body["status"] == "accepted"
|
|
||||||
|
|
||||||
# Give event bus time to dispatch
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(received) == 1
|
|
||||||
event = received[0]
|
|
||||||
assert event.type == EventType.WEBHOOK_RECEIVED
|
|
||||||
assert event.stream_id == "gh"
|
|
||||||
assert event.data["path"] == "/webhooks/github"
|
|
||||||
assert event.data["method"] == "POST"
|
|
||||||
assert event.data["payload"] == {"action": "opened", "number": 42}
|
|
||||||
assert isinstance(event.data["headers"], dict)
|
|
||||||
assert event.data["query_params"] == {}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_query_params_included(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="hook", path="/webhooks/hook", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/hook?source=test&v=2",
|
|
||||||
json={"data": "hello"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(received) == 1
|
|
||||||
assert received[0].data["query_params"] == {"source": "test", "v": "2"}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_non_json_body(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="raw", path="/webhooks/raw", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/raw",
|
|
||||||
data=b"plain text body",
|
|
||||||
headers={"Content-Type": "text/plain"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(received) == 1
|
|
||||||
assert received[0].data["payload"] == {"raw_body": "plain text body"}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_empty_body(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="empty", path="/webhooks/empty", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(f"{_base_url(server)}/webhooks/empty") as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(received) == 1
|
|
||||||
assert received[0].data["payload"] == {}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_multiple_routes(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]),
|
|
||||||
WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/a", json={"from": "a"}
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/b", json={"from": "b"}
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(received) == 2
|
|
||||||
stream_ids = {e.stream_id for e in received}
|
|
||||||
assert stream_ids == {"a", "b"}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_filter_stream_subscription(self):
|
|
||||||
"""Subscribers can filter by stream_id (source_id)."""
|
|
||||||
bus = EventBus()
|
|
||||||
a_events = []
|
|
||||||
b_events = []
|
|
||||||
|
|
||||||
async def handle_a(event):
|
|
||||||
a_events.append(event)
|
|
||||||
|
|
||||||
async def handle_b(event):
|
|
||||||
b_events.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_a, filter_stream="a")
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_b, filter_stream="b")
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]),
|
|
||||||
WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
await session.post(f"{_base_url(server)}/webhooks/a", json={"x": 1})
|
|
||||||
await session.post(f"{_base_url(server)}/webhooks/b", json={"x": 2})
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
|
|
||||||
assert len(a_events) == 1
|
|
||||||
assert a_events[0].data["payload"] == {"x": 1}
|
|
||||||
assert len(b_events) == 1
|
|
||||||
assert b_events[0].data["payload"] == {"x": 2}
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
|
|
||||||
class TestHMACVerification:
|
|
||||||
"""Tests for HMAC-SHA256 signature verification."""
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_valid_signature_accepted(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
secret = "test-secret-key"
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(
|
|
||||||
source_id="secure",
|
|
||||||
path="/webhooks/secure",
|
|
||||||
methods=["POST"],
|
|
||||||
secret=secret,
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
body = json.dumps({"event": "push"}).encode()
|
|
||||||
sig = hmac_mod.new(secret.encode(), body, hashlib.sha256).hexdigest()
|
|
||||||
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/secure",
|
|
||||||
data=body,
|
|
||||||
headers={
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
"X-Hub-Signature-256": f"sha256={sig}",
|
|
||||||
},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert len(received) == 1
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_invalid_signature_rejected(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(
|
|
||||||
source_id="secure",
|
|
||||||
path="/webhooks/secure",
|
|
||||||
methods=["POST"],
|
|
||||||
secret="real-secret",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/secure",
|
|
||||||
json={"event": "push"},
|
|
||||||
headers={"X-Hub-Signature-256": "sha256=invalidsignature"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 401
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert len(received) == 0 # No event published
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_missing_signature_rejected(self):
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(
|
|
||||||
source_id="secure",
|
|
||||||
path="/webhooks/secure",
|
|
||||||
methods=["POST"],
|
|
||||||
secret="my-secret",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
# No X-Hub-Signature-256 header
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/secure",
|
|
||||||
json={"event": "push"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 401
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert len(received) == 0
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_no_secret_skips_verification(self):
|
|
||||||
"""Routes without a secret accept any request."""
|
|
||||||
bus = EventBus()
|
|
||||||
received = []
|
|
||||||
|
|
||||||
async def handler(event):
|
|
||||||
received.append(event)
|
|
||||||
|
|
||||||
bus.subscribe([EventType.WEBHOOK_RECEIVED], handler)
|
|
||||||
|
|
||||||
server = _make_server(
|
|
||||||
bus,
|
|
||||||
[
|
|
||||||
WebhookRoute(
|
|
||||||
source_id="open",
|
|
||||||
path="/webhooks/open",
|
|
||||||
methods=["POST"],
|
|
||||||
secret=None,
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
await server.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"{_base_url(server)}/webhooks/open",
|
|
||||||
json={"data": "test"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.05)
|
|
||||||
assert len(received) == 1
|
|
||||||
finally:
|
|
||||||
await server.stop()
|
|
||||||
|
|
||||||
|
|
||||||
class TestEventDrivenEntryPoints:
|
|
||||||
"""Tests for event-driven entry points wired through AgentRuntime."""
|
|
||||||
|
|
||||||
def _make_graph_and_goal(self):
|
|
||||||
"""Minimal graph + goal for testing entry point triggering."""
|
|
||||||
from framework.graph import Goal
|
|
||||||
from framework.graph.edge import GraphSpec
|
|
||||||
from framework.graph.goal import SuccessCriterion
|
|
||||||
from framework.graph.node import NodeSpec
|
|
||||||
|
|
||||||
nodes = [
|
|
||||||
NodeSpec(
|
|
||||||
id="process-event",
|
|
||||||
name="Process Event",
|
|
||||||
description="Process incoming event",
|
|
||||||
node_type="event_loop",
|
|
||||||
input_keys=["event"],
|
|
||||||
output_keys=["result"],
|
|
||||||
),
|
|
||||||
]
|
|
||||||
graph = GraphSpec(
|
|
||||||
id="test-graph",
|
|
||||||
goal_id="test-goal",
|
|
||||||
version="1.0.0",
|
|
||||||
entry_node="process-event",
|
|
||||||
entry_points={"start": "process-event"},
|
|
||||||
terminal_nodes=[],
|
|
||||||
pause_nodes=[],
|
|
||||||
nodes=nodes,
|
|
||||||
edges=[],
|
|
||||||
)
|
|
||||||
goal = Goal(
|
|
||||||
id="test-goal",
|
|
||||||
name="Test Goal",
|
|
||||||
description="Test",
|
|
||||||
success_criteria=[
|
|
||||||
SuccessCriterion(
|
|
||||||
id="sc-1",
|
|
||||||
description="Done",
|
|
||||||
metric="done",
|
|
||||||
target="yes",
|
|
||||||
weight=1.0,
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
return graph, goal
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_event_entry_point_subscribes_to_bus(self):
|
|
||||||
"""Entry point with trigger_type='event' subscribes and triggers on matching events."""
|
|
||||||
graph, goal = self._make_graph_and_goal()
|
|
||||||
|
|
||||||
config = AgentRuntimeConfig(
|
|
||||||
webhook_host="127.0.0.1",
|
|
||||||
webhook_port=0,
|
|
||||||
webhook_routes=[
|
|
||||||
{"source_id": "gh", "path": "/webhooks/github"},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=graph,
|
|
||||||
goal=goal,
|
|
||||||
storage_path=Path(tmpdir),
|
|
||||||
config=config,
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="gh-handler",
|
|
||||||
name="GitHub Handler",
|
|
||||||
entry_node="process-event",
|
|
||||||
trigger_type="event",
|
|
||||||
trigger_config={
|
|
||||||
"event_types": ["webhook_received"],
|
|
||||||
"filter_stream": "gh",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
trigger_calls = []
|
|
||||||
|
|
||||||
async def mock_trigger(ep_id, data, **kwargs):
|
|
||||||
trigger_calls.append((ep_id, data))
|
|
||||||
|
|
||||||
with patch.object(runtime, "trigger", side_effect=mock_trigger):
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
assert runtime.webhook_server is not None
|
|
||||||
assert runtime.webhook_server.is_running
|
|
||||||
|
|
||||||
port = runtime.webhook_server.port
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
async with session.post(
|
|
||||||
f"http://127.0.0.1:{port}/webhooks/github",
|
|
||||||
json={"action": "push", "ref": "main"},
|
|
||||||
) as resp:
|
|
||||||
assert resp.status == 202
|
|
||||||
|
|
||||||
await asyncio.sleep(0.1)
|
|
||||||
|
|
||||||
assert len(trigger_calls) == 1
|
|
||||||
ep_id, data = trigger_calls[0]
|
|
||||||
assert ep_id == "gh-handler"
|
|
||||||
assert "event" in data
|
|
||||||
assert data["event"]["type"] == "webhook_received"
|
|
||||||
assert data["event"]["stream_id"] == "gh"
|
|
||||||
assert data["event"]["data"]["payload"] == {
|
|
||||||
"action": "push",
|
|
||||||
"ref": "main",
|
|
||||||
}
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
assert runtime.webhook_server is None
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_event_entry_point_filter_stream(self):
|
|
||||||
"""Entry point only triggers for matching stream_id (source_id)."""
|
|
||||||
graph, goal = self._make_graph_and_goal()
|
|
||||||
|
|
||||||
config = AgentRuntimeConfig(
|
|
||||||
webhook_routes=[
|
|
||||||
{"source_id": "github", "path": "/webhooks/github"},
|
|
||||||
{"source_id": "stripe", "path": "/webhooks/stripe"},
|
|
||||||
],
|
|
||||||
webhook_port=0,
|
|
||||||
)
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=graph,
|
|
||||||
goal=goal,
|
|
||||||
storage_path=Path(tmpdir),
|
|
||||||
config=config,
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="gh-only",
|
|
||||||
name="GitHub Only",
|
|
||||||
entry_node="process-event",
|
|
||||||
trigger_type="event",
|
|
||||||
trigger_config={
|
|
||||||
"event_types": ["webhook_received"],
|
|
||||||
"filter_stream": "github",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
trigger_calls = []
|
|
||||||
|
|
||||||
async def mock_trigger(ep_id, data, **kwargs):
|
|
||||||
trigger_calls.append((ep_id, data))
|
|
||||||
|
|
||||||
with patch.object(runtime, "trigger", side_effect=mock_trigger):
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
port = runtime.webhook_server.port
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
# POST to stripe — should NOT trigger
|
|
||||||
await session.post(
|
|
||||||
f"http://127.0.0.1:{port}/webhooks/stripe",
|
|
||||||
json={"type": "payment"},
|
|
||||||
)
|
|
||||||
# POST to github — should trigger
|
|
||||||
await session.post(
|
|
||||||
f"http://127.0.0.1:{port}/webhooks/github",
|
|
||||||
json={"action": "opened"},
|
|
||||||
)
|
|
||||||
|
|
||||||
await asyncio.sleep(0.1)
|
|
||||||
|
|
||||||
assert len(trigger_calls) == 1
|
|
||||||
assert trigger_calls[0][0] == "gh-only"
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_no_webhook_routes_skips_server(self):
|
|
||||||
"""Runtime without webhook_routes does not start a webhook server."""
|
|
||||||
graph, goal = self._make_graph_and_goal()
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=graph,
|
|
||||||
goal=goal,
|
|
||||||
storage_path=Path(tmpdir),
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="manual",
|
|
||||||
name="Manual",
|
|
||||||
entry_node="process-event",
|
|
||||||
trigger_type="manual",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
await runtime.start()
|
|
||||||
try:
|
|
||||||
assert runtime.webhook_server is None
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_event_entry_point_custom_event(self):
|
|
||||||
"""Entry point can subscribe to CUSTOM events, not just webhooks."""
|
|
||||||
graph, goal = self._make_graph_and_goal()
|
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
runtime = AgentRuntime(
|
|
||||||
graph=graph,
|
|
||||||
goal=goal,
|
|
||||||
storage_path=Path(tmpdir),
|
|
||||||
)
|
|
||||||
|
|
||||||
runtime.register_entry_point(
|
|
||||||
EntryPointSpec(
|
|
||||||
id="custom-handler",
|
|
||||||
name="Custom Handler",
|
|
||||||
entry_node="process-event",
|
|
||||||
trigger_type="event",
|
|
||||||
trigger_config={
|
|
||||||
"event_types": ["custom"],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
trigger_calls = []
|
|
||||||
|
|
||||||
async def mock_trigger(ep_id, data, **kwargs):
|
|
||||||
trigger_calls.append((ep_id, data))
|
|
||||||
|
|
||||||
with patch.object(runtime, "trigger", side_effect=mock_trigger):
|
|
||||||
await runtime.start()
|
|
||||||
|
|
||||||
try:
|
|
||||||
await runtime.event_bus.publish(
|
|
||||||
AgentEvent(
|
|
||||||
type=EventType.CUSTOM,
|
|
||||||
stream_id="some-source",
|
|
||||||
data={"key": "value"},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
await asyncio.sleep(0.1)
|
|
||||||
|
|
||||||
assert len(trigger_calls) == 1
|
|
||||||
assert trigger_calls[0][0] == "custom-handler"
|
|
||||||
assert trigger_calls[0][1]["event"]["type"] == "custom"
|
|
||||||
assert trigger_calls[0][1]["event"]["data"]["key"] == "value"
|
|
||||||
finally:
|
|
||||||
await runtime.stop()
|
|
||||||
@@ -0,0 +1,192 @@
|
|||||||
|
"""Declarative agent configuration schema.
|
||||||
|
|
||||||
|
Allows defining agents via JSON/YAML config files instead of Python modules.
|
||||||
|
The ``AgentConfig`` model is the top-level schema loaded from ``agent.json``.
|
||||||
|
The runner detects this format by checking for a ``name`` key at the top level.
|
||||||
|
|
||||||
|
Template variables
|
||||||
|
------------------
|
||||||
|
System prompts and identity_prompt support ``{{variable_name}}`` placeholders.
|
||||||
|
These are resolved at load time from ``AgentConfig.variables``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class ToolAccessConfig(BaseModel):
|
||||||
|
"""Declarative tool access policy.
|
||||||
|
|
||||||
|
Controls which tools a node/agent has access to.
|
||||||
|
|
||||||
|
* ``all`` -- every tool from the registry.
|
||||||
|
* ``explicit`` -- only tools listed in ``allowed`` (default; empty = zero tools).
|
||||||
|
* ``none`` -- no tools at all.
|
||||||
|
"""
|
||||||
|
|
||||||
|
policy: str = Field(
|
||||||
|
default="explicit",
|
||||||
|
description="One of: 'all', 'explicit', 'none'.",
|
||||||
|
)
|
||||||
|
allowed: list[str] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="Tool names when policy='explicit'.",
|
||||||
|
)
|
||||||
|
denied: list[str] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="Tool names to deny (applied after allowed).",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NodeConfig(BaseModel):
|
||||||
|
"""Declarative node definition."""
|
||||||
|
|
||||||
|
id: str
|
||||||
|
name: str | None = None
|
||||||
|
description: str | None = None
|
||||||
|
node_type: str = Field(
|
||||||
|
default="event_loop",
|
||||||
|
description="event_loop",
|
||||||
|
)
|
||||||
|
system_prompt: str | None = None
|
||||||
|
tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig)
|
||||||
|
model: str | None = None
|
||||||
|
input_keys: list[str] = Field(default_factory=list)
|
||||||
|
output_keys: list[str] = Field(default_factory=list)
|
||||||
|
nullable_output_keys: list[str] = Field(default_factory=list)
|
||||||
|
max_iterations: int = 30
|
||||||
|
max_node_visits: int = 1
|
||||||
|
client_facing: bool = False
|
||||||
|
success_criteria: str | None = None
|
||||||
|
failure_criteria: str | None = None
|
||||||
|
skip_judge: bool = False
|
||||||
|
max_retries: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class EdgeConfig(BaseModel):
|
||||||
|
"""Declarative edge definition."""
|
||||||
|
|
||||||
|
from_node: str = Field(description="Source node ID.")
|
||||||
|
to_node: str = Field(description="Target node ID.")
|
||||||
|
condition: str = Field(
|
||||||
|
default="on_success",
|
||||||
|
description="always | on_success | on_failure | conditional | llm_decide",
|
||||||
|
)
|
||||||
|
condition_expr: str | None = None
|
||||||
|
input_mapping: dict[str, str] = Field(default_factory=dict)
|
||||||
|
priority: int = 1
|
||||||
|
|
||||||
|
|
||||||
|
class GoalConfig(BaseModel):
|
||||||
|
"""Simplified goal definition for declarative config."""
|
||||||
|
|
||||||
|
description: str
|
||||||
|
success_criteria: list[str] = Field(default_factory=list)
|
||||||
|
constraints: list[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class EntryPointConfig(BaseModel):
|
||||||
|
"""Entry point configuration."""
|
||||||
|
|
||||||
|
id: str = "default"
|
||||||
|
name: str = "Default"
|
||||||
|
entry_node: str | None = None # defaults to AgentConfig.entry_node
|
||||||
|
trigger_type: str = Field(
|
||||||
|
default="manual",
|
||||||
|
description="manual | scheduled | timer",
|
||||||
|
)
|
||||||
|
trigger_config: dict = Field(default_factory=dict)
|
||||||
|
isolation_level: str = "shared"
|
||||||
|
max_concurrent: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class MCPServerRef(BaseModel):
|
||||||
|
"""Reference to an MCP server to connect for this agent."""
|
||||||
|
|
||||||
|
name: str
|
||||||
|
config: dict | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class MetadataConfig(BaseModel):
|
||||||
|
"""Agent metadata for display / intro messages."""
|
||||||
|
|
||||||
|
intro_message: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class AgentConfig(BaseModel):
|
||||||
|
"""Top-level declarative agent configuration.
|
||||||
|
|
||||||
|
Load from ``agent.json`` and pass to
|
||||||
|
:func:`framework.runner.runner.load_agent_config` to build the
|
||||||
|
``GraphSpec`` + ``Goal`` pair.
|
||||||
|
|
||||||
|
Example (YAML)::
|
||||||
|
|
||||||
|
name: lead-enrichment-agent
|
||||||
|
version: 1.0.0
|
||||||
|
variables:
|
||||||
|
spreadsheet_id: "1ZVx..."
|
||||||
|
sheet_name: "contacts"
|
||||||
|
goal:
|
||||||
|
description: "Enrich leads in Google Sheets"
|
||||||
|
success_criteria:
|
||||||
|
- "All unprocessed leads enriched"
|
||||||
|
constraints:
|
||||||
|
- "Browser-only research"
|
||||||
|
identity_prompt: |
|
||||||
|
You are the Lead Enrichment Agent...
|
||||||
|
nodes:
|
||||||
|
- id: start
|
||||||
|
tools: {policy: explicit, allowed: [google_sheets_get_values]}
|
||||||
|
system_prompt: |
|
||||||
|
Spreadsheet ID: {{spreadsheet_id}}
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str
|
||||||
|
version: str = "1.0.0"
|
||||||
|
description: str | None = None
|
||||||
|
metadata: MetadataConfig = Field(default_factory=MetadataConfig)
|
||||||
|
|
||||||
|
# Template variables -- substituted into prompts via {{var_name}}
|
||||||
|
variables: dict[str, str] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
# Goal
|
||||||
|
goal: GoalConfig
|
||||||
|
|
||||||
|
# Graph structure
|
||||||
|
nodes: list[NodeConfig]
|
||||||
|
edges: list[EdgeConfig]
|
||||||
|
entry_node: str
|
||||||
|
terminal_nodes: list[str] = Field(default_factory=list)
|
||||||
|
pause_nodes: list[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
# Entry points (if omitted, a single "default" manual entry is created)
|
||||||
|
entry_points: list[EntryPointConfig] = Field(default_factory=list)
|
||||||
|
|
||||||
|
# Agent-level tool defaults (nodes inherit unless they override)
|
||||||
|
tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig)
|
||||||
|
mcp_servers: list[MCPServerRef] = Field(default_factory=list)
|
||||||
|
|
||||||
|
# LLM / execution
|
||||||
|
model: str | None = None
|
||||||
|
max_tokens: int = 4096
|
||||||
|
conversation_mode: str = "continuous"
|
||||||
|
identity_prompt: str = ""
|
||||||
|
loop_config: dict = Field(
|
||||||
|
default_factory=lambda: {
|
||||||
|
"max_iterations": 100,
|
||||||
|
"max_tool_calls_per_turn": 30,
|
||||||
|
"max_context_tokens": 32000,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Pipeline overrides (per-agent, merged with global config)
|
||||||
|
pipeline: dict = Field(
|
||||||
|
default_factory=dict,
|
||||||
|
description="Per-agent pipeline stage overrides. Same format as global pipeline config.",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Resource limits
|
||||||
|
max_cost_per_run: float | None = None
|
||||||
@@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Any
|
|||||||
from pydantic import AliasChoices, BaseModel, Field, computed_field
|
from pydantic import AliasChoices, BaseModel, Field, computed_field
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from framework.graph.executor import ExecutionResult
|
from framework.orchestrator.orchestrator import ExecutionResult
|
||||||
from framework.schemas.run import Run
|
from framework.schemas.run import Run
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -28,8 +28,11 @@ def _get_allowed_agent_roots() -> tuple[Path, ...]:
|
|||||||
"""
|
"""
|
||||||
global _ALLOWED_AGENT_ROOTS
|
global _ALLOWED_AGENT_ROOTS
|
||||||
if _ALLOWED_AGENT_ROOTS is None:
|
if _ALLOWED_AGENT_ROOTS is None:
|
||||||
|
from framework.config import COLONIES_DIR
|
||||||
|
|
||||||
_ALLOWED_AGENT_ROOTS = (
|
_ALLOWED_AGENT_ROOTS = (
|
||||||
(_REPO_ROOT / "exports").resolve(),
|
COLONIES_DIR.resolve(), # ~/.hive/colonies/
|
||||||
|
(_REPO_ROOT / "exports").resolve(), # compat fallback
|
||||||
(_REPO_ROOT / "examples").resolve(),
|
(_REPO_ROOT / "examples").resolve(),
|
||||||
(Path.home() / ".hive" / "agents").resolve(),
|
(Path.home() / ".hive" / "agents").resolve(),
|
||||||
)
|
)
|
||||||
@@ -53,7 +56,8 @@ def validate_agent_path(agent_path: str | Path) -> Path:
|
|||||||
if resolved.is_relative_to(root) and resolved != root:
|
if resolved.is_relative_to(root) and resolved != root:
|
||||||
return resolved
|
return resolved
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"agent_path must be inside an allowed directory (exports/, examples/, or ~/.hive/agents/)"
|
"agent_path must be inside an allowed directory "
|
||||||
|
"(~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user