diff --git a/.gitignore b/.gitignore index 54798a34..dccd6d8e 100644 --- a/.gitignore +++ b/.gitignore @@ -70,6 +70,8 @@ tmp/ temp/ exports/* +exports.old* +artifacts/* .claude/settings.local.json diff --git a/core/framework/__init__.py b/core/framework/__init__.py index 27909db6..438323c5 100644 --- a/core/framework/__init__.py +++ b/core/framework/__init__.py @@ -1,71 +1,23 @@ -""" -Aden Hive Framework: A goal-driven agent runtime optimized for Builder observability. +"""Hive Agent Framework. -The runtime is designed around DECISIONS, not just actions. Every significant -choice the agent makes is captured with: -- What it was trying to do (intent) -- What options it considered -- What it chose and why -- What happened as a result -- Whether that was good or bad (evaluated post-hoc) - -This gives the Builder LLM the information it needs to improve agent behavior. - -## Testing Framework - -The framework includes a Goal-Based Testing system (Goal → Agent → Eval): -- Generate tests from Goal success_criteria and constraints -- Mandatory user approval before tests are stored -- Parallel test execution with error categorization -- Debug tools with fix suggestions - -See `framework.testing` for details. 
+Core classes: + AgentHost -- hosts agents, manages entry points and pipeline + Orchestrator -- routes between nodes in a graph + AgentLoop -- the LLM + tool execution loop (one per node) + AgentLoader -- loads agent.json from disk, builds pipeline + DecisionTracker -- records decisions for post-hoc analysis """ -from framework.llm import LLMProvider - -try: - from framework.llm import AnthropicProvider # noqa: F401 -except ImportError: - pass -from framework.runner import AgentRunner -from framework.runtime.core import Runtime -from framework.schemas.decision import Decision, DecisionEvaluation, Option, Outcome -from framework.schemas.run import Problem, Run, RunSummary - -# Testing framework -from framework.testing import ( - ApprovalStatus, - DebugTool, - ErrorCategory, - Test, - TestResult, - TestStorage, - TestSuiteResult, -) +from framework.agent_loop import AgentLoop +from framework.host import AgentHost +from framework.loader import AgentLoader +from framework.orchestrator import Orchestrator +from framework.tracker import DecisionTracker __all__ = [ - # Schemas - "Decision", - "Option", - "Outcome", - "DecisionEvaluation", - "Run", - "RunSummary", - "Problem", - # Runtime - "Runtime", - # LLM - "LLMProvider", - "AnthropicProvider", - # Runner - "AgentRunner", - # Testing - "Test", - "TestResult", - "TestSuiteResult", - "TestStorage", - "ApprovalStatus", - "ErrorCategory", - "DebugTool", + "AgentHost", + "AgentLoader", + "AgentLoop", + "DecisionTracker", + "Orchestrator", ] diff --git a/core/framework/agent_loop/__init__.py b/core/framework/agent_loop/__init__.py new file mode 100644 index 00000000..845428b1 --- /dev/null +++ b/core/framework/agent_loop/__init__.py @@ -0,0 +1,32 @@ +"""Agent loop -- the core agent execution primitive.""" + +from framework.agent_loop.conversation import ( # noqa: F401 + ConversationStore, + Message, + NodeConversation, +) + +# Lazy import to avoid circular dependency with graph/event_loop/ +# (graph/event_loop/* imports 
framework.graph.conversation which is a shim +# pointing here, which would trigger agent_loop.py loading, which imports +# graph/event_loop/* again) + + +def __getattr__(name: str): + if name in ("AgentLoop", "JudgeProtocol", "JudgeVerdict", "LoopConfig", "OutputAccumulator"): + from framework.agent_loop.agent_loop import ( + AgentLoop, + JudgeProtocol, + JudgeVerdict, + LoopConfig, + OutputAccumulator, + ) + _exports = { + "AgentLoop": AgentLoop, + "JudgeProtocol": JudgeProtocol, + "JudgeVerdict": JudgeVerdict, + "LoopConfig": LoopConfig, + "OutputAccumulator": OutputAccumulator, + } + return _exports[name] + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/core/framework/graph/event_loop_node.py b/core/framework/agent_loop/agent_loop.py similarity index 87% rename from core/framework/graph/event_loop_node.py rename to core/framework/agent_loop/agent_loop.py index 61f447af..39f47dd4 100644 --- a/core/framework/graph/event_loop_node.py +++ b/core/framework/agent_loop/agent_loop.py @@ -21,16 +21,16 @@ from collections.abc import Awaitable, Callable from datetime import UTC, datetime from typing import Any -from framework.graph.conversation import ConversationStore, NodeConversation -from framework.graph.event_loop import types as event_loop_types -from framework.graph.event_loop.compaction import ( +from framework.agent_loop.conversation import ConversationStore, NodeConversation +from framework.agent_loop.internals import types as event_loop_types +from framework.agent_loop.internals.compaction import ( build_emergency_summary, build_llm_compaction_prompt, compact, format_messages_for_summary, llm_compact, ) -from framework.graph.event_loop.cursor_persistence import ( +from framework.agent_loop.internals.cursor_persistence import ( RestoredState, check_pause, drain_injection_queue, @@ -38,7 +38,7 @@ from framework.graph.event_loop.cursor_persistence import ( restore, write_cursor, ) -from 
framework.graph.event_loop.event_publishing import ( +from framework.agent_loop.internals.event_publishing import ( generate_action_plan, log_skip_judge, publish_context_usage, @@ -54,27 +54,24 @@ from framework.graph.event_loop.event_publishing import ( publish_tool_started, run_hooks, ) -from framework.graph.event_loop.judge_pipeline import ( +from framework.agent_loop.internals.judge_pipeline import ( SubagentJudge as SharedSubagentJudge, judge_turn, ) -from framework.graph.event_loop.stall_detector import ( +from framework.agent_loop.internals.stall_detector import ( fingerprint_tool_calls, is_stalled, is_tool_doom_loop, ngram_similarity, ) -from framework.graph.event_loop.subagent_executor import execute_subagent -from framework.graph.event_loop.synthetic_tools import ( +from framework.agent_loop.internals.synthetic_tools import ( build_ask_user_multiple_tool, build_ask_user_tool, - build_delegate_tool, build_escalate_tool, - build_report_to_parent_tool, build_set_output_tool, handle_set_output, ) -from framework.graph.event_loop.tool_result_handler import ( +from framework.agent_loop.internals.tool_result_handler import ( build_json_preview, execute_tool, extract_json_metadata, @@ -82,12 +79,12 @@ from framework.graph.event_loop.tool_result_handler import ( restore_spill_counter, truncate_tool_result, ) -from framework.graph.event_loop.types import ( +from framework.agent_loop.internals.types import ( JudgeProtocol, JudgeVerdict, TriggerEvent, ) -from framework.graph.node import NodeContext, NodeProtocol, NodeResult +from framework.orchestrator.node import NodeContext, NodeProtocol, NodeResult from framework.llm.capabilities import supports_image_tool_results from framework.llm.provider import Tool, ToolResult, ToolUse from framework.llm.stream_events import ( @@ -96,8 +93,8 @@ from framework.llm.stream_events import ( TextDeltaEvent, ToolCallEvent, ) -from framework.runtime.event_bus import EventBus -from framework.runtime.llm_debug_logger import 
log_llm_turn +from framework.host.event_bus import EventBus +from framework.tracker.llm_debug_logger import log_llm_turn logger = logging.getLogger(__name__) @@ -163,43 +160,9 @@ def _is_context_too_large_error(exc: BaseException) -> bool: # --------------------------------------------------------------------------- -# Escalation receiver (temporary routing target for subagent → user input) # --------------------------------------------------------------------------- -class _EscalationReceiver: - """Temporary receiver registered in node_registry for subagent escalation routing. - - When a subagent calls ``report_to_parent(wait_for_response=True)``, the callback - creates one of these, registers it under a unique escalation ID in the executor's - ``node_registry``, and awaits ``wait()``. The TUI / runner calls - ``inject_input(escalation_id, content)`` which the ``ExecutionStream`` routes here - via ``inject_event()`` — matching the same ``hasattr(node, "inject_event")`` check - used for regular ``EventLoopNode`` instances. 
- """ - - def __init__(self) -> None: - self._event = asyncio.Event() - self._response: str | None = None - self._awaiting_input = True # So inject_message() can prefer us - - async def inject_event( - self, - content: str, - *, - is_client_input: bool = False, - image_content: list[dict] | None = None, - ) -> None: - """Called by ExecutionStream.inject_input() when the user responds.""" - self._response = content - self._event.set() - - async def wait(self) -> str | None: - """Block until inject_event() delivers the user's response.""" - await self._event.wait() - return self._response - - # --------------------------------------------------------------------------- # Judge protocol (simple 3-action interface for event loop evaluation) # --------------------------------------------------------------------------- @@ -224,7 +187,7 @@ OutputAccumulator = event_loop_types.OutputAccumulator # --------------------------------------------------------------------------- -class EventLoopNode(NodeProtocol): +class AgentLoop(NodeProtocol): """Multi-turn LLM streaming loop with tool execution and judge evaluation. Lifecycle: @@ -284,9 +247,6 @@ class EventLoopNode(NodeProtocol): # Monotonic counter for spillover file naming (web_search_1.txt, etc.) self._spill_counter: int = 0 # Subagent mark_complete: when True, _evaluate returns ACCEPT immediately - self._mark_complete_flag = False - # Counter for subagent instances (1, 2, 3, ...) - self._subagent_instance_counter: dict[str, int] = {} def validate_input(self, ctx: NodeContext) -> list[str]: """Validate hard requirements only. 
@@ -307,7 +267,7 @@ class EventLoopNode(NodeProtocol): async def execute(self, ctx: NodeContext) -> NodeResult: """Run the event loop.""" logger.debug( - "[EventLoopNode.execute] Starting execution for node=%s, stream=%s", + "[AgentLoop.execute] Starting execution for node=%s, stream=%s", ctx.node_id, ctx.stream_id, ) @@ -320,7 +280,7 @@ class EventLoopNode(NodeProtocol): # Store skill dirs for AS-9 file-read interception in _execute_tool self._skill_dirs: list[str] = ctx.skill_dirs logger.debug( - "[EventLoopNode.execute] node_id=%s, execution_id=%s, max_iterations=%d", + "[AgentLoop.execute] node_id=%s, execution_id=%s, max_iterations=%d", node_id, execution_id, self._config.max_iterations, @@ -402,7 +362,7 @@ class EventLoopNode(NodeProtocol): # execution preamble and node-type preamble. The stored # prompt may be stale after code changes or when runtime- # injected context (e.g. worker identity) has changed. - from framework.graph.prompting import build_system_prompt_for_node_context + from framework.orchestrator.prompting import build_system_prompt_for_node_context _current_prompt = build_system_prompt_for_node_context(ctx) if conversation.system_prompt != _current_prompt: @@ -425,7 +385,7 @@ class EventLoopNode(NodeProtocol): await self._conversation_store.clear() # Fresh conversation: either isolated mode or first node in continuous mode. - from framework.graph.prompting import build_system_prompt_for_node_context + from framework.orchestrator.prompting import build_system_prompt_for_node_context system_prompt = build_system_prompt_for_node_context(ctx) @@ -484,7 +444,7 @@ class EventLoopNode(NodeProtocol): # 2a. Guard: ensure at least one non-system message exists. # A restored conversation may have 0 messages if phase_id filtering # removes them all, or if a prior run stored metadata without messages - # (e.g. subagent that failed before the first LLM call). + # (e.g. node that failed before the first LLM call). 
if conversation.message_count == 0: initial_message = self._build_initial_message(ctx) if initial_message: @@ -502,37 +462,10 @@ class EventLoopNode(NodeProtocol): tools.append(self._build_ask_user_tool()) if stream_id == "queen": tools.append(self._build_ask_user_multiple_tool()) - # Workers/subagents can escalate blockers to the queen. + # Workers can escalate blockers to the queen. if stream_id not in ("queen", "judge"): tools.append(self._build_escalate_tool()) - # Add delegate_to_sub_agent tool if: - # - Node has sub_agents defined - # - We are NOT in subagent mode (prevents nested delegation) - if not ctx.is_subagent_mode: - sub_agents = getattr(ctx.node_spec, "sub_agents", None) or [] - if sub_agents: - delegate_tool = self._build_delegate_tool(sub_agents, ctx.node_registry) - if delegate_tool: - tools.append(delegate_tool) - logger.info( - "[%s] delegate_to_sub_agent injected (sub_agents=%s)", - node_id, - sub_agents, - ) - else: - logger.error( - "[%s] _build_delegate_tool returned None for sub_agents=%s", - node_id, - sub_agents, - ) - else: - logger.debug("[%s] Skipped delegate tool (is_subagent_mode=True)", node_id) - - # Add report_to_parent tool for sub-agents with a report callback - if ctx.is_subagent_mode and ctx.report_callback is not None: - tools.append(self._build_report_to_parent_tool()) - logger.info( "[%s] Tools available (%d): %s | direct_user_io=%s | judge=%s", node_id, @@ -565,11 +498,11 @@ class EventLoopNode(NodeProtocol): # 6. Main loop logger.debug( - "[EventLoopNode.execute] Entering main loop, start_iteration=%d", start_iteration + "[AgentLoop.execute] Entering main loop, start_iteration=%d", start_iteration ) for iteration in range(start_iteration, self._config.max_iterations): iter_start = time.time() - logger.debug("[EventLoopNode.execute] iteration=%d starting", iteration) + logger.debug("[AgentLoop.execute] iteration=%d starting", iteration) # 6a. 
Check pause (no current-iteration data yet — only log_node_complete needed) if await self._check_pause(ctx, conversation, iteration): @@ -601,18 +534,18 @@ class EventLoopNode(NodeProtocol): # 6b. Drain injection queue logger.debug( - "[EventLoopNode.execute] iteration=%d: draining injection queue...", iteration + "[AgentLoop.execute] iteration=%d: draining injection queue...", iteration ) drained_injections = await self._drain_injection_queue(conversation, ctx) logger.debug( - "[EventLoopNode.execute] iteration=%d: drained %d injections", + "[AgentLoop.execute] iteration=%d: drained %d injections", iteration, drained_injections, ) # 6b1. Drain trigger queue (framework-level signals) drained_triggers = await self._drain_trigger_queue(conversation) logger.debug( - "[EventLoopNode.execute] iteration=%d: drained %d triggers", + "[AgentLoop.execute] iteration=%d: drained %d triggers", iteration, drained_triggers, ) @@ -685,8 +618,6 @@ class EventLoopNode(NodeProtocol): "ask_user", "ask_user_multiple", "escalate", - "delegate_to_sub_agent", - "report_to_parent", } synthetic = [t for t in tools if t.name in _synthetic_names] tools.clear() @@ -696,11 +627,11 @@ class EventLoopNode(NodeProtocol): # 6b3. 
Dynamic prompt refresh (phase switching / memory refresh) if ctx.dynamic_prompt_provider is not None or ctx.dynamic_memory_provider is not None: if ctx.dynamic_prompt_provider is not None: - from framework.graph.prompting import stamp_prompt_datetime + from framework.orchestrator.prompting import stamp_prompt_datetime _new_prompt = stamp_prompt_datetime(ctx.dynamic_prompt_provider()) else: - from framework.graph.prompting import build_system_prompt_for_node_context + from framework.orchestrator.prompting import build_system_prompt_for_node_context _new_prompt = build_system_prompt_for_node_context(ctx) if _new_prompt != conversation.system_prompt: @@ -743,7 +674,7 @@ class EventLoopNode(NodeProtocol): len(conversation.messages), ) logger.debug( - "[EventLoopNode.execute] iteration=%d: entering _run_single_turn loop", iteration + "[AgentLoop.execute] iteration=%d: entering _run_single_turn loop", iteration ) _stream_retry_count = 0 _turn_cancelled = False @@ -752,7 +683,7 @@ class EventLoopNode(NodeProtocol): while True: try: logger.debug( - "[EventLoopNode.execute] iteration=%d: calling _run_single_turn (retry=%d)", + "[AgentLoop.execute] iteration=%d: calling _run_single_turn (retry=%d)", iteration, _stream_retry_count, ) @@ -768,12 +699,12 @@ class EventLoopNode(NodeProtocol): queen_input_requested, request_system_prompt, request_messages, - reported_to_parent, + _, ) = await self._run_single_turn( ctx, conversation, tools, iteration, accumulator ) logger.debug( - "[EventLoopNode.execute] iteration=%d:" + "[AgentLoop.execute] iteration=%d:" " _run_single_turn completed successfully", iteration, ) @@ -842,13 +773,13 @@ class EventLoopNode(NodeProtocol): break # success — exit retry loop except TurnCancelled: - logger.debug("[EventLoopNode.execute] iteration=%d: TurnCancelled", iteration) + logger.debug("[AgentLoop.execute] iteration=%d: TurnCancelled", iteration) _turn_cancelled = True break except Exception as e: logger.debug( - "[EventLoopNode.execute] 
iteration=%d:" + "[AgentLoop.execute] iteration=%d:" " Exception in _run_single_turn: %s (%s)", iteration, type(e).__name__, @@ -1024,7 +955,7 @@ class EventLoopNode(NodeProtocol): and not outputs_set and not user_input_requested and not queen_input_requested - and not reported_to_parent + ) if truly_empty and accumulator is not None: missing = self._get_missing_output_keys( @@ -1276,14 +1207,14 @@ class EventLoopNode(NodeProtocol): # blocking and resumption. _is_worker = ( stream_id not in ("queen", "judge") - and not ctx.is_subagent_mode + and not False and not ctx.supports_direct_user_io and self._event_bus is not None ) _worker_no_tool_turn = ( not real_tool_results and not outputs_set - and not reported_to_parent + and not queen_input_requested and not user_input_requested ) @@ -1733,7 +1664,7 @@ class EventLoopNode(NodeProtocol): # 6i. Judge evaluation should_judge = ( - ctx.is_subagent_mode # Always evaluate subagents + False or (iteration + 1) % self._config.judge_every_n_turns == 0 or not real_tool_results # no real tool calls = natural stop ) @@ -1789,7 +1720,7 @@ class EventLoopNode(NodeProtocol): missing = self._get_missing_output_keys( accumulator, ctx.node_spec.output_keys, ctx.node_spec.nullable_output_keys ) - if missing and self._judge is not None and not self._mark_complete_flag: + if missing and self._judge is not None : hint = ( f"Task incomplete. Required outputs not yet produced: {missing}. " f"Follow your system prompt instructions to complete the work." @@ -1988,7 +1919,7 @@ class EventLoopNode(NodeProtocol): image_content: Optional list of OpenAI-style image blocks to attach. 
""" logger.debug( - "[EventLoopNode.inject_event] content_len=%d," + "[AgentLoop.inject_event] content_len=%d," " is_client_input=%s, has_images=%s," " queue_size_before=%d", len(content) if content else 0, @@ -1998,15 +1929,15 @@ class EventLoopNode(NodeProtocol): ) try: await self._injection_queue.put((content, is_client_input, image_content)) - logger.debug("[EventLoopNode.inject_event] Message queued successfully") + logger.debug("[AgentLoop.inject_event] Message queued successfully") except Exception as e: - logger.exception("[EventLoopNode.inject_event] Failed to queue message: %s", e) + logger.exception("[AgentLoop.inject_event] Failed to queue message: %s", e) raise try: self._input_ready.set() - logger.debug("[EventLoopNode.inject_event] _input_ready.set() called") + logger.debug("[AgentLoop.inject_event] _input_ready.set() called") except Exception as e: - logger.exception("[EventLoopNode.inject_event] Failed to set _input_ready: %s", e) + logger.exception("[AgentLoop.inject_event] Failed to set _input_ready: %s", e) raise async def inject_trigger(self, trigger: TriggerEvent) -> None: @@ -2157,7 +2088,6 @@ class EventLoopNode(NodeProtocol): ask_user_prompt = "" ask_user_options: list[str] | None = None queen_input_requested = False - reported_to_parent = False # Accumulate ALL tool calls across inner iterations for L3 logging. # Unlike real_tool_results (reset each inner iteration), this persists. logged_tool_calls: list[dict] = [] @@ -2231,16 +2161,28 @@ class EventLoopNode(NodeProtocol): ): if isinstance(event, TextDeltaEvent): accumulated_text = event.snapshot - await self._publish_text_delta( - stream_id, - node_id, - event.content, - event.snapshot, - ctx, - execution_id, - iteration=iteration, - inner_turn=inner_turn, - ) + # Filter ... blocks from client output. + # Content inside think tags is internal reasoning -- only + # the text after is shown to the user. 
+ _content = event.content + if "" in event.snapshot and "" not in event.snapshot: + _content = "" # still inside think block + elif "" in _content: + # End of think block -- emit only text after the tag + _content = _content.split("", 1)[-1] + elif "" in _content: + _content = "" # opening tag in this chunk + if _content: + await self._publish_text_delta( + stream_id, + node_id, + _content, + event.snapshot, + ctx, + execution_id, + iteration=iteration, + inner_turn=inner_turn, + ) elif isinstance(event, ToolCallEvent): _tc.append(event) @@ -2348,10 +2290,27 @@ class EventLoopNode(NodeProtocol): queen_input_requested, final_system_prompt, final_messages, - reported_to_parent, + False, ) - # Execute tool calls — framework tools (set_output, ask_user) + # Priority drain: if user sent a message while the LLM was + # streaming, inject it into the conversation NOW -- before tool + # execution. The LLM will see it on the next inner turn. + if not self._injection_queue.empty(): + while not self._injection_queue.empty(): + _inj_content, _inj_client, _inj_images = ( + self._injection_queue.get_nowait() + ) + if _inj_client: + await conversation.add_user_message(_inj_content) + logger.info( + "[%s] Priority-injected user message mid-turn (%d chars)", + node_id, len(_inj_content), + ) + else: + await conversation.add_user_message(_inj_content) + + # Execute tool calls -- framework tools (set_output, ask_user) # run inline; real MCP tools run in parallel. real_tool_results: list[dict] = [] limit_hit = False @@ -2361,13 +2320,12 @@ class EventLoopNode(NodeProtocol): ) # Phase 1: triage — handle framework tools immediately, - # queue real tools and subagents for parallel execution. + # queue real tools for parallel execution. 
results_by_id: dict[str, ToolResult] = {} timing_by_id: dict[ str, dict[str, Any] ] = {} # tool_use_id -> {start_timestamp, duration_s} pending_real: list[ToolCallEvent] = [] - pending_subagent: list[ToolCallEvent] = [] for tc in tool_calls: tool_call_count += 1 @@ -2610,76 +2568,6 @@ class EventLoopNode(NodeProtocol): ) results_by_id[tc.tool_use_id] = result - elif tc.tool_name == "delegate_to_sub_agent": - # Guard: in continuous mode the LLM may see delegate - # calls from a previous node's conversation history and - # attempt to re-use the tool on a node that doesn't own - # it. Only accept if the tool was actually offered. - if not any(t.name == "delegate_to_sub_agent" for t in tools): - logger.warning( - "[%s] LLM called delegate_to_sub_agent but tool " - "was not offered to this node — rejecting", - node_id, - ) - result = ToolResult( - tool_use_id=tc.tool_use_id, - content=( - "ERROR: delegate_to_sub_agent is not available " - "on this node. This tool belongs to a different " - "node in the workflow." 
- ), - is_error=True, - ) - results_by_id[tc.tool_use_id] = result - continue - # --- Framework-level subagent delegation --- - # Queue for parallel execution in Phase 2 - logger.info( - "🔄 LLM requesting subagent delegation: agent_id='%s', task='%s'", - tc.tool_input.get("agent_id", "?"), - (tc.tool_input.get("task", "")[:100] + "...") - if len(tc.tool_input.get("task", "")) > 100 - else tc.tool_input.get("task", ""), - ) - pending_subagent.append(tc) - - elif tc.tool_name == "report_to_parent": - # --- Report from sub-agent to parent (optionally blocking) --- - reported_to_parent = True - msg = tc.tool_input.get("message", "") - data = tc.tool_input.get("data") - wait = tc.tool_input.get("wait_for_response", False) - mark_complete = tc.tool_input.get("mark_complete", False) - response = None - - if ctx.report_callback: - try: - response = await ctx.report_callback( - msg, - data, - wait_for_response=wait, - ) - except Exception: - logger.warning( - "[%s] report_to_parent callback failed (swallowed)", - node_id, - exc_info=True, - ) - - if mark_complete: - self._mark_complete_flag = True - logger.info( - "[%s] mark_complete=True — subagent will accept on this iteration", - node_id, - ) - - result = ToolResult( - tool_use_id=tc.tool_use_id, - content=response if (wait and response) else "Report sent to parent.", - is_error=False, - ) - results_by_id[tc.tool_use_id] = result - else: # --- Real tool: check for truncated args, else queue --- if "_raw" in tc.tool_input: @@ -2754,175 +2642,6 @@ class EventLoopNode(NodeProtocol): result = raw results_by_id[tc.tool_use_id] = self._truncate_tool_result(result, tc.tool_name) - # Phase 2b: execute subagent delegations in parallel. 
- if pending_subagent: - _subagent_timeout = self._config.subagent_timeout_seconds - _inactivity_timeout = self._config.subagent_inactivity_timeout_seconds - - async def _timed_subagent( - _ctx: NodeContext, - _tc: ToolCallEvent, - _acc: OutputAccumulator = accumulator, - _wall_timeout: float = _subagent_timeout, - _activity_timeout: float = _inactivity_timeout, - ) -> tuple[ToolResult | BaseException, str, float]: - _s = time.time() - _iso = datetime.now(UTC).isoformat() - _last_activity = _s - _activity_event = asyncio.Event() - - async def _watchdog() -> None: - """Watchdog that times out only after inactivity period.""" - nonlocal _last_activity - while True: - _now = time.time() - _inactive_for = _now - _last_activity - _remaining = _activity_timeout - _inactive_for - - if _remaining <= 0: - # Inactivity timeout reached - return - - try: - await asyncio.wait_for(_activity_event.wait(), timeout=_remaining) - _activity_event.clear() - except TimeoutError: - # Check again in case activity happened during wait - continue - - async def _run_with_activity_timeout( - _coro, - ) -> ToolResult: - """Run subagent with activity-based timeout.""" - _watchdog_task = asyncio.create_task(_watchdog()) - try: - _result = await _coro - return _result - finally: - _watchdog_task.cancel() - try: - await _watchdog_task - except asyncio.CancelledError: - pass - - try: - # Subscribe to subagent activity events to reset inactivity timer - async def _on_subagent_activity(event) -> None: - nonlocal _last_activity - _last_activity = time.time() - _activity_event.set() - - _sub_id = None - if self._event_bus and _activity_timeout > 0: - from framework.runtime.event_bus import EventType - - _sub_id = self._event_bus.subscribe( - event_types=[ - EventType.TOOL_CALL_STARTED, - EventType.LLM_TEXT_DELTA, - EventType.EXECUTION_STARTED, - ], - handler=_on_subagent_activity, - ) - - try: - _coro = self._execute_subagent( - _ctx, - _tc.tool_input.get("agent_id", ""), - _tc.tool_input.get("task", 
""), - accumulator=_acc, - ) - - if _activity_timeout > 0: - # Use activity-based timeout with wall-clock max - _result_coro = _run_with_activity_timeout(_coro) - if _wall_timeout > 0: - _r = await asyncio.wait_for(_result_coro, timeout=_wall_timeout) - else: - _r = await _result_coro - elif _wall_timeout > 0: - _r = await asyncio.wait_for(_coro, timeout=_wall_timeout) - else: - _r = await _coro - finally: - if _sub_id and self._event_bus: - self._event_bus.unsubscribe(_sub_id) - - except TimeoutError: - _agent_id = _tc.tool_input.get("agent_id", "unknown") - _elapsed = time.time() - _s - logger.warning( - "Subagent '%s' timed out after %.0fs (inactivity threshold: %.0fs)", - _agent_id, - _elapsed, - _activity_timeout if _activity_timeout > 0 else _wall_timeout, - ) - _r = ToolResult( - tool_use_id=_tc.tool_use_id, - content=( - f"Subagent '{_agent_id}' timed out after " - f"{_elapsed:.0f}s of inactivity. " - "The subagent was not making progress. " - "Try a simpler task or break it into smaller pieces." 
- ), - is_error=True, - ) - except BaseException as _exc: - _r = _exc - _dur = round(time.time() - _s, 3) - return _r, _iso, _dur - - subagent_timed = await asyncio.gather( - *(_timed_subagent(ctx, tc) for tc in pending_subagent), - return_exceptions=True, - ) - for tc, entry in zip(pending_subagent, subagent_timed, strict=True): - if isinstance(entry, BaseException): - raw = entry - _start_iso = datetime.now(UTC).isoformat() - _dur_s = 0 - else: - raw, _start_iso, _dur_s = entry - _sa_timing = { - "start_timestamp": _start_iso, - "duration_s": _dur_s, - } - if isinstance(raw, BaseException): - result = ToolResult( - tool_use_id=tc.tool_use_id, - content=json.dumps( - { - "message": f"Sub-agent execution raised: {raw}", - "data": None, - "metadata": {"success": False, "error": str(raw)}, - } - ), - is_error=True, - ) - else: - # Attach the tool_use_id to the result - result = ToolResult( - tool_use_id=tc.tool_use_id, - content=raw.content, - is_error=raw.is_error, - ) - # Route through _truncate_tool_result so large - # subagent results are saved to spillover files - # and survive pruning (instead of being "cleared - # from context" with no recovery path). - result = self._truncate_tool_result(result, "delegate_to_sub_agent") - results_by_id[tc.tool_use_id] = result - logged_tool_calls.append( - { - "tool_use_id": tc.tool_use_id, - "tool_name": "delegate_to_sub_agent", - "tool_input": tc.tool_input, - "content": result.content, - "is_error": result.is_error, - **_sa_timing, - } - ) - # Phase 3: record results into conversation in original order, # build logged/real lists, and publish completed events. 
for tc in tool_calls[:executed_in_batch]: @@ -2936,8 +2655,6 @@ class EventLoopNode(NodeProtocol): "ask_user", "ask_user_multiple", "escalate", - "delegate_to_sub_agent", - "report_to_parent", ): tool_entry = { "tool_use_id": tc.tool_use_id, @@ -3056,7 +2773,7 @@ class EventLoopNode(NodeProtocol): queen_input_requested, final_system_prompt, final_messages, - reported_to_parent, + False, ) # --- Mid-turn pruning: prevent context blowup within a single turn --- @@ -3090,7 +2807,7 @@ class EventLoopNode(NodeProtocol): queen_input_requested, final_system_prompt, final_messages, - reported_to_parent, + False, ) # Tool calls processed -- loop back to stream with updated conversation @@ -3118,16 +2835,6 @@ class EventLoopNode(NodeProtocol): """Build the synthetic escalate tool. Delegates to synthetic_tools module.""" return build_escalate_tool() - def _build_delegate_tool( - self, sub_agents: list[str], node_registry: dict[str, Any] - ) -> Tool | None: - """Build the synthetic delegate_to_sub_agent tool. Delegates to synthetic_tools module.""" - return build_delegate_tool(sub_agents, node_registry) - - def _build_report_to_parent_tool(self) -> Tool: - """Build the synthetic report_to_parent tool. Delegates to synthetic_tools module.""" - return build_report_to_parent_tool() - def _handle_set_output( self, tool_input: dict[str, Any], @@ -3151,7 +2858,7 @@ class EventLoopNode(NodeProtocol): ) -> JudgeVerdict: """Evaluate the current state. Delegates to judge_pipeline module.""" return await judge_turn( - mark_complete_flag=self._mark_complete_flag, + mark_complete_flag=False, judge=self._judge, ctx=ctx, conversation=conversation, @@ -3176,7 +2883,7 @@ class EventLoopNode(NodeProtocol): Delegates to :func:`extract_tool_call_history` in conversation.py. 
""" - from framework.graph.conversation import extract_tool_call_history + from framework.agent_loop.conversation import extract_tool_call_history return extract_tool_call_history(conversation.messages, max_entries=max_entries) @@ -3781,46 +3488,3 @@ class EventLoopNode(NodeProtocol): # Subagent Execution # ------------------------------------------------------------------- - async def _execute_subagent( - self, - ctx: NodeContext, - agent_id: str, - task: str, - *, - accumulator: OutputAccumulator | None = None, - ) -> ToolResult: - """Execute a subagent and return the result as a ToolResult. - - The subagent: - - Gets a fresh conversation with just the task - - Has read-only access to the parent's readable data buffer - - Cannot delegate to its own subagents (prevents recursion) - - Returns its output in structured JSON format - - Args: - ctx: Parent node's context (for data buffer, tools, LLM access). - agent_id: The node ID of the subagent to invoke. - task: The task description to give the subagent. - accumulator: Parent's OutputAccumulator — provides outputs that - have been set via ``set_output`` but not yet written to - data buffer (which only happens after the node completes). 
- - Returns: - ToolResult with structured JSON output containing: - - message: Human-readable summary - - data: Subagent's output (free-form JSON) - - metadata: Execution metadata (success, tokens, latency) - """ - return await execute_subagent( - ctx=ctx, - agent_id=agent_id, - task=task, - accumulator=accumulator, - event_bus=self._event_bus, - config=self._config, - tool_executor=self._tool_executor, - conversation_store=self._conversation_store, - subagent_instance_counter=self._subagent_instance_counter, - event_loop_node_cls=type(self), - escalation_receiver_cls=_EscalationReceiver, - ) diff --git a/core/framework/graph/conversation.py b/core/framework/agent_loop/conversation.py similarity index 99% rename from core/framework/graph/conversation.py rename to core/framework/agent_loop/conversation.py index 40b720fc..dcd9da70 100644 --- a/core/framework/graph/conversation.py +++ b/core/framework/agent_loop/conversation.py @@ -324,7 +324,7 @@ def _try_extract_key(content: str, key: str) -> str | None: 3. Colon format: ``key: value``. 4. Equals format: ``key = value``. """ - from framework.graph.node import find_json_object + from framework.orchestrator.node import find_json_object # 1. Whole message is JSON try: diff --git a/core/framework/agent_loop/internals/__init__.py b/core/framework/agent_loop/internals/__init__.py new file mode 100644 index 00000000..45601913 --- /dev/null +++ b/core/framework/agent_loop/internals/__init__.py @@ -0,0 +1,7 @@ +"""Agent loop internals -- compaction, judge, tools, subagent execution. + +Re-exports from legacy locations for the new import path. 
+""" + +from framework.agent_loop.internals.compaction import * # noqa: F401, F403 +from framework.agent_loop.internals.synthetic_tools import * # noqa: F401, F403 diff --git a/core/framework/graph/event_loop/compaction.py b/core/framework/agent_loop/internals/compaction.py similarity index 97% rename from core/framework/graph/event_loop/compaction.py rename to core/framework/agent_loop/internals/compaction.py index a22da05a..1b54be9a 100644 --- a/core/framework/graph/event_loop/compaction.py +++ b/core/framework/agent_loop/internals/compaction.py @@ -19,11 +19,11 @@ from datetime import UTC, datetime from pathlib import Path from typing import Any -from framework.graph.conversation import Message, NodeConversation -from framework.graph.event_loop.event_publishing import publish_context_usage -from framework.graph.event_loop.types import LoopConfig, OutputAccumulator -from framework.graph.node import NodeContext -from framework.runtime.event_bus import EventBus +from framework.agent_loop.conversation import Message, NodeConversation +from framework.agent_loop.internals.event_publishing import publish_context_usage +from framework.agent_loop.internals.types import LoopConfig, OutputAccumulator +from framework.orchestrator.node import NodeContext +from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) @@ -368,8 +368,8 @@ async def llm_compact( in half and each half is summarised independently. Tool history is appended once at the top-level call (``_depth == 0``). 
""" - from framework.graph.conversation import extract_tool_call_history - from framework.graph.event_loop.tool_result_handler import is_context_too_large_error + from framework.agent_loop.conversation import extract_tool_call_history + from framework.agent_loop.internals.tool_result_handler import is_context_too_large_error if _depth > max_depth: raise RuntimeError(f"LLM compaction recursion limit ({max_depth})") @@ -724,7 +724,7 @@ async def log_compaction( ) if event_bus: - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType event_data: dict[str, Any] = { "level": level, @@ -861,6 +861,6 @@ def _extract_tool_call_history(conversation: NodeConversation) -> str: directly (vs. the module-level extract_tool_call_history in conversation.py which works on raw message lists). """ - from framework.graph.conversation import extract_tool_call_history + from framework.agent_loop.conversation import extract_tool_call_history return extract_tool_call_history(list(conversation.messages)) diff --git a/core/framework/graph/event_loop/cursor_persistence.py b/core/framework/agent_loop/internals/cursor_persistence.py similarity index 97% rename from core/framework/graph/event_loop/cursor_persistence.py rename to core/framework/agent_loop/internals/cursor_persistence.py index 65f7d5fc..627fee8c 100644 --- a/core/framework/graph/event_loop/cursor_persistence.py +++ b/core/framework/agent_loop/internals/cursor_persistence.py @@ -14,9 +14,9 @@ from collections.abc import Awaitable, Callable from dataclasses import dataclass from typing import Any -from framework.graph.conversation import ConversationStore, NodeConversation -from framework.graph.event_loop.types import LoopConfig, OutputAccumulator, TriggerEvent -from framework.graph.node import NodeContext +from framework.agent_loop.conversation import ConversationStore, NodeConversation +from framework.agent_loop.internals.types import LoopConfig, 
OutputAccumulator, TriggerEvent +from framework.orchestrator.node import NodeContext from framework.llm.capabilities import supports_image_tool_results logger = logging.getLogger(__name__) diff --git a/core/framework/graph/event_loop/event_publishing.py b/core/framework/agent_loop/internals/event_publishing.py similarity index 97% rename from core/framework/graph/event_loop/event_publishing.py rename to core/framework/agent_loop/internals/event_publishing.py index 85846620..69e487ab 100644 --- a/core/framework/graph/event_loop/event_publishing.py +++ b/core/framework/agent_loop/internals/event_publishing.py @@ -9,10 +9,10 @@ from __future__ import annotations import logging import time -from framework.graph.conversation import NodeConversation -from framework.graph.event_loop.types import HookContext -from framework.graph.node import NodeContext -from framework.runtime.event_bus import EventBus +from framework.agent_loop.conversation import NodeConversation +from framework.agent_loop.internals.types import HookContext +from framework.orchestrator.node import NodeContext +from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) @@ -177,7 +177,7 @@ async def publish_context_usage( if not event_bus: return - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType estimated = conversation.estimate_tokens() max_tokens = conversation._max_context_tokens diff --git a/core/framework/graph/event_loop/judge_pipeline.py b/core/framework/agent_loop/internals/judge_pipeline.py similarity index 95% rename from core/framework/graph/event_loop/judge_pipeline.py rename to core/framework/agent_loop/internals/judge_pipeline.py index 281d3991..2bebe4a7 100644 --- a/core/framework/graph/event_loop/judge_pipeline.py +++ b/core/framework/agent_loop/internals/judge_pipeline.py @@ -5,9 +5,9 @@ from __future__ import annotations import logging from collections.abc import Callable -from 
framework.graph.conversation import NodeConversation -from framework.graph.event_loop.types import JudgeProtocol, JudgeVerdict, OutputAccumulator -from framework.graph.node import NodeContext +from framework.agent_loop.conversation import NodeConversation +from framework.agent_loop.internals.types import JudgeProtocol, JudgeVerdict, OutputAccumulator +from framework.orchestrator.node import NodeContext logger = logging.getLogger(__name__) @@ -155,7 +155,7 @@ async def judge_turn( # Level 2b: conversation-aware quality check (if success_criteria set) if ctx.node_spec.success_criteria and ctx.llm: - from framework.graph.conversation_judge import evaluate_phase_completion + from framework.orchestrator.conversation_judge import evaluate_phase_completion verdict = await evaluate_phase_completion( llm=ctx.llm, diff --git a/core/framework/graph/event_loop/stall_detector.py b/core/framework/agent_loop/internals/stall_detector.py similarity index 100% rename from core/framework/graph/event_loop/stall_detector.py rename to core/framework/agent_loop/internals/stall_detector.py diff --git a/core/framework/graph/event_loop/synthetic_tools.py b/core/framework/agent_loop/internals/synthetic_tools.py similarity index 69% rename from core/framework/graph/event_loop/synthetic_tools.py rename to core/framework/agent_loop/internals/synthetic_tools.py index fa9d19d5..5a5bf3c3 100644 --- a/core/framework/graph/event_loop/synthetic_tools.py +++ b/core/framework/agent_loop/internals/synthetic_tools.py @@ -204,118 +204,6 @@ def build_escalate_tool() -> Tool: }, ) - -def build_delegate_tool(sub_agents: list[str], node_registry: dict[str, Any]) -> Tool | None: - """Build the synthetic delegate_to_sub_agent tool for subagent invocation. - - Args: - sub_agents: List of node IDs that can be invoked as subagents. - node_registry: Map of node_id -> NodeSpec for looking up subagent descriptions. - - Returns: - Tool definition if sub_agents is non-empty, None otherwise. 
- """ - if not sub_agents: - return None - - agent_descriptions = [] - for agent_id in sub_agents: - spec = node_registry.get(agent_id) - if spec: - desc = getattr(spec, "description", "(no description)") - agent_descriptions.append(f"- {agent_id}: {desc}") - else: - agent_descriptions.append(f"- {agent_id}: (not found in registry)") - - return Tool( - name="delegate_to_sub_agent", - description=( - "Delegate a task to a specialized sub-agent. The sub-agent runs " - "autonomously with read-only access to current memory and returns " - "its result. Use this to parallelize work or leverage specialized capabilities.\n\n" - "Available sub-agents:\n" + "\n".join(agent_descriptions) - ), - parameters={ - "type": "object", - "properties": { - "agent_id": { - "type": "string", - "description": f"The sub-agent to invoke. Must be one of: {sub_agents}", - "enum": sub_agents, - }, - "task": { - "type": "string", - "description": ( - "The task description for the sub-agent to execute. " - "Be specific about what you want the sub-agent to do and " - "what information to return." - ), - }, - }, - "required": ["agent_id", "task"], - }, - ) - - -def build_report_to_parent_tool() -> Tool: - """Build the synthetic report_to_parent tool for sub-agent progress reports. - - Sub-agents call this to send one-way progress updates, partial findings, - or status reports to the parent node (and external observers via event bus) - without blocking execution. - - When ``wait_for_response`` is True, the sub-agent blocks until the parent - relays the user's response — used for escalation (e.g. login pages, CAPTCHAs). - - When ``mark_complete`` is True, the sub-agent terminates immediately after - sending the report — no need to call set_output for each output key. - """ - return Tool( - name="report_to_parent", - description=( - "Send a report to the parent agent. By default this is fire-and-forget: " - "the parent receives the report but does not respond. 
" - "Set wait_for_response=true to BLOCK until the user replies — use this " - "when you need human intervention (e.g. login pages, CAPTCHAs, " - "authentication walls). The user's response is returned as the tool result. " - "Set mark_complete=true to finish your task and terminate immediately " - "after sending the report — use this when your findings are in the " - "message/data fields and you don't need to call set_output." - ), - parameters={ - "type": "object", - "properties": { - "message": { - "type": "string", - "description": "A human-readable status or progress message.", - }, - "data": { - "type": "object", - "description": "Optional structured data to include with the report.", - }, - "wait_for_response": { - "type": "boolean", - "description": ( - "If true, block execution until the user responds. " - "Use for escalation scenarios requiring human intervention." - ), - "default": False, - }, - "mark_complete": { - "type": "boolean", - "description": ( - "If true, terminate the sub-agent immediately after sending " - "this report. The report message and data are delivered to the " - "parent as the final result. No set_output calls are needed." 
- ), - "default": False, - }, - }, - "required": ["message"], - }, - ) - - def handle_set_output( tool_input: dict[str, Any], output_keys: list[str] | None, diff --git a/core/framework/graph/event_loop/tool_result_handler.py b/core/framework/agent_loop/internals/tool_result_handler.py similarity index 100% rename from core/framework/graph/event_loop/tool_result_handler.py rename to core/framework/agent_loop/internals/tool_result_handler.py diff --git a/core/framework/graph/event_loop/types.py b/core/framework/agent_loop/internals/types.py similarity index 98% rename from core/framework/graph/event_loop/types.py rename to core/framework/agent_loop/internals/types.py index 69357fce..3a100f65 100644 --- a/core/framework/graph/event_loop/types.py +++ b/core/framework/agent_loop/internals/types.py @@ -9,7 +9,7 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any, Literal, Protocol, runtime_checkable -from framework.graph.conversation import ( +from framework.agent_loop.conversation import ( ConversationStore, ) @@ -68,7 +68,7 @@ class LoopConfig: max_output_value_chars: int = 2_000 # Stream retry. 
- max_stream_retries: int = 3 + max_stream_retries: int = 5 stream_retry_backoff_base: float = 2.0 stream_retry_max_delay: float = 60.0 diff --git a/core/framework/agents/__init__.py b/core/framework/agents/__init__.py index 561d96a0..46c0a5f8 100644 --- a/core/framework/agents/__init__.py +++ b/core/framework/agents/__init__.py @@ -8,6 +8,14 @@ FRAMEWORK_AGENTS_DIR = Path(__file__).parent def list_framework_agents() -> list[Path]: """List all framework agent directories.""" return sorted( - [p for p in FRAMEWORK_AGENTS_DIR.iterdir() if p.is_dir() and (p / "agent.py").exists()], + [ + p + for p in FRAMEWORK_AGENTS_DIR.iterdir() + if p.is_dir() + and ( + (p / "agent.json").exists() + or (p / "agent.py").exists() + ) + ], key=lambda p: p.name, ) diff --git a/core/framework/agents/credential_tester/agent.py b/core/framework/agents/credential_tester/agent.py index c78823f2..32336a72 100644 --- a/core/framework/agents/credential_tester/agent.py +++ b/core/framework/agents/credential_tester/agent.py @@ -21,15 +21,15 @@ from pathlib import Path from typing import TYPE_CHECKING from framework.config import get_max_context_tokens -from framework.graph import Goal, NodeSpec, SuccessCriterion -from framework.graph.checkpoint_config import CheckpointConfig -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult +from framework.orchestrator import Goal, NodeSpec, SuccessCriterion +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult from framework.llm import LiteLLMProvider -from framework.runner.mcp_registry import MCPRegistry -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.mcp_registry import MCPRegistry +from 
framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config from .nodes import build_tester_node @@ -37,7 +37,7 @@ from .nodes import build_tester_node logger = logging.getLogger(__name__) if TYPE_CHECKING: - from framework.runner import AgentRunner + from framework.loader import AgentLoader logger = logging.getLogger(__name__) @@ -233,7 +233,7 @@ requires_account_selection = True """Signal TUI to show account picker before starting the agent.""" -def configure_for_account(runner: AgentRunner, account: dict) -> None: +def configure_for_account(runner: AgentLoader, account: dict) -> None: """Scope the tester node's tools to the selected provider. Handles both Aden accounts (account= routing) and local accounts @@ -325,7 +325,7 @@ def _activate_local_account(credential_id: str, alias: str) -> None: def _configure_aden_node( - runner: AgentRunner, + runner: AgentLoader, provider: str, alias: str, detail: str, @@ -368,7 +368,7 @@ or any other identifier — always use the alias exactly as shown. 
def _configure_local_node( - runner: AgentRunner, + runner: AgentLoader, provider: str, alias: str, identity: dict, @@ -497,7 +497,7 @@ class CredentialTesterAgent: def __init__(self, config=None): self.config = config or default_config self._selected_account: dict | None = None - self._agent_runtime: AgentRuntime | None = None + self._agent_runtime: AgentHost | None = None self._tool_registry: ToolRegistry | None = None self._storage_path: Path | None = None @@ -613,7 +613,7 @@ class CredentialTesterAgent: graph = self._build_graph() - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=graph, goal=goal, storage_path=self._storage_path, diff --git a/core/framework/agents/credential_tester/nodes/__init__.py b/core/framework/agents/credential_tester/nodes/__init__.py index 31b1ac7e..682ca08e 100644 --- a/core/framework/agents/credential_tester/nodes/__init__.py +++ b/core/framework/agents/credential_tester/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Credential Tester agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec def build_tester_node( diff --git a/core/framework/agents/discovery.py b/core/framework/agents/discovery.py index 2cbf712b..8f978636 100644 --- a/core/framework/agents/discovery.py +++ b/core/framework/agents/discovery.py @@ -27,8 +27,8 @@ def _get_last_active(agent_path: Path) -> str | None: """Return the most recent updated_at timestamp across all sessions. Checks both worker sessions (``~/.hive/agents/{name}/sessions/``) and - queen sessions (``~/.hive/queen/session/``) whose ``meta.json`` references - the same *agent_path*. + queen sessions (``~/.hive/agents/queens/default/sessions/``) whose + ``meta.json`` references the same *agent_path*. """ from datetime import datetime @@ -53,7 +53,9 @@ def _get_last_active(agent_path: Path) -> str | None: continue # 2. 
Queen sessions - queen_sessions_dir = Path.home() / ".hive" / "queen" / "session" + from framework.config import QUEENS_DIR + + queen_sessions_dir = QUEENS_DIR / "default" / "sessions" if queen_sessions_dir.exists(): resolved = agent_path.resolve() for d in queen_sessions_dir.iterdir(): @@ -112,13 +114,33 @@ def _count_runs(agent_name: str) -> int: def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]: """Extract node count, tool count, and tags from an agent directory. - Prefers agent.py (AST-parsed) over agent.json for node/tool counts - since agent.json may be stale. Tags are only available from agent.json. + Checks agent.json (declarative) first, then agent.py (legacy). """ import ast node_count, tool_count, tags = 0, 0, [] + # Declarative JSON agents (preferred) + agent_json = agent_path / "agent.json" + if agent_json.exists(): + try: + data = json.loads(agent_json.read_text(encoding="utf-8")) + if isinstance(data, dict): + json_nodes = data.get("nodes", []) + node_count = len(json_nodes) + tools: set[str] = set() + for n in json_nodes: + node_tools = n.get("tools", {}) + if isinstance(node_tools, dict): + tools.update(node_tools.get("allowed", [])) + elif isinstance(node_tools, list): + tools.update(node_tools) + tool_count = len(tools) + return node_count, tool_count, tags + except Exception: + pass + + # Legacy: agent.py (AST-parsed) agent_py = agent_path / "agent.py" if agent_py.exists(): try: @@ -132,39 +154,31 @@ def _extract_agent_stats(agent_path: Path) -> tuple[int, int, list[str]]: except Exception: pass - agent_json = agent_path / "agent.json" - if agent_json.exists(): - try: - data = json.loads(agent_json.read_text(encoding="utf-8")) - json_nodes = data.get("graph", {}).get("nodes", []) or data.get("nodes", []) - if node_count == 0: - node_count = len(json_nodes) - tools: set[str] = set() - for n in json_nodes: - tools.update(n.get("tools", [])) - tool_count = len(tools) - tags = data.get("agent", {}).get("tags", []) - except 
Exception: - pass - return node_count, tool_count, tags def discover_agents() -> dict[str, list[AgentEntry]]: """Discover agents from all known sources grouped by category.""" - from framework.runner.cli import ( + from framework.loader.cli import ( _extract_python_agent_metadata, _get_framework_agents_dir, _is_valid_agent_dir, ) + from framework.config import COLONIES_DIR + groups: dict[str, list[AgentEntry]] = {} sources = [ - ("Your Agents", Path("exports")), + ("Your Agents", COLONIES_DIR), + ("Your Agents", Path("exports")), # compat fallback ("Framework", _get_framework_agents_dir()), ("Examples", Path("examples/templates")), ] + # Track seen agent directory names to avoid duplicates when the same + # agent exists in both colonies/ and exports/ (colonies takes priority). + _seen_agent_names: set[str] = set() + for category, base_dir in sources: if not base_dir.exists(): continue @@ -172,6 +186,9 @@ def discover_agents() -> dict[str, list[AgentEntry]]: for path in sorted(base_dir.iterdir(), key=lambda p: p.name): if not _is_valid_agent_dir(path): continue + if path.name in _seen_agent_names: + continue + _seen_agent_names.add(path.name) name, desc = _extract_python_agent_metadata(path) config_fallback_name = path.name.replace("_", " ").title() @@ -179,13 +196,19 @@ def discover_agents() -> dict[str, list[AgentEntry]]: node_count, tool_count, tags = _extract_agent_stats(path) if not used_config: - agent_json = path / "agent.json" - if agent_json.exists(): + # Try agent.json (declarative) for metadata + agent_json_path = path / "agent.json" + if agent_json_path.exists(): try: - data = json.loads(agent_json.read_text(encoding="utf-8")) - meta = data.get("agent", {}) - name = meta.get("name", name) - desc = meta.get("description", desc) + data = json.loads( + agent_json_path.read_text(encoding="utf-8"), + ) + if isinstance(data, dict): + raw_name = data.get("name", name) + if "-" in raw_name and " " not in raw_name: + raw_name = raw_name.replace("-", " ").title() 
+ name = raw_name + desc = data.get("description", desc) except Exception: pass @@ -204,6 +227,8 @@ def discover_agents() -> dict[str, list[AgentEntry]]: ) ) if entries: - groups[category] = entries + existing = groups.get(category, []) + existing.extend(entries) + groups[category] = existing return groups diff --git a/core/framework/agents/queen/__init__.py b/core/framework/agents/queen/__init__.py index caff6298..f86488f6 100644 --- a/core/framework/agents/queen/__init__.py +++ b/core/framework/agents/queen/__init__.py @@ -1,19 +1,13 @@ -""" -Queen — Native agent builder for the Hive framework. +"""Queen -- the agent builder for the Hive framework.""" -Deeply understands the agent framework and produces complete Python packages -with goals, nodes, edges, system prompts, MCP configuration, and tests -from natural language specifications. -""" - -from .agent import queen_goal, queen_graph +from .agent import queen_goal, queen_loop_config from .config import AgentMetadata, RuntimeConfig, default_config, metadata __version__ = "1.0.0" __all__ = [ "queen_goal", - "queen_graph", + "queen_loop_config", "RuntimeConfig", "AgentMetadata", "default_config", diff --git a/core/framework/agents/queen/agent.py b/core/framework/agents/queen/agent.py index e6583354..ba59f963 100644 --- a/core/framework/agents/queen/agent.py +++ b/core/framework/agents/queen/agent.py @@ -1,38 +1,29 @@ -"""Queen graph definition.""" +"""Queen agent definition. -from framework.graph import Goal -from framework.graph.edge import GraphSpec +The queen is a single AgentLoop -- no graph, no orchestrator. +Loaded by queen_orchestrator.create_queen(). +""" + +from framework.orchestrator.goal import Goal from .nodes import queen_node -# --------------------------------------------------------------------------- -# Queen graph — the primary persistent conversation. -# Loaded by queen_orchestrator.create_queen(), NOT by AgentRunner. 
-# --------------------------------------------------------------------------- - queen_goal = Goal( id="queen-manager", name="Queen Manager", description=( - "Manage the worker agent lifecycle and serve as the user's primary interactive interface." + "Manage the worker agent lifecycle and serve as the " + "user's primary interactive interface." ), success_criteria=[], constraints=[], ) -queen_graph = GraphSpec( - id="queen-graph", - goal_id=queen_goal.id, - version="1.0.0", - entry_node="queen", - entry_points={"start": "queen"}, - terminal_nodes=[], - pause_nodes=[], - nodes=[queen_node], - edges=[], - conversation_mode="continuous", - loop_config={ - "max_iterations": 999_999, - "max_tool_calls_per_turn": 30, - }, -) +# Loop config -- used by queen_orchestrator to build LoopConfig +queen_loop_config = { + "max_iterations": 999_999, + "max_tool_calls_per_turn": 30, + "max_context_tokens": 180_000, +} + +__all__ = ["queen_goal", "queen_loop_config", "queen_node"] diff --git a/core/framework/agents/queen/mcp_registry.json b/core/framework/agents/queen/mcp_registry.json new file mode 100644 index 00000000..80e62804 --- /dev/null +++ b/core/framework/agents/queen/mcp_registry.json @@ -0,0 +1,3 @@ +{ + "include": ["gcu-tools"] +} diff --git a/core/framework/agents/queen/nodes/__init__.py b/core/framework/agents/queen/nodes/__init__.py index 3f447541..e078afa4 100644 --- a/core/framework/agents/queen/nodes/__init__.py +++ b/core/framework/agents/queen/nodes/__init__.py @@ -2,7 +2,7 @@ from pathlib import Path -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Load reference docs at import time so they're always in the system prompt. # No voluntary read_file() calls needed — the LLM gets everything upfront. @@ -37,7 +37,7 @@ _appendices = _build_appendices() # GCU guide — shared between planning and building via _shared_building_knowledge. 
_gcu_section = ( - ("\n\n# GCU Nodes — Browser Automation\n\n" + _gcu_guide) + ("\n\n# Browser Automation Nodes\n\n" + _gcu_guide) if _is_gcu_enabled() and _gcu_guide else "" ) @@ -81,7 +81,6 @@ _QUEEN_PLANNING_TOOLS = [ "save_agent_draft", "confirm_and_build", # Scaffold + transition to building (requires confirm_and_build first) - "initialize_and_build_agent", # Load existing agent (after user confirms) "load_built_agent", ] @@ -172,7 +171,7 @@ _shared_building_knowledge = ( ## Paths (MANDATORY) **Always use RELATIVE paths** \ -(e.g. `exports/agent_name/config.py`, `exports/agent_name/nodes/__init__.py`). +(e.g. `exports/agent_name/agent.json`). **Never use absolute paths** like `/mnt/data/...` or `/workspace/...` — they fail. The project root is implicit. @@ -182,14 +181,18 @@ When designing worker nodes or writing worker system prompts, reference these \ tool names — NOT the coder-tools names (read_file, write_file, etc.). Worker data tools (for large results and spillover): -- save_data(filename, data, data_dir) — save data to a file for later retrieval -- load_data(filename, data_dir, offset_bytes?, limit_bytes?) — load data \ -with byte-based pagination -- list_data_files(data_dir) — list available data files -- append_data(filename, data, data_dir) — append to a file incrementally -- edit_data(filename, old_text, new_text, data_dir) — find-and-replace in a data file -- serve_file_to_user(filename, data_dir, label?, open_in_browser?) — \ -generate a clickable file URI for the user +Worker data tools (from files-tools MCP server): +- read_file(path) — read a file +- write_file(path, content) — write/create a file +- list_files(path) — list directory contents +- search_files(pattern, path) — regex search in files + +Worker data tools (from hive-tools MCP server): +- csv_read, csv_write, csv_append — CSV operations +- pdf_read — read PDF files + +All tools are registered in the global MCP registry (~/.hive/mcp_registry/). 
\ +Workers get tools from: hive-tools, gcu-tools, files-tools. IMPORTANT: Do NOT tell workers to use read_file, write_file, edit_file, \ search_files, or list_directory — those are YOUR tools, not theirs. @@ -204,7 +207,7 @@ _planning_knowledge = """\ # Core Mandates (Planning) - **DO NOT propose a complete goal on your own.** Instead, \ collaborate with the user to define it. -- **NEVER call `initialize_and_build_agent` without explicit user approval.** \ +- **NEVER call `confirm_and_build` without explicit user approval.** \ Present the full design first and wait for the user to confirm before building. - **Discover tools dynamically.** NEVER reference tools from static \ docs. Always run list_agent_tools() to see what actually exists. @@ -252,9 +255,9 @@ When the stakeholder describes what they want, mentally construct: **After the user responds, assess fit and gaps together.** Be honest and specific. \ Reference tools from list_agent_tools() AND built-in capabilities: -- **GCU browser automation** (`node_type="gcu"`) provides full Playwright-based \ +- **Browser automation provides full Playwright-based \ browser control (navigation, clicking, typing, scrolling, JS-rendered pages, \ -multi-tab). Do NOT list browser automation as missing — use GCU nodes. +multi-tab). Do NOT list browser automation as missing — use browser nodes with tools: {policy: "all"}. Present a short **Framework Fit Assessment**: - **Works well**: 2-4 strengths for this use case @@ -306,14 +309,11 @@ explicitly on a node. Available types: - **io** (dusty purple, parallelogram): External data input/output - **document** (steel blue, wavy rect): Report or document generation - **database** (muted teal, cylinder): Database or data store -- **subprocess** (dark cyan, subroutine): Delegated sub-agent / predefined process -- **browser** (deep blue, hexagon): GCU browser automation / sub-agent \ -delegation. At build time, browser nodes are dissolved into the parent \ -node's sub_agents list. 
Use for any GCU or sub-agent leaf node. +- **browser** (deep blue, hexagon): Browser automation node (uses gcu-tools). Auto-detection works well for most cases: first node → start, nodes with \ no outgoing edges → terminal, nodes with multiple conditional outgoing \ -edges → decision, GCU nodes → browser, nodes mentioning "database" → \ +edges → decision, browser tool nodes → browser, nodes mentioning "database" → \ database, nodes mentioning "report/document" → document, I/O tools like \ send_email → io. Everything else defaults to process. Set flowchart_type \ explicitly only when auto-detection would be wrong. @@ -354,48 +354,19 @@ gather → [Valid data?] →Yes→ transform → deliver In the draft: the `[Valid data?]` node has `flowchart_type: "decision"`, \ `decision_clause: "Data passes validation checks?"`, with labeled yes/no edges. -## Sub-Agent Nodes — Planning-Only Delegation +## Browser Automation Nodes -Sub-agent nodes (dark teal subroutines) are **planning-only** visual elements \ -that show which nodes delegate to sub-agents. At `confirm_and_build()`, \ -sub-agent nodes are **dissolved** into their parent node: - -- The sub-agent node's ID is added to the predecessor's `sub_agents` list -- The sub-agent node and its connecting edge are removed -- At runtime, the parent node can invoke the sub-agent via `delegate_to_sub_agent` - -**Rules for sub-agent nodes (INCLUDING GCU nodes):** -- GCU nodes are auto-detected as `flowchart_type: "browser"` (hexagon) -- Connect from the managing parent node to the sub-agent node -- Sub-agent nodes must be **leaf nodes** — NO outgoing edges to other nodes -- At build time, browser/GCU nodes are dissolved into the parent's \ -`sub_agents` list, just like decision nodes are dissolved into criteria - -**CRITICAL: GCU nodes (`node_type: "gcu"`) are ALWAYS sub-agents.** \ -They MUST NOT appear in the linear flow. NEVER chain GCU nodes \ -sequentially (A → gcu1 → gcu2 → B is WRONG). 
+The queen (you) owns intake: you gather user requirements, then call \
+write_file("<agent_path>/agent.json", <complete agent.json config>)
+The agent.json must include ALL of these in one write: +- `name`, `version`, `description` +- `goal` with `description`, `success_criteria`, `constraints` +- `identity_prompt` (agent-level behavior) +- `nodes` — each with `id`, `description`, `system_prompt`, `tools`, \ +`input_keys`, `output_keys`, `success_criteria` +- `edges` — connecting all nodes with proper conditions +- `entry_node`, `terminal_nodes` +- `mcp_servers` — REQUIRED. Always include all three: \ +`[{"name": "hive-tools"}, {"name": "gcu-tools"}, {"name": "files-tools"}]` +- `loop_config` — `max_iterations`, `max_context_tokens` -### Customizing generated files +**Write the COMPLETE config in one `write_file` call. No TODOs, no placeholders.** \ +The queen writes final production-ready system prompts directly. -**CRITICAL: Use `edit_file` to customize TODO placeholders. \ -NEVER use `write_file` to rewrite generated files from scratch. \ -Rewriting breaks imports, class structure, and causes validation failures.** +**There are NO Python files.** The framework loads agent.json directly. -Safe to edit with `edit_file`: -- System prompts, tools, input_keys, output_keys, success_criteria in \ -nodes/__init__.py -- Goal description, success criteria values, constraint values, edge \ -definitions, identity_prompt in agent.py -- CLI options in __main__.py -- For triggers (timers/webhooks), add entries to triggers.json in the \ -agent's export directory +MCP servers are loaded from the global registry by name. 
+`load_built_agent("<agent_path>")` to load the agent into the \
+5. Call confirm_and_build(agent_name) to scaffold and start building
-  initialize_and_build_agent()
+  confirm_and_build(agent_name)
@@ -966,15 +957,15 @@ user says "replan", "go back", "let's redesign", "change the approach", \ ## CRITICAL — Graph topology errors require replanning, not code edits -If you discover that the agent graph has structural problems — GCU nodes \ +If you discover that the agent graph has structural problems — browser nodes \ in the linear flow, missing edges, wrong node connections, incorrect \ -sub-agent assignments — you MUST call replan_agent() and fix the draft. \ -Do NOT attempt to fix topology by editing agent.py directly. The graph \ +node connections — you MUST call replan_agent() and fix the draft. \ +Do NOT attempt to fix topology by editing agent.json directly. The graph \ structure is defined by the draft → dissolution → code-gen pipeline. \ -Editing code to rewire nodes bypasses the flowchart and creates drift \ -between what the user sees and what the code does. +Editing the config to rewire nodes bypasses the flowchart and creates drift \ +between what the user sees and what the config does. -**WRONG:** "Let me fix agent.py to remove GCU nodes from edges..." +**WRONG:** "Let me fix agent.json to remove browser nodes from edges..." **RIGHT:** Call replan_agent(), fix the draft with save_agent_draft(), \ get user approval, then confirm_and_build() → the corrected code is \ generated automatically. @@ -1100,18 +1091,15 @@ You wake up when: If the user asks for progress, call get_graph_status() ONCE and report. \ If the summary mentions issues, follow up with get_graph_status(focus="issues"). -## Subagent delegations (browser automation, GCU) +## Browser automation nodes -When the worker delegates to a subagent (e.g., GCU browser automation), expect it \ -to take 2-5 minutes. During this time: -- Progress will show 0% — this is NORMAL. The subagent only calls set_output at the end. -- Check get_graph_status(focus="full") for "subagent_activity" — this shows the \ -subagent's latest reasoning text and confirms it is making real progress. 
+is BLOCKED and waiting for your response. You MUST follow this \
→ creates visual-only draft graph (stays in PLANNING)\n" "- confirm_and_build() → records user approval of draft (stays in PLANNING)\n" - "- initialize_and_build_agent(agent_name?, nodes?) → scaffolds package + switches to " + "- confirm_and_build(agent_name) → scaffolds package + switches to " "BUILDING (requires draft + confirmation for new agents)\n" "- replan_agent() → switches back to PLANNING phase (only when user explicitly requests)\n" "- load_built_agent(path) → switches to STAGING phase\n" diff --git a/core/framework/agents/queen/queen_memory_v2.py b/core/framework/agents/queen/queen_memory_v2.py index bf41a5a6..d2e61b6f 100644 --- a/core/framework/agents/queen/queen_memory_v2.py +++ b/core/framework/agents/queen/queen_memory_v2.py @@ -1,9 +1,15 @@ """Queen global memory helpers. -Global memory lives in ``~/.hive/queen/global_memory/`` and stores durable -cross-session knowledge about the user (profile, preferences, environment, -feedback). Each memory is an individual ``.md`` file with optional YAML -frontmatter (name, type, description). +Memory hierarchy:: + + ~/.hive/memories/ + global/ # shared across all queens and colonies + colonies/{name}/ # colony-scoped memories + agents/queens/{name}/ # queen-specific memories + agents/{name}/ # per-worker-agent memories + +Each memory is an individual ``.md`` file with optional YAML frontmatter +(name, type, description). """ from __future__ import annotations @@ -21,7 +27,7 @@ logger = logging.getLogger(__name__) GLOBAL_MEMORY_CATEGORIES: tuple[str, ...] 
= ("profile", "preference", "environment", "feedback") -_HIVE_QUEEN_DIR = Path.home() / ".hive" / "queen" +from framework.config import MEMORIES_DIR MAX_FILES: int = 200 MAX_FILE_SIZE_BYTES: int = 4096 # 4 KB hard limit per memory file @@ -31,8 +37,23 @@ _HEADER_LINE_LIMIT: int = 30 def global_memory_dir() -> Path: - """Return the queen-global memory directory.""" - return _HIVE_QUEEN_DIR / "global_memory" + """Return the global memory directory (shared across all queens/colonies).""" + return MEMORIES_DIR / "global" + + +def colony_memory_dir(colony_name: str) -> Path: + """Return the memory directory for a named colony.""" + return MEMORIES_DIR / "colonies" / colony_name + + +def queen_memory_dir(queen_name: str = "default") -> Path: + """Return the memory directory for a named queen.""" + return MEMORIES_DIR / "agents" / "queens" / queen_name + + +def agent_memory_dir(agent_name: str) -> Path: + """Return the memory directory for a worker agent.""" + return MEMORIES_DIR / "agents" / agent_name # --------------------------------------------------------------------------- diff --git a/core/framework/agents/queen/recall_selector.py b/core/framework/agents/queen/recall_selector.py index ad1676eb..f2ec6d21 100644 --- a/core/framework/agents/queen/recall_selector.py +++ b/core/framework/agents/queen/recall_selector.py @@ -91,7 +91,19 @@ async def select_memories( resp.stop_reason, ) return [] - data = json.loads(raw) + # Some models wrap JSON in markdown fences or add preamble text. + # Try to extract the JSON object if raw parse fails. 
+ try: + data = json.loads(raw) + except json.JSONDecodeError: + import re + + m = re.search(r"\{.*\}", raw, re.DOTALL) + if m: + data = json.loads(m.group()) + else: + logger.warning("recall: LLM returned non-JSON: %.200s", raw) + return [] selected = data.get("selected_memories", []) valid_names = {f.filename for f in files} result = [s for s in selected if s in valid_names][:max_results] diff --git a/core/framework/agents/queen/reference/anti_patterns.md b/core/framework/agents/queen/reference/anti_patterns.md index 4e6bf085..1fa10218 100644 --- a/core/framework/agents/queen/reference/anti_patterns.md +++ b/core/framework/agents/queen/reference/anti_patterns.md @@ -25,10 +25,7 @@ 14. **Forgetting sys.path setup in conftest.py** — Tests need `exports/` and `core/` on sys.path. ## GCU Errors -15. **Manually wiring browser tools on event_loop nodes** — Use `node_type="gcu"` which auto-includes browser tools. Do NOT manually list browser tool names. -16. **Using GCU nodes as regular graph nodes** — GCU nodes are subagents only. They must ONLY appear in `sub_agents=["gcu-node-id"]` and be invoked via `delegate_to_sub_agent()`. Never connect via edges or use as entry/terminal nodes. -17. **Reusing the same GCU node ID for parallel tasks** — Each concurrent browser task needs a distinct GCU node ID (e.g. `gcu-site-a`, `gcu-site-b`). Two `delegate_to_sub_agent` calls with the same `agent_id` share a browser profile and will interfere with each other's pages. -18. **Passing `profile=` in GCU tool calls** — Profile isolation for parallel subagents is automatic. The framework injects a unique profile per subagent via an asyncio `ContextVar`. Hardcoding `profile="default"` in a GCU system prompt breaks this isolation. +15. **Manually wiring browser tools on event_loop nodes** — Browser nodes use tools: {policy: "all"} to get all browser tools. ## Worker Agent Errors 19. **Adding client-facing intake node to workers** — The queen owns intake. 
Workers should start with an autonomous processing node. Route worker review/approval through queen escalation instead of direct worker HITL. diff --git a/core/framework/agents/queen/reference/file_templates_declarative.md b/core/framework/agents/queen/reference/file_templates_declarative.md new file mode 100644 index 00000000..97afbb81 --- /dev/null +++ b/core/framework/agents/queen/reference/file_templates_declarative.md @@ -0,0 +1,227 @@ +# Declarative Agent File Templates + +Agents are defined as a single `agent.yaml` file. No Python code needed. +The runner loads this file directly -- no `agent.py`, `config.py`, or +`nodes/__init__.py` required. + +## agent.yaml -- Complete Agent Definition + +```yaml +name: my-agent +version: 1.0.0 +description: What this agent does. + +metadata: + intro_message: Welcome! What would you like me to do? + +# Template variables -- substituted into system_prompt and identity_prompt +# via {{variable_name}} syntax. Use this for config values that appear +# in prompts (spreadsheet IDs, API endpoints, account names, etc.) +variables: + spreadsheet_id: "1ZVxWDL..." + sheet_name: "contacts" + +goal: + description: What this agent achieves. + success_criteria: + - "First success criterion" + - "Second success criterion" + constraints: + - "Hard constraint the agent must respect" + +identity_prompt: | + You are a helpful agent. + +conversation_mode: continuous # always "continuous" for Hive agents + +loop_config: + max_iterations: 100 + max_tool_calls_per_turn: 30 + max_context_tokens: 32000 + +# MCP servers to connect (resolved by name from ~/.hive/mcp_registry/) +mcp_servers: + - name: hive-tools + - name: gcu-tools + +nodes: + # Node 1: Process (autonomous entry node) + # The queen handles intake and passes structured input via + # run_agent_with_input(task). NO client-facing intake node. 
+ - id: process + name: Process + description: Execute the task using available tools + max_node_visits: 0 # 0 = unlimited (forever-alive agents) + input_keys: [user_request, feedback] + output_keys: [results] + nullable_output_keys: [feedback] + tools: + policy: explicit + allowed: [web_search, web_scrape, save_data, load_data, list_data_files] + success_criteria: Results are complete and accurate. + system_prompt: | + You are a processing agent. Your task is in memory under "user_request". + If "feedback" is present, this is a revision. + + Work in phases: + 1. Use tools to gather/process data + 2. Analyze results + 3. Call set_output in a SEPARATE turn: + - set_output("results", "structured results") + + # Node 2: Handoff (autonomous) + - id: handoff + name: Handoff + description: Prepare worker results for queen review + max_node_visits: 0 + input_keys: [results, user_request] + output_keys: [next_action, feedback, worker_summary] + nullable_output_keys: [feedback, worker_summary] + tools: + policy: none # handoff nodes don't need tools + success_criteria: Results are packaged for queen decision-making. + system_prompt: | + Do NOT talk to the user directly. The queen is the only user interface. 
+ + If blocked, call escalate(reason, context) then set: + - set_output("next_action", "escalated") + - set_output("feedback", "what help is needed") + + Otherwise summarize and set: + - set_output("worker_summary", "short summary for queen") + - set_output("next_action", "done") or "revise" + - set_output("feedback", "what to revise") only when revising + +edges: + - from_node: process + to_node: handoff + # Feedback loop + - from_node: handoff + to_node: process + condition: conditional + condition_expr: "str(next_action).lower() == 'revise'" + priority: 2 + # Escalation loop + - from_node: handoff + to_node: process + condition: conditional + condition_expr: "str(next_action).lower() == 'escalated'" + priority: 3 + # Loop back for next task + - from_node: handoff + to_node: process + condition: conditional + condition_expr: "str(next_action).lower() == 'done'" + +entry_node: process +terminal_nodes: [] # [] = forever-alive +``` + +## Key differences from Python templates + +| Before (Python) | After (YAML) | +|-------------------------------------|----------------------------------------| +| `agent.py` (250 lines boilerplate) | Not needed | +| `config.py` (dataclass + metadata) | `variables:` + `metadata:` in YAML | +| `nodes/__init__.py` (NodeSpec calls)| `nodes:` list in YAML | +| `__init__.py`, `__main__.py` | Not needed | +| f-string config injection | `{{variable_name}}` templates | +| `mcp_servers.json` (separate file) | `mcp_servers:` in YAML (or keep file) | + +## Node types + +| Type | Description | Tools | +|--------------|---------------------------------------|--------------------------| +| `event_loop` | LLM-driven orchestration (default) | Explicit list or `none` | +| `gcu` | Browser automation via GCU tools | `policy: all` (auto) | + +## Tool access policies + +```yaml +# Explicit list (recommended for most nodes) +tools: + policy: explicit + allowed: [web_search, save_data] + +# All tools (for browser automation nodes) +tools: + policy: all + +# 
No tools (for handoff/summary nodes) +tools: + policy: none +``` + +## Edge conditions + +| Condition | When to use | +|---------------|-------------------------------------------------------| +| `on_success` | Default. Next node after current succeeds. | +| `on_failure` | Fallback path when current node fails. | +| `always` | Always traverse regardless of outcome. | +| `conditional` | Evaluate `condition_expr` against shared memory keys. | +| `llm_decide` | Let the LLM decide at runtime. | + +## Template variables + +Use `{{variable_name}}` in `system_prompt` and `identity_prompt`. +Variables are defined in the top-level `variables:` map. + +```yaml +variables: + spreadsheet_id: "1ZVxWDL..." + api_endpoint: "https://api.example.com" + +nodes: + - id: start + system_prompt: | + Connect to spreadsheet: {{spreadsheet_id}} + API endpoint: {{api_endpoint}} +``` + +## Entry points + +Default is a single manual entry point. For timer/scheduled triggers: + +```yaml +entry_points: + - id: default + trigger_type: manual + - id: daily-check + trigger_type: timer + trigger_config: + interval_minutes: 30 +``` + +## mcp_servers.json -- Still Supported + +The `mcp_servers.json` file is still loaded automatically if present alongside +`agent.yaml`. You can also inline servers in the YAML: + +```yaml +mcp_servers: + - name: hive-tools + - name: gcu-tools +``` + +Both approaches work. The JSON file takes precedence for backward compatibility. + +## Migration from Python agents + +Run the migration tool to convert existing agents: + +```bash +uv run python -m framework.tools.migrate_agent exports/my_agent +``` + +This generates `agent.yaml` from the existing `agent.py` + `nodes/` + `config.py`. +The original files are left untouched. Once verified, you can delete the Python files. 
+ +## Files after migration + +``` +my_agent/ + agent.yaml # The only required file + mcp_servers.json # Optional (can inline in YAML) + flowchart.json # Optional (auto-generated) +``` diff --git a/core/framework/agents/queen/reference/framework_guide.md b/core/framework/agents/queen/reference/framework_guide.md index 5f93511a..1a0f7e4f 100644 --- a/core/framework/agents/queen/reference/framework_guide.md +++ b/core/framework/agents/queen/reference/framework_guide.md @@ -1,306 +1,193 @@ -# Hive Agent Framework — Condensed Reference +# Hive Agent Framework -- Condensed Reference ## Architecture -Agents are Python packages in `exports/`: +Agents are declarative JSON configs in `exports/`: ``` exports/my_agent/ -├── __init__.py # MUST re-export ALL module-level vars from agent.py -├── __main__.py # CLI (run, tui, info, validate, shell) -├── agent.py # Graph construction (goal, edges, agent class) -├── config.py # Runtime config -├── nodes/__init__.py # Node definitions (NodeSpec) -├── mcp_servers.json # MCP tool server config -└── tests/ # pytest tests + agent.json # The entire agent definition + mcp_servers.json # MCP tool server config (optional, prefer registry refs) ``` -## Agent Loading Contract +No Python files. No `__init__.py`, `__main__.py`, `config.py`, or `nodes/`. 
-`AgentRunner.load()` imports the package (`__init__.py`) and reads these -module-level variables via `getattr()`: +## Agent Loading -| Variable | Required | Default if missing | Consequence | -|----------|----------|--------------------|-------------| -| `goal` | YES | `None` | **FATAL** — "must define goal, nodes, edges" | -| `nodes` | YES | `None` | **FATAL** — same error | -| `edges` | YES | `None` | **FATAL** — same error | -| `entry_node` | no | `nodes[0].id` | Probably wrong node | -| `entry_points` | no | `{}` | **Nodes unreachable** — validation fails | -| `terminal_nodes` | **YES** | `[]` | **FATAL** — graph must have at least one terminal node | -| `pause_nodes` | no | `[]` | OK | -| `conversation_mode` | no | not passed | Isolated mode (no context carryover) | -| `identity_prompt` | no | not passed | No agent-level identity | -| `loop_config` | no | `{}` | No iteration limits | -| `triggers.json` (file) | no | not present | No triggers (timers, webhooks) | +`AgentLoader.load()` reads `agent.json` and builds the execution graph. +If `agent.py` exists (legacy), it's loaded as a Python module instead. -**CRITICAL:** `__init__.py` MUST import and re-export ALL of these from -`agent.py`. Missing exports silently fall back to defaults, causing -hard-to-debug failures. +## agent.json Schema -**Why `default_agent.validate()` is NOT sufficient:** -`validate()` checks the agent CLASS's internal graph (self.nodes, self.edges). -These are always correct because the constructor references agent.py's module -vars directly. But `AgentRunner.load()` reads from the PACKAGE (`__init__.py`), -not the class. So `validate()` passes while `AgentRunner.load()` fails. -Always test with `AgentRunner.load("exports/{name}")` — this is the same -code path the TUI and `hive run` use. 
- -## Goal - -Defines success criteria and constraints: -```python -goal = Goal( - id="kebab-case-id", - name="Display Name", - description="What the agent does", - success_criteria=[ - SuccessCriterion(id="sc-id", description="...", metric="...", target="...", weight=0.25), - ], - constraints=[ - Constraint(id="c-id", description="...", constraint_type="hard", category="quality"), - ], -) +```json +{ + "name": "my-agent", + "version": "1.0.0", + "description": "What this agent does", + "goal": { + "description": "What to achieve", + "success_criteria": ["criterion 1", "criterion 2"], + "constraints": ["constraint 1"] + }, + "identity_prompt": "You are a helpful agent.", + "conversation_mode": "continuous", + "loop_config": { + "max_iterations": 100, + "max_tool_calls_per_turn": 30, + "max_context_tokens": 32000 + }, + "mcp_servers": [ + {"name": "hive-tools"}, + {"name": "gcu-tools"} + ], + "variables": { + "spreadsheet_id": "1ZVx..." + }, + "nodes": [...], + "edges": [...], + "entry_node": "process", + "terminal_nodes": [] +} ``` -- 3-5 success criteria, weights sum to 1.0 -- 1-5 constraints (hard/soft, categories: quality, accuracy, interaction, functional) -## NodeSpec Fields +## Template Variables + +Use `{{variable_name}}` in `system_prompt` and `identity_prompt`. 
Variables +are defined in the top-level `variables` object: + +```json +{ + "variables": {"sheet_id": "1ZVx..."}, + "nodes": [{ + "id": "start", + "system_prompt": "Use sheet: {{sheet_id}}" + }] +} +``` + +## Node Fields | Field | Type | Default | Description | |-------|------|---------|-------------| | id | str | required | kebab-case identifier | -| name | str | required | Display name | +| name | str | id | Display name | | description | str | required | What the node does | -| node_type | str | required | `"event_loop"` or `"gcu"` (browser automation — see GCU Guide appendix) | -| input_keys | list[str] | required | Memory keys this node reads | -| output_keys | list[str] | required | Memory keys this node writes via set_output | +| node_type | str | "event_loop" | `"event_loop"` | +| input_keys | list | [] | Memory keys this node reads | +| output_keys | list | [] | Memory keys this node writes via set_output | | system_prompt | str | "" | LLM instructions | -| tools | list[str] | [] | Tool names from MCP servers | -| client_facing | bool | False | Deprecated compatibility field. 
Queen interactivity is implicit; workers should escalate instead | -| nullable_output_keys | list[str] | [] | Keys that may remain unset | -| max_node_visits | int | 0 | 0=unlimited (default); >1 for one-shot feedback loops | -| max_retries | int | 3 | Retries on failure | +| tools | object | {} | Tool access policy (see below) | +| nullable_output_keys | list | [] | Keys that may remain unset | +| max_node_visits | int | 1 | 0=unlimited (for forever-alive agents) | | success_criteria | str | "" | Natural language for judge evaluation | +| client_facing | bool | false | Whether output is shown to user | -## EdgeSpec Fields +## Tool Access Policies + +Each node declares its tools via a policy object: + +```json +{"tools": {"policy": "explicit", "allowed": ["web_search", "save_data"]}} +{"tools": {"policy": "all"}} +{"tools": {"policy": "none"}} +``` + +- `explicit` (default): only named tools. Empty `allowed` = zero tools. +- `all`: all tools from registry (e.g. for browser automation nodes). +- `none`: no tools (for handoff/summary nodes). 
+ +## Edge Fields | Field | Type | Description | |-------|------|-------------| -| id | str | kebab-case identifier | -| source | str | Source node ID | -| target | str | Target node ID | -| condition | EdgeCondition | ON_SUCCESS, ON_FAILURE, ALWAYS, CONDITIONAL | -| condition_expr | str | Python expression evaluated against memory (for CONDITIONAL) | -| priority | int | Positive=forward (evaluated first), negative=feedback (loop-back) | +| from_node | str | Source node ID | +| to_node | str | Target node ID | +| condition | str | `on_success`, `on_failure`, `always`, `conditional` | +| condition_expr | str | Python expression for conditional routing | +| priority | int | Higher = evaluated first | + +condition_expr examples: +- `"needs_more_research == True"` +- `"str(next_action).lower() == 'revise'"` ## Key Patterns -### STEP 1/STEP 2 (Client-Facing Nodes) -``` -**STEP 1 — Respond to the user (text only, NO tool calls):** -[Present information, ask questions] - -**STEP 2 — After the user responds, call set_output:** -- set_output("key", "value based on user response") -``` -This prevents premature set_output before user interaction. - ### Fewer, Richer Nodes (CRITICAL) -**Hard limit: 3-6 nodes for most agents.** Never exceed 6 unless the user -explicitly requests a complex multi-phase pipeline. +**Hard limit: 3-6 nodes for most agents.** Each node boundary serializes +outputs and destroys in-context information. Merge unless: +1. Client-facing boundary (different interaction models) +2. Disjoint tool sets +3. Parallel execution (fan-out branches) -Each node boundary serializes outputs to the shared buffer and **destroys** all -in-context information: tool call results, intermediate reasoning, conversation -history. A research node that searches, fetches, and analyzes in ONE node keeps -all source material in its conversation context. Split across 3 nodes, each -downstream node only sees the serialized summary string. 
- -**Decision framework — merge unless ANY of these apply:** -1. **Client-facing boundary** — Autonomous and client-facing work MUST be - separate nodes (different interaction models) -2. **Disjoint tool sets** — If tools are fundamentally different (e.g., web - search vs database), separate nodes make sense -3. **Parallel execution** — Fan-out branches must be separate nodes - -**Red flags that you have too many nodes:** -- A node with 0 tools (pure LLM reasoning) → merge into predecessor/successor -- A node that sets only 1 trivial output → collapse into predecessor -- Multiple consecutive autonomous nodes → combine into one rich node -- A "report" node that presents analysis → merge into the client-facing node -- A "confirm" or "schedule" node that doesn't call any external service → remove - -**Typical agent structure (2 nodes):** +**Typical structure (2 nodes):** ``` -process (autonomous) ←→ review (queen-mediated) -``` -The queen owns intake — she gathers requirements from the user, then -passes structured input via `run_agent_with_input(task)`. When building -the agent, design the entry node's `input_keys` to match what the queen -will provide at run time. Worker agents should NOT have a client-facing -intake node. Mid-execution review/approval should happen through queen -escalation rather than direct worker HITL. 
- -For simpler agents, just 1 autonomous node: -``` -process (autonomous) — loops back to itself +process (autonomous) <-> review (queen-mediated) ``` -### nullable_output_keys -For inputs that only arrive on certain edges: -```python -research_node = NodeSpec( - input_keys=["brief", "feedback"], - nullable_output_keys=["feedback"], # Only present on feedback edge - max_node_visits=3, -) -``` - -### Mutually Exclusive Outputs -For routing decisions: -```python -review_node = NodeSpec( - output_keys=["approved", "feedback"], - nullable_output_keys=["approved", "feedback"], # Node sets one or the other -) -``` - -### Continuous Loop Pattern -Mark the primary event_loop node as terminal: `terminal_nodes=["process"]`. -The node has `output_keys` and can complete when the agent finishes its work. -Use `conversation_mode="continuous"` to preserve context across transitions. +The queen owns intake. Worker agents should NOT have a client-facing intake +node. Mid-execution review should happen through queen escalation. ### set_output - Synthetic tool injected by framework - Call separately from real tool calls (separate turn) - `set_output("key", "value")` stores to the shared buffer -## Edge Conditions - -| Condition | When | -|-----------|------| -| ON_SUCCESS | Node completed successfully | -| ON_FAILURE | Node failed | -| ALWAYS | Unconditional | -| CONDITIONAL | condition_expr evaluates to True against memory | - -condition_expr examples: -- `"needs_more_research == True"` -- `"str(next_action).lower() == 'new_agent'"` -- `"feedback is not None"` - -## Graph Lifecycle +### Graph Lifecycle | Pattern | terminal_nodes | When | |---------|---------------|------| -| **Continuous loop** | `["node-with-output-keys"]` | **DEFAULT for all agents** | +| Continuous loop | `["node-with-output-keys"]` | DEFAULT for all agents | | Linear | `["last-node"]` | One-shot/batch agents | -**Every graph must have at least one terminal node.** Terminal nodes -define where execution ends. 
For interactive agents that loop continuously, -mark the primary event_loop node as terminal (it has `output_keys` and can -complete at any point). The framework default for `max_node_visits` is 0 -(unbounded), so nodes work correctly in continuous loops without explicit -override. Only set `max_node_visits > 0` in one-shot agents with feedback loops. -Every node must have at least one outgoing edge — no dead ends. +Every graph must have at least one terminal node. -## Continuous Conversation Mode +### Continuous Conversation Mode `conversation_mode` has ONLY two valid states: -- `"continuous"` — recommended for interactive agents -- Omit entirely — isolated per-node conversations (each node starts fresh) +- `"continuous"` -- recommended (context carries across node transitions) +- Omit entirely -- isolated per-node conversations -**INVALID values** (do NOT use): `"client_facing"`, `"interactive"`, -`"adaptive"`, `"shared"`. These do not exist in the framework. - -When `conversation_mode="continuous"`: -- Same conversation thread carries across node transitions -- Layered system prompts: identity (agent-level) + narrative + focus (per-node) -- Transition markers inserted at boundaries -- Compaction happens opportunistically at phase transitions +**INVALID values:** `"client_facing"`, `"interactive"`, `"shared"`. ## loop_config Only three valid keys: -```python -loop_config = { - "max_iterations": 100, # Max LLM turns per node visit - "max_tool_calls_per_turn": 20, # Max tool calls per LLM response - "max_context_tokens": 32000, # Triggers conversation compaction +```json +{ + "max_iterations": 100, + "max_tool_calls_per_turn": 20, + "max_context_tokens": 32000 } ``` -**INVALID keys** (do NOT use): `"strategy"`, `"mode"`, `"timeout"`, -`"temperature"`. These are silently ignored or cause errors. 
## Data Tools (Spillover) For large data that exceeds context: -- `save_data(filename, data)` — Write to session data dir -- `load_data(filename, offset, limit)` — Read with pagination -- `list_data_files()` — List files -- `serve_file_to_user(filename, label)` — Clickable file:// URI +- `save_data(filename, data)` -- write to session data dir +- `load_data(filename, offset, limit)` -- read with pagination +- `list_data_files()` -- list files +- `serve_file_to_user(filename, label)` -- clickable file URI -`data_dir` is auto-injected by framework — LLM never sees it. +`data_dir` is auto-injected by framework. ## Fan-Out / Fan-In -Multiple ON_SUCCESS edges from same source → parallel execution via asyncio.gather(). -- Parallel nodes must have disjoint output_keys -- Only one branch may have client_facing nodes -- Fan-in node gets all outputs in the shared buffer +Multiple `on_success` edges from same source = parallel execution. +Parallel nodes must have disjoint output_keys. ## Judge System - **Implicit** (default): ACCEPTs when LLM finishes with no tool calls and all required outputs set - **SchemaJudge**: Validates against Pydantic model -- **Custom**: Implement `evaluate(context) -> JudgeVerdict` - -Judge is the SOLE acceptance mechanism — no ad-hoc framework gating. 
- -## Triggers (Timers, Webhooks) - -For agents that react to external events, create a `triggers.json` file -in the agent's export directory: - -```json -[ - { - "id": "daily-check", - "name": "Daily Check", - "trigger_type": "timer", - "trigger_config": {"cron": "0 9 * * *"}, - "task": "Run the daily check process" - } -] -``` - -### Key Fields -- `trigger_type`: `"timer"` or `"webhook"` -- `trigger_config`: `{"cron": "0 9 * * *"}` or `{"interval_minutes": 20}` -- `task`: describes what the worker should do when the trigger fires -- Triggers can also be created/removed at runtime via `set_trigger` / `remove_trigger` queen tools ## Tool Discovery -Do NOT rely on a static tool list — it will be outdated. Always call -`list_agent_tools()` with NO arguments first to see ALL available tools. -Only use `group=` or `output_schema=` as follow-up calls after seeing the -full list. +Always call `list_agent_tools()` first to see available tools. +Do NOT rely on a static tool list. ``` -list_agent_tools() # ALWAYS call this first -list_agent_tools(group="gmail", output_schema="full") # then drill into a category -list_agent_tools("exports/my_agent/mcp_servers.json") # specific agent's tools +list_agent_tools() # full summary +list_agent_tools(group="gmail", output_schema="full") # drill into category ``` -After building, run `validate_agent_package("{name}")` to check everything at once. - -Common tool categories (verify via list_agent_tools): -- **Web**: search, scrape, PDF -- **Data**: save/load/append/list data files, serve to user -- **File**: view, write, replace, diff, list, grep -- **Communication**: email, gmail, slack, telegram -- **CRM**: hubspot, apollo, calcom -- **GitHub**: stargazers, user profiles, repos -- **Vision**: image analysis -- **Time**: current time +After building, run `validate_agent_package("{name}")` to check everything. 
diff --git a/core/framework/agents/queen/reference/gcu_guide.md b/core/framework/agents/queen/reference/gcu_guide.md index c27db24d..cf254637 100644 --- a/core/framework/agents/queen/reference/gcu_guide.md +++ b/core/framework/agents/queen/reference/gcu_guide.md @@ -1,158 +1,53 @@ -# GCU Browser Automation Guide +# Browser Automation Guide -## When to Use GCU Nodes +## When to Use Browser Nodes -Use `node_type="gcu"` when: -- The user's workflow requires **navigating real websites** (scraping, form-filling, social media interaction, testing web UIs) -- The task involves **dynamic/JS-rendered pages** that `web_scrape` cannot handle (SPAs, infinite scroll, login-gated content) -- The agent needs to **interact with a website** — clicking, typing, scrolling, selecting, uploading files +Use browser nodes (with `tools: {policy: "all"}`) when: +- The task requires interacting with web pages (clicking, typing, navigating) +- No API is available for the target service +- The user is already logged in to the target site -Do NOT use GCU for: -- Static content that `web_scrape` handles fine -- API-accessible data (use the API directly) -- PDF/file processing -- Anything that doesn't require a browser UI +## What Browser Nodes Are -## What GCU Nodes Are +- Regular `event_loop` nodes with browser tools from gcu-tools MCP server +- Set `tools: {policy: "all"}` to give access to all browser tools +- Wire into the graph with edges like any other node +- No special node_type needed -- `node_type="gcu"` — a declarative enhancement over `event_loop` -- Framework auto-prepends browser best-practices system prompt -- Framework auto-includes all 31 browser tools from `gcu-tools` MCP server -- Same underlying `EventLoopNode` class — no new imports needed -- `tools=[]` is correct — tools are auto-populated at runtime +## Available Browser Tools -## GCU Architecture Pattern +All tools are prefixed with `browser_`: +- `browser_start`, `browser_open` -- launch/navigate +- `browser_click`, 
`browser_fill`, `browser_type` -- interact +- `browser_snapshot` -- read page content (preferred over screenshot) +- `browser_screenshot` -- visual capture +- `browser_scroll`, `browser_wait` -- navigation helpers +- `browser_evaluate` -- run JavaScript -GCU nodes are **subagents** — invoked via `delegate_to_sub_agent()`, not connected via edges. +## System Prompt Tips for Browser Nodes -- Primary nodes (`event_loop`, client-facing) orchestrate; GCU nodes do browser work -- Parent node declares `sub_agents=["gcu-node-id"]` and calls `delegate_to_sub_agent(agent_id="gcu-node-id", task="...")` -- GCU nodes set `max_node_visits=1` (single execution per delegation), `client_facing=False` -- GCU nodes use `output_keys=["result"]` and return structured JSON via `set_output("result", ...)` - -## GCU Node Definition Template - -```python -gcu_browser_node = NodeSpec( - id="gcu-browser-worker", - name="Browser Worker", - description="Browser subagent that does X.", - node_type="gcu", - client_facing=False, - max_node_visits=1, - input_keys=[], - output_keys=["result"], - tools=[], # Auto-populated with all browser tools - system_prompt="""\ -You are a browser agent. Your job: [specific task]. - -## Workflow -1. browser_start (only if no browser is running yet) -2. browser_open(url=TARGET_URL) — note the returned targetId -3. browser_snapshot to read the page -4. [task-specific steps] -5. set_output("result", JSON) - -## Output format -set_output("result", JSON) with: -- [field]: [type and description] -""", -) +``` +1. Use browser_snapshot() to read page content (NOT browser_get_text) +2. Use browser_wait(seconds=2-3) after navigation for page load +3. If you hit an auth wall, call set_output with an error and move on +4. Keep tool calls per turn <= 10 for reliability ``` -## Parent Node Template (orchestrating GCU subagents) - -```python -orchestrator_node = NodeSpec( - id="orchestrator", - ... 
- node_type="event_loop", - sub_agents=["gcu-browser-worker"], - system_prompt="""\ -... -delegate_to_sub_agent( - agent_id="gcu-browser-worker", - task="Navigate to [URL]. Do [specific task]. Return JSON with [fields]." -) -... -""", - tools=[], # Orchestrator doesn't need browser tools -) -``` - -## mcp_servers.json with GCU +## Example ```json { - "hive-tools": { ... }, - "gcu-tools": { - "transport": "stdio", - "command": "uv", - "args": ["run", "python", "-m", "gcu.server", "--stdio"], - "cwd": "../../tools", - "description": "GCU tools for browser automation" - } + "id": "scan-profiles", + "name": "Scan LinkedIn Profiles", + "description": "Navigate LinkedIn search results and collect profile data", + "tools": {"policy": "all"}, + "input_keys": ["search_url"], + "output_keys": ["profiles"], + "system_prompt": "Navigate to the search URL, paginate through results..." } ``` -Note: `gcu-tools` is auto-added if any node uses `node_type="gcu"`, but including it explicitly is fine. - -## GCU System Prompt Best Practices - -Key rules to bake into GCU node prompts: - -- Prefer `browser_snapshot` over `browser_get_text("body")` — compact accessibility tree vs 100KB+ raw HTML -- Always `browser_wait` after navigation -- Use large scroll amounts (~2000-5000) for lazy-loaded content -- For spillover files, use `run_command` with grep, not `read_file` -- If auth wall detected, report immediately — don't attempt login -- Keep tool calls per turn ≤10 -- Tab isolation: when browser is already running, use `browser_open(background=true)` and pass `target_id` to every call - -## Multiple Concurrent GCU Subagents - -When a task can be parallelized across multiple sites or profiles, declare a distinct GCU -node for each and invoke them all in the same LLM turn. The framework batches all -`delegate_to_sub_agent` calls made in one turn and runs them with `asyncio.gather`, so -they execute concurrently — not sequentially. 
- -**Each GCU subagent automatically gets its own isolated browser context** — no `profile=` -argument is needed in tool calls. The framework derives a unique profile from the subagent's -node ID and instance counter and injects it via an asyncio `ContextVar` before the subagent -runs. - -### Example: three sites in parallel - -```python -# Three distinct GCU nodes -gcu_site_a = NodeSpec(id="gcu-site-a", node_type="gcu", ...) -gcu_site_b = NodeSpec(id="gcu-site-b", node_type="gcu", ...) -gcu_site_c = NodeSpec(id="gcu-site-c", node_type="gcu", ...) - -orchestrator = NodeSpec( - id="orchestrator", - node_type="event_loop", - sub_agents=["gcu-site-a", "gcu-site-b", "gcu-site-c"], - system_prompt="""\ -Call all three subagents in a single response to run them in parallel: - delegate_to_sub_agent(agent_id="gcu-site-a", task="Scrape prices from site A") - delegate_to_sub_agent(agent_id="gcu-site-b", task="Scrape prices from site B") - delegate_to_sub_agent(agent_id="gcu-site-c", task="Scrape prices from site C") -""", -) +Connected via regular edges: +``` +search-setup -> scan-profiles -> process-results ``` - -**Rules:** -- Use distinct node IDs for each concurrent task — sharing an ID shares the browser context. -- The GCU node prompts do not need to mention `profile=`; isolation is automatic. -- Cleanup is automatic at session end, but GCU nodes can call `browser_stop()` explicitly - if they want to release resources mid-run. 
- -## GCU Anti-Patterns - -- Using `browser_screenshot` to read text (use `browser_snapshot` instead; screenshots are for visual context only) -- Re-navigating after scrolling (resets scroll position) -- Attempting login on auth walls -- Forgetting `target_id` in multi-tab scenarios -- Putting browser tools directly on `event_loop` nodes instead of using GCU subagent pattern -- Making GCU nodes `client_facing=True` (they should be autonomous subagents) diff --git a/core/framework/agents/queen/reflection_agent.py b/core/framework/agents/queen/reflection_agent.py index 4bb20d64..d2033717 100644 --- a/core/framework/agents/queen/reflection_agent.py +++ b/core/framework/agents/queen/reflection_agent.py @@ -2,7 +2,7 @@ A lightweight side agent that runs after each queen LLM turn. It inspects recent conversation messages and extracts durable user knowledge into -individual memory files in ``~/.hive/queen/global_memory/``. +individual memory files in ``~/.hive/memories/global/``. Two reflection types: - **Short reflection**: after conversational queen turns. Distills @@ -493,7 +493,7 @@ async def subscribe_reflection_triggers( Call this once during queen setup. Returns a list of event-bus subscription IDs for cleanup during session teardown. 
""" - from framework.runtime.event_bus import EventType + from framework.host.event_bus import EventType mem_dir = memory_dir or global_memory_dir() _lock = asyncio.Lock() diff --git a/core/framework/agents/queen/tests/conftest.py b/core/framework/agents/queen/tests/conftest.py index de518df2..d34d2bf2 100644 --- a/core/framework/agents/queen/tests/conftest.py +++ b/core/framework/agents/queen/tests/conftest.py @@ -22,10 +22,10 @@ def mock_mode(): @pytest_asyncio.fixture(scope="session") async def runner(tmp_path_factory, mock_mode): - from framework.runner.runner import AgentRunner + from framework.loader.agent_loader import AgentLoader storage = tmp_path_factory.mktemp("agent_storage") - r = AgentRunner.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage) + r = AgentLoader.load(AGENT_PATH, mock_mode=mock_mode, storage_path=storage) r._setup() yield r await r.cleanup_async() diff --git a/core/framework/cli.py b/core/framework/cli.py index e7752922..86c51cd1 100644 --- a/core/framework/cli.py +++ b/core/framework/cli.py @@ -79,7 +79,7 @@ def main(): subparsers = parser.add_subparsers(dest="command", required=True) # Register runner commands (run, info, validate, list, shell) - from framework.runner.cli import register_commands + from framework.loader.cli import register_commands register_commands(subparsers) @@ -99,7 +99,7 @@ def main(): register_debugger_commands(subparsers) # Register MCP registry commands (mcp install, mcp add, ...) 
- from framework.runner.mcp_registry_cli import register_mcp_commands + from framework.loader.mcp_registry_cli import register_mcp_commands register_mcp_commands(subparsers) diff --git a/core/framework/config.py b/core/framework/config.py index 095f49fc..341f0ed4 100644 --- a/core/framework/config.py +++ b/core/framework/config.py @@ -12,13 +12,47 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any -from framework.graph.edge import DEFAULT_MAX_TOKENS +from framework.orchestrator.edge import DEFAULT_MAX_TOKENS + +# --------------------------------------------------------------------------- +# Hive home directory structure +# --------------------------------------------------------------------------- + +HIVE_HOME = Path.home() / ".hive" +QUEENS_DIR = HIVE_HOME / "agents" / "queens" +COLONIES_DIR = HIVE_HOME / "colonies" +MEMORIES_DIR = HIVE_HOME / "memories" + + +def queen_dir(queen_name: str = "default") -> Path: + """Return the storage directory for a named queen agent.""" + return QUEENS_DIR / queen_name + + +def colony_dir(colony_name: str) -> Path: + """Return the directory for a named colony.""" + return COLONIES_DIR / colony_name + + +def memory_dir(scope: str, name: str | None = None) -> Path: + """Return memory dir for a scope. 
+ + Examples:: + + memory_dir("global") -> ~/.hive/memories/global + memory_dir("colonies", "my_agent") -> ~/.hive/memories/colonies/my_agent + memory_dir("agents/queens", "default")-> ~/.hive/memories/agents/queens/default + memory_dir("agents", "worker_name") -> ~/.hive/memories/agents/worker_name + """ + base = MEMORIES_DIR / scope + return base / name if name else base + # --------------------------------------------------------------------------- # Low-level config file access # --------------------------------------------------------------------------- -HIVE_CONFIG_FILE = Path.home() / ".hive" / "configuration.json" +HIVE_CONFIG_FILE = HIVE_HOME / "configuration.json" # Hive LLM router endpoint (Anthropic-compatible). # litellm's Anthropic handler appends /v1/messages, so this is just the base host. @@ -130,7 +164,7 @@ def get_worker_api_key() -> str | None: # Worker-specific subscription / env var if worker_llm.get("use_claude_code_subscription"): try: - from framework.runner.runner import get_claude_code_token + from framework.loader.agent_loader import get_claude_code_token token = get_claude_code_token() if token: @@ -140,7 +174,7 @@ def get_worker_api_key() -> str | None: if worker_llm.get("use_codex_subscription"): try: - from framework.runner.runner import get_codex_token + from framework.loader.agent_loader import get_codex_token token = get_codex_token() if token: @@ -150,7 +184,7 @@ def get_worker_api_key() -> str | None: if worker_llm.get("use_kimi_code_subscription"): try: - from framework.runner.runner import get_kimi_code_token + from framework.loader.agent_loader import get_kimi_code_token token = get_kimi_code_token() if token: @@ -160,7 +194,7 @@ def get_worker_api_key() -> str | None: if worker_llm.get("use_antigravity_subscription"): try: - from framework.runner.runner import get_antigravity_token + from framework.loader.agent_loader import get_antigravity_token token = get_antigravity_token() if token: @@ -216,7 +250,7 @@ def 
get_worker_llm_extra_kwargs() -> dict[str, Any]: "User-Agent": "CodexBar", } try: - from framework.runner.runner import get_codex_account_id + from framework.loader.agent_loader import get_codex_account_id account_id = get_codex_account_id() if account_id: @@ -263,22 +297,43 @@ def get_max_context_tokens() -> int: return get_hive_config().get("llm", {}).get("max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS) +def get_api_keys() -> list[str] | None: + """Return a list of API keys if ``api_keys`` is configured, else ``None``. + + This supports key-pool rotation: configure multiple keys in + ``~/.hive/configuration.json`` under ``llm.api_keys`` and the + :class:`~framework.llm.key_pool.KeyPool` will rotate through them. + """ + llm = get_hive_config().get("llm", {}) + keys = llm.get("api_keys") + if keys and isinstance(keys, list) and len(keys) > 0: + return [k for k in keys if k] # filter empties + return None + + def get_api_key() -> str | None: """Return the API key, supporting env var, Claude Code subscription, Codex, and ZAI Code. Priority: + 0. Explicit key pool (``api_keys`` list) -- returns first key for + single-key callers; full pool available via :func:`get_api_keys`. 1. Claude Code subscription (``use_claude_code_subscription: true``) reads the OAuth token from ``~/.claude/.credentials.json``. 2. Codex subscription (``use_codex_subscription: true``) reads the OAuth token from macOS Keychain or ``~/.codex/auth.json``. 3. Environment variable named in ``api_key_env_var``. """ + # If an explicit key pool is configured, use the first key. 
+ pool_keys = get_api_keys() + if pool_keys: + return pool_keys[0] + llm = get_hive_config().get("llm", {}) # Claude Code subscription: read OAuth token directly if llm.get("use_claude_code_subscription"): try: - from framework.runner.runner import get_claude_code_token + from framework.loader.agent_loader import get_claude_code_token token = get_claude_code_token() if token: @@ -289,7 +344,7 @@ def get_api_key() -> str | None: # Codex subscription: read OAuth token from Keychain / auth.json if llm.get("use_codex_subscription"): try: - from framework.runner.runner import get_codex_token + from framework.loader.agent_loader import get_codex_token token = get_codex_token() if token: @@ -300,7 +355,7 @@ def get_api_key() -> str | None: # Kimi Code subscription: read API key from ~/.kimi/config.toml if llm.get("use_kimi_code_subscription"): try: - from framework.runner.runner import get_kimi_code_token + from framework.loader.agent_loader import get_kimi_code_token token = get_kimi_code_token() if token: @@ -311,7 +366,7 @@ def get_api_key() -> str | None: # Antigravity subscription: read OAuth token from accounts JSON if llm.get("use_antigravity_subscription"): try: - from framework.runner.runner import get_antigravity_token + from framework.loader.agent_loader import get_antigravity_token token = get_antigravity_token() if token: @@ -468,7 +523,7 @@ def get_llm_extra_kwargs() -> dict[str, Any]: "User-Agent": "CodexBar", } try: - from framework.runner.runner import get_codex_account_id + from framework.loader.agent_loader import get_codex_account_id account_id = get_codex_account_id() if account_id: diff --git a/core/framework/credentials/setup.py b/core/framework/credentials/setup.py index dfee6bf4..1bb4e90d 100644 --- a/core/framework/credentials/setup.py +++ b/core/framework/credentials/setup.py @@ -36,7 +36,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from framework.graph import NodeSpec + from framework.orchestrator 
import NodeSpec logger = logging.getLogger(__name__) @@ -533,7 +533,9 @@ class CredentialSetupSession: def load_agent_nodes(agent_path: str | Path) -> list: - """Load NodeSpec list from an agent's agent.py or agent.json. + """Load NodeSpec list from an agent directory. + + Checks agent.json (declarative) first, then agent.py (legacy). Args: agent_path: Path to agent directory. @@ -542,16 +544,28 @@ def load_agent_nodes(agent_path: str | Path) -> list: List of NodeSpec objects (empty list if agent can't be loaded). """ agent_path = Path(agent_path) + agent_json_file = agent_path / "agent.json" agent_py = agent_path / "agent.py" - agent_json = agent_path / "agent.json" - if agent_py.exists(): + if agent_json_file.exists(): + return _load_nodes_from_json_declarative(agent_json_file) + elif agent_py.exists(): return _load_nodes_from_python_agent(agent_path) - elif agent_json.exists(): - return _load_nodes_from_json_agent(agent_json) return [] +def _load_nodes_from_json_declarative(agent_json: Path) -> list: + """Load nodes from a declarative JSON agent.""" + try: + from framework.loader.agent_loader import load_agent_config + + data = json.loads(agent_json.read_text(encoding="utf-8")) + graph, _ = load_agent_config(data) + return list(graph.nodes) + except Exception: + return [] + + def _load_nodes_from_python_agent(agent_path: Path) -> list: """Load nodes from a Python-based agent.""" import importlib.util @@ -590,7 +604,7 @@ def _load_nodes_from_json_agent(agent_json: Path) -> list: with open(agent_json, encoding="utf-8-sig") as f: data = json.load(f) - from framework.graph import NodeSpec + from framework.orchestrator import NodeSpec nodes_data = data.get("graph", {}).get("nodes", []) nodes = [] diff --git a/core/framework/graph/__init__.py b/core/framework/graph/__init__.py deleted file mode 100644 index a6751ddf..00000000 --- a/core/framework/graph/__init__.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Graph structures: Goals, Nodes, Edges, and Execution.""" - -from 
framework.graph.context import GraphContext -from framework.graph.context_handoff import ContextHandoff, HandoffContext -from framework.graph.conversation import ConversationStore, Message, NodeConversation -from framework.graph.edge import DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec -from framework.graph.event_loop_node import ( - EventLoopNode, - JudgeProtocol, - JudgeVerdict, - LoopConfig, - OutputAccumulator, -) -from framework.graph.executor import GraphExecutor -from framework.graph.goal import Constraint, Goal, GoalStatus, SuccessCriterion -from framework.graph.node import NodeContext, NodeProtocol, NodeResult, NodeSpec -from framework.graph.worker_agent import ( - Activation, - FanOutTag, - FanOutTracker, - WorkerAgent, - WorkerCompletion, - WorkerLifecycle, -) - -__all__ = [ - # Goal - "Goal", - "SuccessCriterion", - "Constraint", - "GoalStatus", - # Node - "NodeSpec", - "NodeContext", - "NodeResult", - "NodeProtocol", - # Edge - "EdgeSpec", - "EdgeCondition", - "GraphSpec", - "DEFAULT_MAX_TOKENS", - # Executor - "GraphExecutor", - # Conversation - "NodeConversation", - "ConversationStore", - "Message", - # Event Loop - "EventLoopNode", - "LoopConfig", - "OutputAccumulator", - "JudgeProtocol", - "JudgeVerdict", - # Context Handoff - "ContextHandoff", - "HandoffContext", - # Worker Agent - "WorkerAgent", - "WorkerLifecycle", - "WorkerCompletion", - "Activation", - "FanOutTag", - "FanOutTracker", - "GraphContext", -] diff --git a/core/framework/graph/event_loop/__init__.py b/core/framework/graph/event_loop/__init__.py deleted file mode 100644 index 1ec8a803..00000000 --- a/core/framework/graph/event_loop/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""EventLoopNode subpackage — modular components of the event loop orchestrator. - -All public symbols are re-exported by the parent ``event_loop_node.py`` for -backward compatibility. Internal consumers may import directly from these -submodules for clarity. 
-""" diff --git a/core/framework/graph/event_loop/subagent_executor.py b/core/framework/graph/event_loop/subagent_executor.py deleted file mode 100644 index cd5c207c..00000000 --- a/core/framework/graph/event_loop/subagent_executor.py +++ /dev/null @@ -1,370 +0,0 @@ -"""Subagent execution for the event loop. - -Handles the full subagent lifecycle: validation, context setup, tool filtering, -conversation store derivation, execution, and cleanup. -""" - -from __future__ import annotations - -import json -import logging -import time -from collections.abc import Awaitable, Callable -from pathlib import Path -from typing import TYPE_CHECKING, Any - -from framework.graph.conversation import ConversationStore -from framework.graph.event_loop.judge_pipeline import SubagentJudge -from framework.graph.event_loop.types import LoopConfig, OutputAccumulator -from framework.graph.node import DataBuffer, NodeContext -from framework.llm.provider import ToolResult, ToolUse -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.event_bus import EventBus - -if TYPE_CHECKING: - from framework.graph.event_loop_node import EventLoopNode - -logger = logging.getLogger(__name__) - - -async def execute_subagent( - ctx: NodeContext, - agent_id: str, - task: str, - *, - config: LoopConfig, - event_loop_node_cls: type[EventLoopNode], - escalation_receiver_cls: Callable[[], Any], - accumulator: OutputAccumulator | None = None, - event_bus: EventBus | None = None, - tool_executor: Callable[[ToolUse], ToolResult | Awaitable[ToolResult]] | None = None, - conversation_store: ConversationStore | None = None, - subagent_instance_counter: dict[str, int] | None = None, -) -> ToolResult: - """Execute a subagent and return the result as a ToolResult. 
- - The subagent: - - Gets a fresh conversation with just the task - - Has read-only access to the parent's readable memory - - Cannot delegate to its own subagents (prevents recursion) - - Returns its output in structured JSON format - - Args: - ctx: Parent node's context (for memory, tools, LLM access). - agent_id: The node ID of the subagent to invoke. - task: The task description to give the subagent. - accumulator: Parent's OutputAccumulator. - event_bus: EventBus for lifecycle events. - config: LoopConfig for iteration/tool limits. - tool_executor: Tool executor callable. - conversation_store: Parent conversation store (for deriving subagent store). - subagent_instance_counter: Mutable counter dict for unique subagent paths. - - Returns: - ToolResult with structured JSON output. - """ - # Log subagent invocation start - logger.info( - "\n" + "=" * 60 + "\n" - "🤖 SUBAGENT INVOCATION\n" - "=" * 60 + "\n" - "Parent Node: %s\n" - "Subagent ID: %s\n" - "Task: %s\n" + "=" * 60, - ctx.node_id, - agent_id, - task[:500] + "..." if len(task) > 500 else task, - ) - - # 1. Validate agent exists in registry - if agent_id not in ctx.node_registry: - return ToolResult( - tool_use_id="", - content=json.dumps( - { - "message": f"Sub-agent '{agent_id}' not found in registry", - "data": None, - "metadata": {"agent_id": agent_id, "success": False, "error": "not_found"}, - } - ), - is_error=True, - ) - - subagent_spec = ctx.node_registry[agent_id] - - # 2. Create read-only memory snapshot - parent_data = ctx.buffer.read_all() - - # Merge in-flight outputs from the parent's accumulator. 
- if accumulator: - for key, value in accumulator.to_dict().items(): - if key not in parent_data: - parent_data[key] = value - - subagent_buffer = DataBuffer() - for key, value in parent_data.items(): - subagent_buffer.write(key, value, validate=False) - - read_keys = set(parent_data.keys()) | set(subagent_spec.input_keys or []) - scoped_buffer = subagent_buffer.with_permissions( - read_keys=list(read_keys), - write_keys=[], # Read-only! - ) - - # 2b. Compute instance counter early so the callback and child context - # share the same stable node_id for this subagent invocation. - if subagent_instance_counter is not None: - subagent_instance_counter.setdefault(agent_id, 0) - subagent_instance_counter[agent_id] += 1 - subagent_instance = str(subagent_instance_counter[agent_id]) - else: - subagent_instance = "1" - - if subagent_instance == "1": - sa_node_id = f"{ctx.node_id}:subagent:{agent_id}" - else: - sa_node_id = f"{ctx.node_id}:subagent:{agent_id}:{subagent_instance}" - - # 2c. Set up report callback (one-way channel to parent / event bus) - subagent_reports: list[dict] = [] - - async def _report_callback( - message: str, - data: dict | None = None, - *, - wait_for_response: bool = False, - ) -> str | None: - subagent_reports.append({"message": message, "data": data, "timestamp": time.time()}) - if event_bus: - await event_bus.emit_subagent_report( - stream_id=ctx.node_id, - node_id=sa_node_id, - subagent_id=agent_id, - message=message, - data=data, - execution_id=ctx.execution_id, - ) - - if not wait_for_response: - return None - - if not event_bus: - logger.warning( - "Subagent '%s' requested user response but no event_bus available", - agent_id, - ) - return None - - # Create isolated receiver and register for input routing - import uuid - - escalation_id = f"{ctx.node_id}:escalation:{uuid.uuid4().hex[:8]}" - receiver = escalation_receiver_cls() - registry = ctx.shared_node_registry - - registry[escalation_id] = receiver - try: - await 
event_bus.emit_escalation_requested( - stream_id=ctx.stream_id or ctx.node_id, - node_id=escalation_id, - reason=f"Subagent report (wait_for_response) from {agent_id}", - context=message, - execution_id=ctx.execution_id, - ) - # Block until queen responds - return await receiver.wait() - finally: - registry.pop(escalation_id, None) - - # 3. Filter tools for subagent - subagent_tool_names = set(subagent_spec.tools or []) - tool_source = ctx.all_tools if ctx.all_tools else ctx.available_tools - - # GCU auto-population - if subagent_spec.node_type == "gcu" and not subagent_tool_names: - subagent_tools = [t for t in tool_source if t.name != "delegate_to_sub_agent"] - else: - subagent_tools = [ - t - for t in tool_source - if t.name in subagent_tool_names and t.name != "delegate_to_sub_agent" - ] - - missing = subagent_tool_names - {t.name for t in subagent_tools} - if missing: - logger.warning( - "Subagent '%s' requested tools not found in catalog: %s", - agent_id, - sorted(missing), - ) - - logger.info( - "📦 Subagent '%s' configuration:\n" - " - System prompt: %s\n" - " - Tools available (%d): %s\n" - " - Memory keys inherited: %s", - agent_id, - (subagent_spec.system_prompt[:200] + "...") - if subagent_spec.system_prompt and len(subagent_spec.system_prompt) > 200 - else subagent_spec.system_prompt, - len(subagent_tools), - [t.name for t in subagent_tools], - list(parent_data.keys()), - ) - - # 4. 
Build subagent context - max_iter = min(config.max_iterations, 10) - subagent_ctx = NodeContext( - runtime=ctx.runtime, - node_id=sa_node_id, - node_spec=subagent_spec, - buffer=scoped_buffer, - input_data={"task": task, **parent_data}, - llm=ctx.llm, - available_tools=subagent_tools, - goal_context=( - f"Your specific task: {task}\n\n" - f"COMPLETION REQUIREMENTS:\n" - f"When your task is done, you MUST call set_output() " - f"for each required key: {subagent_spec.output_keys}\n" - f"Alternatively, call report_to_parent(mark_complete=true) " - f"with your findings in message/data.\n" - + ( - "Before finishing, call browser_close_finished() to clean up your browser tabs.\n" - if subagent_spec.node_type == "gcu" - else "" - ) - + f"You have a maximum of {max_iter} turns to complete this task." - ), - goal=ctx.goal, - max_tokens=ctx.max_tokens, - runtime_logger=ctx.runtime_logger, - is_subagent_mode=True, # Prevents nested delegation - report_callback=_report_callback, - node_registry={}, # Empty - no nested subagents - shared_node_registry=ctx.shared_node_registry, # For escalation routing - ) - - # 5. 
Create and execute subagent EventLoopNode - subagent_conv_store = None - if conversation_store is not None: - from framework.storage.conversation_store import FileConversationStore - - parent_base = getattr(conversation_store, "_base", None) - if parent_base is not None: - conversations_dir = parent_base.parent - subagent_dir_name = f"{agent_id}-{subagent_instance}" - subagent_store_path = conversations_dir / subagent_dir_name - subagent_conv_store = FileConversationStore(base_path=subagent_store_path) - - # Derive a subagent-scoped spillover dir - subagent_spillover = None - if config.spillover_dir: - subagent_spillover = str(Path(config.spillover_dir) / agent_id / subagent_instance) - - subagent_node = event_loop_node_cls( - event_bus=event_bus, - judge=SubagentJudge(task=task, max_iterations=max_iter), - config=LoopConfig( - max_iterations=max_iter, - max_tool_calls_per_turn=config.max_tool_calls_per_turn, - tool_call_overflow_margin=config.tool_call_overflow_margin, - max_context_tokens=config.max_context_tokens, - stall_detection_threshold=config.stall_detection_threshold, - max_tool_result_chars=config.max_tool_result_chars, - spillover_dir=subagent_spillover, - ), - tool_executor=tool_executor, - conversation_store=subagent_conv_store, - ) - - # Each subagent instance gets its own unique browser profile so concurrent - # subagents don't share tab groups. The profile is set as execution context - # so the tool registry auto-injects it into every browser_* MCP tool call. 
- _gcu_profile = f"{agent_id}:{subagent_instance}" - _profile_token = ToolRegistry.set_execution_context(profile=_gcu_profile) - - try: - logger.info("🚀 Starting subagent '%s' execution...", agent_id) - start_time = time.time() - result = await subagent_node.execute(subagent_ctx) - latency_ms = int((time.time() - start_time) * 1000) - - separator = "-" * 60 - logger.info( - "\n%s\n" - "✅ SUBAGENT '%s' COMPLETED\n" - "%s\n" - "Success: %s\n" - "Latency: %dms\n" - "Tokens used: %s\n" - "Output keys: %s\n" - "%s", - separator, - agent_id, - separator, - result.success, - latency_ms, - result.tokens_used, - list(result.output.keys()) if result.output else [], - separator, - ) - - result_json = { - "message": ( - f"Sub-agent '{agent_id}' completed successfully" - if result.success - else f"Sub-agent '{agent_id}' failed: {result.error}" - ), - "data": result.output, - "reports": subagent_reports if subagent_reports else None, - "metadata": { - "agent_id": agent_id, - "success": result.success, - "tokens_used": result.tokens_used, - "latency_ms": latency_ms, - "report_count": len(subagent_reports), - }, - } - - return ToolResult( - tool_use_id="", - content=json.dumps(result_json, indent=2, default=str), - is_error=not result.success, - ) - - except Exception as e: - logger.exception( - "\n" + "!" * 60 + "\n❌ SUBAGENT '%s' FAILED\nError: %s\n" + "!" * 60, - agent_id, - str(e), - ) - result_json = { - "message": f"Sub-agent '{agent_id}' raised exception: {e}", - "data": None, - "metadata": { - "agent_id": agent_id, - "success": False, - "error": str(e), - }, - } - return ToolResult( - tool_use_id="", - content=json.dumps(result_json, indent=2), - is_error=True, - ) - finally: - ToolRegistry.reset_execution_context(_profile_token) - # Close the tab group this subagent created, if any. 
- try: - from gcu.browser.bridge import get_bridge - from gcu.browser.tools.lifecycle import _contexts - - bridge = get_bridge() - ctx_entry = _contexts.pop(_gcu_profile, None) - if bridge and bridge.is_connected and ctx_entry: - group_id = ctx_entry.get("groupId") - if group_id is not None: - await bridge.destroy_context(group_id) - except Exception: - pass diff --git a/core/framework/host/__init__.py b/core/framework/host/__init__.py new file mode 100644 index 00000000..c432b680 --- /dev/null +++ b/core/framework/host/__init__.py @@ -0,0 +1,11 @@ +"""Host layer -- how agents are triggered and hosted.""" + +from framework.host.agent_host import ( # noqa: F401 + AgentHost, + AgentRuntimeConfig, +) +from framework.host.event_bus import AgentEvent, EventBus, EventType # noqa: F401 +from framework.host.execution_manager import ( # noqa: F401 + EntryPointSpec, + ExecutionManager, +) diff --git a/core/framework/runtime/agent_runtime.py b/core/framework/host/agent_host.py similarity index 74% rename from core/framework/runtime/agent_runtime.py rename to core/framework/host/agent_host.py index 320e5371..8cb367b5 100644 --- a/core/framework/runtime/agent_runtime.py +++ b/core/framework/host/agent_host.py @@ -16,20 +16,21 @@ from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Any -from framework.graph.checkpoint_config import CheckpointConfig -from framework.graph.executor import ExecutionResult -from framework.runtime.event_bus import EventBus -from framework.runtime.execution_stream import EntryPointSpec, ExecutionStream -from framework.runtime.outcome_aggregator import OutcomeAggregator -from framework.runtime.runtime_log_store import RuntimeLogStore -from framework.runtime.shared_state import SharedBufferManager +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.orchestrator import ExecutionResult +from framework.host.event_bus import EventBus +from framework.host.execution_manager 
import EntryPointSpec, ExecutionManager +from framework.host.outcome_aggregator import OutcomeAggregator +from framework.tracker.runtime_log_store import RuntimeLogStore +from framework.host.shared_state import SharedBufferManager from framework.storage.concurrent import ConcurrentStorage from framework.storage.session_store import SessionStore if TYPE_CHECKING: - from framework.graph.edge import GraphSpec - from framework.graph.goal import Goal + from framework.orchestrator.edge import GraphSpec + from framework.orchestrator.goal import Goal from framework.llm.provider import LLMProvider, Tool + from framework.pipeline.stage import PipelineStage from framework.skills.manager import SkillsManagerConfig logger = logging.getLogger(__name__) @@ -37,7 +38,7 @@ logger = logging.getLogger(__name__) @dataclass class AgentRuntimeConfig: - """Configuration for AgentRuntime.""" + """Configuration for AgentHost.""" max_concurrent_executions: int = 100 cache_ttl: float = 60.0 @@ -62,14 +63,14 @@ class _GraphRegistration: graph: "GraphSpec" goal: "Goal" entry_points: dict[str, EntryPointSpec] - streams: dict[str, ExecutionStream] # ep_id -> stream (NOT namespaced) + streams: dict[str, ExecutionManager] # ep_id -> stream (NOT namespaced) storage_subpath: str # relative to session root, e.g. "graphs/email_agent" event_subscriptions: list[str] = field(default_factory=list) timer_tasks: list[asyncio.Task] = field(default_factory=list) timer_next_fire: dict[str, float] = field(default_factory=dict) -class AgentRuntime: +class AgentHost: """ Top-level runtime that manages agent lifecycle and concurrent executions. @@ -142,6 +143,7 @@ class AgentRuntime: skills_catalog_prompt: str = "", protocols_prompt: str = "", skill_dirs: list[str] | None = None, + pipeline_stages: "list[PipelineStage] | None" = None, ): """ Initialize agent runtime. @@ -171,6 +173,7 @@ class AgentRuntime: skills_catalog_prompt: Deprecated. Pre-rendered skills catalog. protocols_prompt: Deprecated. 
Pre-rendered operational protocols. """ + from framework.pipeline.runner import PipelineRunner from framework.skills.manager import SkillsManager self.graph = graph @@ -180,6 +183,14 @@ class AgentRuntime: self._checkpoint_config = checkpoint_config self.accounts_prompt = accounts_prompt + # Pipeline middleware: runs before every trigger() dispatch. + # Accepts either pre-built stage objects or loads from config. + if pipeline_stages: + self._pipeline = PipelineRunner(pipeline_stages) + else: + self._pipeline = self._load_pipeline_from_config() + + # --- Skill lifecycle: runtime owns the SkillsManager --- if skills_manager_config is not None: # New path: config-driven, runtime handles loading @@ -246,7 +257,7 @@ class AgentRuntime: # Entry points and streams (primary graph) self._entry_points: dict[str, EntryPointSpec] = {} - self._streams: dict[str, ExecutionStream] = {} + self._streams: dict[str, ExecutionManager] = {} # Webhook server (created on start if webhook_routes configured) self._webhook_server: Any = None @@ -270,7 +281,7 @@ class AgentRuntime: self.intro_message: str = "" # ------------------------------------------------------------------ - # Skill prompt accessors (read by ExecutionStream constructors) + # Skill prompt accessors (read by ExecutionManager constructors) # ------------------------------------------------------------------ @property @@ -335,9 +346,14 @@ class AgentRuntime: # Start storage await self._storage.start() - # Create streams for each entry point + # Initialize pipeline stages FIRST -- they inject LLM, tools, + # credentials, and skills into the host before streams are created. 
+ await self._pipeline.initialize_all() + self._apply_pipeline_results() + + # Create streams for each entry point (uses pipeline results) for ep_id, spec in self._entry_points.items(): - stream = ExecutionStream( + stream = ExecutionManager( stream_id=ep_id, entry_spec=spec, graph=self.graph, @@ -370,7 +386,7 @@ class AgentRuntime: # Start webhook server if routes are configured if self._config.webhook_routes: - from framework.runtime.webhook_server import ( + from framework.host.webhook_server import ( WebhookRoute, WebhookServer, WebhookServerConfig, @@ -394,7 +410,7 @@ class AgentRuntime: await self._webhook_server.start() # Subscribe event-driven entry points to EventBus - from framework.runtime.event_bus import EventType as _ET + from framework.host.event_bus import EventType as _ET for ep_id, spec in self._entry_points.items(): if spec.trigger_type != "event": @@ -458,332 +474,345 @@ class AgentRuntime: self._event_subscriptions.append(sub_id) # Start timer-driven entry points - for ep_id, spec in self._entry_points.items(): - if spec.trigger_type != "timer": - continue + await self._start_timers() - tc = spec.trigger_config - cron_expr = tc.get("cron") - _raw_interval = tc.get("interval_minutes") - interval = float(_raw_interval) if _raw_interval is not None else None - run_immediately = tc.get("run_immediately", False) - - if cron_expr: - # Cron expression mode — takes priority over interval_minutes - try: - from croniter import croniter - except ImportError as e: - raise RuntimeError( - "croniter is required for cron-based entry points. 
" - "Install it with: uv pip install croniter" - ) from e - - try: - if not croniter.is_valid(cron_expr): - raise ValueError(f"Invalid cron expression: {cron_expr}") - except ValueError as e: - logger.warning( - "Entry point '%s' has invalid cron config: %s", - ep_id, - e, - ) - continue - - def _make_cron_timer( - entry_point_id: str, - expr: str, - immediate: bool, - idle_timeout: float = 300, - ): - async def _cron_loop(): - from croniter import croniter - - _persistent_session_id: str | None = None - if not immediate: - cron = croniter(expr, datetime.now()) - next_dt = cron.get_next(datetime) - sleep_secs = (next_dt - datetime.now()).total_seconds() - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) - await asyncio.sleep(max(0, sleep_secs)) - while self._running: - # Calculate next fire time upfront (used by skip paths too) - cron = croniter(expr, datetime.now()) - next_dt = cron.get_next(datetime) - sleep_secs = (next_dt - datetime.now()).total_seconds() - - # Gate: skip tick if timers are explicitly paused - if self._timers_paused: - logger.debug( - "Cron '%s': paused, skipping tick", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) - await asyncio.sleep(max(0, sleep_secs)) - continue - - # Gate: skip tick if ANY stream is actively working. - # If the execution is idle (no LLM/tool activity - # beyond idle_timeout) let the timer proceed — - # execute() will cancel the stale execution. 
- _any_active = False - _min_idle = float("inf") - for _s in self._streams.values(): - if _s.active_execution_ids: - _any_active = True - _idle = _s.agent_idle_seconds - if _idle < _min_idle: - _min_idle = _idle - logger.info( - "Cron '%s': gate — active=%s, idle=%.1fs, timeout=%ds", - entry_point_id, - _any_active, - _min_idle, - idle_timeout, - ) - if _any_active and _min_idle < idle_timeout: - logger.info( - "Cron '%s': agent actively working, skipping tick", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) - await asyncio.sleep(max(0, sleep_secs)) - continue - - self._timer_next_fire.pop(entry_point_id, None) - try: - ep_spec = self._entry_points.get(entry_point_id) - is_isolated = ep_spec and ep_spec.isolation_level == "isolated" - if is_isolated: - if _persistent_session_id: - session_state = { - "resume_session_id": _persistent_session_id - } - else: - session_state = None - else: - session_state = self._get_primary_session_state( - exclude_entry_point=entry_point_id - ) - # Gate: skip tick if no active session - if session_state is None: - logger.debug( - "Cron '%s': no active session, skipping", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + sleep_secs - ) - await asyncio.sleep(max(0, sleep_secs)) - continue - - exec_id = await self.trigger( - entry_point_id, - { - "event": { - "source": "timer", - "reason": "scheduled", - } - }, - session_state=session_state, - ) - if not _persistent_session_id and is_isolated: - _persistent_session_id = exec_id - logger.info( - "Cron fired for entry point '%s' (expr: %s)", - entry_point_id, - expr, - ) - except Exception: - logger.error( - "Cron trigger failed for '%s'", - entry_point_id, - exc_info=True, - ) - # Calculate next fire from now - cron = croniter(expr, datetime.now()) - next_dt = cron.get_next(datetime) - sleep_secs = (next_dt - datetime.now()).total_seconds() - self._timer_next_fire[entry_point_id] = ( - 
time.monotonic() + sleep_secs - ) - await asyncio.sleep(max(0, sleep_secs)) - - return _cron_loop - - task = asyncio.create_task( - _make_cron_timer( - ep_id, - cron_expr, - run_immediately, - idle_timeout=float(tc.get("idle_timeout_seconds", 300)), - )() - ) - self._timer_tasks.append(task) - logger.info( - "Started cron timer for entry point '%s' with expression '%s'%s", - ep_id, - cron_expr, - " (immediate first run)" if run_immediately else "", - ) - - elif interval and interval > 0: - # Fixed interval mode (original behavior) - def _make_timer( - entry_point_id: str, - mins: float, - immediate: bool, - idle_timeout: float = 300, - ): - async def _timer_loop(): - interval_secs = mins * 60 - _persistent_session_id: str | None = None - if not immediate: - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) - await asyncio.sleep(interval_secs) - while self._running: - # Gate: skip tick if timers are explicitly paused - if self._timers_paused: - logger.debug( - "Timer '%s': paused, skipping tick", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) - await asyncio.sleep(interval_secs) - continue - - # Gate: skip tick if agent is actively working. - # Gate: skip tick if ANY stream is actively working. 
- _any_active = False - _min_idle = float("inf") - for _s in self._streams.values(): - if _s.active_execution_ids: - _any_active = True - _idle = _s.agent_idle_seconds - if _idle < _min_idle: - _min_idle = _idle - logger.info( - "Timer '%s': gate — active=%s, idle=%.1fs, timeout=%ds", - entry_point_id, - _any_active, - _min_idle, - idle_timeout, - ) - if _any_active and _min_idle < idle_timeout: - logger.info( - "Timer '%s': agent actively working, skipping tick", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) - await asyncio.sleep(interval_secs) - continue - - self._timer_next_fire.pop(entry_point_id, None) - try: - ep_spec = self._entry_points.get(entry_point_id) - is_isolated = ep_spec and ep_spec.isolation_level == "isolated" - if is_isolated: - if _persistent_session_id: - session_state = { - "resume_session_id": _persistent_session_id - } - else: - session_state = None - else: - session_state = self._get_primary_session_state( - exclude_entry_point=entry_point_id - ) - # Gate: skip tick if no active session - if session_state is None: - logger.debug( - "Timer '%s': no active session, skipping", - entry_point_id, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) - await asyncio.sleep(interval_secs) - continue - - exec_id = await self.trigger( - entry_point_id, - { - "event": { - "source": "timer", - "reason": "scheduled", - } - }, - session_state=session_state, - ) - if not _persistent_session_id and is_isolated: - _persistent_session_id = exec_id - logger.info( - "Timer fired for entry point '%s' (next in %s min)", - entry_point_id, - mins, - ) - except Exception: - logger.error( - "Timer trigger failed for '%s'", - entry_point_id, - exc_info=True, - ) - self._timer_next_fire[entry_point_id] = ( - time.monotonic() + interval_secs - ) - await asyncio.sleep(interval_secs) - - return _timer_loop - - task = asyncio.create_task( - _make_timer( - ep_id, - interval, - 
run_immediately, - idle_timeout=float(tc.get("idle_timeout_seconds", 300)), - )() - ) - self._timer_tasks.append(task) - logger.info( - "Started timer for entry point '%s' every %s min%s", - ep_id, - interval, - " (immediate first run)" if run_immediately else "", - ) - - else: - logger.warning( - "Entry point '%s' has trigger_type='timer' " - "but no 'cron' or valid 'interval_minutes' in trigger_config", - ep_id, - ) - - # Register primary graph - self._graphs[self._graph_id] = _GraphRegistration( - graph=self.graph, - goal=self.goal, - entry_points=dict(self._entry_points), - streams=dict(self._streams), - storage_subpath="", - event_subscriptions=list(self._event_subscriptions), - timer_tasks=list(self._timer_tasks), - timer_next_fire=self._timer_next_fire, - ) + # Start skill hot-reload watcher (no-op if watchfiles not installed) + await self._skills_manager.start_watching() self._running = True self._timers_paused = False - logger.info(f"AgentRuntime started with {len(self._streams)} streams") + n_stages = len(self._pipeline.stages) + logger.info( + "AgentHost started with %d streams, %d pipeline stages", + len(self._streams), + n_stages, + ) + + async def _start_timers(self) -> None: + """Start timer-driven entry points (extracted from start()).""" + for ep_id, spec in self._entry_points.items(): + if spec.trigger_type != "timer": + continue + + tc = spec.trigger_config + cron_expr = tc.get("cron") + _raw_interval = tc.get("interval_minutes") + interval = float(_raw_interval) if _raw_interval is not None else None + run_immediately = tc.get("run_immediately", False) + + if cron_expr: + # Cron expression mode — takes priority over interval_minutes + try: + from croniter import croniter + except ImportError as e: + raise RuntimeError( + "croniter is required for cron-based entry points. 
" + "Install it with: uv pip install croniter" + ) from e + + try: + if not croniter.is_valid(cron_expr): + raise ValueError(f"Invalid cron expression: {cron_expr}") + except ValueError as e: + logger.warning( + "Entry point '%s' has invalid cron config: %s", + ep_id, + e, + ) + continue + + def _make_cron_timer( + entry_point_id: str, + expr: str, + immediate: bool, + idle_timeout: float = 300, + ): + async def _cron_loop(): + from croniter import croniter + + _persistent_session_id: str | None = None + if not immediate: + cron = croniter(expr, datetime.now()) + next_dt = cron.get_next(datetime) + sleep_secs = (next_dt - datetime.now()).total_seconds() + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + sleep_secs + ) + await asyncio.sleep(max(0, sleep_secs)) + while self._running: + # Calculate next fire time upfront (used by skip paths too) + cron = croniter(expr, datetime.now()) + next_dt = cron.get_next(datetime) + sleep_secs = (next_dt - datetime.now()).total_seconds() + + # Gate: skip tick if timers are explicitly paused + if self._timers_paused: + logger.debug( + "Cron '%s': paused, skipping tick", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + sleep_secs + ) + await asyncio.sleep(max(0, sleep_secs)) + continue + + # Gate: skip tick if ANY stream is actively working. + # If the execution is idle (no LLM/tool activity + # beyond idle_timeout) let the timer proceed — + # execute() will cancel the stale execution. 
+ _any_active = False + _min_idle = float("inf") + for _s in self._streams.values(): + if _s.active_execution_ids: + _any_active = True + _idle = _s.agent_idle_seconds + if _idle < _min_idle: + _min_idle = _idle + logger.info( + "Cron '%s': gate — active=%s, idle=%.1fs, timeout=%ds", + entry_point_id, + _any_active, + _min_idle, + idle_timeout, + ) + if _any_active and _min_idle < idle_timeout: + logger.info( + "Cron '%s': agent actively working, skipping tick", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + sleep_secs + ) + await asyncio.sleep(max(0, sleep_secs)) + continue + + self._timer_next_fire.pop(entry_point_id, None) + try: + ep_spec = self._entry_points.get(entry_point_id) + is_isolated = ep_spec and ep_spec.isolation_level == "isolated" + if is_isolated: + if _persistent_session_id: + session_state = { + "resume_session_id": _persistent_session_id + } + else: + session_state = None + else: + session_state = self._get_primary_session_state( + exclude_entry_point=entry_point_id + ) + # Gate: skip tick if no active session + if session_state is None: + logger.debug( + "Cron '%s': no active session, skipping", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + sleep_secs + ) + await asyncio.sleep(max(0, sleep_secs)) + continue + + exec_id = await self.trigger( + entry_point_id, + { + "event": { + "source": "timer", + "reason": "scheduled", + } + }, + session_state=session_state, + ) + if not _persistent_session_id and is_isolated: + _persistent_session_id = exec_id + logger.info( + "Cron fired for entry point '%s' (expr: %s)", + entry_point_id, + expr, + ) + except Exception: + logger.error( + "Cron trigger failed for '%s'", + entry_point_id, + exc_info=True, + ) + # Calculate next fire from now + cron = croniter(expr, datetime.now()) + next_dt = cron.get_next(datetime) + sleep_secs = (next_dt - datetime.now()).total_seconds() + self._timer_next_fire[entry_point_id] = ( + 
time.monotonic() + sleep_secs + ) + await asyncio.sleep(max(0, sleep_secs)) + + return _cron_loop + + task = asyncio.create_task( + _make_cron_timer( + ep_id, + cron_expr, + run_immediately, + idle_timeout=float(tc.get("idle_timeout_seconds", 300)), + )() + ) + self._timer_tasks.append(task) + logger.info( + "Started cron timer for entry point '%s' with expression '%s'%s", + ep_id, + cron_expr, + " (immediate first run)" if run_immediately else "", + ) + + elif interval and interval > 0: + # Fixed interval mode (original behavior) + def _make_timer( + entry_point_id: str, + mins: float, + immediate: bool, + idle_timeout: float = 300, + ): + async def _timer_loop(): + interval_secs = mins * 60 + _persistent_session_id: str | None = None + if not immediate: + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + interval_secs + ) + await asyncio.sleep(interval_secs) + while self._running: + # Gate: skip tick if timers are explicitly paused + if self._timers_paused: + logger.debug( + "Timer '%s': paused, skipping tick", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + interval_secs + ) + await asyncio.sleep(interval_secs) + continue + + # Gate: skip tick if agent is actively working. + # Gate: skip tick if ANY stream is actively working. 
+ _any_active = False + _min_idle = float("inf") + for _s in self._streams.values(): + if _s.active_execution_ids: + _any_active = True + _idle = _s.agent_idle_seconds + if _idle < _min_idle: + _min_idle = _idle + logger.info( + "Timer '%s': gate — active=%s, idle=%.1fs, timeout=%ds", + entry_point_id, + _any_active, + _min_idle, + idle_timeout, + ) + if _any_active and _min_idle < idle_timeout: + logger.info( + "Timer '%s': agent actively working, skipping tick", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + interval_secs + ) + await asyncio.sleep(interval_secs) + continue + + self._timer_next_fire.pop(entry_point_id, None) + try: + ep_spec = self._entry_points.get(entry_point_id) + is_isolated = ep_spec and ep_spec.isolation_level == "isolated" + if is_isolated: + if _persistent_session_id: + session_state = { + "resume_session_id": _persistent_session_id + } + else: + session_state = None + else: + session_state = self._get_primary_session_state( + exclude_entry_point=entry_point_id + ) + # Gate: skip tick if no active session + if session_state is None: + logger.debug( + "Timer '%s': no active session, skipping", + entry_point_id, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + interval_secs + ) + await asyncio.sleep(interval_secs) + continue + + exec_id = await self.trigger( + entry_point_id, + { + "event": { + "source": "timer", + "reason": "scheduled", + } + }, + session_state=session_state, + ) + if not _persistent_session_id and is_isolated: + _persistent_session_id = exec_id + logger.info( + "Timer fired for entry point '%s' (next in %s min)", + entry_point_id, + mins, + ) + except Exception: + logger.error( + "Timer trigger failed for '%s'", + entry_point_id, + exc_info=True, + ) + self._timer_next_fire[entry_point_id] = ( + time.monotonic() + interval_secs + ) + await asyncio.sleep(interval_secs) + + return _timer_loop + + task = asyncio.create_task( + _make_timer( + ep_id, + interval, + 
run_immediately, + idle_timeout=float(tc.get("idle_timeout_seconds", 300)), + )() + ) + self._timer_tasks.append(task) + logger.info( + "Started timer for entry point '%s' every %s min%s", + ep_id, + interval, + " (immediate first run)" if run_immediately else "", + ) + + else: + logger.warning( + "Entry point '%s' has trigger_type='timer' " + "but no 'cron' or valid 'interval_minutes' in trigger_config", + ep_id, + ) + + # Register primary graph + self._graphs[self._graph_id] = _GraphRegistration( + graph=self.graph, + goal=self.goal, + entry_points=dict(self._entry_points), + streams=dict(self._streams), + storage_subpath="", + event_subscriptions=list(self._event_subscriptions), + timer_tasks=list(self._timer_tasks), + timer_next_fire=self._timer_next_fire, + ) + async def stop(self) -> None: """Stop the agent runtime and all streams.""" @@ -818,11 +847,14 @@ class AgentRuntime: self._streams.clear() self._graphs.clear() + # Stop skill hot-reload watcher + await self._skills_manager.stop_watching() + # Stop storage await self._storage.stop() self._running = False - logger.info("AgentRuntime stopped") + logger.info("AgentHost stopped") def pause_timers(self) -> None: """Pause all timer-driven entry points. @@ -841,7 +873,7 @@ class AgentRuntime: self, entry_point_id: str, graph_id: str | None = None, - ) -> ExecutionStream | None: + ) -> ExecutionManager | None: """Find the stream for an entry point, searching the active graph first. 
Lookup order: @@ -865,6 +897,57 @@ class AgentRuntime: # Primary graph (also stored in self._streams) return self._streams.get(entry_point_id) + def _apply_pipeline_results(self) -> None: + """Read typed attributes from pipeline stages after initialization.""" + for stage in self._pipeline.stages: + name = stage.__class__.__name__ + + if stage.tool_registry is not None: + tools = list(stage.tool_registry.get_tools().values()) + if tools: + self._tools = tools + self._tool_executor = stage.tool_registry.get_executor() + logger.info("Pipeline: %d tools from %s", len(tools), name) + + if stage.llm is not None and self._llm is None: + self._llm = stage.llm + logger.info("Pipeline: LLM from %s", name) + + if stage.accounts_prompt: + self._accounts_prompt = stage.accounts_prompt + self._accounts_data = stage.accounts_data + self._tool_provider_map = stage.tool_provider_map + + if stage.skills_manager is not None: + self._skills_manager = stage.skills_manager + + + @staticmethod + def _load_pipeline_from_config(): + """Build pipeline from ``~/.hive/configuration.json`` ``pipeline`` key. + + Returns an empty pipeline if no config is set. + """ + from framework.config import get_hive_config + from framework.pipeline.registry import build_pipeline_from_config + from framework.pipeline.runner import PipelineRunner + + config = get_hive_config() + stages_config = config.get("pipeline", {}).get("stages", []) + if not stages_config: + return PipelineRunner([]) + return build_pipeline_from_config(stages_config) + + async def _reload_pipeline(self) -> None: + """Hot-reload pipeline from config. 
Atomic swap.""" + new_pipeline = self._load_pipeline_from_config() + await new_pipeline.initialize_all() + self._pipeline = new_pipeline + logger.info( + "Pipeline reloaded: %d stages", + len(new_pipeline.stages), + ) + def _prune_idempotency_keys(self) -> None: """Prune expired idempotency keys based on TTL and max size.""" ttl = self._config.idempotency_ttl_seconds @@ -915,7 +998,7 @@ class AgentRuntime: RuntimeError: If runtime not running """ if not self._running: - raise RuntimeError("AgentRuntime is not running") + raise RuntimeError("AgentHost is not running") # Idempotency check: return cached execution_id for duplicate keys. if idempotency_key is not None: @@ -929,6 +1012,21 @@ class AgentRuntime: ) return cached + # Run pipeline middleware (rate limiting, validation, cost guards, ...) + # Raises PipelineRejectedError if any stage rejects. + if self._pipeline.stages: + from framework.pipeline.stage import PipelineContext + + pipeline_ctx = PipelineContext( + entry_point_id=entry_point_id, + input_data=input_data, + correlation_id=correlation_id, + session_state=session_state, + ) + pipeline_ctx = await self._pipeline.run(pipeline_ctx) + # Stages may have transformed the input_data. 
+ input_data = pipeline_ctx.input_data + stream = self._resolve_stream(entry_point_id, graph_id) if stream is None: raise ValueError(f"Entry point '{entry_point_id}' not found") @@ -1023,9 +1121,9 @@ class AgentRuntime: graph_log_store = RuntimeLogStore(graph_base / "runtime_logs") # Create streams for each entry point - streams: dict[str, ExecutionStream] = {} + streams: dict[str, ExecutionManager] = {} for ep_id, spec in entry_points.items(): - stream = ExecutionStream( + stream = ExecutionManager( stream_id=f"{graph_id}::{ep_id}", entry_spec=spec, graph=graph, @@ -1055,7 +1153,7 @@ class AgentRuntime: streams[ep_id] = stream # Set up event-driven subscriptions - from framework.runtime.event_bus import EventType as _ET + from framework.host.event_bus import EventType as _ET event_subs: list[str] = [] for ep_id, spec in entry_points.items(): @@ -1492,7 +1590,7 @@ class AgentRuntime: # Search primary graph's streams for an active session. # Skip isolated streams — they have their own session directories # and must never be used as a shared session. 
- all_streams: list[tuple[str, ExecutionStream]] = [] + all_streams: list[tuple[str, ExecutionManager]] = [] for _gid, reg in self._graphs.items(): for ep_id, stream in reg.streams.items(): # Skip isolated entry points — they run in their own namespace @@ -1653,7 +1751,7 @@ class AgentRuntime: return max(0.0, mono - time.monotonic()) return None - def get_stream(self, entry_point_id: str) -> ExecutionStream | None: + def get_stream(self, entry_point_id: str) -> ExecutionManager | None: """Get a specific execution stream.""" return self._streams.get(entry_point_id) @@ -1820,94 +1918,3 @@ class AgentRuntime: # === CONVENIENCE FACTORY === -def create_agent_runtime( - graph: "GraphSpec", - goal: "Goal", - storage_path: str | Path, - entry_points: list[EntryPointSpec], - llm: "LLMProvider | None" = None, - tools: list["Tool"] | None = None, - tool_executor: Callable | None = None, - config: AgentRuntimeConfig | None = None, - runtime_log_store: Any = None, - enable_logging: bool = True, - checkpoint_config: CheckpointConfig | None = None, - graph_id: str | None = None, - accounts_prompt: str = "", - accounts_data: list[dict] | None = None, - tool_provider_map: dict[str, str] | None = None, - event_bus: "EventBus | None" = None, - skills_manager_config: "SkillsManagerConfig | None" = None, - # Deprecated — pass skills_manager_config instead. - skills_catalog_prompt: str = "", - protocols_prompt: str = "", - skill_dirs: list[str] | None = None, -) -> AgentRuntime: - """ - Create and configure an AgentRuntime with entry points. - - Convenience factory that creates runtime and registers entry points. - Runtime logging is enabled by default for observability. 
- - Args: - graph: Graph specification - goal: Goal driving execution - storage_path: Path for persistent storage - entry_points: Entry point specifications - llm: LLM provider - tools: Available tools - tool_executor: Tool executor function - config: Runtime configuration - runtime_log_store: Optional RuntimeLogStore for per-execution logging. - If None and enable_logging=True, creates one automatically. - enable_logging: Whether to enable runtime logging (default: True). - Set to False to disable logging entirely. - checkpoint_config: Optional checkpoint configuration for resumable sessions. - If None, uses default checkpointing behavior. - graph_id: Optional identifier for the primary graph (defaults to "primary"). - accounts_data: Raw account data for per-node prompt generation. - tool_provider_map: Tool name to provider name mapping for account routing. - event_bus: Optional external EventBus to share with other components. - skills_catalog_prompt: Available skills catalog for system prompt. - protocols_prompt: Default skill operational protocols for system prompt. - skill_dirs: Skill base directories for Tier 3 resource access. - skills_manager_config: Skill configuration — the runtime owns - discovery, loading, and prompt renderation internally. - skills_catalog_prompt: Deprecated. Pre-rendered skills catalog. - protocols_prompt: Deprecated. Pre-rendered operational protocols. 
- - Returns: - Configured AgentRuntime (not yet started) - """ - # Auto-create runtime log store if logging is enabled and not provided - if enable_logging and runtime_log_store is None: - from framework.runtime.runtime_log_store import RuntimeLogStore - - storage_path_obj = Path(storage_path) if isinstance(storage_path, str) else storage_path - runtime_log_store = RuntimeLogStore(storage_path_obj / "runtime_logs") - - runtime = AgentRuntime( - graph=graph, - goal=goal, - storage_path=storage_path, - llm=llm, - tools=tools, - tool_executor=tool_executor, - config=config, - runtime_log_store=runtime_log_store, - checkpoint_config=checkpoint_config, - graph_id=graph_id, - accounts_prompt=accounts_prompt, - accounts_data=accounts_data, - tool_provider_map=tool_provider_map, - event_bus=event_bus, - skills_manager_config=skills_manager_config, - skills_catalog_prompt=skills_catalog_prompt, - protocols_prompt=protocols_prompt, - skill_dirs=skill_dirs, - ) - - for spec in entry_points: - runtime.register_entry_point(spec) - - return runtime diff --git a/core/framework/runtime/event_bus.py b/core/framework/host/event_bus.py similarity index 100% rename from core/framework/runtime/event_bus.py rename to core/framework/host/event_bus.py diff --git a/core/framework/runtime/execution_stream.py b/core/framework/host/execution_manager.py similarity index 97% rename from core/framework/runtime/execution_stream.py rename to core/framework/host/execution_manager.py index 6ff2f89b..39b9aadf 100644 --- a/core/framework/runtime/execution_stream.py +++ b/core/framework/host/execution_manager.py @@ -18,18 +18,18 @@ from dataclasses import dataclass, field from datetime import datetime from typing import TYPE_CHECKING, Any -from framework.graph.checkpoint_config import CheckpointConfig -from framework.graph.executor import ExecutionResult, GraphExecutor -from framework.runtime.event_bus import EventBus -from framework.runtime.shared_state import IsolationLevel, SharedBufferManager -from 
framework.runtime.stream_runtime import StreamRuntime, StreamRuntimeAdapter +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator +from framework.host.event_bus import EventBus +from framework.host.shared_state import IsolationLevel, SharedBufferManager +from framework.host.stream_runtime import StreamDecisionTracker, StreamRuntimeAdapter if TYPE_CHECKING: - from framework.graph.edge import GraphSpec - from framework.graph.goal import Goal + from framework.orchestrator.edge import GraphSpec + from framework.orchestrator.goal import Goal from framework.llm.provider import LLMProvider, Tool - from framework.runtime.event_bus import AgentEvent - from framework.runtime.outcome_aggregator import OutcomeAggregator + from framework.host.event_bus import AgentEvent + from framework.host.outcome_aggregator import OutcomeAggregator from framework.storage.concurrent import ConcurrentStorage from framework.storage.session_store import SessionStore @@ -133,7 +133,7 @@ class ExecutionContext: status: str = "pending" # pending, running, completed, failed, paused -class ExecutionStream: +class ExecutionManager: """ Manages concurrent executions for a single entry point. 
@@ -262,7 +262,7 @@ class ExecutionStream: ) # Create stream-scoped runtime - self._runtime = StreamRuntime( + self._runtime = StreamDecisionTracker( stream_id=stream_id, storage=storage, outcome_aggregator=outcome_aggregator, @@ -271,7 +271,7 @@ class ExecutionStream: # Execution tracking self._active_executions: dict[str, ExecutionContext] = {} self._execution_tasks: dict[str, asyncio.Task] = {} - self._active_executors: dict[str, GraphExecutor] = {} + self._active_executors: dict[str, Orchestrator] = {} self._cancel_reasons: dict[str, str] = {} self._execution_results: OrderedDict[str, ExecutionResult] = OrderedDict() self._execution_result_times: dict[str, float] = {} @@ -301,7 +301,7 @@ class ExecutionStream: # Emit stream started event if self._scoped_event_bus: - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType await self._scoped_event_bus.publish( AgentEvent( @@ -426,7 +426,7 @@ class ExecutionStream: # Emit stream stopped event if self._scoped_event_bus: - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType await self._scoped_event_bus.publish( AgentEvent( @@ -668,7 +668,7 @@ class ExecutionStream: # Create per-execution runtime logger runtime_logger = None if self._runtime_log_store: - from framework.runtime.runtime_logger import RuntimeLogger + from framework.tracker.runtime_logger import RuntimeLogger runtime_logger = RuntimeLogger( store=self._runtime_log_store, agent_id=self.graph.id @@ -697,12 +697,7 @@ class ExecutionStream: # forward so the next attempt resumes at the failed node. while True: # Create executor for this execution. - # Each execution gets its own storage under sessions/{exec_id}/ - # so conversations, spillover, and data files are all scoped - # to this execution. 
The executor sets data_dir via execution - # context (contextvars) so data tools and spillover share the - # same session-scoped directory. - executor = GraphExecutor( + executor = Orchestrator( runtime=runtime_adapter, llm=self._llm, tools=self._tools, @@ -763,7 +758,7 @@ class ExecutionStream: # Emit resurrection event if self._scoped_event_bus: - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType await self._scoped_event_bus.publish( AgentEvent( @@ -1119,7 +1114,7 @@ class ExecutionStream: Each stream only executes from its own entry_node, but the full graph must validate with all entry points accounted for. """ - from framework.graph.edge import GraphSpec + from framework.orchestrator.edge import GraphSpec # Merge entry points: this stream's entry + original graph's primary # entry + any other entry points. This ensures all nodes are diff --git a/core/framework/runtime/outcome_aggregator.py b/core/framework/host/outcome_aggregator.py similarity index 99% rename from core/framework/runtime/outcome_aggregator.py rename to core/framework/host/outcome_aggregator.py index 9bf8c596..164a8ceb 100644 --- a/core/framework/runtime/outcome_aggregator.py +++ b/core/framework/host/outcome_aggregator.py @@ -14,8 +14,8 @@ from typing import TYPE_CHECKING, Any from framework.schemas.decision import Decision, Outcome if TYPE_CHECKING: - from framework.graph.goal import Goal - from framework.runtime.event_bus import EventBus + from framework.orchestrator.goal import Goal + from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) diff --git a/core/framework/runtime/shared_state.py b/core/framework/host/shared_state.py similarity index 100% rename from core/framework/runtime/shared_state.py rename to core/framework/host/shared_state.py diff --git a/core/framework/runtime/stream_runtime.py b/core/framework/host/stream_runtime.py similarity index 98% rename from 
core/framework/runtime/stream_runtime.py rename to core/framework/host/stream_runtime.py index 9c1a5131..eeecce0c 100644 --- a/core/framework/runtime/stream_runtime.py +++ b/core/framework/host/stream_runtime.py @@ -18,12 +18,12 @@ from framework.schemas.run import Run, RunStatus from framework.storage.concurrent import ConcurrentStorage if TYPE_CHECKING: - from framework.runtime.outcome_aggregator import OutcomeAggregator + from framework.host.outcome_aggregator import OutcomeAggregator logger = logging.getLogger(__name__) -class StreamRuntime: +class StreamDecisionTracker: """ Thread-safe runtime for a single execution stream. @@ -431,7 +431,7 @@ class StreamRuntimeAdapter: by providing the same API as Runtime but routing to a specific execution. """ - def __init__(self, stream_runtime: StreamRuntime, execution_id: str): + def __init__(self, stream_runtime: StreamDecisionTracker, execution_id: str): """ Create adapter for a specific execution. diff --git a/core/framework/runtime/triggers.py b/core/framework/host/triggers.py similarity index 100% rename from core/framework/runtime/triggers.py rename to core/framework/host/triggers.py diff --git a/core/framework/runtime/webhook_server.py b/core/framework/host/webhook_server.py similarity index 99% rename from core/framework/runtime/webhook_server.py rename to core/framework/host/webhook_server.py index 3d8a5754..b33dcaba 100644 --- a/core/framework/runtime/webhook_server.py +++ b/core/framework/host/webhook_server.py @@ -13,7 +13,7 @@ from dataclasses import dataclass from aiohttp import web -from framework.runtime.event_bus import EventBus +from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) diff --git a/core/framework/llm/key_pool.py b/core/framework/llm/key_pool.py new file mode 100644 index 00000000..9790e1ba --- /dev/null +++ b/core/framework/llm/key_pool.py @@ -0,0 +1,101 @@ +"""Thread-safe API key pool with round-robin rotation and health tracking. 
+ +When multiple API keys are configured, the pool rotates through them on each +request. Keys that hit rate limits are temporarily cooled-down so the next +call automatically uses a healthy key -- no sleep required. +""" + +from __future__ import annotations + +import logging +import threading +import time +from dataclasses import dataclass + +logger = logging.getLogger(__name__) + + +@dataclass +class KeyHealth: + """Per-key health counters.""" + + rate_limited_until: float = 0.0 # monotonic timestamp + consecutive_errors: int = 0 + total_requests: int = 0 + total_successes: int = 0 + + +class KeyPool: + """Round-robin key pool with health tracking. + + Thread-safe: all mutations protected by a lock so concurrent LLM calls + (e.g. parallel tool execution in EventLoopNode) don't race. + """ + + def __init__(self, keys: list[str]) -> None: + if not keys: + raise ValueError("KeyPool requires at least one key") + self._keys = list(keys) + self._index = 0 + self._health: dict[str, KeyHealth] = {k: KeyHealth() for k in keys} + self._lock = threading.Lock() + + @property + def size(self) -> int: + return len(self._keys) + + def get_key(self) -> str: + """Return the next healthy key (round-robin). + + If every key is currently rate-limited, returns the one whose cooldown + expires soonest so the caller can proceed with minimal delay. + """ + with self._lock: + now = time.monotonic() + for _ in range(len(self._keys)): + key = self._keys[self._index] + self._index = (self._index + 1) % len(self._keys) + health = self._health[key] + if health.rate_limited_until <= now: + health.total_requests += 1 + return key + # All rate-limited -- pick the one that expires soonest. 
+ soonest = min(self._keys, key=lambda k: self._health[k].rate_limited_until) + self._health[soonest].total_requests += 1 + return soonest + + def mark_rate_limited(self, key: str, retry_after: float = 60.0) -> None: + """Mark *key* as rate-limited for *retry_after* seconds.""" + with self._lock: + health = self._health.get(key) + if health: + health.rate_limited_until = time.monotonic() + retry_after + health.consecutive_errors += 1 + logger.info( + "[key-pool] Key ...%s rate-limited for %.0fs (errors=%d)", + key[-6:], + retry_after, + health.consecutive_errors, + ) + + def mark_success(self, key: str) -> None: + """Record a successful call on *key*.""" + with self._lock: + health = self._health.get(key) + if health: + health.consecutive_errors = 0 + health.total_successes += 1 + + def get_stats(self) -> dict[str, dict]: + """Return health stats keyed by the last 6 chars of each key.""" + with self._lock: + now = time.monotonic() + return { + f"...{k[-6:]}": { + "healthy": self._health[k].rate_limited_until <= now, + "requests": self._health[k].total_requests, + "successes": self._health[k].total_successes, + "consecutive_errors": self._health[k].consecutive_errors, + } + for k in self._keys + } diff --git a/core/framework/llm/litellm.py b/core/framework/llm/litellm.py index 89e01b14..d4e2def6 100644 --- a/core/framework/llm/litellm.py +++ b/core/framework/llm/litellm.py @@ -7,6 +7,8 @@ Groq, and local models. 
See: https://docs.litellm.ai/docs/providers """ +from __future__ import annotations + import ast import asyncio import hashlib @@ -18,7 +20,10 @@ import time from collections.abc import AsyncIterator from datetime import datetime from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from framework.llm.key_pool import KeyPool try: import litellm @@ -561,6 +566,7 @@ class LiteLLMProvider(LLMProvider): model: str = "gpt-4o-mini", api_key: str | None = None, api_base: str | None = None, + api_keys: list[str] | None = None, **kwargs: Any, ): """ @@ -573,6 +579,9 @@ class LiteLLMProvider(LLMProvider): look for the appropriate env var (OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.) api_base: Custom API base URL (for proxies or local deployments) + api_keys: Optional list of API keys for key-pool rotation. When + provided with 2+ keys, a :class:`KeyPool` is created and + keys are rotated on rate-limit errors. **kwargs: Additional arguments passed to litellm.completion() """ # Kimi For Coding exposes an Anthropic-compatible endpoint at @@ -594,11 +603,24 @@ class LiteLLMProvider(LLMProvider): if api_base and api_base.rstrip("/").endswith("/v1"): api_base = api_base.rstrip("/")[:-3] self.model = model - self.api_key = api_key + # Key pool: when multiple keys are provided, enable rotation. + self._key_pool: KeyPool | None = None + if api_keys and len(api_keys) > 1: + from framework.llm.key_pool import KeyPool + + self._key_pool = KeyPool(api_keys) + self.api_key = api_keys[0] # default for OAuth detection below + logger.info( + "[litellm] Key pool enabled with %d keys for model %s", + len(api_keys), + model, + ) + else: + self.api_key = api_key or (api_keys[0] if api_keys else None) self.api_base = api_base or self._default_api_base_for_model(_original_model) self.extra_kwargs = kwargs # Detect Claude Code OAuth subscription by checking the api_key prefix. 
- self._claude_code_oauth = bool(api_key and api_key.startswith("sk-ant-oat")) + self._claude_code_oauth = bool(self.api_key and self.api_key.startswith("sk-ant-oat")) if self._claude_code_oauth: # Anthropic requires a specific User-Agent for OAuth requests. eh = self.extra_kwargs.setdefault("extra_headers", {}) @@ -669,10 +691,20 @@ class LiteLLMProvider(LLMProvider): def _completion_with_rate_limit_retry( self, max_retries: int | None = None, **kwargs: Any ) -> Any: - """Call litellm.completion with retry on 429 rate limit errors and empty responses.""" + """Call litellm.completion with retry on 429 rate limit errors and empty responses. + + When a :class:`KeyPool` is configured, rate-limited keys are rotated + automatically so the next attempt uses a different key -- no sleep + needed between attempts. + """ model = kwargs.get("model", self.model) retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES for attempt in range(retries + 1): + # Rotate key from pool when available. + current_key: str | None = None + if self._key_pool: + current_key = self._key_pool.get_key() + kwargs["api_key"] = current_key try: response = litellm.completion(**kwargs) # type: ignore[union-attr] @@ -747,8 +779,22 @@ class LiteLLMProvider(LLMProvider): time.sleep(wait) continue + if self._key_pool and current_key: + self._key_pool.mark_success(current_key) return response except RateLimitError as e: + # Key pool: mark the offending key and rotate immediately. + if self._key_pool and current_key: + self._key_pool.mark_rate_limited(current_key, retry_after=60.0) + # When we have other healthy keys, skip the sleep -- the + # next iteration will pick a different key automatically. 
+ if attempt < retries: + logger.info( + "[retry] Key pool rotating away from ...%s on 429", + current_key[-6:], + ) + continue + # Dump full request to file for debugging messages = kwargs.get("messages", []) token_count, token_method = _estimate_tokens(model, messages) @@ -761,7 +807,7 @@ class LiteLLMProvider(LLMProvider): if attempt == retries: logger.error( f"[retry] GAVE UP on {model} after {retries + 1} " - f"attempts — rate limit error: {e!s}. " + f"attempts -- rate limit error: {e!s}. " f"~{token_count} tokens ({token_method}). " f"Full request dumped to: {dump_path}" ) @@ -880,10 +926,16 @@ class LiteLLMProvider(LLMProvider): """Async version of _completion_with_rate_limit_retry. Uses litellm.acompletion and asyncio.sleep instead of blocking calls. + When a :class:`KeyPool` is configured, rate-limited keys are rotated. """ model = kwargs.get("model", self.model) retries = max_retries if max_retries is not None else RATE_LIMIT_MAX_RETRIES for attempt in range(retries + 1): + # Rotate key from pool when available. + current_key: str | None = None + if self._key_pool: + current_key = self._key_pool.get_key() + kwargs["api_key"] = current_key try: response = await litellm.acompletion(**kwargs) # type: ignore[union-attr] @@ -952,8 +1004,20 @@ class LiteLLMProvider(LLMProvider): await asyncio.sleep(wait) continue + if self._key_pool and current_key: + self._key_pool.mark_success(current_key) return response except RateLimitError as e: + # Key pool: mark the offending key and rotate immediately. 
+ if self._key_pool and current_key: + self._key_pool.mark_rate_limited(current_key, retry_after=60.0) + if attempt < retries: + logger.info( + "[async-retry] Key pool rotating away from ...%s on 429", + current_key[-6:], + ) + continue + messages = kwargs.get("messages", []) token_count, token_method = _estimate_tokens(model, messages) dump_path = _dump_failed_request( @@ -965,7 +1029,7 @@ class LiteLLMProvider(LLMProvider): if attempt == retries: logger.error( f"[async-retry] GAVE UP on {model} after {retries + 1} " - f"attempts — rate limit error: {e!s}. " + f"attempts -- rate limit error: {e!s}. " f"~{token_count} tokens ({token_method}). " f"Full request dumped to: {dump_path}" ) diff --git a/core/framework/loader/__init__.py b/core/framework/loader/__init__.py new file mode 100644 index 00000000..1b00d9b4 --- /dev/null +++ b/core/framework/loader/__init__.py @@ -0,0 +1,4 @@ +"""Loader layer -- agent loading from disk (JSON config, MCP, credentials).""" + +from framework.loader.agent_loader import AgentLoader # noqa: F401 +from framework.loader.tool_registry import ToolRegistry # noqa: F401 diff --git a/core/framework/runner/runner.py b/core/framework/loader/agent_loader.py similarity index 77% rename from core/framework/runner/runner.py rename to core/framework/loader/agent_loader.py index e8735250..5adca172 100644 --- a/core/framework/runner/runner.py +++ b/core/framework/loader/agent_loader.py @@ -13,21 +13,20 @@ from framework.config import get_hive_config, get_max_context_tokens, get_prefer from framework.credentials.validation import ( ensure_credential_key_env as _ensure_credential_key_env, ) -from framework.graph import Goal -from framework.graph.edge import ( +from framework.orchestrator import Goal +from framework.orchestrator.edge import ( DEFAULT_MAX_TOKENS, EdgeCondition, EdgeSpec, GraphSpec, ) -from framework.graph.executor import ExecutionResult -from framework.graph.node import NodeSpec +from framework.orchestrator.orchestrator import 
ExecutionResult +from framework.orchestrator.node import NodeSpec from framework.llm.provider import LLMProvider, Tool -from framework.runner.preload_validation import run_preload_validation -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec -from framework.runtime.runtime_log_store import RuntimeLogStore +from framework.loader.preload_validation import run_preload_validation +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost, AgentRuntimeConfig +from framework.host.execution_manager import EntryPointSpec from framework.tools.flowchart_utils import generate_fallback_flowchart logger = logging.getLogger(__name__) @@ -881,6 +880,172 @@ class ValidationResult: missing_credentials: list[str] = field(default_factory=list) +def _resolve_template_vars(text: str | None, variables: dict[str, str]) -> str | None: + """Resolve ``{{variable_name}}`` placeholders in *text*.""" + if text is None or not variables: + return text + import re + + def _replace(m: re.Match) -> str: + key = m.group(1).strip() + return variables.get(key, m.group(0)) + + return re.sub(r"\{\{(.+?)\}\}", _replace, text) + + +def load_agent_config(data: str | dict) -> tuple[GraphSpec, Goal]: + """Load ``GraphSpec`` and ``Goal`` from a declarative :class:`AgentConfig`. + + The declarative format uses a ``name`` key at the top level, unlike the + legacy export format which uses ``graph``/``goal`` keys. The runner + auto-detects the format in :meth:`AgentLoader.load`. + + Template variables in ``config.variables`` are resolved in all + ``system_prompt`` and ``identity_prompt`` fields via ``{{var_name}}``. 
+ + Returns: + Tuple of (GraphSpec, Goal) + """ + from framework.orchestrator.edge import EdgeCondition, EdgeSpec + from framework.orchestrator.goal import Constraint, Goal as GoalModel, SuccessCriterion + from framework.schemas.agent_config import AgentConfig + + if isinstance(data, str): + data = json.loads(data) + + config = AgentConfig.model_validate(data) + tvars = config.variables + + # Build Goal + success_criteria = [ + SuccessCriterion( + id=f"sc-{i}", + description=sc, + metric="llm_judge", + target="", + ) + for i, sc in enumerate(config.goal.success_criteria) + ] + constraints = [ + Constraint( + id=f"c-{i}", + description=c, + constraint_type="hard", + category="general", + ) + for i, c in enumerate(config.goal.constraints) + ] + goal = GoalModel( + id=f"{config.name}-goal", + name=config.name, + description=config.goal.description, + success_criteria=success_criteria, + constraints=constraints, + ) + + # Build nodes + condition_map = { + "always": EdgeCondition.ALWAYS, + "on_success": EdgeCondition.ON_SUCCESS, + "on_failure": EdgeCondition.ON_FAILURE, + "conditional": EdgeCondition.CONDITIONAL, + "llm_decide": EdgeCondition.LLM_DECIDE, + } + + nodes = [] + for nc in config.nodes: + # Resolve tool access: node-level config -> agent-level fallback + if nc.tools.policy == "explicit" and nc.tools.allowed: + tools_list = nc.tools.allowed + tool_policy = "explicit" + elif nc.tools.policy == "none": + tools_list = [] + tool_policy = "none" + elif nc.tools.policy == "all": + tools_list = [] + tool_policy = "all" + else: + # Inherit agent-level tool config + if config.tools.policy == "explicit" and config.tools.allowed: + tools_list = config.tools.allowed + else: + tools_list = [] + tool_policy = config.tools.policy + + node_kwargs: dict = { + "id": nc.id, + "name": nc.name or nc.id, + "description": nc.description or "", + "node_type": nc.node_type, + "system_prompt": _resolve_template_vars(nc.system_prompt, tvars), + "tools": tools_list, + 
"tool_access_policy": tool_policy, + "model": nc.model, + "input_keys": nc.input_keys, + "output_keys": nc.output_keys, + "nullable_output_keys": nc.nullable_output_keys, + "max_iterations": nc.max_iterations, + "success_criteria": nc.success_criteria, + "skip_judge": nc.skip_judge, + } + # Optional fields -- only pass when set (avoids overriding defaults) + if nc.client_facing: + node_kwargs["client_facing"] = nc.client_facing + if nc.max_node_visits != 1: + node_kwargs["max_node_visits"] = nc.max_node_visits + if nc.failure_criteria: + node_kwargs["failure_criteria"] = nc.failure_criteria + if nc.max_retries is not None: + node_kwargs["max_retries"] = nc.max_retries + + nodes.append(NodeSpec(**node_kwargs)) + + # Build edges + edges = [] + for i, ec in enumerate(config.edges): + edges.append( + EdgeSpec( + id=f"e-{i}-{ec.from_node}-{ec.to_node}", + source=ec.from_node, + target=ec.to_node, + condition=condition_map.get(ec.condition, EdgeCondition.ON_SUCCESS), + condition_expr=ec.condition_expr, + priority=ec.priority, + input_mapping=ec.input_mapping, + ) + ) + + # Build entry_points dict for GraphSpec + entry_points_dict: dict = {} + if config.entry_points: + for ep in config.entry_points: + entry_points_dict[ep.id] = ep.entry_node or config.entry_node + else: + entry_points_dict = {"default": config.entry_node} + + # Build GraphSpec + graph_kwargs: dict = { + "id": f"{config.name}-graph", + "goal_id": goal.id, + "version": config.version, + "entry_node": config.entry_node, + "entry_points": entry_points_dict, + "terminal_nodes": config.terminal_nodes, + "pause_nodes": config.pause_nodes, + "nodes": nodes, + "edges": edges, + "max_tokens": config.max_tokens, + "loop_config": dict(config.loop_config), + "conversation_mode": config.conversation_mode, + "identity_prompt": _resolve_template_vars( + config.identity_prompt, tvars + ) or "", + } + + graph = GraphSpec(**graph_kwargs) + return graph, goal + + def load_agent_export(data: str | dict) -> tuple[GraphSpec, 
Goal]: """ Load GraphSpec and Goal from export_graph() output. @@ -942,7 +1107,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]: ) # Build Goal - from framework.graph.goal import Constraint, SuccessCriterion + from framework.orchestrator.goal import Constraint, SuccessCriterion success_criteria = [] for sc_data in goal_data.get("success_criteria", []): @@ -979,7 +1144,7 @@ def load_agent_export(data: str | dict) -> tuple[GraphSpec, Goal]: return graph, goal -class AgentRunner: +class AgentLoader: """ Loads and runs exported agents with minimal boilerplate. @@ -991,15 +1156,15 @@ class AgentRunner: Usage: # Simple usage - runner = AgentRunner.load("exports/outbound-sales-agent") + runner = AgentLoader.load("exports/outbound-sales-agent") result = await runner.run({"lead_id": "123"}) # With context manager - async with AgentRunner.load("exports/outbound-sales-agent") as runner: + async with AgentLoader.load("exports/outbound-sales-agent") as runner: result = await runner.run({"lead_id": "123"}) # With custom tools - runner = AgentRunner.load("exports/outbound-sales-agent") + runner = AgentLoader.load("exports/outbound-sales-agent") runner.register_tool("my_tool", my_tool_func) result = await runner.run({"lead_id": "123"}) """ @@ -1027,7 +1192,7 @@ class AgentRunner: credential_store: Any | None = None, ): """ - Initialize the runner (use AgentRunner.load() instead). + Initialize the runner (use AgentLoader.load() instead). Args: agent_path: Path to agent folder @@ -1082,7 +1247,7 @@ class AgentRunner: self._approval_callback: Callable | None = None # AgentRuntime — unified execution path for all agents - self._agent_runtime: AgentRuntime | None = None + self._agent_runtime: AgentHost | None = None # Pre-load validation: structural checks + credentials. # Fails fast with actionable guidance — no MCP noise on screen. 
run_preload_validation( @@ -1101,14 +1266,7 @@ class AgentRunner: os.environ["HIVE_AGENT_NAME"] = agent_path.name os.environ["HIVE_STORAGE_PATH"] = str(self._storage_path) - # Auto-discover MCP servers from mcp_servers.json - mcp_config_path = agent_path / "mcp_servers.json" - if mcp_config_path.exists(): - self._load_mcp_servers_from_config(mcp_config_path) - - # Auto-discover registry-selected MCP servers from mcp_registry.json - self._load_registry_mcp_servers(agent_path) - + # MCP tools are loaded by McpRegistryStage in the pipeline during AgentHost.start() @staticmethod def _import_agent_module(agent_path: Path): """Import an agent package from its directory path. @@ -1158,7 +1316,7 @@ class AgentRunner: interactive: bool = True, skip_credential_validation: bool | None = None, credential_store: Any | None = None, - ) -> "AgentRunner": + ) -> "AgentLoader": """ Load an agent from an export folder. @@ -1299,21 +1457,22 @@ class AgentRunner: runner._agent_skills = agent_skills return runner - # Fallback: load from agent.json (legacy JSON-based agents) + # Fallback: load from agent.json (declarative config) agent_json_path = agent_path / "agent.json" + if not agent_json_path.is_file(): raise FileNotFoundError(f"No agent.py or agent.json found in {agent_path}") - with open(agent_json_path, encoding="utf-8") as f: - export_data = f.read() - + export_data = agent_json_path.read_text(encoding="utf-8") if not export_data.strip(): - raise ValueError(f"Empty agent export file: {agent_json_path}") + raise ValueError(f"Empty agent.json: {agent_json_path}") - try: - graph, goal = load_agent_export(export_data) - except json.JSONDecodeError as exc: - raise ValueError(f"Invalid JSON in agent export file: {agent_json_path}") from exc + parsed = json.loads(export_data) + graph, goal = load_agent_config(parsed) + logger.info( + "Loaded declarative agent config from agent.json (name=%s)", + parsed.get("name"), + ) # Generate flowchart.json if missing (for legacy JSON-based 
agents) generate_fallback_flowchart(graph, goal, agent_path) @@ -1396,60 +1555,6 @@ class AgentRunner: } return self._tool_registry.register_mcp_server(server_config) - def _load_mcp_servers_from_config(self, config_path: Path) -> None: - """Load and register MCP servers from a configuration file.""" - self._tool_registry.load_mcp_config(config_path) - - def _load_registry_mcp_servers(self, agent_path: Path) -> None: - """Load and register MCP servers selected via ``mcp_registry.json``.""" - registry_json = agent_path / "mcp_registry.json" - if registry_json.is_file(): - self._tool_registry.set_mcp_registry_agent_path(agent_path) - else: - self._tool_registry.set_mcp_registry_agent_path(None) - - from framework.runner.mcp_registry import MCPRegistry - - try: - registry = MCPRegistry() - registry.initialize() - server_configs, selection_max_tools = registry.load_agent_selection(agent_path) - except Exception as exc: - logger.warning( - "Failed to load MCP registry servers for '%s': %s", - agent_path.name, - exc, - ) - return - - if not server_configs: - return - - results = self._tool_registry.load_registry_servers( - server_configs, - preserve_existing_tools=True, - log_collisions=True, - max_tools=selection_max_tools, - ) - loaded = [result for result in results if result["status"] == "loaded"] - skipped = [result for result in results if result["status"] != "loaded"] - - logger.info( - "Loaded %d/%d MCP registry server(s) for agent '%s'", - len(loaded), - len(results), - agent_path.name, - ) - if skipped: - logger.info( - "Skipped MCP registry servers for agent '%s': %s", - agent_path.name, - [ - {"server": result["server"], "reason": result["skipped_reason"]} - for result in skipped - ], - ) - def set_approval_callback(self, callback: Callable) -> None: """ Set a callback for human-in-the-loop approval during execution. 
@@ -1460,272 +1565,119 @@ class AgentRunner: self._approval_callback = callback def _setup(self, event_bus=None) -> None: - """Set up runtime, LLM, and executor.""" - # Configure structured logging (auto-detects JSON vs human-readable) + """Set up runtime via pipeline stages. + + Builds a pipeline with the default stages (LLM, credentials, MCP, + skills) and passes it to AgentHost. The stages initialize during + ``AgentHost.start()`` and inject tools/LLM/credentials/skills. + """ from framework.observability import configure_logging + from framework.pipeline.stages.credential_resolver import CredentialResolverStage + from framework.pipeline.stages.llm_provider import LlmProviderStage + from framework.pipeline.stages.mcp_registry import McpRegistryStage + from framework.pipeline.stages.skill_registry import SkillRegistryStage + from framework.skills.config import SkillsConfig configure_logging(level="INFO", format="auto") - # Set up session context for tools (agent_id) + # Set up session context for tools agent_id = self.graph.id or "unknown" + self._tool_registry.set_session_context(agent_id=agent_id) - self._tool_registry.set_session_context( - agent_id=agent_id, - ) + # Read MCP server refs from agent.json + mcp_refs = [] + agent_json = self.agent_path / "agent.json" + if agent_json.exists(): + try: + import json as _json - # Create LLM provider - # Uses LiteLLM which auto-detects the provider from model name - # Skip if already injected (e.g. 
worker agents with a pre-built LLM) - if self._llm is not None: - pass # LLM already configured externally - elif self.mock_mode: - # Use mock LLM for testing without real API calls - from framework.llm.mock import MockLLMProvider + data = _json.loads(agent_json.read_text(encoding="utf-8")) + mcp_refs = data.get("mcp_servers", []) + except Exception: + pass - self._llm = MockLLMProvider(model=self.model) - else: - from framework.llm.litellm import LiteLLMProvider - - # Check if a subscription mode is configured - config = get_hive_config() - llm_config = config.get("llm", {}) - use_claude_code = llm_config.get("use_claude_code_subscription", False) - use_codex = llm_config.get("use_codex_subscription", False) - use_kimi_code = llm_config.get("use_kimi_code_subscription", False) - use_antigravity = llm_config.get("use_antigravity_subscription", False) - api_base = llm_config.get("api_base") - - api_key = None - if use_claude_code: - # Get OAuth token from Claude Code subscription - api_key = get_claude_code_token() - if not api_key: - logger.warning( - "Claude Code subscription configured but no token found. " - "Run 'claude' to authenticate, then try again." - ) - elif use_codex: - # Get OAuth token from Codex subscription - api_key = get_codex_token() - if not api_key: - logger.warning( - "Codex subscription configured but no token found. " - "Run 'codex' to authenticate, then try again." - ) - elif use_kimi_code: - # Get API key from Kimi Code CLI config (~/.kimi/config.toml) - api_key = get_kimi_code_token() - if not api_key: - logger.warning( - "Kimi Code subscription configured but no key found. " - "Run 'kimi /login' to authenticate, then try again." - ) - elif use_antigravity: - pass # AntigravityProvider handles credentials internally - - if api_key and use_claude_code: - # Use litellm's built-in Anthropic OAuth support. - # The lowercase "authorization" key triggers OAuth detection which - # adds the required anthropic-beta and browser-access headers. 
- self._llm = LiteLLMProvider( - model=self.model, - api_key=api_key, - api_base=api_base, - extra_headers={"authorization": f"Bearer {api_key}"}, - ) - elif api_key and use_codex: - # OpenAI Codex subscription routes through the ChatGPT backend - # (chatgpt.com/backend-api/codex/responses), NOT the standard - # OpenAI API. The consumer OAuth token lacks platform API scopes. - extra_headers: dict[str, str] = { - "Authorization": f"Bearer {api_key}", - "User-Agent": "CodexBar", - } - account_id = get_codex_account_id() - if account_id: - extra_headers["ChatGPT-Account-Id"] = account_id - self._llm = LiteLLMProvider( - model=self.model, - api_key=api_key, - api_base="https://chatgpt.com/backend-api/codex", - extra_headers=extra_headers, - store=False, - allowed_openai_params=["store"], - ) - elif api_key and use_kimi_code: - # Kimi Code subscription uses the Kimi coding API (OpenAI-compatible). - # The api_base is set automatically by LiteLLMProvider for kimi/ models. - self._llm = LiteLLMProvider( - model=self.model, - api_key=api_key, - api_base=api_base, - ) - elif use_antigravity: - # Direct OAuth to Google's internal Cloud Code Assist gateway. - # No local proxy required — AntigravityProvider handles token - # refresh and Gemini-format request/response conversion natively. - from framework.llm.antigravity import AntigravityProvider # noqa: PLC0415 - - provider = AntigravityProvider(model=self.model) - if not provider.has_credentials(): - print( - "Warning: Antigravity credentials not found. " - "Run: uv run python core/antigravity_auth.py auth account add" - ) - self._llm = provider - else: - # Local models (e.g. 
Ollama) don't need an API key - if self._is_local_model(self.model): - self._llm = LiteLLMProvider( - model=self.model, - api_base=api_base, - ) - else: - # Fall back to environment variable - # First check api_key_env_var from config (set by quickstart) - api_key_env = llm_config.get("api_key_env_var") or self._get_api_key_env_var( - self.model - ) - if api_key_env and os.environ.get(api_key_env): - self._llm = LiteLLMProvider( - model=self.model, - api_key=os.environ[api_key_env], - api_base=api_base, - ) - else: - # Fall back to credential store - api_key = self._get_api_key_from_credential_store() - if api_key: - self._llm = LiteLLMProvider( - model=self.model, api_key=api_key, api_base=api_base - ) - # Set env var so downstream code (e.g. cleanup LLM in - # node._extract_json) can also find it - if api_key_env: - os.environ[api_key_env] = api_key - elif api_key_env: - logger.warning( - "%s not set. LLM calls will fail. " - "Set it with: export %s=your-api-key", - api_key_env, - api_key_env, - ) - - # Fail fast if the agent needs an LLM but none was configured - if self._llm is None: - has_llm_nodes = any( - node.node_type in ("event_loop", "gcu") for node in self.graph.nodes - ) - if has_llm_nodes: - from framework.credentials.models import CredentialError - - if self._is_local_model(self.model): - raise CredentialError( - f"Failed to initialize LLM for local model '{self.model}'. " - f"Ensure your local LLM server is running " - f"(e.g. 'ollama serve' for Ollama)." - ) - api_key_env = self._get_api_key_env_var(self.model) - hint = ( - f"Set it with: export {api_key_env}=your-api-key" - if api_key_env - else "Configure an API key for your LLM provider." - ) - raise CredentialError(f"LLM API key not found for model '{self.model}'. 
{hint}") - - # For GCU nodes: auto-register GCU MCP server if needed, then expand tool lists - has_gcu_nodes = any(node.node_type == "gcu" for node in self.graph.nodes) - if has_gcu_nodes: - from framework.graph.gcu import GCU_MCP_SERVER_CONFIG, GCU_SERVER_NAME - - # Auto-register GCU MCP server if tools aren't loaded yet - gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME) - if not gcu_tool_names: - # Resolve cwd to repo-level tools/ (not relative to agent_path) - gcu_config = dict(GCU_MCP_SERVER_CONFIG) - _repo_root = Path(__file__).resolve().parent.parent.parent.parent - gcu_config["cwd"] = str(_repo_root / "tools") - self._tool_registry.register_mcp_server(gcu_config) - gcu_tool_names = self._tool_registry.get_server_tool_names(GCU_SERVER_NAME) - - # Expand each GCU node's tools list to include all GCU server tools - if gcu_tool_names: - for node in self.graph.nodes: - if node.node_type == "gcu": - existing = set(node.tools) - for tool_name in sorted(gcu_tool_names): - if tool_name not in existing: - node.tools.append(tool_name) - - # For event_loop/gcu nodes: auto-register file tools MCP server, then expand tool lists - has_loop_nodes = any(node.node_type in ("event_loop", "gcu") for node in self.graph.nodes) - if has_loop_nodes: - from framework.graph.files import FILES_MCP_SERVER_CONFIG, FILES_MCP_SERVER_NAME - - files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME) - if not files_tool_names: - # Resolve cwd to repo-level tools/ (not relative to agent_path) - files_config = dict(FILES_MCP_SERVER_CONFIG) - _repo_root = Path(__file__).resolve().parent.parent.parent.parent - files_config["cwd"] = str(_repo_root / "tools") - self._tool_registry.register_mcp_server(files_config) - files_tool_names = self._tool_registry.get_server_tool_names(FILES_MCP_SERVER_NAME) - - if files_tool_names: - for node in self.graph.nodes: - if node.node_type in ("event_loop", "gcu"): - existing = set(node.tools) - for tool_name 
in sorted(files_tool_names): - if tool_name not in existing: - node.tools.append(tool_name) - - # Get tools for runtime - tools = list(self._tool_registry.get_tools().values()) - tool_executor = self._tool_registry.get_executor() - - # Collect connected account info for system prompt injection - accounts_prompt = "" - accounts_data: list[dict] | None = None - tool_provider_map: dict[str, str] | None = None - try: - from aden_tools.credentials.store_adapter import CredentialStoreAdapter - - if self._credential_store is not None: - adapter = CredentialStoreAdapter(store=self._credential_store) - else: - adapter = CredentialStoreAdapter.default() - accounts_data = adapter.get_all_account_info() - tool_provider_map = adapter.get_tool_provider_map() - if accounts_data: - from framework.graph.prompting import build_accounts_prompt - - accounts_prompt = build_accounts_prompt(accounts_data, tool_provider_map) - except Exception: - pass # Best-effort — agent works without account info - - # Skill configuration — the runtime handles discovery, loading, trust-gating and - # prompt rasterization. The runner just builds the config. 
- from framework.skills.config import SkillsConfig - from framework.skills.manager import SkillsManagerConfig - - skills_manager_config = SkillsManagerConfig( - skills_config=SkillsConfig.from_agent_vars( - default_skills=getattr(self, "_agent_default_skills", None), - skills=getattr(self, "_agent_skills", None), + # Build default pipeline stages + # Default infrastructure stages (always present) + pipeline_stages = [ + LlmProviderStage( + model=self.model, + mock_mode=self.mock_mode, + llm=self._llm, ), - project_root=self.agent_path, - interactive=self._interactive, - ) + CredentialResolverStage( + credential_store=self._credential_store, + ), + McpRegistryStage( + server_refs=mcp_refs, + agent_path=self.agent_path, + tool_registry=self._tool_registry, + ), + SkillRegistryStage( + project_root=self.agent_path, + interactive=self._interactive, + skills_config=SkillsConfig.from_agent_vars( + default_skills=getattr(self, "_agent_default_skills", None), + skills=getattr(self, "_agent_skills", None), + ), + ), + ] - self._setup_agent_runtime( - tools, - tool_executor, - accounts_prompt=accounts_prompt, - accounts_data=accounts_data, - tool_provider_map=tool_provider_map, + # Merge user-configured stages from ~/.hive/configuration.json + from framework.config import get_hive_config + from framework.pipeline.registry import build_pipeline_from_config + + hive_config = get_hive_config() + user_stages_config = hive_config.get("pipeline", {}).get("stages", []) + if user_stages_config: + user_pipeline = build_pipeline_from_config(user_stages_config) + pipeline_stages.extend(user_pipeline.stages) + + # Merge agent-level overrides from agent.json pipeline field + if agent_json.exists(): + try: + agent_pipeline = ( + _json.loads(agent_json.read_text(encoding="utf-8")) + .get("pipeline", {}) + .get("stages", []) + ) + if agent_pipeline: + agent_stages = build_pipeline_from_config(agent_pipeline) + pipeline_stages.extend(agent_stages.stages) + except Exception: + pass + + # 
Create AgentHost directly (no wrapper) + from framework.host.execution_manager import EntryPointSpec + from framework.orchestrator.checkpoint_config import CheckpointConfig + from framework.tracker.runtime_log_store import RuntimeLogStore + + self._agent_runtime = AgentHost( + graph=self.graph, + goal=self.goal, + storage_path=self._storage_path, + runtime_log_store=RuntimeLogStore( + base_path=self._storage_path / "runtime_logs", + ), + checkpoint_config=CheckpointConfig( + enabled=True, + checkpoint_on_node_complete=True, + checkpoint_max_age_days=7, + async_checkpoint=True, + ), + graph_id=self.graph.id or self.agent_path.name, event_bus=event_bus, - skills_manager_config=skills_manager_config, + pipeline_stages=pipeline_stages, ) + self._agent_runtime.register_entry_point( + EntryPointSpec( + id="default", + name="Default", + entry_node=self.graph.entry_node, + trigger_type="manual", + isolation_level="shared", + ), + ) + self._agent_runtime.intro_message = self.intro_message def _get_api_key_env_var(self, model: str) -> str | None: """Get the environment variable name for the API key based on model name.""" @@ -1833,83 +1785,6 @@ class AgentRunner: ) return model.lower().startswith(LOCAL_PREFIXES) - def _setup_agent_runtime( - self, - tools: list, - tool_executor: Callable | None, - accounts_prompt: str = "", - accounts_data: list[dict] | None = None, - tool_provider_map: dict[str, str] | None = None, - event_bus=None, - skills_catalog_prompt: str = "", - protocols_prompt: str = "", - skill_dirs: list[str] | None = None, - skills_manager_config=None, - ) -> None: - """Set up multi-entry-point execution using AgentRuntime.""" - entry_points = [] - - # Always create a primary entry point for the graph's entry node. - # For multi-entry-point agents this ensures the primary path (e.g. - # user-facing rule setup) is reachable alongside async entry points. 
- if self.graph.entry_node: - entry_points.insert( - 0, - EntryPointSpec( - id="default", - name="Default", - entry_node=self.graph.entry_node, - trigger_type="manual", - isolation_level="shared", - ), - ) - - # Create AgentRuntime with all entry points - log_store = RuntimeLogStore(base_path=self._storage_path / "runtime_logs") - - # Enable checkpointing by default for resumable sessions - from framework.graph.checkpoint_config import CheckpointConfig - - checkpoint_config = CheckpointConfig( - enabled=True, - checkpoint_on_node_start=False, # Only checkpoint after nodes complete - checkpoint_on_node_complete=True, - checkpoint_max_age_days=7, - async_checkpoint=True, # Non-blocking - ) - - # Handle runtime_config - only pass through if it's actually an AgentRuntimeConfig. - # Agents may export a RuntimeConfig (LLM settings) or queen-generated custom classes - # that would crash AgentRuntime if passed through. - runtime_config = None - if self.runtime_config is not None: - from framework.runtime.agent_runtime import AgentRuntimeConfig - - if isinstance(self.runtime_config, AgentRuntimeConfig): - runtime_config = self.runtime_config - - self._agent_runtime = create_agent_runtime( - graph=self.graph, - goal=self.goal, - storage_path=self._storage_path, - entry_points=entry_points, - llm=self._llm, - tools=tools, - tool_executor=tool_executor, - runtime_log_store=log_store, - checkpoint_config=checkpoint_config, - config=runtime_config, - graph_id=self.graph.id or self.agent_path.name, - accounts_prompt=accounts_prompt, - accounts_data=accounts_data, - tool_provider_map=tool_provider_map, - event_bus=event_bus, - skills_manager_config=skills_manager_config, - ) - - # Pass intro_message through for TUI display - self._agent_runtime.intro_message = self.intro_message - # ------------------------------------------------------------------ # Execution modes # @@ -1990,7 +1865,7 @@ class AgentRunner: sub_ids: list[str] = [] if has_queen and sys.stdin.isatty(): - from 
framework.runtime.event_bus import EventType + from framework.host.event_bus import EventType runtime = self._agent_runtime @@ -2246,7 +2121,7 @@ class AgentRunner: except ImportError: # aden_tools not installed - fall back to direct check has_llm_nodes = any( - node.node_type in ("event_loop", "gcu") for node in self.graph.nodes + node.node_type == "event_loop" for node in self.graph.nodes ) if has_llm_nodes: api_key_env = self._get_api_key_env_var(self.model) @@ -2283,7 +2158,7 @@ class AgentRunner: # Run synchronous cleanup self.cleanup() - async def __aenter__(self) -> "AgentRunner": + async def __aenter__(self) -> "AgentLoader": """Context manager entry.""" self._setup() if self._agent_runtime is not None: diff --git a/core/framework/runner/cli.py b/core/framework/loader/cli.py similarity index 96% rename from core/framework/runner/cli.py rename to core/framework/loader/cli.py index 3e94afb9..bd83fa8e 100644 --- a/core/framework/runner/cli.py +++ b/core/framework/loader/cli.py @@ -19,7 +19,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None: run_parser.add_argument( "agent_path", type=str, - help="Path to agent folder (containing agent.json)", + help="Path to agent folder (containing agent.json or agent.py)", ) run_parser.add_argument( "--input", @@ -87,7 +87,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None: info_parser.add_argument( "agent_path", type=str, - help="Path to agent folder (containing agent.json)", + help="Path to agent folder (containing agent.json or agent.py)", ) info_parser.add_argument( "--json", @@ -105,7 +105,7 @@ def register_commands(subparsers: argparse._SubParsersAction) -> None: validate_parser.add_argument( "agent_path", type=str, - help="Path to agent folder (containing agent.json)", + help="Path to agent folder (containing agent.json or agent.py)", ) validate_parser.set_defaults(func=cmd_validate) @@ -310,7 +310,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None 
= None): Updated runner if user proceeds, None if user aborts. """ from framework.credentials.setup import CredentialSetupSession - from framework.runner import AgentRunner + from framework.loader import AgentLoader while True: print() @@ -328,7 +328,7 @@ def _prompt_before_start(agent_path: str, runner, model: str | None = None): if result.success: # Reload runner with updated credentials try: - runner = AgentRunner.load(agent_path, model=model) + runner = AgentLoader.load(agent_path, model=model) except Exception as e: print(f"Error reloading agent: {e}") return None @@ -342,7 +342,7 @@ def cmd_run(args: argparse.Namespace) -> int: from framework.credentials.models import CredentialError from framework.observability import configure_logging - from framework.runner import AgentRunner + from framework.loader import AgentLoader # Set logging level (quiet by default for cleaner output) if args.quiet: @@ -390,7 +390,7 @@ def cmd_run(args: argparse.Namespace) -> int: # Standard execution # AgentRunner handles credential setup interactively when stdin is a TTY. 
try: - runner = AgentRunner.load( + runner = AgentLoader.load( args.agent_path, model=args.model, ) @@ -528,10 +528,10 @@ def cmd_run(args: argparse.Namespace) -> int: def cmd_info(args: argparse.Namespace) -> int: """Show agent information.""" from framework.credentials.models import CredentialError - from framework.runner import AgentRunner + from framework.loader import AgentLoader try: - runner = AgentRunner.load(args.agent_path) + runner = AgentLoader.load(args.agent_path) except CredentialError as e: print(f"\n{e}", file=sys.stderr) return 1 @@ -595,10 +595,10 @@ def cmd_info(args: argparse.Namespace) -> int: def cmd_validate(args: argparse.Namespace) -> int: """Validate an exported agent.""" from framework.credentials.models import CredentialError - from framework.runner import AgentRunner + from framework.loader import AgentLoader try: - runner = AgentRunner.load(args.agent_path) + runner = AgentLoader.load(args.agent_path) except CredentialError as e: print(f"\n{e}", file=sys.stderr) return 1 @@ -632,7 +632,7 @@ def cmd_validate(args: argparse.Namespace) -> int: def cmd_list(args: argparse.Namespace) -> int: """List available agents.""" - from framework.runner import AgentRunner + from framework.loader import AgentLoader directory = Path(args.directory) if not directory.exists(): @@ -644,7 +644,7 @@ def cmd_list(args: argparse.Namespace) -> int: for path in directory.iterdir(): if _is_valid_agent_dir(path): try: - runner = AgentRunner.load(path) + runner = AgentLoader.load(path) info = runner.info() agents.append( { @@ -686,7 +686,7 @@ def cmd_list(args: argparse.Namespace) -> int: def _interactive_approval(request): """Interactive approval callback for HITL mode.""" - from framework.graph import ApprovalDecision, ApprovalResult + from framework.orchestrator import ApprovalDecision, ApprovalResult print() print("=" * 60) @@ -775,7 +775,7 @@ def cmd_shell(args: argparse.Namespace) -> int: from framework.credentials.models import CredentialError from 
framework.observability import configure_logging - from framework.runner import AgentRunner + from framework.loader import AgentLoader configure_logging(level="INFO") @@ -789,7 +789,7 @@ def cmd_shell(args: argparse.Namespace) -> int: return 1 try: - runner = AgentRunner.load(agent_path) + runner = AgentLoader.load(agent_path) except CredentialError as e: print(f"\n{e}", file=sys.stderr) return 1 @@ -1004,17 +1004,35 @@ def _get_framework_agents_dir() -> Path: def _extract_python_agent_metadata(agent_path: Path) -> tuple[str, str]: - """Extract name and description from a Python-based agent's config.py. + """Extract name and description from an agent directory. - Uses AST parsing to safely extract values without executing code. + Checks agent.json first (declarative), then falls back to config.py + (legacy Python). Uses AST parsing for Python to avoid executing code. Returns (name, description) tuple, with fallbacks if parsing fails. """ import ast - config_path = agent_path / "config.py" fallback_name = agent_path.name.replace("_", " ").title() fallback_desc = "(Python-based agent)" + # Declarative agent: read from agent.json + agent_json = agent_path / "agent.json" + if agent_json.exists(): + try: + import json + + data = json.loads(agent_json.read_text(encoding="utf-8")) + if isinstance(data, dict): + name = data.get("name", fallback_name) + # Convert kebab-case to Title Case for display + if "-" in name and " " not in name: + name = name.replace("-", " ").title() + desc = data.get("description", fallback_desc) + return name, desc + except Exception: + pass + + config_path = agent_path / "config.py" if not config_path.exists(): return fallback_name, fallback_desc @@ -1083,7 +1101,7 @@ def _is_valid_agent_dir(path: Path) -> bool: def _has_agents(directory: Path) -> bool: - """Check if a directory contains any valid agents (folders with agent.json or agent.py).""" + """Check if a directory contains any valid agents.""" if not directory.exists(): return False 
return any(_is_valid_agent_dir(p) for p in directory.iterdir()) diff --git a/core/framework/runner/mcp_client.py b/core/framework/loader/mcp_client.py similarity index 99% rename from core/framework/runner/mcp_client.py rename to core/framework/loader/mcp_client.py index df665571..d2e36273 100644 --- a/core/framework/runner/mcp_client.py +++ b/core/framework/loader/mcp_client.py @@ -14,7 +14,7 @@ from typing import Any, Literal import httpx -from framework.runner.mcp_errors import MCPToolNotFoundError +from framework.loader.mcp_errors import MCPToolNotFoundError logger = logging.getLogger(__name__) diff --git a/core/framework/runner/mcp_connection_manager.py b/core/framework/loader/mcp_connection_manager.py similarity index 99% rename from core/framework/runner/mcp_connection_manager.py rename to core/framework/loader/mcp_connection_manager.py index 98bb9a24..f5118d94 100644 --- a/core/framework/runner/mcp_connection_manager.py +++ b/core/framework/loader/mcp_connection_manager.py @@ -5,7 +5,7 @@ import threading import httpx -from framework.runner.mcp_client import MCPClient, MCPServerConfig +from framework.loader.mcp_client import MCPClient, MCPServerConfig logger = logging.getLogger(__name__) diff --git a/core/framework/runner/mcp_errors.py b/core/framework/loader/mcp_errors.py similarity index 100% rename from core/framework/runner/mcp_errors.py rename to core/framework/loader/mcp_errors.py diff --git a/core/framework/runner/mcp_registry.py b/core/framework/loader/mcp_registry.py similarity index 99% rename from core/framework/runner/mcp_registry.py rename to core/framework/loader/mcp_registry.py index 4de4bb93..adaaebaa 100644 --- a/core/framework/runner/mcp_registry.py +++ b/core/framework/loader/mcp_registry.py @@ -14,9 +14,9 @@ from typing import Any, Literal import httpx -from framework.runner.mcp_client import MCPClient, MCPServerConfig -from framework.runner.mcp_connection_manager import MCPConnectionManager -from framework.runner.mcp_errors import ( 
+from framework.loader.mcp_client import MCPClient, MCPServerConfig +from framework.loader.mcp_connection_manager import MCPConnectionManager +from framework.loader.mcp_errors import ( MCPError, MCPErrorCode, MCPInstallError, diff --git a/core/framework/runner/mcp_registry_cli.py b/core/framework/loader/mcp_registry_cli.py similarity index 99% rename from core/framework/runner/mcp_registry_cli.py rename to core/framework/loader/mcp_registry_cli.py index b84b59dc..ccaa4861 100644 --- a/core/framework/runner/mcp_registry_cli.py +++ b/core/framework/loader/mcp_registry_cli.py @@ -28,7 +28,7 @@ from typing import Any def _get_registry(base_path: Path | None = None): """Initialize and return an MCPRegistry instance.""" - from framework.runner.mcp_registry import MCPRegistry + from framework.loader.mcp_registry import MCPRegistry registry = MCPRegistry(base_path=base_path) registry.initialize() diff --git a/core/framework/runner/preload_validation.py b/core/framework/loader/preload_validation.py similarity index 98% rename from core/framework/runner/preload_validation.py rename to core/framework/loader/preload_validation.py index c04ceabf..9e76e3d4 100644 --- a/core/framework/runner/preload_validation.py +++ b/core/framework/loader/preload_validation.py @@ -11,8 +11,8 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING if TYPE_CHECKING: - from framework.graph.edge import GraphSpec - from framework.graph.node import NodeSpec + from framework.orchestrator.edge import GraphSpec + from framework.orchestrator.node import NodeSpec logger = logging.getLogger(__name__) diff --git a/core/framework/runner/protocol.py b/core/framework/loader/protocol.py similarity index 100% rename from core/framework/runner/protocol.py rename to core/framework/loader/protocol.py diff --git a/core/framework/runner/tool_registry.py b/core/framework/loader/tool_registry.py similarity index 97% rename from core/framework/runner/tool_registry.py rename to 
core/framework/loader/tool_registry.py index 5ea8154f..4c862e44 100644 --- a/core/framework/runner/tool_registry.py +++ b/core/framework/loader/tool_registry.py @@ -262,15 +262,21 @@ class ToolRegistry: is_error=False, ) + registry_ref = self + def executor(tool_use: ToolUse) -> ToolResult: - if tool_use.name not in self._tools: + # Check if credential files changed (lightweight dir listing). + # If new OAuth tokens appeared, restarts MCP servers to pick them up. + registry_ref.resync_mcp_servers_if_needed() + + if tool_use.name not in registry_ref._tools: return ToolResult( tool_use_id=tool_use.id, content=json.dumps({"error": f"Unknown tool: {tool_use.name}"}), is_error=True, ) - registered = self._tools[tool_use.name] + registered = registry_ref._tools[tool_use.name] try: result = registered.executor(tool_use.input) @@ -635,8 +641,8 @@ class ToolRegistry: Number of tools registered from this server """ try: - from framework.runner.mcp_client import MCPClient, MCPServerConfig - from framework.runner.mcp_connection_manager import MCPConnectionManager + from framework.loader.mcp_client import MCPClient, MCPServerConfig + from framework.loader.mcp_connection_manager import MCPConnectionManager # Build config object config = MCPServerConfig( @@ -883,7 +889,7 @@ class ToolRegistry: """Re-run ``mcp_registry.json`` resolution and register servers (post-resync).""" if self._mcp_registry_agent_path is None: return - from framework.runner.mcp_registry import MCPRegistry + from framework.loader.mcp_registry import MCPRegistry try: reg = MCPRegistry() @@ -922,6 +928,11 @@ class ToolRegistry: clients and re-loads them so the new subprocess picks up the fresh credentials. + Note: Individual credential TTL/refresh is handled by the MCP server + process internally -- it resolves tokens from the credential store + on every tool call, not at startup. This method only handles the case + where entirely new credential files appear. 
+ Returns True if a resync was performed, False otherwise. """ if not self._mcp_clients or self._mcp_config_path is None: @@ -975,7 +986,7 @@ class ToolRegistry: server_name = self._mcp_client_servers.get(client_id, client.config.name) try: if client_id in self._mcp_managed_clients: - from framework.runner.mcp_connection_manager import MCPConnectionManager + from framework.loader.mcp_connection_manager import MCPConnectionManager MCPConnectionManager.get_instance().release(server_name) else: diff --git a/core/framework/orchestrator/__init__.py b/core/framework/orchestrator/__init__.py new file mode 100644 index 00000000..6ffc277a --- /dev/null +++ b/core/framework/orchestrator/__init__.py @@ -0,0 +1,27 @@ +"""Orchestrator layer -- how agents are composed via graphs. + +Lazy imports to avoid circular dependencies with graph/event_loop/*. +""" + + +def __getattr__(name: str): + if name in ("GraphContext",): + from framework.orchestrator.context import GraphContext + return GraphContext + if name in ("DEFAULT_MAX_TOKENS", "EdgeCondition", "EdgeSpec", "GraphSpec"): + from framework.orchestrator import edge as _e + return getattr(_e, name) + if name in ("Orchestrator", "ExecutionResult"): + from framework.orchestrator import orchestrator as _o + return getattr(_o, name) + if name in ("Constraint", "Goal", "GoalStatus", "SuccessCriterion"): + from framework.orchestrator import goal as _g + return getattr(_g, name) + if name in ("DataBuffer", "NodeContext", "NodeProtocol", "NodeResult", "NodeSpec"): + from framework.orchestrator import node as _n + return getattr(_n, name) + if name in ("NodeWorker", "Activation", "FanOutTag", "FanOutTracker", + "WorkerCompletion", "WorkerLifecycle"): + from framework.orchestrator import node_worker as _nw + return getattr(_nw, name) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/core/framework/graph/checkpoint_config.py b/core/framework/orchestrator/checkpoint_config.py similarity index 100% rename 
from core/framework/graph/checkpoint_config.py rename to core/framework/orchestrator/checkpoint_config.py diff --git a/core/framework/graph/client_io.py b/core/framework/orchestrator/client_io.py similarity index 99% rename from core/framework/graph/client_io.py rename to core/framework/orchestrator/client_io.py index 992b5818..1fbc66c9 100644 --- a/core/framework/graph/client_io.py +++ b/core/framework/orchestrator/client_io.py @@ -16,7 +16,7 @@ from collections.abc import AsyncIterator from typing import TYPE_CHECKING if TYPE_CHECKING: - from framework.runtime.event_bus import EventBus + from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) diff --git a/core/framework/graph/context.py b/core/framework/orchestrator/context.py similarity index 91% rename from core/framework/graph/context.py rename to core/framework/orchestrator/context.py index 5b4fba4d..381c0474 100644 --- a/core/framework/graph/context.py +++ b/core/framework/orchestrator/context.py @@ -13,10 +13,10 @@ import asyncio from dataclasses import dataclass, field from typing import Any -from framework.graph.edge import GraphSpec -from framework.graph.goal import Goal -from framework.graph.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec -from framework.runtime.core import Runtime +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.goal import Goal +from framework.orchestrator.node import DataBuffer, NodeContext, NodeProtocol, NodeSpec +from framework.tracker.decision_tracker import DecisionTracker @dataclass @@ -26,7 +26,7 @@ class GraphContext: graph: GraphSpec goal: Goal buffer: DataBuffer - runtime: Runtime + runtime: DecisionTracker llm: Any # LLMProvider tools: list[Any] # list[Tool] tool_executor: Any # Callable @@ -106,7 +106,7 @@ def build_node_accounts_prompt( resolved = accounts_prompt if accounts_data and tool_provider_map: - from framework.graph.prompting import build_accounts_prompt + from framework.orchestrator.prompting 
import build_accounts_prompt filtered = build_accounts_prompt( accounts_data, @@ -125,11 +125,27 @@ def _resolve_available_tools( tools: list[Any], override_tools: list[Any] | None, ) -> list[Any]: - """Select tools available to the current node.""" + """Select tools available to the current node. + + Respects ``node_spec.tool_access_policy``: + - ``"all"`` -- all tools from the registry (no filtering). + - ``"explicit"`` -- only tools whose name appears in ``node_spec.tools``. + If the list is empty, **no tools** are given (default-deny). + - ``"none"`` -- no tools at all. + """ if override_tools is not None: return list(override_tools) + policy = getattr(node_spec, "tool_access_policy", "explicit") + + if policy == "none": + return [] + + if policy == "all": + return list(tools) + + # "explicit" (default): only tools named in node_spec.tools. if not node_spec.tools: return [] @@ -149,7 +165,7 @@ def _derive_input_data(buffer: DataBuffer, input_keys: list[str]) -> dict[str, A def build_node_context( *, - runtime: Runtime, + runtime: DecisionTracker, node_spec: NodeSpec, buffer: DataBuffer, goal: Goal, @@ -234,9 +250,6 @@ def build_node_context( execution_id=execution_id, run_id=run_id, stream_id=stream_id, - node_registry=node_registry or {}, - all_tools=list(all_tools or tools), - shared_node_registry=shared_node_registry or {}, dynamic_tools_provider=dynamic_tools_provider, dynamic_prompt_provider=dynamic_prompt_provider, dynamic_memory_provider=dynamic_memory_provider, @@ -308,9 +321,6 @@ def build_node_context_from_graph_context( execution_id=gc.execution_id, run_id=gc.run_id, stream_id=gc.stream_id, - node_registry=node_registry or gc.node_spec_registry, - all_tools=gc.tools, - shared_node_registry=gc.node_registry, dynamic_tools_provider=gc.dynamic_tools_provider, dynamic_prompt_provider=gc.dynamic_prompt_provider, dynamic_memory_provider=gc.dynamic_memory_provider, diff --git a/core/framework/graph/context_handoff.py 
b/core/framework/orchestrator/context_handoff.py similarity index 98% rename from core/framework/graph/context_handoff.py rename to core/framework/orchestrator/context_handoff.py index 69831506..0d9a7e54 100644 --- a/core/framework/graph/context_handoff.py +++ b/core/framework/orchestrator/context_handoff.py @@ -6,10 +6,10 @@ import logging from dataclasses import dataclass from typing import TYPE_CHECKING, Any -from framework.graph.conversation import _try_extract_key +from framework.agent_loop.conversation import _try_extract_key if TYPE_CHECKING: - from framework.graph.conversation import NodeConversation + from framework.agent_loop.conversation import NodeConversation from framework.llm.provider import LLMProvider logger = logging.getLogger(__name__) diff --git a/core/framework/graph/conversation_judge.py b/core/framework/orchestrator/conversation_judge.py similarity index 99% rename from core/framework/graph/conversation_judge.py rename to core/framework/orchestrator/conversation_judge.py index 298776b4..e5a57a06 100644 --- a/core/framework/graph/conversation_judge.py +++ b/core/framework/orchestrator/conversation_judge.py @@ -15,7 +15,7 @@ import logging from dataclasses import dataclass from typing import Any -from framework.graph.conversation import NodeConversation +from framework.agent_loop.conversation import NodeConversation from framework.llm.provider import LLMProvider logger = logging.getLogger(__name__) diff --git a/core/framework/graph/edge.py b/core/framework/orchestrator/edge.py similarity index 89% rename from core/framework/graph/edge.py rename to core/framework/orchestrator/edge.py index 284f66f8..a617edb9 100644 --- a/core/framework/graph/edge.py +++ b/core/framework/orchestrator/edge.py @@ -29,7 +29,7 @@ from typing import Any from pydantic import BaseModel, Field, model_validator -from framework.graph.safe_eval import safe_eval +from framework.orchestrator.safe_eval import safe_eval logger = logging.getLogger(__name__) @@ -538,13 +538,6 @@ 
class GraphSpec(BaseModel): for edge in self.get_outgoing_edges(current): to_visit.append(edge.target) - # Also mark sub-agents as reachable (they're invoked via delegate_to_sub_agent, not edges) - for node in self.nodes: - if node.id in reachable: - sub_agents = getattr(node, "sub_agents", []) or [] - for sub_agent_id in sub_agents: - reachable.add(sub_agent_id) - for node in self.nodes: if node.id not in reachable: # Skip if node is a pause node or entry point target @@ -583,48 +576,4 @@ class GraphSpec(BaseModel): else: seen_keys[key] = node_id - # GCU nodes must only be used as subagents - gcu_node_ids = {n.id for n in self.nodes if n.node_type == "gcu"} - if gcu_node_ids: - # GCU nodes must not be entry nodes - if self.entry_node in gcu_node_ids: - errors.append( - f"GCU node '{self.entry_node}' is used as entry node. " - "GCU nodes must only be used as subagents via delegate_to_sub_agent()." - ) - - # GCU nodes must not be terminal nodes - for term in self.terminal_nodes: - if term in gcu_node_ids: - errors.append( - f"GCU node '{term}' is used as terminal node. " - "GCU nodes must only be used as subagents." - ) - - # GCU nodes must not be connected via edges - for edge in self.edges: - if edge.source in gcu_node_ids: - errors.append( - f"GCU node '{edge.source}' is used as edge source (edge '{edge.id}'). " - "GCU nodes must only be used as subagents, not connected via edges." - ) - if edge.target in gcu_node_ids: - errors.append( - f"GCU node '{edge.target}' is used as edge target (edge '{edge.id}'). " - "GCU nodes must only be used as subagents, not connected via edges." - ) - - # GCU nodes must be referenced in at least one parent's sub_agents - referenced_subagents = set() - for node in self.nodes: - for sa_id in node.sub_agents or []: - referenced_subagents.add(sa_id) - - orphaned = gcu_node_ids - referenced_subagents - for nid in orphaned: - errors.append( - f"GCU node '{nid}' is not referenced in any node's sub_agents list. 
" - "GCU nodes must be declared as subagents of a parent node." - ) - return {"errors": errors, "warnings": warnings} diff --git a/core/framework/graph/files.py b/core/framework/orchestrator/files.py similarity index 100% rename from core/framework/graph/files.py rename to core/framework/orchestrator/files.py diff --git a/core/framework/graph/gcu.py b/core/framework/orchestrator/gcu.py similarity index 86% rename from core/framework/graph/gcu.py rename to core/framework/orchestrator/gcu.py index c336faf4..a68d2d11 100644 --- a/core/framework/graph/gcu.py +++ b/core/framework/orchestrator/gcu.py @@ -1,34 +1,14 @@ -"""GCU (browser automation) node type constants. +"""Browser automation best-practices prompt. -A ``gcu`` node is an ``event_loop`` node with two automatic enhancements: -1. A canonical browser best-practices system prompt is prepended. -2. All tools from the GCU MCP server are auto-included. +This module provides ``GCU_BROWSER_SYSTEM_PROMPT`` -- a canonical set of +browser automation guidelines that can be included in any node's system +prompt that uses browser tools from the gcu-tools MCP server. -No new ``NodeProtocol`` subclass — the ``gcu`` type is purely a declarative -signal processed by the runner and executor at setup time. +Browser tools are registered via the global MCP registry (gcu-tools). +Nodes that need browser access declare ``tools: {policy: "all"}`` in their +agent.json config. 
""" -# --------------------------------------------------------------------------- -# MCP server identity -# --------------------------------------------------------------------------- - -GCU_SERVER_NAME = "gcu-tools" -"""Name used to identify the GCU MCP server in ``mcp_servers.json``.""" - -GCU_MCP_SERVER_CONFIG: dict = { - "name": GCU_SERVER_NAME, - "transport": "stdio", - "command": "uv", - "args": ["run", "python", "-m", "gcu.server", "--stdio"], - "cwd": "../../tools", - "description": "GCU tools for browser automation", -} -"""Default stdio config for the GCU MCP server (relative to exports//).""" - -# --------------------------------------------------------------------------- -# Browser best-practices system prompt -# --------------------------------------------------------------------------- - GCU_BROWSER_SYSTEM_PROMPT = """\ # Browser Automation Best Practices diff --git a/core/framework/graph/goal.py b/core/framework/orchestrator/goal.py similarity index 100% rename from core/framework/graph/goal.py rename to core/framework/orchestrator/goal.py diff --git a/core/framework/graph/node.py b/core/framework/orchestrator/node.py similarity index 95% rename from core/framework/graph/node.py rename to core/framework/orchestrator/node.py index a430f8d2..6c474817 100644 --- a/core/framework/graph/node.py +++ b/core/framework/orchestrator/node.py @@ -25,7 +25,7 @@ from typing import Any from pydantic import BaseModel, Field from framework.llm.provider import LLMProvider, Tool -from framework.runtime.core import Runtime +from framework.tracker.decision_tracker import DecisionTracker logger = logging.getLogger(__name__) @@ -144,15 +144,19 @@ class NodeSpec(BaseModel): # For LLM nodes system_prompt: str | None = Field(default=None, description="System prompt for LLM nodes") tools: list[str] = Field(default_factory=list, description="Tool names this node can use") + tool_access_policy: str = Field( + default="explicit", + description=( + "Tool access policy for this 
node. " + "'all' = all tools from registry, " + "'explicit' = only tools listed in `tools` (default, recommended), " + "'none' = no tools at all." + ), + ) model: str | None = Field( default=None, description="Specific model to use (defaults to graph default)" ) - # For subagent delegation - sub_agents: list[str] = Field( - default_factory=list, - description="Node IDs that can be invoked as subagents from this node", - ) # For function nodes function: str | None = Field( default=None, description="Function name or path for function nodes" @@ -459,7 +463,7 @@ class NodeContext: """ # Core runtime - runtime: Runtime + runtime: DecisionTracker # Node identity node_id: str @@ -526,20 +530,6 @@ class NodeContext: # Falls back to node_id when not set (legacy / standalone executor). stream_id: str = "" - # Subagent mode - is_subagent_mode: bool = False # True when running as a subagent (prevents nested delegation) - report_callback: Any = None # async (message: str, data: dict | None) -> None - node_registry: dict[str, "NodeSpec"] = field(default_factory=dict) # For subagent lookup - - # Full tool catalog (unfiltered) — used by _execute_subagent to resolve - # subagent tools that aren't in the parent node's filtered available_tools. - all_tools: list[Tool] = field(default_factory=list) - - # Shared reference to the executor's node_registry — used by subagent - # escalation (_EscalationReceiver) to register temporary receivers that - # the inject_input() routing chain can find. - shared_node_registry: dict[str, Any] = field(default_factory=dict) - # Dynamic tool provider — when set, EventLoopNode rebuilds the tool # list from this callback at the start of each iteration. Used by # the queen to switch between building-mode and running-mode tools. 
diff --git a/core/framework/graph/worker_agent.py b/core/framework/orchestrator/node_worker.py similarity index 97% rename from core/framework/graph/worker_agent.py rename to core/framework/orchestrator/node_worker.py index ce9d8b4e..436096f9 100644 --- a/core/framework/graph/worker_agent.py +++ b/core/framework/orchestrator/node_worker.py @@ -19,15 +19,15 @@ from dataclasses import dataclass, field from enum import StrEnum from typing import Any -from framework.graph.context import GraphContext, build_node_context_from_graph_context -from framework.graph.edge import EdgeCondition, EdgeSpec -from framework.graph.node import ( +from framework.orchestrator.context import GraphContext, build_node_context_from_graph_context +from framework.orchestrator.edge import EdgeCondition, EdgeSpec +from framework.orchestrator.node import ( NodeContext, NodeProtocol, NodeResult, NodeSpec, ) -from framework.graph.validator import OutputValidator +from framework.orchestrator.validator import OutputValidator logger = logging.getLogger(__name__) @@ -109,7 +109,7 @@ class RetryState: # --------------------------------------------------------------------------- -class WorkerAgent: +class NodeWorker: """First-class autonomous worker for one node in the graph. Lifecycle: @@ -355,7 +355,7 @@ class WorkerAgent: # Only skip retries for actual EventLoopNode instances (they handle # retries internally). Custom NodeProtocol impls registered via # register_node should be retried by the executor. 
- from framework.graph.event_loop_node import EventLoopNode as _ELN + from framework.agent_loop.agent_loop import AgentLoop as _ELN if isinstance(node_impl, _ELN): max_retries = 0 @@ -603,10 +603,10 @@ class WorkerAgent: return self._node_impl # Auto-create EventLoopNode - if self.node_spec.node_type in ("event_loop", "gcu"): - from framework.graph.event_loop.types import LoopConfig - from framework.graph.event_loop_node import EventLoopNode - from framework.graph.node import warn_if_deprecated_client_facing + if self.node_spec.node_type == "event_loop": + from framework.agent_loop.internals.types import LoopConfig + from framework.agent_loop.agent_loop import AgentLoop + from framework.orchestrator.node import warn_if_deprecated_client_facing conv_store = None if gc.storage_path: @@ -619,7 +619,7 @@ class WorkerAgent: warn_if_deprecated_client_facing(self.node_spec) default_max_iter = 100 if self.node_spec.supports_direct_user_io() else 50 - node = EventLoopNode( + node = AgentLoop( event_bus=gc.event_bus, judge=None, config=LoopConfig( @@ -734,7 +734,7 @@ class WorkerAgent: if not next_spec or next_spec.node_type != "event_loop": return - from framework.graph.prompting import ( + from framework.orchestrator.prompting import ( TransitionSpec, build_narrative, build_system_prompt_for_node_context, diff --git a/core/framework/graph/executor.py b/core/framework/orchestrator/orchestrator.py similarity index 97% rename from core/framework/graph/executor.py rename to core/framework/orchestrator/orchestrator.py index c2015744..666b021a 100644 --- a/core/framework/graph/executor.py +++ b/core/framework/orchestrator/orchestrator.py @@ -16,21 +16,21 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any -from framework.graph.checkpoint_config import CheckpointConfig -from framework.graph.context import GraphContext, build_node_context -from framework.graph.conversation import LEGACY_RUN_ID -from framework.graph.edge import EdgeCondition, 
EdgeSpec, GraphSpec -from framework.graph.goal import Goal -from framework.graph.node import ( +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.context import GraphContext, build_node_context +from framework.agent_loop.conversation import LEGACY_RUN_ID +from framework.orchestrator.edge import EdgeCondition, EdgeSpec, GraphSpec +from framework.orchestrator.goal import Goal +from framework.orchestrator.node import ( DataBuffer, NodeProtocol, NodeResult, NodeSpec, ) -from framework.graph.validator import OutputValidator +from framework.orchestrator.validator import OutputValidator from framework.llm.provider import LLMProvider, Tool from framework.observability import set_trace_context -from framework.runtime.core import Runtime +from framework.tracker.decision_tracker import DecisionTracker from framework.schemas.checkpoint import Checkpoint from framework.storage.checkpoint_store import CheckpointStore from framework.utils.io import atomic_write @@ -112,7 +112,7 @@ class ParallelExecutionConfig: branch_timeout_seconds: float = 300.0 -class GraphExecutor: +class Orchestrator: """ Executes agent graphs. @@ -133,7 +133,7 @@ class GraphExecutor: def __init__( self, - runtime: Runtime, + runtime: DecisionTracker, llm: LLMProvider | None = None, tools: list[Tool] | None = None, tool_executor: Callable | None = None, @@ -165,7 +165,7 @@ class GraphExecutor: Initialize the executor. 
Args: - runtime: Runtime for decision logging + runtime: DecisionTracker for decision logging llm: LLM provider for LLM nodes tools: Available tools tool_executor: Function to execute tools @@ -202,7 +202,7 @@ class GraphExecutor: self.validator = OutputValidator() self.logger = logging.getLogger(__name__) self.logger.debug( - "[GraphExecutor.__init__] Created with" + "[Orchestrator.__init__] Created with" " stream_id=%s, execution_id=%s," " initial node_registry keys: %s", stream_id, @@ -361,8 +361,8 @@ class GraphExecutor: Uses the same recursive binary-search splitting as EventLoopNode. """ - from framework.graph.conversation import extract_tool_call_history - from framework.graph.event_loop_node import _is_context_too_large_error + from framework.agent_loop.conversation import extract_tool_call_history + from framework.agent_loop.agent_loop import _is_context_too_large_error if _depth > self._PHASE_LLM_MAX_DEPTH: raise RuntimeError("Phase LLM compaction recursion limit") @@ -690,7 +690,7 @@ class GraphExecutor: # and spillover files share the same session-scoped directory. 
_ctx_token = None if self._storage_path: - from framework.runner.tool_registry import ToolRegistry + from framework.loader.tool_registry import ToolRegistry _ctx_token = ToolRegistry.set_execution_context( data_dir=str(self._storage_path / "data"), @@ -712,13 +712,12 @@ class GraphExecutor: finally: if _ctx_token is not None: - from framework.runner.tool_registry import ToolRegistry + from framework.loader.tool_registry import ToolRegistry ToolRegistry.reset_execution_context(_ctx_token) VALID_NODE_TYPES = { "event_loop", - "gcu", } # Node types removed in v0.5 — provide migration guidance REMOVED_NODE_TYPES = { @@ -736,11 +735,11 @@ class GraphExecutor: # Check registry first if node_spec.id in self.node_registry: logger.debug( - "[GraphExecutor._get_node_implementation] Found node '%s' in registry", node_spec.id + "[Orchestrator._get_node_implementation] Found node '%s' in registry", node_spec.id ) return self.node_registry[node_spec.id] logger.debug( - "[GraphExecutor._get_node_implementation]" + "[Orchestrator._get_node_implementation]" " Node '%s' not in registry (keys: %s)," " creating new", node_spec.id, @@ -764,10 +763,10 @@ class GraphExecutor: ) # Create based on type - if node_spec.node_type in ("event_loop", "gcu"): + if node_spec.node_type == "event_loop": # Auto-create EventLoopNode with sensible defaults. # Custom configs can still be pre-registered via node_registry. 
- from framework.graph.event_loop_node import EventLoopNode, LoopConfig + from framework.agent_loop.agent_loop import AgentLoop, LoopConfig # Create a FileConversationStore if a storage path is available conv_store = None @@ -787,13 +786,13 @@ class GraphExecutor: if self._storage_path: spillover = str(self._storage_path / "data") - from framework.graph.node import warn_if_deprecated_client_facing + from framework.orchestrator.node import warn_if_deprecated_client_facing warn_if_deprecated_client_facing(node_spec) lc = self._loop_config default_max_iter = 100 if node_spec.supports_direct_user_io() else 50 - node = EventLoopNode( + node = AgentLoop( event_bus=self._event_bus, judge=None, # implicit judge: accept when output_keys are filled config=LoopConfig( @@ -812,7 +811,7 @@ class GraphExecutor: # Cache so inject_event() is reachable for queen interaction and escalation routing self.node_registry[node_spec.id] = node logger.debug( - "[GraphExecutor._get_node_implementation]" + "[Orchestrator._get_node_implementation]" " Cached node '%s' in node_registry," " registry now has keys: %s", node_spec.id, @@ -998,10 +997,10 @@ class GraphExecutor: branch_impl = self._get_node_implementation(node_spec, graph.cleanup_llm_model) effective_max_retries = node_spec.max_retries - # Only override for actual EventLoopNode instances, not custom NodeProtocol impls - from framework.graph.event_loop_node import EventLoopNode + # Only override for actual AgentLoop instances, not custom NodeProtocol impls + from framework.agent_loop.agent_loop import AgentLoop as _AgentLoop # noqa: F811 - if isinstance(branch_impl, EventLoopNode) and effective_max_retries > 1: + if isinstance(branch_impl, _AgentLoop) and effective_max_retries > 1: self.logger.warning( f"EventLoopNode '{node_spec.id}' has " f"max_retries={effective_max_retries}. 
Overriding " @@ -1042,9 +1041,6 @@ class GraphExecutor: execution_id=self._execution_id, run_id=self._run_id, stream_id=self._stream_id, - node_registry=node_registry, - all_tools=self.tools, - shared_node_registry=self.node_registry, dynamic_tools_provider=self.dynamic_tools_provider, dynamic_prompt_provider=self.dynamic_prompt_provider, dynamic_memory_provider=self.dynamic_memory_provider, @@ -1293,14 +1289,14 @@ class GraphExecutor: Replaces the imperative while-loop with autonomous workers that self-activate based on edge conditions and fan-out tracking. """ - from framework.graph.worker_agent import ( + from framework.orchestrator.node_worker import ( Activation, FanOutTag, - WorkerAgent, + NodeWorker, WorkerCompletion, WorkerLifecycle, ) - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType # Build shared graph context gc = GraphContext( @@ -1339,9 +1335,9 @@ class GraphExecutor: ) # Create one WorkerAgent per node - workers: dict[str, WorkerAgent] = {} + workers: dict[str, NodeWorker] = {} for node_spec in graph.nodes: - workers[node_spec.id] = WorkerAgent(node_spec=node_spec, graph_context=gc) + workers[node_spec.id] = NodeWorker(node_spec=node_spec, graph_context=gc) # Identify entry workers (graph entry node, not based on edge count) # A node can be the entry point AND have incoming feedback edges. 
@@ -1442,7 +1438,7 @@ class GraphExecutor: def _route_activation( activation: Activation, - workers_map: dict[str, WorkerAgent], + workers_map: dict[str, NodeWorker], pending_tasks_map: dict[str, asyncio.Task], *, has_event_subscription: bool, diff --git a/core/framework/graph/prompt_composer.py b/core/framework/orchestrator/prompt_composer.py similarity index 94% rename from core/framework/graph/prompt_composer.py rename to core/framework/orchestrator/prompt_composer.py index b83c047e..92bbaca0 100644 --- a/core/framework/graph/prompt_composer.py +++ b/core/framework/orchestrator/prompt_composer.py @@ -9,7 +9,7 @@ import json from pathlib import Path from typing import TYPE_CHECKING -from framework.graph.prompting import ( +from framework.orchestrator.prompting import ( EXECUTION_SCOPE_PREAMBLE, TransitionSpec, build_accounts_prompt, @@ -19,7 +19,7 @@ from framework.graph.prompting import ( ) if TYPE_CHECKING: - from framework.graph.node import DataBuffer, NodeSpec + from framework.orchestrator.node import DataBuffer, NodeSpec _with_datetime = stamp_prompt_datetime @@ -36,7 +36,7 @@ def compose_system_prompt( node_type_preamble: str | None = None, ) -> str: """Compatibility wrapper for the legacy function signature.""" - from framework.graph.prompting import NodePromptSpec + from framework.orchestrator.prompting import NodePromptSpec spec = NodePromptSpec( identity_prompt=identity_prompt or "", @@ -66,7 +66,6 @@ def compose_system_prompt( protocols_prompt=spec.protocols_prompt, node_type=spec.node_type, output_keys=spec.output_keys, - is_subagent_mode=spec.is_subagent_mode, ) return build_system_prompt(spec) @@ -135,7 +134,7 @@ def build_transition_marker( ) -from framework.graph.prompting import build_transition_message # noqa: E402 +from framework.orchestrator.prompting import build_transition_message # noqa: E402 __all__ = [ "EXECUTION_SCOPE_PREAMBLE", diff --git a/core/framework/graph/prompting.py b/core/framework/orchestrator/prompting.py similarity index 95% 
rename from core/framework/graph/prompting.py rename to core/framework/orchestrator/prompting.py index 072abf83..b76faa9b 100644 --- a/core/framework/graph/prompting.py +++ b/core/framework/orchestrator/prompting.py @@ -12,8 +12,8 @@ from datetime import datetime from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from framework.graph.edge import GraphSpec - from framework.graph.node import DataBuffer + from framework.orchestrator.edge import GraphSpec + from framework.orchestrator.node import DataBuffer # Injected into every worker node's system prompt so the LLM understands @@ -40,7 +40,6 @@ class NodePromptSpec: memory_prompt: str = "" node_type: str = "event_loop" output_keys: tuple[str, ...] = () - is_subagent_mode: bool = False @dataclass(frozen=True) @@ -165,7 +164,6 @@ def build_prompt_spec_from_node_context( memory_prompt=resolved_memory_prompt, node_type=ctx.node_spec.node_type, output_keys=tuple(ctx.node_spec.output_keys or ()), - is_subagent_mode=bool(getattr(ctx, "is_subagent_mode", False)), ) @@ -195,13 +193,10 @@ def build_system_prompt(spec: NodePromptSpec) -> str: if spec.narrative: parts.append(f"\n--- Context (what has happened so far) ---\n{spec.narrative}") - if not spec.is_subagent_mode and spec.node_type in ("event_loop", "gcu") and spec.output_keys: + if not False and spec.node_type == "event_loop" and spec.output_keys: parts.append(f"\n{EXECUTION_SCOPE_PREAMBLE}") - if spec.node_type == "gcu": - from framework.graph.gcu import GCU_BROWSER_SYSTEM_PROMPT - parts.append(f"\n{GCU_BROWSER_SYSTEM_PROMPT}") if spec.focus_prompt: parts.append(f"\n--- Current Focus ---\n{spec.focus_prompt}") diff --git a/core/framework/graph/safe_eval.py b/core/framework/orchestrator/safe_eval.py similarity index 100% rename from core/framework/graph/safe_eval.py rename to core/framework/orchestrator/safe_eval.py diff --git a/core/framework/graph/validator.py b/core/framework/orchestrator/validator.py similarity index 100% rename from 
core/framework/graph/validator.py rename to core/framework/orchestrator/validator.py diff --git a/core/framework/pipeline/__init__.py b/core/framework/pipeline/__init__.py new file mode 100644 index 00000000..da2793a7 --- /dev/null +++ b/core/framework/pipeline/__init__.py @@ -0,0 +1,32 @@ +"""Pipeline middleware for the agent runtime. + +Stages run in order when :meth:`AgentRuntime.trigger` receives a request. +Each stage can pass the context through, transform the input data, or reject +the request entirely. This is the runtime-level analogue of AstrBot's +pipeline architecture and lets operators compose rate limiting, validation, +cost guards, and custom pre/post-processing without patching core code. +""" + +from framework.pipeline.registry import ( + build_pipeline_from_config, + build_stage, + register, +) +from framework.pipeline.runner import PipelineRunner +from framework.pipeline.stage import ( + PipelineContext, + PipelineRejectedError, + PipelineResult, + PipelineStage, +) + +__all__ = [ + "PipelineContext", + "PipelineRejectedError", + "PipelineResult", + "PipelineRunner", + "PipelineStage", + "build_pipeline_from_config", + "build_stage", + "register", +] diff --git a/core/framework/pipeline/execution_middleware.py b/core/framework/pipeline/execution_middleware.py new file mode 100644 index 00000000..cdebfc99 --- /dev/null +++ b/core/framework/pipeline/execution_middleware.py @@ -0,0 +1,44 @@ +"""Execution-level middleware protocol. + +Unlike :class:`PipelineStage` (which gates ``AgentHost.trigger()`` at the +request level), execution middleware runs at the start of **every** execution +attempt inside ``ExecutionManager._run_execution()`` -- including resurrection +retries. 
+ +Use this for concerns that must re-evaluate per attempt: +- Cost tracking (charge per attempt, not per trigger) +- Tool scoping (different tools on retry) +- Checkpoint config overrides +- Per-execution logging/tracing setup +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class ExecutionContext: + """Context passed to execution middleware.""" + + execution_id: str + stream_id: str + run_id: str + input_data: dict[str, Any] + session_state: dict[str, Any] | None = None + attempt: int = 1 + metadata: dict[str, Any] = field(default_factory=dict) + + +class ExecutionMiddleware(ABC): + """Base class for per-execution middleware.""" + + @abstractmethod + async def on_execution_start(self, ctx: ExecutionContext) -> ExecutionContext: + """Called before each execution attempt (including resurrections). + + Modify and return *ctx* to transform execution parameters. + Raise to abort the execution. + """ diff --git a/core/framework/pipeline/registry.py b/core/framework/pipeline/registry.py new file mode 100644 index 00000000..f46f32c2 --- /dev/null +++ b/core/framework/pipeline/registry.py @@ -0,0 +1,107 @@ +"""Pipeline stage registry -- maps type names to stage classes. + +Stages self-register via the ``@register`` decorator. The +``build_pipeline_from_config`` function reads a declarative config +(from ``~/.hive/configuration.json`` or ``agent.json``) and +instantiates the corresponding stage objects. 
+ +Example config:: + + { + "pipeline": { + "stages": [ + {"type": "rate_limit", "order": 200, "config": {"max_requests_per_minute": 60}}, + {"type": "cost_guard", "order": 300, "config": {"max_cost_per_request": 0.50}} + ] + } + } +""" + +from __future__ import annotations + +import logging +from typing import Any + +from framework.pipeline.runner import PipelineRunner +from framework.pipeline.stage import PipelineStage + +logger = logging.getLogger(__name__) + +_STAGE_REGISTRY: dict[str, type[PipelineStage]] = {} + + +def register(name: str): + """Decorator to register a pipeline stage class by type name. + + Usage:: + + @register("rate_limit") + class RateLimitStage(PipelineStage): + ... + """ + + def decorator(cls: type[PipelineStage]) -> type[PipelineStage]: + _STAGE_REGISTRY[name] = cls + return cls + + return decorator + + +def get_registered_stages() -> dict[str, type[PipelineStage]]: + """Return a copy of the stage registry.""" + return dict(_STAGE_REGISTRY) + + +def build_stage(spec: dict[str, Any]) -> PipelineStage: + """Instantiate a single stage from a config spec. + + Args: + spec: Dict with ``type`` (required), ``order`` (optional), + and ``config`` (optional kwargs dict). + + Raises: + KeyError: If the stage type is not registered. + """ + stage_type = spec["type"] + if stage_type not in _STAGE_REGISTRY: + available = ", ".join(sorted(_STAGE_REGISTRY)) or "(none)" + raise KeyError( + f"Unknown pipeline stage type '{stage_type}'. " + f"Available: {available}" + ) + cls = _STAGE_REGISTRY[stage_type] + config = spec.get("config", {}) + stage = cls(**config) + if "order" in spec: + stage.order = spec["order"] + return stage + + +def build_pipeline_from_config( + stages_config: list[dict[str, Any]], +) -> PipelineRunner: + """Build a ``PipelineRunner`` from a declarative stages list. + + Each entry is ``{"type": "...", "order": N, "config": {...}}``. 
+ """ + # Import built-in stages so they self-register + _ensure_builtins_registered() + + stages = [build_stage(s) for s in stages_config] + return PipelineRunner(stages) + + +def _ensure_builtins_registered() -> None: + """Import built-in stage modules so their ``@register`` decorators fire.""" + if _STAGE_REGISTRY: + return # already populated + try: + import framework.pipeline.stages.cost_guard # noqa: F401 + import framework.pipeline.stages.credential_resolver # noqa: F401 + import framework.pipeline.stages.input_validation # noqa: F401 + import framework.pipeline.stages.llm_provider # noqa: F401 + import framework.pipeline.stages.mcp_registry # noqa: F401 + import framework.pipeline.stages.rate_limit # noqa: F401 + import framework.pipeline.stages.skill_registry # noqa: F401 + except ImportError: + pass diff --git a/core/framework/pipeline/runner.py b/core/framework/pipeline/runner.py new file mode 100644 index 00000000..7d05deb9 --- /dev/null +++ b/core/framework/pipeline/runner.py @@ -0,0 +1,111 @@ +"""Pipeline runner -- executes registered stages in order.""" + +from __future__ import annotations + +import logging +from typing import Any + +from framework.pipeline.stage import ( + PipelineContext, + PipelineRejectedError, + PipelineStage, +) + +logger = logging.getLogger(__name__) + + +class PipelineRunner: + """Executes a list of :class:`PipelineStage` instances in ``order``. + + The runner is the orchestration layer that :class:`AgentRuntime` calls + on every trigger. Stages execute in ascending ``order`` (ties broken by + registration order). A stage returning ``reject`` short-circuits the + pipeline and causes the trigger to raise :class:`PipelineRejectedError`. 
+ """ + + def __init__(self, stages: list[PipelineStage] | None = None) -> None: + self._stages: list[PipelineStage] = sorted(stages or [], key=lambda s: s.order) + + @property + def stages(self) -> list[PipelineStage]: + return list(self._stages) + + def add_stage(self, stage: PipelineStage) -> None: + """Add a stage after construction (for dynamic registration).""" + self._stages.append(stage) + self._stages.sort(key=lambda s: s.order) + + async def initialize_all(self) -> None: + """Call ``initialize`` on every registered stage.""" + for stage in self._stages: + name = stage.__class__.__name__ + logger.info("[pipeline] Initializing %s (order=%d)", name, stage.order) + await stage.initialize() + logger.info("[pipeline] %s initialized", name) + if self._stages: + logger.info( + "[pipeline] Ready: %d stages [%s]", + len(self._stages), + " -> ".join(s.__class__.__name__ for s in self._stages), + ) + + async def run(self, ctx: PipelineContext) -> PipelineContext: + """Run all stages. Raises ``PipelineRejectedError`` on rejection. + + Returns the (possibly transformed) context. 
+ """ + if not self._stages: + return ctx + import time + + pipeline_start = time.perf_counter() + logger.info( + "[pipeline] Running %d stages for entry_point=%s", + len(self._stages), + ctx.entry_point_id, + ) + for stage in self._stages: + stage_name = stage.__class__.__name__ + t0 = time.perf_counter() + result = await stage.process(ctx) + elapsed_ms = (time.perf_counter() - t0) * 1000 + if result.action == "reject": + reason = result.rejection_reason or "(no reason given)" + logger.warning( + "[pipeline] REJECTED by %s (%.1fms): %s", + stage_name, elapsed_ms, reason, + ) + raise PipelineRejectedError(stage_name, reason) + if result.action == "transform": + logger.info( + "[pipeline] %s TRANSFORMED input (%.1fms)", + stage_name, elapsed_ms, + ) + if result.input_data is not None: + ctx.input_data = result.input_data + else: + logger.info( + "[pipeline] %s passed (%.1fms)", + stage_name, elapsed_ms, + ) + total_ms = (time.perf_counter() - pipeline_start) * 1000 + logger.info("[pipeline] Complete (%.1fms total)", total_ms) + return ctx + + async def run_post(self, ctx: PipelineContext, result: Any) -> Any: + """Run all stages' ``post_process`` hooks in order. + + Each stage can transform the result; the final value is returned. + Exceptions are logged and swallowed -- post-processing must not + break a successful execution. 
+ """ + current = result + for stage in self._stages: + try: + current = await stage.post_process(ctx, current) + except Exception: + logger.exception( + "Pipeline post_process raised in %s; continuing with previous result", + stage.__class__.__name__, + ) + return current diff --git a/core/framework/pipeline/stage.py b/core/framework/pipeline/stage.py new file mode 100644 index 00000000..e250189c --- /dev/null +++ b/core/framework/pipeline/stage.py @@ -0,0 +1,77 @@ +"""Pipeline stage base class and request/response types.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, Literal + + +class PipelineRejectedError(Exception): + """Raised by ``AgentHost.trigger`` when a stage rejects the request.""" + + def __init__(self, stage_name: str, reason: str) -> None: + super().__init__(f"Pipeline rejected by {stage_name}: {reason}") + self.stage_name = stage_name + self.reason = reason + + +@dataclass +class PipelineContext: + """Carries request data through the pipeline.""" + + entry_point_id: str + input_data: dict[str, Any] + correlation_id: str | None = None + session_state: dict[str, Any] | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class PipelineResult: + """Outcome of a stage's ``process`` call.""" + + action: Literal["continue", "reject", "transform"] = "continue" + input_data: dict[str, Any] | None = None + rejection_reason: str | None = None + + +class PipelineStage(ABC): + """Base class for all middleware stages. + + Infrastructure stages (LLM, MCP, credentials, skills) set typed + attributes during ``initialize()`` that the host reads after all + stages have initialized. Request-level stages (rate limit, input + validation, cost guard) implement ``process()``. 
+ + Attributes set by infrastructure stages: + llm: LLM provider instance (set by LlmProviderStage) + tool_registry: ToolRegistry with discovered MCP tools (set by McpRegistryStage) + accounts_prompt: Connected accounts system prompt block (set by CredentialResolverStage) + accounts_data: Raw account info list (set by CredentialResolverStage) + tool_provider_map: Tool name -> provider mapping (set by CredentialResolverStage) + skills_manager: SkillsManager instance (set by SkillRegistryStage) + """ + + order: int = 100 + + # Infrastructure stage outputs -- typed so _apply_pipeline_results + # doesn't need hasattr() sniffing. + llm: Any = None + tool_registry: Any = None + accounts_prompt: str = "" + accounts_data: list[dict] | None = None + tool_provider_map: dict[str, str] | None = None + skills_manager: Any = None + + async def initialize(self) -> None: + """Called once when the runtime starts.""" + return None + + @abstractmethod + async def process(self, ctx: PipelineContext) -> PipelineResult: + """Process the incoming request.""" + + async def post_process(self, ctx: PipelineContext, result: Any) -> Any: + """Optional post-execution hook. 
Default: pass-through.""" + return result diff --git a/core/framework/pipeline/stages/__init__.py b/core/framework/pipeline/stages/__init__.py new file mode 100644 index 00000000..6a9105be --- /dev/null +++ b/core/framework/pipeline/stages/__init__.py @@ -0,0 +1,19 @@ +"""Built-in pipeline stages.""" + +from framework.pipeline.stages.cost_guard import CostGuardStage +from framework.pipeline.stages.credential_resolver import CredentialResolverStage +from framework.pipeline.stages.input_validation import InputValidationStage +from framework.pipeline.stages.llm_provider import LlmProviderStage +from framework.pipeline.stages.mcp_registry import McpRegistryStage +from framework.pipeline.stages.rate_limit import RateLimitStage +from framework.pipeline.stages.skill_registry import SkillRegistryStage + +__all__ = [ + "CostGuardStage", + "CredentialResolverStage", + "InputValidationStage", + "LlmProviderStage", + "McpRegistryStage", + "RateLimitStage", + "SkillRegistryStage", +] diff --git a/core/framework/pipeline/stages/cost_guard.py b/core/framework/pipeline/stages/cost_guard.py new file mode 100644 index 00000000..4850fe3b --- /dev/null +++ b/core/framework/pipeline/stages/cost_guard.py @@ -0,0 +1,35 @@ +"""Cost guard stage -- reject requests over a pre-flight budget.""" + +from __future__ import annotations + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + + +@register("cost_guard") +class CostGuardStage(PipelineStage): + """Reject requests whose estimated cost exceeds the per-request budget. + + The cost estimate must be populated in ``ctx.metadata["estimated_cost"]`` + by an earlier stage (or by the caller). When no estimate is present, + the stage passes through. 
+ """ + + order = 300 + + def __init__(self, max_cost_per_request: float = 1.0) -> None: + self._budget = max_cost_per_request + + async def process(self, ctx: PipelineContext) -> PipelineResult: + estimated = ctx.metadata.get("estimated_cost") + if estimated is None: + return PipelineResult(action="continue") + if estimated > self._budget: + return PipelineResult( + action="reject", + rejection_reason=( + f"Estimated cost ${estimated:.4f} exceeds budget " + f"${self._budget:.4f}" + ), + ) + return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/credential_resolver.py b/core/framework/pipeline/stages/credential_resolver.py new file mode 100644 index 00000000..b76df37f --- /dev/null +++ b/core/framework/pipeline/stages/credential_resolver.py @@ -0,0 +1,58 @@ +"""Credential resolver pipeline stage. + +Resolves connected accounts at startup. Individual credential TTL/refresh +is handled by MCP server processes internally -- they resolve tokens from +the credential store on every tool call. 
+""" + +from __future__ import annotations + +import logging +from typing import Any + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + +logger = logging.getLogger(__name__) + + +@register("credential_resolver") +class CredentialResolverStage(PipelineStage): + """Resolve connected accounts for system prompt injection.""" + + order = 40 + + def __init__(self, credential_store: Any = None, **kwargs: Any) -> None: + self._credential_store = credential_store + self.accounts_prompt = "" + self.accounts_data: list[dict] | None = None + self.tool_provider_map: dict[str, str] | None = None + + async def initialize(self) -> None: + try: + from aden_tools.credentials.store_adapter import ( + CredentialStoreAdapter, + ) + from framework.orchestrator.prompting import build_accounts_prompt + + if self._credential_store is not None: + adapter = CredentialStoreAdapter(store=self._credential_store) + else: + adapter = CredentialStoreAdapter.default() + self.accounts_data = adapter.get_all_account_info() + self.tool_provider_map = adapter.get_tool_provider_map() + if self.accounts_data: + self.accounts_prompt = build_accounts_prompt( + self.accounts_data, self.tool_provider_map, + ) + logger.info( + "[pipeline] CredentialResolverStage: %d accounts", + len(self.accounts_data or []), + ) + except Exception: + logger.debug( + "Credential resolution failed (non-fatal)", exc_info=True, + ) + + async def process(self, ctx: PipelineContext) -> PipelineResult: + return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/input_validation.py b/core/framework/pipeline/stages/input_validation.py new file mode 100644 index 00000000..3a025c0e --- /dev/null +++ b/core/framework/pipeline/stages/input_validation.py @@ -0,0 +1,47 @@ +"""Input validation stage. + +Rejects requests whose ``input_data`` does not match the entry point's +declared input schema. 
Uses a user-provided schema map: +``{entry_point_id: {required_key: expected_type, ...}}``. +""" + +from __future__ import annotations + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + + +@register("input_validation") +class InputValidationStage(PipelineStage): + """Validate ``input_data`` against per-entry-point schemas. + + The schema is a simple dict mapping key -> expected Python type. + For richer validation, substitute a Pydantic-based stage. + """ + + order = 100 + + def __init__(self, schemas: dict[str, dict[str, type]] | None = None) -> None: + self._schemas = schemas or {} + + async def process(self, ctx: PipelineContext) -> PipelineResult: + schema = self._schemas.get(ctx.entry_point_id) + if not schema: + return PipelineResult(action="continue") + + for key, expected_type in schema.items(): + if key not in ctx.input_data: + return PipelineResult( + action="reject", + rejection_reason=f"Missing required input key: '{key}'", + ) + value = ctx.input_data[key] + if not isinstance(value, expected_type): + return PipelineResult( + action="reject", + rejection_reason=( + f"Input key '{key}' has type {type(value).__name__}, " + f"expected {expected_type.__name__}" + ), + ) + return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/llm_provider.py b/core/framework/pipeline/stages/llm_provider.py new file mode 100644 index 00000000..899342f2 --- /dev/null +++ b/core/framework/pipeline/stages/llm_provider.py @@ -0,0 +1,95 @@ +"""LLM provider pipeline stage. + +Resolves the LLM provider from global config. This is the ONLY place +the LLM gets created for worker agents. 
+""" + +from __future__ import annotations + +import logging +from typing import Any + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + +logger = logging.getLogger(__name__) + + +@register("llm_provider") +class LlmProviderStage(PipelineStage): + """Resolve LLM provider and make it available.""" + + order = 10 + + def __init__( + self, + model: str | None = None, + mock_mode: bool = False, + llm: Any = None, + **kwargs: Any, + ) -> None: + self._model = model + self._mock_mode = mock_mode + self.llm = llm # Pre-injected LLM (e.g. from session) + + async def initialize(self) -> None: + if self.llm is not None: + return # Already injected + + from framework.config import ( + get_api_key, + get_api_keys, + get_hive_config, + get_preferred_model, + ) + + model = self._model or get_preferred_model() + + if self._mock_mode: + from framework.llm.mock import MockLLMProvider + + self.llm = MockLLMProvider(model=model) + return + + config = get_hive_config() + llm_config = config.get("llm", {}) + api_base = llm_config.get("api_base") + + # Check for Antigravity (special provider) + if llm_config.get("use_antigravity_subscription"): + try: + from framework.llm.antigravity import AntigravityProvider + + provider = AntigravityProvider(model=model) + if provider.has_credentials(): + self.llm = provider + logger.info("[pipeline] LlmProviderStage: Antigravity") + return + except Exception: + pass + + from framework.llm.litellm import LiteLLMProvider + + api_key = get_api_key() + api_keys = get_api_keys() + + if api_keys and len(api_keys) > 1: + self.llm = LiteLLMProvider( + model=model, api_keys=api_keys, api_base=api_base, + ) + elif api_key: + extra = {} + if api_key.startswith("sk-ant-oat"): + extra["extra_headers"] = { + "authorization": f"Bearer {api_key}" + } + self.llm = LiteLLMProvider( + model=model, api_key=api_key, api_base=api_base, **extra, + ) + else: + self.llm = 
LiteLLMProvider(model=model, api_base=api_base) + + logger.info("[pipeline] LlmProviderStage: %s", model) + + async def process(self, ctx: PipelineContext) -> PipelineResult: + return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/mcp_registry.py b/core/framework/pipeline/stages/mcp_registry.py new file mode 100644 index 00000000..989cfd98 --- /dev/null +++ b/core/framework/pipeline/stages/mcp_registry.py @@ -0,0 +1,92 @@ +"""MCP registry pipeline stage. + +Resolves MCP server references from the agent config against the global +registry and registers tools. This is the ONLY place MCP tools get loaded. +""" + +from __future__ import annotations + +import logging +from dataclasses import asdict +from pathlib import Path +from typing import Any + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + +logger = logging.getLogger(__name__) + + +@register("mcp_registry") +class McpRegistryStage(PipelineStage): + """Resolve MCP tools from the global registry.""" + + order = 50 + + def __init__( + self, + server_refs: list[dict[str, Any]] | None = None, + agent_path: str | Path | None = None, + tool_registry: Any = None, + **kwargs: Any, + ) -> None: + self._server_refs = server_refs or [] + self._agent_path = Path(agent_path) if agent_path else None + self._tool_registry = tool_registry + + async def initialize(self) -> None: + """Connect to MCP servers and discover tools.""" + if self._tool_registry is None: + from framework.loader.tool_registry import ToolRegistry + + self._tool_registry = ToolRegistry() + + from framework.loader.mcp_registry import MCPRegistry + + registry = MCPRegistry() + mcp_loaded = False + + # 1. 
From agent.json mcp_servers refs + if self._server_refs: + names = [ref["name"] for ref in self._server_refs if ref.get("name")] + if names: + configs = registry.resolve_for_agent(include=names) + if configs: + self._tool_registry.load_registry_servers( + [asdict(c) for c in configs] + ) + mcp_loaded = True + logger.info( + "[pipeline] McpRegistryStage: loaded %d servers: %s", + len(configs), + names, + ) + + # 2. Legacy: mcp_servers.json + if not mcp_loaded and self._agent_path: + mcp_json = self._agent_path / "mcp_servers.json" + if mcp_json.exists(): + self._tool_registry.load_mcp_config(mcp_json) + mcp_loaded = True + + # 3. Fallback: all servers from global registry + if not mcp_loaded: + configs = registry.resolve_for_agent(profile="all") + if configs: + self._tool_registry.load_registry_servers( + [asdict(c) for c in configs] + ) + logger.info( + "[pipeline] McpRegistryStage: loaded %d servers (fallback)", + len(configs), + ) + + total = len(self._tool_registry.get_tools()) + logger.info("[pipeline] McpRegistryStage: %d tools available", total) + + async def process(self, ctx: PipelineContext) -> PipelineResult: + return PipelineResult(action="continue") + + @property + def tool_registry(self): + return self._tool_registry diff --git a/core/framework/pipeline/stages/rate_limit.py b/core/framework/pipeline/stages/rate_limit.py new file mode 100644 index 00000000..364c10fa --- /dev/null +++ b/core/framework/pipeline/stages/rate_limit.py @@ -0,0 +1,44 @@ +"""Per-(entry-point, session) rate limiting stage.""" + +from __future__ import annotations + +import time +from collections import defaultdict + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + + +@register("rate_limit") +class RateLimitStage(PipelineStage): + """Reject requests that exceed ``max_requests_per_minute`` per session. + + The key is ``:``. 
When no session_id is + present in ``session_state``, a single shared "default" bucket is used. + """ + + order = 200 + + def __init__(self, max_requests_per_minute: int = 60) -> None: + self._max_rpm = max_requests_per_minute + self._timestamps: dict[str, list[float]] = defaultdict(list) + + async def process(self, ctx: PipelineContext) -> PipelineResult: + session_id = "default" + if ctx.session_state: + session_id = str(ctx.session_state.get("session_id", "default")) + key = f"{ctx.entry_point_id}:{session_id}" + + now = time.monotonic() + # Prune entries older than 60s. + self._timestamps[key] = [t for t in self._timestamps[key] if now - t < 60.0] + if len(self._timestamps[key]) >= self._max_rpm: + return PipelineResult( + action="reject", + rejection_reason=( + f"Rate limit exceeded: {self._max_rpm} req/min " + f"for session '{session_id}'" + ), + ) + self._timestamps[key].append(now) + return PipelineResult(action="continue") diff --git a/core/framework/pipeline/stages/skill_registry.py b/core/framework/pipeline/stages/skill_registry.py new file mode 100644 index 00000000..71a73a69 --- /dev/null +++ b/core/framework/pipeline/stages/skill_registry.py @@ -0,0 +1,55 @@ +"""Skill registry pipeline stage. + +Discovers and loads skills. This is the ONLY place skills get loaded. 
+""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Any + +from framework.pipeline.registry import register +from framework.pipeline.stage import PipelineContext, PipelineResult, PipelineStage + +logger = logging.getLogger(__name__) + + +@register("skill_registry") +class SkillRegistryStage(PipelineStage): + """Discover skills and provide prompts.""" + + order = 60 + + def __init__( + self, + project_root: str | Path | None = None, + interactive: bool = True, + skills_config: Any = None, + **kwargs: Any, + ) -> None: + self._project_root = Path(project_root) if project_root else None + self._interactive = interactive + self._skills_config = skills_config + self.skills_manager: Any = None + + async def initialize(self) -> None: + from framework.skills.config import SkillsConfig + from framework.skills.manager import SkillsManager, SkillsManagerConfig + + config = SkillsManagerConfig( + skills_config=self._skills_config or SkillsConfig(), + project_root=self._project_root, + interactive=self._interactive, + ) + self.skills_manager = SkillsManager(config) + self.skills_manager.load() + await self.skills_manager.start_watching() + logger.info( + "[pipeline] SkillRegistryStage: catalog=%d chars, protocols=%d chars", + len(self.skills_manager.skills_catalog_prompt), + len(self.skills_manager.protocols_prompt), + ) + + async def process(self, ctx: PipelineContext) -> PipelineResult: + return PipelineResult(action="continue") diff --git a/core/framework/runner/__init__.py b/core/framework/runner/__init__.py deleted file mode 100644 index 376866a7..00000000 --- a/core/framework/runner/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Agent Runner - load and run exported agents.""" - -from framework.runner.mcp_registry import MCPRegistry -from framework.runner.protocol import ( - AgentMessage, - CapabilityLevel, - CapabilityResponse, - MessageType, - OrchestratorResult, -) -from framework.runner.runner import AgentInfo, 
AgentRunner, ValidationResult -from framework.runner.tool_registry import ToolRegistry, tool - -__all__ = [ - # Single agent - "AgentRunner", - "AgentInfo", - "ValidationResult", - "ToolRegistry", - "MCPRegistry", - "tool", - "AgentMessage", - "MessageType", - "CapabilityLevel", - "CapabilityResponse", - "OrchestratorResult", -] diff --git a/core/framework/runtime/EVENT_TYPES.md b/core/framework/runtime/EVENT_TYPES.md deleted file mode 100644 index 22d3cc6a..00000000 --- a/core/framework/runtime/EVENT_TYPES.md +++ /dev/null @@ -1,493 +0,0 @@ -# Event Types and Schema Reference - -The Hive runtime uses a pub/sub `EventBus` for inter-component communication and observability. Every event is an `AgentEvent` dataclass published through `EventBus.publish()`. - -## Event Envelope (`AgentEvent`) - -Every event shares a common envelope: - -| Field | Type | Description | -| ---------------- | ----------------- | ------------------------------------------------------------ | -| `type` | `EventType` (str) | Event type identifier (see below) | -| `stream_id` | `str` | Entry point / pipeline that emitted the event | -| `node_id` | `str \| None` | Graph node that emitted the event | -| `execution_id` | `str \| None` | Unique execution run ID (UUID, set by `ExecutionStream`) | -| `graph_id` | `str \| None` | Graph that emitted the event (set by `GraphScopedEventBus`) | -| `data` | `dict` | Event-type-specific payload (see individual schemas below) | -| `timestamp` | `datetime` | When the event was created | -| `correlation_id` | `str \| None` | Optional ID for tracking related events across streams | - -### Identity Fields - -The identity tuple `(graph_id, stream_id, node_id, execution_id)` uniquely locates any event: - -- **`graph_id`** — Which graph produced the event. Set automatically by `GraphScopedEventBus` (a subclass that stamps `graph_id` on every `publish()` call). Values: `"worker"`, `"judge"`, `"queen"`, or the graph spec ID. 
-- **`stream_id`** — Which entry point / pipeline. Corresponds to `EntryPointSpec.id` in the graph definition. For single-entry-point graphs, this equals the entry point name (e.g. `"default"`, `"health_check"`). -- **`node_id`** — Which specific node emitted the event. For `EventLoopNode` events, this is the node spec ID. -- **`execution_id`** — UUID identifying a specific execution run. Multiple concurrent executions of the same entry point each get a unique `execution_id`. - ---- - -## Execution Lifecycle - -### `execution_started` - -A new graph execution has begun. - -| Data Field | Type | Description | -| ---------- | ------ | ------------------------------- | -| `input` | `dict` | Input data passed to the graph | - -**Emitted by:** `ExecutionStream._run_execution()` - ---- - -### `execution_completed` - -A graph execution finished successfully. - -| Data Field | Type | Description | -| ---------- | ------ | ----------------- | -| `output` | `dict` | Final output data | - -**Emitted by:** `ExecutionStream._run_execution()` - -**Queen notification:** When a worker execution completes, the session manager \ -injects a `[WORKER_TERMINAL]` notification into the queen with the output summary. \ -The queen reports to the user and asks what to do next. - ---- - -### `execution_failed` - -A graph execution failed with an error. - -| Data Field | Type | Description | -| ---------- | ----- | ------------- | -| `error` | `str` | Error message | - -**Emitted by:** `ExecutionStream._run_execution()` - -**Queen notification:** When a worker execution fails, the session manager \ -injects a `[WORKER_TERMINAL]` notification into the queen with the error. \ -The queen reports to the user and helps troubleshoot. - ---- - -### `execution_paused` - -Execution has been paused (Ctrl+Z or HITL approval). 
- -| Data Field | Type | Description | -| ---------- | ----- | ----------------- | -| `reason` | `str` | Why it was paused | - -**Emitted by:** `GraphExecutor.execute()` - ---- - -### `execution_resumed` - -Execution has resumed from a paused state. - -| Data Field | Type | Description | -| ---------- | ---- | ----------- | -| *(none)* | | | - -**Emitted by:** `GraphExecutor.execute()` - ---- - -## Node Event-Loop Lifecycle - -These events track the inner loop of `EventLoopNode` — the multi-turn LLM streaming loop that powers most agent nodes. - -### `node_loop_started` - -An EventLoopNode has begun its execution loop. - -| Data Field | Type | Description | -| ---------------- | ---------- | ------------------------------- | -| `max_iterations` | `int\|null`| Maximum iterations configured | - -**Emitted by:** `EventLoopNode._publish_loop_started()`, `GraphExecutor` (for function nodes in parallel branches) - ---- - -### `node_loop_iteration` - -An EventLoopNode has started a new iteration (one LLM turn). - -| Data Field | Type | Description | -| ----------- | ----- | ------------------------- | -| `iteration` | `int` | Zero-based iteration index | - -**Emitted by:** `EventLoopNode._publish_iteration()` - ---- - -### `node_loop_completed` - -An EventLoopNode has finished its execution loop. - -| Data Field | Type | Description | -| ------------ | ----- | -------------------------------------- | -| `iterations` | `int` | Total number of iterations completed | - -**Emitted by:** `EventLoopNode._publish_loop_completed()`, `GraphExecutor` (for function nodes in parallel branches) - ---- - -## LLM Streaming - -### `llm_text_delta` - -Incremental text output from the LLM (non-client-facing nodes only). 
- -| Data Field | Type | Description | -| ---------- | ----- | ---------------------------------------- | -| `content` | `str` | New text chunk (delta) | -| `snapshot` | `str` | Full accumulated text so far | - -**Emitted by:** `EventLoopNode._publish_text_delta()` when `client_facing=False` - ---- - -### `llm_reasoning_delta` - -Incremental reasoning/thinking output from the LLM. - -| Data Field | Type | Description | -| ---------- | ----- | ------------------- | -| `content` | `str` | New reasoning chunk | - -**Emitted by:** Not currently wired in `EventLoopNode` (reserved for extended thinking models). - ---- - -## Tool Lifecycle - -### `tool_call_started` - -The LLM has requested a tool call and execution is about to begin. - -| Data Field | Type | Description | -| ------------ | ------ | ------------------------------------ | -| `tool_use_id`| `str` | Unique ID for this tool invocation | -| `tool_name` | `str` | Name of the tool being called | -| `tool_input` | `dict` | Arguments passed to the tool | - -**Emitted by:** `EventLoopNode._publish_tool_started()` - ---- - -### `tool_call_completed` - -A tool call has finished executing. - -| Data Field | Type | Description | -| ------------ | ------ | -------------------------------------- | -| `tool_use_id`| `str` | Same ID from `tool_call_started` | -| `tool_name` | `str` | Name of the tool | -| `result` | `str` | Tool execution result (may be truncated)| -| `is_error` | `bool` | Whether the tool returned an error | - -**Emitted by:** `EventLoopNode._publish_tool_completed()` - ---- - -## Client I/O - -These events are emitted by the queen's interactive turns. They drive the TUI's chat interface. - -### `client_output_delta` - -Incremental text output meant for the human operator. 
- -| Data Field | Type | Description | -| ---------- | ----- | ---------------------------- | -| `content` | `str` | New text chunk (delta) | -| `snapshot` | `str` | Full accumulated text so far | - -**Emitted by:** `EventLoopNode._publish_text_delta()` for queen/user-facing output - ---- - -### `client_input_requested` - -The node is waiting for human input (via `ask_user` tool or auto-block on text-only turns). - -| Data Field | Type | Description | -| ---------- | ----- | ------------------------------------------------- | -| `prompt` | `str` | Optional prompt/question shown to the user | - -**Emitted by:** `EventLoopNode._await_user_input()`, doom loop handler - -The TUI subscribes to this event to show the input prompt and focus the chat input. After the user types, `inject_event()` is called on the node to unblock it. - ---- - -## Internal Node Observability - -### `node_internal_output` - -Output from a non-client-facing node (for debugging/monitoring). - -| Data Field | Type | Description | -| ---------- | ----- | ---------------- | -| `content` | `str` | Output text | - -**Emitted by:** Available via `emit_node_internal_output()` — not currently wired in the default `EventLoopNode`. - ---- - -### `node_input_blocked` - -A non-client-facing node is blocked waiting for input. - -| Data Field | Type | Description | -| ---------- | ----- | --------------- | -| `prompt` | `str` | Block reason | - -**Emitted by:** Available via `emit_node_input_blocked()` — reserved for future use. - ---- - -### `node_stalled` - -The node's LLM has produced identical responses for several consecutive turns (stall detection). 
- -| Data Field | Type | Description | -| ---------- | ----- | ------------------------------------------------- | -| `reason` | `str` | Always `"Consecutive identical responses detected"`| - -**Emitted by:** `EventLoopNode._publish_stalled()` - ---- - -### `node_tool_doom_loop` - -The LLM is calling the same tool(s) with identical arguments repeatedly (doom loop detection). - -| Data Field | Type | Description | -| ------------- | ----- | ------------------------------------ | -| `description` | `str` | Human-readable doom loop description | - -**Emitted by:** `EventLoopNode` doom loop handler - ---- - -## Judge Decisions - -### `judge_verdict` - -The judge (custom or implicit) has evaluated the current iteration. - -| Data Field | Type | Description | -| ------------ | ----- | ---------------------------------------------------- | -| `action` | `str` | `"ACCEPT"`, `"RETRY"`, `"ESCALATE"`, or `"CONTINUE"` | -| `feedback` | `str` | Judge feedback (empty for ACCEPT/CONTINUE) | -| `judge_type` | `str` | `"custom"` (explicit JudgeProtocol) or `"implicit"` (stop-reason heuristic) | -| `iteration` | `int` | Which iteration this verdict applies to | - -**Emitted by:** `EventLoopNode._publish_judge_verdict()` - -**Verdict meanings:** -- **ACCEPT** — Output meets requirements; node exits successfully. -- **RETRY** — Output needs improvement; loop continues with feedback injected. -- **ESCALATE** — Problem cannot be solved at this level; triggers escalation. -- **CONTINUE** — Implicit verdict: LLM called tools, so it's making progress — let it keep going. - ---- - -## Output Tracking - -### `output_key_set` - -A node has set an output key via the `set_output` synthetic tool. 
- -| Data Field | Type | Description | -| ---------- | ----- | ----------------- | -| `key` | `str` | Output key name | - -**Emitted by:** `EventLoopNode._publish_output_key_set()` - ---- - -## Retry & Edge Tracking - -### `node_retry` - -A transient error occurred during an LLM call and the node is retrying. - -| Data Field | Type | Description | -| ------------- | ----- | ---------------------------------- | -| `retry_count` | `int` | Current retry attempt number | -| `max_retries` | `int` | Maximum retries configured | -| `error` | `str` | Error message (truncated to 500ch) | - -**Emitted by:** `EventLoopNode` (stream retry handler), `GraphExecutor` (node-level retry) - ---- - -### `edge_traversed` - -The executor has traversed an edge from one node to another. - -| Data Field | Type | Description | -| ---------------- | ----- | ---------------------------------------------- | -| `source_node` | `str` | Node ID the edge starts from | -| `target_node` | `str` | Node ID the edge goes to | -| `edge_condition` | `str` | Edge condition: `"router"`, `"on_success"`, etc. | - -**Emitted by:** `GraphExecutor.execute()` — after router decisions, condition-based edges, and fallback edges. - ---- - -## Context Management - -### `context_compacted` - -Not currently emitted — reserved for future use when `NodeConversation` compacts history. - ---- - -## State Changes - -### `state_changed` - -A shared buffer key has been modified. - -| Data Field | Type | Description | -| ----------- | ----- | ---------------------------------- | -| `key` | `str` | Buffer key that changed | -| `old_value` | `Any` | Previous value | -| `new_value` | `Any` | New value | -| `scope` | `str` | Scope of the change | - -**Emitted by:** Available via `emit_state_changed()` — not currently wired in default execution. - ---- - -### `state_conflict` - -Not currently emitted — reserved for concurrent write conflict detection. 
- ---- - -## Goal Tracking - -### `goal_progress` - -Goal completion progress update. - -| Data Field | Type | Description | -| ----------------- | ------- | ------------------------------------ | -| `progress` | `float` | 0.0–1.0 completion fraction | -| `criteria_status` | `dict` | Per-criterion status | - -**Emitted by:** Available via `emit_goal_progress()` — not currently wired in default execution. - ---- - -### `goal_achieved` - -Not currently emitted — reserved for explicit goal completion signals. - ---- - -### `constraint_violation` - -A goal constraint has been violated. - -| Data Field | Type | Description | -| --------------- | ----- | ------------------------ | -| `constraint_id` | `str` | Which constraint failed | -| `description` | `str` | What went wrong | - -**Emitted by:** Available via `emit_constraint_violation()`. - ---- - -## Stream Lifecycle - -### `stream_started` / `stream_stopped` - -Not currently emitted — reserved for `ExecutionStream` lifecycle tracking. - ---- - -## External Triggers - -### `webhook_received` - -An external webhook has been received. - -| Data Field | Type | Description | -| -------------- | ------ | ---------------------------- | -| `path` | `str` | Webhook URL path | -| `method` | `str` | HTTP method | -| `headers` | `dict` | HTTP headers | -| `payload` | `dict` | Request body | -| `query_params` | `dict` | URL query parameters | - -**Emitted by:** Webhook server integration. - -Note: `node_id` is not set on this event; `stream_id` is the webhook source ID. - ---- - -## Escalation - -### `escalation_requested` - -An agent has requested handoff to the Hive Coder (via the `escalate` synthetic tool). - -| Data Field | Type | Description | -| ---------- | ----- | ------------------------------- | -| `reason` | `str` | Why escalation is needed | -| `context` | `str` | Additional context for the coder| - -**Emitted by:** `EventLoopNode` when the LLM calls `escalate`. 
- ---- - -## Custom Events - -### `custom` - -User-defined events with arbitrary payloads. No schema enforced. - ---- - -## Subscription & Filtering - -Events can be filtered when subscribing: - -```python -bus.subscribe( - event_types=[EventType.TOOL_CALL_STARTED, EventType.TOOL_CALL_COMPLETED], - handler=my_handler, - filter_stream="default", # Only events from this stream - filter_node="planner", # Only events from this node - filter_execution="exec-uuid", # Only events from this execution - filter_graph="worker", # Only events from this graph -) -``` - -## Debug Event Logging - -Set `HIVE_DEBUG_EVENTS=1` to write every published event to a JSONL file at `~/.hive/event_logs/.jsonl`. Each line is the full JSON serialization of an `AgentEvent`: - -```json -{ - "type": "tool_call_started", - "stream_id": "default", - "node_id": "planner", - "execution_id": "a1b2c3d4-...", - "graph_id": "worker", - "data": {"tool_use_id": "tu_1", "tool_name": "web_search", "tool_input": {"query": "..."}}, - "timestamp": "2026-02-24T12:00:00.000000", - "correlation_id": null -} -``` diff --git a/core/framework/runtime/README.md b/core/framework/runtime/README.md deleted file mode 100644 index 0159c2e1..00000000 --- a/core/framework/runtime/README.md +++ /dev/null @@ -1,171 +0,0 @@ -# Agent Runtime - -Unified execution system for all Hive agents. Every agent — single-entry or multi-entry, headless or dashboard — runs through the same runtime stack. - -## Topology - -``` - AgentRunner.load(agent_path) - | - AgentRunner - (factory + public API) - | - _setup_agent_runtime() - | - AgentRuntime - (lifecycle + orchestration) - / | \ - Stream A Stream B Stream C ← one per entry point - | | | - GraphExecutor GraphExecutor GraphExecutor - | | | - Node → Node → Node (graph traversal) -``` - -Single-entry agents get a `"default"` entry point automatically. There is no separate code path. 
- -## Components - -| Component | File | Role | -|---|---|---| -| `AgentRunner` | `runner/runner.py` | Load agents, configure tools/LLM, expose high-level API | -| `AgentRuntime` | `runtime/agent_runtime.py` | Lifecycle management, entry point routing, event bus | -| `ExecutionStream` | `runtime/execution_stream.py` | Per-entry-point execution queue, session persistence | -| `GraphExecutor` | `graph/executor.py` | Node traversal, tool dispatch, checkpointing | -| `EventBus` | `runtime/event_bus.py` | Pub/sub for execution events (streaming, I/O) | -| `SharedBufferManager` | `runtime/shared_state.py` | Cross-stream state with isolation levels | -| `OutcomeAggregator` | `runtime/outcome_aggregator.py` | Goal progress tracking across streams | -| `SessionStore` | `storage/session_store.py` | Session state persistence (`sessions/{id}/state.json`) | - -## Programming Interface - -### AgentRunner (high-level) - -```python -from framework.runner import AgentRunner - -# Load and run -runner = AgentRunner.load("exports/my_agent", model="anthropic/claude-sonnet-4-20250514") -result = await runner.run({"query": "hello"}) - -# Resume from paused session -result = await runner.run({"query": "continue"}, session_state=saved_state) - -# Lifecycle -await runner.start() # Start the runtime -await runner.stop() # Stop the runtime -exec_id = await runner.trigger("default", {}) # Non-blocking trigger -entry_points = runner.get_entry_points() # List entry points - -# Context manager -async with AgentRunner.load("exports/my_agent") as runner: - result = await runner.run({"query": "hello"}) - -# Cleanup -runner.cleanup() # Synchronous -await runner.cleanup_async() # Asynchronous -``` - -### AgentRuntime (lower-level) - -```python -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec - -# Create runtime with entry points -runtime = create_agent_runtime( - graph=graph, - goal=goal, - 
storage_path=Path("~/.hive/agents/my_agent"), - entry_points=[ - EntryPointSpec(id="default", name="Default", entry_node="start", trigger_type="manual"), - ], - llm=llm, - tools=tools, - tool_executor=tool_executor, - checkpoint_config=checkpoint_config, -) - -# Lifecycle -await runtime.start() -await runtime.stop() - -# Execution -exec_id = await runtime.trigger("default", {"query": "hello"}) # Non-blocking -result = await runtime.trigger_and_wait("default", {"query": "hello"}) # Blocking -result = await runtime.trigger_and_wait("default", {}, session_state=state) # Resume - -# Client-facing node I/O -await runtime.inject_input(node_id="chat", content="user response") - -# Events -sub_id = runtime.subscribe_to_events( - event_types=[EventType.CLIENT_OUTPUT_DELTA], - handler=my_handler, -) -runtime.unsubscribe_from_events(sub_id) - -# Inspection -runtime.is_running # bool -runtime.event_bus # EventBus -runtime.state_manager # SharedBufferManager -runtime.get_stats() # Runtime statistics -``` - -## Execution Flow - -1. `AgentRunner.run()` calls `AgentRuntime.trigger_and_wait()` -2. `AgentRuntime` routes to the `ExecutionStream` for the entry point -3. `ExecutionStream` creates a `GraphExecutor` and calls `execute()` -4. `GraphExecutor` traverses nodes, dispatches tools, manages checkpoints -5. `ExecutionResult` flows back up through the stack -6. `ExecutionStream` writes session state to disk - -## Session Resume - -All execution paths support session resume: - -```python -# First run (agent pauses at a client-facing node) -result = await runner.run({"query": "start task"}) -# result.paused_at = "review-node" -# result.session_state = {"memory": {...}, "paused_at": "review-node", ...} - -# Resume -result = await runner.run({"input": "approved"}, session_state=result.session_state) -``` - -Session state flows: `AgentRunner.run()` → `AgentRuntime.trigger_and_wait()` → `ExecutionStream.execute()` → `GraphExecutor.execute()`. 
- -Checkpoints are saved at node boundaries (`sessions/{id}/checkpoints/`) for crash recovery. - -## Event Bus - -The `EventBus` provides real-time execution visibility: - -| Event | When | -|---|---| -| `NODE_STARTED` | Node begins execution | -| `NODE_COMPLETED` | Node finishes | -| `TOOL_CALL_STARTED` | Tool invocation begins | -| `TOOL_CALL_COMPLETED` | Tool invocation finishes | -| `CLIENT_OUTPUT_DELTA` | Agent streams text to user | -| `CLIENT_INPUT_REQUESTED` | Agent needs user input | -| `EXECUTION_COMPLETED` | Full execution finishes | - -In headless mode, `AgentRunner` subscribes to `CLIENT_OUTPUT_DELTA` and `CLIENT_INPUT_REQUESTED` to print output and read stdin. The web dashboard subscribes to route events to the frontend. - -## Storage Layout - -``` -~/.hive/agents/{agent_name}/ - sessions/ - session_YYYYMMDD_HHMMSS_{uuid}/ - state.json # Session state (status, memory, progress) - checkpoints/ # Node-boundary snapshots - logs/ - summary.json # Execution summary - details.jsonl # Detailed event log - tool_logs.jsonl # Tool call log - runtime_logs/ # Cross-session runtime logs -``` diff --git a/core/framework/runtime/__init__.py b/core/framework/runtime/__init__.py deleted file mode 100644 index 26441d41..00000000 --- a/core/framework/runtime/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Runtime core for agent execution.""" - -from framework.runtime.core import Runtime - -__all__ = ["Runtime"] diff --git a/core/framework/runtime/tests/__init__.py b/core/framework/runtime/tests/__init__.py deleted file mode 100644 index 2e79aec4..00000000 --- a/core/framework/runtime/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for runtime components.""" diff --git a/core/framework/runtime/tests/test_agent_runtime.py b/core/framework/runtime/tests/test_agent_runtime.py deleted file mode 100644 index 561aba1b..00000000 --- a/core/framework/runtime/tests/test_agent_runtime.py +++ /dev/null @@ -1,869 +0,0 @@ -""" -Tests for AgentRuntime and multi-entry-point 
execution. - -Tests: -1. AgentRuntime creation and lifecycle -2. Entry point registration -3. Concurrent executions across streams -4. SharedBufferManager isolation levels -5. OutcomeAggregator goal evaluation -6. EventBus pub/sub -""" - -import asyncio -import tempfile -from pathlib import Path - -import pytest - -from framework.graph import Goal -from framework.graph.edge import EdgeCondition, EdgeSpec, GraphSpec -from framework.graph.goal import Constraint, SuccessCriterion -from framework.graph.node import NodeSpec -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.event_bus import AgentEvent, EventBus, EventType -from framework.runtime.execution_stream import EntryPointSpec -from framework.runtime.outcome_aggregator import OutcomeAggregator -from framework.runtime.shared_state import IsolationLevel, SharedBufferManager -from framework.schemas.session_state import SessionState, SessionTimestamps - -# === Test Fixtures === - - -@pytest.fixture -def sample_goal(): - """Create a sample goal for testing.""" - return Goal( - id="test-goal", - name="Test Goal", - description="A goal for testing multi-entry-point execution", - success_criteria=[ - SuccessCriterion( - id="sc-1", - description="Process all requests", - metric="requests_processed", - target="100%", - weight=1.0, - ), - ], - constraints=[ - Constraint( - id="c-1", - description="Must not exceed rate limits", - constraint_type="hard", - category="operational", - ), - ], - ) - - -@pytest.fixture -def sample_graph(): - """Create a sample graph with multiple entry points.""" - nodes = [ - NodeSpec( - id="process-webhook", - name="Process Webhook", - description="Process incoming webhook", - node_type="event_loop", - input_keys=["webhook_data"], - output_keys=["result"], - ), - NodeSpec( - id="process-api", - name="Process API Request", - description="Process API request", - node_type="event_loop", - input_keys=["request_data"], - output_keys=["result"], - 
), - NodeSpec( - id="complete", - name="Complete", - description="Execution complete", - node_type="terminal", - input_keys=["result"], - output_keys=["final_result"], - ), - ] - - edges = [ - EdgeSpec( - id="webhook-to-complete", - source="process-webhook", - target="complete", - condition=EdgeCondition.ON_SUCCESS, - ), - EdgeSpec( - id="api-to-complete", - source="process-api", - target="complete", - condition=EdgeCondition.ON_SUCCESS, - ), - ] - - return GraphSpec( - id="test-graph", - goal_id="test-goal", - version="1.0.0", - entry_node="process-webhook", - entry_points={"start": "process-webhook"}, - terminal_nodes=["complete"], - pause_nodes=[], - nodes=nodes, - edges=edges, - ) - - -@pytest.fixture -def temp_storage(): - """Create a temporary storage directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) - - -# === SharedBufferManager Tests === - - -class TestSharedBufferManager: - """Tests for SharedBufferManager.""" - - def test_create_buffer(self): - """Test creating execution-scoped buffer.""" - manager = SharedBufferManager() - buffer = manager.create_buffer( - execution_id="exec-1", - stream_id="webhook", - isolation=IsolationLevel.SHARED, - ) - assert buffer is not None - assert buffer._execution_id == "exec-1" - assert buffer._stream_id == "webhook" - - @pytest.mark.asyncio - async def test_isolated_state(self): - """Test isolated state doesn't leak between executions.""" - manager = SharedBufferManager() - - buf1 = manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED) - buf2 = manager.create_buffer("exec-2", "stream-1", IsolationLevel.ISOLATED) - - await buf1.write("key", "value1") - await buf2.write("key", "value2") - - assert await buf1.read("key") == "value1" - assert await buf2.read("key") == "value2" - - @pytest.mark.asyncio - async def test_shared_state(self): - """Test shared state is visible across executions.""" - manager = SharedBufferManager() - - manager.create_buffer("exec-1", "stream-1", 
IsolationLevel.SHARED) - manager.create_buffer("exec-2", "stream-1", IsolationLevel.SHARED) - - # Write to global scope - await manager.write( - key="global_key", - value="global_value", - execution_id="exec-1", - stream_id="stream-1", - isolation=IsolationLevel.SHARED, - scope="global", - ) - - # Both should see it - value1 = await manager.read("global_key", "exec-1", "stream-1", IsolationLevel.SHARED) - value2 = await manager.read("global_key", "exec-2", "stream-1", IsolationLevel.SHARED) - - assert value1 == "global_value" - assert value2 == "global_value" - - def test_cleanup_execution(self): - """Test execution cleanup removes state.""" - manager = SharedBufferManager() - manager.create_buffer("exec-1", "stream-1", IsolationLevel.ISOLATED) - - assert "exec-1" in manager._execution_state - - manager.cleanup_execution("exec-1") - - assert "exec-1" not in manager._execution_state - - -class TestSessionState: - """Tests for session state data-buffer compatibility.""" - - def test_legacy_memory_alias_populates_data_buffer(self): - """Legacy `memory` payloads should still hydrate the session buffer.""" - state = SessionState( - session_id="session-1", - goal_id="goal-1", - timestamps=SessionTimestamps( - started_at="2026-01-01T00:00:00", - updated_at="2026-01-01T00:00:00", - ), - memory={"rules": "keep starred mail"}, - ) - - assert state.data_buffer == {"rules": "keep starred mail"} - assert state.memory == {"rules": "keep starred mail"} - assert state.to_session_state_dict()["data_buffer"] == {"rules": "keep starred mail"} - - -# === EventBus Tests === - - -class TestEventBus: - """Tests for EventBus pub/sub.""" - - @pytest.mark.asyncio - async def test_publish_subscribe(self): - """Test basic publish/subscribe.""" - bus = EventBus() - received_events = [] - - async def handler(event: AgentEvent): - received_events.append(event) - - bus.subscribe( - event_types=[EventType.EXECUTION_STARTED], - handler=handler, - ) - - await bus.publish( - AgentEvent( - 
type=EventType.EXECUTION_STARTED, - stream_id="webhook", - execution_id="exec-1", - data={"test": "data"}, - ) - ) - - # Allow handler to run - await asyncio.sleep(0.1) - - assert len(received_events) == 1 - assert received_events[0].type == EventType.EXECUTION_STARTED - assert received_events[0].stream_id == "webhook" - - @pytest.mark.asyncio - async def test_stream_filter(self): - """Test filtering by stream ID.""" - bus = EventBus() - received_events = [] - - async def handler(event: AgentEvent): - received_events.append(event) - - bus.subscribe( - event_types=[EventType.EXECUTION_STARTED], - handler=handler, - filter_stream="webhook", - ) - - # Publish to webhook stream (should be received) - await bus.publish( - AgentEvent( - type=EventType.EXECUTION_STARTED, - stream_id="webhook", - ) - ) - - # Publish to api stream (should NOT be received) - await bus.publish( - AgentEvent( - type=EventType.EXECUTION_STARTED, - stream_id="api", - ) - ) - - await asyncio.sleep(0.1) - - assert len(received_events) == 1 - assert received_events[0].stream_id == "webhook" - - def test_unsubscribe(self): - """Test unsubscribing from events.""" - bus = EventBus() - - async def handler(event: AgentEvent): - pass - - sub_id = bus.subscribe( - event_types=[EventType.EXECUTION_STARTED], - handler=handler, - ) - - assert sub_id in bus._subscriptions - - result = bus.unsubscribe(sub_id) - - assert result is True - assert sub_id not in bus._subscriptions - - @pytest.mark.asyncio - async def test_wait_for(self): - """Test waiting for a specific event.""" - bus = EventBus() - - # Start waiting in background - async def wait_and_check(): - event = await bus.wait_for( - event_type=EventType.EXECUTION_COMPLETED, - timeout=1.0, - ) - return event - - wait_task = asyncio.create_task(wait_and_check()) - - # Publish the event - await asyncio.sleep(0.1) - await bus.publish( - AgentEvent( - type=EventType.EXECUTION_COMPLETED, - stream_id="webhook", - execution_id="exec-1", - ) - ) - - event = await 
wait_task - - assert event is not None - assert event.type == EventType.EXECUTION_COMPLETED - - -# === OutcomeAggregator Tests === - - -class TestOutcomeAggregator: - """Tests for OutcomeAggregator.""" - - def test_record_decision(self, sample_goal): - """Test recording decisions.""" - aggregator = OutcomeAggregator(sample_goal) - - from framework.schemas.decision import Decision, DecisionType - - decision = Decision( - id="dec-1", - node_id="process-webhook", - intent="Process incoming webhook", - decision_type=DecisionType.PATH_CHOICE, - options=[], - chosen_option_id="opt-1", - reasoning="Standard processing path", - ) - - aggregator.record_decision("webhook", "exec-1", decision) - - assert aggregator._total_decisions == 1 - assert len(aggregator._decisions) == 1 - - @pytest.mark.asyncio - async def test_evaluate_goal_progress(self, sample_goal): - """Test goal progress evaluation.""" - aggregator = OutcomeAggregator(sample_goal) - - progress = await aggregator.evaluate_goal_progress() - - assert "overall_progress" in progress - assert "criteria_status" in progress - assert "constraint_violations" in progress - assert "recommendation" in progress - - def test_record_constraint_violation(self, sample_goal): - """Test recording constraint violations.""" - aggregator = OutcomeAggregator(sample_goal) - - aggregator.record_constraint_violation( - constraint_id="c-1", - description="Rate limit exceeded", - violation_details="More than 100 requests/minute", - stream_id="webhook", - execution_id="exec-1", - ) - - assert len(aggregator._constraint_violations) == 1 - assert aggregator._constraint_violations[0].constraint_id == "c-1" - - -# === AgentRuntime Tests === - - -class TestAgentRuntime: - """Tests for AgentRuntime orchestration.""" - - def test_register_entry_point(self, sample_graph, sample_goal, temp_storage): - """Test registering entry points.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec 
= EntryPointSpec( - id="manual", - name="Manual Trigger", - entry_node="process-webhook", - trigger_type="manual", - ) - - runtime.register_entry_point(entry_spec) - - assert "manual" in runtime._entry_points - assert len(runtime.get_entry_points()) == 1 - - def test_register_duplicate_entry_point_fails(self, sample_graph, sample_goal, temp_storage): - """Test that duplicate entry point IDs fail.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="webhook", - name="Webhook Handler", - entry_node="process-webhook", - trigger_type="webhook", - ) - - runtime.register_entry_point(entry_spec) - - with pytest.raises(ValueError, match="already registered"): - runtime.register_entry_point(entry_spec) - - def test_register_invalid_entry_node_fails(self, sample_graph, sample_goal, temp_storage): - """Test that invalid entry nodes fail.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="invalid", - name="Invalid Entry", - entry_node="nonexistent-node", - trigger_type="manual", - ) - - with pytest.raises(ValueError, match="not found in graph"): - runtime.register_entry_point(entry_spec) - - @pytest.mark.asyncio - async def test_start_stop_lifecycle(self, sample_graph, sample_goal, temp_storage): - """Test runtime start/stop lifecycle.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="webhook", - name="Webhook Handler", - entry_node="process-webhook", - trigger_type="webhook", - ) - - runtime.register_entry_point(entry_spec) - - assert not runtime.is_running - - await runtime.start() - - assert runtime.is_running - assert "webhook" in runtime._streams - - await runtime.stop() - - assert not runtime.is_running - assert len(runtime._streams) == 0 - - @pytest.mark.asyncio - async def 
test_trigger_requires_running(self, sample_graph, sample_goal, temp_storage): - """Test that trigger fails if runtime not running.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="webhook", - name="Webhook Handler", - entry_node="process-webhook", - trigger_type="webhook", - ) - - runtime.register_entry_point(entry_spec) - - with pytest.raises(RuntimeError, match="not running"): - await runtime.trigger("webhook", {"test": "data"}) - - -# === GraphSpec Validation Tests === - - -# === Integration Tests === - - -class TestCreateAgentRuntime: - """Tests for the create_agent_runtime factory.""" - - def test_create_with_entry_points(self, sample_graph, sample_goal, temp_storage): - """Test factory creates runtime with entry points.""" - entry_points = [ - EntryPointSpec( - id="webhook", - name="Webhook", - entry_node="process-webhook", - trigger_type="webhook", - ), - EntryPointSpec( - id="api", - name="API", - entry_node="process-api", - trigger_type="api", - ), - ] - - runtime = create_agent_runtime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - entry_points=entry_points, - ) - - assert len(runtime.get_entry_points()) == 2 - assert "webhook" in runtime._entry_points - assert "api" in runtime._entry_points - - -# === Timer Entry Point Tests === - - -class TestTimerEntryPoints: - """Tests for timer-driven entry points (interval and cron).""" - - @pytest.mark.asyncio - async def test_interval_timer_starts_task(self, sample_graph, sample_goal, temp_storage): - """Test that interval_minutes timer creates an async task.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-interval", - name="Interval Timer", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={"interval_minutes": 60}, - ) - runtime.register_entry_point(entry_spec) - - await 
runtime.start() - try: - assert len(runtime._timer_tasks) == 1 - assert not runtime._timer_tasks[0].done() - # Give the async task a moment to set next_fire - await asyncio.sleep(0.05) - assert "timer-interval" in runtime._timer_next_fire - finally: - await runtime.stop() - - assert len(runtime._timer_tasks) == 0 - - @pytest.mark.asyncio - async def test_cron_timer_starts_task(self, sample_graph, sample_goal, temp_storage): - """Test that cron expression timer creates an async task.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-cron", - name="Cron Timer", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={"cron": "*/5 * * * *"}, # Every 5 minutes - ) - runtime.register_entry_point(entry_spec) - - await runtime.start() - try: - assert len(runtime._timer_tasks) == 1 - assert not runtime._timer_tasks[0].done() - # Give the async task a moment to set next_fire - await asyncio.sleep(0.05) - assert "timer-cron" in runtime._timer_next_fire - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_invalid_cron_expression_skipped( - self, sample_graph, sample_goal, temp_storage, caplog - ): - """Test that an invalid cron expression logs a warning and skips.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-bad-cron", - name="Bad Cron Timer", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={"cron": "not a cron expression"}, - ) - runtime.register_entry_point(entry_spec) - - await runtime.start() - try: - assert len(runtime._timer_tasks) == 0 - assert "invalid cron" in caplog.text.lower() or "Invalid cron" in caplog.text - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_cron_takes_priority_over_interval( - self, sample_graph, sample_goal, temp_storage, caplog - ): - """Test that when 
both cron and interval_minutes are set, cron wins.""" - import logging - - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-both", - name="Both Timer", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={"cron": "0 9 * * *", "interval_minutes": 30}, - ) - runtime.register_entry_point(entry_spec) - - with caplog.at_level(logging.INFO): - await runtime.start() - try: - assert len(runtime._timer_tasks) == 1 - # Should log cron, not interval - assert any("cron" in r.message.lower() for r in caplog.records) - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_no_interval_or_cron_warns(self, sample_graph, sample_goal, temp_storage, caplog): - """Test that timer with neither cron nor interval_minutes logs a warning.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-empty", - name="Empty Timer", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={}, - ) - runtime.register_entry_point(entry_spec) - - await runtime.start() - try: - assert len(runtime._timer_tasks) == 0 - assert "no 'cron' or valid 'interval_minutes'" in caplog.text - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_cron_immediate_fires_first(self, sample_graph, sample_goal, temp_storage): - """Test that run_immediately=True with cron doesn't set next_fire before first run.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="timer-cron-immediate", - name="Cron Immediate", - entry_node="process-webhook", - trigger_type="timer", - trigger_config={"cron": "0 0 * * *", "run_immediately": True}, - ) - runtime.register_entry_point(entry_spec) - - await runtime.start() - try: - assert len(runtime._timer_tasks) == 1 - # With run_immediately, the 
task enters the while loop directly, - # so _timer_next_fire is NOT set before the first trigger attempt - # (it pops it at the top of the loop) - # Give it a moment to start executing - await asyncio.sleep(0.05) - # Task should still be running (it will try to trigger and likely fail - # since there's no LLM, but the task itself continues) - assert not runtime._timer_tasks[0].done() - finally: - await runtime.stop() - - -# === Cancel All Tasks Tests === - - -class TestCancelAllTasks: - """Tests for cancel_all_tasks and cancel_all_tasks_async.""" - - @pytest.mark.asyncio - async def test_cancel_all_tasks_async_returns_false_when_no_tasks( - self, sample_graph, sample_goal, temp_storage - ): - """Test that cancel_all_tasks_async returns False with no running tasks.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="webhook", - name="Webhook", - entry_node="process-webhook", - trigger_type="webhook", - ) - runtime.register_entry_point(entry_spec) - await runtime.start() - - try: - result = await runtime.cancel_all_tasks_async() - assert result is False - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_cancel_all_tasks_async_cancels_running_task( - self, sample_graph, sample_goal, temp_storage - ): - """Test that cancel_all_tasks_async cancels a running task and returns True.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - entry_spec = EntryPointSpec( - id="webhook", - name="Webhook", - entry_node="process-webhook", - trigger_type="webhook", - ) - runtime.register_entry_point(entry_spec) - await runtime.start() - - try: - # Inject a fake running task into the stream - stream = runtime._streams["webhook"] - - async def hang_forever(): - await asyncio.get_event_loop().create_future() - - fake_task = asyncio.ensure_future(hang_forever()) - stream._execution_tasks["fake-exec"] = fake_task - - 
result = await runtime.cancel_all_tasks_async() - assert result is True - - # Let the CancelledError propagate - try: - await fake_task - except asyncio.CancelledError: - pass - assert fake_task.cancelled() - - # Clean up - del stream._execution_tasks["fake-exec"] - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_cancel_all_tasks_async_cancels_multiple_tasks_across_streams( - self, sample_graph, sample_goal, temp_storage - ): - """Test that cancel_all_tasks_async cancels tasks across multiple streams.""" - runtime = AgentRuntime( - graph=sample_graph, - goal=sample_goal, - storage_path=temp_storage, - ) - - # Register two entry points so we get two streams - runtime.register_entry_point( - EntryPointSpec( - id="stream-a", - name="Stream A", - entry_node="process-webhook", - trigger_type="webhook", - ) - ) - runtime.register_entry_point( - EntryPointSpec( - id="stream-b", - name="Stream B", - entry_node="process-webhook", - trigger_type="webhook", - ) - ) - await runtime.start() - - try: - - async def hang_forever(): - await asyncio.get_event_loop().create_future() - - stream_a = runtime._streams["stream-a"] - stream_b = runtime._streams["stream-b"] - - # Two tasks in stream A, one task in stream B - task_a1 = asyncio.ensure_future(hang_forever()) - task_a2 = asyncio.ensure_future(hang_forever()) - task_b1 = asyncio.ensure_future(hang_forever()) - - stream_a._execution_tasks["exec-a1"] = task_a1 - stream_a._execution_tasks["exec-a2"] = task_a2 - stream_b._execution_tasks["exec-b1"] = task_b1 - - result = await runtime.cancel_all_tasks_async() - assert result is True - - # Let CancelledErrors propagate - for task in [task_a1, task_a2, task_b1]: - try: - await task - except asyncio.CancelledError: - pass - assert task.cancelled() - - # Clean up - del stream_a._execution_tasks["exec-a1"] - del stream_a._execution_tasks["exec-a2"] - del stream_b._execution_tasks["exec-b1"] - finally: - await runtime.stop() - - -if __name__ == "__main__": - 
pytest.main([__file__, "-v"]) diff --git a/core/framework/runtime/tests/test_idempotency.py b/core/framework/runtime/tests/test_idempotency.py deleted file mode 100644 index 713e037b..00000000 --- a/core/framework/runtime/tests/test_idempotency.py +++ /dev/null @@ -1,268 +0,0 @@ -"""Tests for webhook idempotency key support in AgentRuntime.trigger().""" - -import asyncio -import time -from collections import OrderedDict -from unittest.mock import AsyncMock, MagicMock - -import pytest - -from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig - - -def _make_runtime(ttl=300.0, max_keys=10000): - """Create a minimal AgentRuntime with idempotency cache attributes. - - Uses ``object.__new__`` to skip ``__init__`` and its heavy dependencies - (storage, LLM, skills) — we only need the cache and config for these tests. - """ - runtime = object.__new__(AgentRuntime) - runtime._config = AgentRuntimeConfig(idempotency_ttl_seconds=ttl, idempotency_max_keys=max_keys) - runtime._running = True - runtime._lock = asyncio.Lock() - runtime._idempotency_keys = OrderedDict() - runtime._idempotency_times = {} - runtime._graphs = {} - runtime._active_graph_id = "primary" - runtime._graph_id = "primary" - runtime._streams = {} - runtime._entry_points = {} - return runtime - - -def _make_runtime_with_stream(ttl=300.0, max_keys=10000): - """Create a mock runtime whose stream.execute() returns unique IDs.""" - runtime = _make_runtime(ttl=ttl, max_keys=max_keys) - - call_count = 0 - - async def _fake_execute(*args, **kwargs): - nonlocal call_count - call_count += 1 - return f"session-{call_count:04d}" - - stream = MagicMock() - stream.execute = _fake_execute - runtime._streams = {"webhook": stream} - runtime._entry_points = {"webhook": MagicMock()} - return runtime - - -class TestIdempotencyConfig: - """Verify idempotency configuration defaults.""" - - def test_default_ttl(self): - config = AgentRuntimeConfig() - assert config.idempotency_ttl_seconds == 300.0 - - def 
test_default_max_keys(self): - config = AgentRuntimeConfig() - assert config.idempotency_max_keys == 10000 - - def test_custom_config(self): - config = AgentRuntimeConfig(idempotency_ttl_seconds=60.0, idempotency_max_keys=100) - assert config.idempotency_ttl_seconds == 60.0 - assert config.idempotency_max_keys == 100 - - -class TestIdempotencyCache: - """Test the idempotency cache and pruning logic directly.""" - - def test_cache_stores_and_retrieves_key(self): - runtime = _make_runtime() - runtime._idempotency_keys["stripe-evt-123"] = "exec-001" - runtime._idempotency_times["stripe-evt-123"] = time.time() - - assert runtime._idempotency_keys.get("stripe-evt-123") == "exec-001" - - def test_cache_returns_none_for_unknown_key(self): - runtime = _make_runtime() - assert runtime._idempotency_keys.get("unknown") is None - - def test_prune_removes_expired_keys(self): - runtime = _make_runtime(ttl=0.1) - - runtime._idempotency_keys["old-key"] = "exec-old" - runtime._idempotency_times["old-key"] = time.time() - 1.0 # expired - - runtime._prune_idempotency_keys() - - assert "old-key" not in runtime._idempotency_keys - assert "old-key" not in runtime._idempotency_times - - def test_prune_keeps_fresh_keys(self): - runtime = _make_runtime(ttl=300.0) - - runtime._idempotency_keys["fresh-key"] = "exec-fresh" - runtime._idempotency_times["fresh-key"] = time.time() - - runtime._prune_idempotency_keys() - - assert "fresh-key" in runtime._idempotency_keys - - def test_prune_respects_max_keys(self): - runtime = _make_runtime(max_keys=2) - - for i in range(3): - key = f"key-{i}" - runtime._idempotency_keys[key] = f"exec-{i}" - runtime._idempotency_times[key] = time.time() - - runtime._prune_idempotency_keys() - - assert len(runtime._idempotency_keys) == 2 - # Oldest (key-0) should be evicted - assert "key-0" not in runtime._idempotency_keys - assert "key-1" in runtime._idempotency_keys - assert "key-2" in runtime._idempotency_keys - - def test_prune_evicts_fifo(self): - runtime = 
_make_runtime(max_keys=1) - - runtime._idempotency_keys["first"] = "exec-1" - runtime._idempotency_times["first"] = time.time() - runtime._idempotency_keys["second"] = "exec-2" - runtime._idempotency_times["second"] = time.time() - - runtime._prune_idempotency_keys() - - assert len(runtime._idempotency_keys) == 1 - assert "second" in runtime._idempotency_keys - assert "first" not in runtime._idempotency_keys - - def test_mixed_expired_and_max_size(self): - runtime = _make_runtime(ttl=0.1, max_keys=2) - - # Add expired key - runtime._idempotency_keys["expired"] = "exec-e" - runtime._idempotency_times["expired"] = time.time() - 1.0 - - # Add fresh keys - runtime._idempotency_keys["fresh-1"] = "exec-f1" - runtime._idempotency_times["fresh-1"] = time.time() - runtime._idempotency_keys["fresh-2"] = "exec-f2" - runtime._idempotency_times["fresh-2"] = time.time() - - runtime._prune_idempotency_keys() - - assert "expired" not in runtime._idempotency_keys - assert "fresh-1" in runtime._idempotency_keys - assert "fresh-2" in runtime._idempotency_keys - - -class TestTriggerIdempotency: - """Tests for trigger() idempotency deduplication.""" - - def test_trigger_accepts_idempotency_key(self): - """trigger() accepts idempotency_key as a keyword argument.""" - import inspect - - sig = inspect.signature(AgentRuntime.trigger) - assert "idempotency_key" in sig.parameters - - def test_idempotency_key_defaults_to_none(self): - """idempotency_key defaults to None (backward compatible).""" - import inspect - - sig = inspect.signature(AgentRuntime.trigger) - assert sig.parameters["idempotency_key"].default is None - - def test_trigger_and_wait_accepts_idempotency_key(self): - """trigger_and_wait() also accepts idempotency_key.""" - import inspect - - sig = inspect.signature(AgentRuntime.trigger_and_wait) - assert "idempotency_key" in sig.parameters - - def test_trigger_and_wait_idempotency_key_defaults_to_none(self): - """trigger_and_wait() idempotency_key defaults to None.""" - import 
inspect - - sig = inspect.signature(AgentRuntime.trigger_and_wait) - assert sig.parameters["idempotency_key"].default is None - - @pytest.mark.asyncio - async def test_duplicate_key_returns_cached_id(self): - """Same idempotency key within TTL returns the cached execution ID.""" - runtime = _make_runtime_with_stream() - - first = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001") - second = await runtime.trigger("webhook", {}, idempotency_key="stripe-evt-001") - - assert first == second - assert first == "session-0001" - - @pytest.mark.asyncio - async def test_different_keys_produce_different_ids(self): - """Different idempotency keys start separate executions.""" - runtime = _make_runtime_with_stream() - - id_a = await runtime.trigger("webhook", {}, idempotency_key="evt-aaa") - id_b = await runtime.trigger("webhook", {}, idempotency_key="evt-bbb") - - assert id_a != id_b - assert id_a == "session-0001" - assert id_b == "session-0002" - - @pytest.mark.asyncio - async def test_none_key_always_starts_new_execution(self): - """key=None (default) skips dedup — every call starts fresh.""" - runtime = _make_runtime_with_stream() - - id_1 = await runtime.trigger("webhook", {}) - id_2 = await runtime.trigger("webhook", {}) - - assert id_1 != id_2 - assert len(runtime._idempotency_keys) == 0 # nothing cached - - @pytest.mark.asyncio - async def test_expired_key_allows_new_execution(self): - """After TTL expires, the same key starts a new execution.""" - runtime = _make_runtime_with_stream(ttl=0.1) - - first = await runtime.trigger("webhook", {}, idempotency_key="evt-expire") - - # Backdate the cached timestamp so the key looks expired - runtime._idempotency_times["evt-expire"] = time.time() - 1.0 - - second = await runtime.trigger("webhook", {}, idempotency_key="evt-expire") - - assert first != second - assert first == "session-0001" - assert second == "session-0002" - - @pytest.mark.asyncio - async def test_stream_not_found_does_not_cache(self): - """If 
entry point doesn't exist, nothing is cached.""" - runtime = _make_runtime_with_stream() - - with pytest.raises(ValueError, match="not found"): - await runtime.trigger("nonexistent", {}, idempotency_key="evt-orphan") - - assert "evt-orphan" not in runtime._idempotency_keys - - @pytest.mark.asyncio - async def test_execute_error_does_not_cache(self): - """If stream.execute() raises, nothing is cached so retries can go through.""" - runtime = _make_runtime() - - failing_stream = MagicMock() - failing_stream.execute = AsyncMock(side_effect=RuntimeError("stream not running")) - runtime._streams = {"webhook": failing_stream} - runtime._entry_points = {"webhook": MagicMock()} - - with pytest.raises(RuntimeError, match="stream not running"): - await runtime.trigger("webhook", {}, idempotency_key="evt-123") - - assert "evt-123" not in runtime._idempotency_keys - - @pytest.mark.asyncio - async def test_cache_holds_real_execution_id(self): - """Cached value matches the actual execution ID from execute().""" - runtime = _make_runtime_with_stream() - - exec_id = await runtime.trigger("webhook", {}, idempotency_key="evt-real") - - cached = runtime._idempotency_keys.get("evt-real") - assert cached == exec_id - assert cached == "session-0001" diff --git a/core/framework/runtime/tests/test_runtime_logging_paths.py b/core/framework/runtime/tests/test_runtime_logging_paths.py deleted file mode 100644 index 3eb60ce0..00000000 --- a/core/framework/runtime/tests/test_runtime_logging_paths.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Tests for custom session-backed runtime logging paths.""" - -from pathlib import Path -from unittest.mock import MagicMock - -from framework.graph.executor import GraphExecutor -from framework.runtime.runtime_log_store import RuntimeLogStore -from framework.runtime.runtime_logger import RuntimeLogger - - -def test_graph_executor_uses_custom_session_dir_name_for_runtime_logs(): - executor = GraphExecutor( - runtime=MagicMock(), - 
storage_path=Path("/tmp/test-agent/sessions/my-custom-session"), - ) - - assert executor._get_runtime_log_session_id() == "my-custom-session" - - -def test_runtime_logger_creates_session_log_dir_for_custom_session_id(tmp_path): - base = tmp_path / ".hive" / "agents" / "test_agent" - base.mkdir(parents=True) - store = RuntimeLogStore(base) - logger = RuntimeLogger(store=store, agent_id="test-agent") - - run_id = logger.start_run(goal_id="goal-1", session_id="my-custom-session") - - assert run_id == "my-custom-session" - assert (base / "sessions" / "my-custom-session" / "logs").is_dir() diff --git a/core/framework/runtime/tests/test_webhook_server.py b/core/framework/runtime/tests/test_webhook_server.py deleted file mode 100644 index 13d2628c..00000000 --- a/core/framework/runtime/tests/test_webhook_server.py +++ /dev/null @@ -1,716 +0,0 @@ -""" -Tests for WebhookServer and event-driven entry points. -""" - -import asyncio -import hashlib -import hmac as hmac_mod -import json -import tempfile -from pathlib import Path -from unittest.mock import patch - -import aiohttp -import pytest - -from framework.runtime.agent_runtime import AgentRuntime, AgentRuntimeConfig -from framework.runtime.event_bus import AgentEvent, EventBus, EventType -from framework.runtime.execution_stream import EntryPointSpec -from framework.runtime.webhook_server import ( - WebhookRoute, - WebhookServer, - WebhookServerConfig, -) - - -def _make_server(event_bus: EventBus, routes: list[WebhookRoute] | None = None): - """Helper to create a WebhookServer with port=0 for OS-assigned port.""" - config = WebhookServerConfig(host="127.0.0.1", port=0) - server = WebhookServer(event_bus, config) - for route in routes or []: - server.add_route(route) - return server - - -def _base_url(server: WebhookServer) -> str: - """Get the base URL for a running server.""" - return f"http://127.0.0.1:{server.port}" - - -class TestWebhookServerLifecycle: - """Tests for server start/stop.""" - - @pytest.mark.asyncio - 
async def test_start_stop(self): - bus = EventBus() - server = _make_server( - bus, - [ - WebhookRoute(source_id="test", path="/webhooks/test", methods=["POST"]), - ], - ) - - await server.start() - assert server.is_running - assert server.port is not None - - await server.stop() - assert not server.is_running - assert server.port is None - - @pytest.mark.asyncio - async def test_no_routes_skips_start(self): - bus = EventBus() - server = _make_server(bus) # no routes - - await server.start() - assert not server.is_running - - @pytest.mark.asyncio - async def test_stop_when_not_started(self): - bus = EventBus() - server = _make_server(bus) - - # Should be a no-op, not raise - await server.stop() - assert not server.is_running - - -class TestWebhookEventPublishing: - """Tests for HTTP request -> EventBus event publishing.""" - - @pytest.mark.asyncio - async def test_post_publishes_webhook_received(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute(source_id="gh", path="/webhooks/github", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/github", - json={"action": "opened", "number": 42}, - ) as resp: - assert resp.status == 202 - body = await resp.json() - assert body["status"] == "accepted" - - # Give event bus time to dispatch - await asyncio.sleep(0.05) - - assert len(received) == 1 - event = received[0] - assert event.type == EventType.WEBHOOK_RECEIVED - assert event.stream_id == "gh" - assert event.data["path"] == "/webhooks/github" - assert event.data["method"] == "POST" - assert event.data["payload"] == {"action": "opened", "number": 42} - assert isinstance(event.data["headers"], dict) - assert event.data["query_params"] == {} - finally: - await server.stop() - - @pytest.mark.asyncio - 
async def test_query_params_included(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute(source_id="hook", path="/webhooks/hook", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/hook?source=test&v=2", - json={"data": "hello"}, - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.05) - - assert len(received) == 1 - assert received[0].data["query_params"] == {"source": "test", "v": "2"} - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_non_json_body(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute(source_id="raw", path="/webhooks/raw", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/raw", - data=b"plain text body", - headers={"Content-Type": "text/plain"}, - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.05) - - assert len(received) == 1 - assert received[0].data["payload"] == {"raw_body": "plain text body"} - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_empty_body(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute(source_id="empty", path="/webhooks/empty", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post(f"{_base_url(server)}/webhooks/empty") as resp: - assert resp.status == 202 - - await 
asyncio.sleep(0.05) - - assert len(received) == 1 - assert received[0].data["payload"] == {} - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_multiple_routes(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]), - WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/a", json={"from": "a"} - ) as resp: - assert resp.status == 202 - - async with session.post( - f"{_base_url(server)}/webhooks/b", json={"from": "b"} - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.05) - - assert len(received) == 2 - stream_ids = {e.stream_id for e in received} - assert stream_ids == {"a", "b"} - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_filter_stream_subscription(self): - """Subscribers can filter by stream_id (source_id).""" - bus = EventBus() - a_events = [] - b_events = [] - - async def handle_a(event): - a_events.append(event) - - async def handle_b(event): - b_events.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_a, filter_stream="a") - bus.subscribe([EventType.WEBHOOK_RECEIVED], handle_b, filter_stream="b") - - server = _make_server( - bus, - [ - WebhookRoute(source_id="a", path="/webhooks/a", methods=["POST"]), - WebhookRoute(source_id="b", path="/webhooks/b", methods=["POST"]), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - await session.post(f"{_base_url(server)}/webhooks/a", json={"x": 1}) - await session.post(f"{_base_url(server)}/webhooks/b", json={"x": 2}) - - await asyncio.sleep(0.05) - - assert len(a_events) == 1 - assert a_events[0].data["payload"] == {"x": 1} - 
assert len(b_events) == 1 - assert b_events[0].data["payload"] == {"x": 2} - finally: - await server.stop() - - -class TestHMACVerification: - """Tests for HMAC-SHA256 signature verification.""" - - @pytest.mark.asyncio - async def test_valid_signature_accepted(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - secret = "test-secret-key" - server = _make_server( - bus, - [ - WebhookRoute( - source_id="secure", - path="/webhooks/secure", - methods=["POST"], - secret=secret, - ), - ], - ) - await server.start() - - try: - body = json.dumps({"event": "push"}).encode() - sig = hmac_mod.new(secret.encode(), body, hashlib.sha256).hexdigest() - - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/secure", - data=body, - headers={ - "Content-Type": "application/json", - "X-Hub-Signature-256": f"sha256={sig}", - }, - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.05) - assert len(received) == 1 - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_invalid_signature_rejected(self): - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute( - source_id="secure", - path="/webhooks/secure", - methods=["POST"], - secret="real-secret", - ), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/secure", - json={"event": "push"}, - headers={"X-Hub-Signature-256": "sha256=invalidsignature"}, - ) as resp: - assert resp.status == 401 - - await asyncio.sleep(0.05) - assert len(received) == 0 # No event published - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_missing_signature_rejected(self): - bus = EventBus() - received = [] 
- - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute( - source_id="secure", - path="/webhooks/secure", - methods=["POST"], - secret="my-secret", - ), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - # No X-Hub-Signature-256 header - async with session.post( - f"{_base_url(server)}/webhooks/secure", - json={"event": "push"}, - ) as resp: - assert resp.status == 401 - - await asyncio.sleep(0.05) - assert len(received) == 0 - finally: - await server.stop() - - @pytest.mark.asyncio - async def test_no_secret_skips_verification(self): - """Routes without a secret accept any request.""" - bus = EventBus() - received = [] - - async def handler(event): - received.append(event) - - bus.subscribe([EventType.WEBHOOK_RECEIVED], handler) - - server = _make_server( - bus, - [ - WebhookRoute( - source_id="open", - path="/webhooks/open", - methods=["POST"], - secret=None, - ), - ], - ) - await server.start() - - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{_base_url(server)}/webhooks/open", - json={"data": "test"}, - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.05) - assert len(received) == 1 - finally: - await server.stop() - - -class TestEventDrivenEntryPoints: - """Tests for event-driven entry points wired through AgentRuntime.""" - - def _make_graph_and_goal(self): - """Minimal graph + goal for testing entry point triggering.""" - from framework.graph import Goal - from framework.graph.edge import GraphSpec - from framework.graph.goal import SuccessCriterion - from framework.graph.node import NodeSpec - - nodes = [ - NodeSpec( - id="process-event", - name="Process Event", - description="Process incoming event", - node_type="event_loop", - input_keys=["event"], - output_keys=["result"], - ), - ] - graph = GraphSpec( - id="test-graph", - goal_id="test-goal", - 
version="1.0.0", - entry_node="process-event", - entry_points={"start": "process-event"}, - terminal_nodes=[], - pause_nodes=[], - nodes=nodes, - edges=[], - ) - goal = Goal( - id="test-goal", - name="Test Goal", - description="Test", - success_criteria=[ - SuccessCriterion( - id="sc-1", - description="Done", - metric="done", - target="yes", - weight=1.0, - ), - ], - ) - return graph, goal - - @pytest.mark.asyncio - async def test_event_entry_point_subscribes_to_bus(self): - """Entry point with trigger_type='event' subscribes and triggers on matching events.""" - graph, goal = self._make_graph_and_goal() - - config = AgentRuntimeConfig( - webhook_host="127.0.0.1", - webhook_port=0, - webhook_routes=[ - {"source_id": "gh", "path": "/webhooks/github"}, - ], - ) - - with tempfile.TemporaryDirectory() as tmpdir: - runtime = AgentRuntime( - graph=graph, - goal=goal, - storage_path=Path(tmpdir), - config=config, - ) - - runtime.register_entry_point( - EntryPointSpec( - id="gh-handler", - name="GitHub Handler", - entry_node="process-event", - trigger_type="event", - trigger_config={ - "event_types": ["webhook_received"], - "filter_stream": "gh", - }, - ) - ) - - trigger_calls = [] - - async def mock_trigger(ep_id, data, **kwargs): - trigger_calls.append((ep_id, data)) - - with patch.object(runtime, "trigger", side_effect=mock_trigger): - await runtime.start() - - try: - assert runtime.webhook_server is not None - assert runtime.webhook_server.is_running - - port = runtime.webhook_server.port - async with aiohttp.ClientSession() as session: - async with session.post( - f"http://127.0.0.1:{port}/webhooks/github", - json={"action": "push", "ref": "main"}, - ) as resp: - assert resp.status == 202 - - await asyncio.sleep(0.1) - - assert len(trigger_calls) == 1 - ep_id, data = trigger_calls[0] - assert ep_id == "gh-handler" - assert "event" in data - assert data["event"]["type"] == "webhook_received" - assert data["event"]["stream_id"] == "gh" - assert 
data["event"]["data"]["payload"] == { - "action": "push", - "ref": "main", - } - finally: - await runtime.stop() - - assert runtime.webhook_server is None - - @pytest.mark.asyncio - async def test_event_entry_point_filter_stream(self): - """Entry point only triggers for matching stream_id (source_id).""" - graph, goal = self._make_graph_and_goal() - - config = AgentRuntimeConfig( - webhook_routes=[ - {"source_id": "github", "path": "/webhooks/github"}, - {"source_id": "stripe", "path": "/webhooks/stripe"}, - ], - webhook_port=0, - ) - - with tempfile.TemporaryDirectory() as tmpdir: - runtime = AgentRuntime( - graph=graph, - goal=goal, - storage_path=Path(tmpdir), - config=config, - ) - - runtime.register_entry_point( - EntryPointSpec( - id="gh-only", - name="GitHub Only", - entry_node="process-event", - trigger_type="event", - trigger_config={ - "event_types": ["webhook_received"], - "filter_stream": "github", - }, - ) - ) - - trigger_calls = [] - - async def mock_trigger(ep_id, data, **kwargs): - trigger_calls.append((ep_id, data)) - - with patch.object(runtime, "trigger", side_effect=mock_trigger): - await runtime.start() - - try: - port = runtime.webhook_server.port - async with aiohttp.ClientSession() as session: - # POST to stripe — should NOT trigger - await session.post( - f"http://127.0.0.1:{port}/webhooks/stripe", - json={"type": "payment"}, - ) - # POST to github — should trigger - await session.post( - f"http://127.0.0.1:{port}/webhooks/github", - json={"action": "opened"}, - ) - - await asyncio.sleep(0.1) - - assert len(trigger_calls) == 1 - assert trigger_calls[0][0] == "gh-only" - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_no_webhook_routes_skips_server(self): - """Runtime without webhook_routes does not start a webhook server.""" - graph, goal = self._make_graph_and_goal() - - with tempfile.TemporaryDirectory() as tmpdir: - runtime = AgentRuntime( - graph=graph, - goal=goal, - storage_path=Path(tmpdir), - ) - - 
runtime.register_entry_point( - EntryPointSpec( - id="manual", - name="Manual", - entry_node="process-event", - trigger_type="manual", - ) - ) - - await runtime.start() - try: - assert runtime.webhook_server is None - finally: - await runtime.stop() - - @pytest.mark.asyncio - async def test_event_entry_point_custom_event(self): - """Entry point can subscribe to CUSTOM events, not just webhooks.""" - graph, goal = self._make_graph_and_goal() - - with tempfile.TemporaryDirectory() as tmpdir: - runtime = AgentRuntime( - graph=graph, - goal=goal, - storage_path=Path(tmpdir), - ) - - runtime.register_entry_point( - EntryPointSpec( - id="custom-handler", - name="Custom Handler", - entry_node="process-event", - trigger_type="event", - trigger_config={ - "event_types": ["custom"], - }, - ) - ) - - trigger_calls = [] - - async def mock_trigger(ep_id, data, **kwargs): - trigger_calls.append((ep_id, data)) - - with patch.object(runtime, "trigger", side_effect=mock_trigger): - await runtime.start() - - try: - await runtime.event_bus.publish( - AgentEvent( - type=EventType.CUSTOM, - stream_id="some-source", - data={"key": "value"}, - ) - ) - - await asyncio.sleep(0.1) - - assert len(trigger_calls) == 1 - assert trigger_calls[0][0] == "custom-handler" - assert trigger_calls[0][1]["event"]["type"] == "custom" - assert trigger_calls[0][1]["event"]["data"]["key"] == "value" - finally: - await runtime.stop() diff --git a/core/framework/schemas/agent_config.py b/core/framework/schemas/agent_config.py new file mode 100644 index 00000000..7c65c844 --- /dev/null +++ b/core/framework/schemas/agent_config.py @@ -0,0 +1,192 @@ +"""Declarative agent configuration schema. + +Allows defining agents via JSON/YAML config files instead of Python modules. +The ``AgentConfig`` model is the top-level schema loaded from ``agent.json``. +The runner detects this format by checking for a ``name`` key at the top level. 
+ +Template variables +------------------ +System prompts and identity_prompt support ``{{variable_name}}`` placeholders. +These are resolved at load time from ``AgentConfig.variables``. +""" + +from __future__ import annotations + +from pydantic import BaseModel, Field + + +class ToolAccessConfig(BaseModel): + """Declarative tool access policy. + + Controls which tools a node/agent has access to. + + * ``all`` -- every tool from the registry. + * ``explicit`` -- only tools listed in ``allowed`` (default; empty = zero tools). + * ``none`` -- no tools at all. + """ + + policy: str = Field( + default="explicit", + description="One of: 'all', 'explicit', 'none'.", + ) + allowed: list[str] = Field( + default_factory=list, + description="Tool names when policy='explicit'.", + ) + denied: list[str] = Field( + default_factory=list, + description="Tool names to deny (applied after allowed).", + ) + + +class NodeConfig(BaseModel): + """Declarative node definition.""" + + id: str + name: str | None = None + description: str | None = None + node_type: str = Field( + default="event_loop", + description="event_loop", + ) + system_prompt: str | None = None + tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig) + model: str | None = None + input_keys: list[str] = Field(default_factory=list) + output_keys: list[str] = Field(default_factory=list) + nullable_output_keys: list[str] = Field(default_factory=list) + max_iterations: int = 30 + max_node_visits: int = 1 + client_facing: bool = False + success_criteria: str | None = None + failure_criteria: str | None = None + skip_judge: bool = False + max_retries: int | None = None + + +class EdgeConfig(BaseModel): + """Declarative edge definition.""" + + from_node: str = Field(description="Source node ID.") + to_node: str = Field(description="Target node ID.") + condition: str = Field( + default="on_success", + description="always | on_success | on_failure | conditional | llm_decide", + ) + condition_expr: str | None = None 
+ input_mapping: dict[str, str] = Field(default_factory=dict) + priority: int = 1 + + +class GoalConfig(BaseModel): + """Simplified goal definition for declarative config.""" + + description: str + success_criteria: list[str] = Field(default_factory=list) + constraints: list[str] = Field(default_factory=list) + + +class EntryPointConfig(BaseModel): + """Entry point configuration.""" + + id: str = "default" + name: str = "Default" + entry_node: str | None = None # defaults to AgentConfig.entry_node + trigger_type: str = Field( + default="manual", + description="manual | scheduled | timer", + ) + trigger_config: dict = Field(default_factory=dict) + isolation_level: str = "shared" + max_concurrent: int | None = None + + +class MCPServerRef(BaseModel): + """Reference to an MCP server to connect for this agent.""" + + name: str + config: dict | None = None + + +class MetadataConfig(BaseModel): + """Agent metadata for display / intro messages.""" + + intro_message: str = "" + + +class AgentConfig(BaseModel): + """Top-level declarative agent configuration. + + Load from ``agent.json`` and pass to + :func:`framework.runner.runner.load_agent_config` to build the + ``GraphSpec`` + ``Goal`` pair. + + Example (YAML):: + + name: lead-enrichment-agent + version: 1.0.0 + variables: + spreadsheet_id: "1ZVx..." + sheet_name: "contacts" + goal: + description: "Enrich leads in Google Sheets" + success_criteria: + - "All unprocessed leads enriched" + constraints: + - "Browser-only research" + identity_prompt: | + You are the Lead Enrichment Agent... + nodes: + - id: start + tools: {policy: explicit, allowed: [google_sheets_get_values]} + system_prompt: | + Spreadsheet ID: {{spreadsheet_id}} + ... 
+ """ + + name: str + version: str = "1.0.0" + description: str | None = None + metadata: MetadataConfig = Field(default_factory=MetadataConfig) + + # Template variables -- substituted into prompts via {{var_name}} + variables: dict[str, str] = Field(default_factory=dict) + + # Goal + goal: GoalConfig + + # Graph structure + nodes: list[NodeConfig] + edges: list[EdgeConfig] + entry_node: str + terminal_nodes: list[str] = Field(default_factory=list) + pause_nodes: list[str] = Field(default_factory=list) + + # Entry points (if omitted, a single "default" manual entry is created) + entry_points: list[EntryPointConfig] = Field(default_factory=list) + + # Agent-level tool defaults (nodes inherit unless they override) + tools: ToolAccessConfig = Field(default_factory=ToolAccessConfig) + mcp_servers: list[MCPServerRef] = Field(default_factory=list) + + # LLM / execution + model: str | None = None + max_tokens: int = 4096 + conversation_mode: str = "continuous" + identity_prompt: str = "" + loop_config: dict = Field( + default_factory=lambda: { + "max_iterations": 100, + "max_tool_calls_per_turn": 30, + "max_context_tokens": 32000, + }, + ) + + # Pipeline overrides (per-agent, merged with global config) + pipeline: dict = Field( + default_factory=dict, + description="Per-agent pipeline stage overrides. 
Same format as global pipeline config.", + ) + + # Resource limits + max_cost_per_run: float | None = None diff --git a/core/framework/schemas/session_state.py b/core/framework/schemas/session_state.py index 7b143985..4fcecd7c 100644 --- a/core/framework/schemas/session_state.py +++ b/core/framework/schemas/session_state.py @@ -12,7 +12,7 @@ from typing import TYPE_CHECKING, Any from pydantic import AliasChoices, BaseModel, Field, computed_field if TYPE_CHECKING: - from framework.graph.executor import ExecutionResult + from framework.orchestrator.orchestrator import ExecutionResult from framework.schemas.run import Run diff --git a/core/framework/server/app.py b/core/framework/server/app.py index 32c2c081..e014f01d 100644 --- a/core/framework/server/app.py +++ b/core/framework/server/app.py @@ -28,8 +28,11 @@ def _get_allowed_agent_roots() -> tuple[Path, ...]: """ global _ALLOWED_AGENT_ROOTS if _ALLOWED_AGENT_ROOTS is None: + from framework.config import COLONIES_DIR + _ALLOWED_AGENT_ROOTS = ( - (_REPO_ROOT / "exports").resolve(), + COLONIES_DIR.resolve(), # ~/.hive/colonies/ + (_REPO_ROOT / "exports").resolve(), # compat fallback (_REPO_ROOT / "examples").resolve(), (Path.home() / ".hive" / "agents").resolve(), ) @@ -53,7 +56,8 @@ def validate_agent_path(agent_path: str | Path) -> Path: if resolved.is_relative_to(root) and resolved != root: return resolved raise ValueError( - "agent_path must be inside an allowed directory (exports/, examples/, or ~/.hive/agents/)" + "agent_path must be inside an allowed directory " + "(~/.hive/colonies/, exports/, examples/, or ~/.hive/agents/)" ) diff --git a/core/framework/server/queen_orchestrator.py b/core/framework/server/queen_orchestrator.py index 43f5c5dd..4022a5ab 100644 --- a/core/framework/server/queen_orchestrator.py +++ b/core/framework/server/queen_orchestrator.py @@ -32,7 +32,7 @@ async def create_queen( """ from framework.agents.queen.agent import ( queen_goal, - queen_graph as _queen_graph, + queen_loop_config as 
_base_loop_config, ) from framework.agents.queen.nodes import ( _QUEEN_BUILDING_TOOLS, @@ -65,18 +65,15 @@ async def create_queen( _shared_building_knowledge, ) from framework.agents.queen.nodes.thinking_hook import select_expert_persona - from framework.graph.event_loop_node import HookContext, HookResult - from framework.graph.executor import GraphExecutor - from framework.runner.mcp_registry import MCPRegistry - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.core import Runtime - from framework.runtime.event_bus import AgentEvent, EventType + from framework.agent_loop.agent_loop import HookContext, HookResult + from framework.loader.mcp_registry import MCPRegistry + from framework.loader.tool_registry import ToolRegistry + from framework.host.event_bus import AgentEvent, EventType from framework.tools.queen_lifecycle_tools import ( QueenPhaseState, register_queen_lifecycle_tools, ) - hive_home = Path.home() / ".hive" # ---- Tool registry ------------------------------------------------ queen_registry = ToolRegistry() @@ -194,7 +191,7 @@ async def create_queen( phase_state.global_memory_dir = global_dir # ---- Compose phase-specific prompts ------------------------------ - _orig_node = _queen_graph.nodes[0] + from framework.agents.queen.nodes import queen_node as _orig_node if worker_identity is None: worker_identity = ( @@ -348,61 +345,81 @@ async def create_queen( if set(available_tools) != set(declared_tools): missing = sorted(set(declared_tools) - registered_tool_names) if missing: - logger.warning("Queen: tools not available: %s", missing) + logger.debug("Queen: tools not yet available (registered on worker load): %s", missing) node_updates["tools"] = available_tools adjusted_node = _orig_node.model_copy(update=node_updates) _queen_loop_config = { - **(_queen_graph.loop_config or {}), + **_base_loop_config, "hooks": {"session_start": [_persona_hook]}, } - queen_graph = _queen_graph.model_copy( - update={"nodes": 
[adjusted_node], "loop_config": _queen_loop_config} - ) - # ---- Queen event loop -------------------------------------------- - queen_runtime = Runtime(hive_home / "queen") + # ---- Queen event loop (AgentLoop directly, no Orchestrator) ------- + from types import SimpleNamespace + + from framework.agent_loop.agent_loop import AgentLoop, LoopConfig + from framework.storage.conversation_store import FileConversationStore + from framework.orchestrator.node import DataBuffer, NodeContext async def _queen_loop(): logger.debug("[_queen_loop] Starting queen loop for session %s", session.id) try: - logger.debug("[_queen_loop] Creating GraphExecutor...") - executor = GraphExecutor( - runtime=queen_runtime, - llm=session.llm, - tools=queen_tools, - tool_executor=queen_tool_executor, + # Build LoopConfig from the queen graph's config + persona hook + lc = _queen_loop_config + queen_loop_config = LoopConfig( + max_iterations=lc.get("max_iterations", 999_999), + max_tool_calls_per_turn=lc.get("max_tool_calls_per_turn", 30), + max_context_tokens=lc.get("max_context_tokens", 180_000), + hooks=lc.get("hooks", {}), + ) + + # Create AgentLoop directly -- no Orchestrator, no graph traversal + agent_loop = AgentLoop( event_bus=session.event_bus, + config=queen_loop_config, + tool_executor=queen_tool_executor, + conversation_store=FileConversationStore(queen_dir / "conversations"), + ) + + # Build NodeContext manually + from framework.tracker.decision_tracker import DecisionTracker + + ctx = NodeContext( + runtime=DecisionTracker(queen_dir), + node_id="queen", + node_spec=adjusted_node, + buffer=DataBuffer(), + llm=session.llm, + available_tools=queen_tools, + goal_context=queen_goal.description, + max_tokens=lc.get("max_tokens", 8192), stream_id="queen", - storage_path=queen_dir, - loop_config=_queen_loop_config, execution_id=session.id, dynamic_tools_provider=phase_state.get_current_tools, dynamic_prompt_provider=phase_state.get_current_prompt, iteration_metadata_provider=lambda: 
{"phase": phase_state.phase}, - skill_dirs=_queen_skill_dirs, - protocols_prompt=phase_state.protocols_prompt, skills_catalog_prompt=phase_state.skills_catalog_prompt, + protocols_prompt=phase_state.protocols_prompt, + skill_dirs=_queen_skill_dirs, + ) + + # Expose for chat handler injection (node_registry compat) + session.queen_executor = SimpleNamespace( + node_registry={"queen": agent_loop}, ) - session.queen_executor = executor - logger.debug("[_queen_loop] GraphExecutor created and stored in session.queen_executor") # Wire inject_notification so phase switches notify the queen LLM async def _inject_phase_notification(content: str) -> None: - node = executor.node_registry.get("queen") - if node is not None and hasattr(node, "inject_event"): - await node.inject_event(content) + await agent_loop.inject_event(content) phase_state.inject_notification = _inject_phase_notification # Auto-switch to editing when worker execution finishes. - # The worker stays loaded — queen can tweak config and re-run. async def _on_worker_done(event): if event.stream_id == "queen": return if phase_state.phase == "running": if event.type == EventType.EXECUTION_COMPLETED: - # Mark worker as configured after first successful run session.worker_configured = True output = event.data.get("output", {}) output_summary = "" @@ -420,7 +437,7 @@ async def create_queen( "Ask if they want to re-run with different input " "or tweak the configuration." ) - else: # EXECUTION_FAILED + else: error = event.data.get("error", "Unknown error") notification = ( "[WORKER_TERMINAL] Worker failed.\n" @@ -430,17 +447,14 @@ async def create_queen( "building/planning if code changes are needed." 
) - node = executor.node_registry.get("queen") - if node is not None and hasattr(node, "inject_event"): - await node.inject_event(notification) - + await agent_loop.inject_event(notification) await phase_state.switch_to_editing(source="auto") session.event_bus.subscribe( event_types=[EventType.EXECUTION_COMPLETED, EventType.EXECUTION_FAILED], handler=_on_worker_done, ) - session_manager._subscribe_worker_handoffs(session, executor) + session_manager._subscribe_worker_handoffs(session, session.queen_executor) # ---- Global memory reflection + recall ------------------------- from framework.agents.queen.reflection_agent import subscribe_reflection_triggers @@ -459,23 +473,23 @@ async def create_queen( len(phase_state.get_current_tools()), [t.name for t in phase_state.get_current_tools()], ) - logger.debug("[_queen_loop] Calling executor.execute()...") - result = await executor.execute( - graph=queen_graph, - goal=queen_goal, - input_data={"greeting": initial_prompt or "Session started."}, - session_state={"resume_session_id": session.id}, - ) - logger.debug( - "[_queen_loop] executor.execute() returned with success=%s", result.success - ) - if result.success: - logger.warning("Queen executor returned (should be forever-alive)") - else: - logger.error( - "Queen executor failed: %s", - result.error or "(no error message)", - ) + + # Set the first user message. + # When initial_prompt is None (user opens UI without ?prompt=), + # use a generic greeting so the queen has a user message to + # respond to. The user's real first question arrives via /chat. 
+ ctx.input_data = { + "user_request": initial_prompt or "Hello", + } + + # Run the queen -- forever-alive conversation loop + result = await agent_loop.execute(ctx) + + if result.stop_reason == "complete": + logger.warning("Queen returned (should be forever-alive)") + elif result.error: + logger.error("Queen failed: %s", result.error) + except asyncio.CancelledError: logger.info("[_queen_loop] Queen loop cancelled (normal shutdown)") raise @@ -484,7 +498,8 @@ async def create_queen( raise finally: logger.warning( - "[_queen_loop] Queen loop exiting — clearing queen_executor for session '%s'", + "[_queen_loop] Queen loop exiting — clearing queen_executor " + "for session '%s'", session.id, ) session.queen_executor = None diff --git a/core/framework/server/routes_events.py b/core/framework/server/routes_events.py index 3ef77428..f0c1616e 100644 --- a/core/framework/server/routes_events.py +++ b/core/framework/server/routes_events.py @@ -6,7 +6,7 @@ import logging from aiohttp import web from aiohttp.client_exceptions import ClientConnectionResetError as _AiohttpConnReset -from framework.runtime.event_bus import AgentEvent, EventType +from framework.host.event_bus import AgentEvent, EventType from framework.server.app import resolve_session logger = logging.getLogger(__name__) diff --git a/core/framework/server/routes_execution.py b/core/framework/server/routes_execution.py index f757746b..5efa9c94 100644 --- a/core/framework/server/routes_execution.py +++ b/core/framework/server/routes_execution.py @@ -8,7 +8,7 @@ from typing import Any from aiohttp import web from framework.credentials.validation import validate_agent_credentials -from framework.graph.conversation import LEGACY_RUN_ID +from framework.agent_loop.conversation import LEGACY_RUN_ID from framework.server.app import resolve_session, safe_path_segment, sessions_dir from framework.server.routes_sessions import _credential_error_response @@ -187,7 +187,7 @@ async def handle_chat(request: web.Request) -> 
web.Response: if node is not None and hasattr(node, "inject_event"): # Publish BEFORE inject_event so handlers (e.g. memory recall) # complete before the event loop unblocks and starts the LLM turn. - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType await session.event_bus.publish( AgentEvent( diff --git a/core/framework/server/routes_graphs.py b/core/framework/server/routes_graphs.py index 3b735f5f..16ce63d6 100644 --- a/core/framework/server/routes_graphs.py +++ b/core/framework/server/routes_graphs.py @@ -46,7 +46,7 @@ def _node_to_dict(node) -> dict: "client_facing": node.client_facing, "success_criteria": node.success_criteria, "system_prompt": node.system_prompt or "", - "sub_agents": node.sub_agents, + "sub_agents": getattr(node, "sub_agents", []), } diff --git a/core/framework/server/routes_sessions.py b/core/framework/server/routes_sessions.py index 04f71b08..ab98b3fb 100644 --- a/core/framework/server/routes_sessions.py +++ b/core/framework/server/routes_sessions.py @@ -527,7 +527,7 @@ async def handle_update_trigger_task(request: web.Request) -> web.Response: # Emit SSE event so the frontend updates the graph and detail panel bus = getattr(session, "event_bus", None) if bus: - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType await bus.publish( AgentEvent( @@ -583,7 +583,9 @@ async def handle_session_events_history(request: web.Request) -> web.Response: """ session_id = request.match_info["session_id"] - queen_dir = Path.home() / ".hive" / "queen" / "session" / session_id + from framework.server.session_manager import _queen_session_dir + + queen_dir = _queen_session_dir(session_id) events_path = queen_dir / "events.jsonl" if not events_path.exists(): return web.json_response({"events": [], "session_id": session_id}) @@ -608,7 +610,7 @@ async def handle_session_events_history(request: web.Request) -> 
web.Response: async def handle_session_history(request: web.Request) -> web.Response: """GET /api/sessions/history — all queen sessions on disk (live + cold). - Returns every session directory under ~/.hive/queen/session/, newest first. + Returns every queen session directory on disk, newest first. Live sessions have ``live: true, cold: false``; sessions that survived a server restart have ``live: false, cold: true``. """ @@ -634,7 +636,7 @@ async def handle_delete_history_session(request: web.Request) -> web.Response: """DELETE /api/sessions/history/{session_id} — permanently remove a session. Stops the live session (if still running) and deletes the queen session - directory from disk at ~/.hive/queen/session/{session_id}/. + directory from disk. This is the frontend 'delete from history' action. """ manager = _get_manager(request) @@ -645,7 +647,9 @@ async def handle_delete_history_session(request: web.Request) -> web.Response: await manager.stop_session(session_id) # Delete the queen session directory from disk - queen_session_dir = Path.home() / ".hive" / "queen" / "session" / session_id + from framework.server.session_manager import _queen_session_dir + + queen_session_dir = _queen_session_dir(session_id) if queen_session_dir.exists() and queen_session_dir.is_dir(): try: shutil.rmtree(queen_session_dir) @@ -743,7 +747,9 @@ async def handle_reveal_session_folder(request: web.Request) -> web.Response: session = manager.get_session(session_id) storage_session_id = (session.queen_resume_from or session.id) if session else session_id - folder = Path.home() / ".hive" / "queen" / "session" / storage_session_id + from framework.server.session_manager import _queen_session_dir + + folder = _queen_session_dir(storage_session_id) folder.mkdir(parents=True, exist_ok=True) try: diff --git a/core/framework/server/session_manager.py b/core/framework/server/session_manager.py index 25e3961f..e88ae445 100644 --- a/core/framework/server/session_manager.py +++ 
b/core/framework/server/session_manager.py @@ -19,11 +19,17 @@ from datetime import datetime from pathlib import Path from typing import Any -from framework.runtime.triggers import TriggerDefinition +from framework.config import QUEENS_DIR +from framework.host.triggers import TriggerDefinition logger = logging.getLogger(__name__) +def _queen_session_dir(session_id: str, queen_name: str = "default") -> Path: + """Return the on-disk directory for a queen session.""" + return QUEENS_DIR / queen_name / "sessions" / session_id + + @dataclass class Session: """A live session with a queen and optional worker.""" @@ -67,6 +73,10 @@ class Session: queen_resume_from: str | None = None # Queen session directory (set during _start_queen, used for shutdown reflection) queen_dir: Path | None = None + # Multi-queen support: which queen profile this session uses + queen_name: str = "default" + # Colony name: set when a worker is loaded from a colony + colony_name: str | None = None class SessionManager: @@ -86,6 +96,14 @@ class SessionManager: # reflections) so they aren't garbage-collected before completion. self._background_tasks: set[asyncio.Task] = set() + # Run one-time v2 directory structure migration + from framework.storage.migrate_v2 import run_migration + + try: + run_migration() + except Exception: + logger.warning("v2 migration failed (non-fatal)", exc_info=True) + # ------------------------------------------------------------------ # Session lifecycle # ------------------------------------------------------------------ @@ -100,7 +118,7 @@ class SessionManager: Internal helper — use create_session() or create_session_with_worker_graph(). 
""" from framework.config import RuntimeConfig, get_hive_config - from framework.runtime.event_bus import EventBus + from framework.host.event_bus import EventBus ts = datetime.now().strftime("%Y%m%d_%H%M%S") resolved_id = session_id or f"session_{ts}_{uuid.uuid4().hex[:8]}" @@ -194,9 +212,7 @@ class SessionManager: # is incomplete and will fail to import). if queen_resume_from: _resume_phase = None - _meta_path = ( - Path.home() / ".hive" / "queen" / "session" / queen_resume_from / "meta.json" - ) + _meta_path = _queen_session_dir(queen_resume_from) / "meta.json" if _meta_path.exists(): try: _meta = json.loads(_meta_path.read_text(encoding="utf-8")) @@ -281,7 +297,7 @@ class SessionManager: Sets up the runner, runtime, and session fields. Does NOT notify the queen — callers handle that step. """ - from framework.runner import AgentRunner + from framework.loader import AgentLoader agent_path = Path(agent_path) resolved_graph_id = graph_id or agent_path.name @@ -303,7 +319,7 @@ class SessionManager: resolved_model = model or session_model or self._model runner = await loop.run_in_executor( None, - lambda: AgentRunner.load( + lambda: AgentLoader.load( agent_path, model=resolved_model, interactive=False, @@ -536,7 +552,7 @@ class SessionManager: # Update meta.json so cold-restore can discover this session by agent_path storage_session_id = session.queen_resume_from or session.id - meta_path = Path.home() / ".hive" / "queen" / "session" / storage_session_id / "meta.json" + meta_path = _queen_session_dir(storage_session_id, session.queen_name) / "meta.json" try: _agent_name = ( session.worker_info.name @@ -644,10 +660,11 @@ class SessionManager: task = asyncio.create_task( asyncio.shield(run_shutdown_reflection(session.queen_dir, session.llm)), + name=f"shutdown-reflect-{session_id}", ) + logger.info("Session '%s': shutdown reflection spawned", session_id) self._background_tasks.add(task) task.add_done_callback(self._background_tasks.discard) - logger.info("Session 
'%s': shutdown reflection spawned", session_id) except Exception: logger.warning( "Session '%s': failed to spawn shutdown reflection", session_id, exc_info=True @@ -721,7 +738,7 @@ class SessionManager: def _subscribe_worker_handoffs(self, session: Session, executor: Any) -> None: """Subscribe queen to worker/subagent escalation handoff events.""" - from framework.runtime.event_bus import EventType as _ET + from framework.host.event_bus import EventType as _ET if session.worker_handoff_sub is not None: session.event_bus.unsubscribe(session.worker_handoff_sub) @@ -755,13 +772,11 @@ class SessionManager: session.queen_executor, ) - hive_home = Path.home() / ".hive" - # Determine which session directory to use for queen storage. # When queen_resume_from is set we write to the ORIGINAL session's # directory so that all messages accumulate in one place. storage_session_id = session.queen_resume_from or session.id - queen_dir = hive_home / "queen" / "session" / storage_session_id + queen_dir = _queen_session_dir(storage_session_id, session.queen_name) queen_dir.mkdir(parents=True, exist_ok=True) session.queen_dir = queen_dir @@ -920,7 +935,7 @@ class SessionManager: async def _emit_graph_loaded(self, session: Session) -> None: """Publish a WORKER_GRAPH_LOADED event so the frontend can update.""" - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType info = session.worker_info await session.event_bus.publish( @@ -939,7 +954,7 @@ class SessionManager: async def _emit_flowchart_on_restore(self, session: Session, agent_path: str | Path) -> None: """Emit FLOWCHART_MAP_UPDATED from persisted flowchart file on cold restore.""" - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType from framework.tools.flowchart_utils import load_flowchart_file original_draft, flowchart_map = load_flowchart_file(agent_path) @@ -982,7 +997,7 @@ class 
SessionManager: triggers: dict[str, TriggerDefinition], ) -> None: """Emit TRIGGER_AVAILABLE or TRIGGER_REMOVED events for each trigger.""" - from framework.runtime.event_bus import AgentEvent, EventType + from framework.host.event_bus import AgentEvent, EventType event_type = ( EventType.TRIGGER_AVAILABLE if kind == "available" else EventType.TRIGGER_REMOVED @@ -1076,10 +1091,10 @@ class SessionManager: """Return disk metadata for a session that is no longer live in memory. Checks whether queen conversation files exist at - ~/.hive/queen/session/{session_id}/conversations/. Returns None when + ~/.hive/agents/queens/{name}/sessions/{session_id}/conversations/. Returns None when no data is found so callers can fall through to a 404. """ - queen_dir = Path.home() / ".hive" / "queen" / "session" / session_id + queen_dir = _queen_session_dir(session_id) convs_dir = queen_dir / "conversations" if not convs_dir.exists(): return None @@ -1134,7 +1149,7 @@ class SessionManager: @staticmethod def list_cold_sessions() -> list[dict]: """Return metadata for every queen session directory on disk, newest first.""" - queen_sessions_dir = Path.home() / ".hive" / "queen" / "session" + queen_sessions_dir = QUEENS_DIR / "default" / "sessions" if not queen_sessions_dir.exists(): return [] diff --git a/core/framework/server/tests/test_api.py b/core/framework/server/tests/test_api.py index ef7cc905..e7e60081 100644 --- a/core/framework/server/tests/test_api.py +++ b/core/framework/server/tests/test_api.py @@ -14,7 +14,7 @@ from unittest.mock import AsyncMock, MagicMock import pytest from aiohttp.test_utils import TestClient, TestServer -from framework.runtime.triggers import TriggerDefinition +from framework.host.triggers import TriggerDefinition from framework.server.app import create_app from framework.server.session_manager import Session @@ -1055,7 +1055,7 @@ class TestNodeCriteria: nodes, edges = nodes_and_edges # Create a real RuntimeLogStore pointed at the temp agent dir - from 
framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) @@ -1110,7 +1110,7 @@ class TestLogs: session_id, session_dir, state = sample_session tmp_path, agent_name, base = tmp_agent_dir - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1132,7 +1132,7 @@ class TestLogs: session_id, session_dir, state = custom_id_session tmp_path, agent_name, base = tmp_agent_dir - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1154,7 +1154,7 @@ class TestLogs: session_id, session_dir, state = sample_session tmp_path, agent_name, base = tmp_agent_dir - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1177,7 +1177,7 @@ class TestLogs: session_id, session_dir, state = sample_session tmp_path, agent_name, base = tmp_agent_dir - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1201,7 +1201,7 @@ class TestLogs: session_id, session_dir, state = sample_session tmp_path, agent_name, base = tmp_agent_dir - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1227,7 +1227,7 @@ class TestNodeLogs: tmp_path, agent_name, base = tmp_agent_dir nodes, edges = nodes_and_edges - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import 
RuntimeLogStore log_store = RuntimeLogStore(base) session = _make_session( @@ -1256,7 +1256,7 @@ class TestNodeLogs: @pytest.mark.asyncio async def test_node_logs_missing_session_id(self, nodes_and_edges): nodes, edges = nodes_and_edges - from framework.runtime.runtime_log_store import RuntimeLogStore + from framework.tracker.runtime_log_store import RuntimeLogStore log_store = RuntimeLogStore(Path("/tmp/dummy")) session = _make_session(nodes=nodes, edges=edges, log_store=log_store) diff --git a/core/framework/skills/_default_skills/browser-automation/SKILL.md b/core/framework/skills/_default_skills/browser-automation/SKILL.md new file mode 100644 index 00000000..bc481fc4 --- /dev/null +++ b/core/framework/skills/_default_skills/browser-automation/SKILL.md @@ -0,0 +1,80 @@ +--- +name: hive.browser-automation +description: Best practices for browser automation via gcu-tools MCP server (reading pages, navigation, scrolling, tab management, shadow DOM, coordinates). +metadata: + author: hive + type: default-skill +--- + +## Operational Protocol: Browser Automation + +Follow these rules for reliable, efficient browser interaction. + +### Reading Pages +- ALWAYS prefer `browser_snapshot` over `browser_get_text("body")` -- it returns a compact ~1-5 KB accessibility tree vs 100+ KB of raw HTML. +- Interaction tools (`browser_click`, `browser_type`, `browser_fill`, `browser_scroll`, etc.) return a page snapshot automatically in their result. Use it to decide your next action -- do NOT call `browser_snapshot` separately after every action. Only call `browser_snapshot` when you need a fresh view without performing an action, or after setting `auto_snapshot=false`. +- Do NOT use `browser_screenshot` to read text -- use `browser_snapshot` for that (compact, searchable, fast). +- DO use `browser_screenshot` when you need visual context: charts, images, canvas elements, layout verification, or when the snapshot doesn't capture what you need. 
+- Only fall back to `browser_get_text` for extracting specific small elements by CSS selector. + +### Navigation & Waiting +- `browser_navigate` and `browser_open` already wait for the page to load. Do NOT call `browser_wait` with no arguments after navigation -- it wastes time. Only use `browser_wait` when you need a *specific element* or *text* to appear (pass `selector` or `text`). +- NEVER re-navigate to the same URL after scrolling -- this resets your scroll position and loses loaded content. + +### Scrolling +- Use large scroll amounts ~2000 when loading more content -- sites like twitter and linkedin have lazy loading for paging. +- The scroll result includes a snapshot automatically -- no need to call `browser_snapshot` separately. + +### Batching Actions +- You can call multiple tools in a single turn -- they execute in parallel. ALWAYS batch independent actions together. Examples: fill multiple form fields in one turn, navigate + snapshot in one turn, click + scroll if targeting different elements. +- When batching, set `auto_snapshot=false` on all but the last action to avoid redundant snapshots. +- Aim for 3-5 tool calls per turn minimum. One tool call per turn is wasteful. + +### Error Recovery +- If a tool fails, retry once with the same approach. +- If it fails a second time, STOP retrying and switch approach. +- If `browser_snapshot` fails, try `browser_get_text` with a specific small selector as fallback. +- If `browser_open` fails or page seems stale, `browser_stop`, then `browser_start`, then retry. + +### Tab Management +**Close tabs as soon as you are done with them** -- not only at the end of the task. After reading or extracting data from a tab, close it immediately. + +- Finished reading/extracting from a tab? `browser_close(target_id=...)` +- Completed a multi-tab workflow? `browser_close_finished()` to clean up all your tabs +- More than 3 tabs open? Stop and close finished ones before opening more +- Popup appeared that you didn't need? 
Close it immediately + +`browser_tabs` returns an `origin` field for each tab: +- `"agent"` -- you opened it; you own it; close it when done +- `"popup"` -- opened by a link or script; close after extracting what you need +- `"startup"` or `"user"` -- leave these alone unless the task requires it + +Never accumulate tabs. Treat every tab you open as a resource you must free. + +### Shadow DOM & Overlays +Some sites (LinkedIn messaging, etc.) render content inside closed shadow roots invisible to regular DOM queries. + +- `browser_shadow_query("#interop-outlet >>> #msg-overlay >>> p")` -- uses `>>>` to pierce shadow roots. Returns `rect` in CSS pixels and `physicalRect` ready for coordinate tools. +- `browser_get_rect(selector="...", pierce_shadow=true)` -- get physical rect for any element including shadow DOM. + +### Coordinate System +There are THREE coordinate spaces. Using the wrong one causes clicks/hovers to land in the wrong place. + +| Space | Used by | How to get | +|---|---|---| +| Physical pixels | `browser_click_coordinate` | `browser_coords` `physical_x/y` | +| CSS pixels | `getBoundingClientRect()`, `elementFromPoint` | `browser_coords` `css_x/y` | +| Screenshot pixels | What you see in the image | Raw position in screenshot | + +**Converting screenshot to physical**: `browser_coords(x, y)` then use `physical_x/y`. +**Converting CSS to physical**: multiply by `window.devicePixelRatio` (typically 1.6 on HiDPI). +**Never** pass raw `getBoundingClientRect()` values to coordinate tools without multiplying by DPR first. + +### Login & Auth Walls +- If you see a "Log in" or "Sign up" prompt, report the auth wall immediately -- do NOT attempt to log in. +- Check for cookie consent banners and dismiss them if they block content. + +### Efficiency +- Minimize tool calls -- combine actions where possible. +- When a snapshot result is saved to a spillover file, use `run_command` with grep to extract specific data rather than re-reading the full file. 
+- Call `set_output` in the same turn as your last browser action when possible -- don't waste a turn. diff --git a/core/framework/skills/catalog.py b/core/framework/skills/catalog.py index 3621dbe1..08d3285e 100644 --- a/core/framework/skills/catalog.py +++ b/core/framework/skills/catalog.py @@ -64,15 +64,14 @@ class SkillCatalog: Returns empty string if no community/user skills are discovered (default skills are handled separately by DefaultSkillManager). """ - # Filter out framework-scope skills (default skills) — they're - # injected via the protocols prompt, not the catalog - community_skills = [s for s in self._skills.values() if s.source_scope != "framework"] + # All skills go through the catalog for progressive disclosure. + all_skills = list(self._skills.values()) - if not community_skills: + if not all_skills: return "" lines = [""] - for skill in sorted(community_skills, key=lambda s: s.name): + for skill in sorted(all_skills, key=lambda s: s.name): lines.append(" ") lines.append(f" {escape(skill.name)}") lines.append(f" {escape(skill.description)}") diff --git a/core/framework/skills/discovery.py b/core/framework/skills/discovery.py index 2db1a78b..cd0ab6eb 100644 --- a/core/framework/skills/discovery.py +++ b/core/framework/skills/discovery.py @@ -56,6 +56,16 @@ class SkillDiscovery: def __init__(self, config: DiscoveryConfig | None = None): self._config = config or DiscoveryConfig() + self._scanned_dirs: list[Path] = [] + + @property + def scanned_directories(self) -> list[str]: + """Return the skill directories that were scanned during discovery. + + Populated after :meth:`discover` runs. Used by the hot-reload + watcher to know which directories to monitor for changes. + """ + return [str(d) for d in self._scanned_dirs if d.exists()] def discover(self) -> list[ParsedSkill]: """Scan all scopes and return deduplicated skill list. @@ -70,11 +80,13 @@ class SkillDiscovery: Later entries override earlier ones on name collision. 
""" all_skills: list[ParsedSkill] = [] + self._scanned_dirs = [] # Framework scope (lowest precedence) if not self._config.skip_framework_scope: framework_dir = Path(__file__).parent / "_default_skills" if framework_dir.is_dir(): + self._scanned_dirs.append(framework_dir) all_skills.extend(self._scan_scope(framework_dir, "framework")) # User scope @@ -84,11 +96,13 @@ class SkillDiscovery: # Cross-client (lower precedence within user scope) user_agents = home / ".agents" / "skills" if user_agents.is_dir(): + self._scanned_dirs.append(user_agents) all_skills.extend(self._scan_scope(user_agents, "user")) # Hive-specific (higher precedence within user scope) user_hive = home / ".hive" / "skills" if user_hive.is_dir(): + self._scanned_dirs.append(user_hive) all_skills.extend(self._scan_scope(user_hive, "user")) # Project scope (highest precedence) @@ -98,11 +112,13 @@ class SkillDiscovery: # Cross-client project_agents = root / ".agents" / "skills" if project_agents.is_dir(): + self._scanned_dirs.append(project_agents) all_skills.extend(self._scan_scope(project_agents, "project")) # Hive-specific project_hive = root / ".hive" / "skills" if project_hive.is_dir(): + self._scanned_dirs.append(project_hive) all_skills.extend(self._scan_scope(project_hive, "project")) resolved = self._resolve_collisions(all_skills) diff --git a/core/framework/skills/manager.py b/core/framework/skills/manager.py index 9c1b4b80..5f9006fd 100644 --- a/core/framework/skills/manager.py +++ b/core/framework/skills/manager.py @@ -68,6 +68,9 @@ class SkillsManager: self._protocols_prompt: str = "" self._allowlisted_dirs: list[str] = [] self._default_mgr: object = None # DefaultSkillManager, set after load() + # Hot-reload state + self._watched_dirs: list[str] = [] + self._watcher_task: object = None # asyncio.Task, set by start_watching() # ------------------------------------------------------------------ # Factory for backwards-compat bridge @@ -117,62 +120,140 @@ class SkillsManager: 
skills_config = self._config.skills_config - # 1. Community skill discovery (when project_root is available) - catalog_prompt = "" + # 1. Skill discovery -- always run to pick up framework skills; + # community/project skills only when project_root is available. + discovery = SkillDiscovery(DiscoveryConfig( + project_root=self._config.project_root, + skip_framework_scope=False, + )) + discovered = discovery.discover() + self._watched_dirs = discovery.scanned_directories + + # Trust-gate project-scope skills (AS-13) if self._config.project_root is not None and not self._config.skip_community_discovery: from framework.skills.trust import TrustGate - discovery = SkillDiscovery(DiscoveryConfig(project_root=self._config.project_root)) - discovered = discovery.discover() - - # Trust-gate project-scope skills (AS-13) discovered = TrustGate(interactive=self._config.interactive).filter_and_gate( discovered, project_dir=self._config.project_root ) - catalog = SkillCatalog(discovered) - self._allowlisted_dirs = catalog.allowlisted_dirs - catalog_prompt = catalog.to_prompt() + catalog = SkillCatalog(discovered) + self._allowlisted_dirs = catalog.allowlisted_dirs + catalog_prompt = catalog.to_prompt() - # Pre-activated community skills - if skills_config.skills: - pre_activated = catalog.build_pre_activated_prompt(skills_config.skills) - if pre_activated: - if catalog_prompt: - catalog_prompt = f"{catalog_prompt}\n\n{pre_activated}" - else: - catalog_prompt = pre_activated + # Pre-activated community skills + if skills_config.skills: + pre_activated = catalog.build_pre_activated_prompt(skills_config.skills) + if pre_activated: + if catalog_prompt: + catalog_prompt = f"{catalog_prompt}\n\n{pre_activated}" + else: + catalog_prompt = pre_activated - # 2. Default skills (always loaded unless explicitly disabled) + # 2. Default skills -- discovered via _default_skills/ and included + # in the catalog for progressive disclosure (no longer force-injected + # as protocols_prompt). 
DefaultSkillManager still handles config, + logging, and metadata. default_mgr = DefaultSkillManager(config=skills_config) default_mgr.load() default_mgr.log_active_skills() - protocols_prompt = default_mgr.build_protocols_prompt() self._default_mgr = default_mgr - # DX-3: Community skill startup summary - if self._config.project_root is not None and not self._config.skip_community_discovery: - community_count = len(catalog._skills) if catalog_prompt else 0 - pre_activated_count = len(skills_config.skills) if skills_config.skills else 0 - logger.info( - "Skills: %d community (%d catalog, %d pre-activated)", - community_count, - community_count, - pre_activated_count, - ) # 3. Cache self._catalog_prompt = catalog_prompt - self._protocols_prompt = protocols_prompt + self._protocols_prompt = "" # all skills use progressive disclosure now - if protocols_prompt: - logger.info( - "Skill system ready: protocols=%d chars, catalog=%d chars", - len(protocols_prompt), - len(catalog_prompt), - ) - else: + if not catalog_prompt: logger.warning("Skill system produced empty protocols_prompt") + # ------------------------------------------------------------------ + # Hot-reload: watch skill directories for SKILL.md changes. + # ------------------------------------------------------------------ + + async def start_watching(self) -> None: + """Start a background task watching skill directories for changes. + + When a ``SKILL.md`` file is added/modified/removed, the cached + ``skills_catalog_prompt`` is rebuilt. The next node iteration picks + up the new prompt automatically via the ``dynamic_prompt_provider``. + + Silently no-ops when ``watchfiles`` is not installed or when no + directories are being watched (e.g. bare mode, no project_root). 
+ """ + import asyncio + + try: + import watchfiles # noqa: F401 -- optional dep check + except ImportError: + logger.debug("watchfiles not installed; skill hot-reload disabled") + return + + if not self._watched_dirs: + logger.debug("No skill directories to watch; hot-reload skipped") + return + + if self._watcher_task is not None: + return # already watching + + self._watcher_task = asyncio.create_task( + self._watch_loop(), + name="skills-hot-reload", + ) + logger.info( + "Skill hot-reload enabled (watching %d directories)", + len(self._watched_dirs), + ) + + async def stop_watching(self) -> None: + """Cancel the background watcher task (if running).""" + import asyncio + + task = self._watcher_task + if task is None: + return + self._watcher_task = None + if not task.done(): # type: ignore[attr-defined] + task.cancel() # type: ignore[attr-defined] + try: + await task # type: ignore[misc] + except asyncio.CancelledError: + pass + + async def _watch_loop(self) -> None: + """Background coroutine that watches SKILL.md files and triggers reload.""" + import asyncio + + import watchfiles + + def _filter(_change: object, path: str) -> bool: + return path.endswith("SKILL.md") + + try: + async for changes in watchfiles.awatch( + *self._watched_dirs, + watch_filter=_filter, + debounce=1000, + ): + paths = [p for _, p in changes] + logger.info("SKILL.md changes detected: %s", paths) + try: + self._reload() + except Exception: + logger.exception("Skill reload failed; keeping previous prompts") + except asyncio.CancelledError: + raise + except Exception: + logger.exception("Skill watcher crashed; hot-reload disabled for this session") + + def _reload(self) -> None: + """Re-run discovery and rebuild cached prompts.""" + # Reset loaded flag so _do_load actually re-runs. 
+ self._loaded = False + self._do_load() + self._loaded = True + logger.info("Skills reloaded: protocols=%d chars, catalog=%d chars", + len(self._protocols_prompt), len(self._catalog_prompt)) + # ------------------------------------------------------------------ # Prompt accessors (consumed by downstream layers) # ------------------------------------------------------------------ diff --git a/core/framework/storage/migrate_v2.py b/core/framework/storage/migrate_v2.py new file mode 100644 index 00000000..33273926 --- /dev/null +++ b/core/framework/storage/migrate_v2.py @@ -0,0 +1,145 @@ +"""One-time migration to the v2 ~/.hive/ directory structure. + +Moves: +- exports/{name}/ -> ~/.hive/colonies/{name}/ +- ~/.hive/queen/session/{id}/ -> ~/.hive/agents/queens/default/sessions/{id}/ +- ~/.hive/queen/global_memory/ -> ~/.hive/memories/global/ + +Runs automatically on first startup when the marker file is absent. +Safe to re-run (skips already-migrated items). +""" + +from __future__ import annotations + +import json +import logging +import shutil +from pathlib import Path + +from framework.config import COLONIES_DIR, HIVE_HOME, MEMORIES_DIR, QUEENS_DIR + +logger = logging.getLogger(__name__) + +_MIGRATION_MARKER = HIVE_HOME / ".migrated-v2" + + +def needs_migration() -> bool: + """Return True if the v2 migration has not yet run.""" + return not _MIGRATION_MARKER.exists() + + +def run_migration(*, exports_dir: Path | None = None) -> None: + """Run the full v2 migration. 
Idempotent and safe to re-run.""" + if not needs_migration(): + return + + logger.info("migrate_v2: starting ~/.hive structure migration") + + _migrate_colonies(exports_dir or Path("exports")) + _migrate_queen_sessions() + _migrate_memories() + _cleanup_old_queen_dir() + + # Write marker + HIVE_HOME.mkdir(parents=True, exist_ok=True) + _MIGRATION_MARKER.write_text("1\n", encoding="utf-8") + logger.info("migrate_v2: migration complete") + + +def _migrate_colonies(exports_dir: Path) -> None: + """Copy exports/{name}/ -> ~/.hive/colonies/{name}/.""" + if not exports_dir.exists(): + return + + COLONIES_DIR.mkdir(parents=True, exist_ok=True) + migrated = 0 + + for agent_dir in sorted(exports_dir.iterdir()): + if not agent_dir.is_dir() or agent_dir.name.startswith("."): + continue + target = COLONIES_DIR / agent_dir.name + if target.exists(): + continue + try: + shutil.copytree(agent_dir, target) + migrated += 1 + except OSError: + logger.warning("migrate_v2: failed to copy %s", agent_dir, exc_info=True) + + if migrated: + logger.info("migrate_v2: copied %d agent(s) from exports/ to colonies/", migrated) + + +def _migrate_queen_sessions() -> None: + """Move ~/.hive/queen/session/{id}/ -> ~/.hive/agents/queens/default/sessions/{id}/.""" + old_sessions = HIVE_HOME / "queen" / "session" + if not old_sessions.exists(): + return + + new_sessions = QUEENS_DIR / "default" / "sessions" + new_sessions.mkdir(parents=True, exist_ok=True) + migrated = 0 + + for session_dir in sorted(old_sessions.iterdir()): + if not session_dir.is_dir(): + continue + target = new_sessions / session_dir.name + if target.exists(): + continue + try: + session_dir.rename(target) + migrated += 1 + except OSError: + logger.warning( + "migrate_v2: failed to move session %s", session_dir, exc_info=True + ) + + if migrated: + logger.info("migrate_v2: moved %d queen session(s) to new path", migrated) + + +def _migrate_memories() -> None: + """Move ~/.hive/queen/global_memory/ -> ~/.hive/memories/global/.""" + 
old_global = HIVE_HOME / "queen" / "global_memory" + if not old_global.exists(): + return + + new_global = MEMORIES_DIR / "global" + if new_global.exists(): + # Already has content -- merge individual files + merged = 0 + for f in old_global.iterdir(): + if f.is_file() and not (new_global / f.name).exists(): + try: + shutil.copy2(f, new_global / f.name) + merged += 1 + except OSError: + pass + if merged: + logger.info("migrate_v2: merged %d memory file(s) into global/", merged) + return + + new_global.mkdir(parents=True, exist_ok=True) + migrated = 0 + for f in old_global.iterdir(): + if f.is_file(): + try: + shutil.copy2(f, new_global / f.name) + migrated += 1 + except OSError: + pass + + if migrated: + logger.info("migrate_v2: copied %d memory file(s) to memories/global/", migrated) + + +def _cleanup_old_queen_dir() -> None: + """Remove ~/.hive/queen/ after all content has been migrated.""" + old_queen = HIVE_HOME / "queen" + if not old_queen.exists(): + return + try: + shutil.rmtree(old_queen) + logger.info("migrate_v2: removed old ~/.hive/queen/ directory") + except OSError: + logger.debug("migrate_v2: could not remove old queen dir", exc_info=True) diff --git a/core/framework/testing/prompts.py b/core/framework/testing/prompts.py index 08df7625..61ae340b 100644 --- a/core/framework/testing/prompts.py +++ b/core/framework/testing/prompts.py @@ -68,8 +68,8 @@ for _p in ["exports", "core"]: sys.path.insert(0, _path) import pytest -from framework.runner.runner import AgentRunner -from framework.runtime.event_bus import EventType +from framework.loader.agent_loader import AgentLoader +from framework.host.event_bus import EventType AGENT_PATH = Path(__file__).resolve().parents[1] diff --git a/core/framework/tools/flowchart_utils.py b/core/framework/tools/flowchart_utils.py index f436ceb8..0c2c37cb 100644 --- a/core/framework/tools/flowchart_utils.py +++ b/core/framework/tools/flowchart_utils.py @@ -119,12 +119,11 @@ def classify_flowchart_node( return 
FLOWCHART_REMAP[explicit] node_id = node["id"] - node_type = node.get("node_type", "event_loop") node_tools = set(node.get("tools") or []) desc = (node.get("description") or "").lower() # GCU / browser automation nodes → hexagon - if node_type == "gcu": + if False: # gcu removed return "browser" # Entry node (first node or no incoming edges) → start terminator diff --git a/core/framework/tools/migrate_agent.py b/core/framework/tools/migrate_agent.py new file mode 100644 index 00000000..52119c60 --- /dev/null +++ b/core/framework/tools/migrate_agent.py @@ -0,0 +1,273 @@ +"""Migrate a Python-based agent export to declarative agent.yaml. + +Usage:: + + uv run python -m framework.tools.migrate_agent exports/lead_enrichment_agent + +Reads agent.py, nodes/__init__.py, config.py, and mcp_servers.json from the +given directory and writes an ``agent.yaml`` file that is equivalent. The +original Python files are left untouched. + +After migration, verify with:: + + uv run python -c " + from framework.loader.agent_loader import load_agent_config + import yaml, pathlib + data = yaml.safe_load(pathlib.Path('exports/lead_enrichment_agent/agent.yaml').read_text()) + graph, goal = load_agent_config(data) + print(f'OK: {len(graph.nodes)} nodes, {len(graph.edges)} edges') + " +""" + +from __future__ import annotations + +import importlib +import importlib.util +import json +import logging +import sys +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + + +def _import_module_from_path(module_name: str, file_path: Path) -> Any: + """Import a Python file as a module.""" + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None or spec.loader is None: + raise ImportError(f"Cannot import {file_path}") + mod = importlib.util.module_from_spec(spec) + sys.modules[module_name] = mod + spec.loader.exec_module(mod) + return mod + + +def _node_to_dict(node: Any) -> dict: + """Convert a NodeSpec instance to a YAML-friendly 
dict.""" + d: dict[str, Any] = {"id": node.id} + if node.name and node.name != node.id: + d["name"] = node.name + if node.description: + d["description"] = node.description + if node.node_type != "event_loop": + d["node_type"] = node.node_type + if node.client_facing: + d["client_facing"] = True + if node.max_node_visits != 1: + d["max_node_visits"] = node.max_node_visits + + if node.input_keys: + d["input_keys"] = list(node.input_keys) + if node.output_keys: + d["output_keys"] = list(node.output_keys) + if node.nullable_output_keys: + d["nullable_output_keys"] = list(node.nullable_output_keys) + + # Tools + tools_list = list(node.tools) if node.tools else [] + if tools_list: + d["tools"] = {"policy": "explicit", "allowed": tools_list} + elif False: # gcu removed + d["tools"] = {"policy": "all"} + else: + d["tools"] = {"policy": "none"} + + if node.sub_agents: + d["sub_agents"] = list(node.sub_agents) + if node.success_criteria: + d["success_criteria"] = node.success_criteria + if getattr(node, "failure_criteria", None): + d["failure_criteria"] = node.failure_criteria + if getattr(node, "max_retries", None): + d["max_retries"] = node.max_retries + if getattr(node, "skip_judge", False): + d["skip_judge"] = True + if getattr(node, "max_iterations", 30) != 30: + d["max_iterations"] = node.max_iterations + + if node.system_prompt: + d["system_prompt"] = node.system_prompt + + return d + + +def _edge_to_dict(edge: Any) -> dict: + """Convert an EdgeSpec instance to a YAML-friendly dict.""" + d: dict[str, Any] = { + "from_node": edge.source, + "to_node": edge.target, + } + cond = str(edge.condition.value) if hasattr(edge.condition, "value") else str(edge.condition) + if cond != "on_success": + d["condition"] = cond + if edge.condition_expr: + d["condition"] = "conditional" + d["condition_expr"] = edge.condition_expr + if edge.priority and edge.priority != 1: + d["priority"] = edge.priority + if edge.input_mapping: + d["input_mapping"] = dict(edge.input_mapping) + return d 
+ + +def migrate_agent(agent_dir: str | Path) -> dict: + """Read a Python-based agent export and return the declarative config dict. + + The returned dict can be serialized to YAML or JSON. + """ + agent_dir = Path(agent_dir).resolve() + agent_py = agent_dir / "agent.py" + if not agent_py.exists(): + raise FileNotFoundError(f"No agent.py in {agent_dir}") + + # Make the agent importable as a package (handles relative imports) + parent = str(agent_dir.parent) + if parent not in sys.path: + sys.path.insert(0, parent) + + pkg_name = agent_dir.name + agent_mod = importlib.import_module(f"{pkg_name}.agent") + + # Extract module-level variables + goal = getattr(agent_mod, "goal", None) + nodes = getattr(agent_mod, "nodes", []) + edges = getattr(agent_mod, "edges", []) + entry_node = getattr(agent_mod, "entry_node", "") + terminal_nodes = getattr(agent_mod, "terminal_nodes", []) + pause_nodes = getattr(agent_mod, "pause_nodes", []) + conversation_mode = getattr(agent_mod, "conversation_mode", "continuous") + identity_prompt = getattr(agent_mod, "identity_prompt", "") + loop_config = getattr(agent_mod, "loop_config", {}) + + # Config / metadata + config_mod = None + config_py = agent_dir / "config.py" + if config_py.exists(): + try: + config_mod = importlib.import_module(f"{pkg_name}.config") + except ImportError: + pass + metadata = getattr(config_mod, "metadata", None) + default_config = getattr(config_mod, "default_config", None) + + # Agent name + name = agent_dir.name + if metadata and hasattr(metadata, "name"): + name = str(metadata.name).lower().replace(" ", "-") + + # Build config dict + config: dict[str, Any] = { + "name": name, + "version": getattr(metadata, "version", "1.0.0") if metadata else "1.0.0", + } + if goal and goal.description: + config["description"] = goal.description + if metadata and hasattr(metadata, "intro_message") and metadata.intro_message: + intro = metadata.intro_message + if intro and "TODO" not in intro: + config["metadata"] = 
{"intro_message": intro} + + # Variables (detect config fields injected into prompts) + variables: dict[str, str] = {} + _SKIP_CONFIG = {"model", "temperature", "max_tokens", "api_key", "api_base"} + if default_config: + for attr in dir(default_config): + if attr.startswith("_") or attr in _SKIP_CONFIG: + continue + val = getattr(default_config, attr) + if isinstance(val, str) and val: + variables[attr] = val + if variables: + config["variables"] = variables + + # Goal + if goal: + goal_dict: dict[str, Any] = {"description": goal.description} + if goal.success_criteria: + goal_dict["success_criteria"] = [sc.description for sc in goal.success_criteria] + if goal.constraints: + goal_dict["constraints"] = [c.description for c in goal.constraints] + config["goal"] = goal_dict + + # Identity / conversation / loop + if identity_prompt: + config["identity_prompt"] = identity_prompt + if conversation_mode and conversation_mode != "continuous": + config["conversation_mode"] = conversation_mode + if loop_config: + config["loop_config"] = dict(loop_config) + + # MCP servers + mcp_path = agent_dir / "mcp_servers.json" + if mcp_path.exists(): + with open(mcp_path) as f: + mcp_data = json.load(f) + if mcp_data: + config["mcp_servers"] = [{"name": name} for name in mcp_data] + + # Nodes + config["nodes"] = [_node_to_dict(n) for n in nodes] + + # Edges + config["edges"] = [_edge_to_dict(e) for e in edges] + + # Graph structure + config["entry_node"] = entry_node + if terminal_nodes: + config["terminal_nodes"] = terminal_nodes + if pause_nodes: + config["pause_nodes"] = pause_nodes + + return config + + +def write_yaml(config: dict, output_path: Path) -> None: + """Write config dict to YAML with clean formatting.""" + try: + import yaml + except ImportError: + raise ImportError("PyYAML required: uv pip install pyyaml") from None + + # Custom representer for multiline strings + def _str_representer(dumper: yaml.Dumper, data: str) -> Any: + if "\n" in data: + return 
dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + yaml.add_representer(str, _str_representer) + + with open(output_path, "w") as f: + yaml.dump( + config, f, + default_flow_style=False, sort_keys=False, + allow_unicode=True, width=120, + ) + + logger.info("Wrote %s", output_path) + + +def main() -> None: + """CLI entry point.""" + logging.basicConfig(level=logging.INFO, format="%(message)s") + + if len(sys.argv) < 2: + print("Usage: uv run python -m framework.tools.migrate_agent ") + sys.exit(1) + + agent_dir = Path(sys.argv[1]) + config = migrate_agent(agent_dir) + + output = agent_dir / "agent.yaml" + write_yaml(config, output) + print(f"Wrote {output}") + + n_nodes = len(config["nodes"]) + n_edges = len(config["edges"]) + print(f"\nMigrated {config['name']}: {n_nodes} nodes, {n_edges} edges") + print("\nVerify with:") + print(f' uv run python -c "import yaml, pathlib; from framework.loader.agent_loader import load_agent_config; load_agent_config(yaml.safe_load(pathlib.Path(\'{output}\').read_text()))"') + + +if __name__ == "__main__": + main() diff --git a/core/framework/tools/queen_lifecycle/__init__.py b/core/framework/tools/queen_lifecycle/__init__.py new file mode 100644 index 00000000..6f850e5b --- /dev/null +++ b/core/framework/tools/queen_lifecycle/__init__.py @@ -0,0 +1,10 @@ +"""Queen lifecycle tools -- split into per-tool modules. + +The main entry point is still ``register_queen_lifecycle_tools()`` in +``queen_lifecycle_tools.py``. This package provides the shared context +and individual tool registration functions. +""" + +from framework.tools.queen_lifecycle.context import QueenToolContext + +__all__ = ["QueenToolContext"] diff --git a/core/framework/tools/queen_lifecycle/context.py b/core/framework/tools/queen_lifecycle/context.py new file mode 100644 index 00000000..4da53ecb --- /dev/null +++ b/core/framework/tools/queen_lifecycle/context.py @@ -0,0 +1,52 @@ +"""Shared context for queen lifecycle tools. 
+ +All queen tools receive this context instead of closing over +individual variables from the registration function. +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass +from typing import Any + +logger = logging.getLogger(__name__) + + +@dataclass +class QueenToolContext: + """Shared state passed to all queen lifecycle tool implementations.""" + + session: Any # Session or WorkerSessionAdapter + session_manager: Any | None = None + manager_session_id: str | None = None + phase_state: Any | None = None # QueenPhaseState + registry: Any = None # ToolRegistry + + def get_runtime(self): + """Get current graph runtime from session (late-binding).""" + return getattr(self.session, "graph_runtime", None) + + def update_meta(self, updates: dict) -> None: + """Update session metadata JSON.""" + if self.session_manager is None or self.manager_session_id is None: + return + try: + srv_session = self.session_manager.get_session(self.manager_session_id) + if srv_session is None: + return + meta_path = getattr(srv_session, "meta_path", None) + if meta_path is None: + return + import pathlib + + meta_file = pathlib.Path(meta_path) + if meta_file.exists(): + data = json.loads(meta_file.read_text(encoding="utf-8")) + else: + data = {} + data.update(updates) + meta_file.write_text(json.dumps(data, indent=2) + "\n") + except Exception: + logger.debug("Failed to update session meta", exc_info=True) diff --git a/core/framework/tools/queen_lifecycle_tools.py b/core/framework/tools/queen_lifecycle_tools.py index ee7e0cb9..229673ba 100644 --- a/core/framework/tools/queen_lifecycle_tools.py +++ b/core/framework/tools/queen_lifecycle_tools.py @@ -43,8 +43,8 @@ from pathlib import Path from typing import TYPE_CHECKING, Any from framework.credentials.models import CredentialError -from framework.runner.preload_validation import credential_errors_to_json, validate_credentials -from framework.runtime.event_bus import AgentEvent, 
EventType +from framework.loader.preload_validation import credential_errors_to_json, validate_credentials +from framework.host.event_bus import AgentEvent, EventType from framework.server.app import validate_agent_path from framework.tools.flowchart_utils import ( FLOWCHART_TYPES, @@ -55,9 +55,9 @@ from framework.tools.flowchart_utils import ( ) if TYPE_CHECKING: - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import AgentRuntime - from framework.runtime.event_bus import EventBus + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus logger = logging.getLogger(__name__) @@ -323,7 +323,7 @@ class QueenPhaseState: ) -def build_worker_profile(runtime: AgentRuntime, agent_path: Path | str | None = None) -> str: +def build_worker_profile(runtime: AgentHost, agent_path: Path | str | None = None) -> str: """Build a worker capability profile from its graph/goal definition. 
Injected into the queen's system prompt so it knows what the worker @@ -452,7 +452,7 @@ async def _persist_active_triggers(session: Any, session_id: str) -> None: async def _start_trigger_timer(session: Any, trigger_id: str, tdef: Any) -> None: """Start an asyncio background task that fires the trigger on a timer.""" - from framework.graph.event_loop_node import TriggerEvent + from framework.agent_loop.agent_loop import TriggerEvent cron_expr = tdef.trigger_config.get("cron") interval_minutes = tdef.trigger_config.get("interval_minutes") @@ -513,8 +513,8 @@ async def _start_trigger_timer(session: Any, trigger_id: str, tdef: Any) -> None async def _start_trigger_webhook(session: Any, trigger_id: str, tdef: Any) -> None: """Subscribe to WEBHOOK_RECEIVED events and route matching ones to the queen.""" - from framework.graph.event_loop_node import TriggerEvent - from framework.runtime.webhook_server import WebhookRoute, WebhookServer, WebhookServerConfig + from framework.agent_loop.agent_loop import TriggerEvent + from framework.host.webhook_server import WebhookRoute, WebhookServer, WebhookServerConfig bus = session.event_bus path = tdef.trigger_config.get("path", "") @@ -722,54 +722,6 @@ def _dissolve_planning_nodes( nodes[:] = [n for n in nodes if n["id"] != d_id] del node_by_id[d_id] - # ── Dissolve sub-agent nodes ────────────────────────────── - # Sub-agent nodes are leaf delegates: parent -> subagent (no outgoing). - # Dissolution adds the subagent's ID to parent's sub_agents list. - subagent_ids = [ - n["id"] - for n in nodes - if n.get("flowchart_type") == "browser" or n.get("node_type") == "gcu" - ] - - for sa_id in subagent_ids: - sa_node = node_by_id.get(sa_id) - if sa_node is None: - continue - - in_edges = _incoming(sa_id) - out_edges = _outgoing(sa_id) - - # Validate: sub-agent nodes must be leaves (no outgoing edges) - if out_edges: - logger.warning( - "Sub-agent node '%s' has outgoing edges — they will be dropped " - "during dissolution. 
Sub-agent nodes should be leaf nodes.", - sa_id, - ) - - # Attach to each predecessor's sub_agents list - for ie in in_edges: - pred_id = ie["source"] - pred = node_by_id.get(pred_id) - if pred is None: - continue - - existing_subs = pred.get("sub_agents") or [] - if sa_id not in existing_subs: - existing_subs.append(sa_id) - pred["sub_agents"] = existing_subs - - # Record absorption - prev_absorbed = absorbed.get(pred_id, [pred_id]) - if sa_id not in prev_absorbed: - prev_absorbed.append(sa_id) - absorbed[pred_id] = prev_absorbed - - # Remove sub-agent node and all its edges - edges[:] = [e for e in edges if e["source"] != sa_id and e["target"] != sa_id] - nodes[:] = [n for n in nodes if n["id"] != sa_id] - del node_by_id[sa_id] - # Build complete flowchart_map (identity for non-absorbed nodes) flowchart_map: dict[str, list[str]] = {} for n in nodes: @@ -799,8 +751,11 @@ def _update_meta_json(session_manager, manager_session_id, updates: dict) -> Non srv_session = session_manager.get_session(manager_session_id) if not srv_session: return + from framework.config import QUEENS_DIR + storage_sid = getattr(srv_session, "queen_resume_from", None) or srv_session.id - meta_path = Path.home() / ".hive" / "queen" / "session" / storage_sid / "meta.json" + queen_name = getattr(srv_session, "queen_name", "default") + meta_path = QUEENS_DIR / queen_name / "sessions" / storage_sid / "meta.json" try: existing = {} if meta_path.exists(): @@ -816,7 +771,7 @@ def register_queen_lifecycle_tools( session: Any = None, session_id: str | None = None, # Legacy params — used by TUI when not passing a session object - graph_runtime: AgentRuntime | None = None, + graph_runtime: AgentHost | None = None, event_bus: EventBus | None = None, storage_path: Path | None = None, # Server context — enables load_built_agent tool @@ -1388,81 +1343,6 @@ def register_queen_lifecycle_tools( nodes[:] = [n for n in nodes if n["id"] != d_id] del node_by_id[d_id] - # ── Dissolve sub-agent nodes 
────────────────────────────── - # Sub-agent nodes are leaf delegates: parent → subagent (no outgoing). - # Dissolution adds the subagent's ID to parent's sub_agents list. - subagent_ids = [ - n["id"] - for n in nodes - if n.get("flowchart_type") == "browser" or n.get("node_type") == "gcu" - ] - - for sa_id in subagent_ids: - sa_node = node_by_id.get(sa_id) - if sa_node is None: - continue - - in_edges = _incoming(sa_id) - out_edges = _outgoing(sa_id) - - # Validate: sub-agent nodes must be leaves (no outgoing edges) - if out_edges: - logger.warning( - "Sub-agent node '%s' has outgoing edges — they will be dropped " - "during dissolution. Sub-agent nodes should be leaf nodes.", - sa_id, - ) - - # Attach to each predecessor's sub_agents list - for ie in in_edges: - pred_id = ie["source"] - pred = node_by_id.get(pred_id) - if pred is None: - continue - - existing_subs = pred.get("sub_agents") or [] - if sa_id not in existing_subs: - existing_subs.append(sa_id) - pred["sub_agents"] = existing_subs - - # Record absorption - prev_absorbed = absorbed.get(pred_id, [pred_id]) - if sa_id not in prev_absorbed: - prev_absorbed.append(sa_id) - absorbed[pred_id] = prev_absorbed - - # Remove sub-agent node and all its edges - edges[:] = [e for e in edges if e["source"] != sa_id and e["target"] != sa_id] - nodes[:] = [n for n in nodes if n["id"] != sa_id] - del node_by_id[sa_id] - - # ── Dissolve implicit sub-agents ───────────────────────── - # Nodes that appear in another node's sub_agents list but weren't - # caught above (e.g. GCU nodes with flowchart_type="browser" where - # the queen set sub_agents directly on the parent). 
- implicit_sa_ids: list[str] = [] - for n in nodes: - for sa_id in n.get("sub_agents") or []: - if sa_id in node_by_id and sa_id != n["id"]: - implicit_sa_ids.append(sa_id) - - for sa_id in implicit_sa_ids: - if sa_id not in node_by_id: - continue # already removed - - # Find which parent(s) reference this sub-agent - for n in nodes: - if sa_id in (n.get("sub_agents") or []) and n["id"] != sa_id: - prev_absorbed = absorbed.get(n["id"], [n["id"]]) - if sa_id not in prev_absorbed: - prev_absorbed.append(sa_id) - absorbed[n["id"]] = prev_absorbed - - # Remove the sub-agent node and its edges - edges[:] = [e for e in edges if e["source"] != sa_id and e["target"] != sa_id] - nodes[:] = [n for n in nodes if n["id"] != sa_id] - del node_by_id[sa_id] - # Build complete flowchart_map (identity for non-absorbed nodes) flowchart_map: dict[str, list[str]] = {} for n in nodes: @@ -1470,14 +1350,9 @@ def register_queen_lifecycle_tools( flowchart_map[nid] = absorbed.get(nid, [nid]) # Rebuild terminal_nodes (decision targets may have changed). - # Sub-agent nodes are leaf helpers, not endpoints — exclude them. 
- post_sa_ids: set[str] = set() - for n in nodes: - for sa_id in n.get("sub_agents") or []: - post_sa_ids.add(sa_id) sources = {e["source"] for e in edges} all_ids = {n["id"] for n in nodes} - terminal_ids = all_ids - sources - post_sa_ids + terminal_ids = all_ids - sources if not terminal_ids and nodes: terminal_ids = {nodes[-1]["id"]} @@ -1563,7 +1438,6 @@ def register_queen_lifecycle_tools( "input_keys": n.get("input_keys", []), "output_keys": n.get("output_keys", []), "success_criteria": n.get("success_criteria", ""), - "sub_agents": n.get("sub_agents", []), # Decision nodes: the yes/no question to evaluate "decision_clause": n.get("decision_clause", ""), # Explicit flowchart override (preserved for classification) @@ -1601,219 +1475,7 @@ def register_queen_lifecycle_tools( } ) - # ── GCU nodes cannot be children of decision nodes ───────── - # Decision nodes dissolve into their predecessor. If a GCU node - # is a decision child, after dissolution it would become a - # conditional workflow step — violating the leaf sub-agent rule. - # Rewire: move the GCU to the decision's predecessor as a - # sub-agent and remove the decision → GCU edge. 
- node_by_id_v = {n["id"]: n for n in validated_nodes} - decision_node_ids = { - n["id"] for n in validated_nodes if n.get("flowchart_type") == "decision" - } - gcu_node_ids = { - n["id"] - for n in validated_nodes - if n.get("node_type") == "gcu" or n.get("flowchart_type") == "browser" - } topology_corrections: list[str] = [] - if decision_node_ids and gcu_node_ids: - for d_id in decision_node_ids: - gcu_children = [ - e - for e in validated_edges - if e["source"] == d_id and e["target"] in gcu_node_ids - ] - if not gcu_children: - continue - d_parents = [e["source"] for e in validated_edges if e["target"] == d_id] - for gc_edge in gcu_children: - gc_id = gc_edge["target"] - logger.warning( - "GCU node '%s' is a child of decision node '%s' " - "— moving it to the decision's predecessor.", - gc_id, - d_id, - ) - topology_corrections.append( - f"GCU node '{gc_id}' was a child of decision " - f"node '{d_id}' — invalid because decision " - f"nodes dissolve at build time. Moved " - f"'{gc_id}' to predecessor as a sub-agent." - ) - # Remove the decision → GCU edge - validated_edges[:] = [ - e - for e in validated_edges - if not (e["source"] == d_id and e["target"] == gc_id) - ] - # Remove any outgoing edges from the GCU node - # (keep report edges back to predecessors) - validated_edges[:] = [ - e - for e in validated_edges - if e["source"] != gc_id or e["target"] in set(d_parents) - ] - # Assign GCU as sub-agent of predecessor(s) - for pid in d_parents: - parent = node_by_id_v.get(pid) - if parent is None: - continue - existing = parent.get("sub_agents") or [] - if gc_id not in existing: - existing.append(gc_id) - parent["sub_agents"] = existing - - # ── Enforce GCU / subagent leaf constraint ──────────────── - # GCU nodes and nodes with flowchart_type "subagent" are leaf - # delegates: they can only receive a delegate edge IN from - # their parent and send a report edge OUT back to that parent. 
- # Any other outgoing edges are design errors — strip them and - # auto-assign the node as a sub-agent of its predecessor. - leaf_node_ids: set[str] = set() - for n in validated_nodes: - if n.get("node_type") == "gcu" or n.get("flowchart_type") == "browser": - leaf_node_ids.add(n["id"]) - if leaf_node_ids: - for leaf_id in leaf_node_ids: - # Find edges where this leaf node is the source - out_edges = [e for e in validated_edges if e["source"] == leaf_id] - in_edges = [e for e in validated_edges if e["target"] == leaf_id] - - # Identify the parent (predecessor that connects IN) - parent_ids = [e["source"] for e in in_edges] - - if not out_edges: - # Already a proper leaf — still ensure sub_agents is set - for pid in parent_ids: - parent = node_by_id_v.get(pid) - if parent is None: - continue - existing = parent.get("sub_agents") or [] - if leaf_id not in existing: - existing.append(leaf_id) - parent["sub_agents"] = existing - continue - - # Strip all outgoing edges from the leaf node that - # don't go back to a parent (report edges are OK) - illegal_targets: list[str] = [] - for oe in out_edges: - if oe["target"] not in parent_ids: - illegal_targets.append(oe["target"]) - - if illegal_targets: - logger.warning( - "GCU/subagent node '%s' has illegal outgoing " - "edges to %s — stripping them. GCU nodes " - "must be leaf sub-agents.", - leaf_id, - illegal_targets, - ) - topology_corrections.append( - f"GCU node '{leaf_id}' had illegal edges to " - f"{illegal_targets} — stripped. GCU nodes MUST " - f"be leaf sub-agents, never in the linear flow." 
- ) - # Rewire: predecessor → leaf's targets (skip leaf) - for parent_id in parent_ids: - for tgt_id in illegal_targets: - validated_edges.append( - { - "id": f"edge-rewire-{len(validated_edges)}", - "source": parent_id, - "target": tgt_id, - "condition": "on_success", - "description": "", - "label": "", - } - ) - # Remove the illegal edges - validated_edges[:] = [ - e - for e in validated_edges - if not (e["source"] == leaf_id and e["target"] in set(illegal_targets)) - ] - - # Ensure the leaf is in its parent's sub_agents list - for pid in parent_ids: - parent = node_by_id_v.get(pid) - if parent is None: - continue - existing = parent.get("sub_agents") or [] - if leaf_id not in existing: - existing.append(leaf_id) - parent["sub_agents"] = existing - - # ── Remove orphaned GCU / subagent nodes ────────────────── - # After enforcing the leaf constraint, any GCU/subagent node - # that has zero edges AND is not in any parent's sub_agents - # list is orphaned — remove it and warn the queen. - all_edge_node_ids = set() - for e in validated_edges: - all_edge_node_ids.add(e["source"]) - all_edge_node_ids.add(e["target"]) - all_sa_refs: set[str] = set() - for n in validated_nodes: - for sa_id in n.get("sub_agents") or []: - all_sa_refs.add(sa_id) - - orphaned_ids: list[str] = [] - for lid in leaf_node_ids: - if lid not in all_edge_node_ids and lid not in all_sa_refs: - orphaned_ids.append(lid) - - if orphaned_ids: - for oid in orphaned_ids: - logger.warning( - "GCU/subagent node '%s' is orphaned (no edges, " - "not in any parent's sub_agents) — removing it.", - oid, - ) - topology_corrections.append( - f"GCU node '{oid}' was orphaned (no edges, not " - f"assigned as a sub-agent of any parent node) — " - f"removed. Add it to a parent node's sub_agents " - f"list and re-save the draft." 
- ) - validated_nodes[:] = [n for n in validated_nodes if n["id"] not in set(orphaned_ids)] - node_by_id_v = {n["id"]: n for n in validated_nodes} - - # Synthesize visual edges for sub-agents that are referenced in - # a parent's sub_agents list but have no connecting edge yet. - node_id_set = {n["id"] for n in validated_nodes} - existing_edge_pairs = {(e["source"], e["target"]) for e in validated_edges} - edge_counter = len(validated_edges) - for n in validated_nodes: - for sa_id in n.get("sub_agents") or []: - if sa_id not in node_id_set: - continue - if (n["id"], sa_id) not in existing_edge_pairs: - validated_edges.append( - { - "id": f"edge-subagent-{edge_counter}", - "source": n["id"], - "target": sa_id, - "condition": "always", - "description": "sub-agent delegation", - "label": "delegate", - } - ) - edge_counter += 1 - existing_edge_pairs.add((n["id"], sa_id)) - if (sa_id, n["id"]) not in existing_edge_pairs: - validated_edges.append( - { - "id": f"edge-subagent-{edge_counter}", - "source": sa_id, - "target": n["id"], - "condition": "always", - "description": "sub-agent report back", - "label": "report", - } - ) - edge_counter += 1 - existing_edge_pairs.add((sa_id, n["id"])) # ── Validate graph connectivity ───────────────────────────── # Every node must be reachable from the entry node. Disconnected @@ -1928,7 +1590,9 @@ def register_queen_lifecycle_tools( # Worker not loaded yet — resolve from draft name draft_name = draft.get("agent_name", "") if draft_name: - candidate = Path("exports") / draft_name + from framework.config import COLONIES_DIR + + candidate = COLONIES_DIR / draft_name if candidate.is_dir(): save_path = candidate _save_flowchart_file( @@ -2195,12 +1859,12 @@ def register_queen_lifecycle_tools( # Explicit user confirmation is required before transitioning from planning # to building. This tool records that confirmation and proceeds. 
- async def confirm_and_build() -> str: - """Confirm the draft and transition from planning to building phase. + async def confirm_and_build(*, agent_name: str | None = None) -> str: + """Confirm the draft, create agent directory, and transition to building. This tool should ONLY be called after the user has explicitly approved - the draft graph design via ask_user. It gates the planning→building - transition so the user always has a chance to review before code is written. + the draft graph design via ask_user. It creates the agent directory and + transitions to BUILDING phase. The queen then writes agent.json directly. """ if phase_state is None: return json.dumps({"error": "Phase state not available."}) @@ -2238,9 +1902,14 @@ def register_queen_lifecycle_tools( # Create agent folder early so flowchart and agent_path are available # throughout the entire BUILDING phase. - _agent_name = phase_state.draft_graph.get("agent_name", "").strip() + _agent_name = ( + agent_name + or phase_state.draft_graph.get("agent_name", "").strip() + ) if _agent_name: - _agent_folder = Path("exports") / _agent_name + from framework.config import COLONIES_DIR + + _agent_folder = COLONIES_DIR / _agent_name _agent_folder.mkdir(parents=True, exist_ok=True) _save_flowchart_file(_agent_folder, original_copy, fmap) phase_state.agent_path = str(_agent_folder) @@ -2271,20 +1940,30 @@ def register_queen_lifecycle_tools( f"{subagent_count} sub-agent node(s) dissolved into predecessor sub_agents" ) + # Transition to BUILDING phase + await phase_state.switch_to_building(source="tool") + _update_meta_json( + session_manager, manager_session_id, {"phase": "building"} + ) + phase_state.build_confirmed = False + + # No injection here -- the return message tells the queen what to do. + # Injecting would queue a BUILDING message that drains AFTER the queen + # may have already moved to STAGING via load_built_agent. 
+ return json.dumps( { "status": "confirmed", - "agent_name": phase_state.draft_graph.get("agent_name", ""), + "phase": "building", + "agent_name": _agent_name, + "agent_path": str(_agent_folder), "planning_nodes_dissolved": dissolved_count, - "decision_nodes_dissolved": decision_count, - "subagent_nodes_dissolved": subagent_count, "flowchart_map": fmap, "message": ( - "User has confirmed the design. " + "Design confirmed and directory created. " + ("; ".join(dissolution_parts) + ". " if dissolution_parts else "") - + "Now call initialize_and_build_agent(agent_name, nodes) to scaffold the " - "agent package and start building. The draft metadata will be " - "used to pre-populate the generated files." + + f"Now write the complete agent config to {_agent_folder}/agent.json " + "using write_file(). Include all system prompts, tools, edges, and goal." ), } ) @@ -2292,180 +1971,30 @@ def register_queen_lifecycle_tools( _confirm_tool = Tool( name="confirm_and_build", description=( - "Confirm the draft graph design and approve transition to building phase. " + "Confirm the draft graph design, create agent directory, and transition to building phase. " "ONLY call this after the user has explicitly approved the design via ask_user. " - "After confirmation, call initialize_and_build_agent() to scaffold and build." + "After confirmation, write the complete agent.json using write_file()." ), - parameters={"type": "object", "properties": {}}, + parameters={ + "type": "object", + "properties": { + "agent_name": { + "type": "string", + "description": "Snake_case name for the agent (e.g. 'linkedin_outreach'). 
" + "If omitted, uses the name from save_agent_draft().", + }, + }, + }, ) registry.register( "confirm_and_build", _confirm_tool, - lambda inputs: confirm_and_build(), + lambda inputs: confirm_and_build( + agent_name=inputs.get("agent_name"), + ), ) tools_registered += 1 - # --- initialize_and_build_agent wrapper (Planning → Building) ------------- - # With agent_name: scaffold a new agent via MCP tool, then switch to building. - # Without agent_name: just switch to building (for fixing an existing loaded agent). - - _existing_init = registry._tools.get("initialize_and_build_agent") - if _existing_init is not None: - _orig_init_executor = _existing_init.executor - - async def initialize_and_build_agent_wrapper(inputs: dict) -> str: - """Wrapper: scaffold or just switch to building phase.""" - agent_name = (inputs.get("agent_name") or "").strip() - - # Gate: when in planning phase and creating a new agent, - # require the user to have confirmed the draft first. - if ( - agent_name - and phase_state is not None - and phase_state.phase == "planning" - and not phase_state.build_confirmed - ): - if phase_state.draft_graph is None: - return json.dumps( - { - "error": ( - "Cannot transition to building without a draft. " - "Call save_agent_draft() first to create a visual draft of the " - "graph, present it to the user for review, then call " - "confirm_and_build() after the user approves." - ) - } - ) - return json.dumps( - { - "error": ( - "The user has not confirmed the draft design yet. " - "Present the draft to the user and call ask_user() to get " - "their approval. Then call confirm_and_build() before " - "calling initialize_and_build_agent()." - ) - } - ) - - # No agent_name → try to fall back to the session's current agent, - # or fail with actionable guidance. 
- if not agent_name: - # Try to resolve agent_name from the current session - fallback_path = getattr(session, "worker_path", None) - if fallback_path is not None: - agent_name = Path(fallback_path).name - else: - # Server path: check SessionManager - if session_manager is not None and manager_session_id: - srv_session = session_manager.get_session(manager_session_id) - if srv_session and getattr(srv_session, "worker_path", None): - fallback_path = srv_session.worker_path - agent_name = Path(fallback_path).name - - if not agent_name: - return json.dumps( - { - "error": ( - "No agent_name provided and no agent loaded in this session. " - "To fix: call list_agents() to find the agent name, then call " - "initialize_and_build_agent(agent_name='') to scaffold it." - ) - } - ) - - # Fall back succeeded — switch to building without scaffolding - logger.info( - "initialize_and_build_agent: no agent_name provided, " - "falling back to session agent '%s'", - agent_name, - ) - if phase_state is not None: - if fallback_path: - phase_state.agent_path = str(fallback_path) - await phase_state.switch_to_building(source="tool") - _update_meta_json(session_manager, manager_session_id, {"phase": "building"}) - if phase_state.inject_notification: - await phase_state.inject_notification( - "[PHASE CHANGE] Switched to BUILDING phase. " - "Start implementing the fix now." - ) - return json.dumps( - { - "status": "editing", - "phase": "building", - "agent_name": agent_name, - "warning": ( - f"No agent_name provided — using session agent '{agent_name}'. " - f"Agent files are at exports/{agent_name}/." - ), - "message": ( - "Switched to BUILDING phase. Full coding tools restored. " - "Implement the fix, then call load_built_agent(path) to reload." - ), - } - ) - - # Has agent_name → scaffold via MCP tool. - # If a draft exists, pass its metadata so the scaffolder can - # pre-populate descriptions, goals, and node metadata. 
- scaffold_inputs = dict(inputs) - draft = phase_state.draft_graph if phase_state else None - if draft and draft.get("agent_name") == agent_name: - scaffold_inputs["_draft"] = draft - - result = _orig_init_executor(scaffold_inputs) - # Handle both sync and async executors - if asyncio.iscoroutine(result) or asyncio.isfuture(result): - result = await result - # If result is a ToolResult, extract the text content - result_str = str(result) - if hasattr(result, "content"): - result_str = str(result.content) - try: - parsed = json.loads(result_str) - if parsed.get("success", True): - if phase_state is not None: - # Set agent_path so the frontend can query credentials - phase_state.agent_path = phase_state.agent_path or str( - Path("exports") / agent_name - ) - await phase_state.switch_to_building(source="tool") - _update_meta_json( - session_manager, manager_session_id, {"phase": "building"} - ) - # Reset draft state after successful scaffolding - phase_state.build_confirmed = False - # Persist flowchart now that the agent folder exists - if phase_state.original_draft_graph and phase_state.flowchart_map: - _save_flowchart_file( - Path("exports") / agent_name, - phase_state.original_draft_graph, - phase_state.flowchart_map, - ) - # Inject a continuation message so the queen starts - # building immediately instead of blocking for user input. - draft_hint = "" - if draft: - draft_hint = ( - " The draft metadata has been used to pre-populate " - "node descriptions, goal, and success criteria. " - "Review and refine the generated files." - ) - if phase_state.inject_notification: - await phase_state.inject_notification( - "[PHASE CHANGE] Agent scaffolded and switched to BUILDING phase. " - "Start implementing the agent nodes now." 
+ draft_hint - ) - except (json.JSONDecodeError, KeyError, TypeError): - pass - return result_str - - registry.register( - "initialize_and_build_agent", - _existing_init.tool, - lambda inputs: initialize_and_build_agent_wrapper(inputs), - ) - # --- stop_graph (Running → Staging) -------------------------------------- async def stop_graph_to_staging() -> str: @@ -2554,7 +2083,7 @@ def register_queen_lifecycle_tools( return s def _build_preamble( - runtime: AgentRuntime, + runtime: AgentHost, ) -> dict[str, Any]: """Build the lightweight preamble: status, node, elapsed, iteration. @@ -2712,9 +2241,9 @@ def register_queen_lifecycle_tools( return "\n".join(lines) - async def _format_memory(runtime: AgentRuntime) -> str: + async def _format_memory(runtime: AgentHost) -> str: """Format the worker's shared buffer snapshot and recent changes.""" - from framework.runtime.shared_state import IsolationLevel + from framework.host.shared_state import IsolationLevel lines = [] active_streams = runtime.get_active_streams() @@ -2865,7 +2394,7 @@ def register_queen_lifecycle_tools( header = f"{total} issue(s) detected." return header + "\n\n" + "\n".join(lines) - async def _format_progress(runtime: AgentRuntime, bus: EventBus) -> str: + async def _format_progress(runtime: AgentHost, bus: EventBus) -> str: """Format goal progress, token consumption, and execution outcomes.""" lines = [] @@ -2921,7 +2450,7 @@ def register_queen_lifecycle_tools( return "\n".join(lines) def _build_full_json( - runtime: AgentRuntime, + runtime: AgentHost, bus: EventBus, preamble: dict[str, Any], last_n: int, @@ -3475,50 +3004,59 @@ def register_queen_lifecycle_tools( if not resolved_path.exists(): return json.dumps({"error": f"Agent path does not exist: {agent_path}"}) - # Pre-check: verify the module exports goal/nodes/edges before - # attempting the full load. This gives the queen an actionable - # error message instead of a cryptic ImportError or TypeError. 
- try: - import importlib - import sys as _sys + # Pre-check: verify the agent can be loaded before attempting + # the full session load. Declarative (agent.json) agents skip + # the Python import check since AgentRunner.load() handles them. + _has_yaml = (resolved_path / "agent.json").exists() + if not _has_yaml: + # Legacy Python agent: verify module exports goal/nodes/edges + try: + import importlib + import sys as _sys - pkg_name = resolved_path.name - parent_dir = str(resolved_path.resolve().parent) - # Temporarily put parent on sys.path for import - if parent_dir not in _sys.path: - _sys.path.insert(0, parent_dir) - # Evict stale cached modules - stale = [n for n in _sys.modules if n == pkg_name or n.startswith(f"{pkg_name}.")] - for n in stale: - del _sys.modules[n] + pkg_name = resolved_path.name + parent_dir = str(resolved_path.resolve().parent) + if parent_dir not in _sys.path: + _sys.path.insert(0, parent_dir) + stale = [ + n for n in _sys.modules + if n == pkg_name or n.startswith(f"{pkg_name}.") + ] + for n in stale: + del _sys.modules[n] - mod = importlib.import_module(pkg_name) - missing_attrs = [ - attr for attr in ("goal", "nodes", "edges") if getattr(mod, attr, None) is None - ] - if missing_attrs: + mod = importlib.import_module(pkg_name) + missing_attrs = [ + attr + for attr in ("goal", "nodes", "edges") + if getattr(mod, attr, None) is None + ] + if missing_attrs: + return json.dumps( + { + "error": ( + f"Agent module '{pkg_name}' is missing module-level " + f"attributes: {', '.join(missing_attrs)}. " + f"Fix: in {pkg_name}/__init__.py, add " + f"'from .agent import {', '.join(missing_attrs)}' " + f"so that 'import {pkg_name}' exposes them at " + f"package level." + ) + } + ) + except Exception as pre_err: return json.dumps( { "error": ( - f"Agent module '{pkg_name}' is missing module-level " - f"attributes: {', '.join(missing_attrs)}. 
" - f"Fix: in {pkg_name}/__init__.py, add " - f"'from .agent import {', '.join(missing_attrs)}' " - f"so that 'import {pkg_name}' exposes them at package level." + f"Failed to import agent module " + f"'{resolved_path.name}': {pre_err}. " + f"Fix: ensure {resolved_path.name}/__init__.py " + f"exists and can be imported without errors " + f"(check syntax, missing dependencies, and " + f"relative imports)." ) } ) - except Exception as pre_err: - return json.dumps( - { - "error": ( - f"Failed to import agent module '{resolved_path.name}': {pre_err}. " - f"Fix: ensure {resolved_path.name}/__init__.py exists and can be " - f"imported without errors (check syntax, missing dependencies, " - f"and relative imports)." - ) - } - ) try: updated_session = await session_manager.load_graph( @@ -3635,7 +3173,7 @@ def register_queen_lifecycle_tools( description=( "Load a newly built agent as the worker in this session. " "After building and validating an agent, call this with the agent's " - "path (e.g. 'exports/my_agent') to make it available immediately. " + "path (e.g. '~/.hive/colonies/my_agent') to make it available immediately. " "The user will see the agent's graph and can interact with it." ), parameters={ @@ -3643,7 +3181,7 @@ def register_queen_lifecycle_tools( "properties": { "agent_path": { "type": "string", - "description": ("Path to the agent directory (e.g. 'exports/my_agent')"), + "description": ("Path to the agent directory (e.g. 
'~/.hive/colonies/my_agent')"), }, }, "required": ["agent_path"], @@ -3795,7 +3333,7 @@ def register_queen_lifecycle_tools( if tdef is None: if trigger_type and trigger_config: - from framework.runtime.triggers import TriggerDefinition + from framework.host.triggers import TriggerDefinition tdef = TriggerDefinition( id=trigger_id, diff --git a/core/framework/tools/session_graph_tools.py b/core/framework/tools/session_graph_tools.py index 8b068770..aadd3557 100644 --- a/core/framework/tools/session_graph_tools.py +++ b/core/framework/tools/session_graph_tools.py @@ -21,13 +21,13 @@ import logging from typing import TYPE_CHECKING if TYPE_CHECKING: - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import AgentRuntime + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost logger = logging.getLogger(__name__) -def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int: +def register_graph_tools(registry: ToolRegistry, runtime: AgentHost) -> int: """Register graph lifecycle tools bound to *runtime*. Returns the number of tools registered. @@ -41,12 +41,13 @@ def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int: async def load_agent(agent_path: str) -> str: """Load an agent graph from disk into the running session. - The agent is imported from *agent_path* (a directory containing - ``agent.py``). Its graph, goal, and entry points are registered - as a secondary graph on the runtime. Returns a JSON summary. + The agent is loaded from *agent_path* (a directory containing + ``agent.json`` or ``agent.py``). Its graph, goal, and entry points + are registered as a secondary graph on the runtime. Returns a JSON + summary. 
""" - from framework.runner.runner import AgentRunner - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.agent_loader import AgentLoader + from framework.host.execution_manager import EntryPointSpec from framework.server.app import validate_agent_path try: @@ -57,7 +58,7 @@ def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int: return json.dumps({"error": f"Agent path does not exist: {agent_path}"}) try: - runner = AgentRunner.load(path) + runner = AgentLoader.load(path) except Exception as exc: return json.dumps({"error": f"Failed to load agent: {exc}"}) @@ -105,7 +106,7 @@ def register_graph_tools(registry: ToolRegistry, runtime: AgentRuntime) -> int: "properties": { "agent_path": { "type": "string", - "description": "Path to the agent directory (containing agent.py)", + "description": "Path to the agent directory", }, }, "required": ["agent_path"], diff --git a/core/framework/tools/worker_monitoring_tools.py b/core/framework/tools/worker_monitoring_tools.py index 9d78708b..d1382020 100644 --- a/core/framework/tools/worker_monitoring_tools.py +++ b/core/framework/tools/worker_monitoring_tools.py @@ -23,7 +23,7 @@ from pathlib import Path from typing import TYPE_CHECKING if TYPE_CHECKING: - from framework.runner.tool_registry import ToolRegistry + from framework.loader.tool_registry import ToolRegistry logger = logging.getLogger(__name__) diff --git a/core/framework/tracker/__init__.py b/core/framework/tracker/__init__.py new file mode 100644 index 00000000..6cc52d06 --- /dev/null +++ b/core/framework/tracker/__init__.py @@ -0,0 +1,3 @@ +"""Tracker layer -- decision/run logging for Builder analysis.""" + +from framework.tracker.decision_tracker import DecisionTracker # noqa: F401 diff --git a/core/framework/runtime/core.py b/core/framework/tracker/decision_tracker.py similarity index 99% rename from core/framework/runtime/core.py rename to core/framework/tracker/decision_tracker.py index 
c61e8d96..5f134b7a 100644 --- a/core/framework/runtime/core.py +++ b/core/framework/tracker/decision_tracker.py @@ -21,7 +21,7 @@ from framework.storage.concurrent import ConcurrentStorage logger = logging.getLogger(__name__) -class Runtime: +class DecisionTracker: """ The runtime environment that agents execute within. diff --git a/core/framework/runtime/llm_debug_logger.py b/core/framework/tracker/llm_debug_logger.py similarity index 100% rename from core/framework/runtime/llm_debug_logger.py rename to core/framework/tracker/llm_debug_logger.py diff --git a/core/framework/runtime/runtime_log_schemas.py b/core/framework/tracker/runtime_log_schemas.py similarity index 100% rename from core/framework/runtime/runtime_log_schemas.py rename to core/framework/tracker/runtime_log_schemas.py diff --git a/core/framework/runtime/runtime_log_store.py b/core/framework/tracker/runtime_log_store.py similarity index 99% rename from core/framework/runtime/runtime_log_store.py rename to core/framework/tracker/runtime_log_store.py index 7be0942c..b3f98db2 100644 --- a/core/framework/runtime/runtime_log_store.py +++ b/core/framework/tracker/runtime_log_store.py @@ -29,7 +29,7 @@ import logging from datetime import UTC, datetime from pathlib import Path -from framework.runtime.runtime_log_schemas import ( +from framework.tracker.runtime_log_schemas import ( NodeDetail, NodeStepLog, RunDetailsLog, diff --git a/core/framework/runtime/runtime_logger.py b/core/framework/tracker/runtime_logger.py similarity index 98% rename from core/framework/runtime/runtime_logger.py rename to core/framework/tracker/runtime_logger.py index f816131c..0da112fa 100644 --- a/core/framework/runtime/runtime_logger.py +++ b/core/framework/tracker/runtime_logger.py @@ -27,13 +27,13 @@ from datetime import UTC, datetime from typing import Any from framework.observability import get_trace_context -from framework.runtime.runtime_log_schemas import ( +from framework.tracker.runtime_log_schemas import ( NodeDetail, 
NodeStepLog, RunSummaryLog, ToolCallLog, ) -from framework.runtime.runtime_log_store import RuntimeLogStore +from framework.tracker.runtime_log_store import RuntimeLogStore logger = logging.getLogger(__name__) diff --git a/examples/templates/competitive_intel_agent/__main__.py b/examples/templates/competitive_intel_agent/__main__.py index 50dfc8f6..286d7e97 100644 --- a/examples/templates/competitive_intel_agent/__main__.py +++ b/examples/templates/competitive_intel_agent/__main__.py @@ -121,10 +121,10 @@ def tui(verbose: bool, debug: bool) -> None: sys.exit(1) from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui() -> None: agent = CompetitiveIntelAgent() @@ -150,7 +150,7 @@ def tui(verbose: bool, debug: bool) -> None: tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/competitive_intel_agent/agent.py b/examples/templates/competitive_intel_agent/agent.py index 7ae2ea3f..879cab88 100644 --- a/examples/templates/competitive_intel_agent/agent.py +++ b/examples/templates/competitive_intel_agent/agent.py @@ -1,7 +1,7 @@ """Agent graph construction for Competitive Intelligence Agent.""" from typing import Any, TYPE_CHECKING -from framework.graph import ( +from framework.orchestrator import ( EdgeSpec, EdgeCondition, Goal, @@ -9,12 +9,12 @@ from framework.graph import ( Constraint, NodeSpec, ) -from framework.graph.edge import GraphSpec 
-from framework.graph.executor import ExecutionResult, GraphExecutor -from framework.runtime.event_bus import EventBus -from framework.runtime.core import Runtime +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator +from framework.host.event_bus import EventBus +from framework.tracker.decision_tracker import DecisionTracker as Runtime from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry +from framework.loader.tool_registry import ToolRegistry from .config import default_config, metadata, RuntimeConfig from .nodes import ( @@ -188,7 +188,7 @@ class CompetitiveIntelAgent: self.entry_points = entry_points self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes - self._executor: GraphExecutor | None = None + self._executor: Orchestrator | None = None self._graph: GraphSpec | None = None self._event_bus: EventBus | None = None self._tool_registry: ToolRegistry | None = None @@ -219,12 +219,12 @@ class CompetitiveIntelAgent: }, ) - def _setup(self) -> GraphExecutor: + def _setup(self) -> Orchestrator: """ Set up the executor with all components (runtime, LLM, tools). Returns: - An initialized GraphExecutor instance. + An initialized Orchestrator instance. 
""" from pathlib import Path @@ -250,7 +250,7 @@ class CompetitiveIntelAgent: self._graph = self._build_graph() runtime = Runtime(storage_path) - self._executor = GraphExecutor( + self._executor = Orchestrator( runtime=runtime, llm=llm, tools=tools, diff --git a/examples/templates/competitive_intel_agent/nodes/__init__.py b/examples/templates/competitive_intel_agent/nodes/__init__.py index 5d1b716d..449e6a64 100644 --- a/examples/templates/competitive_intel_agent/nodes/__init__.py +++ b/examples/templates/competitive_intel_agent/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Competitive Intelligence Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) intake_node: NodeSpec = NodeSpec( diff --git a/examples/templates/deep_research_agent/__main__.py b/examples/templates/deep_research_agent/__main__.py index 48c4f81a..adcfb4d4 100644 --- a/examples/templates/deep_research_agent/__main__.py +++ b/examples/templates/deep_research_agent/__main__.py @@ -74,10 +74,10 @@ def tui(verbose, debug): from pathlib import Path from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui(): agent = DeepResearchAgent() @@ -103,7 +103,7 @@ def tui(verbose, debug): tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/deep_research_agent/agent.py 
b/examples/templates/deep_research_agent/agent.py index 0ef6df69..d95b8a5e 100644 --- a/examples/templates/deep_research_agent/agent.py +++ b/examples/templates/deep_research_agent/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import ( @@ -244,7 +244,7 @@ class DeepResearchAgent: ) ] - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/deep_research_agent/nodes/__init__.py b/examples/templates/deep_research_agent/nodes/__init__.py index 9350f14d..00a7bbb5 100644 --- a/examples/templates/deep_research_agent/nodes/__init__.py +++ b/examples/templates/deep_research_agent/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Deep Research Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) # Brief conversation to clarify what the user wants researched. 
diff --git a/examples/templates/email_inbox_management/__main__.py b/examples/templates/email_inbox_management/__main__.py index d75b3e3c..58a22b70 100644 --- a/examples/templates/email_inbox_management/__main__.py +++ b/examples/templates/email_inbox_management/__main__.py @@ -83,10 +83,10 @@ def tui(mock, verbose, debug): from pathlib import Path from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui(): agent = InboxManagementAgent() @@ -118,7 +118,7 @@ def tui(mock, verbose, debug): tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/email_inbox_management/agent.py b/examples/templates/email_inbox_management/agent.py index 97df181d..ab805eb6 100644 --- a/examples/templates/email_inbox_management/agent.py +++ b/examples/templates/email_inbox_management/agent.py @@ -2,15 +2,15 @@ from pathlib import Path -from framework.graph import EdgeCondition, EdgeSpec, Goal, SuccessCriterion, Constraint -from framework.graph.checkpoint_config import CheckpointConfig -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult, GraphExecutor +from framework.orchestrator import EdgeCondition, EdgeSpec, Goal, SuccessCriterion, Constraint +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import 
ExecutionResult, Orchestrator from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.event_bus import EventBus -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.event_bus import EventBus +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import ( @@ -190,7 +190,7 @@ class EmailInboxManagementAgent: self.entry_points = entry_points self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes - self._executor: GraphExecutor | None = None + self._executor: Orchestrator | None = None self._graph: GraphSpec | None = None self._event_bus: EventBus | None = None self._tool_registry: ToolRegistry | None = None @@ -264,7 +264,7 @@ class EmailInboxManagementAgent: ), ] - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/email_inbox_management/nodes/__init__.py b/examples/templates/email_inbox_management/nodes/__init__.py index 89a56a09..407956c0 100644 --- a/examples/templates/email_inbox_management/nodes/__init__.py +++ b/examples/templates/email_inbox_management/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Inbox Management Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) # Receives user rules and max_emails, confirms understanding with user. 
diff --git a/examples/templates/email_inbox_management/tools.py b/examples/templates/email_inbox_management/tools.py index 27370650..5959bc7b 100644 --- a/examples/templates/email_inbox_management/tools.py +++ b/examples/templates/email_inbox_management/tools.py @@ -15,7 +15,7 @@ from pathlib import Path import httpx from framework.llm.provider import Tool, ToolResult, ToolUse -from framework.runner.tool_registry import _execution_context +from framework.loader.tool_registry import _execution_context logger = logging.getLogger(__name__) @@ -102,7 +102,7 @@ def _get_data_dir() -> str: ctx = _execution_context.get() if not ctx or "data_dir" not in ctx: raise RuntimeError( - "data_dir not set in execution context. Is the tool running inside a GraphExecutor?" + "data_dir not set in execution context. Is the tool running inside an Orchestrator?" ) return ctx["data_dir"] diff --git a/examples/templates/email_reply_agent/__main__.py b/examples/templates/email_reply_agent/__main__.py index 9858c770..4fd4086d 100644 --- a/examples/templates/email_reply_agent/__main__.py +++ b/examples/templates/email_reply_agent/__main__.py @@ -51,9 +51,9 @@ def tui(): from framework.tui.app import AdenTUI from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.execution_manager import EntryPointSpec async def run_tui(): agent = EmailReplyAgent() @@ -68,7 +68,7 @@ def tui(): api_key=agent.config.api_key, api_base=agent.config.api_base, ) - runtime = create_agent_runtime( + runtime = AgentHost( graph=agent._build_graph(), goal=agent.goal, storage_path=storage, diff --git a/examples/templates/email_reply_agent/agent.py b/examples/templates/email_reply_agent/agent.py index 03448409..434683d6 
100644 --- a/examples/templates/email_reply_agent/agent.py +++ b/examples/templates/email_reply_agent/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import intake_node, search_node, confirm_draft_node @@ -101,7 +101,7 @@ entry_points = {"start": "intake"} pause_nodes = [] terminal_nodes = [] -# Module-level vars read by AgentRunner.load() +# Module-level vars read by AgentLoader.load() conversation_mode = "continuous" identity_prompt = "You are a helpful email reply assistant that filters unreplied emails and sends personalized responses." 
loop_config = { @@ -159,7 +159,7 @@ class EmailReplyAgent: tools = list(self._tool_registry.get_tools().values()) tool_executor = self._tool_registry.get_executor() self._graph = self._build_graph() - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/email_reply_agent/nodes/__init__.py b/examples/templates/email_reply_agent/nodes/__init__.py index aaf69a95..71f827e9 100644 --- a/examples/templates/email_reply_agent/nodes/__init__.py +++ b/examples/templates/email_reply_agent/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Email Reply Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) intake_node = NodeSpec( diff --git a/examples/templates/email_reply_agent/tests/conftest.py b/examples/templates/email_reply_agent/tests/conftest.py index 2e9d1813..96f98ee1 100644 --- a/examples/templates/email_reply_agent/tests/conftest.py +++ b/examples/templates/email_reply_agent/tests/conftest.py @@ -25,6 +25,6 @@ def agent_module(): @pytest.fixture(scope="session") def runner_loaded(): """Load the agent through AgentRunner (structural only, no LLM needed).""" - from framework.runner.runner import AgentRunner + from framework.loader.agent_loader import AgentLoader - return AgentRunner.load(AGENT_PATH) + return AgentLoader.load(AGENT_PATH) diff --git a/examples/templates/email_reply_agent/tests/test_email_reply_agent.py b/examples/templates/email_reply_agent/tests/test_email_reply_agent.py index ec5f05c9..717c309e 100644 --- a/examples/templates/email_reply_agent/tests/test_email_reply_agent.py +++ b/examples/templates/email_reply_agent/tests/test_email_reply_agent.py @@ -77,7 +77,7 @@ class TestRunnerLoad: """Test AgentRunner can load the agent.""" def test_runner_load_succeeds(self, runner_loaded): - """AgentRunner.load() succeeds.""" + """AgentLoader.load() 
succeeds.""" assert runner_loaded is not None def test_runner_has_goal(self, runner_loaded): diff --git a/examples/templates/job_hunter/__main__.py b/examples/templates/job_hunter/__main__.py index 752ae545..bdf5726d 100644 --- a/examples/templates/job_hunter/__main__.py +++ b/examples/templates/job_hunter/__main__.py @@ -75,10 +75,10 @@ def tui(mock, verbose, debug): from pathlib import Path from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui(): agent = JobHunterAgent() @@ -106,7 +106,7 @@ def tui(mock, verbose, debug): tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/job_hunter/agent.py b/examples/templates/job_hunter/agent.py index 29d37efc..6e7d9036 100644 --- a/examples/templates/job_hunter/agent.py +++ b/examples/templates/job_hunter/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from 
framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config from .nodes import ( @@ -224,7 +224,7 @@ class JobHunterAgent: ) ] - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/job_hunter/nodes/__init__.py b/examples/templates/job_hunter/nodes/__init__.py index 9d6dc619..3b9841de 100644 --- a/examples/templates/job_hunter/nodes/__init__.py +++ b/examples/templates/job_hunter/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Job Hunter Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (simple) # Collect resume and identify strongest role types. 
diff --git a/examples/templates/local_business_extractor/agent.py b/examples/templates/local_business_extractor/agent.py index 82c1736d..822b040e 100644 --- a/examples/templates/local_business_extractor/agent.py +++ b/examples/templates/local_business_extractor/agent.py @@ -1,14 +1,14 @@ """Agent graph construction for Local Business Extractor.""" from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import map_search_gcu, extract_contacts_node, sheets_sync_node @@ -125,7 +125,7 @@ class LocalBusinessExtractor: tools = list(self._tool_registry.get_tools().values()) tool_executor = self._tool_registry.get_executor() self._graph = self._build_graph() - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/local_business_extractor/nodes/__init__.py b/examples/templates/local_business_extractor/nodes/__init__.py index 26c4e16c..f0b5727f 100644 --- 
a/examples/templates/local_business_extractor/nodes/__init__.py +++ b/examples/templates/local_business_extractor/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Local Business Extractor.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # GCU Subagent for Google Maps map_search_gcu = NodeSpec( diff --git a/examples/templates/meeting_scheduler/__main__.py b/examples/templates/meeting_scheduler/__main__.py index b6ff5493..6315d102 100644 --- a/examples/templates/meeting_scheduler/__main__.py +++ b/examples/templates/meeting_scheduler/__main__.py @@ -54,9 +54,9 @@ def tui(): from pathlib import Path from framework.tui.app import AdenTUI from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.execution_manager import EntryPointSpec async def run_tui(): agent = MeetingScheduler() @@ -71,7 +71,7 @@ def tui(): api_key=agent.config.api_key, api_base=agent.config.api_base, ) - runtime = create_agent_runtime( + runtime = AgentHost( graph=agent._build_graph(), goal=agent.goal, storage_path=storage, diff --git a/examples/templates/meeting_scheduler/agent.py b/examples/templates/meeting_scheduler/agent.py index f3de5e0c..ec077a33 100644 --- a/examples/templates/meeting_scheduler/agent.py +++ b/examples/templates/meeting_scheduler/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from 
framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import intake_node, schedule_node, confirm_node @@ -107,7 +107,7 @@ entry_points = {"start": "intake"} pause_nodes = [] terminal_nodes = [] # Forever-alive -# Module-level vars read by AgentRunner.load() +# Module-level vars read by AgentLoader.load() conversation_mode = "continuous" identity_prompt = "You are a helpful meeting scheduler assistant that manages calendar availability and sends confirmations." 
loop_config = { @@ -165,7 +165,7 @@ class MeetingScheduler: tools = list(self._tool_registry.get_tools().values()) tool_executor = self._tool_registry.get_executor() self._graph = self._build_graph() - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/meeting_scheduler/nodes/__init__.py b/examples/templates/meeting_scheduler/nodes/__init__.py index 5ccf3dae..81394f69 100644 --- a/examples/templates/meeting_scheduler/nodes/__init__.py +++ b/examples/templates/meeting_scheduler/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Meeting Scheduler.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) intake_node = NodeSpec( diff --git a/examples/templates/meeting_scheduler/tests/conftest.py b/examples/templates/meeting_scheduler/tests/conftest.py index d1e60437..289333c1 100644 --- a/examples/templates/meeting_scheduler/tests/conftest.py +++ b/examples/templates/meeting_scheduler/tests/conftest.py @@ -25,10 +25,10 @@ def agent_module(): @pytest.fixture(scope="session") def runner_loaded(): """Load the agent through AgentRunner (structural only, no LLM needed).""" - from framework.runner.runner import AgentRunner + from framework.loader.agent_loader import AgentLoader from framework.credentials.models import CredentialError try: - return AgentRunner.load(AGENT_PATH) + return AgentLoader.load(AGENT_PATH) except CredentialError: pytest.skip("Google OAuth credentials not configured") diff --git a/examples/templates/sdr_agent/agent.py b/examples/templates/sdr_agent/agent.py index 105cf3dc..b279ae3d 100644 --- a/examples/templates/sdr_agent/agent.py +++ b/examples/templates/sdr_agent/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.checkpoint_config import 
CheckpointConfig -from framework.graph.edge import AsyncEntryPointSpec, GraphSpec -from framework.graph.executor import ExecutionResult +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.checkpoint_config import CheckpointConfig +from framework.orchestrator.edge import AsyncEntryPointSpec, GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import AgentRuntime, create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import ( @@ -265,7 +265,7 @@ class SDRAgent: ), ] - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/sdr_agent/nodes/__init__.py b/examples/templates/sdr_agent/nodes/__init__.py index cbd274f8..6de8b3e5 100644 --- a/examples/templates/sdr_agent/nodes/__init__.py +++ b/examples/templates/sdr_agent/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for SDR Agent.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) # Receives contact list and outreach goal, confirms with user before proceeding. 
diff --git a/examples/templates/sdr_agent/tools.py b/examples/templates/sdr_agent/tools.py index 26c24aab..4a3eb3f9 100644 --- a/examples/templates/sdr_agent/tools.py +++ b/examples/templates/sdr_agent/tools.py @@ -15,7 +15,7 @@ from __future__ import annotations import json from framework.llm.provider import Tool, ToolResult, ToolUse -from framework.runner.tool_registry import _execution_context +from framework.loader.tool_registry import _execution_context # --------------------------------------------------------------------------- # Tool definitions (auto-discovered by ToolRegistry.discover_from_module) @@ -56,7 +56,7 @@ def _get_data_dir() -> str: ctx = _execution_context.get() if not ctx or "data_dir" not in ctx: raise RuntimeError( - "data_dir not set in execution context. Is the tool running inside a GraphExecutor?" + "data_dir not set in execution context. Is the tool running inside a Orchestrator?" ) return ctx["data_dir"] diff --git a/examples/templates/tech_news_reporter/__main__.py b/examples/templates/tech_news_reporter/__main__.py index 711c0f23..f37e0b09 100644 --- a/examples/templates/tech_news_reporter/__main__.py +++ b/examples/templates/tech_news_reporter/__main__.py @@ -73,10 +73,10 @@ def tui(verbose, debug): from pathlib import Path from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui(): agent = TechNewsReporterAgent() @@ -101,7 +101,7 @@ def tui(verbose, debug): tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + 
runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/tech_news_reporter/agent.py b/examples/templates/tech_news_reporter/agent.py index ef65fbb3..1346184f 100644 --- a/examples/templates/tech_news_reporter/agent.py +++ b/examples/templates/tech_news_reporter/agent.py @@ -1,12 +1,12 @@ """Agent graph construction for Tech & AI News Reporter.""" -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult, GraphExecutor -from framework.runtime.event_bus import EventBus -from framework.runtime.core import Runtime +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator +from framework.host.event_bus import EventBus +from framework.tracker.decision_tracker import DecisionTracker as Runtime from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry +from framework.loader.tool_registry import ToolRegistry from .config import default_config, metadata from .nodes import ( @@ -131,7 +131,7 @@ class TechNewsReporterAgent: self.entry_points = entry_points self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes - self._executor: GraphExecutor | None = None + self._executor: Orchestrator | None = None self._graph: GraphSpec | None = None self._event_bus: EventBus | None = None self._tool_registry: ToolRegistry | None = None @@ -157,7 +157,7 @@ class TechNewsReporterAgent: }, ) - def _setup(self) -> GraphExecutor: + def _setup(self) -> Orchestrator: """Set up the executor with all components.""" from pathlib import Path @@ -183,7 +183,7 @@ class TechNewsReporterAgent: self._graph = self._build_graph() runtime = Runtime(storage_path) - self._executor = GraphExecutor( + self._executor 
= Orchestrator( runtime=runtime, llm=llm, tools=tools, diff --git a/examples/templates/tech_news_reporter/nodes/__init__.py b/examples/templates/tech_news_reporter/nodes/__init__.py index 2d0b9b27..a7e2fbe7 100644 --- a/examples/templates/tech_news_reporter/nodes/__init__.py +++ b/examples/templates/tech_news_reporter/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Tech & AI News Reporter.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) # Brief conversation to understand what topics the user cares about. diff --git a/examples/templates/twitter_news_agent/agent.py b/examples/templates/twitter_news_agent/agent.py index 0dffb4a2..50627cf3 100644 --- a/examples/templates/twitter_news_agent/agent.py +++ b/examples/templates/twitter_news_agent/agent.py @@ -2,14 +2,14 @@ from pathlib import Path -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult +from framework.orchestrator.checkpoint_config import CheckpointConfig from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec +from framework.loader.tool_registry import ToolRegistry +from framework.host.agent_host import AgentHost +from framework.host.execution_manager import EntryPointSpec from .config import default_config, metadata from .nodes import fetch_node, process_node, review_node @@ -91,7 +91,7 @@ entry_points = {"start": "process-news"} pause_nodes = [] 
terminal_nodes = [] # Forever-alive -# Module-level vars read by AgentRunner.load() +# Module-level vars read by AgentLoader.load() conversation_mode = "continuous" identity_prompt = "You are a professional news analyst and researcher." loop_config = { @@ -149,7 +149,7 @@ class TwitterNewsAgent: tools = list(self._tool_registry.get_tools().values()) tool_executor = self._tool_registry.get_executor() self._graph = self._build_graph() - self._agent_runtime = create_agent_runtime( + self._agent_runtime = AgentHost( graph=self._graph, goal=self.goal, storage_path=self._storage_path, diff --git a/examples/templates/twitter_news_agent/nodes/__init__.py b/examples/templates/twitter_news_agent/nodes/__init__.py index bda48139..b21a493d 100644 --- a/examples/templates/twitter_news_agent/nodes/__init__.py +++ b/examples/templates/twitter_news_agent/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Twitter News Digest.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Browser subagent (GCU) to fetch tweets fetch_node = NodeSpec( diff --git a/examples/templates/vulnerability_assessment/__main__.py b/examples/templates/vulnerability_assessment/__main__.py index fa1382c1..921c5031 100644 --- a/examples/templates/vulnerability_assessment/__main__.py +++ b/examples/templates/vulnerability_assessment/__main__.py @@ -76,10 +76,10 @@ def tui(mock, verbose, debug): from pathlib import Path from framework.llm import LiteLLMProvider - from framework.runner.tool_registry import ToolRegistry - from framework.runtime.agent_runtime import create_agent_runtime - from framework.runtime.event_bus import EventBus - from framework.runtime.execution_stream import EntryPointSpec + from framework.loader.tool_registry import ToolRegistry + from framework.host.agent_host import AgentHost + from framework.host.event_bus import EventBus + from framework.host.execution_manager import EntryPointSpec async def run_with_tui(): agent = 
VulnerabilityResearcherAgent() @@ -107,7 +107,7 @@ def tui(mock, verbose, debug): tool_executor = agent._tool_registry.get_executor() graph = agent._build_graph() - runtime = create_agent_runtime( + runtime = AgentHost( graph=graph, goal=agent.goal, storage_path=storage_path, diff --git a/examples/templates/vulnerability_assessment/agent.py b/examples/templates/vulnerability_assessment/agent.py index 0cc79436..fbc2ffc8 100644 --- a/examples/templates/vulnerability_assessment/agent.py +++ b/examples/templates/vulnerability_assessment/agent.py @@ -1,12 +1,12 @@ """Agent graph construction for Passive Website Vulnerability Assessment.""" -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult, GraphExecutor -from framework.runtime.event_bus import EventBus -from framework.runtime.core import Runtime +from framework.orchestrator import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint +from framework.orchestrator.edge import GraphSpec +from framework.orchestrator.orchestrator import ExecutionResult, Orchestrator +from framework.host.event_bus import EventBus +from framework.tracker.decision_tracker import DecisionTracker as Runtime from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry +from framework.loader.tool_registry import ToolRegistry from .config import default_config, metadata from .nodes import ( @@ -186,7 +186,7 @@ class VulnerabilityResearcherAgent: self.entry_points = entry_points self.pause_nodes = pause_nodes self.terminal_nodes = terminal_nodes - self._executor: GraphExecutor | None = None + self._executor: Orchestrator | None = None self._graph: GraphSpec | None = None self._event_bus: EventBus | None = None self._tool_registry: ToolRegistry | None = None @@ -219,7 +219,7 @@ class VulnerabilityResearcherAgent: ), ) - def _setup(self, mock_mode=False) -> GraphExecutor: + 
def _setup(self, mock_mode=False) -> Orchestrator: """Set up the executor with all components.""" from pathlib import Path @@ -247,7 +247,7 @@ class VulnerabilityResearcherAgent: self._graph = self._build_graph() runtime = Runtime(storage_path) - self._executor = GraphExecutor( + self._executor = Orchestrator( runtime=runtime, llm=llm, tools=tools, diff --git a/examples/templates/vulnerability_assessment/nodes/__init__.py b/examples/templates/vulnerability_assessment/nodes/__init__.py index 17212d79..a88eb565 100644 --- a/examples/templates/vulnerability_assessment/nodes/__init__.py +++ b/examples/templates/vulnerability_assessment/nodes/__init__.py @@ -1,6 +1,6 @@ """Node definitions for Passive Website Vulnerability Assessment.""" -from framework.graph import NodeSpec +from framework.orchestrator import NodeSpec # Node 1: Intake (client-facing) # Collect the target domain and confirm scanning scope. diff --git a/tools/browser-extension/offscreen.js b/tools/browser-extension/offscreen.js index 00705e12..8a64c687 100644 --- a/tools/browser-extension/offscreen.js +++ b/tools/browser-extension/offscreen.js @@ -9,51 +9,34 @@ const HIVE_WS_URL = "ws://127.0.0.1:9229/bridge"; let ws = null; -let reconnectAttempts = 0; -const MAX_RECONNECT_DELAY = 10000; // Max 10 seconds between attempts +const RETRY_INTERVAL = 2000; // Poll every 2s while disconnected function connect() { - // Exponential backoff with cap - const delay = Math.min(reconnectAttempts * 1000, MAX_RECONNECT_DELAY); + try { + ws = new WebSocket(HIVE_WS_URL); - if (reconnectAttempts > 0) { - console.log(`[Beeline] Reconnecting in ${delay}ms (attempt ${reconnectAttempts + 1})...`); + ws.onopen = () => { + console.log("[Beeline] WebSocket connected to Hive"); + chrome.runtime.sendMessage({ _beeline: true, type: "ws_open" }); + }; + + ws.onmessage = (event) => { + chrome.runtime.sendMessage({ _beeline: true, type: "ws_message", data: event.data }); + }; + + ws.onclose = (event) => { + console.log(`[Beeline] 
WebSocket closed: code=${event.code}, reason=${event.reason}`); + chrome.runtime.sendMessage({ _beeline: true, type: "ws_close" }); + setTimeout(connect, RETRY_INTERVAL); + }; + + ws.onerror = () => { + console.warn(`[Beeline] WebSocket connection failed (server may not be running)`); + }; + } catch (error) { + console.error("[Beeline] Failed to create WebSocket:", error.message); + setTimeout(connect, RETRY_INTERVAL); } - - setTimeout(() => { - try { - ws = new WebSocket(HIVE_WS_URL); - - ws.onopen = () => { - console.log("[Beeline] WebSocket connected to Hive"); - reconnectAttempts = 0; - chrome.runtime.sendMessage({ _beeline: true, type: "ws_open" }); - }; - - ws.onmessage = (event) => { - chrome.runtime.sendMessage({ _beeline: true, type: "ws_message", data: event.data }); - }; - - ws.onclose = (event) => { - console.log(`[Beeline] WebSocket closed: code=${event.code}, reason=${event.reason}`); - chrome.runtime.sendMessage({ _beeline: true, type: "ws_close" }); - reconnectAttempts++; - // Reconnect after delay - setTimeout(connect, 2000); - }; - - ws.onerror = () => { - // Don't log the full error object - it's usually just an Event - // The actual error will be reflected in onclose - console.warn(`[Beeline] WebSocket connection failed (server may not be running)`); - // Don't close here - let onclose handle cleanup - }; - } catch (error) { - console.error("[Beeline] Failed to create WebSocket:", error.message); - reconnectAttempts++; - setTimeout(connect, 2000); - } - }, delay); } // Forward outbound messages from the service worker onto the WebSocket. 
diff --git a/tools/coder_tools_server.py b/tools/coder_tools_server.py index 1aee0819..f5f480fb 100644 --- a/tools/coder_tools_server.py +++ b/tools/coder_tools_server.py @@ -456,8 +456,8 @@ def list_agent_tools( try: from pathlib import Path - from framework.runner.mcp_client import MCPClient, MCPServerConfig - from framework.runner.tool_registry import ToolRegistry + from framework.loader.mcp_client import MCPClient, MCPServerConfig + from framework.loader.tool_registry import ToolRegistry except ImportError: return json.dumps({"error": "Cannot import MCPClient"}) @@ -806,8 +806,8 @@ def _validate_agent_tools_impl(agent_path: str) -> dict: try: from pathlib import Path - from framework.runner.mcp_client import MCPClient, MCPServerConfig - from framework.runner.tool_registry import ToolRegistry + from framework.loader.mcp_client import MCPClient, MCPServerConfig + from framework.loader.tool_registry import ToolRegistry except ImportError: return {"error": "Cannot import MCPClient"} @@ -845,27 +845,50 @@ def _validate_agent_tools_impl(agent_path: str) -> dict: discovery_errors.append({"server": server_name, "error": str(e)}) # --- Load agent nodes and extract declared tools --- + agent_json_file = os.path.join(agent_dir, "agent.json") agent_py = os.path.join(agent_dir, "agent.py") - if not os.path.isfile(agent_py): - return {"error": f"No agent.py found in {agent_path}"} - import importlib - import importlib.util - import sys + nodes = None + if os.path.isfile(agent_json_file): + # Declarative JSON agent + try: + with open(agent_json_file, encoding="utf-8") as f: + data = json.load(f) + # Build lightweight node stubs with .tools and .id/.name + class _NodeStub: + def __init__(self, d): + self.id = d.get("id", "?") + self.name = d.get("name", self.id) + t = d.get("tools", {}) + if isinstance(t, dict): + self.tools = t.get("allowed", []) + elif isinstance(t, list): + self.tools = t + else: + self.tools = [] + nodes = [_NodeStub(n) for n in data.get("nodes", [])] + 
except Exception as e: + return {"error": f"Failed to parse agent.json: {e}"} + elif os.path.isfile(agent_py): + # Legacy Python agent + import importlib + import importlib.util + import sys - package_name = os.path.basename(agent_dir) - parent_dir = os.path.dirname(os.path.abspath(agent_dir)) - if parent_dir not in sys.path: - sys.path.insert(0, parent_dir) + package_name = os.path.basename(agent_dir) + parent_dir = os.path.dirname(os.path.abspath(agent_dir)) + if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + try: + agent_module = importlib.import_module(package_name) + except Exception as e: + return {"error": f"Failed to import agent: {e}"} + nodes = getattr(agent_module, "nodes", None) + else: + return {"error": f"No agent.json or agent.py found in {agent_path}"} - try: - agent_module = importlib.import_module(package_name) - except Exception as e: - return {"error": f"Failed to import agent: {e}"} - - nodes = getattr(agent_module, "nodes", None) if not nodes: - return {"error": "Agent module has no 'nodes' attribute"} + return {"error": "Agent has no nodes defined"} # --- Validate declared vs available --- missing_by_node: dict[str, list[str]] = {} @@ -951,37 +974,46 @@ def list_agents() -> str: if not os.path.isdir(agent_dir): continue - # Must have agent.py to be considered an agent package - if not os.path.isfile(os.path.join(agent_dir, "agent.py")): + # Must have agent.json (declarative) or agent.py (legacy) + has_json = os.path.isfile(os.path.join(agent_dir, "agent.json")) + has_py = os.path.isfile(os.path.join(agent_dir, "agent.py")) + if not has_json and not has_py: continue info = { "name": entry, "path": os.path.relpath(agent_dir, PROJECT_ROOT), "source": source, - "has_nodes": os.path.isdir(os.path.join(agent_dir, "nodes")), - "has_tests": os.path.isdir(os.path.join(agent_dir, "tests")), + "format": "json" if has_json else "python", "has_mcp_config": os.path.isfile(os.path.join(agent_dir, "mcp_servers.json")), } - # Read description 
from __init__.py docstring - init_path = os.path.join(agent_dir, "__init__.py") - if os.path.isfile(init_path): + # Read description from agent.json or __init__.py + if has_json: try: - with open(init_path, encoding="utf-8") as f: - content = f.read(2000) - # Extract module docstring - for quote in ['"""', "'''"]: - start = content.find(quote) - if start != -1: - end = content.find(quote, start + 3) - if end != -1: - info["description"] = ( - content[start + 3 : end].strip().split("\n")[0] - ) - break - except OSError: + with open(os.path.join(agent_dir, "agent.json"), encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict) and data.get("description"): + info["description"] = data["description"] + except Exception: pass + else: + init_path = os.path.join(agent_dir, "__init__.py") + if os.path.isfile(init_path): + try: + with open(init_path, encoding="utf-8") as f: + content = f.read(2000) + for quote in ['"""', "'''"]: + start = content.find(quote) + if start != -1: + end = content.find(quote, start + 3) + if end != -1: + info["description"] = ( + content[start + 3 : end].strip().split("\n")[0] + ) + break + except OSError: + pass # Check runtime data runtime_dir = hive_agents_dir / entry @@ -1266,8 +1298,8 @@ def _run_agent_tests_impl( if not tests_dir.exists(): return { - "error": f"No tests directory: exports/{agent_name}/tests/", - "hint": "Create test files in the tests/ directory first.", + "skipped": True, + "summary": "No tests directory (OK for declarative agents)", } # Parse test types @@ -1446,13 +1478,11 @@ def run_agent_tests( def validate_agent_package(agent_name: str) -> str: """Run structural validation checks on a built agent package in one call. - Executes 5 steps and reports all results (does not stop on first failure): - 1. Class validation — checks graph structure and entry_points contract - 2. 
Node completeness — every NodeSpec in nodes/ must be in the nodes list, - and GCU nodes must be referenced in a parent's sub_agents - 3. Graph validation — loads the agent graph without credential checks - 4. Tool validation — checks declared tools exist in MCP servers - 5. Tests — runs the agent's pytest suite + Executes validation steps and reports all results: + 1. Schema validation — loads agent.json via load_agent_config + 2. Graph validation — loads the agent graph via AgentLoader + 3. Tool validation — checks declared tools exist in MCP servers + 4. Tests — runs the agent's pytest suite (skipped if no tests/) Note: Credential validation is intentionally skipped here (building phase). Credentials are validated at run time by run_agent_with_input() preflight. @@ -1477,137 +1507,88 @@ def validate_agent_package(agent_name: str) -> str: path_parts.append(pythonpath) env["PYTHONPATH"] = os.pathsep.join(path_parts) - # Step 0: Module contract — __init__.py must expose goal, nodes, edges - try: - _contract_script = textwrap.dedent("""\ - import importlib, json - mod = importlib.import_module('{agent_name}') - missing = [a for a in ('goal', 'nodes', 'edges') if getattr(mod, a, None) is None] - if missing: + # Detect agent format + _is_json = os.path.isfile(os.path.join(PROJECT_ROOT, agent_path, "agent.json")) + + if _is_json: + # JSON agents: validate via load_agent_config (schema + round-trip) + try: + _json_script = textwrap.dedent("""\ + import json, pathlib + from framework.loader.agent_loader import load_agent_config + data = json.loads( + pathlib.Path('exports/{agent_name}/agent.json').read_text() + ) + g, goal = load_agent_config(data) print(json.dumps({{ - 'valid': False, - 'error': ( - "Module '{agent_name}' is missing module-level attributes: " - + ", ".join(missing) + ". " - "Fix: in {agent_name}/__init__.py, add " - "'from .agent import " + ", ".join(missing) + "' " - "so that 'import {agent_name}' exposes them at package level." 
- ) + 'valid': True, + 'nodes': len(g.nodes), + 'edges': len(g.edges), + 'entry': g.entry_node, + 'errors': errors, }})) + """).format(agent_name=agent_name) + proc = subprocess.run( + ["uv", "run", "python", "-c", _json_script], + capture_output=True, + text=True, + timeout=30, + env=env, + cwd=PROJECT_ROOT, + stdin=subprocess.DEVNULL, + ) + if proc.returncode == 0: + result = json.loads(proc.stdout.strip()) + steps["schema_validation"] = { + "passed": result["valid"], + "output": ( + f"{result['nodes']} nodes, {result['edges']} edges, " + f"entry={result['entry']}" + ), + } + if result.get("errors"): + steps["schema_validation"]["errors"] = result["errors"] else: - print(json.dumps({{'valid': True}})) - """).format(agent_name=agent_name) - proc = subprocess.run( - ["uv", "run", "python", "-c", _contract_script], - capture_output=True, - text=True, - timeout=30, - env=env, - cwd=PROJECT_ROOT, - stdin=subprocess.DEVNULL, - ) - if proc.returncode == 0: - result = json.loads(proc.stdout.strip()) - steps["module_contract"] = { - "passed": result["valid"], - "output": result.get("error", "goal, nodes, edges exported correctly"), - } - else: - steps["module_contract"] = { - "passed": False, - "error": ( - f"Failed to import '{agent_name}': {proc.stderr.strip()[:1000]}. " - f"Fix: ensure {agent_name}/__init__.py exists and can be imported " - f"without errors (check syntax, missing dependencies, relative imports)." 
- ), - } - except Exception as e: - steps["module_contract"] = {"passed": False, "error": str(e)} - - # Step A: Class validation (subprocess for import isolation) - try: - proc = subprocess.run( - [ - "uv", - "run", - "python", - "-c", - f"from {agent_name} import default_agent; print(default_agent.validate())", - ], - capture_output=True, - text=True, - timeout=30, - env=env, - cwd=PROJECT_ROOT, - stdin=subprocess.DEVNULL, - ) - passed = proc.returncode == 0 - steps["class_validation"] = { - "passed": passed, - "output": (proc.stdout.strip() or proc.stderr.strip())[:2000], - } - if not passed: - steps["class_validation"]["error"] = proc.stderr.strip()[:2000] - except Exception as e: - steps["class_validation"] = {"passed": False, "error": str(e)} - - # Step A2: Node completeness — every NodeSpec in nodes/ must be in the nodes list - try: - _check_template = textwrap.dedent("""\ - import importlib, json - agent = importlib.import_module('{agent_name}') - nodes_mod = importlib.import_module('{agent_name}.nodes') - graph_ids = {{n.id for n in agent.nodes}} - defined = {{}} - for attr in dir(nodes_mod): - obj = getattr(nodes_mod, attr) - if hasattr(obj, 'id') and hasattr(obj, 'node_type'): - defined[obj.id] = attr - orphaned = set(defined) - graph_ids - errors = [ - f"Node '{{nid}}' ({{defined[nid]}}) defined in nodes/ but not in nodes list" - for nid in sorted(orphaned) - ] - sub_refs = set() - for n in agent.nodes: - for sa in getattr(n, 'sub_agents', []) or []: - sub_refs.add(sa) - for n in agent.nodes: - if n.node_type == 'gcu' and n.id not in sub_refs: - errors.append( - f"GCU node '{{n.id}}' not referenced in any node's sub_agents list" - ) - print(json.dumps({{'valid': len(errors) == 0, 'errors': errors}})) - """) - check_script = _check_template.format(agent_name=agent_name) - proc = subprocess.run( - ["uv", "run", "python", "-c", check_script], - capture_output=True, - text=True, - timeout=30, - env=env, - cwd=PROJECT_ROOT, - stdin=subprocess.DEVNULL, - ) - 
if proc.returncode == 0: - result = json.loads(proc.stdout.strip()) - steps["node_completeness"] = { - "passed": result["valid"], - "output": ( - "; ".join(result["errors"]) - if result["errors"] - else "All defined nodes are in the graph" - ), - } - if not result["valid"]: - steps["node_completeness"]["errors"] = result["errors"] - else: - steps["node_completeness"] = { - "passed": False, - "error": proc.stderr.strip()[:2000], - } - except Exception as e: - steps["node_completeness"] = {"passed": False, "error": str(e)} + steps["schema_validation"] = { + "passed": False, + "error": proc.stderr.strip()[:2000], + } + except Exception as e: + steps["schema_validation"] = {"passed": False, "error": str(e)} + else: + # Legacy Python agents: module contract + class validation + try: + _contract_script = textwrap.dedent("""\ + import importlib, json + mod = importlib.import_module('{agent_name}') + missing = [ + a for a in ('goal', 'nodes', 'edges') + if getattr(mod, a, None) is None + ] + print(json.dumps({{'valid': len(missing) == 0, 'missing': missing}})) + """).format(agent_name=agent_name) + proc = subprocess.run( + ["uv", "run", "python", "-c", _contract_script], + capture_output=True, text=True, timeout=30, + env=env, cwd=PROJECT_ROOT, stdin=subprocess.DEVNULL, + ) + if proc.returncode == 0: + result = json.loads(proc.stdout.strip()) + steps["module_contract"] = { + "passed": result["valid"], + "output": ( + f"Missing: {result['missing']}" + if result.get("missing") + else "goal, nodes, edges exported correctly" + ), + } + else: + steps["module_contract"] = { + "passed": False, + "error": proc.stderr.strip()[:1000], + } + except Exception as e: + steps["module_contract"] = {"passed": False, "error": str(e)} # Step B: Graph validation (subprocess for import isolation) # Credentials are checked at run time (run_agent_with_input preflight), @@ -1619,10 +1600,10 @@ def validate_agent_package(agent_name: str) -> str: "run", "python", "-c", - f"from 
framework.runner.runner import AgentRunner; " - f'r = AgentRunner.load("exports/{agent_name}", ' + f"from framework.loader.agent_loader import AgentLoader; " + f'r = AgentLoader.load("exports/{agent_name}", ' f"skip_credential_validation=True); " - f'print("AgentRunner.load (graph-only): OK")', + f'print("AgentLoader.load (graph-only): OK")', ], capture_output=True, text=True, @@ -1659,7 +1640,9 @@ def validate_agent_package(agent_name: str) -> str: # Step D: Tests (direct call) try: test_result = _run_agent_tests_impl(agent_name) - if "error" in test_result: + if test_result.get("skipped"): + steps["tests"] = {"passed": True, "output": "No tests (skipped)"} + elif "error" in test_result: steps["tests"] = {"passed": False, "error": test_result["error"]} else: all_passed = test_result.get("failed", 0) == 0 and test_result.get("errors", 0) == 0 @@ -1697,703 +1680,6 @@ def validate_agent_package(agent_name: str) -> str: # ── Meta-agent: Package initialization ───────────────────────────────────── -def _snake_to_camel(name: str) -> str: - """Convert snake_case to CamelCase.""" - return "".join(word.capitalize() for word in name.split("_")) - - -def _node_var_name(node_id: str) -> str: - """Convert node id to a Python variable name.""" - return node_id.replace("-", "_") + "_node" - - -@mcp.tool() -def initialize_and_build_agent( - agent_name: str, - nodes: str | None = None, - _draft: dict | None = None, -) -> str: - """Scaffold a new agent package with placeholder files. - - Creates exports/{agent_name}/ with all files needed for a runnable agent: - config.py, nodes/__init__.py, agent.py, __init__.py, __main__.py, - mcp_servers.json, tests/conftest.py. - - After initialization, customize the generated files: - - System prompts and node logic in nodes/__init__.py - - Goal and edges in agent.py - - CLI options in __main__.py - - Args: - agent_name: Name for the agent package. Must be snake_case (e.g. 'my_agent'). 
- nodes: Comma-separated node names (snake_case or kebab-case). - If omitted, a single 'start' node is created. - Example: 'intake,process,review' - _draft: Internal. Draft graph metadata from planning phase, used to - pre-populate descriptions, goals, and node metadata. - - Returns: - JSON with files written and next steps. - """ - import re - - if not re.match(r"^[a-z][a-z0-9_]*$", agent_name): - return json.dumps( - { - "success": False, - "error": ( - f"Invalid agent_name '{agent_name}'. Must be snake_case: " - "lowercase letters, numbers, underscores, starting with a letter." - ), - } - ) - - node_list = [n.strip() for n in nodes.split(",") if n.strip()] if nodes else ["start"] - - # Build draft node lookup for pre-populating metadata from planning phase - _draft_nodes: dict[str, dict] = {} - if _draft and _draft.get("nodes"): - for dn in _draft["nodes"]: - _draft_nodes[dn.get("id", "")] = dn - - # Extract top-level draft metadata early so it's available for all templates - _draft_desc = (_draft.get("description") or "") if _draft else "" - - class_name = _snake_to_camel(agent_name) - human_name = agent_name.replace("_", " ").title() - entry_node = node_list[0] - - exports_dir = os.path.join(PROJECT_ROOT, "exports", agent_name) - nodes_dir = os.path.join(exports_dir, "nodes") - tests_dir = os.path.join(exports_dir, "tests") - os.makedirs(nodes_dir, exist_ok=True) - os.makedirs(tests_dir, exist_ok=True) - - files_written: dict[str, dict] = {} - - def _write(rel_path: str, content: str) -> None: - full = os.path.join(exports_dir, rel_path) - os.makedirs(os.path.dirname(full), exist_ok=True) - with open(full, "w", encoding="utf-8") as f: - f.write(content) - files_written[rel_path] = { - "path": f"exports/{agent_name}/{rel_path}", - "size_bytes": os.path.getsize(full), - } - - # -- config.py -- - _write( - "config.py", - f'''\ -"""Runtime configuration.""" - -import json -from dataclasses import dataclass, field -from pathlib import Path - - -def 
_load_preferred_model() -> str: - """Load preferred model from ~/.hive/configuration.json.""" - config_path = Path.home() / ".hive" / "configuration.json" - if config_path.exists(): - try: - with open(config_path) as f: - config = json.load(f) - llm = config.get("llm", {{}}) - if llm.get("provider") and llm.get("model"): - return f"{{llm[\'provider\']}}/{{llm[\'model\']}}" - except Exception: - pass - return "anthropic/claude-sonnet-4-20250514" - - -@dataclass -class RuntimeConfig: - model: str = field(default_factory=_load_preferred_model) - temperature: float = 0.7 - max_tokens: int = 40000 - api_key: str | None = None - api_base: str | None = None - - -default_config = RuntimeConfig() - - -@dataclass -class AgentMetadata: - name: str = "{human_name}" - version: str = "1.0.0" - description: str = "{_draft_desc or "TODO: Add agent description."}" - intro_message: str = "TODO: Add intro message." - - -metadata = AgentMetadata() -''', - ) - - # -- nodes/__init__.py -- - node_specs = [] - node_var_names = [] - for node_id in node_list: - var = _node_var_name(node_id) - node_var_names.append(var) - is_first = node_id == entry_node - - # Use draft metadata to pre-populate if available - dn = _draft_nodes.get(node_id, {}) - node_name = dn.get("name") or node_id.replace("_", " ").replace("-", " ").title() - node_desc = dn.get("description") or "TODO: Describe what this node does." - node_type = dn.get("node_type") or "event_loop" - node_tools = dn.get("tools") or [] - node_input_keys = dn.get("input_keys") or [] - node_output_keys = dn.get("output_keys") or [] - node_sc = dn.get("success_criteria") or "TODO: Define success criteria." 
- - node_specs.append(f'''\ -{var} = NodeSpec( - id="{node_id}", - name="{node_name}", - description="{node_desc}", - node_type="{node_type}", - client_facing={is_first}, - max_node_visits=0, - input_keys={node_input_keys!r}, - output_keys={node_output_keys!r}, - nullable_output_keys=[], - success_criteria="{node_sc}", - system_prompt="""\\ -TODO: Add system prompt for this node. -""", - tools={node_tools!r}, -)''') - - nodes_init = f'''\ -"""Node definitions for {human_name}.""" - -from framework.graph import NodeSpec - -{chr(10).join(node_specs)} - -__all__ = {node_var_names!r} -''' - _write("nodes/__init__.py", nodes_init) - - # -- agent.py -- - node_imports = ", ".join(node_var_names) - nodes_list = ", ".join(node_var_names) - - # Use draft edges if available, otherwise generate linear edges - _draft_edges = _draft.get("edges", []) if _draft else [] - edge_defs = [] - if _draft_edges: - for de in _draft_edges: - eid = de.get("id", f"{de.get('source', '')}-to-{de.get('target', '')}") - src = de.get("source", "") - tgt = de.get("target", "") - cond = de.get("condition", "on_success").upper() - desc = de.get("description", "") - desc_line = f'\n description="{desc}",' if desc else "" - edge_defs.append(f"""\ - EdgeSpec( - id="{eid}", - source="{src}", - target="{tgt}", - condition=EdgeCondition.{cond},{desc_line} - priority=1, - ),""") - else: - for i in range(len(node_list) - 1): - src, tgt = node_list[i], node_list[i + 1] - edge_defs.append(f"""\ - EdgeSpec( - id="{src}-to-{tgt}", - source="{src}", - target="{tgt}", - condition=EdgeCondition.ON_SUCCESS, - priority=1, - ),""") - edges_str = "\n".join(edge_defs) if edge_defs else " # TODO: Add edges" - - # Pre-populate goal from draft metadata - _draft_goal = ( - (_draft.get("goal") or "TODO: Describe the agent's goal.") - if _draft - else "TODO: Describe the agent's goal." 
- ) - _draft_sc = (_draft.get("success_criteria") or []) if _draft else [] - _draft_constraints = (_draft.get("constraints") or []) if _draft else [] - - # Build success criteria entries - if _draft_sc: - sc_entries = "\n".join( - f"""\ - SuccessCriterion( - id="sc-{i + 1}", - description="{sc}", - metric="TODO", - target="TODO", - weight=1.0, - ),""" - for i, sc in enumerate(_draft_sc) - ) - else: - sc_entries = """\ - SuccessCriterion( - id="sc-1", - description="TODO: Define success criterion.", - metric="TODO", - target="TODO", - weight=1.0, - ),""" - - # Build constraint entries - if _draft_constraints: - constraint_entries = "\n".join( - f"""\ - Constraint( - id="c-{i + 1}", - description="{c}", - constraint_type="hard", - category="functional", - ),""" - for i, c in enumerate(_draft_constraints) - ) - else: - constraint_entries = """\ - Constraint( - id="c-1", - description="TODO: Define constraint.", - constraint_type="hard", - category="functional", - ),""" - - _write( - "agent.py", - f'''\ -"""Agent graph construction for {human_name}.""" - -from pathlib import Path - -from framework.graph import EdgeSpec, EdgeCondition, Goal, SuccessCriterion, Constraint -from framework.graph.edge import GraphSpec -from framework.graph.executor import ExecutionResult -from framework.graph.checkpoint_config import CheckpointConfig -from framework.llm import LiteLLMProvider -from framework.runner.tool_registry import ToolRegistry -from framework.runtime.agent_runtime import create_agent_runtime -from framework.runtime.execution_stream import EntryPointSpec - -from .config import default_config, metadata -from .nodes import {node_imports} - -# Goal definition -goal = Goal( - id="{agent_name}-goal", - name="{human_name}", - description="{_draft_goal}", - success_criteria=[ -{sc_entries} - ], - constraints=[ -{constraint_entries} - ], -) - -# Node list -nodes = [{nodes_list}] - -# Edge definitions -edges = [ -{edges_str} -] - -# Graph configuration -entry_node = 
"{entry_node}" -entry_points = {{"start": "{entry_node}"}} -pause_nodes = [] -terminal_nodes = [] - -conversation_mode = "continuous" -identity_prompt = "TODO: Add identity prompt." -loop_config = {{ - "max_iterations": 100, - "max_tool_calls_per_turn": 30, - "max_history_tokens": 32000, -}} - - -class {class_name}: - def __init__(self, config=None): - self.config = config or default_config - self.goal = goal - self.nodes = nodes - self.edges = edges - self.entry_node = entry_node - self.entry_points = entry_points - self.pause_nodes = pause_nodes - self.terminal_nodes = terminal_nodes - self._graph = None - self._agent_runtime = None - self._tool_registry = None - self._storage_path = None - - def _build_graph(self): - return GraphSpec( - id="{agent_name}-graph", - goal_id=self.goal.id, - version="1.0.0", - entry_node=self.entry_node, - entry_points=self.entry_points, - terminal_nodes=self.terminal_nodes, - pause_nodes=self.pause_nodes, - nodes=self.nodes, - edges=self.edges, - default_model=self.config.model, - max_tokens=self.config.max_tokens, - loop_config=loop_config, - conversation_mode=conversation_mode, - identity_prompt=identity_prompt, - ) - - def _setup(self): - self._storage_path = Path.home() / ".hive" / "agents" / "{agent_name}" - self._storage_path.mkdir(parents=True, exist_ok=True) - self._tool_registry = ToolRegistry() - mcp_config = Path(__file__).parent / "mcp_servers.json" - if mcp_config.exists(): - self._tool_registry.load_mcp_config(mcp_config) - llm = LiteLLMProvider( - model=self.config.model, - api_key=self.config.api_key, - api_base=self.config.api_base, - ) - tools = list(self._tool_registry.get_tools().values()) - tool_executor = self._tool_registry.get_executor() - self._graph = self._build_graph() - self._agent_runtime = create_agent_runtime( - graph=self._graph, - goal=self.goal, - storage_path=self._storage_path, - entry_points=[ - EntryPointSpec( - id="default", - name="Default", - entry_node=self.entry_node, - 
trigger_type="manual", - isolation_level="shared", - ), - ], - llm=llm, - tools=tools, - tool_executor=tool_executor, - checkpoint_config=CheckpointConfig( - enabled=True, - checkpoint_on_node_complete=True, - checkpoint_max_age_days=7, - async_checkpoint=True, - ), - ) - - async def start(self): - if self._agent_runtime is None: - self._setup() - if not self._agent_runtime.is_running: - await self._agent_runtime.start() - - async def stop(self): - if self._agent_runtime and self._agent_runtime.is_running: - await self._agent_runtime.stop() - self._agent_runtime = None - - async def trigger_and_wait( - self, - entry_point="default", - input_data=None, - timeout=None, - session_state=None, - ): - if self._agent_runtime is None: - raise RuntimeError("Agent not started. Call start() first.") - return await self._agent_runtime.trigger_and_wait( - entry_point_id=entry_point, - input_data=input_data or {{}}, - session_state=session_state, - ) - - async def run(self, context, session_state=None): - await self.start() - try: - result = await self.trigger_and_wait( - "default", context, session_state=session_state - ) - return result or ExecutionResult(success=False, error="Execution timeout") - finally: - await self.stop() - - def info(self): - return {{ - "name": metadata.name, - "version": metadata.version, - "description": metadata.description, - "goal": {{ - "name": self.goal.name, - "description": self.goal.description, - }}, - "nodes": [n.id for n in self.nodes], - "edges": [e.id for e in self.edges], - "entry_node": self.entry_node, - "entry_points": self.entry_points, - "terminal_nodes": self.terminal_nodes, - "client_facing_nodes": [n.id for n in self.nodes if n.client_facing], - }} - - def validate(self): - errors, warnings = [], [] - node_ids = {{n.id for n in self.nodes}} - for e in self.edges: - if e.source not in node_ids: - errors.append(f"Edge {{e.id}}: source '{{e.source}}' not found") - if e.target not in node_ids: - errors.append(f"Edge {{e.id}}: target 
'{{e.target}}' not found") - if self.entry_node not in node_ids: - errors.append(f"Entry node '{{self.entry_node}}' not found") - for t in self.terminal_nodes: - if t not in node_ids: - errors.append(f"Terminal node '{{t}}' not found") - for ep_id, nid in self.entry_points.items(): - if nid not in node_ids: - errors.append(f"Entry point '{{ep_id}}' references unknown node '{{nid}}'") - - return {{"valid": len(errors) == 0, "errors": errors, "warnings": warnings}} - - -default_agent = {class_name}() -''', - ) - - # -- __init__.py -- - _write( - "__init__.py", - f'''\ -"""{human_name} — TODO: Add description.""" - -from .agent import ( - {class_name}, - default_agent, - goal, - nodes, - edges, - entry_node, - entry_points, - pause_nodes, - terminal_nodes, - conversation_mode, - identity_prompt, - loop_config, -) -from .config import default_config, metadata - -__all__ = [ - "{class_name}", - "default_agent", - "goal", - "nodes", - "edges", - "entry_node", - "entry_points", - "pause_nodes", - "terminal_nodes", - "conversation_mode", - "identity_prompt", - "loop_config", - "default_config", - "metadata", -] -''', - ) - - # -- __main__.py -- - _write( - "__main__.py", - f'''\ -"""CLI entry point for {human_name}.""" - -import asyncio -import json -import logging -import sys - -import click - -from .agent import default_agent, {class_name} - - -def setup_logging(verbose=False, debug=False): - if debug: - level, fmt = logging.DEBUG, "%(asctime)s %(name)s: %(message)s" - elif verbose: - level, fmt = logging.INFO, "%(message)s" - else: - level, fmt = logging.WARNING, "%(levelname)s: %(message)s" - logging.basicConfig(level=level, format=fmt, stream=sys.stderr) - - -@click.group() -@click.version_option(version="1.0.0") -def cli(): - """{human_name}.""" - pass - - -@cli.command() -@click.option("--verbose", "-v", is_flag=True) -def run(verbose): - """Execute the agent.""" - setup_logging(verbose=verbose) - result = asyncio.run(default_agent.run({{}})) - click.echo( - 
json.dumps( - {{"success": result.success, "output": result.output}}, - indent=2, - default=str, - ) - ) - sys.exit(0 if result.success else 1) - - -@cli.command() -def info(): - """Show agent info.""" - data = default_agent.info() - click.echo( - f"Agent: {{data[\'name\']}}\n" - f"Version: {{data[\'version\']}}\n" - f"Description: {{data[\'description\']}}" - ) - click.echo(f"Nodes: {{', '.join(data[\'nodes\'])}}") - click.echo(f"Client-facing: {{', '.join(data[\'client_facing_nodes\'])}}") - - -@cli.command() -def validate(): - """Validate agent structure.""" - v = default_agent.validate() - if v["valid"]: - click.echo("Agent is valid") - else: - click.echo("Errors:") - for e in v["errors"]: - click.echo(f" {{e}}") - sys.exit(0 if v["valid"] else 1) - - -if __name__ == "__main__": - cli() -''', - ) - - # -- mcp_servers.json -- - mcp_config: dict = { - "hive-tools": { - "transport": "stdio", - "command": "uv", - "args": ["run", "python", "mcp_server.py", "--stdio"], - "cwd": "../../tools", - "description": "Hive tools MCP server", - }, - "gcu-tools": { - "transport": "stdio", - "command": "uv", - "args": ["run", "python", "-m", "gcu.server", "--stdio"], - "cwd": "../../tools", - "description": "GCU browser automation tools", - }, - } - - _write("mcp_servers.json", json.dumps(mcp_config, indent=2)) - - # -- tests/conftest.py -- - _write( - "tests/conftest.py", - '''\ -"""Test fixtures.""" - -import sys -from pathlib import Path - -import pytest - -_repo_root = Path(__file__).resolve().parents[3] -for _p in ["exports", "core"]: - _path = str(_repo_root / _p) - if _path not in sys.path: - sys.path.insert(0, _path) - -AGENT_PATH = str(Path(__file__).resolve().parents[1]) - - -@pytest.fixture(scope="session") -def agent_module(): - """Import the agent package for structural validation.""" - import importlib - - return importlib.import_module(Path(AGENT_PATH).name) - - -@pytest.fixture(scope="session") -def runner_loaded(): - """Load the agent through AgentRunner 
(structural only, no LLM needed).""" - from framework.runner.runner import AgentRunner - - return AgentRunner.load(AGENT_PATH) -''', - ) - - # Build list of all generated file paths for the caller. - all_file_paths = [info["path"] for info in files_written.values()] - - return json.dumps( - { - "success": True, - "agent_name": agent_name, - "class_name": class_name, - "entry_node": entry_node, - "nodes": node_list, - "files_written": files_written, - "file_count": len(files_written), - "files": all_file_paths, - "next_steps": [ - ( - "IMPORTANT: All generated files are structurally complete " - "with correct imports, class definition, validate() method, " - "and __init__.py exports. Use edit_file to customize TODO " - "placeholders — do NOT use write_file to rewrite entire files, " - "as this will break imports and structure." - ), - ( - f"Use edit_file to customize system prompts, tools, " - f"input_keys, output_keys, and success_criteria in " - f"exports/{agent_name}/nodes/__init__.py" - ), - ( - f"Use edit_file to customize goal description, " - f"success_criteria values, constraint values, edge " - f"definitions, and identity_prompt in " - f"exports/{agent_name}/agent.py" - ), - ( - "Do NOT modify: imports at top of agent.py, the class " - "definition, validate() method, _build_graph()/_setup()/" - "lifecycle methods, or __init__.py exports — they are " - "already correct." 
- ), - f'Run validate_agent_package("{agent_name}") to verify structure', - ], - }, - indent=2, - ) - - # ── Main ────────────────────────────────────────────────────────────────── diff --git a/tools/src/gcu/browser/bridge.py b/tools/src/gcu/browser/bridge.py index 3ef3e4d7..6d3afd7d 100644 --- a/tools/src/gcu/browser/bridge.py +++ b/tools/src/gcu/browser/bridge.py @@ -1026,6 +1026,9 @@ class BeelineBridge: await self.highlight_point(tab_id, x, y, label=f"{key} ({x},{y})") return {"ok": True, "action": "press_at", "x": x, "y": y, "key": key} + # Duration (ms) that injected highlights stay visible before fading out. + _HIGHLIGHT_DURATION_MS = 1500 + async def highlight_rect( self, tab_id: int, @@ -1036,61 +1039,112 @@ class BeelineBridge: label: str = "", color: dict | None = None, ) -> None: - """Draw a CDP Overlay highlight box in the live browser window. + """Inject a visible highlight overlay into the page DOM. - Visible in the next screenshot. Automatically cleared on the next - interaction or by calling clear_highlight(). + Creates a fixed-position div with border, background tint, and an + optional label tag. The element fades out after ``_HIGHLIGHT_DURATION_MS`` + and removes itself. Much more visible than the CDP Overlay API. 
""" - await self.cdp_attach(tab_id) - await self._try_enable_domain(tab_id, "Overlay") - fill = color or {"r": 59, "g": 130, "b": 246, "a": 0.35} # blue-500 @ 35% - outline = {"r": fill["r"], "g": fill["g"], "b": fill["b"], "a": 1.0} - await self._cdp( - tab_id, - "Overlay.highlightRect", - { - "x": int(x), - "y": int(y), - "width": max(1, int(w)), - "height": max(1, int(h)), - "color": fill, - "outlineColor": outline, - }, - ) + fill = color or {"r": 59, "g": 130, "b": 246, "a": 0.18} + border_rgb = f"rgb({fill['r']},{fill['g']},{fill['b']})" + bg_rgba = f"rgba({fill['r']},{fill['g']},{fill['b']},{fill.get('a', 0.18)})" + duration = self._HIGHLIGHT_DURATION_MS + + # Escape label for safe injection + safe_label = json.dumps(label[:60]) if label else '""' + + js = f""" + (function() {{ + // Remove any previous hive highlight + var old = document.getElementById('__hive_hl'); + if (old) old.remove(); + + var box = document.createElement('div'); + box.id = '__hive_hl'; + box.style.cssText = 'position:fixed;z-index:2147483647;pointer-events:none;' + + 'left:{int(x)}px;top:{int(y)}px;width:{max(1,int(w))}px;height:{max(1,int(h))}px;' + + 'border:2px solid {border_rgb};background:{bg_rgba};' + + 'border-radius:3px;transition:opacity 0.4s ease;opacity:1;' + + 'box-shadow:0 0 8px {bg_rgba};'; + + var lbl = {safe_label}; + if (lbl) {{ + var tag = document.createElement('span'); + tag.textContent = lbl; + tag.style.cssText = 'position:absolute;left:0;top:-20px;' + + 'background:{border_rgb};color:#fff;font:bold 11px/16px system-ui;' + + 'padding:1px 6px;border-radius:3px;white-space:nowrap;max-width:200px;' + + 'overflow:hidden;text-overflow:ellipsis;'; + box.appendChild(tag); + }} + + document.documentElement.appendChild(box); + setTimeout(function() {{ box.style.opacity = '0'; }}, {duration}); + setTimeout(function() {{ box.remove(); }}, {duration + 500}); + }})(); + """ + try: + await self.cdp_attach(tab_id) + await self.evaluate(tab_id, js) + except Exception: + pass # 
best-effort visual feedback + _interaction_highlights[tab_id] = { - "x": x, - "y": y, - "w": w, - "h": h, - "label": label, - "kind": "rect", + "x": x, "y": y, "w": w, "h": h, + "label": label, "kind": "rect", } async def highlight_point(self, tab_id: int, x: float, y: float, label: str = "") -> None: - """Highlight a coordinate as a small crosshair box in the browser.""" - r = 12 # half-size of the crosshair box in CSS px - await self.highlight_rect( - tab_id, - x - r, - y - r, - r * 2, - r * 2, - label=label, - color={"r": 239, "g": 68, "b": 68, "a": 0.45}, # red-500 @ 45% - ) + """Highlight a coordinate with a pulsing dot and crosshair.""" + duration = self._HIGHLIGHT_DURATION_MS + safe_label = json.dumps(label[:60]) if label else '""' + + js = f""" + (function() {{ + var old = document.getElementById('__hive_hl'); + if (old) old.remove(); + + var dot = document.createElement('div'); + dot.id = '__hive_hl'; + dot.style.cssText = 'position:fixed;z-index:2147483647;pointer-events:none;' + + 'left:{int(x)-8}px;top:{int(y)-8}px;width:16px;height:16px;' + + 'border-radius:50%;background:rgba(239,68,68,0.7);' + + 'box-shadow:0 0 0 4px rgba(239,68,68,0.25),0 0 12px rgba(239,68,68,0.4);' + + 'transition:opacity 0.4s ease;opacity:1;'; + + var lbl = {safe_label}; + if (lbl) {{ + var tag = document.createElement('span'); + tag.textContent = lbl; + tag.style.cssText = 'position:absolute;left:20px;top:-4px;' + + 'background:rgba(239,68,68,0.9);color:#fff;font:bold 11px/16px system-ui;' + + 'padding:1px 6px;border-radius:3px;white-space:nowrap;'; + dot.appendChild(tag); + }} + + document.documentElement.appendChild(dot); + setTimeout(function() {{ dot.style.opacity = '0'; }}, {duration}); + setTimeout(function() {{ dot.remove(); }}, {duration + 500}); + }})(); + """ + try: + await self.cdp_attach(tab_id) + await self.evaluate(tab_id, js) + except Exception: + pass + _interaction_highlights[tab_id] = { - "x": x, - "y": y, - "w": 0, - "h": 0, - "label": label, - "kind": 
"point", + "x": x, "y": y, "w": 0, "h": 0, + "label": label, "kind": "point", } async def clear_highlight(self, tab_id: int) -> None: - """Remove the CDP Overlay highlight from the browser.""" + """Remove the injected highlight from the page.""" try: - await self._cdp(tab_id, "Overlay.hideHighlight") + await self.evaluate(tab_id, """ + var el = document.getElementById('__hive_hl'); + if (el) el.remove(); + """) except Exception: pass _interaction_highlights.pop(tab_id, None) @@ -1199,6 +1253,20 @@ class BeelineBridge: }, ) + # Highlight the select element + rect_result = await self.evaluate( + tab_id, + f"(function(){{const el=document.querySelector(" + f"{json.dumps(selector)});if(!el)return null;" + f"const r=el.getBoundingClientRect();" + f"return{{x:r.left,y:r.top,w:r.width,h:r.height}};}})()", + ) + rect = (rect_result or {}).get("result") + if rect: + await self.highlight_rect( + tab_id, rect["x"], rect["y"], rect["w"], rect["h"], label=selector + ) + return {"ok": True, "action": "select", "selector": selector, "selected": values} # ── Inspection ───────────────────────────────────────────────────────────── diff --git a/tools/src/gcu/browser/tools/advanced.py b/tools/src/gcu/browser/tools/advanced.py index 80c4a3e6..d75860ec 100644 --- a/tools/src/gcu/browser/tools/advanced.py +++ b/tools/src/gcu/browser/tools/advanced.py @@ -113,6 +113,28 @@ def register_advanced_tools(mcp: FastMCP) -> None: return {"ok": False, "error": "No active tab"} try: + # Show a brief toast in the browser so the user sees JS executing + snippet = script.strip().replace("'", "\\'")[:80] + toast_js = f""" + (function(){{ + var old=document.getElementById('__hive_toast');if(old)old.remove(); + var t=document.createElement('div');t.id='__hive_toast'; + t.style.cssText='position:fixed;z-index:2147483647;top:12px;right:12px;' + +'background:rgba(30,30,30,0.9);color:#a5d6ff;font:12px/18px monospace;' + +'padding:8px 14px;border-radius:6px;max-width:420px;pointer-events:none;' + 
+'white-space:pre-wrap;word-break:break-all;transition:opacity 0.4s;opacity:1;' + +'border:1px solid rgba(59,130,246,0.4);box-shadow:0 4px 12px rgba(0,0,0,0.3);'; + t.textContent='\\u25b6 '+'{snippet}'; + document.documentElement.appendChild(t); + setTimeout(function(){{t.style.opacity='0';}},2000); + setTimeout(function(){{t.remove();}},2500); + }})(); + """ + try: + await bridge.evaluate(target_tab, toast_js) + except Exception: + pass + result = await bridge.evaluate(target_tab, script) return result except Exception as e: diff --git a/tools/src/gcu/browser/tools/lifecycle.py b/tools/src/gcu/browser/tools/lifecycle.py index add68502..e39e769a 100644 --- a/tools/src/gcu/browser/tools/lifecycle.py +++ b/tools/src/gcu/browser/tools/lifecycle.py @@ -245,6 +245,7 @@ def register_lifecycle_tools(mcp: FastMCP) -> None: _contexts[profile_name] = { "groupId": group_id, "activeTabId": tab_id, + "_seedTabId": tab_id, # reused by first browser_open call } logger.info( diff --git a/tools/src/gcu/browser/tools/tabs.py b/tools/src/gcu/browser/tools/tabs.py index 5555bd45..e487cfbc 100644 --- a/tools/src/gcu/browser/tools/tabs.py +++ b/tools/src/gcu/browser/tools/tabs.py @@ -128,9 +128,13 @@ def register_tab_tools(mcp: FastMCP) -> None: return result try: - # Create tab in the group - result = await bridge.create_tab(url=url, group_id=ctx.get("groupId")) - tab_id = result.get("tabId") + # Reuse the seed about:blank tab from context.create on first open + seed_tab = ctx.pop("_seedTabId", None) + if seed_tab is not None: + tab_id = seed_tab + else: + result = await bridge.create_tab(url=url, group_id=ctx.get("groupId")) + tab_id = result.get("tabId") # Update active tab if not background if not background and tab_id is not None: